Skip to content

Commit c2e3d6a

Browse files
bvanassche authored and kawasaki committed
blk-zoned: Support pipelining of zoned writes
Support pipelining of zoned writes if the block driver preserves the write order per hardware queue. Track, per zone, the software queue to which writes have been queued. If zoned writes are pipelined, submit new writes to the same software queue as the writes that are already in progress. This prevents the reordering that would occur if requests for the same zone were submitted to different software or hardware queues.

Cc: Christoph Hellwig <[email protected]>
Cc: Damien Le Moal <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
1 parent 5aa860b commit c2e3d6a

2 files changed

Lines changed: 56 additions & 14 deletions

File tree

block/blk-mq.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3145,8 +3145,8 @@ void blk_mq_submit_bio(struct bio *bio)
31453145
/*
31463146
* A BIO that was released from a zone write plug has already been
31473147
* through the preparation in this function, already holds a reference
3148-
* on the queue usage counter, and is the only write BIO in-flight for
3149-
* the target zone. Go straight to preparing a request for it.
3148+
* on the queue usage counter. Go straight to preparing a request for
3149+
* it.
31503150
*/
31513151
if (bio_zone_write_plugging(bio)) {
31523152
nr_segs = bio->__bi_nr_segments;

block/blk-zoned.c

Lines changed: 54 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ static const char *const zone_cond_name[] = {
5353
* @zone_no: The number of the zone the plug is managing.
5454
* @wp_offset: The zone write pointer location relative to the start of the zone
5555
* as a number of 512B sectors.
56+
* @from_cpu: Software queue to submit writes from for drivers that preserve
57+
* the write order.
5658
* @bio_list: The list of BIOs that are currently plugged.
5759
* @bio_work: Work struct to handle issuing of plugged BIOs
5860
* @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -65,6 +67,7 @@ struct blk_zone_wplug {
6567
unsigned int flags;
6668
unsigned int zone_no;
6769
unsigned int wp_offset;
70+
int from_cpu;
6871
struct bio_list bio_list;
6972
struct work_struct bio_work;
7073
struct rcu_head rcu_head;
@@ -74,8 +77,7 @@ struct blk_zone_wplug {
7477
/*
7578
* Zone write plug flags bits:
7679
* - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
77-
* that is, that write BIOs are being throttled due to a write BIO already
78-
* being executed or the zone write plug bio list is not empty.
80+
* that is, that write BIOs are being throttled.
7981
* - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
8082
* write pointer offset and need to update it.
8183
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
@@ -572,6 +574,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
572574
zwplug->flags = 0;
573575
zwplug->zone_no = zno;
574576
zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
577+
zwplug->from_cpu = -1;
575578
bio_list_init(&zwplug->bio_list);
576579
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
577580
zwplug->disk = disk;
@@ -768,14 +771,19 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
768771
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
769772
struct blk_zone_wplug *zwplug)
770773
{
774+
lockdep_assert_held(&zwplug->lock);
775+
771776
/*
772777
* Take a reference on the zone write plug and schedule the submission
773778
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
774779
* reference we take here.
775780
*/
776-
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
777781
refcount_inc(&zwplug->ref);
778-
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
782+
if (zwplug->from_cpu >= 0)
783+
queue_work_on(zwplug->from_cpu, disk->zone_wplugs_wq,
784+
&zwplug->bio_work);
785+
else
786+
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
779787
}
780788

781789
static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
@@ -972,9 +980,12 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
972980
return true;
973981
}
974982

975-
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
983+
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs,
984+
int from_cpu)
976985
{
977986
struct gendisk *disk = bio->bi_bdev->bd_disk;
987+
const bool ordered_hwq = bio_op(bio) != REQ_OP_ZONE_APPEND &&
988+
disk->queue->limits.features & BLK_FEAT_ORDERED_HWQ;
978989
sector_t sector = bio->bi_iter.bi_sector;
979990
bool schedule_bio_work = false;
980991
struct blk_zone_wplug *zwplug;
@@ -1034,15 +1045,38 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10341045
if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
10351046
goto add_to_bio_list;
10361047

1048+
if (ordered_hwq && zwplug->from_cpu < 0) {
1049+
/* No zoned writes are in progress. Select the current CPU. */
1050+
zwplug->from_cpu = raw_smp_processor_id();
1051+
}
1052+
1053+
if (ordered_hwq && zwplug->from_cpu == from_cpu) {
1054+
/*
1055+
* The block driver preserves the write order, zoned writes have
1056+
* not been plugged and the zoned write will be submitted from
1057+
* zwplug->from_cpu. Let the caller submit the bio.
1058+
*/
1059+
} else if (ordered_hwq) {
1060+
/*
1061+
* The block driver preserves the write order but the caller
1062+
* allocated a request from another CPU. Submit the bio from
1063+
* zwplug->from_cpu.
1064+
*/
1065+
goto plug;
1066+
} else {
1067+
/*
1068+
* The block driver does not preserve the write order. Plug and
1069+
* let the caller submit the BIO.
1070+
*/
1071+
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
1072+
}
1073+
10371074
if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
10381075
spin_unlock_irqrestore(&zwplug->lock, flags);
10391076
bio_io_error(bio);
10401077
return true;
10411078
}
10421079

1043-
/* Otherwise, plug and submit the BIO. */
1044-
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
1045-
10461080
spin_unlock_irqrestore(&zwplug->lock, flags);
10471081

10481082
return false;
@@ -1150,7 +1184,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu)
11501184
fallthrough;
11511185
case REQ_OP_WRITE:
11521186
case REQ_OP_WRITE_ZEROES:
1153-
return blk_zone_wplug_handle_write(bio, nr_segs);
1187+
return blk_zone_wplug_handle_write(bio, nr_segs, rq_cpu);
11541188
case REQ_OP_ZONE_RESET:
11551189
return blk_zone_wplug_handle_reset_or_finish(bio, 0);
11561190
case REQ_OP_ZONE_FINISH:
@@ -1182,6 +1216,9 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
11821216

11831217
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
11841218

1219+
if (refcount_read(&zwplug->ref) == 2)
1220+
zwplug->from_cpu = -1;
1221+
11851222
/*
11861223
* If the zone is full (it was fully written or finished), or empty
11871224
* (it was reset), remove its zone write plug from the hash table.
@@ -1283,6 +1320,8 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
12831320
struct blk_zone_wplug *zwplug =
12841321
container_of(work, struct blk_zone_wplug, bio_work);
12851322
struct block_device *bdev;
1323+
bool ordered_hwq = zwplug->disk->queue->limits.features &
1324+
BLK_FEAT_ORDERED_HWQ;
12861325
struct bio *bio;
12871326

12881327
do {
@@ -1323,7 +1362,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
13231362
} else {
13241363
blk_mq_submit_bio(bio);
13251364
}
1326-
} while (0);
1365+
} while (ordered_hwq);
13271366

13281367
put_zwplug:
13291368
/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
@@ -1850,17 +1889,20 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
18501889
unsigned int zwp_zone_no, zwp_ref;
18511890
unsigned int zwp_bio_list_size;
18521891
unsigned long flags;
1892+
int from_cpu;
18531893

18541894
spin_lock_irqsave(&zwplug->lock, flags);
18551895
zwp_zone_no = zwplug->zone_no;
18561896
zwp_flags = zwplug->flags;
18571897
zwp_ref = refcount_read(&zwplug->ref);
18581898
zwp_wp_offset = zwplug->wp_offset;
18591899
zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
1900+
from_cpu = zwplug->from_cpu;
18601901
spin_unlock_irqrestore(&zwplug->lock, flags);
18611902

1862-
seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref,
1863-
zwp_wp_offset, zwp_bio_list_size);
1903+
seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u from_cpu %d\n",
1904+
zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset,
1905+
zwp_bio_list_size, from_cpu);
18641906
}
18651907

18661908
int queue_zone_wplugs_show(void *data, struct seq_file *m)

0 commit comments

Comments
 (0)