Skip to content

Commit d4ae499

Browse files
bvanassche authored and kawasaki committed
blk-zoned: Support pipelining of zoned writes
Support pipelining of zoned writes if the block driver preserves the write order per hardware queue. Track, per zone, the software queue to which writes have been queued. If zoned writes are pipelined, submit new writes to the same software queue as the writes that are already in progress. This prevents the reordering that could occur if requests for the same zone were submitted to different software or hardware queues.

Cc: Christoph Hellwig <[email protected]>
Cc: Damien Le Moal <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
1 parent 6236dc5 commit d4ae499

2 files changed

Lines changed: 55 additions & 17 deletions

File tree

block/blk-mq.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3145,8 +3145,8 @@ void blk_mq_submit_bio(struct bio *bio)
31453145
/*
31463146
* A BIO that was released from a zone write plug has already been
31473147
* through the preparation in this function, already holds a reference
3148-
* on the queue usage counter, and is the only write BIO in-flight for
3149-
* the target zone. Go straight to preparing a request for it.
3148+
* on the queue usage counter. Go straight to preparing a request for
3149+
* it.
31503150
*/
31513151
if (bio_zone_write_plugging(bio)) {
31523152
nr_segs = bio->__bi_nr_segments;

block/blk-zoned.c

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ static const char *const zone_cond_name[] = {
5151
* @zone_no: The number of the zone the plug is managing.
5252
* @wp_offset: The zone write pointer location relative to the start of the zone
5353
* as a number of 512B sectors.
54+
* @from_cpu: Software queue to submit writes from for drivers that preserve
55+
* the write order.
5456
* @bio_list: The list of BIOs that are currently plugged.
5557
* @bio_work: Work struct to handle issuing of plugged BIOs
5658
* @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -63,6 +65,7 @@ struct blk_zone_wplug {
6365
unsigned int flags;
6466
unsigned int zone_no;
6567
unsigned int wp_offset;
68+
int from_cpu;
6669
struct bio_list bio_list;
6770
struct work_struct bio_work;
6871
struct rcu_head rcu_head;
@@ -72,8 +75,7 @@ struct blk_zone_wplug {
7275
/*
7376
* Zone write plug flags bits:
7477
* - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
75-
* that is, that write BIOs are being throttled due to a write BIO already
76-
* being executed or the zone write plug bio list is not empty.
78+
* that is, that write BIOs are being throttled.
7779
* - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
7880
* write pointer offset and need to update it.
7981
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
@@ -568,6 +570,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
568570
zwplug->flags = 0;
569571
zwplug->zone_no = zno;
570572
zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
573+
zwplug->from_cpu = -1;
571574
bio_list_init(&zwplug->bio_list);
572575
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
573576
zwplug->disk = disk;
@@ -764,14 +767,18 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
764767
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
765768
struct blk_zone_wplug *zwplug)
766769
{
770+
int cpu;
771+
772+
lockdep_assert_held(&zwplug->lock);
773+
767774
/*
768775
* Take a reference on the zone write plug and schedule the submission
769776
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
770777
* reference we take here.
771778
*/
772-
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
773779
refcount_inc(&zwplug->ref);
774-
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
780+
cpu = zwplug->from_cpu >= 0 ? zwplug->from_cpu : WORK_CPU_UNBOUND;
781+
queue_work_on(cpu, disk->zone_wplugs_wq, &zwplug->bio_work);
775782
}
776783

777784
static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
@@ -932,7 +939,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
932939
* We know such BIO will fail, and that would potentially overflow our
933940
* write pointer offset beyond the end of the zone.
934941
*/
935-
if (disk_zone_wplug_is_full(disk, zwplug))
942+
if (!disk->queue->limits.driver_preserves_write_order
943+
&& disk_zone_wplug_is_full(disk, zwplug))
936944
return false;
937945

938946
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -956,7 +964,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
956964
* with a start sector not aligned to the zone write pointer
957965
* will fail.
958966
*/
959-
if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
967+
if (!disk->queue->limits.driver_preserves_write_order
968+
&& bio_offset_from_zone_start(bio) != zwplug->wp_offset)
960969
return false;
961970
}
962971

@@ -966,9 +975,11 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
966975
return true;
967976
}
968977

969-
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
978+
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs,
979+
int from_cpu)
970980
{
971981
struct gendisk *disk = bio->bi_bdev->bd_disk;
982+
const bool dpwo = disk->queue->limits.driver_preserves_write_order;
972983
sector_t sector = bio->bi_iter.bi_sector;
973984
bool schedule_bio_work = false;
974985
struct blk_zone_wplug *zwplug;
@@ -1033,8 +1044,23 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10331044
return true;
10341045
}
10351046

1036-
/* Otherwise, plug and submit the BIO. */
1037-
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
1047+
if (dpwo && zwplug->from_cpu < 0) {
1048+
/* No zoned writes are in progress. Select the current CPU. */
1049+
zwplug->from_cpu = raw_smp_processor_id();
1050+
goto plug;
1051+
} else if (dpwo) {
1052+
/*
1053+
* The block driver preserves the write order. Submit the bio
1054+
* from zwplug->from_cpu.
1055+
*/
1056+
goto plug;
1057+
} else {
1058+
/*
1059+
* The block driver does not preserve the write order. Plug and
1060+
* submit the BIO.
1061+
*/
1062+
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
1063+
}
10381064

10391065
spin_unlock_irqrestore(&zwplug->lock, flags);
10401066

@@ -1143,7 +1169,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu)
11431169
fallthrough;
11441170
case REQ_OP_WRITE:
11451171
case REQ_OP_WRITE_ZEROES:
1146-
return blk_zone_wplug_handle_write(bio, nr_segs);
1172+
return blk_zone_wplug_handle_write(bio, nr_segs, rq_cpu);
11471173
case REQ_OP_ZONE_RESET:
11481174
return blk_zone_wplug_handle_reset_or_finish(bio, 0);
11491175
case REQ_OP_ZONE_FINISH:
@@ -1175,6 +1201,9 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
11751201

11761202
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
11771203

1204+
if (refcount_read(&zwplug->ref) == 2)
1205+
zwplug->from_cpu = -1;
1206+
11781207
/*
11791208
* If the zone is full (it was fully written or finished) or empty
11801209
* (it was reset), remove its zone write plug from the hash table.
@@ -1257,9 +1286,10 @@ void blk_zone_write_plug_finish_request(struct request *req)
12571286
disk_put_zone_wplug(zwplug);
12581287
}
12591288

1260-
static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
1289+
static bool blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12611290
{
12621291
struct block_device *bdev;
1292+
struct gendisk *disk;
12631293
unsigned long flags;
12641294
struct bio *bio;
12651295

@@ -1274,7 +1304,7 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12741304
if (!bio) {
12751305
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
12761306
spin_unlock_irqrestore(&zwplug->lock, flags);
1277-
return;
1307+
return false;
12781308
}
12791309

12801310
if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
@@ -1285,6 +1315,7 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12851315
spin_unlock_irqrestore(&zwplug->lock, flags);
12861316

12871317
bdev = bio->bi_bdev;
1318+
disk = bdev->bd_disk;
12881319

12891320
/*
12901321
* blk-mq devices will reuse the extra reference on the request queue
@@ -1298,14 +1329,18 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12981329
} else {
12991330
blk_mq_submit_bio(bio);
13001331
}
1332+
1333+
return disk->queue->limits.driver_preserves_write_order &&
1334+
!need_resched();
13011335
}
13021336

13031337
static void blk_zone_wplug_bio_work(struct work_struct *work)
13041338
{
13051339
struct blk_zone_wplug *zwplug =
13061340
container_of(work, struct blk_zone_wplug, bio_work);
13071341

1308-
blk_zone_submit_one_bio(zwplug);
1342+
while (blk_zone_submit_one_bio(zwplug))
1343+
;
13091344

13101345
/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
13111346
disk_put_zone_wplug(zwplug);
@@ -1831,17 +1866,20 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
18311866
unsigned int zwp_zone_no, zwp_ref;
18321867
unsigned int zwp_bio_list_size;
18331868
unsigned long flags;
1869+
int from_cpu;
18341870

18351871
spin_lock_irqsave(&zwplug->lock, flags);
18361872
zwp_zone_no = zwplug->zone_no;
18371873
zwp_flags = zwplug->flags;
18381874
zwp_ref = refcount_read(&zwplug->ref);
18391875
zwp_wp_offset = zwplug->wp_offset;
18401876
zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
1877+
from_cpu = zwplug->from_cpu;
18411878
spin_unlock_irqrestore(&zwplug->lock, flags);
18421879

1843-
seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref,
1844-
zwp_wp_offset, zwp_bio_list_size);
1880+
seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u from_cpu %d\n",
1881+
zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset,
1882+
zwp_bio_list_size, from_cpu);
18451883
}
18461884

18471885
int queue_zone_wplugs_show(void *data, struct seq_file *m)

0 commit comments

Comments
 (0)