@@ -51,6 +51,8 @@ static const char *const zone_cond_name[] = {
5151 * @zone_no: The number of the zone the plug is managing.
5252 * @wp_offset: The zone write pointer location relative to the start of the zone
5353 * as a number of 512B sectors.
54+ * @from_cpu: CPU from which writes are submitted for drivers that preserve
55+ * the write order, or -1 if no CPU is currently selected.
5456 * @bio_list: The list of BIOs that are currently plugged.
5557 * @bio_work: Work struct to handle issuing of plugged BIOs
5658 * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -63,6 +65,7 @@ struct blk_zone_wplug {
6365 unsigned int flags ;
6466 unsigned int zone_no ;
6567 unsigned int wp_offset ;
68+ int from_cpu ;
6669 struct bio_list bio_list ;
6770 struct work_struct bio_work ;
6871 struct rcu_head rcu_head ;
@@ -72,8 +75,7 @@ struct blk_zone_wplug {
7275/*
7376 * Zone write plug flags bits:
7477 * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
75- * that is, that write BIOs are being throttled due to a write BIO already
76- * being executed or the zone write plug bio list is not empty.
78+ * that is, that write BIOs are being throttled.
7779 * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
7880 * write pointer offset and need to update it.
7981 * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
@@ -568,6 +570,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
568570 zwplug -> flags = 0 ;
569571 zwplug -> zone_no = zno ;
570572 zwplug -> wp_offset = bdev_offset_from_zone_start (disk -> part0 , sector );
573+ zwplug -> from_cpu = -1 ;
571574 bio_list_init (& zwplug -> bio_list );
572575 INIT_WORK (& zwplug -> bio_work , blk_zone_wplug_bio_work );
573576 zwplug -> disk = disk ;
@@ -764,14 +767,18 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
764767static void disk_zone_wplug_schedule_bio_work (struct gendisk * disk ,
765768 struct blk_zone_wplug * zwplug )
766769{
770+ int cpu ;
771+
772+ lockdep_assert_held (& zwplug -> lock );
773+
767774 /*
768775 * Take a reference on the zone write plug and schedule the submission
769776 * of the next plugged BIO. blk_zone_wplug_bio_work() will release the
770777 * reference we take here.
771778 */
772- WARN_ON_ONCE (!(zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED ));
773779 refcount_inc (& zwplug -> ref );
774- queue_work (disk -> zone_wplugs_wq , & zwplug -> bio_work );
780+ cpu = zwplug -> from_cpu >= 0 ? zwplug -> from_cpu : WORK_CPU_UNBOUND ;
781+ queue_work_on (cpu , disk -> zone_wplugs_wq , & zwplug -> bio_work );
775782}
776783
777784static inline void disk_zone_wplug_add_bio (struct gendisk * disk ,
@@ -932,7 +939,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
932939 * We know such BIO will fail, and that would potentially overflow our
933940 * write pointer offset beyond the end of the zone.
934941 */
935- if (disk_zone_wplug_is_full (disk , zwplug ))
942+ if (!disk -> queue -> limits .driver_preserves_write_order
943+ && disk_zone_wplug_is_full (disk , zwplug ))
936944 return false;
937945
938946 if (bio_op (bio ) == REQ_OP_ZONE_APPEND ) {
@@ -956,7 +964,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
956964 * with a start sector not aligned to the zone write pointer
957965 * will fail.
958966 */
959- if (bio_offset_from_zone_start (bio ) != zwplug -> wp_offset )
967+ if (!disk -> queue -> limits .driver_preserves_write_order
968+ && bio_offset_from_zone_start (bio ) != zwplug -> wp_offset )
960969 return false;
961970 }
962971
@@ -966,9 +975,11 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
966975 return true;
967976}
968977
969- static bool blk_zone_wplug_handle_write (struct bio * bio , unsigned int nr_segs )
978+ static bool blk_zone_wplug_handle_write (struct bio * bio , unsigned int nr_segs ,
979+ int from_cpu )
970980{
971981 struct gendisk * disk = bio -> bi_bdev -> bd_disk ;
982+ const bool dpwo = disk -> queue -> limits .driver_preserves_write_order ;
972983 sector_t sector = bio -> bi_iter .bi_sector ;
973984 bool schedule_bio_work = false;
974985 struct blk_zone_wplug * zwplug ;
@@ -1033,8 +1044,23 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10331044 return true;
10341045 }
10351046
1036- /* Otherwise, plug and submit the BIO. */
1037- zwplug -> flags |= BLK_ZONE_WPLUG_PLUGGED ;
1047+ if (dpwo && zwplug -> from_cpu < 0 ) {
1048+ /* No zoned writes are in progress. Select the current CPU. */
1049+ zwplug -> from_cpu = raw_smp_processor_id ();
1050+ goto plug ;
1051+ } else if (dpwo ) {
1052+ /*
1053+ * The block driver preserves the write order. Submit the bio
1054+ * from zwplug->from_cpu.
1055+ */
1056+ goto plug ;
1057+ } else {
1058+ /*
1059+ * The block driver does not preserve the write order. Plug and
1060+ * submit the BIO.
1061+ */
1062+ zwplug -> flags |= BLK_ZONE_WPLUG_PLUGGED ;
1063+ }
10381064
10391065 spin_unlock_irqrestore (& zwplug -> lock , flags );
10401066
@@ -1143,7 +1169,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu)
11431169 fallthrough ;
11441170 case REQ_OP_WRITE :
11451171 case REQ_OP_WRITE_ZEROES :
1146- return blk_zone_wplug_handle_write (bio , nr_segs );
1172+ return blk_zone_wplug_handle_write (bio , nr_segs , rq_cpu );
11471173 case REQ_OP_ZONE_RESET :
11481174 return blk_zone_wplug_handle_reset_or_finish (bio , 0 );
11491175 case REQ_OP_ZONE_FINISH :
@@ -1175,6 +1201,9 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
11751201
11761202 zwplug -> flags &= ~BLK_ZONE_WPLUG_PLUGGED ;
11771203
1204+ if (refcount_read (& zwplug -> ref ) == 2 )
1205+ zwplug -> from_cpu = -1 ;
1206+
11781207 /*
11791208 * If the zone is full (it was fully written or finished) or empty
11801209 * (it was reset), remove its zone write plug from the hash table.
@@ -1257,9 +1286,10 @@ void blk_zone_write_plug_finish_request(struct request *req)
12571286 disk_put_zone_wplug (zwplug );
12581287}
12591288
1260- static void blk_zone_submit_one_bio (struct blk_zone_wplug * zwplug )
1289+ static bool blk_zone_submit_one_bio (struct blk_zone_wplug * zwplug )
12611290{
12621291 struct block_device * bdev ;
1292+ struct gendisk * disk ;
12631293 unsigned long flags ;
12641294 struct bio * bio ;
12651295
@@ -1274,7 +1304,7 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12741304 if (!bio ) {
12751305 zwplug -> flags &= ~BLK_ZONE_WPLUG_PLUGGED ;
12761306 spin_unlock_irqrestore (& zwplug -> lock , flags );
1277- return ;
1307+ return false ;
12781308 }
12791309
12801310 if (!blk_zone_wplug_prepare_bio (zwplug , bio )) {
@@ -1285,6 +1315,7 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12851315 spin_unlock_irqrestore (& zwplug -> lock , flags );
12861316
12871317 bdev = bio -> bi_bdev ;
1318+ disk = bdev -> bd_disk ;
12881319
12891320 /*
12901321 * blk-mq devices will reuse the extra reference on the request queue
@@ -1298,14 +1329,18 @@ static void blk_zone_submit_one_bio(struct blk_zone_wplug *zwplug)
12981329 } else {
12991330 blk_mq_submit_bio (bio );
13001331 }
1332+
1333+ return disk -> queue -> limits .driver_preserves_write_order &&
1334+ !need_resched ();
13011335}
13021336
13031337static void blk_zone_wplug_bio_work (struct work_struct * work )
13041338{
13051339 struct blk_zone_wplug * zwplug =
13061340 container_of (work , struct blk_zone_wplug , bio_work );
13071341
1308- blk_zone_submit_one_bio (zwplug );
1342+ while (blk_zone_submit_one_bio (zwplug ))
1343+ ;
13091344
13101345 /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
13111346 disk_put_zone_wplug (zwplug );
@@ -1831,17 +1866,20 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
18311866 unsigned int zwp_zone_no , zwp_ref ;
18321867 unsigned int zwp_bio_list_size ;
18331868 unsigned long flags ;
1869+ int from_cpu ;
18341870
18351871 spin_lock_irqsave (& zwplug -> lock , flags );
18361872 zwp_zone_no = zwplug -> zone_no ;
18371873 zwp_flags = zwplug -> flags ;
18381874 zwp_ref = refcount_read (& zwplug -> ref );
18391875 zwp_wp_offset = zwplug -> wp_offset ;
18401876 zwp_bio_list_size = bio_list_size (& zwplug -> bio_list );
1877+ from_cpu = zwplug -> from_cpu ;
18411878 spin_unlock_irqrestore (& zwplug -> lock , flags );
18421879
1843- seq_printf (m , "%u 0x%x %u %u %u\n" , zwp_zone_no , zwp_flags , zwp_ref ,
1844- zwp_wp_offset , zwp_bio_list_size );
1880+ seq_printf (m , "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u from_cpu %d\n" ,
1881+ zwp_zone_no , zwp_flags , zwp_ref , zwp_wp_offset ,
1882+ zwp_bio_list_size , from_cpu );
18451883}
18461884
18471885int queue_zone_wplugs_show (void * data , struct seq_file * m )
0 commit comments