@@ -53,6 +53,8 @@ static const char *const zone_cond_name[] = {
5353 * @zone_no: The number of the zone the plug is managing.
5454 * @wp_offset: The zone write pointer location relative to the start of the zone
5555 * as a number of 512B sectors.
56+ * @from_cpu: CPU whose software queue writes are submitted from, for drivers
57+ * that preserve the write order, or -1 if no CPU is selected.
5658 * @bio_list: The list of BIOs that are currently plugged.
5759 * @bio_work: Work struct to handle issuing of plugged BIOs
5860 * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -65,6 +67,7 @@ struct blk_zone_wplug {
6567 unsigned int flags ;
6668 unsigned int zone_no ;
6769 unsigned int wp_offset ;
70+ int from_cpu ;
6871 struct bio_list bio_list ;
6972 struct work_struct bio_work ;
7073 struct rcu_head rcu_head ;
@@ -74,8 +77,7 @@ struct blk_zone_wplug {
7477/*
7578 * Zone write plug flags bits:
7679 * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
77- * that is, that write BIOs are being throttled due to a write BIO already
78- * being executed or the zone write plug bio list is not empty.
80+ * that is, that write BIOs are being throttled.
7981 * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
8082 * write pointer offset and need to update it.
8183 * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
@@ -572,6 +574,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
572574 zwplug -> flags = 0 ;
573575 zwplug -> zone_no = zno ;
574576 zwplug -> wp_offset = bdev_offset_from_zone_start (disk -> part0 , sector );
577+ zwplug -> from_cpu = -1 ;
575578 bio_list_init (& zwplug -> bio_list );
576579 INIT_WORK (& zwplug -> bio_work , blk_zone_wplug_bio_work );
577580 zwplug -> disk = disk ;
@@ -768,14 +771,19 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
768771static void disk_zone_wplug_schedule_bio_work (struct gendisk * disk ,
769772 struct blk_zone_wplug * zwplug )
770773{
774+ lockdep_assert_held (& zwplug -> lock );
775+
771776 /*
772777 * Take a reference on the zone write plug and schedule the submission
773778 * of the next plugged BIO. blk_zone_wplug_bio_work() will release the
774779 * reference we take here.
775780 */
776- WARN_ON_ONCE (!(zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED ));
777781 refcount_inc (& zwplug -> ref );
778- queue_work (disk -> zone_wplugs_wq , & zwplug -> bio_work );
782+ if (zwplug -> from_cpu >= 0 )
783+ queue_work_on (zwplug -> from_cpu , disk -> zone_wplugs_wq ,
784+ & zwplug -> bio_work );
785+ else
786+ queue_work (disk -> zone_wplugs_wq , & zwplug -> bio_work );
779787}
780788
781789static inline void disk_zone_wplug_add_bio (struct gendisk * disk ,
@@ -972,9 +980,12 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
972980 return true;
973981}
974982
975- static bool blk_zone_wplug_handle_write (struct bio * bio , unsigned int nr_segs )
983+ static bool blk_zone_wplug_handle_write (struct bio * bio , unsigned int nr_segs ,
984+ int from_cpu )
976985{
977986 struct gendisk * disk = bio -> bi_bdev -> bd_disk ;
987+ const bool ordered_hwq = bio_op (bio ) != REQ_OP_ZONE_APPEND &&
988+ disk -> queue -> limits .features & BLK_FEAT_ORDERED_HWQ ;
978989 sector_t sector = bio -> bi_iter .bi_sector ;
979990 bool schedule_bio_work = false;
980991 struct blk_zone_wplug * zwplug ;
@@ -1034,15 +1045,38 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10341045 if (zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED )
10351046 goto add_to_bio_list ;
10361047
1048+ if (ordered_hwq && zwplug -> from_cpu < 0 ) {
1049+ /* No zoned writes are in progress. Select the current CPU. */
1050+ zwplug -> from_cpu = raw_smp_processor_id ();
1051+ }
1052+
1053+ if (ordered_hwq && zwplug -> from_cpu == from_cpu ) {
1054+ /*
1055+ * The block driver preserves the write order, zoned writes have
1056+ * not been plugged and the zoned write will be submitted from
1057+ * zwplug->from_cpu. Let the caller submit the bio.
1058+ */
1059+ } else if (ordered_hwq ) {
1060+ /*
1061+ * The block driver preserves the write order but the caller
1062+ * allocated a request from another CPU. Submit the bio from
1063+ * zwplug->from_cpu.
1064+ */
1065+ goto plug ;
1066+ } else {
1067+ /*
1068+ * The block driver does not preserve the write order. Plug and
1069+ * let the caller submit the BIO.
1070+ */
1071+ zwplug -> flags |= BLK_ZONE_WPLUG_PLUGGED ;
1072+ }
1073+
10371074 if (!blk_zone_wplug_prepare_bio (zwplug , bio )) {
10381075 spin_unlock_irqrestore (& zwplug -> lock , flags );
10391076 bio_io_error (bio );
10401077 return true;
10411078 }
10421079
1043- /* Otherwise, plug and submit the BIO. */
1044- zwplug -> flags |= BLK_ZONE_WPLUG_PLUGGED ;
1045-
10461080 spin_unlock_irqrestore (& zwplug -> lock , flags );
10471081
10481082 return false;
@@ -1150,7 +1184,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu)
11501184 fallthrough ;
11511185 case REQ_OP_WRITE :
11521186 case REQ_OP_WRITE_ZEROES :
1153- return blk_zone_wplug_handle_write (bio , nr_segs );
1187+ return blk_zone_wplug_handle_write (bio , nr_segs , rq_cpu );
11541188 case REQ_OP_ZONE_RESET :
11551189 return blk_zone_wplug_handle_reset_or_finish (bio , 0 );
11561190 case REQ_OP_ZONE_FINISH :
@@ -1182,6 +1216,9 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
11821216
11831217 zwplug -> flags &= ~BLK_ZONE_WPLUG_PLUGGED ;
11841218
1219+ if (refcount_read (& zwplug -> ref ) == 2 )
1220+ zwplug -> from_cpu = -1 ;
1221+
11851222 /*
11861223 * If the zone is full (it was fully written or finished) or empty
11871224 * (it was reset), remove its zone write plug from the hash table.
@@ -1283,6 +1320,8 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
12831320 struct blk_zone_wplug * zwplug =
12841321 container_of (work , struct blk_zone_wplug , bio_work );
12851322 struct block_device * bdev ;
1323+ bool ordered_hwq = zwplug -> disk -> queue -> limits .features &
1324+ BLK_FEAT_ORDERED_HWQ ;
12861325 struct bio * bio ;
12871326
12881327 do {
@@ -1323,7 +1362,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
13231362 } else {
13241363 blk_mq_submit_bio (bio );
13251364 }
1326- } while (0 );
1365+ } while (ordered_hwq );
13271366
13281367put_zwplug :
13291368 /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
@@ -1850,17 +1889,20 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
18501889 unsigned int zwp_zone_no , zwp_ref ;
18511890 unsigned int zwp_bio_list_size ;
18521891 unsigned long flags ;
1892+ int from_cpu ;
18531893
18541894 spin_lock_irqsave (& zwplug -> lock , flags );
18551895 zwp_zone_no = zwplug -> zone_no ;
18561896 zwp_flags = zwplug -> flags ;
18571897 zwp_ref = refcount_read (& zwplug -> ref );
18581898 zwp_wp_offset = zwplug -> wp_offset ;
18591899 zwp_bio_list_size = bio_list_size (& zwplug -> bio_list );
1900+ from_cpu = zwplug -> from_cpu ;
18601901 spin_unlock_irqrestore (& zwplug -> lock , flags );
18611902
1862- seq_printf (m , "%u 0x%x %u %u %u\n" , zwp_zone_no , zwp_flags , zwp_ref ,
1863- zwp_wp_offset , zwp_bio_list_size );
1903+ seq_printf (m , "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u from_cpu %d\n" ,
1904+ zwp_zone_no , zwp_flags , zwp_ref , zwp_wp_offset ,
1905+ zwp_bio_list_size , from_cpu );
18641906}
18651907
18661908int queue_zone_wplugs_show (void * data , struct seq_file * m )
0 commit comments