Commit 143cbd5

Merge branch 'block-6.17' into for-next
* block-6.17:
  block: fix potential deadlock while running nr_hw_queue update
  block: fix lockdep warning caused by lock dependency in elv_iosched_store
  block: move elevator queue allocation logic into blk_mq_init_sched
  blk-ioc: don't hold queue_lock for ioc_lookup_icq()
  block: Enforce power-of-2 physical block size
  block: avoid possible overflow for chunk_sectors check in blk_stack_limits()
  block: Improve read ahead size for rotational devices
2 parents 4b290aa + 04225d1 commit 143cbd5

11 files changed

Lines changed: 252 additions & 149 deletions

block/bfq-iosched.c

Lines changed: 5 additions & 26 deletions
@@ -454,17 +454,10 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
  */
 static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
 {
-        struct bfq_io_cq *icq;
-        unsigned long flags;
-
         if (!current->io_context)
                 return NULL;
 
-        spin_lock_irqsave(&q->queue_lock, flags);
-        icq = icq_to_bic(ioc_lookup_icq(q));
-        spin_unlock_irqrestore(&q->queue_lock, flags);
-
-        return icq;
+        return icq_to_bic(ioc_lookup_icq(q));
 }
 
 /*
@@ -2457,15 +2450,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
                 unsigned int nr_segs)
 {
         struct bfq_data *bfqd = q->elevator->elevator_data;
-        struct request *free = NULL;
-        /*
-         * bfq_bic_lookup grabs the queue_lock: invoke it now and
-         * store its return value for later use, to avoid nesting
-         * queue_lock inside the bfqd->lock. We assume that the bic
-         * returned by bfq_bic_lookup does not go away before
-         * bfqd->lock is taken.
-         */
         struct bfq_io_cq *bic = bfq_bic_lookup(q);
+        struct request *free = NULL;
         bool ret;
 
         spin_lock_irq(&bfqd->lock);
@@ -7232,22 +7218,16 @@ static void bfq_init_root_group(struct bfq_group *root_group,
         root_group->sched_data.bfq_class_idle_last_service = jiffies;
 }
 
-static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq)
 {
         struct bfq_data *bfqd;
-        struct elevator_queue *eq;
         unsigned int i;
         struct blk_independent_access_ranges *ia_ranges = q->disk->ia_ranges;
 
-        eq = elevator_alloc(q, e);
-        if (!eq)
-                return -ENOMEM;
-
         bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
-        if (!bfqd) {
-                kobject_put(&eq->kobj);
+        if (!bfqd)
                 return -ENOMEM;
-        }
+
         eq->elevator_data = bfqd;
 
         spin_lock_irq(&q->queue_lock);
@@ -7405,7 +7385,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 out_free:
         kfree(bfqd);
-        kobject_put(&eq->kobj);
         return -ENOMEM;
 }
 
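
With elevator_alloc() moved into blk_mq_init_sched() (see block/blk-mq-sched.c below), a scheduler's init callback now receives a pre-allocated elevator_queue and no longer has to release it on failure. A minimal, hypothetical callback following the new signature might look like the sketch below; the "toy_data" structure and function name are illustrative and not part of this commit.

        /* Illustrative sketch only: private data for a hypothetical scheduler. */
        struct toy_data {
                spinlock_t lock;
        };

        static int toy_init_queue(struct request_queue *q, struct elevator_queue *eq)
        {
                struct toy_data *td;

                td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
                if (!td)
                        return -ENOMEM; /* the core drops @eq; no kobject_put() here */

                spin_lock_init(&td->lock);
                eq->elevator_data = td;
                return 0;
        }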

block/blk-ioc.c

Lines changed: 6 additions & 10 deletions
@@ -308,24 +308,23 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
 
 #ifdef CONFIG_BLK_ICQ
 /**
- * ioc_lookup_icq - lookup io_cq from ioc
+ * ioc_lookup_icq - lookup io_cq from ioc in io issue path
  * @q: the associated request_queue
  *
  * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
- * with @q->queue_lock held.
+ * from io issue path, either return NULL if current issue io to @q for the
+ * first time, or return a valid icq.
  */
 struct io_cq *ioc_lookup_icq(struct request_queue *q)
 {
         struct io_context *ioc = current->io_context;
         struct io_cq *icq;
 
-        lockdep_assert_held(&q->queue_lock);
-
         /*
          * icq's are indexed from @ioc using radix tree and hint pointer,
-         * both of which are protected with RCU. All removals are done
-         * holding both q and ioc locks, and we're holding q lock - if we
-         * find a icq which points to us, it's guaranteed to be valid.
+         * both of which are protected with RCU, io issue path ensures that
+         * both request_queue and current task are valid, the found icq
+         * is guaranteed to be valid until the io is done.
          */
         rcu_read_lock();
         icq = rcu_dereference(ioc->icq_hint);
@@ -419,10 +418,7 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q)
                 task_unlock(current);
         } else {
                 get_io_context(ioc);
-
-                spin_lock_irq(&q->queue_lock);
                 icq = ioc_lookup_icq(q);
-                spin_unlock_irq(&q->queue_lock);
         }
 
         if (!icq) {
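
The practical effect for callers is visible in the bfq change above: with the lockdep assertion and locking requirement gone, an issue-path lookup reduces to a direct call. A hedged sketch of such a caller follows; the function name is hypothetical and simply mirrors the simplified bfq_bic_lookup().

        /* Illustrative sketch: issue-path lookup without q->queue_lock. */
        static struct io_cq *example_issue_path_lookup(struct request_queue *q)
        {
                /* No io_context yet means this task has never issued I/O here. */
                if (!current->io_context)
                        return NULL;

                /*
                 * RCU inside ioc_lookup_icq() keeps the returned icq valid
                 * until the I/O being issued is done.
                 */
                return ioc_lookup_icq(q);
        }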

block/blk-mq-sched.c

Lines changed: 152 additions & 71 deletions
@@ -374,64 +374,17 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
-static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
-                                          struct blk_mq_hw_ctx *hctx,
-                                          unsigned int hctx_idx)
-{
-        if (blk_mq_is_shared_tags(q->tag_set->flags)) {
-                hctx->sched_tags = q->sched_shared_tags;
-                return 0;
-        }
-
-        hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
-                                                    q->nr_requests);
-
-        if (!hctx->sched_tags)
-                return -ENOMEM;
-        return 0;
-}
-
-static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
-{
-        blk_mq_free_rq_map(queue->sched_shared_tags);
-        queue->sched_shared_tags = NULL;
-}
-
 /* called in queue's release handler, tagset has gone away */
 static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
         struct blk_mq_hw_ctx *hctx;
         unsigned long i;
 
-        queue_for_each_hw_ctx(q, hctx, i) {
-                if (hctx->sched_tags) {
-                        if (!blk_mq_is_shared_tags(flags))
-                                blk_mq_free_rq_map(hctx->sched_tags);
-                        hctx->sched_tags = NULL;
-                }
-        }
+        queue_for_each_hw_ctx(q, hctx, i)
+                hctx->sched_tags = NULL;
 
         if (blk_mq_is_shared_tags(flags))
-                blk_mq_exit_sched_shared_tags(q);
-}
-
-static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
-{
-        struct blk_mq_tag_set *set = queue->tag_set;
-
-        /*
-         * Set initial depth at max so that we don't need to reallocate for
-         * updating nr_requests.
-         */
-        queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
-                                                BLK_MQ_NO_HCTX_IDX,
-                                                MAX_SCHED_RQ);
-        if (!queue->sched_shared_tags)
-                return -ENOMEM;
-
-        blk_mq_tag_update_sched_shared_tags(queue);
-
-        return 0;
+                q->sched_shared_tags = NULL;
 }
 
 void blk_mq_sched_reg_debugfs(struct request_queue *q)
@@ -458,45 +411,174 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q)
         mutex_unlock(&q->debugfs_mutex);
 }
 
+void blk_mq_free_sched_tags(struct elevator_tags *et,
+                struct blk_mq_tag_set *set)
+{
+        unsigned long i;
+
+        /* Shared tags are stored at index 0 in @tags. */
+        if (blk_mq_is_shared_tags(set->flags))
+                blk_mq_free_map_and_rqs(set, et->tags[0], BLK_MQ_NO_HCTX_IDX);
+        else {
+                for (i = 0; i < et->nr_hw_queues; i++)
+                        blk_mq_free_map_and_rqs(set, et->tags[i], i);
+        }
+
+        kfree(et);
+}
+
+void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+                struct blk_mq_tag_set *set)
+{
+        struct request_queue *q;
+        struct elevator_tags *et;
+
+        lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+        list_for_each_entry(q, &set->tag_list, tag_set_list) {
+                /*
+                 * Accessing q->elevator without holding q->elevator_lock is
+                 * safe because we're holding here set->update_nr_hwq_lock in
+                 * the writer context. So, scheduler update/switch code (which
+                 * acquires the same lock but in the reader context) can't run
+                 * concurrently.
+                 */
+                if (q->elevator) {
+                        et = xa_load(et_table, q->id);
+                        if (unlikely(!et))
+                                WARN_ON_ONCE(1);
+                        else
+                                blk_mq_free_sched_tags(et, set);
+                }
+        }
+}
+
+struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
+                unsigned int nr_hw_queues)
+{
+        unsigned int nr_tags;
+        int i;
+        struct elevator_tags *et;
+        gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+        if (blk_mq_is_shared_tags(set->flags))
+                nr_tags = 1;
+        else
+                nr_tags = nr_hw_queues;
+
+        et = kmalloc(sizeof(struct elevator_tags) +
+                        nr_tags * sizeof(struct blk_mq_tags *), gfp);
+        if (!et)
+                return NULL;
+        /*
+         * Default to double of smaller one between hw queue_depth and
+         * 128, since we don't split into sync/async like the old code
+         * did. Additionally, this is a per-hw queue depth.
+         */
+        et->nr_requests = 2 * min_t(unsigned int, set->queue_depth,
+                        BLKDEV_DEFAULT_RQ);
+        et->nr_hw_queues = nr_hw_queues;
+
+        if (blk_mq_is_shared_tags(set->flags)) {
+                /* Shared tags are stored at index 0 in @tags. */
+                et->tags[0] = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX,
+                                MAX_SCHED_RQ);
+                if (!et->tags[0])
+                        goto out;
+        } else {
+                for (i = 0; i < et->nr_hw_queues; i++) {
+                        et->tags[i] = blk_mq_alloc_map_and_rqs(set, i,
+                                        et->nr_requests);
+                        if (!et->tags[i])
+                                goto out_unwind;
+                }
+        }
+
+        return et;
+out_unwind:
+        while (--i >= 0)
+                blk_mq_free_map_and_rqs(set, et->tags[i], i);
+out:
+        kfree(et);
+        return NULL;
+}
+
+int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+                struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
+{
+        struct request_queue *q;
+        struct elevator_tags *et;
+        gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+        lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+        list_for_each_entry(q, &set->tag_list, tag_set_list) {
+                /*
+                 * Accessing q->elevator without holding q->elevator_lock is
+                 * safe because we're holding here set->update_nr_hwq_lock in
+                 * the writer context. So, scheduler update/switch code (which
+                 * acquires the same lock but in the reader context) can't run
+                 * concurrently.
+                 */
+                if (q->elevator) {
+                        et = blk_mq_alloc_sched_tags(set, nr_hw_queues);
+                        if (!et)
+                                goto out_unwind;
+                        if (xa_insert(et_table, q->id, et, gfp))
+                                goto out_free_tags;
+                }
+        }
+        return 0;
+out_free_tags:
+        blk_mq_free_sched_tags(et, set);
+out_unwind:
+        list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
+                if (q->elevator) {
+                        et = xa_load(et_table, q->id);
+                        if (et)
+                                blk_mq_free_sched_tags(et, set);
+                }
+        }
+        return -ENOMEM;
+}
+
 /* caller must have a reference to @e, will grab another one if successful */
-int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
+                struct elevator_tags *et)
 {
         unsigned int flags = q->tag_set->flags;
         struct blk_mq_hw_ctx *hctx;
         struct elevator_queue *eq;
         unsigned long i;
         int ret;
 
-        /*
-         * Default to double of smaller one between hw queue_depth and 128,
-         * since we don't split into sync/async like the old code did.
-         * Additionally, this is a per-hw queue depth.
-         */
-        q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
-                                   BLKDEV_DEFAULT_RQ);
+        eq = elevator_alloc(q, e, et);
+        if (!eq)
+                return -ENOMEM;
+
+        q->nr_requests = et->nr_requests;
 
         if (blk_mq_is_shared_tags(flags)) {
-                ret = blk_mq_init_sched_shared_tags(q);
-                if (ret)
-                        return ret;
+                /* Shared tags are stored at index 0 in @et->tags. */
+                q->sched_shared_tags = et->tags[0];
+                blk_mq_tag_update_sched_shared_tags(q);
         }
 
         queue_for_each_hw_ctx(q, hctx, i) {
-                ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
-                if (ret)
-                        goto err_free_map_and_rqs;
+                if (blk_mq_is_shared_tags(flags))
+                        hctx->sched_tags = q->sched_shared_tags;
+                else
+                        hctx->sched_tags = et->tags[i];
         }
 
-        ret = e->ops.init_sched(q, e);
+        ret = e->ops.init_sched(q, eq);
         if (ret)
-                goto err_free_map_and_rqs;
+                goto out;
 
         queue_for_each_hw_ctx(q, hctx, i) {
                 if (e->ops.init_hctx) {
                         ret = e->ops.init_hctx(hctx, i);
                         if (ret) {
-                                eq = q->elevator;
-                                blk_mq_sched_free_rqs(q);
                                 blk_mq_exit_sched(q, eq);
                                 kobject_put(&eq->kobj);
                                 return ret;
@@ -505,10 +587,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
         }
         return 0;
 
-err_free_map_and_rqs:
-        blk_mq_sched_free_rqs(q);
+out:
         blk_mq_sched_tags_teardown(q, flags);
-
+        kobject_put(&eq->kobj);
         q->elevator = NULL;
         return ret;
 }
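
Taken together, the new helpers split scheduler tag allocation from scheduler initialisation so the (possibly sleeping) allocation can be done before the scheduler is attached to the queue. A rough sketch of the intended single-queue call sequence follows; the wrapper function is hypothetical (the real caller would be something like the elevator switch path) and error handling is simplified.

        /* Illustrative sketch: allocate scheduler tags up front, then initialise. */
        static int example_switch_to_sched(struct request_queue *q,
                                           struct elevator_type *e)
        {
                struct blk_mq_tag_set *set = q->tag_set;
                struct elevator_tags *et;
                int ret;

                /* May sleep and allocate; done before the scheduler is attached. */
                et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues);
                if (!et)
                        return -ENOMEM;

                ret = blk_mq_init_sched(q, e, et);
                if (ret)
                        /* On failure the tags are still owned by the caller. */
                        blk_mq_free_sched_tags(et, set);

                return ret;
        }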
