Skip to content

Commit 539d1b4

Browse files
shroffni authored and axboe committed
block: break pcpu_alloc_mutex dependency on freeze_lock
While nr_hw_queue update allocates tagset tags, it acquires ->pcpu_alloc_mutex after ->freeze_lock is acquired or the queue is frozen. This potentially creates a circular dependency involving ->fs_reclaim if reclaim is triggered simultaneously in a code path which first acquires ->pcpu_alloc_mutex. As the queue is already frozen while nr_hw_queue update allocates tagsets, the reclaim can't make forward progress and thus it could cause a potential deadlock as reported in lockdep splat[1]. Fix this by pre-allocating tagset tags before we freeze the queue during nr_hw_queue update. Later the allocated tagset tags can be safely installed and used after the queue is frozen. Reported-by: Yi Zhang <[email protected]> Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1] Signed-off-by: Nilay Shroff <[email protected]> Reviewed-by: Ming Lei <[email protected]> Tested-by: Yi Zhang <[email protected]> Reviewed-by: Yu Kuai <[email protected]> [axboe: fix brace style issue] Signed-off-by: Jens Axboe <[email protected]>
1 parent da46b5d commit 539d1b4

1 file changed

Lines changed: 30 additions & 15 deletions

File tree

block/blk-mq.c

Lines changed: 30 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
47934793
}
47944794
}
47954795

4796-
static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
4797-
int new_nr_hw_queues)
4796+
static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags(
4797+
struct blk_mq_tag_set *set,
4798+
int new_nr_hw_queues)
47984799
{
47994800
struct blk_mq_tags **new_tags;
48004801
int i;
48014802

48024803
if (set->nr_hw_queues >= new_nr_hw_queues)
4803-
goto done;
4804+
return NULL;
48044805

48054806
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
48064807
GFP_KERNEL, set->numa_node);
48074808
if (!new_tags)
4808-
return -ENOMEM;
4809+
return ERR_PTR(-ENOMEM);
48094810

48104811
if (set->tags)
48114812
memcpy(new_tags, set->tags, set->nr_hw_queues *
48124813
sizeof(*set->tags));
4813-
kfree(set->tags);
4814-
set->tags = new_tags;
48154814

48164815
for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
4817-
if (!__blk_mq_alloc_map_and_rqs(set, i)) {
4818-
while (--i >= set->nr_hw_queues)
4819-
__blk_mq_free_map_and_rqs(set, i);
4820-
return -ENOMEM;
4816+
if (blk_mq_is_shared_tags(set->flags)) {
4817+
new_tags[i] = set->shared_tags;
4818+
} else {
4819+
new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
4820+
set->queue_depth);
4821+
if (!new_tags[i])
4822+
goto out_unwind;
48214823
}
48224824
cond_resched();
48234825
}
48244826

4825-
done:
4826-
set->nr_hw_queues = new_nr_hw_queues;
4827-
return 0;
4827+
return new_tags;
4828+
out_unwind:
4829+
while (--i >= set->nr_hw_queues) {
4830+
if (!blk_mq_is_shared_tags(set->flags))
4831+
blk_mq_free_map_and_rqs(set, new_tags[i], i);
4832+
}
4833+
kfree(new_tags);
4834+
return ERR_PTR(-ENOMEM);
48284835
}
48294836

48304837
/*
@@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
51135120
unsigned int memflags;
51145121
int i;
51155122
struct xarray elv_tbl;
5123+
struct blk_mq_tags **new_tags;
51165124
bool queues_frozen = false;
51175125

51185126
lockdep_assert_held(&set->tag_list_lock);
@@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
51475155
if (blk_mq_elv_switch_none(q, &elv_tbl))
51485156
goto switch_back;
51495157

5158+
new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues);
5159+
if (IS_ERR(new_tags))
5160+
goto switch_back;
5161+
51505162
list_for_each_entry(q, &set->tag_list, tag_set_list)
51515163
blk_mq_freeze_queue_nomemsave(q);
51525164
queues_frozen = true;
5153-
if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
5154-
goto switch_back;
5165+
if (new_tags) {
5166+
kfree(set->tags);
5167+
set->tags = new_tags;
5168+
}
5169+
set->nr_hw_queues = nr_hw_queues;
51555170

51565171
fallback:
51575172
blk_mq_update_queue_map(set);

0 commit comments

Comments
 (0)