From c6bc208d4877c54023851ff1825e62ca3b275930 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 Apr 2026 11:30:56 +0800 Subject: [PATCH 1/3] ublk: fix maple tree lockdep warning in ublk_buf_cleanup ublk_buf_cleanup() iterates the maple tree with mas_for_each() without holding mas_lock, triggering a lockdep splat on CONFIG_PROVE_RCU kernels since mas_find() internally uses rcu_dereference_check() which requires either RCU or the tree lock. Fix by holding mas_lock around the iteration, and call mas_erase() before freeing each range to avoid dangling pointers in the tree. Fixes: 5e864438e285 ("ublk: replace xarray with IDA for shmem buffer index allocation") Reported-by: Jens Axboe Closes: https://lore.kernel.org/linux-block/0349d72d-dff8-4f9f-b448-919fa5ae96da@kernel.dk/ Signed-off-by: Ming Lei --- drivers/block/ublk_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 603a98a30989..5d31fc19a488 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -5478,11 +5478,14 @@ static void ublk_buf_cleanup(struct ublk_device *ub) struct ublk_buf_range *range; struct page *pages[32]; + mas_lock(&mas); mas_for_each(&mas, range, ULONG_MAX) { unsigned long base = mas.index; unsigned long nr = mas.last - base + 1; unsigned long off; + mas_erase(&mas); + for (off = 0; off < nr; ) { unsigned int batch = min_t(unsigned long, nr - off, 32); @@ -5495,6 +5498,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub) } kfree(range); } + mas_unlock(&mas); mtree_destroy(&ub->buf_tree); ida_destroy(&ub->buf_ida); } From ab02b692b8f9f3b230d07bb9f7005930f0d768e4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 Apr 2026 11:30:57 +0800 Subject: [PATCH 2/3] ublk: refactor common helper ublk_shmem_remove_ranges() Extract the shared walk+erase+unpin+kfree loop into ublk_shmem_remove_ranges(). When buf_index >= 0, only ranges matching that index are removed; when buf_index < 0, all ranges are removed. 
Also extract ublk_unpin_range_pages() to share the page unpinning loop. Convert both __ublk_ctrl_unreg_buf() and ublk_buf_cleanup() to use the new helper. Signed-off-by: Ming Lei --- drivers/block/ublk_drv.c | 69 +++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5d31fc19a488..fa6f79376630 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -5413,18 +5413,40 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, return ret; } -static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index) +static void ublk_unpin_range_pages(unsigned long base_pfn, + unsigned long nr_pages) +{ +#define UBLK_UNPIN_BATCH 32 + struct page *pages[UBLK_UNPIN_BATCH]; + unsigned long off; + + for (off = 0; off < nr_pages; ) { + unsigned int batch = min_t(unsigned long, + nr_pages - off, UBLK_UNPIN_BATCH); + unsigned int j; + + for (j = 0; j < batch; j++) + pages[j] = pfn_to_page(base_pfn + off + j); + unpin_user_pages(pages, batch); + off += batch; + } +} + +/* + * Remove ranges from the maple tree matching buf_index, unpin pages + * and free range structs. If buf_index < 0, remove all ranges. 
+ */ +static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index) { MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); struct ublk_buf_range *range; - struct page *pages[32]; int ret = -ENOENT; mas_lock(&mas); mas_for_each(&mas, range, ULONG_MAX) { - unsigned long base, nr, off; + unsigned long base, nr; - if (range->buf_index != buf_index) + if (buf_index >= 0 && range->buf_index != buf_index) continue; ret = 0; @@ -5432,16 +5454,7 @@ static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index) nr = mas.last - base + 1; mas_erase(&mas); - for (off = 0; off < nr; ) { - unsigned int batch = min_t(unsigned long, - nr - off, 32); - unsigned int j; - - for (j = 0; j < batch; j++) - pages[j] = pfn_to_page(base + off + j); - unpin_user_pages(pages, batch); - off += batch; - } + ublk_unpin_range_pages(base, nr); kfree(range); } mas_unlock(&mas); @@ -5464,7 +5477,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, memflags = ublk_lock_buf_tree(ub); - ret = __ublk_ctrl_unreg_buf(ub, index); + ret = ublk_shmem_remove_ranges(ub, index); if (!ret) ida_free(&ub->buf_ida, index); @@ -5474,31 +5487,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, static void ublk_buf_cleanup(struct ublk_device *ub) { - MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); - struct ublk_buf_range *range; - struct page *pages[32]; - - mas_lock(&mas); - mas_for_each(&mas, range, ULONG_MAX) { - unsigned long base = mas.index; - unsigned long nr = mas.last - base + 1; - unsigned long off; - - mas_erase(&mas); - - for (off = 0; off < nr; ) { - unsigned int batch = min_t(unsigned long, - nr - off, 32); - unsigned int j; - - for (j = 0; j < batch; j++) - pages[j] = pfn_to_page(base + off + j); - unpin_user_pages(pages, batch); - off += batch; - } - kfree(range); - } - mas_unlock(&mas); + ublk_shmem_remove_ranges(ub, -1); mtree_destroy(&ub->buf_tree); ida_destroy(&ub->buf_ida); } From 07371b34199e0de7407c28ebcb8083a9b84e3af4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 
Apr 2026 11:30:58 +0800 Subject: [PATCH 3/3] ublk: avoid unpinning pages under maple tree spinlock ublk_shmem_remove_ranges() calls unpin_user_pages() while holding the maple tree spinlock (mas_lock). Although unpin_user_pages() is safe in atomic context, holding the spinlock across potentially many page unpinning operations is not ideal. Split into __ublk_shmem_remove_ranges() which erases up to 64 ranges under mas_lock, collecting base_pfn and nr_pages into a temporary xarray. Then drop the lock and unpin pages outside spinlock context. ublk_shmem_remove_ranges() loops until all matching ranges are processed. Signed-off-by: Ming Lei --- drivers/block/ublk_drv.c | 56 +++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fa6f79376630..721f8190040a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -5433,32 +5433,68 @@ static void ublk_unpin_range_pages(unsigned long base_pfn, } /* - * Remove ranges from the maple tree matching buf_index, unpin pages - * and free range structs. If buf_index < 0, remove all ranges. + * Inner loop: erase and free up to UBLK_REMOVE_BATCH matching ranges + * under mas_lock, recording each one in an xarray. Then drop the lock + * and unpin the recorded pages outside spinlock context. + * + * Returns true if the tree walk completed, false if more ranges may remain. + * Xarray key is the base PFN, value encodes nr_pages via xa_mk_value(). 
*/ -static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index) +#define UBLK_REMOVE_BATCH 64 + +static bool __ublk_shmem_remove_ranges(struct ublk_device *ub, + int buf_index, int *ret) { MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); struct ublk_buf_range *range; - int ret = -ENOENT; + struct xarray to_unpin; + unsigned long idx; + unsigned int count = 0; + bool done = false; + void *entry; + + xa_init(&to_unpin); mas_lock(&mas); mas_for_each(&mas, range, ULONG_MAX) { - unsigned long base, nr; + unsigned long nr; if (buf_index >= 0 && range->buf_index != buf_index) continue; - ret = 0; - base = mas.index; - nr = mas.last - base + 1; + *ret = 0; + nr = mas.last - mas.index + 1; + if (xa_err(xa_store(&to_unpin, mas.index, + xa_mk_value(nr), GFP_ATOMIC))) + goto unlock; mas_erase(&mas); - - ublk_unpin_range_pages(base, nr); kfree(range); + if (++count >= UBLK_REMOVE_BATCH) + goto unlock; } + done = true; +unlock: mas_unlock(&mas); + xa_for_each(&to_unpin, idx, entry) + ublk_unpin_range_pages(idx, xa_to_value(entry)); + xa_destroy(&to_unpin); + + return done; +} + +/* + * Remove ranges from the maple tree matching buf_index, unpin pages + * and free range structs. If buf_index < 0, remove all ranges. + * Processes ranges in batches to avoid holding the maple tree spinlock + * across potentially expensive page unpinning. + */ +static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index) +{ + int ret = -ENOENT; + + while (!__ublk_shmem_remove_ranges(ub, buf_index, &ret)) + cond_resched(); return ret; }