From 01132e6e49adb6a0957cf23665f908f8d1009697 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Wed, 22 Apr 2026 08:08:56 +0800
Subject: [PATCH] ublk: fix maple tree lockdep warning and unpin under spinlock

Fix two issues in the shmem buffer maple tree usage:

1) ublk_buf_cleanup() iterates the tree with mas_for_each() without
   holding rcu_read_lock or mas_lock, triggering a lockdep splat on
   CONFIG_PROVE_RCU kernels. Add mas_lock/unlock around the iteration.

2) __ublk_ctrl_unreg_buf() calls unpin_user_pages() under mas_lock
   (a spinlock). unpin_user_pages can be expensive for large buffers
   and may take additional locks if folio refcount drops to zero.
   Restructure to drop mas_lock before unpinning, re-acquiring it to
   continue iteration.

Both functions now use the same pattern: erase under lock, drop lock,
unpin and free, re-lock to continue. Extract ublk_unpin_range_pages()
helper to share the page unpinning loop.

Reported-by: Jens Axboe
Closes: https://lore.kernel.org/linux-block/0349d72d-dff8-4f9f-b448-919fa5ae96da@kernel.dk/
Cc: Liam R. Howlett
Signed-off-by: Ming Lei
Tested-by: Shin'ichiro Kawasaki
---
 drivers/block/ublk_drv.c | 63 +++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 603a98a30989..3fb3070e75db 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -5413,16 +5413,39 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
 	return ret;
 }
 
+static void ublk_unpin_range_pages(unsigned long base_pfn,
+				   unsigned long nr_pages)
+{
+#define UBLK_UNPIN_BATCH 32
+	struct page *pages[UBLK_UNPIN_BATCH];
+	unsigned long off;
+
+	for (off = 0; off < nr_pages; ) {
+		unsigned int batch = min_t(unsigned long,
+					   nr_pages - off, UBLK_UNPIN_BATCH);
+		unsigned int j;
+
+		for (j = 0; j < batch; j++)
+			pages[j] = pfn_to_page(base_pfn + off + j);
+		unpin_user_pages(pages, batch);
+		off += batch;
+	}
+}
+
+/*
+ * Drop mas_lock during iteration to avoid unpinning pages under spinlock.
+ * Safe because callers hold ub->mutex (via ublk_lock_buf_tree), preventing
+ * concurrent tree modifications.
+ */
 static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index)
 {
 	MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
 	struct ublk_buf_range *range;
-	struct page *pages[32];
 	int ret = -ENOENT;
 
 	mas_lock(&mas);
 	mas_for_each(&mas, range, ULONG_MAX) {
-		unsigned long base, nr, off;
+		unsigned long base, nr;
 
 		if (range->buf_index != buf_index)
 			continue;
@@ -5431,18 +5454,12 @@ static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index)
 		base = mas.index;
 		nr = mas.last - base + 1;
 		mas_erase(&mas);
+		mas_unlock(&mas);
 
-		for (off = 0; off < nr; ) {
-			unsigned int batch = min_t(unsigned long,
-						   nr - off, 32);
-			unsigned int j;
-
-			for (j = 0; j < batch; j++)
-				pages[j] = pfn_to_page(base + off + j);
-			unpin_user_pages(pages, batch);
-			off += batch;
-		}
+		ublk_unpin_range_pages(base, nr);
 		kfree(range);
+
+		mas_lock(&mas);
 	}
 	mas_unlock(&mas);
 
@@ -5472,29 +5489,27 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
 	return ret;
 }
 
+/*
+ * Drop mas_lock during iteration to avoid unpinning pages under spinlock.
+ * Safe because this is called from device release with exclusive access.
+ */
 static void ublk_buf_cleanup(struct ublk_device *ub)
 {
 	MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
 	struct ublk_buf_range *range;
-	struct page *pages[32];
 
+	mas_lock(&mas);
 	mas_for_each(&mas, range, ULONG_MAX) {
 		unsigned long base = mas.index;
 		unsigned long nr = mas.last - base + 1;
-		unsigned long off;
 
-		for (off = 0; off < nr; ) {
-			unsigned int batch = min_t(unsigned long,
-						   nr - off, 32);
-			unsigned int j;
-
-			for (j = 0; j < batch; j++)
-				pages[j] = pfn_to_page(base + off + j);
-			unpin_user_pages(pages, batch);
-			off += batch;
-		}
+		mas_erase(&mas);
+		mas_unlock(&mas);
+		ublk_unpin_range_pages(base, nr);
 		kfree(range);
+		mas_lock(&mas);
 	}
+	mas_unlock(&mas);
 	mtree_destroy(&ub->buf_tree);
 	ida_destroy(&ub->buf_ida);
 }