Skip to content

Commit 99a2ef5

Browse files
Chen Ridong authored and htejun committed
cgroup/dmem: avoid pool UAF
A UAF issue was observed: BUG: KASAN: slab-use-after-free in page_counter_uncharge+0x65/0x150 Write of size 8 at addr ffff888106715440 by task insmod/527 CPU: 4 UID: 0 PID: 527 Comm: insmod 6.19.0-rc7-next-20260129+ #11 Tainted: [O]=OOT_MODULE Call Trace: <TASK> dump_stack_lvl+0x82/0xd0 kasan_report+0xca/0x100 kasan_check_range+0x39/0x1c0 page_counter_uncharge+0x65/0x150 dmem_cgroup_uncharge+0x1f/0x260 Allocated by task 527: Freed by task 0: The buggy address belongs to the object at ffff888106715400 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 64 bytes inside of freed 512-byte region [ffff888106715400, ffff888106715600) The buggy address belongs to the physical page: Memory state around the buggy address: ffff888106715300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888106715380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888106715400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888106715480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888106715500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb The issue occurs because a pool can still be held by a caller after its associated memory region is unregistered. The current implementation frees the pool even if users still hold references to it (e.g., before uncharge operations complete). This patch adds a reference counter to each pool, ensuring that a pool is only freed when its reference count drops to zero. Fixes: b168ed4 ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: [email protected] # v6.14+ Signed-off-by: Chen Ridong <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent 592a682 commit 99a2ef5

1 file changed

Lines changed: 58 additions & 2 deletions

File tree

kernel/cgroup/dmem.c

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/mutex.h>
1515
#include <linux/page_counter.h>
1616
#include <linux/parser.h>
17+
#include <linux/refcount.h>
1718
#include <linux/rculist.h>
1819
#include <linux/slab.h>
1920

@@ -71,7 +72,9 @@ struct dmem_cgroup_pool_state {
7172
struct rcu_head rcu;
7273

7374
struct page_counter cnt;
75+
struct dmem_cgroup_pool_state *parent;
7476

77+
refcount_t ref;
7578
bool inited;
7679
};
7780

@@ -88,6 +91,9 @@ struct dmem_cgroup_pool_state {
8891
static DEFINE_SPINLOCK(dmemcg_lock);
8992
static LIST_HEAD(dmem_cgroup_regions);
9093

94+
static void dmemcg_free_region(struct kref *ref);
95+
static void dmemcg_pool_free_rcu(struct rcu_head *rcu);
96+
9197
static inline struct dmemcg_state *
9298
css_to_dmemcs(struct cgroup_subsys_state *css)
9399
{
@@ -104,10 +110,38 @@ static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
104110
return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
105111
}
106112

113+
/*
 * dmemcg_pool_get - take an additional reference on @pool.
 *
 * Unconditionally increments pool->ref; unlike dmemcg_pool_tryget() it does
 * not check whether the count has already reached zero.
 * NOTE(review): callers presumably already hold a reference or otherwise
 * know the pool is live (e.g. under dmemcg_lock) - confirm at each call site.
 * Each reference taken here is released with dmemcg_pool_put().
 */
static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool)
114+
{
115+
refcount_inc(&pool->ref);
116+
}
117+
118+
/*
 * dmemcg_pool_tryget - try to take a reference on @pool.
 *
 * Returns the result of refcount_inc_not_zero(): true when the reference was
 * taken, false when pool->ref had already dropped to zero, in which case the
 * caller must not use the pool.
 */
static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool)
119+
{
120+
return refcount_inc_not_zero(&pool->ref);
121+
}
122+
123+
/*
 * dmemcg_pool_put - drop one reference on @pool.
 *
 * When this was the last reference, the pool is not freed here; instead
 * dmemcg_pool_free_rcu() is queued through call_rcu() on the pool's embedded
 * rcu head, so the actual teardown is deferred to that callback.
 */
static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool)
124+
{
125+
/* Other references remain: nothing more to do. */
if (!refcount_dec_and_test(&pool->ref))
126+
return;
127+
128+
call_rcu(&pool->rcu, dmemcg_pool_free_rcu);
129+
}
130+
131+
/*
 * dmemcg_pool_free_rcu - RCU callback that tears down a pool.
 *
 * Queued by dmemcg_pool_put() once the pool's refcount reaches zero. @rcu is
 * the rcu head embedded in the pool, from which the pool is recovered.
 */
static void dmemcg_pool_free_rcu(struct rcu_head *rcu)
132+
{
133+
struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu);
134+
135+
/* Release the reference this pool took on its parent, if one was linked. */
if (pool->parent)
136+
dmemcg_pool_put(pool->parent);
137+
/*
 * Drop a reference on the owning region.
 * NOTE(review): presumably balances the kref_get(&region->ref) done when
 * the pool is allocated - confirm.
 */
kref_put(&pool->region->ref, dmemcg_free_region);
138+
kfree(pool);
139+
}
140+
107141
/*
 * free_cg_pool - unlink @pool from its region list and release it.
 *
 * Removes the pool's region_node list entry, then (after this change) drops
 * a pool reference via dmemcg_pool_put() instead of calling kfree() directly,
 * so the memory is only freed once the refcount reaches zero.
 * NOTE(review): the reference dropped here is presumably the initial one set
 * with refcount_set(&pool->ref, 1) at allocation - confirm.
 */
static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
108142
{
109143
list_del(&pool->region_node);
110-
kfree(pool);
144+
dmemcg_pool_put(pool);
111145
}
112146

113147
static void
@@ -342,6 +376,12 @@ alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region
342376
page_counter_init(&pool->cnt,
343377
ppool ? &ppool->cnt : NULL, true);
344378
reset_all_resource_limits(pool);
379+
refcount_set(&pool->ref, 1);
380+
kref_get(&region->ref);
381+
if (ppool && !pool->parent) {
382+
pool->parent = ppool;
383+
dmemcg_pool_get(ppool);
384+
}
345385

346386
list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
347387
list_add_tail(&pool->region_node, &region->pools);
@@ -389,6 +429,10 @@ get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *regio
389429

390430
/* Fix up parent links, mark as inited. */
391431
pool->cnt.parent = &ppool->cnt;
432+
if (ppool && !pool->parent) {
433+
pool->parent = ppool;
434+
dmemcg_pool_get(ppool);
435+
}
392436
pool->inited = true;
393437

394438
pool = ppool;
@@ -435,6 +479,8 @@ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
435479

436480
list_for_each_entry_safe(pool, next, &region->pools, region_node) {
437481
list_del_rcu(&pool->css_node);
482+
list_del(&pool->region_node);
483+
dmemcg_pool_put(pool);
438484
}
439485

440486
/*
@@ -515,8 +561,10 @@ static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
515561
*/
516562
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
517563
{
518-
if (pool)
564+
if (pool) {
519565
css_put(&pool->cs->css);
566+
dmemcg_pool_put(pool);
567+
}
520568
}
521569
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);
522570

@@ -530,6 +578,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
530578
pool = find_cg_pool_locked(cg, region);
531579
if (pool && !READ_ONCE(pool->inited))
532580
pool = NULL;
581+
if (pool && !dmemcg_pool_tryget(pool))
582+
pool = NULL;
533583
rcu_read_unlock();
534584

535585
while (!pool) {
@@ -538,6 +588,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
538588
pool = get_cg_pool_locked(cg, region, &allocpool);
539589
else
540590
pool = ERR_PTR(-ENODEV);
591+
if (!IS_ERR(pool))
592+
dmemcg_pool_get(pool);
541593
spin_unlock(&dmemcg_lock);
542594

543595
if (pool == ERR_PTR(-ENOMEM)) {
@@ -573,6 +625,7 @@ void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
573625

574626
page_counter_uncharge(&pool->cnt, size);
575627
css_put(&pool->cs->css);
628+
dmemcg_pool_put(pool);
576629
}
577630
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);
578631

@@ -624,7 +677,9 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
624677
if (ret_limit_pool) {
625678
*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
626679
css_get(&(*ret_limit_pool)->cs->css);
680+
dmemcg_pool_get(*ret_limit_pool);
627681
}
682+
dmemcg_pool_put(pool);
628683
ret = -EAGAIN;
629684
goto err;
630685
}
@@ -719,6 +774,7 @@ static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
719774

720775
/* And commit */
721776
apply(pool, new_limit);
777+
dmemcg_pool_put(pool);
722778

723779
out_put:
724780
kref_put(&region->ref, dmemcg_free_region);

0 commit comments

Comments
 (0)