Skip to content

Commit 3b3bea6

Browse files
committed
Merge tag 'cgroup-for-7.1-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:

- Fix UAF race in psi pressure_write() against cgroup file release by extending cgroup_mutex coverage and ordering of->priv access after cgroup_kn_lock_live()
- Fix integer overflow in rdmacg_try_charge() when usage equals INT_MAX by performing the increment in s64
- Fix asymmetric DL bandwidth accounting on cpuset attach rollback by recording the CPU used by dl_bw_alloc() so cancel_attach() returns the reservation to the same root domain
- Fix nr_dying_subsys_* race that briefly showed 0 in cgroup.stat after rmdir by incrementing from kill_css() instead of offline_css()
- Typo fix in cgroup-v2 documentation

* tag 'cgroup-for-7.1-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  - docs: cgroup: fix typo 'protetion' -> 'protection'
  - cgroup: Increment nr_dying_subsys_* from rmdir context
  - cgroup/cpuset: record DL BW alloc CPU for attach rollback
  - cgroup/rdma: fix integer overflow in rdmacg_try_charge()
  - sched/psi: fix race between file release and pressure write
2 parents a1a6710 + 981cd33 commit 3b3bea6

5 files changed

Lines changed: 44 additions & 24 deletions

File tree

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ cgroup v2 currently supports the following mount options.
220220
memory_hugetlb_accounting
221221
Count HugeTLB memory usage towards the cgroup's overall
222222
memory usage for the memory controller (for the purpose of
223-
statistics reporting and memory protetion). This is a new
223+
statistics reporting and memory protection). This is a new
224224
behavior that could regress existing setups, so it must be
225225
explicitly opted in with this mount option.
226226

kernel/cgroup/cgroup.c

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3934,33 +3934,41 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
39343934
static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
39353935
size_t nbytes, enum psi_res res)
39363936
{
3937-
struct cgroup_file_ctx *ctx = of->priv;
3937+
struct cgroup_file_ctx *ctx;
39383938
struct psi_trigger *new;
39393939
struct cgroup *cgrp;
39403940
struct psi_group *psi;
3941+
ssize_t ret = 0;
39413942

39423943
cgrp = cgroup_kn_lock_live(of->kn, false);
39433944
if (!cgrp)
39443945
return -ENODEV;
39453946

3946-
cgroup_get(cgrp);
3947-
cgroup_kn_unlock(of->kn);
3947+
ctx = of->priv;
3948+
if (!ctx) {
3949+
ret = -ENODEV;
3950+
goto out_unlock;
3951+
}
39483952

39493953
/* Allow only one trigger per file descriptor */
39503954
if (ctx->psi.trigger) {
3951-
cgroup_put(cgrp);
3952-
return -EBUSY;
3955+
ret = -EBUSY;
3956+
goto out_unlock;
39533957
}
39543958

39553959
psi = cgroup_psi(cgrp);
39563960
new = psi_trigger_create(psi, buf, res, of->file, of);
39573961
if (IS_ERR(new)) {
3958-
cgroup_put(cgrp);
3959-
return PTR_ERR(new);
3962+
ret = PTR_ERR(new);
3963+
goto out_unlock;
39603964
}
39613965

39623966
smp_store_release(&ctx->psi.trigger, new);
3963-
cgroup_put(cgrp);
3967+
3968+
out_unlock:
3969+
cgroup_kn_unlock(of->kn);
3970+
if (ret)
3971+
return ret;
39643972

39653973
return nbytes;
39663974
}
@@ -5716,16 +5724,6 @@ static void offline_css(struct cgroup_subsys_state *css)
57165724
RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
57175725

57185726
wake_up_all(&css->cgroup->offline_waitq);
5719-
5720-
css->cgroup->nr_dying_subsys[ss->id]++;
5721-
/*
5722-
* Parent css and cgroup cannot be freed until after the freeing
5723-
* of child css, see css_free_rwork_fn().
5724-
*/
5725-
while ((css = css->parent)) {
5726-
css->nr_descendants--;
5727-
css->cgroup->nr_dying_subsys[ss->id]++;
5728-
}
57295727
}
57305728

57315729
/**
@@ -6038,6 +6036,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
60386036
*/
60396037
static void kill_css(struct cgroup_subsys_state *css)
60406038
{
6039+
struct cgroup_subsys *ss = css->ss;
6040+
60416041
lockdep_assert_held(&cgroup_mutex);
60426042

60436043
if (css->flags & CSS_DYING)
@@ -6074,6 +6074,16 @@ static void kill_css(struct cgroup_subsys_state *css)
60746074
* css is confirmed to be seen as killed on all CPUs.
60756075
*/
60766076
percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
6077+
6078+
css->cgroup->nr_dying_subsys[ss->id]++;
6079+
/*
6080+
* Parent css and cgroup cannot be freed until after the freeing
6081+
* of child css, see css_free_rwork_fn().
6082+
*/
6083+
while ((css = css->parent)) {
6084+
css->nr_descendants--;
6085+
css->cgroup->nr_dying_subsys[ss->id]++;
6086+
}
60776087
}
60786088

60796089
/**

kernel/cgroup/cpuset-internal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ struct cpuset {
168168
int nr_deadline_tasks;
169169
int nr_migrate_dl_tasks;
170170
u64 sum_migrate_dl_bw;
171+
/*
172+
* CPU used for temporary DL bandwidth allocation during attach;
173+
* -1 if no DL bandwidth was allocated in the current attach.
174+
*/
175+
int dl_bw_cpu;
171176

172177
/* Invalid partition error code, not lock protected */
173178
enum prs_errcode prs_err;

kernel/cgroup/cpuset.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ struct cpuset top_cpuset = {
288288
.flags = BIT(CS_CPU_EXCLUSIVE) |
289289
BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE),
290290
.partition_root_state = PRS_ROOT,
291+
.dl_bw_cpu = -1,
291292
};
292293

293294
/**
@@ -579,6 +580,8 @@ static struct cpuset *dup_or_alloc_cpuset(struct cpuset *cs)
579580
if (!trial)
580581
return NULL;
581582

583+
trial->dl_bw_cpu = -1;
584+
582585
/* Setup cpumask pointer array */
583586
cpumask_var_t *pmask[4] = {
584587
&trial->cpus_allowed,
@@ -2980,6 +2983,7 @@ static void reset_migrate_dl_data(struct cpuset *cs)
29802983
{
29812984
cs->nr_migrate_dl_tasks = 0;
29822985
cs->sum_migrate_dl_bw = 0;
2986+
cs->dl_bw_cpu = -1;
29832987
}
29842988

29852989
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
@@ -3056,6 +3060,8 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
30563060
reset_migrate_dl_data(cs);
30573061
goto out_unlock;
30583062
}
3063+
3064+
cs->dl_bw_cpu = cpu;
30593065
}
30603066

30613067
out_success:
@@ -3080,12 +3086,11 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
30803086
mutex_lock(&cpuset_mutex);
30813087
dec_attach_in_progress_locked(cs);
30823088

3083-
if (cs->nr_migrate_dl_tasks) {
3084-
int cpu = cpumask_any(cs->effective_cpus);
3089+
if (cs->dl_bw_cpu >= 0)
3090+
dl_bw_free(cs->dl_bw_cpu, cs->sum_migrate_dl_bw);
30853091

3086-
dl_bw_free(cpu, cs->sum_migrate_dl_bw);
3092+
if (cs->nr_migrate_dl_tasks)
30873093
reset_migrate_dl_data(cs);
3088-
}
30893094

30903095
mutex_unlock(&cpuset_mutex);
30913096
}

kernel/cgroup/rdma.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
283283
ret = PTR_ERR(rpool);
284284
goto err;
285285
} else {
286-
new = rpool->resources[index].usage + 1;
286+
new = (s64)rpool->resources[index].usage + 1;
287287
if (new > rpool->resources[index].max) {
288288
ret = -EAGAIN;
289289
goto err;

0 commit comments

Comments
 (0)