Skip to content

Commit 4414975

Browse files
committed
Merge tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"This is a relatively big pull request this late in the cycle but the major contributor is the cpuset bug which is rather significant:

 - Fix several cpuset bugs including one where it wasn't applying the target cgroup when tasks are created with CLONE_INTO_CGROUP

With a few smaller fixes:

 - Fix inversed locking order in cgroup1 freezer implementation

 - Fix garbage cpu.stat::core_sched.forceidle_usec reporting in the root cgroup"

* tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Make cpuset_attach_task() skip subpartitions CPUs for top_cpuset
  cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
  cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
  cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()
  cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex
  cgroup/cpuset: Fix partition root's cpuset.cpus update bug
  cgroup: fix display of forceidle time at root
2 parents e44f45f + 7e27cb6 commit 4414975

3 files changed

Lines changed: 150 additions & 39 deletions

File tree

kernel/cgroup/cpuset.c

Lines changed: 144 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
15131513
spin_unlock_irq(&callback_lock);
15141514

15151515
if (adding || deleting)
1516-
update_tasks_cpumask(parent, tmp->new_cpus);
1516+
update_tasks_cpumask(parent, tmp->addmask);
15171517

15181518
/*
15191519
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
@@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
17701770
/*
17711771
* Use the cpumasks in trialcs for tmpmasks when they are pointers
17721772
* to allocated cpumasks.
1773+
*
1774+
* Note that update_parent_subparts_cpumask() uses only addmask &
1775+
* delmask, but not new_cpus.
17731776
*/
17741777
tmp.addmask = trialcs->subparts_cpus;
17751778
tmp.delmask = trialcs->effective_cpus;
1776-
tmp.new_cpus = trialcs->cpus_allowed;
1779+
tmp.new_cpus = NULL;
17771780
#endif
17781781

17791782
retval = validate_change(cs, trialcs);
@@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
18381841
}
18391842
spin_unlock_irq(&callback_lock);
18401843

1844+
#ifdef CONFIG_CPUMASK_OFFSTACK
1845+
/* Now trialcs->cpus_allowed is available */
1846+
tmp.new_cpus = trialcs->cpus_allowed;
1847+
#endif
1848+
18411849
/* effective_cpus will be updated here */
18421850
update_cpumasks_hier(cs, &tmp, false);
18431851

@@ -2445,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
24452453

24462454
static struct cpuset *cpuset_attach_old_cs;
24472455

2456+
/*
2457+
* Check to see if a cpuset can accept a new task
2458+
* For v1, cpus_allowed and mems_allowed can't be empty.
2459+
* For v2, effective_cpus can't be empty.
2460+
* Note that in v1, effective_cpus = cpus_allowed.
2461+
*/
2462+
static int cpuset_can_attach_check(struct cpuset *cs)
2463+
{
2464+
if (cpumask_empty(cs->effective_cpus) ||
2465+
(!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
2466+
return -ENOSPC;
2467+
return 0;
2468+
}
2469+
24482470
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
24492471
static int cpuset_can_attach(struct cgroup_taskset *tset)
24502472
{
@@ -2459,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
24592481

24602482
percpu_down_write(&cpuset_rwsem);
24612483

2462-
/* allow moving tasks into an empty cpuset if on default hierarchy */
2463-
ret = -ENOSPC;
2464-
if (!is_in_v2_mode() &&
2465-
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
2466-
goto out_unlock;
2467-
2468-
/*
2469-
* Task cannot be moved to a cpuset with empty effective cpus.
2470-
*/
2471-
if (cpumask_empty(cs->effective_cpus))
2484+
/* Check to see if task is allowed in the cpuset */
2485+
ret = cpuset_can_attach_check(cs);
2486+
if (ret)
24722487
goto out_unlock;
24732488

24742489
cgroup_taskset_for_each(task, css, tset) {
@@ -2485,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
24852500
* changes which zero cpus/mems_allowed.
24862501
*/
24872502
cs->attach_in_progress++;
2488-
ret = 0;
24892503
out_unlock:
24902504
percpu_up_write(&cpuset_rwsem);
24912505
return ret;
@@ -2494,25 +2508,47 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
24942508
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
24952509
{
24962510
struct cgroup_subsys_state *css;
2511+
struct cpuset *cs;
24972512

24982513
cgroup_taskset_first(tset, &css);
2514+
cs = css_cs(css);
24992515

25002516
percpu_down_write(&cpuset_rwsem);
2501-
css_cs(css)->attach_in_progress--;
2517+
cs->attach_in_progress--;
2518+
if (!cs->attach_in_progress)
2519+
wake_up(&cpuset_attach_wq);
25022520
percpu_up_write(&cpuset_rwsem);
25032521
}
25042522

25052523
/*
2506-
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
2524+
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
25072525
* but we can't allocate it dynamically there. Define it global and
25082526
* allocate from cpuset_init().
25092527
*/
25102528
static cpumask_var_t cpus_attach;
2529+
static nodemask_t cpuset_attach_nodemask_to;
2530+
2531+
static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
2532+
{
2533+
percpu_rwsem_assert_held(&cpuset_rwsem);
2534+
2535+
if (cs != &top_cpuset)
2536+
guarantee_online_cpus(task, cpus_attach);
2537+
else
2538+
cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
2539+
cs->subparts_cpus);
2540+
/*
2541+
* can_attach beforehand should guarantee that this doesn't
2542+
* fail. TODO: have a better way to handle failure here
2543+
*/
2544+
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
2545+
2546+
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
2547+
cpuset_update_task_spread_flags(cs, task);
2548+
}
25112549

25122550
static void cpuset_attach(struct cgroup_taskset *tset)
25132551
{
2514-
/* static buf protected by cpuset_rwsem */
2515-
static nodemask_t cpuset_attach_nodemask_to;
25162552
struct task_struct *task;
25172553
struct task_struct *leader;
25182554
struct cgroup_subsys_state *css;
@@ -2543,20 +2579,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
25432579

25442580
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
25452581

2546-
cgroup_taskset_for_each(task, css, tset) {
2547-
if (cs != &top_cpuset)
2548-
guarantee_online_cpus(task, cpus_attach);
2549-
else
2550-
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
2551-
/*
2552-
* can_attach beforehand should guarantee that this doesn't
2553-
* fail. TODO: have a better way to handle failure here
2554-
*/
2555-
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
2556-
2557-
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
2558-
cpuset_update_task_spread_flags(cs, task);
2559-
}
2582+
cgroup_taskset_for_each(task, css, tset)
2583+
cpuset_attach_task(cs, task);
25602584

25612585
/*
25622586
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3247,18 +3271,102 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
32473271
percpu_up_write(&cpuset_rwsem);
32483272
}
32493273

3274+
/*
3275+
* In case the child is cloned into a cpuset different from its parent,
3276+
* additional checks are done to see if the move is allowed.
3277+
*/
3278+
static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
3279+
{
3280+
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
3281+
bool same_cs;
3282+
int ret;
3283+
3284+
rcu_read_lock();
3285+
same_cs = (cs == task_cs(current));
3286+
rcu_read_unlock();
3287+
3288+
if (same_cs)
3289+
return 0;
3290+
3291+
lockdep_assert_held(&cgroup_mutex);
3292+
percpu_down_write(&cpuset_rwsem);
3293+
3294+
/* Check to see if task is allowed in the cpuset */
3295+
ret = cpuset_can_attach_check(cs);
3296+
if (ret)
3297+
goto out_unlock;
3298+
3299+
ret = task_can_attach(task, cs->effective_cpus);
3300+
if (ret)
3301+
goto out_unlock;
3302+
3303+
ret = security_task_setscheduler(task);
3304+
if (ret)
3305+
goto out_unlock;
3306+
3307+
/*
3308+
* Mark attach is in progress. This makes validate_change() fail
3309+
* changes which zero cpus/mems_allowed.
3310+
*/
3311+
cs->attach_in_progress++;
3312+
out_unlock:
3313+
percpu_up_write(&cpuset_rwsem);
3314+
return ret;
3315+
}
3316+
3317+
static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
3318+
{
3319+
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
3320+
bool same_cs;
3321+
3322+
rcu_read_lock();
3323+
same_cs = (cs == task_cs(current));
3324+
rcu_read_unlock();
3325+
3326+
if (same_cs)
3327+
return;
3328+
3329+
percpu_down_write(&cpuset_rwsem);
3330+
cs->attach_in_progress--;
3331+
if (!cs->attach_in_progress)
3332+
wake_up(&cpuset_attach_wq);
3333+
percpu_up_write(&cpuset_rwsem);
3334+
}
3335+
32503336
/*
32513337
* Make sure the new task conform to the current state of its parent,
32523338
* which could have been changed by cpuset just after it inherits the
32533339
* state from the parent and before it sits on the cgroup's task list.
32543340
*/
32553341
static void cpuset_fork(struct task_struct *task)
32563342
{
3257-
if (task_css_is_root(task, cpuset_cgrp_id))
3343+
struct cpuset *cs;
3344+
bool same_cs;
3345+
3346+
rcu_read_lock();
3347+
cs = task_cs(task);
3348+
same_cs = (cs == task_cs(current));
3349+
rcu_read_unlock();
3350+
3351+
if (same_cs) {
3352+
if (cs == &top_cpuset)
3353+
return;
3354+
3355+
set_cpus_allowed_ptr(task, current->cpus_ptr);
3356+
task->mems_allowed = current->mems_allowed;
32583357
return;
3358+
}
3359+
3360+
/* CLONE_INTO_CGROUP */
3361+
percpu_down_write(&cpuset_rwsem);
3362+
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
3363+
cpuset_attach_task(cs, task);
3364+
3365+
cs->attach_in_progress--;
3366+
if (!cs->attach_in_progress)
3367+
wake_up(&cpuset_attach_wq);
32593368

3260-
set_cpus_allowed_ptr(task, current->cpus_ptr);
3261-
task->mems_allowed = current->mems_allowed;
3369+
percpu_up_write(&cpuset_rwsem);
32623370
}
32633371

32643372
struct cgroup_subsys cpuset_cgrp_subsys = {
@@ -3271,6 +3379,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
32713379
.attach = cpuset_attach,
32723380
.post_attach = cpuset_post_attach,
32733381
.bind = cpuset_bind,
3382+
.can_fork = cpuset_can_fork,
3383+
.cancel_fork = cpuset_cancel_fork,
32743384
.fork = cpuset_fork,
32753385
.legacy_cftypes = legacy_files,
32763386
.dfl_cftypes = dfl_files,

kernel/cgroup/legacy_freezer.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <linux/freezer.h>
2323
#include <linux/seq_file.h>
2424
#include <linux/mutex.h>
25+
#include <linux/cpu.h>
2526

2627
/*
2728
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
350351

351352
if (freeze) {
352353
if (!(freezer->state & CGROUP_FREEZING))
353-
static_branch_inc(&freezer_active);
354+
static_branch_inc_cpuslocked(&freezer_active);
354355
freezer->state |= state;
355356
freeze_cgroup(freezer);
356357
} else {
@@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
361362
if (!(freezer->state & CGROUP_FREEZING)) {
362363
freezer->state &= ~CGROUP_FROZEN;
363364
if (was_freezing)
364-
static_branch_dec(&freezer_active);
365+
static_branch_dec_cpuslocked(&freezer_active);
365366
unfreeze_cgroup(freezer);
366367
}
367368
}
@@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
379380
{
380381
struct cgroup_subsys_state *pos;
381382

383+
cpus_read_lock();
382384
/*
383385
* Update all its descendants in pre-order traversal. Each
384386
* descendant will try to inherit its parent's FREEZING state as
@@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
407409
}
408410
rcu_read_unlock();
409411
mutex_unlock(&freezer_mutex);
412+
cpus_read_unlock();
410413
}
411414

412415
static ssize_t freezer_write(struct kernfs_open_file *of,

kernel/cgroup/rstat.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
457457
struct task_cputime *cputime = &bstat->cputime;
458458
int i;
459459

460-
cputime->stime = 0;
461-
cputime->utime = 0;
462-
cputime->sum_exec_runtime = 0;
460+
memset(bstat, 0, sizeof(*bstat));
463461
for_each_possible_cpu(i) {
464462
struct kernel_cpustat kcpustat;
465463
u64 *cpustat = kcpustat.cpustat;

0 commit comments

Comments (0)