Skip to content

Commit 3c7b4d1

Browse files
committed
Merge tag 'sched_ext-for-6.19-rc8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fix from Tejun Heo:

 - Fix race where sched_class operations (sched_setscheduler() and friends) could be invoked on dead tasks after sched_ext_dead() already ran, causing invalid SCX task state transitions and NULL pointer dereferences. This was a regression from the cgroup exit ordering fix which moved sched_ext_free() to finish_task_switch().

* tag 'sched_ext-for-6.19-rc8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Short-circuit sched_class operations on dead tasks
2 parents 27db1ae + 0eca95c commit 3c7b4d1

1 file changed

Lines changed: 48 additions & 0 deletions

File tree

kernel/sched/ext.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ MODULE_PARM_DESC(bypass_lb_intv_us, "bypass load balance interval in microsecond
194194
#include <trace/events/sched_ext.h>
195195

196196
static void process_ddsp_deferred_locals(struct rq *rq);
197+
static bool task_dead_and_done(struct task_struct *p);
197198
static u32 reenq_local(struct rq *rq);
198199
static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
199200
static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
@@ -2619,6 +2620,9 @@ static void set_cpus_allowed_scx(struct task_struct *p,
26192620

26202621
set_cpus_allowed_common(p, ac);
26212622

2623+
if (task_dead_and_done(p))
2624+
return;
2625+
26222626
/*
26232627
* The effective cpumask is stored in @p->cpus_ptr which may temporarily
26242628
* differ from the configured one in @p->cpus_mask. Always tell the bpf
@@ -3034,10 +3038,45 @@ void scx_cancel_fork(struct task_struct *p)
30343038
percpu_up_read(&scx_fork_rwsem);
30353039
}
30363040

3041+
/**
3042+
* task_dead_and_done - Is a task dead and done running?
3043+
* @p: target task
3044+
*
3045+
* Once sched_ext_dead() removes the dead task from scx_tasks and exits it, the
3046+
* task no longer exists from SCX's POV. However, certain sched_class ops may be
3047+
* invoked on these dead tasks leading to failures - e.g. sched_setscheduler()
3048+
* may try to switch a task which finished sched_ext_dead() back into SCX
3049+
* triggering invalid SCX task state transitions and worse.
3050+
*
3051+
* Once a task has finished the final switch, sched_ext_dead() is the only thing
3052+
* that needs to happen on the task. Use this test to short-circuit sched_class
3053+
* operations which may be called on dead tasks.
*
* Must be called with the lock of @p's rq held; this is checked with
* lockdep_assert_rq_held().
*
* Return: %true if @p has %TASK_DEAD set and is not on a CPU, %false otherwise.
3054+
*/
3055+
static bool task_dead_and_done(struct task_struct *p)
3056+
{
3057+
struct rq *rq = task_rq(p);
3058+
3059+
lockdep_assert_rq_held(rq);
3060+
3061+
/*
3062+
* In do_task_dead(), a dying task sets %TASK_DEAD with preemption
3063+
* disabled and calls __schedule(). If @p has %TASK_DEAD set and is off CPU,
3064+
* @p won't ever run again.
3065+
*/
3066+
return unlikely(READ_ONCE(p->__state) == TASK_DEAD) &&
3067+
!task_on_cpu(rq, p);
3068+
}
3069+
30373070
void sched_ext_dead(struct task_struct *p)
30383071
{
30393072
unsigned long flags;
30403073

3074+
/*
3075+
* By the time control reaches here, @p has %TASK_DEAD set, switched out
3076+
* for the last time and then dropped the rq lock - task_dead_and_done()
3077+
* should be returning %true nullifying the straggling sched_class ops.
3078+
* Remove from scx_tasks and exit @p.
3079+
*/
30413080
raw_spin_lock_irqsave(&scx_tasks_lock, flags);
30423081
list_del_init(&p->scx.tasks_node);
30433082
raw_spin_unlock_irqrestore(&scx_tasks_lock, flags);
@@ -3063,6 +3102,9 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
30633102

30643103
lockdep_assert_rq_held(task_rq(p));
30653104

3105+
if (task_dead_and_done(p))
3106+
return;
3107+
30663108
p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
30673109
if (SCX_HAS_OP(sch, set_weight))
30683110
SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
@@ -3077,6 +3119,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
30773119
{
30783120
struct scx_sched *sch = scx_root;
30793121

3122+
if (task_dead_and_done(p))
3123+
return;
3124+
30803125
scx_enable_task(p);
30813126

30823127
/*
@@ -3090,6 +3135,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
30903135

30913136
static void switched_from_scx(struct rq *rq, struct task_struct *p)
30923137
{
3138+
/*
* @p is dead and off CPU, so sched_ext_dead() is the only thing left to
* happen on it. Skip the disable; see task_dead_and_done().
*/
if (task_dead_and_done(p))
3139+
return;
3140+
30933141
scx_disable_task(p);
30943142
}
30953143

0 commit comments

Comments
 (0)