@@ -194,6 +194,7 @@ MODULE_PARM_DESC(bypass_lb_intv_us, "bypass load balance interval in microsecond
194194#include <trace/events/sched_ext.h>
195195
196196static void process_ddsp_deferred_locals (struct rq * rq );
197+ static bool task_dead_and_done (struct task_struct * p );
197198static u32 reenq_local (struct rq * rq );
198199static void scx_kick_cpu (struct scx_sched * sch , s32 cpu , u64 flags );
199200static bool scx_vexit (struct scx_sched * sch , enum scx_exit_kind kind ,
@@ -2619,6 +2620,9 @@ static void set_cpus_allowed_scx(struct task_struct *p,
26192620
26202621 set_cpus_allowed_common (p , ac );
26212622
2623+ if (task_dead_and_done (p ))
2624+ return ;
2625+
26222626 /*
26232627 * The effective cpumask is stored in @p->cpus_ptr which may temporarily
26242628 * differ from the configured one in @p->cpus_mask. Always tell the bpf
@@ -3034,10 +3038,45 @@ void scx_cancel_fork(struct task_struct *p)
30343038 percpu_up_read (& scx_fork_rwsem );
30353039}
30363040
3041+ /**
3042+ * task_dead_and_done - Is a task dead and done running?
3043+ * @p: target task
3044+ *
3045+ * Once sched_ext_dead() removes the dead task from scx_tasks and exits it, the
3046+ * task no longer exists from SCX's POV. However, certain sched_class ops may be
3047+ * invoked on these dead tasks leading to failures - e.g. sched_setscheduler()
3048+ * may try to switch a task which finished sched_ext_dead() back into SCX
3049+ * triggering invalid SCX task state transitions and worse.
3050+ *
3051+ * Once a task has finished the final switch, sched_ext_dead() is the only thing
3052+ * that needs to happen on the task. Use this test to short-circuit sched_class
3053+ * operations which may be called on dead tasks.
3054+ */
3055+ static bool task_dead_and_done (struct task_struct * p )
3056+ {
3057+ struct rq * rq = task_rq (p );
3058+
3059+ lockdep_assert_rq_held (rq );
3060+
3061+ /*
3062+ * In do_task_dead(), a dying task sets %TASK_DEAD with preemption
3063+ * disabled and __schedule(). If @p has %TASK_DEAD set and off CPU, @p
3064+ * won't ever run again.
3065+ */
3066+ return unlikely (READ_ONCE (p -> __state ) == TASK_DEAD ) &&
3067+ !task_on_cpu (rq , p );
3068+ }
3069+
30373070void sched_ext_dead (struct task_struct * p )
30383071{
30393072 unsigned long flags ;
30403073
3074+ /*
3075+ * By the time control reaches here, @p has %TASK_DEAD set, switched out
3076+ * for the last time and then dropped the rq lock - task_dead_and_done()
3077+ * should be returning %true nullifying the straggling sched_class ops.
3078+ * Remove from scx_tasks and exit @p.
3079+ */
30413080 raw_spin_lock_irqsave (& scx_tasks_lock , flags );
30423081 list_del_init (& p -> scx .tasks_node );
30433082 raw_spin_unlock_irqrestore (& scx_tasks_lock , flags );
@@ -3063,6 +3102,9 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
30633102
30643103 lockdep_assert_rq_held (task_rq (p ));
30653104
3105+ if (task_dead_and_done (p ))
3106+ return ;
3107+
30663108 p -> scx .weight = sched_weight_to_cgroup (scale_load_down (lw -> weight ));
30673109 if (SCX_HAS_OP (sch , set_weight ))
30683110 SCX_CALL_OP_TASK (sch , SCX_KF_REST , set_weight , rq ,
@@ -3077,6 +3119,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
30773119{
30783120 struct scx_sched * sch = scx_root ;
30793121
3122+ if (task_dead_and_done (p ))
3123+ return ;
3124+
30803125 scx_enable_task (p );
30813126
30823127 /*
@@ -3090,6 +3135,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
30903135
static void switched_from_scx(struct rq *rq, struct task_struct *p)
{
	/*
	 * NOTE(review): skips disabling for tasks that have %TASK_DEAD set and
	 * are off CPU - presumably because sched_ext_dead() handles final
	 * teardown for such tasks; confirm against sched_ext_dead().
	 */
	if (task_dead_and_done(p))
		return;

	scx_disable_task(p);
}
30953143
0 commit comments