@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
46484648#endif
46494649}
46504650
4651+ /*
4652+  * Tell whether @se has slept for so long that comparing its vruntime with
4653+  * the cfs_rq's min_vruntime could get inversed due to s64 overflow.
4654+  */
4655+ static inline bool entity_is_long_sleeper(struct sched_entity *se)
4656+ {
4657+ 	u64 now, slept;
4658+
4659+ 	/* A zero exec_start is never reported as a long sleep. */
4660+ 	if (!se->exec_start)
4661+ 		return false;
4662+
4663+ 	now = rq_clock_task(rq_of(cfs_rq_of(se)));
4664+
4665+ 	/* Can happen while migrating because of clock task divergence. */
4666+ 	if (now <= se->exec_start)
4667+ 		return false;
4668+
4669+ 	slept = now - se->exec_start;
4670+
4671+ 	return slept > ((1ULL << 63) / scale_load_down(NICE_0_LOAD));
4672+ }
4673+
46514674static void
46524675place_entity (struct cfs_rq * cfs_rq , struct sched_entity * se , int initial )
46534676{
46544677 u64 vruntime = cfs_rq -> min_vruntime ;
4655- u64 sleep_time ;
46564678
46574679 /*
46584680 * The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
46844706
46854707 /*
46864708 * Pull vruntime of the entity being placed to the base level of
4687- * cfs_rq, to prevent boosting it if placed backwards. If the entity
4688- * slept for a long time, don't even try to compare its vruntime with
4689- * the base as it may be too far off and the comparison may get
4690- * inversed due to s64 overflow.
4691- */
4692- sleep_time = rq_clock_task (rq_of (cfs_rq )) - se -> exec_start ;
4693- if ((s64 )sleep_time > 60LL * NSEC_PER_SEC )
4709+ * cfs_rq, to prevent boosting it if placed backwards.
4710+ * However, min_vruntime can advance much faster than real time, with
4711+ * the extreme being when an entity with the minimal weight always runs
4712+ * on the cfs_rq. If the waking entity slept for a long time, its
4713+ * vruntime difference from min_vruntime may overflow s64 and their
4714+ * comparison may get inversed, so ignore the entity's original
4715+ * vruntime in that case.
4716+ * The maximal vruntime speedup is given by the ratio of normal to
4717+ * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
4718+ * When placing a migrated waking entity, its exec_start has been set
4719+ * from a different rq. In order to take into account a possible
4720+ * divergence between the new and prev rq's task clocks because of irq and
4721+ * stolen time, we take an additional margin.
4722+ * So, cutting off on the sleep time of
4723+ * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
4724+ * should be safe.
4725+ */
4726+ if (entity_is_long_sleeper (se ))
46944727 se -> vruntime = vruntime ;
46954728 else
46964729 se -> vruntime = max_vruntime (se -> vruntime , vruntime );
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
47704803
47714804 if (flags & ENQUEUE_WAKEUP )
47724805 place_entity (cfs_rq , se , 0 );
4806+ /* Entity has migrated, no longer consider this task hot */
4807+ if (flags & ENQUEUE_MIGRATED )
4808+ se -> exec_start = 0 ;
47734809
47744810 check_schedstat_required ();
47754811 update_stats_enqueue_fair (cfs_rq , se , flags );
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
76577693 /* Tell new CPU we are migrated */
76587694 se -> avg .last_update_time = 0 ;
76597695
7660- /* We have migrated, no longer consider this task hot */
7661- se -> exec_start = 0 ;
7662-
76637696 update_scan_period (p , new_cpu );
76647697}
76657698
0 commit comments