@@ -61,6 +61,58 @@ static const char * const perr_strings[] = {
6161 [PERR_REMOTE ] = "Have remote partition underneath" ,
6262};
6363
64+ /*
65+ * CPUSET Locking Convention
66+ * -------------------------
67+ *
68+ * Below are the three global locks guarding cpuset structures in lock
69+ * acquisition order:
70+ * - cpu_hotplug_lock (cpus_read_lock/cpus_write_lock)
71+ * - cpuset_mutex
72+ * - callback_lock (raw spinlock)
73+ *
74+ * A task must hold all three locks to modify externally visible or
75+ * externally used fields of cpusets, though some internally used cpuset
76+ * fields and internal variables can be modified without holding
77+ * callback_lock. If only reliable read access to the externally used
78+ * fields is needed, a task can hold either cpuset_mutex or callback_lock,
79+ * both of which are exposed to other external subsystems.
80+ *
81+ * If a task holds cpu_hotplug_lock and cpuset_mutex, it blocks others,
82+ * ensuring that it is the only task able to also acquire callback_lock and
83+ * be able to modify cpusets. It can perform various checks on the cpuset
84+ * structure first, knowing nothing will change. It can also allocate memory
85+ * without holding callback_lock. While it is performing these checks, various
86+ * callback routines can briefly acquire callback_lock to query cpusets. Once
87+ * it is ready to make the changes, it takes callback_lock, blocking everyone
88+ * else.
89+ *
90+ * Calls to the kernel memory allocator cannot be made while holding
91+ * callback_lock, which is a raw spinlock, as the memory allocator may
92+ * sleep or call back into cpuset code and acquire callback_lock.
93+ *
94+ * The task_struct fields mems_allowed and mempolicy may be changed by
95+ * another task; we use alloc_lock in the task_struct to protect
96+ * them.
97+ *
98+ * The cpuset_common_seq_show() handlers only hold callback_lock across
99+ * small pieces of code, such as when reading out possibly multi-word
100+ * cpumasks and nodemasks.
101+ */
102+
103+ static DEFINE_MUTEX (cpuset_mutex );
104+
105+ /*
106+ * File level internal variables below follow one of the following exclusion
107+ * rules.
108+ *
109+ * RWCS: Read/write-able by holding either cpus_write_lock (and optionally
110+ * cpuset_mutex) or both cpus_read_lock and cpuset_mutex.
111+ *
112+ * CSCB: Readable by holding either cpuset_mutex or callback_lock. Writable
113+ * by holding both cpuset_mutex and callback_lock.
114+ */
115+
64116/*
65117 * For local partitions, update to subpartitions_cpus & isolated_cpus is done
66118 * in update_parent_effective_cpumask(). For remote partitions, it is done in
@@ -70,19 +122,18 @@ static const char * const perr_strings[] = {
70122 * Exclusive CPUs distributed out to local or remote sub-partitions of
71123 * top_cpuset
72124 */
73- static cpumask_var_t subpartitions_cpus ;
125+ static cpumask_var_t subpartitions_cpus ; /* RWCS */
74126
75127/*
76- * Exclusive CPUs in isolated partitions
128+ * Exclusive CPUs in isolated partitions (shown in cpuset.cpus.isolated)
77129 */
78- static cpumask_var_t isolated_cpus ;
130+ static cpumask_var_t isolated_cpus ; /* CSCB */
79131
80132/*
81- * isolated_cpus updating flag (protected by cpuset_mutex)
82- * Set if isolated_cpus is going to be updated in the current
83- * cpuset_mutex crtical section.
133+ * Set if isolated_cpus is being updated in the current cpuset_mutex
134+ * critical section.
84135 */
85- static bool isolated_cpus_updating ;
136+ static bool isolated_cpus_updating ; /* RWCS */
86137
87138/*
88139 * A flag to force sched domain rebuild at the end of an operation.
@@ -98,7 +149,7 @@ static bool isolated_cpus_updating;
98149 * Note that update_relax_domain_level() in cpuset-v1.c can still call
99150 * rebuild_sched_domains_locked() directly without using this flag.
100151 */
101- static bool force_sd_rebuild ;
152+ static bool force_sd_rebuild ; /* RWCS */
102153
103154/*
104155 * Partition root states:
@@ -218,42 +269,6 @@ struct cpuset top_cpuset = {
218269 .partition_root_state = PRS_ROOT ,
219270};
220271
221- /*
222- * There are two global locks guarding cpuset structures - cpuset_mutex and
223- * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
224- * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
225- * structures. Note that cpuset_mutex needs to be a mutex as it is used in
226- * paths that rely on priority inheritance (e.g. scheduler - on RT) for
227- * correctness.
228- *
229- * A task must hold both locks to modify cpusets. If a task holds
230- * cpuset_mutex, it blocks others, ensuring that it is the only task able to
231- * also acquire callback_lock and be able to modify cpusets. It can perform
232- * various checks on the cpuset structure first, knowing nothing will change.
233- * It can also allocate memory while just holding cpuset_mutex. While it is
234- * performing these checks, various callback routines can briefly acquire
235- * callback_lock to query cpusets. Once it is ready to make the changes, it
236- * takes callback_lock, blocking everyone else.
237- *
238- * Calls to the kernel memory allocator can not be made while holding
239- * callback_lock, as that would risk double tripping on callback_lock
240- * from one of the callbacks into the cpuset code from within
241- * __alloc_pages().
242- *
243- * If a task is only holding callback_lock, then it has read-only
244- * access to cpusets.
245- *
246- * Now, the task_struct fields mems_allowed and mempolicy may be changed
247- * by other task, we use alloc_lock in the task_struct fields to protect
248- * them.
249- *
250- * The cpuset_common_seq_show() handlers only hold callback_lock across
251- * small pieces of code, such as when reading out possibly multi-word
252- * cpumasks and nodemasks.
253- */
254-
255- static DEFINE_MUTEX (cpuset_mutex );
256-
257272/**
258273 * cpuset_lock - Acquire the global cpuset mutex
259274 *
@@ -1163,6 +1178,8 @@ static void reset_partition_data(struct cpuset *cs)
11631178static void isolated_cpus_update (int old_prs , int new_prs , struct cpumask * xcpus )
11641179{
11651180 WARN_ON_ONCE (old_prs == new_prs );
1181+ lockdep_assert_held (& callback_lock );
1182+ lockdep_assert_held (& cpuset_mutex );
11661183 if (new_prs == PRS_ISOLATED )
11671184 cpumask_or (isolated_cpus , isolated_cpus , xcpus );
11681185 else
0 commit comments