Skip to content

Commit 7de6b4a

Browse files
committed
Merge tag 'wq-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo: - New default WQ_AFFN_CACHE_SHARD affinity scope subdivides LLCs into smaller shards to improve scalability on machines with many CPUs per LLC - Misc: - system_dfl_long_wq for long unbound works - devm_alloc_workqueue() for device-managed allocation - sysfs exposure for ordered workqueues and the EFI workqueue - removal of HK_TYPE_WQ from wq_unbound_cpumask - various small fixes * tag 'wq-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: (21 commits) workqueue: validate cpumask_first() result in llc_populate_cpu_shard_id() workqueue: use NR_STD_WORKER_POOLS instead of hardcoded value workqueue: avoid unguarded 64-bit division docs: workqueue: document WQ_AFFN_CACHE_SHARD affinity scope workqueue: add test_workqueue benchmark module tools/workqueue: add CACHE_SHARD support to wq_dump.py workqueue: set WQ_AFFN_CACHE_SHARD as the default affinity scope workqueue: add WQ_AFFN_CACHE_SHARD affinity scope workqueue: fix typo in WQ_AFFN_SMT comment workqueue: Remove HK_TYPE_WQ from affecting wq_unbound_cpumask workqueue: unlink pwqs from wq->pwqs list in alloc_and_link_pwqs() error path workqueue: Remove NULL wq WARN in __queue_delayed_work() workqueue: fix parse_affn_scope() prefix matching bug workqueue: devres: Add device-managed allocate workqueue workqueue: Add system_dfl_long_wq for long unbound works tools/workqueue/wq_dump.py: add NODE prefix to all node columns tools/workqueue/wq_dump.py: fix column alignment in node_nr/max_active section tools/workqueue/wq_dump.py: remove backslash separator from node_nr/max_active header efi: Allow to expose the workqueue via sysfs workqueue: Allow to expose ordered workqueues via sysfs ...
2 parents b71f0be + 76af546 commit 7de6b4a

10 files changed

Lines changed: 629 additions & 51 deletions

File tree

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8543,7 +8543,8 @@ Kernel parameters
85438543
workqueue.default_affinity_scope=
85448544
Select the default affinity scope to use for unbound
85458545
workqueues. Can be one of "cpu", "smt", "cache",
8546-
"numa" and "system". Default is "cache". For more
8546+
"cache_shard", "numa" and "system". Default is
8547+
"cache_shard". For more
85478548
information, see the Affinity Scopes section in
85488549
Documentation/core-api/workqueue.rst.
85498550

Documentation/core-api/workqueue.rst

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -378,9 +378,9 @@ Affinity Scopes
378378

379379
An unbound workqueue groups CPUs according to its affinity scope to improve
380380
cache locality. For example, if a workqueue is using the default affinity
381-
scope of "cache", it will group CPUs according to last level cache
382-
boundaries. A work item queued on the workqueue will be assigned to a worker
383-
on one of the CPUs which share the last level cache with the issuing CPU.
381+
scope of "cache_shard", it will group CPUs into sub-LLC shards. A work item
382+
queued on the workqueue will be assigned to a worker on one of the CPUs
383+
within the same shard as the issuing CPU.
384384
Once started, the worker may or may not be allowed to move outside the scope
385385
depending on the ``affinity_strict`` setting of the scope.
386386

@@ -402,7 +402,13 @@ Workqueue currently supports the following affinity scopes.
402402
``cache``
403403
CPUs are grouped according to cache boundaries. Which specific cache
404404
boundary is used is determined by the arch code. L3 is used in a lot of
405-
cases. This is the default affinity scope.
405+
cases.
406+
407+
``cache_shard``
408+
CPUs are grouped into sub-LLC shards of at most ``wq_cache_shard_size``
409+
cores (default 8, tunable via the ``workqueue.cache_shard_size`` boot
410+
parameter). Shards are always split on core (SMT group) boundaries.
411+
This is the default affinity scope.
406412

407413
``numa``
408414
CPUs are grouped according to NUMA boundaries.

Documentation/driver-api/driver-model/devres.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,3 +464,7 @@ SPI
464464

465465
WATCHDOG
466466
devm_watchdog_register_device()
467+
468+
WORKQUEUE
469+
devm_alloc_workqueue()
470+
devm_alloc_ordered_workqueue()

drivers/firmware/efi/efi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ static int __init efisubsys_init(void)
423423
* ordered workqueue (which creates only one execution context)
424424
* should suffice for all our needs.
425425
*/
426-
efi_rts_wq = alloc_ordered_workqueue("efi_rts_wq", 0);
426+
efi_rts_wq = alloc_ordered_workqueue("efi_runtime", WQ_SYSFS);
427427
if (!efi_rts_wq) {
428428
pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
429429
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);

include/linux/workqueue.h

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ struct rcu_work {
131131
enum wq_affn_scope {
132132
WQ_AFFN_DFL, /* use system default */
133133
WQ_AFFN_CPU, /* one pod per CPU */
134-
WQ_AFFN_SMT, /* one pod poer SMT */
134+
WQ_AFFN_SMT, /* one pod per SMT */
135135
WQ_AFFN_CACHE, /* one pod per LLC */
136+
WQ_AFFN_CACHE_SHARD, /* synthetic sub-LLC shards */
136137
WQ_AFFN_NUMA, /* one pod per NUMA node */
137138
WQ_AFFN_SYSTEM, /* one pod across the whole system */
138139

@@ -440,6 +441,9 @@ enum wq_consts {
440441
* system_long_wq is similar to system_percpu_wq but may host long running
441442
* works. Queue flushing might take relatively long.
442443
*
444+
* system_dfl_long_wq is similar to system_dfl_wq but it may host long running
445+
* works.
446+
*
443447
* system_dfl_wq is unbound workqueue. Workers are not bound to
444448
* any specific CPU, not concurrency managed, and all queued works are
445449
* executed immediately as long as max_active limit is not reached and
@@ -468,6 +472,7 @@ extern struct workqueue_struct *system_power_efficient_wq;
468472
extern struct workqueue_struct *system_freezable_power_efficient_wq;
469473
extern struct workqueue_struct *system_bh_wq;
470474
extern struct workqueue_struct *system_bh_highpri_wq;
475+
extern struct workqueue_struct *system_dfl_long_wq;
471476

472477
void workqueue_softirq_action(bool highpri);
473478
void workqueue_softirq_dead(unsigned int cpu);
@@ -512,6 +517,26 @@ __printf(1, 4) struct workqueue_struct *
512517
alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...);
513518
#define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__))
514519

520+
/**
521+
* devm_alloc_workqueue - Resource-managed allocate a workqueue
522+
* @dev: Device to allocate workqueue for
523+
* @fmt: printf format for the name of the workqueue
524+
* @flags: WQ_* flags
525+
* @max_active: max in-flight work items, 0 for default
526+
* @...: args for @fmt
527+
*
528+
* Resource managed workqueue, see alloc_workqueue() for details.
529+
*
530+
* The workqueue will be automatically destroyed on driver detach. Typically
531+
 * this should be used in drivers already relying on devm interfaces.
532+
*
533+
* RETURNS:
534+
* Pointer to the allocated workqueue on success, %NULL on failure.
535+
*/
536+
__printf(2, 5) struct workqueue_struct *
537+
devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags,
538+
int max_active, ...);
539+
515540
#ifdef CONFIG_LOCKDEP
516541
/**
517542
* alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map
@@ -568,6 +593,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
568593
*/
569594
#define alloc_ordered_workqueue(fmt, flags, args...) \
570595
alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
596+
#define devm_alloc_ordered_workqueue(dev, fmt, flags, args...) \
597+
devm_alloc_workqueue(dev, fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
571598

572599
#define create_workqueue(name) \
573600
alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
@@ -712,14 +739,14 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
712739
}
713740

714741
/**
715-
* schedule_work - put work task in global workqueue
742+
* schedule_work - put work task in per-CPU workqueue
716743
* @work: job to be done
717744
*
718-
* Returns %false if @work was already on the kernel-global workqueue and
745+
* Returns %false if @work was already on the system per-CPU workqueue and
719746
* %true otherwise.
720747
*
721-
* This puts a job in the kernel-global workqueue if it was not already
722-
* queued and leaves it in the same position on the kernel-global
748+
* This puts a job in the system per-CPU workqueue if it was not already
749+
* queued and leaves it in the same position on the system per-CPU
723750
* workqueue otherwise.
724751
*
725752
* Shares the same memory-ordering properties of queue_work(), cf. the
@@ -783,6 +810,8 @@ extern void __warn_flushing_systemwide_wq(void)
783810
_wq == system_highpri_wq) || \
784811
(__builtin_constant_p(_wq == system_long_wq) && \
785812
_wq == system_long_wq) || \
813+
(__builtin_constant_p(_wq == system_dfl_long_wq) && \
814+
_wq == system_dfl_long_wq) || \
786815
(__builtin_constant_p(_wq == system_dfl_wq) && \
787816
_wq == system_dfl_wq) || \
788817
(__builtin_constant_p(_wq == system_freezable_wq) && \
@@ -796,12 +825,12 @@ extern void __warn_flushing_systemwide_wq(void)
796825
})
797826

798827
/**
799-
* schedule_delayed_work_on - queue work in global workqueue on CPU after delay
828+
* schedule_delayed_work_on - queue work in per-CPU workqueue on CPU after delay
800829
* @cpu: cpu to use
801830
* @dwork: job to be done
802831
* @delay: number of jiffies to wait
803832
*
804-
* After waiting for a given time this puts a job in the kernel-global
833+
* After waiting for a given time this puts a job in the system per-CPU
805834
* workqueue on the specified CPU.
806835
*/
807836
static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
@@ -811,11 +840,11 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
811840
}
812841

813842
/**
814-
* schedule_delayed_work - put work task in global workqueue after delay
843+
* schedule_delayed_work - put work task in per-CPU workqueue after delay
815844
* @dwork: job to be done
816845
* @delay: number of jiffies to wait or 0 for immediate execution
817846
*
818-
* After waiting for a given time this puts a job in the kernel-global
847+
* After waiting for a given time this puts a job in the system per-CPU
819848
* workqueue.
820849
*/
821850
static inline bool schedule_delayed_work(struct delayed_work *dwork,

0 commit comments

Comments (0)