Skip to content

Commit e5a30c3

Browse files
Lai Jiangshan authored and htejun (Tejun Heo) committed
workqueue: Process rescuer work items one-by-one using a cursor
Previously, the rescuer scanned for all matching work items at once and processed them within a single rescuer thread, which could cause one blocking work item to stall all others. Make the rescuer process work items one-by-one instead of slurping all matches in a single pass. Break the rescuer loop after finding and processing the first matching work item, then restart the search to pick up the next. This gives normal worker threads a chance to process other items which gives them the opportunity to be processed instead of waiting on the rescuer's queue and prevents a blocking work item from stalling the rest once memory pressure is relieved. Introduce a dummy cursor work item to avoid potentially O(N^2) rescans of the work list. The marker records the resume position for the next scan, eliminating redundant traversals. Also introduce RESCUER_BATCH to control the maximum number of work items the rescuer processes in each turn, and move on to other PWQs when the limit is reached. Cc: ying chen <[email protected]> Reported-by: ying chen <[email protected]> Fixes: e22bee7 ("workqueue: implement concurrency managed dynamic worker pool") Signed-off-by: Lai Jiangshan <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent fc5ff53 commit e5a30c3

1 file changed

Lines changed: 59 additions & 16 deletions

File tree

kernel/workqueue.c

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ enum wq_internal_consts {
117117
MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
118118
CREATE_COOLDOWN = HZ, /* time to breath after fail */
119119

120+
RESCUER_BATCH = 16, /* process items per turn */
121+
120122
/*
121123
* Rescue workers are used only on emergencies and shared by
122124
* all cpus. Give MIN_NICE.
@@ -286,6 +288,7 @@ struct pool_workqueue {
286288
struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
287289
struct list_head pwqs_node; /* WR: node on wq->pwqs */
288290
struct list_head mayday_node; /* MD: node on wq->maydays */
291+
struct work_struct mayday_cursor; /* L: cursor on pool->worklist */
289292

290293
u64 stats[PWQ_NR_STATS];
291294

@@ -1120,6 +1123,12 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
11201123
return NULL;
11211124
}
11221125

1126+
/*
 * Callback for pwq->mayday_cursor, the dummy work item the rescuer leaves
 * on pool->worklist to mark where its next scan should resume
 * (see assign_rescuer_work()).
 *
 * The cursor must never actually be executed: assign_work() recognizes it
 * by this function pointer and skips it, so reaching this callback means a
 * work item with a cursor func was dispatched — an unrecoverable logic bug.
 */
static void mayday_cursor_func(struct work_struct *work)
{
	/* should not be processed, only for marking position */
	BUG();
}
1131+
11231132
/**
11241133
* move_linked_works - move linked works to a list
11251134
* @work: start of series of works to be scheduled
@@ -1182,6 +1191,16 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
11821191

11831192
lockdep_assert_held(&pool->lock);
11841193

1194+
/* The cursor work should not be processed */
1195+
if (unlikely(work->func == mayday_cursor_func)) {
1196+
/* only worker_thread() can possibly take this branch */
1197+
WARN_ON_ONCE(worker->rescue_wq);
1198+
if (nextp)
1199+
*nextp = list_next_entry(work, entry);
1200+
list_del_init(&work->entry);
1201+
return false;
1202+
}
1203+
11851204
/*
11861205
* A single work shouldn't be executed concurrently by multiple workers.
11871206
* __queue_work() ensures that @work doesn't jump to a different pool
@@ -3439,22 +3458,30 @@ static int worker_thread(void *__worker)
34393458
/**
 * assign_rescuer_work - find and assign one work item for the rescuer
 * @pwq: pool_workqueue in mayday that the rescuer is servicing
 * @rescuer: the rescuer worker
 *
 * Scan @pwq->pool->worklist for the next work item belonging to @pwq and
 * assign it (plus any linked works) to @rescuer via assign_work().  Only a
 * single matching item is taken per call so that one blocking work item
 * cannot stall the rest; the caller loops, calling this again after each
 * process_scheduled_works().
 *
 * To avoid O(N^2) rescans of the worklist, the per-pwq dummy cursor work
 * item (pwq->mayday_cursor) is moved to just after the assigned item's
 * successor; the next call resumes the scan from the cursor instead of the
 * list head.  An unlinked cursor (list_empty()) means "start from the top".
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) — implied by the list walk and the
 * need_to_create_worker() check; assign_work() asserts it.
 *
 * Return: %true if a work item was assigned and the cursor updated,
 * %false if rescue is no longer needed or no matching item remains.
 */
static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescuer)
{
	struct worker_pool *pool = pwq->pool;
	struct work_struct *cursor = &pwq->mayday_cursor;
	struct work_struct *work, *n;

	/* need rescue? */
	if (!pwq->nr_active || !need_to_create_worker(pool))
		return false;

	/* search from the start or cursor if available */
	if (list_empty(&cursor->entry))
		work = list_first_entry(&pool->worklist, struct work_struct, entry);
	else
		work = list_next_entry(cursor, entry);

	/* find the next work item to rescue */
	list_for_each_entry_safe_from(work, n, &pool->worklist, entry) {
		if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) {
			pwq->stats[PWQ_STAT_RESCUED]++;
			/* put the cursor for next search */
			list_move_tail(&cursor->entry, &n->entry);
			return true;
		}
	}

	return false;
}
34593486

34603487
/**
@@ -3511,6 +3538,7 @@ static int rescuer_thread(void *__rescuer)
35113538
struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
35123539
struct pool_workqueue, mayday_node);
35133540
struct worker_pool *pool = pwq->pool;
3541+
unsigned int count = 0;
35143542

35153543
__set_current_state(TASK_RUNNING);
35163544
list_del_init(&pwq->mayday_node);
@@ -3523,25 +3551,27 @@ static int rescuer_thread(void *__rescuer)
35233551

35243552
WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
35253553

3526-
if (assign_rescuer_work(pwq, rescuer)) {
3554+
while (assign_rescuer_work(pwq, rescuer)) {
35273555
process_scheduled_works(rescuer);
35283556

35293557
/*
3530-
* The above execution of rescued work items could
3531-
* have created more to rescue through
3532-
* pwq_activate_first_inactive() or chained
3533-
* queueing. Let's put @pwq back on mayday list so
3534-
* that such back-to-back work items, which may be
3535-
* being used to relieve memory pressure, don't
3536-
* incur MAYDAY_INTERVAL delay inbetween.
3558+
* If the per-turn work item limit is reached and other
3559+
* PWQs are in mayday, requeue mayday for this PWQ and
3560+
* let the rescuer handle the other PWQs first.
35373561
*/
3538-
if (pwq->nr_active && need_to_create_worker(pool)) {
3562+
if (++count > RESCUER_BATCH && !list_empty(&pwq->wq->maydays) &&
3563+
pwq->nr_active && need_to_create_worker(pool)) {
35393564
raw_spin_lock(&wq_mayday_lock);
35403565
send_mayday(pwq);
35413566
raw_spin_unlock(&wq_mayday_lock);
3567+
break;
35423568
}
35433569
}
35443570

3571+
/* The cursor can not be left behind without the rescuer watching it. */
3572+
if (!list_empty(&pwq->mayday_cursor.entry) && list_empty(&pwq->mayday_node))
3573+
list_del_init(&pwq->mayday_cursor.entry);
3574+
35453575
/*
35463576
* Leave this pool. Notify regular workers; otherwise, we end up
35473577
* with 0 concurrency and stalling the execution.
@@ -5160,6 +5190,19 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
51605190
INIT_LIST_HEAD(&pwq->pwqs_node);
51615191
INIT_LIST_HEAD(&pwq->mayday_node);
51625192
kthread_init_work(&pwq->release_work, pwq_release_workfn);
5193+
5194+
/*
5195+
* Set the dummy cursor work with valid function and get_work_pwq().
5196+
*
5197+
* The cursor work should only be in the pwq->pool->worklist, and
5198+
* should not be treated as a processable work item.
5199+
*
5200+
* WORK_STRUCT_PENDING and WORK_STRUCT_INACTIVE just make it less
5201+
* surprise for kernel debugging tools and reviewers.
5202+
*/
5203+
INIT_WORK(&pwq->mayday_cursor, mayday_cursor_func);
5204+
atomic_long_set(&pwq->mayday_cursor.data, (unsigned long)pwq |
5205+
WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | WORK_STRUCT_INACTIVE);
51635206
}
51645207

51655208
/* sync @pwq with the current state of its associated wq and link it */

0 commit comments

Comments
 (0)