Skip to content

Commit d6e152d

Browse files
author
Thomas Gleixner
committed
clockevents: Prevent timer interrupt starvation
Calvin reported an odd NMI watchdog lockup which claims that the CPU locked up in user space. He provided a reproducer, which sets up a timerfd based timer and then rearms it in a loop with an absolute expiry time of 1ns. As the expiry time is in the past, the timer ends up as the first expiring timer in the per CPU hrtimer base and the clockevent device is programmed with the minimum delta value. If the machine is fast enough, this ends up in an endless loop of programming the delta value to the minimum value defined by the clock event device, before the timer interrupt can fire, which starves the interrupt and consequently triggers the lockup detector because the hrtimer callback of the lockup mechanism is never invoked. As a first step to prevent this, avoid reprogramming the clock event device when: - a forced minimum delta event is pending - the new expiry delta is less than or equal to the minimum delta Thanks to Calvin for providing the reproducer and to Borislav for testing and providing data from his Zen5 machine. The problem is not limited to Zen5, but depending on the underlying clock event device (e.g. TSC deadline timer on Intel) and the CPU speed not necessarily observable. This change serves only as the last resort and further changes will be made to prevent this scenario earlier in the call chain as far as possible. [ tglx: Updated to restore the old behaviour vs. !force and delta <= 0 and fixed up the tick-broadcast handlers as pointed out by Borislav ] Fixes: d316c57 ("[PATCH] clockevents: add core functionality") Reported-by: Calvin Owens <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Calvin Owens <[email protected]> Tested-by: Borislav Petkov <[email protected]> Link: https://lore.kernel.org/lkml/[email protected]/ Link: https://patch.msgid.link/[email protected]
1 parent 82b9150 commit d6e152d

6 files changed

Lines changed: 31 additions & 9 deletions

File tree

include/linux/clockchips.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ enum clock_event_state {
8080
* @shift: nanoseconds to cycles divisor (power of two)
8181
* @state_use_accessors:current state of the device, assigned by the core code
8282
* @features: features
83+
* @next_event_forced: True if the last programming was a forced event
8384
* @retries: number of forced programming retries
8485
* @set_state_periodic: switch state to periodic
8586
* @set_state_oneshot: switch state to oneshot
@@ -108,6 +109,7 @@ struct clock_event_device {
108109
u32 shift;
109110
enum clock_event_state state_use_accessors;
110111
unsigned int features;
112+
unsigned int next_event_forced;
111113
unsigned long retries;
112114

113115
int (*set_state_periodic)(struct clock_event_device *);

kernel/time/clockevents.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ void clockevents_shutdown(struct clock_event_device *dev)
172172
{
173173
clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
174174
dev->next_event = KTIME_MAX;
175+
dev->next_event_forced = 0;
175176
}
176177

177178
/**
@@ -305,7 +306,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
305306
{
306307
unsigned long long clc;
307308
int64_t delta;
308-
int rc;
309309

310310
if (WARN_ON_ONCE(expires < 0))
311311
return -ETIME;
@@ -324,16 +324,27 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
324324
return dev->set_next_ktime(expires, dev);
325325

326326
delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
327-
if (delta <= 0)
328-
return force ? clockevents_program_min_delta(dev) : -ETIME;
329327

330-
delta = min(delta, (int64_t) dev->max_delta_ns);
331-
delta = max(delta, (int64_t) dev->min_delta_ns);
328+
/* Required for tick_periodic() during early boot */
329+
if (delta <= 0 && !force)
330+
return -ETIME;
331+
332+
if (delta > (int64_t)dev->min_delta_ns) {
333+
delta = min(delta, (int64_t) dev->max_delta_ns);
334+
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
335+
if (!dev->set_next_event((unsigned long) clc, dev))
336+
return 0;
337+
}
332338

333-
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
334-
rc = dev->set_next_event((unsigned long) clc, dev);
339+
if (dev->next_event_forced)
340+
return 0;
335341

336-
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
342+
if (dev->set_next_event(dev->min_delta_ticks, dev)) {
343+
if (!force || clockevents_program_min_delta(dev))
344+
return -ETIME;
345+
}
346+
dev->next_event_forced = 1;
347+
return 0;
337348
}
338349

339350
/*

kernel/time/hrtimer.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1888,6 +1888,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
18881888
BUG_ON(!cpu_base->hres_active);
18891889
cpu_base->nr_events++;
18901890
dev->next_event = KTIME_MAX;
1891+
dev->next_event_forced = 0;
18911892

18921893
raw_spin_lock_irqsave(&cpu_base->lock, flags);
18931894
entry_time = now = hrtimer_update_base(cpu_base);

kernel/time/tick-broadcast.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,10 @@ const struct clock_event_device *tick_get_wakeup_device(int cpu)
7676
*/
7777
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
7878
{
79-
if (bc)
79+
if (bc) {
80+
bc->next_event_forced = 0;
8081
tick_setup_periodic(bc, 1);
82+
}
8183
}
8284

8385
/*
@@ -403,6 +405,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
403405
bool bc_local;
404406

405407
raw_spin_lock(&tick_broadcast_lock);
408+
tick_broadcast_device.evtdev->next_event_forced = 0;
406409

407410
/* Handle spurious interrupts gracefully */
408411
if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
@@ -696,6 +699,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
696699

697700
raw_spin_lock(&tick_broadcast_lock);
698701
dev->next_event = KTIME_MAX;
702+
tick_broadcast_device.evtdev->next_event_forced = 0;
699703
next_event = KTIME_MAX;
700704
cpumask_clear(tmpmask);
701705
now = ktime_get();
@@ -1063,6 +1067,7 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
10631067

10641068

10651069
bc->event_handler = tick_handle_oneshot_broadcast;
1070+
bc->next_event_forced = 0;
10661071
bc->next_event = KTIME_MAX;
10671072

10681073
/*
@@ -1175,6 +1180,7 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
11751180
}
11761181

11771182
/* This moves the broadcast assignment to this CPU: */
1183+
bc->next_event_forced = 0;
11781184
clockevents_program_event(bc, bc->next_event, 1);
11791185
}
11801186
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);

kernel/time/tick-common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
110110
int cpu = smp_processor_id();
111111
ktime_t next = dev->next_event;
112112

113+
dev->next_event_forced = 0;
113114
tick_periodic(cpu);
114115

115116
/*

kernel/time/tick-sched.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,7 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev)
15131513
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
15141514

15151515
dev->next_event = KTIME_MAX;
1516+
dev->next_event_forced = 0;
15161517

15171518
if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART))
15181519
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);

0 commit comments

Comments
 (0)