Skip to content

Commit e700f5d

Browse files
lrq-max authored and akpm00 committed
watchdog: softlockup: panic when lockup duration exceeds N thresholds
The softlockup_panic sysctl is currently a binary option: panic immediately or never panic on soft lockups.

Panicking on any soft lockup, regardless of duration, can be overly aggressive for brief stalls that may be caused by legitimate operations. Conversely, never panicking may allow severe system hangs to persist undetected.

Extend softlockup_panic to accept an integer threshold, allowing the kernel to panic only when the normalized lockup duration exceeds N watchdog threshold periods. This provides finer-grained control to distinguish between transient delays and persistent system failures.

The accepted values are:
- 0: Don't panic (unchanged)
- 1: Panic when duration >= 1 * threshold (20s default, original behavior)
- N > 1: Panic when duration >= N * threshold (e.g., 2 = 40s, 3 = 60s.)

The original behavior is preserved for values 0 and 1, maintaining full backward compatibility while allowing systems to tolerate brief lockups while still catching severe, persistent hangs.

[[email protected]: v2]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Li RongQing <[email protected]>
Cc: Eduard Zingerman <[email protected]>
Cc: Hao Luo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: John Fastabend <[email protected]>
Cc: KP Singh <[email protected]>
Cc: Lance Yang <[email protected]>
Cc: Martin KaFai Lau <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Song Liu <[email protected]>
Cc: Stanislav Fomichev <[email protected]>
Cc: Yonghong Song <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent b5bfcc1 commit e700f5d

13 files changed

Lines changed: 28 additions & 25 deletions

File tree

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6969,12 +6969,12 @@ Kernel parameters
69696969

69706970
softlockup_panic=
69716971
[KNL] Should the soft-lockup detector generate panics.
6972-
Format: 0 | 1
6972+
Format: <int>
69736973

6974-
A value of 1 instructs the soft-lockup detector
6975-
to panic the machine when a soft-lockup occurs. It is
6976-
also controlled by the kernel.softlockup_panic sysctl
6977-
and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
6974+
A value of non-zero instructs the soft-lockup detector
6975+
to panic the machine when a soft-lockup duration exceeds
6976+
N thresholds. It is also controlled by the kernel.softlockup_panic
6977+
sysctl and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
69786978
respective build-time switch to that functionality.
69796979

69806980
softlockup_all_cpu_backtrace=

arch/arm/configs/aspeed_g5_defconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ CONFIG_SCHED_STACK_END_CHECK=y
306306
CONFIG_PANIC_ON_OOPS=y
307307
CONFIG_PANIC_TIMEOUT=-1
308308
CONFIG_SOFTLOCKUP_DETECTOR=y
309-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
309+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
310310
CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
311311
CONFIG_WQ_WATCHDOG=y
312312
# CONFIG_SCHED_DEBUG is not set

arch/arm/configs/pxa3xx_defconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ CONFIG_PRINTK_TIME=y
100100
CONFIG_DEBUG_KERNEL=y
101101
CONFIG_MAGIC_SYSRQ=y
102102
CONFIG_DEBUG_SHIRQ=y
103-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
103+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
104104
# CONFIG_SCHED_DEBUG is not set
105105
CONFIG_DEBUG_SPINLOCK=y
106106
CONFIG_DEBUG_SPINLOCK_SLEEP=y

arch/openrisc/configs/or1klitex_defconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,bpf"
5252
CONFIG_PRINTK_TIME=y
5353
CONFIG_PANIC_ON_OOPS=y
5454
CONFIG_SOFTLOCKUP_DETECTOR=y
55-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
55+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
5656
CONFIG_BUG_ON_DATA_CORRUPTION=y

arch/powerpc/configs/skiroot_defconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ CONFIG_SCHED_STACK_END_CHECK=y
289289
CONFIG_DEBUG_STACKOVERFLOW=y
290290
CONFIG_PANIC_ON_OOPS=y
291291
CONFIG_SOFTLOCKUP_DETECTOR=y
292-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
292+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
293293
CONFIG_HARDLOCKUP_DETECTOR=y
294294
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
295295
CONFIG_WQ_WATCHDOG=y

drivers/gpu/drm/ci/arm.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ CONFIG_TMPFS=y
5252
CONFIG_PROVE_LOCKING=n
5353
CONFIG_DEBUG_LOCKDEP=n
5454
CONFIG_SOFTLOCKUP_DETECTOR=n
55-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=n
55+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0
5656

5757
CONFIG_FW_LOADER_COMPRESS=y
5858

drivers/gpu/drm/ci/arm64.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ CONFIG_TMPFS=y
161161
CONFIG_PROVE_LOCKING=n
162162
CONFIG_DEBUG_LOCKDEP=n
163163
CONFIG_SOFTLOCKUP_DETECTOR=y
164-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
164+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
165165

166166
CONFIG_DETECT_HUNG_TASK=y
167167

drivers/gpu/drm/ci/x86_64.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ CONFIG_TMPFS=y
4747
CONFIG_PROVE_LOCKING=n
4848
CONFIG_DEBUG_LOCKDEP=n
4949
CONFIG_SOFTLOCKUP_DETECTOR=y
50-
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
50+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
5151

5252
CONFIG_DETECT_HUNG_TASK=y
5353

kernel/configs/debug.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ CONFIG_SLUB_DEBUG_ON=y
8484
# Debug Oops, Lockups and Hangs
8585
#
8686
CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0
87-
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
87+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0
8888
CONFIG_DEBUG_ATOMIC_SLEEP=y
8989
CONFIG_DETECT_HUNG_TASK=y
9090
CONFIG_PANIC_ON_OOPS=y

kernel/watchdog.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
363363

364364
/* Global variables, exported for sysctl */
365365
unsigned int __read_mostly softlockup_panic =
366-
IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);
366+
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC;
367367

368368
static bool softlockup_initialized __read_mostly;
369369
static u64 __read_mostly sample_period;
@@ -774,8 +774,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
774774
{
775775
unsigned long touch_ts, period_ts, now;
776776
struct pt_regs *regs = get_irq_regs();
777-
int duration;
778777
int softlockup_all_cpu_backtrace;
778+
int duration, thresh_count;
779779
unsigned long flags;
780780

781781
if (!watchdog_enabled)
@@ -879,7 +879,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
879879

880880
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
881881
sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT);
882-
if (softlockup_panic)
882+
thresh_count = duration / get_softlockup_thresh();
883+
884+
if (softlockup_panic && thresh_count >= softlockup_panic)
883885
panic("softlockup: hung tasks");
884886
}
885887

@@ -1228,7 +1230,7 @@ static const struct ctl_table watchdog_sysctls[] = {
12281230
.mode = 0644,
12291231
.proc_handler = proc_dointvec_minmax,
12301232
.extra1 = SYSCTL_ZERO,
1231-
.extra2 = SYSCTL_ONE,
1233+
.extra2 = SYSCTL_INT_MAX,
12321234
},
12331235
{
12341236
.procname = "softlockup_sys_info",

0 commit comments

Comments
 (0)