rcu: Panic after fixed number of stalls
author chao <chao@eero.com>
Mon, 31 Aug 2020 06:41:17 +0000 (23:41 -0700)
committer Paul E. McKenney <paulmck@kernel.org>
Fri, 20 Nov 2020 03:37:16 +0000 (19:37 -0800)
Some stalls are transient, so that the system fully recovers.  This commit
therefore allows users to configure the number of stalls that must happen
in order to trigger a kernel panic.

Signed-off-by: chao <chao@eero.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
include/linux/kernel.h
kernel/rcu/tree_stall.h
kernel/sysctl.c

index 2f05e91..4b5fd3d 100644 (file)
@@ -536,6 +536,7 @@ extern int panic_on_warn;
 extern unsigned long panic_on_taint;
 extern bool panic_on_taint_nousertaint;
 extern int sysctl_panic_on_rcu_stall;
+extern int sysctl_max_rcu_stall_to_panic;
 extern int sysctl_panic_on_stackoverflow;
 
 extern bool crash_kexec_post_notifiers;
index ca21d28..70d48c5 100644 (file)
@@ -13,6 +13,7 @@
 
 /* panic() on RCU Stall sysctl. */
 int sysctl_panic_on_rcu_stall __read_mostly;
+int sysctl_max_rcu_stall_to_panic __read_mostly;
 
 #ifdef CONFIG_PROVE_RCU
 #define RCU_STALL_DELAY_DELTA          (5 * HZ)
@@ -106,6 +107,11 @@ early_initcall(check_cpu_stall_init);
 /* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
 static void panic_on_rcu_stall(void)
 {
+       static int cpu_stall;
+
+       if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
+               return;
+
        if (sysctl_panic_on_rcu_stall)
                panic("RCU Stall\n");
 }
index afad085..c9fbdd8 100644 (file)
@@ -2650,6 +2650,17 @@ static struct ctl_table kern_table[] = {
                .extra2         = SYSCTL_ONE,
        },
 #endif
+#if defined(CONFIG_TREE_RCU)
+       {
+               .procname       = "max_rcu_stall_to_panic",
+               .data           = &sysctl_max_rcu_stall_to_panic,
+               .maxlen         = sizeof(sysctl_max_rcu_stall_to_panic),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ONE,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+#endif
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
        {
                .procname       = "stack_erasing",