kernel/smp: Provide boot-time timeout for CSD lock diagnostics
authorPaul E. McKenney <paulmck@kernel.org>
Tue, 1 Mar 2022 02:08:33 +0000 (18:08 -0800)
committerPaul E. McKenney <paulmck@kernel.org>
Wed, 20 Apr 2022 23:50:30 +0000 (16:50 -0700)
Debugging of problems involving insanely long-running SMI handlers
proceeds better if the CSD-lock timeout can be adjusted.  This commit
therefore provides a new smp.csd_lock_timeout kernel boot parameter
that specifies the timeout in milliseconds.  The default remains at the
previously hard-coded value of five seconds.

[ paulmck: Apply feedback from Juergen Gross. ]

Cc: Rik van Riel <riel@surriel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Documentation/admin-guide/kernel-parameters.txt
kernel/smp.c

index 3f1cc5e..645c4c0 100644 (file)
        smart2=         [HW]
                        Format: <io1>[,<io2>[,...,<io8>]]
 
+       smp.csd_lock_timeout= [KNL]
+                       Specify the period of time in milliseconds
+                       that smp_call_function() and friends will wait
+                       for a CPU to release the CSD lock.  This is
+                       useful when diagnosing bugs involving CPUs
+                       disabling interrupts for extended periods
+                       of time.  Defaults to 5,000 milliseconds, and
+                       setting a value of zero disables this feature.
+                       This feature may be more efficiently disabled
+                       using the csdlock_debug- kernel parameter.
+
        smsc-ircc2.nopnp        [HW] Don't use PNP to discover SMC devices
        smsc-ircc2.ircc_cfg=    [HW] Device configuration I/O port
        smsc-ircc2.ircc_sir=    [HW] SIR base I/O port
index 01a7c17..6a1f1da 100644 (file)
@@ -183,7 +183,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
 static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
 
-#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
+module_param(csd_lock_timeout, ulong, 0444);
+
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 static u64 cfd_seq;
 
@@ -329,6 +331,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
        u64 ts2, ts_delta;
        call_single_data_t *cpu_cur_csd;
        unsigned int flags = READ_ONCE(csd->node.u_flags);
+       unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
 
        if (!(flags & CSD_FLAG_LOCK)) {
                if (!unlikely(*bug_id))
@@ -341,7 +344,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
 
        ts2 = sched_clock();
        ts_delta = ts2 - *ts1;
-       if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+       if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
                return false;
 
        firsttime = !*bug_id;