workqueue: Report work funcs that trigger automatic CPU_INTENSIVE mechanism
author    Tejun Heo <tj@kernel.org>
          Thu, 18 May 2023 03:02:08 +0000 (17:02 -1000)
committer Tejun Heo <tj@kernel.org>
          Thu, 18 May 2023 03:02:08 +0000 (17:02 -1000)
Workqueue now automatically marks per-cpu work items that hog CPU for too
long as CPU_INTENSIVE, which excludes them from concurrency management and
prevents them from stalling other concurrency-managed work items. If a work
function keeps running over the threshold, it likely needs to be switched to
use an unbound workqueue, as illustrated by the sketch below.
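
For illustration (not part of this patch), a hypothetical work function like
the sketch below would blow past the threshold on every invocation; queueing
it on a WQ_UNBOUND workqueue is the suggested fix:

  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/workqueue.h>

  /* Hypothetical work function which burns CPU for well over the default
   * 10ms wq_cpu_intensive_thresh_us on every invocation. */
  static void crunch_fn(struct work_struct *work)
  {
          volatile u64 sum = 0;
          u64 i;

          for (i = 0; i < (1ULL << 28); i++)
                  sum += i;
  }
  static DECLARE_WORK(crunch_work, crunch_fn);

  static struct workqueue_struct *crunch_wq;

  static int __init crunch_init(void)
  {
          /* WQ_UNBOUND excludes the work item from per-cpu concurrency
           * management, so it can't stall other per-cpu work items. */
          crunch_wq = alloc_workqueue("crunch", WQ_UNBOUND, 0);
          if (!crunch_wq)
                  return -ENOMEM;
          queue_work(crunch_wq, &crunch_work);
          return 0;
  }

  static void __exit crunch_exit(void)
  {
          destroy_workqueue(crunch_wq);   /* drains pending work first */
  }

  module_init(crunch_init);
  module_exit(crunch_exit);
  MODULE_LICENSE("GPL");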

This patch adds a debug mechanism which tracks the work functions that
trigger the automatic CPU_INTENSIVE mechanism and reports them using
printk_deferred() with exponential backoff.
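
With the default 10ms threshold, a repeat offender is reported on its 4th,
8th, 16th, ... violation, producing a console line like the following
(function name hypothetical, message format as added by this patch):

  workqueue: crunch_fn hogged CPU for >10000us 16 times, consider switching to WQ_UNBOUND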

v3: Documentation update.

v2: Drop bouncing to kthread_worker for printing messages. It was meant to
    avoid introducing a circular locking dependency through printk but wasn't
    effective as it still had the pool lock -> wci_lock -> printk -> pool lock
    loop. Let's just print directly using printk_deferred().

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Documentation/admin-guide/kernel-parameters.txt
kernel/workqueue.c
lib/Kconfig.debug

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1f2185c..3ed7dda 100644
                        them from noticeably delaying other per-cpu work
                        items. Default is 10000 (10ms).
 
+                       If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+                       will report the work functions which violate this
+                       threshold repeatedly. They are likely good
+                       candidates for using WQ_UNBOUND workqueues instead.
+
        workqueue.disable_numa
                        By default, all work items queued to unbound
                        workqueues are affine to the NUMA nodes they're
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3dc83d5..4ca6638 100644
@@ -948,6 +948,98 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
                        pool->nr_running++;
 }
 
+#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
+
+/*
+ * Concurrency-managed per-cpu work items that hog CPU for longer than
+ * wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
+ * which prevents them from stalling other concurrency-managed work items. If a
+ * work function keeps triggering this mechanism, it's likely that the work item
+ * should be using an unbound workqueue instead.
+ *
+ * wq_cpu_intensive_report() tracks work functions which trigger such conditions
+ * and reports them so that they can be examined and converted to use unbound
+ * workqueues as appropriate. To avoid flooding the console, each violating work
+ * function is tracked and reported with exponential backoff.
+ */
+#define WCI_MAX_ENTS 128
+
+struct wci_ent {
+       work_func_t             func;
+       atomic64_t              cnt;
+       struct hlist_node       hash_node;
+};
+
+static struct wci_ent wci_ents[WCI_MAX_ENTS];
+static int wci_nr_ents;
+static DEFINE_RAW_SPINLOCK(wci_lock);
+static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));
+
+static struct wci_ent *wci_find_ent(work_func_t func)
+{
+       struct wci_ent *ent;
+
+       hash_for_each_possible_rcu(wci_hash, ent, hash_node,
+                                  (unsigned long)func) {
+               if (ent->func == func)
+                       return ent;
+       }
+       return NULL;
+}
+
+static void wq_cpu_intensive_report(work_func_t func)
+{
+       struct wci_ent *ent;
+
+restart:
+       ent = wci_find_ent(func);
+       if (ent) {
+               u64 cnt;
+
+               /*
+                * Start reporting from the fourth time and back off
+                * exponentially.
+                */
+               cnt = atomic64_inc_return_relaxed(&ent->cnt);
+               if (cnt >= 4 && is_power_of_2(cnt))
+                       printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
+                                       ent->func, wq_cpu_intensive_thresh_us,
+                                       atomic64_read(&ent->cnt));
+               return;
+       }
+
+       /*
+        * @func is a new violation. Allocate a new entry for it. If wci_ents[]
+        * is exhausted, something went really wrong and we probably made enough
+        * noise already.
+        */
+       if (wci_nr_ents >= WCI_MAX_ENTS)
+               return;
+
+       raw_spin_lock(&wci_lock);
+
+       if (wci_nr_ents >= WCI_MAX_ENTS) {
+               raw_spin_unlock(&wci_lock);
+               return;
+       }
+
+       if (wci_find_ent(func)) {
+               raw_spin_unlock(&wci_lock);
+               goto restart;
+       }
+
+       ent = &wci_ents[wci_nr_ents++];
+       ent->func = func;
+       atomic64_set(&ent->cnt, 1);
+       hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);
+
+       raw_spin_unlock(&wci_lock);
+}
+
+#else  /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
+static void wq_cpu_intensive_report(work_func_t func) {}
+#endif /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
+
 /**
  * wq_worker_running - a worker is running again
  * @task: task waking up
@@ -1057,6 +1149,7 @@ void wq_worker_tick(struct task_struct *task)
        raw_spin_lock(&pool->lock);
 
        worker_set_flags(worker, WORKER_CPU_INTENSIVE);
+       wq_cpu_intensive_report(worker->current_func);
        pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;
 
        if (need_more_worker(pool)) {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ce51d4d..97e880a 100644
@@ -1134,6 +1134,19 @@ config WQ_WATCHDOG
          state.  This can be configured through kernel parameter
          "workqueue.watchdog_thresh" and its sysfs counterpart.
 
+config WQ_CPU_INTENSIVE_REPORT
+       bool "Report per-cpu work items which hog CPU for too long"
+       depends on DEBUG_KERNEL
+       help
+         Say Y here to enable reporting of concurrency-managed per-cpu work
+         items that hog CPUs for longer than
+         workqueue.cpu_intensive_thresh_us. Workqueue automatically
+         detects and excludes them from concurrency management to prevent
+         them from stalling other per-cpu work items. Occasional
+         triggering may not necessarily indicate a problem. Repeated
+         triggering likely indicates that the work item should be switched
+         to use an unbound workqueue.
+
 config TEST_LOCKUP
        tristate "Test module to generate lockups"
        depends on m