workqueue: Make sure that wq_unbound_cpumask is never empty
authorTejun Heo <tj@kernel.org>
Tue, 21 Nov 2023 21:39:36 +0000 (11:39 -1000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 13 Dec 2023 17:45:24 +0000 (18:45 +0100)
commit 4a6c5607d4502ccd1b15b57d57f17d12b6f257a7 upstream.

During boot, depending on how the housekeeping and workqueue.unbound_cpus
masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1
("workqueue: Implement non-strict affinity scope for unbound workqueues"),
this may end up feeding -1 as a CPU number into scheduler leading to oopses.

  BUG: unable to handle page fault for address: ffffffff8305e9c0
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  ...
  Call Trace:
   <TASK>
   select_idle_sibling+0x79/0xaf0
   select_task_rq_fair+0x1cb/0x7b0
   try_to_wake_up+0x29c/0x5c0
   wake_up_process+0x19/0x20
   kick_pool+0x5e/0xb0
   __queue_work+0x119/0x430
   queue_work_on+0x29/0x30
  ...

An empty wq_unbound_cpumask is a clear misconfiguration and already
disallowed once system is booted up. Let's warn on and ignore
unbound_cpumask restrictions which lead to no unbound cpus. While at it,
also remove now unncessary empty check on wq_unbound_cpumask in
wq_select_unbound_cpu().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-Tested-by: Yong He <alexyonghe@tencent.com>
Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com
Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues")
Cc: stable@vger.kernel.org # v6.6+
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
kernel/workqueue.c

index 0f682da..e6a95bb 100644 (file)
@@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu)
                pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
        }
 
-       if (cpumask_empty(wq_unbound_cpumask))
-               return cpu;
-
        new_cpu = __this_cpu_read(wq_rr_cpu_last);
        new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
        if (unlikely(new_cpu >= nr_cpu_ids)) {
@@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { }
 
 #endif /* CONFIG_WQ_WATCHDOG */
 
+static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
+{
+       if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
+               pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n",
+                       cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
+               return;
+       }
+
+       cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
+}
+
 /**
  * workqueue_init_early - early init for workqueue subsystem
  *
@@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void)
        BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
        BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-       cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
-       cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
-
+       cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+       restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ));
+       restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN));
        if (!cpumask_empty(&wq_cmdline_cpumask))
-               cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
+               restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
 
        pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);