workqueue: Don't hold any lock while rcuwait'ing for !POOL_MANAGER_ACTIVE
author Valentin Schneider <vschneid@redhat.com>
Thu, 12 Jan 2023 16:14:30 +0000 (16:14 +0000)
committer Tejun Heo <tj@kernel.org>
Thu, 12 Jan 2023 16:21:49 +0000 (06:21 -1000)
put_unbound_pool() currently passes wq_manager_inactive() as exit condition
to rcuwait_wait_event(), which grabs pool->lock to check for

  pool->flags & POOL_MANAGER_ACTIVE

A later patch will require destroy_worker() to be invoked with
wq_pool_attach_mutex held, which needs to be acquired before
pool->lock. A mutex cannot be acquired within rcuwait_wait_event(), as
it could clobber the task state set by rcuwait_wait_event().

Instead, restructure the waiting logic to acquire any necessary lock
outside of rcuwait_wait_event().

Since further work cannot be inserted into unbound pwqs that have reached
->refcnt==0, this is bound to make forward progress as eventually the
worklist will be drained and need_more_worker(pool) will remain false,
preventing any worker from stealing the manager position from us.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/workqueue.c

index e918164..a826956 100644 (file)
@@ -3608,18 +3608,6 @@ static void rcu_free_pool(struct rcu_head *rcu)
        kfree(pool);
 }
 
-/* This returns with the lock held on success (pool manager is inactive). */
-static bool wq_manager_inactive(struct worker_pool *pool)
-{
-       raw_spin_lock_irq(&pool->lock);
-
-       if (pool->flags & POOL_MANAGER_ACTIVE) {
-               raw_spin_unlock_irq(&pool->lock);
-               return false;
-       }
-       return true;
-}
-
 /**
  * put_unbound_pool - put a worker_pool
  * @pool: worker_pool to put
@@ -3655,12 +3643,26 @@ static void put_unbound_pool(struct worker_pool *pool)
         * Become the manager and destroy all workers.  This prevents
         * @pool's workers from blocking on attach_mutex.  We're the last
         * manager and @pool gets freed with the flag set.
-        * Because of how wq_manager_inactive() works, we will hold the
-        * spinlock after a successful wait.
+        *
+        * Having a concurrent manager is quite unlikely to happen as we can
+        * only get here with
+        *   pwq->refcnt == pool->refcnt == 0
+        * which implies no work queued to the pool, which implies no worker can
+        * become the manager. However a worker could have taken the role of
+        * manager before the refcnts dropped to 0, since maybe_create_worker()
+        * drops pool->lock
         */
-       rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
-                          TASK_UNINTERRUPTIBLE);
-       pool->flags |= POOL_MANAGER_ACTIVE;
+       while (true) {
+               rcuwait_wait_event(&manager_wait,
+                                  !(pool->flags & POOL_MANAGER_ACTIVE),
+                                  TASK_UNINTERRUPTIBLE);
+               raw_spin_lock_irq(&pool->lock);
+               if (!(pool->flags & POOL_MANAGER_ACTIVE)) {
+                       pool->flags |= POOL_MANAGER_ACTIVE;
+                       break;
+               }
+               raw_spin_unlock_irq(&pool->lock);
+       }
 
        while ((worker = first_idle_worker(pool)))
                destroy_worker(worker);