workqueue: more destroy_workqueue() fixes
authorTejun Heo <tj@kernel.org>
Mon, 23 Sep 2019 18:08:58 +0000 (11:08 -0700)
committerTejun Heo <tj@kernel.org>
Fri, 4 Oct 2019 17:23:01 +0000 (10:23 -0700)
destroy_workqueue() warnings still, at a lower frequency, trigger
spuriously.  The problem seems to be in-flight operations which
haven't reached put_pwq() yet.

* Make sanity check grab all the related locks so that it's
  synchronized against operations which puts pwq at the end.

* Always print out the offending pwq.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Williams, Gerald S" <gerald.s.williams@intel.com>
kernel/workqueue.c

index 7f7aa45..4a3c301 100644 (file)
@@ -355,6 +355,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 
 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+static void show_pwq(struct pool_workqueue *pwq);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -4314,6 +4315,22 @@ err_destroy:
 }
 EXPORT_SYMBOL_GPL(alloc_workqueue);
 
+static bool pwq_busy(struct pool_workqueue *pwq)
+{
+       int i;
+
+       for (i = 0; i < WORK_NR_COLORS; i++)
+               if (pwq->nr_in_flight[i])
+                       return true;
+
+       if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+               return true;
+       if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+               return true;
+
+       return false;
+}
+
 /**
  * destroy_workqueue - safely terminate a workqueue
  * @wq: target workqueue
@@ -4348,28 +4365,28 @@ void destroy_workqueue(struct workqueue_struct *wq)
                kfree(rescuer);
        }
 
-       /* sanity checks */
+       /*
+        * Sanity checks - grab all the locks so that we wait for all
+        * in-flight operations which may do put_pwq().
+        */
+       mutex_lock(&wq_pool_mutex);
        mutex_lock(&wq->mutex);
        for_each_pwq(pwq, wq) {
-               int i;
-
-               for (i = 0; i < WORK_NR_COLORS; i++) {
-                       if (WARN_ON(pwq->nr_in_flight[i])) {
-                               mutex_unlock(&wq->mutex);
-                               show_workqueue_state();
-                               return;
-                       }
-               }
-
-               if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
-                   WARN_ON(pwq->nr_active) ||
-                   WARN_ON(!list_empty(&pwq->delayed_works))) {
+               spin_lock_irq(&pwq->pool->lock);
+               if (WARN_ON(pwq_busy(pwq))) {
+                       pr_warning("%s: %s has the following busy pwq (refcnt=%d)\n",
+                                  __func__, wq->name, pwq->refcnt);
+                       show_pwq(pwq);
+                       spin_unlock_irq(&pwq->pool->lock);
                        mutex_unlock(&wq->mutex);
+                       mutex_unlock(&wq_pool_mutex);
                        show_workqueue_state();
                        return;
                }
+               spin_unlock_irq(&pwq->pool->lock);
        }
        mutex_unlock(&wq->mutex);
+       mutex_unlock(&wq_pool_mutex);
 
        /*
         * wq list is used to freeze wq, remove from list after