PR28339: debuginfod: fix groom/scan race condition on just-emptied queue

author Frank Ch. Eigler <fche@redhat.com>

Tue, 14 Sep 2021 12:15:23 +0000 (08:15 -0400)

committer Frank Ch. Eigler <fche@redhat.com>

Tue, 14 Sep 2021 15:27:41 +0000 (11:27 -0400)
author Frank Ch. Eigler <fche@redhat.com>
Tue, 14 Sep 2021 12:15:23 +0000 (08:15 -0400)
committer Frank Ch. Eigler <fche@redhat.com>
Tue, 14 Sep 2021 15:27:41 +0000 (11:27 -0400)
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog

index 1173f9c..4ff59ef 100644 (file)
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2021-09-14  Frank Ch. Eigler <fche@redhat.com>
+
+       PR28339
+       * debuginfod.cxx (workq::fronters): New field.
+       (workq::wait_idle): Respect it.
+       (workq::done_front): New function.
+       (thread_main_scanner): Call it to match wait_front().
+
  2021-09-12  Mark Wielaard  <mark@klomp.org>
  
         * debuginfod.cxx (libarchive_fdcache::lookup): Add endl after
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx

index 6cc9f77..1267efb 100644 (file)
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -663,10 +663,11 @@ class workq
    mutex mtx;
    condition_variable cv;
    bool dead;
-  unsigned idlers;
+  unsigned idlers;   // number of threads busy with wait_idle / done_idle
+  unsigned fronters; // number of threads busy with wait_front / done_front
  
  public:
-  workq() { dead = false; idlers = 0; }
+  workq() { dead = false; idlers = 0; fronters = 0; }
    ~workq() {}
  
    void push_back(const Payload& p)
@@ -690,10 +691,11 @@ public:
      unique_lock<mutex> lock(mtx);
      q.clear();
      set_metric("thread_work_pending","role","scan", q.size());
+    // NB: there may still be some live fronters
      cv.notify_all(); // maybe wake up waiting idlers
    }
  
-  // block this scanner thread until there is work to do and no active
+  // block this scanner thread until there is work to do and no active idler
    bool wait_front (Payload& p)
    {
      unique_lock<mutex> lock(mtx);
@@ -705,19 +707,29 @@ public:
        {
          p = * q.begin();
          q.erase (q.begin());
+        fronters ++; // prevent idlers from starting awhile, even if empty q
          set_metric("thread_work_pending","role","scan", q.size());
-        if (q.size() == 0)
-          cv.notify_all(); // maybe wake up waiting idlers
+        // NB: don't wake up idlers yet!  The consumer is busy
+        // processing this element until it calls done_front().
          return true;
        }
    }
  
+  // notify workq that this scanner thread is done with the item it last took via wait_front()
+  void done_front ()
+  {
+    unique_lock<mutex> lock(mtx);
+    fronters --;
+    if (q.size() == 0 && fronters == 0)
+      cv.notify_all(); // maybe wake up waiting idlers
+  }
+  
    // block this idler thread until there is no work to do
    void wait_idle ()
    {
      unique_lock<mutex> lock(mtx);
      cv.notify_all(); // maybe wake up waiting scanners
-    while (!dead && (q.size() != 0))
+    while (!dead && ((q.size() != 0) || fronters > 0))
        cv.wait(lock);
      idlers ++;
    }
@@ -3145,6 +3157,8 @@ thread_main_scanner (void* arg)
            e.report(cerr);
          }
  
+      scanq.done_front(); // let idlers run
+      
        if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
          {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
        (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
author	Frank Ch. Eigler <fche@redhat.com>
	Tue, 14 Sep 2021 12:15:23 +0000 (08:15 -0400)
committer	Frank Ch. Eigler <fche@redhat.com>
	Tue, 14 Sep 2021 15:27:41 +0000 (11:27 -0400)
debuginfod/ChangeLog		patch \| blob \| history
debuginfod/debuginfod.cxx		patch \| blob \| history