SkOnce: 2 bytes -> 1 byte
authormtklein <mtklein@chromium.org>
Wed, 20 Apr 2016 20:49:15 +0000 (13:49 -0700)
committerCommit bot <commit-bot@chromium.org>
Wed, 20 Apr 2016 20:49:15 +0000 (13:49 -0700)
This uses the same logic we worked out for SkOncePtr to reduce
the memory footprint of SkOnce from a done byte and lock byte
to a single 3-state byte:

  - NotStarted: no thread has tried to run fn() yet
  - Active:     a thread is running fn()
  - Done:       fn() is complete

Threads which see Done return immediately.
Threads which see NotStarted try to move to Active, run fn(), then move to Done.
Threads which see Active spin until the active thread moves to Done.

This additionally fixes a too-weak memory order bug in SkOncePtr,
and adds a big note to explain.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1904483003

Committed: https://skia.googlesource.com/skia/+/df02d338be8e3c1c50b48a3a9faa582703a39c07

Review URL: https://codereview.chromium.org/1904483003

include/private/SkOnce.h
include/private/SkOncePtr.h

index d83b63f2837382f0179a0c987a67b8a2f979f3b7..1c68fb7da1396771aa3bed37365b321111270dde 100644 (file)
@@ -8,9 +8,9 @@
 #ifndef SkOnce_DEFINED
 #define SkOnce_DEFINED
 
-#include "../private/SkSpinlock.h"
 #include <atomic>
 #include <utility>
+#include "SkTypes.h"
 
 // SkOnce provides call-once guarantees for Skia, much like std::once_flag/std::call_once().
 //
@@ -21,20 +21,50 @@ class SkOnce {
 public:
     template <typename Fn, typename... Args>
     void operator()(Fn&& fn, Args&&... args) {
-        // Vanilla double-checked locking.
-        if (!fDone.load(std::memory_order_acquire)) {
-            fLock.acquire();
-            if (!fDone.load(std::memory_order_relaxed)) {
+        auto state = fState.load(std::memory_order_acquire);
+
+        if (state == Done) {
+            return;
+        }
+
+        if (state == NotStarted) {
+            // Try to claim the job of calling fn() by swapping from NotStarted to Calling.
+            // See [1] below for why we use std::memory_order_acquire instead of relaxed.
+            if (fState.compare_exchange_strong(state, Calling, std::memory_order_acquire)) {
+                // Claimed!  Call fn(), then mark this SkOnce as Done.
                 fn(std::forward<Args>(args)...);
-                fDone.store(true, std::memory_order_release);
+                return fState.store(Done, std::memory_order_release);
             }
-            fLock.release();
         }
+
+        while (state == Calling) {
+            // Some other thread is calling fn().  Wait for them to finish.
+            state = fState.load(std::memory_order_acquire);
+        }
+        SkASSERT(state == Done);
     }
 
 private:
-    std::atomic<bool> fDone{false};
-    SkSpinlock        fLock;
+    enum State : uint8_t { NotStarted, Calling, Done};
+    std::atomic<uint8_t> fState{NotStarted};
 };
 
+/* [1]  Why do we compare_exchange_strong() with std::memory_order_acquire instead of relaxed?
+ *
+ * If we succeed, we really only need a relaxed compare_exchange_strong()... we're the ones
+ * who are about to do a release store, so there's certainly nothing yet for an acquire to
+ * synchronize with.
+ *
+ * If that compare_exchange_strong() fails, we're either in Calling or Done state.
+ * Again, if we're in Calling state, relaxed would have been fine: the spin loop will
+ * acquire up to the Calling thread's release store.
+ *
+ * But if that compare_exchange_strong() fails and we find ourselves in the Done state,
+ * we've never done an acquire load to sync up to the store of that Done state.
+ *
+ * So on failure we need an acquire load.  Generally the failure memory order cannot be
+ * stronger than the success memory order, so we need acquire on success too.  The single
+ * memory order version of compare_exchange_strong() uses the same acquire order for both.
+ */
+
 #endif  // SkOnce_DEFINED
index 3c1ab634ee4397d747cebf7fcfbfb616b189f1fb..b60d968b4a4ef7825d16c558cbaf822206f94abe 100644 (file)
@@ -66,8 +66,9 @@ public:
             if (state == 0) {
                 // It looks like no one has tried to create our pointer yet.
                 // We try to claim that task by atomically swapping our state from '0' to '1'.
+                // See SkOnce.h for why we use an acquire memory order here rather than relaxed.
                 if (sk_atomic_compare_exchange(
-                    &fState, &state, (uintptr_t)1, sk_memory_order_relaxed, sk_memory_order_relaxed)) {
+                    &fState, &state, (uintptr_t)1, sk_memory_order_acquire, sk_memory_order_acquire)) {
                     // We've claimed it.  Create our pointer and store it into fState.
                     state = (uintptr_t)f();
                     SkASSERT(state > 1);