// Default Create function for SkLazyPtr: allocates a T with SkNEW and returns it.
template <typename T>
T* sk_new() {
    T* fresh = SkNEW(T);
    return fresh;
}
// Default Destroy function for SkLazyPtr: releases ptr with SkDELETE.
template <typename T>
void sk_delete(T* ptr) {
    SkDELETE(ptr);
}
+// We're basing these implementations here on this article:
+// http://preshing.com/20140709/the-purpose-of-memory_order_consume-in-cpp11/
+//
+// Because the users of SkLazyPtr and SkLazyPtrArray will read the pointers
+// _through_ our atomically set pointer, there is a data dependency between our
+// atomic and the guarded data, and so we only need writer-releases /
+// reader-consumes memory pairing rather than the more general writer-releases /
+// reader-acquires convention.
+//
+// This is nice, because a sk_consume_load is free on all our platforms: x86,
+// ARM, MIPS. In contrast, sk_acquire_load issues a memory barrier on non-x86.
+
// This has no constructor and must be zero-initialized (the macro above does this).
template <typename T, T* (*Create)() = sk_new<T>, void (*Destroy)(T*) = sk_delete<T> >
class SkLazyPtr {
public:
// Returns the pointer loaded from fPtr when it is non-null. Otherwise calls
// Create() and returns the result of try_cas<T*, Destroy>(&fPtr, ...) on it.
T* get() {
- // If fPtr has already been filled, we need an acquire barrier when loading it.
+ // If fPtr has already been filled, we need a consume barrier when loading it.
// If not, we need a release barrier when setting it. try_cas will do that.
- T* ptr = (T*)sk_acquire_load(&fPtr);
+ T* ptr = (T*)sk_consume_load(&fPtr);
return ptr ? ptr : try_cas<T*, Destroy>(&fPtr, Create());
}
public:
// Returns the pointer loaded from fArray[i] when it is non-null. Otherwise calls
// Create(i) and returns the result of try_cas<T*, Destroy>(&fArray[i], ...) on it.
// SkASSERTs that i is in [0, N).
T* operator[](int i) {
SkASSERT(i >= 0 && i < N);
- // If fPtr has already been filled, we need an acquire barrier when loading it.
+ // If fArray[i] has already been filled, we need a consume barrier when loading it.
// If not, we need a release barrier when setting it. try_cas will do that.
- T* ptr = (T*)sk_acquire_load(&fArray[i]);
+ T* ptr = (T*)sk_consume_load(&fArray[i]);
return ptr ? ptr : try_cas<T*, Destroy>(&fArray[i], Create(i));
}
}
// Consume load: reads *ptr with an ordinary load, then calls sk_compiler_barrier()
// before returning the value that was read.
template <typename T>
+T sk_consume_load(T* ptr) {
+ T val = *ptr;
+ // Unlike acquire, consume loads (data-dependent loads) are guaranteed not to reorder on ARM.
+ // No memory barrier is needed, so we just use a compiler barrier.
+ // Cf. http://preshing.com/20140709/the-purpose-of-memory_order_consume-in-cpp11/
+ sk_compiler_barrier();
+ return val;
+}
+
// Release store: issues __sync_synchronize() and then writes val to *ptr.
// The barrier is deliberately placed before the store.
+template <typename T>
void sk_release_store(T* ptr, T val) {
__sync_synchronize(); // Issue a full barrier, which is an overkill release barrier.
*ptr = val;
}
// Consume load via the compiler's atomic builtins: asserts that objects of
// sizeof(T) are always lock-free, then returns __atomic_load_n(ptr, __ATOMIC_CONSUME).
template <typename T>
+T sk_consume_load(T* ptr) {
+ SkASSERT(__atomic_always_lock_free(sizeof(T), ptr));
+ return __atomic_load_n(ptr, __ATOMIC_CONSUME);
+}
+
// Release store via the compiler's atomic builtins: asserts that objects of
// sizeof(T) are always lock-free, then stores val to *ptr with __ATOMIC_RELEASE.
+template <typename T>
void sk_release_store(T* ptr, T val) {
SkASSERT(__atomic_always_lock_free(sizeof(T), ptr));
// __atomic_store_n yields no value; returning it from a void function is legal C++.
return __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
}
// Consume load for this platform: simply forwards to sk_acquire_load(ptr).
template <typename T>
+T sk_consume_load(T* ptr) {
+ // On x86, consume is the same as acquire, i.e. a normal load.
+ return sk_acquire_load(ptr);
+}
+
+template <typename T>
void sk_release_store(T* ptr, T val) {
// On x86, all stores are release stores, so we only need a compiler barrier.
sk_compiler_barrier();