From: mtklein Date: Thu, 10 Jul 2014 18:24:18 +0000 (-0700) Subject: Use a consume load in SkLazyPtr. X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~6825 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=654a9c20acb2e34b7836139615c29dc09fa0fb9c;p=platform%2Fupstream%2FlibSkiaSharp.git Use a consume load in SkLazyPtr. This only affects ARM, where an acquire needs a memory barrier but consume does not. I am interested to see if there's a noticeable effect on the Android bots. BUG=skia: NOTRY=true R=bungeman@google.com, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/381143002 --- diff --git a/src/core/SkLazyPtr.h b/src/core/SkLazyPtr.h index c25d3c8..13218a7 100644 --- a/src/core/SkLazyPtr.h +++ b/src/core/SkLazyPtr.h @@ -88,14 +88,26 @@ static P try_cas(void** dst, P ptr) { template T* sk_new() { return SkNEW(T); } template void sk_delete(T* ptr) { SkDELETE(ptr); } +// We're basing these implementations here on this article: +// http://preshing.com/20140709/the-purpose-of-memory_order_consume-in-cpp11/ +// +// Because the users of SkLazyPtr and SkLazyPtrArray will read the pointers +// _through_ our atomically set pointer, there is a data dependency between our +// atomic and the guarded data, and so we only need writer-releases / +// reader-consumes memory pairing rather than the more general write-releases / +// reader-acquires convention. +// +// This is nice, because a sk_consume_load is free on all our platforms: x86, +// ARM, MIPS. In contrast, sk_acquire_load issues a memory barrier on non-x86. + // This has no constructor and must be zero-initalized (the macro above does this). template , void (*Destroy)(T*) = sk_delete > class SkLazyPtr { public: T* get() { - // If fPtr has already been filled, we need an acquire barrier when loading it. + // If fPtr has already been filled, we need a consume barrier when loading it. // If not, we need a release barrier when setting it. try_cas will do that. 
- T* ptr = (T*)sk_acquire_load(&fPtr); + T* ptr = (T*)sk_consume_load(&fPtr); return ptr ? ptr : try_cas(&fPtr, Create()); } @@ -122,9 +134,9 @@ class SkLazyPtrArray { public: T* operator[](int i) { SkASSERT(i >= 0 && i < N); - // If fPtr has already been filled, we need an acquire barrier when loading it. + // If fPtr has already been filled, we need a consume barrier when loading it. // If not, we need a release barrier when setting it. try_cas will do that. - T* ptr = (T*)sk_acquire_load(&fArray[i]); + T* ptr = (T*)sk_consume_load(&fArray[i]); return ptr ? ptr : try_cas(&fArray[i], Create(i)); } diff --git a/src/ports/SkBarriers_arm.h b/src/ports/SkBarriers_arm.h index 9161cdd..386294e 100644 --- a/src/ports/SkBarriers_arm.h +++ b/src/ports/SkBarriers_arm.h @@ -18,6 +18,16 @@ T sk_acquire_load(T* ptr) { } template +T sk_consume_load(T* ptr) { + T val = *ptr; + // Unlike acquire, consume loads (data-dependent loads) are guaranteed not to reorder on ARM. + // No memory barrier is needed, so we just use a compiler barrier. + // C.f. http://preshing.com/20140709/the-purpose-of-memory_order_consume-in-cpp11/ + sk_compiler_barrier(); + return val; +} + +template void sk_release_store(T* ptr, T val) { __sync_synchronize(); // Issue a full barrier, which is an overkill release barrier. 
*ptr = val; diff --git a/src/ports/SkBarriers_tsan.h b/src/ports/SkBarriers_tsan.h index 6f27390..d72dbfd 100644 --- a/src/ports/SkBarriers_tsan.h +++ b/src/ports/SkBarriers_tsan.h @@ -17,6 +17,12 @@ T sk_acquire_load(T* ptr) { } template +T sk_consume_load(T* ptr) { + SkASSERT(__atomic_always_lock_free(sizeof(T), ptr)); + return __atomic_load_n(ptr, __ATOMIC_CONSUME); +} + +template void sk_release_store(T* ptr, T val) { SkASSERT(__atomic_always_lock_free(sizeof(T), ptr)); return __atomic_store_n(ptr, val, __ATOMIC_RELEASE); diff --git a/src/ports/SkBarriers_x86.h b/src/ports/SkBarriers_x86.h index fc57615..56e2658 100644 --- a/src/ports/SkBarriers_x86.h +++ b/src/ports/SkBarriers_x86.h @@ -24,6 +24,12 @@ T sk_acquire_load(T* ptr) { } template +T sk_consume_load(T* ptr) { + // On x86, consume is the same as acquire, i.e. a normal load. + return sk_acquire_load(ptr); +} + +template void sk_release_store(T* ptr, T val) { // On x86, all stores are release stores, so we only need a compiler barrier. sk_compiler_barrier();