From 535a4311f5702ecd57090f8fb397932ba5aa5ecf Mon Sep 17 00:00:00 2001
From: Koundinya Veluri
Date: Mon, 16 Apr 2018 18:13:50 -0700
Subject: [PATCH] [Arm64] Add memory barrier after interlocked operations (#17595)

Fixes https://github.com/dotnet/coreclr/issues/17591
---
 src/gc/env/gcenv.interlocked.h   |  6 +++
 src/gc/env/gcenv.interlocked.inl | 48 +++++++++++++++++++-----
 src/pal/inc/pal.h                | 80 +++++++++++++++++++++++++++++++---------
 3 files changed, 107 insertions(+), 27 deletions(-)

diff --git a/src/gc/env/gcenv.interlocked.h b/src/gc/env/gcenv.interlocked.h
index 1b10359..1da222d 100644
--- a/src/gc/env/gcenv.interlocked.h
+++ b/src/gc/env/gcenv.interlocked.h
@@ -10,6 +10,12 @@
 // Interlocked operations
 class Interlocked
 {
+private:
+
+#ifndef _MSC_VER
+    static void ArmInterlockedOperationBarrier();
+#endif // !_MSC_VER
+
 public:
 
     // Increment the value of the specified 32-bit variable as an atomic operation.
diff --git a/src/gc/env/gcenv.interlocked.inl b/src/gc/env/gcenv.interlocked.inl
index fd4f839..3eaaa3a 100644
--- a/src/gc/env/gcenv.interlocked.inl
+++ b/src/gc/env/gcenv.interlocked.inl
@@ -11,6 +11,16 @@
 #include <intrin.h>
 #endif // _MSC_VER
 
+#ifndef _MSC_VER
+__forceinline void Interlocked::ArmInterlockedOperationBarrier()
+{
+#ifdef _ARM64_
+    // See PAL_ArmInterlockedOperationBarrier() in the PAL
+    __sync_synchronize();
+#endif // _ARM64_
+}
+#endif // !_MSC_VER
+
 // Increment the value of the specified 32-bit variable as an atomic operation.
 // Parameters:
 //  addend - variable to be incremented
@@ -23,7 +33,9 @@ __forceinline T Interlocked::Increment(T volatile *addend)
     static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T");
     return _InterlockedIncrement((long*)addend);
 #else
-    return __sync_add_and_fetch(addend, 1);
+    T result = __sync_add_and_fetch(addend, 1);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -39,7 +51,9 @@ __forceinline T Interlocked::Decrement(T volatile *addend)
     static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T");
     return _InterlockedDecrement((long*)addend);
 #else
-    return __sync_sub_and_fetch(addend, 1);
+    T result = __sync_sub_and_fetch(addend, 1);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -56,7 +70,9 @@ __forceinline T Interlocked::Exchange(T volatile *destination, T value)
     static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T");
     return _InterlockedExchange((long*)destination, value);
 #else
-    return __sync_swap(destination, value);
+    T result = __sync_swap(destination, value);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -75,7 +91,9 @@ __forceinline T Interlocked::CompareExchange(T volatile *destination, T exchange
     static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T");
     return _InterlockedCompareExchange((long*)destination, exchange, comparand);
 #else
-    return __sync_val_compare_and_swap(destination, comparand, exchange);
+    T result = __sync_val_compare_and_swap(destination, comparand, exchange);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -92,7 +110,9 @@ __forceinline T Interlocked::ExchangeAdd(T volatile *addend, T value)
     static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T");
     return _InterlockedExchangeAdd((long*)addend, value);
 #else
-    return __sync_fetch_and_add(addend, value);
+    T result = __sync_fetch_and_add(addend, value);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -108,6 +128,7 @@ __forceinline void Interlocked::And(T volatile *destination, T value)
     _InterlockedAnd((long*)destination, value);
 #else
     __sync_and_and_fetch(destination, value);
+    ArmInterlockedOperationBarrier();
 #endif
 }
 
@@ -123,6 +144,7 @@ __forceinline void Interlocked::Or(T volatile *destination, T value)
     _InterlockedOr((long*)destination, value);
 #else
     __sync_or_and_fetch(destination, value);
+    ArmInterlockedOperationBarrier();
 #endif
 }
 
@@ -142,7 +164,9 @@ __forceinline T Interlocked::ExchangePointer(T volatile * destination, T value)
     return (T)(TADDR)_InterlockedExchange((long volatile *)(void* volatile *)destination, (long)(void*)value);
 #endif
 #else
-    return (T)(TADDR)__sync_swap((void* volatile *)destination, value);
+    T result = (T)(TADDR)__sync_swap((void* volatile *)destination, value);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -156,7 +180,9 @@ __forceinline T Interlocked::ExchangePointer(T volatile * destination, std::null
     return (T)(TADDR)_InterlockedExchange((long volatile *)(void* volatile *)destination, (long)(void*)value);
 #endif
 #else
-    return (T)(TADDR)__sync_swap((void* volatile *)destination, value);
+    T result = (T)(TADDR)__sync_swap((void* volatile *)destination, value);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -178,7 +204,9 @@ __forceinline T Interlocked::CompareExchangePointer(T volatile *destination, T e
     return (T)(TADDR)_InterlockedCompareExchange((long volatile *)(void* volatile *)destination, (long)(void*)exchange, (long)(void*)comparand);
 #endif
 #else
-    return (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, comparand, exchange);
+    T result = (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, comparand, exchange);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
@@ -192,7 +220,9 @@ __forceinline T Interlocked::CompareExchangePointer(T volatile *destination, T e
     return (T)(TADDR)_InterlockedCompareExchange((long volatile *)(void* volatile *)destination, (long)(void*)exchange, (long)(void*)comparand);
 #endif
 #else
-    return (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, (void*)comparand, (void*)exchange);
+    T result = (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, (void*)comparand, (void*)exchange);
+    ArmInterlockedOperationBarrier();
+    return result;
 #endif
 }
 
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index 60f4a81..0e59006 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -3318,6 +3318,24 @@ BitScanReverse64(
     return bRet;
 }
 
+FORCEINLINE void PAL_ArmInterlockedOperationBarrier()
+{
+#ifdef _ARM64_
+    // On arm64, most of the __sync* functions generate a code sequence like:
+    //     loop:
+    //         ldaxr (load acquire exclusive)
+    //         ...
+    //         stlxr (store release exclusive)
+    //         cbnz loop
+    //
+    // It is possible for a load following the code sequence above to be reordered to occur prior to the store above due to the
+    // release barrier, this is substantiated by https://github.com/dotnet/coreclr/pull/17508. Interlocked operations in the PAL
+    // require the load to occur after the store. This memory barrier should be used following a call to a __sync* function to
+    // prevent that reordering. Code generated for arm32 includes a 'dmb' after 'cbnz', so no issue there at the moment.
+    __sync_synchronize();
+#endif // _ARM64_
+}
+
 /*++
 Function:
 InterlockedIncrement
@@ -3345,7 +3363,9 @@ PALAPI
 InterlockedIncrement(
     IN OUT LONG volatile *lpAddend)
 {
-    return __sync_add_and_fetch(lpAddend, (LONG)1);
+    LONG result = __sync_add_and_fetch(lpAddend, (LONG)1);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
@@ -3356,7 +3376,9 @@ PALAPI
 InterlockedIncrement64(
     IN OUT LONGLONG volatile *lpAddend)
 {
-    return __sync_add_and_fetch(lpAddend, (LONGLONG)1);
+    LONGLONG result = __sync_add_and_fetch(lpAddend, (LONGLONG)1);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 /*++
@@ -3386,7 +3408,9 @@ PALAPI
 InterlockedDecrement(
     IN OUT LONG volatile *lpAddend)
 {
-    return __sync_sub_and_fetch(lpAddend, (LONG)1);
+    LONG result = __sync_sub_and_fetch(lpAddend, (LONG)1);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 #define InterlockedDecrementAcquire InterlockedDecrement
@@ -3400,7 +3424,9 @@ PALAPI
 InterlockedDecrement64(
     IN OUT LONGLONG volatile *lpAddend)
 {
-    return __sync_sub_and_fetch(lpAddend, (LONGLONG)1);
+    LONGLONG result = __sync_sub_and_fetch(lpAddend, (LONGLONG)1);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 /*++
@@ -3433,7 +3459,9 @@ InterlockedExchange(
     IN OUT LONG volatile *Target,
     IN LONG Value)
 {
-    return __sync_swap(Target, Value);
+    LONG result = __sync_swap(Target, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
@@ -3445,7 +3473,9 @@ InterlockedExchange64(
     IN OUT LONGLONG volatile *Target,
     IN LONGLONG Value)
 {
-    return __sync_swap(Target, Value);
+    LONGLONG result = __sync_swap(Target, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 /*++
@@ -3481,10 +3511,13 @@ InterlockedCompareExchange(
     IN LONG Exchange,
     IN LONG Comperand)
 {
-    return __sync_val_compare_and_swap(
-        Destination, /* The pointer to a variable whose value is to be compared with. */
-        Comperand, /* The value to be compared */
-        Exchange /* The value to be stored */);
+    LONG result =
+        __sync_val_compare_and_swap(
+            Destination, /* The pointer to a variable whose value is to be compared with. */
+            Comperand, /* The value to be compared */
+            Exchange /* The value to be stored */);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 #define InterlockedCompareExchangeAcquire InterlockedCompareExchange
@@ -3501,10 +3534,13 @@ InterlockedCompareExchange64(
     IN LONGLONG Exchange,
     IN LONGLONG Comperand)
 {
-    return __sync_val_compare_and_swap(
-        Destination, /* The pointer to a variable whose value is to be compared with. */
-        Comperand, /* The value to be compared */
-        Exchange /* The value to be stored */);
+    LONGLONG result =
+        __sync_val_compare_and_swap(
+            Destination, /* The pointer to a variable whose value is to be compared with. */
+            Comperand, /* The value to be compared */
+            Exchange /* The value to be stored */);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 /*++
@@ -3533,7 +3569,9 @@ InterlockedExchangeAdd(
     IN OUT LONG volatile *Addend,
     IN LONG Value)
 {
-    return __sync_fetch_and_add(Addend, Value);
+    LONG result = __sync_fetch_and_add(Addend, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
@@ -3545,7 +3583,9 @@ PALAPI
 InterlockedExchangeAdd64(
     IN OUT LONGLONG volatile *Addend,
     IN LONGLONG Value)
 {
-    return __sync_fetch_and_add(Addend, Value);
+    LONGLONG result = __sync_fetch_and_add(Addend, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
@@ -3557,7 +3597,9 @@ PALAPI
 InterlockedAnd(
     IN OUT LONG volatile *Destination,
     IN LONG Value)
 {
-    return __sync_fetch_and_and(Destination, Value);
+    LONG result = __sync_fetch_and_and(Destination, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
@@ -3569,7 +3611,9 @@ PALAPI
 InterlockedOr(
     IN OUT LONG volatile *Destination,
     IN LONG Value)
 {
-    return __sync_fetch_and_or(Destination, Value);
+    LONG result = __sync_fetch_and_or(Destination, Value);
+    PAL_ArmInterlockedOperationBarrier();
+    return result;
 }
 
 EXTERN_C
-- 
2.7.4
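
A minimal standalone sketch of the pattern the patch applies follows, for readers who want to try it outside the CoreCLR tree. It assumes a GCC or Clang toolchain; the file and symbol names (barrier_demo.c, DemoInterlockedOperationBarrier, DemoInterlockedExchange) are illustrative only and do not exist in the repository, __aarch64__ is the compiler-defined macro standing in for the build system's _ARM64_, and __sync_lock_test_and_set stands in for the Clang-specific __sync_swap used in the patch.

/*
 * barrier_demo.c (hypothetical example, not part of this patch)
 *
 * Illustrates the pattern used above: on arm64, follow each __sync*
 * operation with a full barrier so that a later load cannot be observed
 * before the operation's store-release completes.
 *
 * Build: gcc -O2 barrier_demo.c -o barrier_demo
 */
#include <stdio.h>

static inline void DemoInterlockedOperationBarrier(void)
{
#if defined(__aarch64__)
    /* Full two-way barrier after the ldaxr/stlxr loop that the __sync*
       builtins expand to on arm64 (mirrors PAL_ArmInterlockedOperationBarrier). */
    __sync_synchronize();
#endif
}

static inline int DemoInterlockedExchange(int volatile *target, int value)
{
    /* __sync_lock_test_and_set is documented as an acquire barrier only,
       so the trailing full barrier supplies the full-fence semantics
       expected of an interlocked exchange. */
    int result = __sync_lock_test_and_set(target, value);
    DemoInterlockedOperationBarrier();
    return result;
}

int main(void)
{
    int volatile flag = 0;
    int old = DemoInterlockedExchange(&flag, 1);
    printf("old=%d new=%d\n", old, (int)flag);
    return 0;
}

Keeping the barrier in one small inline helper, as the patch does with ArmInterlockedOperationBarrier() and PAL_ArmInterlockedOperationBarrier(), confines the extra barrier to arm64 builds while leaving the other architectures' code paths unchanged.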