[NativeAOT] implement Interlocked.ReadMemoryBarrier intrinsic (#86842)
authorVladimir Sadov <vsadov@microsoft.com>
Wed, 31 May 2023 14:24:42 +0000 (07:24 -0700)
committerGitHub <noreply@github.com>
Wed, 31 May 2023 14:24:42 +0000 (07:24 -0700)
* ReadMemoryBarrier

* remove more literals (use constants)

* missing include for win-arm64

* removed nonintrinsic implementations of ReadMemoryBarrier

* removed nonintrinsic MemoryBarrier()

* unified Interlocked.MemoryBarrier

* added a test for indirect calling Interlocked.MemoryBarrier

* PR feedback

* use no-inlining peoperties

* NoInlining must be with methods (not properties)

15 files changed:
src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs
src/coreclr/nativeaot/Runtime/portable.cpp
src/coreclr/nativeaot/Runtime/windows/PalRedhawkInline.h
src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs
src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs
src/coreclr/vm/comutilnative.cpp
src/coreclr/vm/comutilnative.h
src/coreclr/vm/ecalllist.h
src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs
src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs
src/libraries/System.Threading/tests/InterlockedTests.cs
src/libraries/System.Threading/tests/System.Threading.Tests.csproj
src/mono/System.Private.CoreLib/src/System/Threading/Interlocked.Mono.cs

index 2e683dc..2982dd1 100644 (file)
@@ -208,25 +208,7 @@ namespace System.Threading
             CompareExchange(ref location, 0, 0);
         #endregion
 
-        #region MemoryBarrier
-        /// <summary>
-        /// Synchronizes memory access as follows:
-        /// The processor that executes the current thread cannot reorder instructions in such a way that memory accesses before
-        /// the call to <see cref="MemoryBarrier"/> execute after memory accesses that follow the call to <see cref="MemoryBarrier"/>.
-        /// </summary>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void MemoryBarrier();
-
-        /// <summary>
-        /// Synchronizes memory access as follows:
-        /// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
-        /// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
-        /// </summary>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern void ReadMemoryBarrier();
-
+        #region MemoryBarrierProcessWide
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Interlocked_MemoryBarrierProcessWide")]
         private static partial void _MemoryBarrierProcessWide();
 
index c8f4379..dfa7471 100644 (file)
@@ -462,14 +462,6 @@ COOP_PINVOKE_HELPER(int64_t, RhpLockCmpXchg64, (int64_t * location, int64_t valu
     return PalInterlockedCompareExchange64(location, value, comparand);
 }
 
-#endif // USE_PORTABLE_HELPERS
-
-COOP_PINVOKE_HELPER(void, RhpMemoryBarrier, ())
-{
-    PalMemoryBarrier();
-}
-
-#if defined(USE_PORTABLE_HELPERS)
 EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
 {
     return NULL;
index c6ed7d6..1d9f7bd 100644 (file)
@@ -1,6 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+#if defined(HOST_ARM64)
+#include <arm64intr.h>
+#endif
+
 // Implementation of Redhawk PAL inline functions
 
 EXTERN_C long __cdecl _InterlockedIncrement(long volatile *);
@@ -121,7 +125,6 @@ EXTERN_C void __faststorefence();
 #pragma intrinsic(__faststorefence)
 #define PalMemoryBarrier() __faststorefence()
 
-
 #elif defined(HOST_ARM)
 
 EXTERN_C void __yield(void);
@@ -130,11 +133,11 @@ EXTERN_C void __dmb(unsigned int _Type);
 #pragma intrinsic(__dmb)
 FORCEINLINE void PalYieldProcessor()
 {
-    __dmb(0xA /* _ARM_BARRIER_ISHST */);
+    __dmb(_ARM_BARRIER_ISHST);
     __yield();
 }
 
-#define PalMemoryBarrier() __dmb(0xF /* _ARM_BARRIER_SY */)
+#define PalMemoryBarrier() __dmb(_ARM_BARRIER_ISH)
 
 #elif defined(HOST_ARM64)
 
@@ -144,11 +147,11 @@ EXTERN_C void __dmb(unsigned int _Type);
 #pragma intrinsic(__dmb)
 FORCEINLINE void PalYieldProcessor()
 {
-    __dmb(0xA /* _ARM64_BARRIER_ISHST */);
+    __dmb(_ARM64_BARRIER_ISHST);
     __yield();
 }
 
-#define PalMemoryBarrier() __dmb(0xF /* _ARM64_BARRIER_SY */)
+#define PalMemoryBarrier() __dmb(_ARM64_BARRIER_ISH)
 
 #else
 #error Unsupported architecture
index d5c2469..1f3cd33 100644 (file)
@@ -795,10 +795,6 @@ namespace System.Runtime
         [RuntimeImport(RuntimeLibrary, "RhpCheckedXchg")]
         internal static extern object InterlockedExchange([NotNullIfNotNull(nameof(value))] ref object? location1, object? value);
 
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpMemoryBarrier")]
-        internal static extern void MemoryBarrier();
-
         [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "acos")]
index 56f16cb..c3ef6da 100644 (file)
@@ -196,14 +196,6 @@ namespace System.Threading
 
         #endregion
 
-        #region MemoryBarrier
-        [Intrinsic]
-        public static void MemoryBarrier()
-        {
-            RuntimeImports.MemoryBarrier();
-        }
-        #endregion
-
         #region Read
         public static long Read(ref long location)
         {
index bd1681f..5232ab0 100644 (file)
@@ -103,10 +103,6 @@ namespace System.Runtime
         [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg64")]
         internal static extern long InterlockedCompareExchange(ref long location1, long value, long comparand);
 
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpMemoryBarrier")]
-        internal static extern void MemoryBarrier();
-
         // Moves memory from smem to dmem. Size must be a positive value.
         // This copy uses an intrinsic to be safe for copying arbitrary bits of
         // heap memory
index 34d02ff..58decc9 100644 (file)
@@ -31,9 +31,6 @@ namespace System.Threading
         }
 
         [Intrinsic]
-        public static void MemoryBarrier()
-        {
-            RuntimeImports.MemoryBarrier();
-        }
+        public static void MemoryBarrier() => MemoryBarrier();
     }
 }
index 6d457a4..4ece25e 100644 (file)
@@ -1637,24 +1637,6 @@ FCIMPL2_IV(INT64,COMInterlocked::ExchangeAdd64, INT64 *location, INT64 value)
 }
 FCIMPLEND
 
-FCIMPL0(void, COMInterlocked::FCMemoryBarrier)
-{
-    FCALL_CONTRACT;
-
-    MemoryBarrier();
-    FC_GC_POLL();
-}
-FCIMPLEND
-
-FCIMPL0(void, COMInterlocked::FCMemoryBarrierLoad)
-{
-    FCALL_CONTRACT;
-
-    VolatileLoadBarrier();
-    FC_GC_POLL();
-}
-FCIMPLEND
-
 #include <optdefault.h>
 
 extern "C" void QCALLTYPE Interlocked_MemoryBarrierProcessWide()
index 0d6d3f8..fa2ce28 100644 (file)
@@ -235,9 +235,6 @@ public:
         static FCDECL3(LPVOID, CompareExchangeObject, LPVOID* location, LPVOID value, LPVOID comparand);
         static FCDECL2(INT32, ExchangeAdd32, INT32 *location, INT32 value);
         static FCDECL2_IV(INT64, ExchangeAdd64, INT64 *location, INT64 value);
-
-        static FCDECL0(void, FCMemoryBarrier);
-        static FCDECL0(void, FCMemoryBarrierLoad);
 };
 
 extern "C" void QCALLTYPE Interlocked_MemoryBarrierProcessWide();
index acda36b..75436d5 100644 (file)
@@ -519,8 +519,6 @@ FCFuncStart(gInterlockedFuncs)
     FCFuncElementSig("CompareExchange", &gsig_SM_RefObj_Obj_Obj_RetObj, COMInterlocked::CompareExchangeObject)
     FCFuncElementSig("ExchangeAdd", &gsig_SM_RefInt_Int_RetInt, COMInterlocked::ExchangeAdd32)
     FCFuncElementSig("ExchangeAdd", &gsig_SM_RefLong_Long_RetLong, COMInterlocked::ExchangeAdd64)
-    FCFuncElement("MemoryBarrier", COMInterlocked::FCMemoryBarrier)
-    FCFuncElement("ReadMemoryBarrier", COMInterlocked::FCMemoryBarrierLoad)
 FCFuncEnd()
 
 FCFuncStart(gJitInfoFuncs)
index fe58141..a333b71 100644 (file)
@@ -152,32 +152,16 @@ namespace System.Runtime.CompilerServices
                 // we must read in this order: version -> [entry parts] -> version
                 // if version is odd or changes, the entry is inconsistent and thus ignored
                 uint version = Volatile.Read(ref pEntry._version);
-
-#if CORECLR
-                // in CoreCLR we do ordinary reads of the entry parts and
-                // Interlocked.ReadMemoryBarrier() before reading the version
                 nuint entrySource = pEntry._source;
-#else
-                // must read this before reading the version again
-                nuint entrySource = Volatile.Read(ref pEntry._source);
-#endif
-
                 // mask the lower version bit to make it even.
                 // This way we can check if version is odd or changing in just one compare.
                 version &= unchecked((uint)~1);
 
                 if (entrySource == source)
                 {
-
-#if CORECLR
                     // in CoreCLR we do ordinary reads of the entry parts and
                     // Interlocked.ReadMemoryBarrier() before reading the version
                     nuint entryTargetAndResult = pEntry._targetAndResult;
-#else
-                    // must read this before reading the version again
-                    nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
-#endif
-
                     // target never has its lower bit set.
                     // a matching entryTargetAndResult would the have same bits, except for the lowest one, which is the result.
                     entryTargetAndResult ^= target;
@@ -189,10 +173,7 @@ namespace System.Runtime.CompilerServices
                         // - use acquires for both _source and _targetAndResults or
                         // - issue a load barrier before reading _version
                         // benchmarks on available hardware (Jan 2020) show that use of a read barrier is cheaper.
-
-#if CORECLR
                         Interlocked.ReadMemoryBarrier();
-#endif
 
                         if (version != pEntry._version)
                         {
index 751035f..4997ea5 100644 (file)
@@ -325,5 +325,23 @@ namespace System.Threading
         public static ulong Or(ref ulong location1, ulong value) =>
             (ulong)Or(ref Unsafe.As<ulong, long>(ref location1), (long)value);
         #endregion
+
+        #region MemoryBarrier
+        /// <summary>
+        /// Synchronizes memory access as follows:
+        /// The processor that executes the current thread cannot reorder instructions in such a way that memory accesses before
+        /// the call to <see cref="MemoryBarrier"/> execute after memory accesses that follow the call to <see cref="MemoryBarrier"/>.
+        /// </summary>
+        [Intrinsic]
+        public static void MemoryBarrier() => MemoryBarrier();
+
+        /// <summary>
+        /// Synchronizes memory access as follows:
+        /// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
+        /// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
+        /// </summary>
+        [Intrinsic]
+        internal static void ReadMemoryBarrier() => ReadMemoryBarrier();
+        #endregion
     }
 }
index f47fc99..933404f 100644 (file)
@@ -9,7 +9,7 @@ using Xunit;
 
 namespace System.Threading.Tests
 {
-    public class InterlockedTests
+    public unsafe class InterlockedTests
     {
         [Fact]
         public void InterlockedAdd_Int32()
@@ -603,6 +603,23 @@ namespace System.Threading.Tests
             Assert.Equal(ThreadCount * IterationCount * Increment, Interlocked.Read(ref value));
         }
 
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private static Action MemoryBarrierDelegate() => Interlocked.MemoryBarrier;
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private static delegate* <void> MemoryBarrierPointer() => &Interlocked.MemoryBarrier;
+
+        [Fact()]
+        public void MemoryBarrierIntrinsic()
+        {
+            // Interlocked.MemoryBarrier is a self-referring intrinsic
+            // we should be able to call it through a delegate.
+            MemoryBarrierDelegate()();
+
+            // through a method pointer
+            MemoryBarrierPointer()();
+        }
+
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
         public void MemoryBarrierProcessWide()
         {
index 81ac83a..c4bb4f6 100644 (file)
@@ -3,6 +3,7 @@
     <TestRuntime>true</TestRuntime>
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="AsyncLocalTests.cs" />
index 8b8daae..bc18986 100644 (file)
@@ -145,11 +145,6 @@ namespace System.Threading
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         public static extern long Add(ref long location1, long value);
 
-        [Intrinsic]
-        public static void MemoryBarrier()
-        {
-        }
-
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         public static extern void MemoryBarrierProcessWide();
     }