#if defined(__clang__)
#if defined(_ARM_) || defined(_ARM64_)
// This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows.
-#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory")
+#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb ish" : : : "memory")
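+// "ish" scopes the barrier to the inner shareable domain, which already covers every
+// core the process's threads can run on; the full-system scope of "dmb sy" is stronger
+// than cross-thread ordering requires.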
#else
//
// For Clang, we prevent reordering by the compiler by inserting the following after a volatile
// load, and before a volatile store.
template<typename T>
inline
T VolatileLoad(T const * pt)
{
+#if defined(_ARM64_) && defined(__clang__)
+    T val;
+    // Bits 1, 2, 4 and 8 of this mask mark the operand sizes (in bytes) for which
+    // ARM64 has single-instruction lock-free atomic loads and stores.
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_load((T volatile const *)pt, &val, __ATOMIC_ACQUIRE);
+    }
+    else
+    {
+        val = *(T volatile const *)pt;
+        // "dmb ishld" keeps the load above ordered before later loads and stores (acquire).
+        asm volatile ("dmb ishld" : : : "memory");
+    }
+#else
    T val = *(T volatile const *)pt;
    VOLATILE_MEMORY_BARRIER();
+#endif
    return val;
}
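
To make the size test above concrete: the mask value is 0x116, so (1 << sizeof(T)) intersects it exactly when sizeof(T) is 1, 2, 4 or 8. A minimal standalone check of that arithmetic; mask and Pair are illustrative names, not part of this change:

    #include <stdint.h>

    static const unsigned mask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);  // == 0x116

    struct Pair { uint64_t lo; uint64_t hi; };                               // sizeof == 16

    static_assert((1 << sizeof(uint64_t)) & mask, "8-byte values use __atomic_load");
    static_assert(!((1 << sizeof(Pair)) & mask),  "16-byte values use load + dmb ishld");
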
template<typename T>
inline
void VolatileStore(T* pt, T val)
{
+#if defined(_ARM64_) && defined(__clang__)
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_store((T volatile *)pt, &val, __ATOMIC_RELEASE);
+    }
+    else
+    {
+        // A full barrier before the store keeps prior accesses from moving past it (release).
+        VOLATILE_MEMORY_BARRIER();
+        *(T volatile *)pt = val;
+    }
+#else
    VOLATILE_MEMORY_BARRIER();
    *(T volatile *)pt = val;
+#endif
}
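
Taken together, the acquire load and release store give the usual publication guarantee. A minimal sketch of the pattern these functions support; g_payload and g_ready are illustrative names, not part of this change:

    static int  g_payload;                 // data published by the writer
    static bool g_ready;                   // flag that carries the ordering

    void Publish()
    {
        g_payload = 42;                    // ordinary store
        VolatileStore(&g_ready, true);     // release: payload is visible before the flag
    }

    int Consume()
    {
        while (!VolatileLoad(&g_ready))    // acquire: nothing below moves above this load
        {
            // spin
        }
        return g_payload;                  // observes 42 once the flag is seen
    }
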
extern GCSystemInfo g_SystemInfo;
#if defined(__GNUC__)
#if defined(_ARM_) || defined(_ARM64_)
// This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows.
-#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory")
+#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb ish" : : : "memory")
#else
//
// For GCC, we prevent reordering by the compiler by inserting the following after a volatile
// load, and before a volatile store.
    STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY;
#ifndef DACCESS_COMPILE
+#if defined(_ARM64_) && defined(__GNUC__)
+    T val;
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_load((T volatile const *)pt, &val, __ATOMIC_ACQUIRE);
+    }
+    else
+    {
+        val = *(T volatile const *)pt;
+        asm volatile ("dmb ishld" : : : "memory");
+    }
+#else
    T val = *(T volatile const *)pt;
    VOLATILE_MEMORY_BARRIER();
+#endif
#else
    // DAC builds inspect a dump or a suspended target from another process,
    // so a plain load with no barrier suffices.
    T val = *pt;
#endif
    STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY;
#ifndef DACCESS_COMPILE
+#if defined(_ARM64_) && defined(__GNUC__)
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_store((T volatile *)pt, &val, __ATOMIC_RELEASE);
+    }
+    else
+    {
+        VOLATILE_MEMORY_BARRIER();
+        *(T volatile *)pt = val;
+    }
+#else
    VOLATILE_MEMORY_BARRIER();
    *(T volatile *)pt = val;
+#endif
#else
    *pt = val;
#endif
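
For the sizes in the mask, the generic __atomic_load/__atomic_store builtins used above behave like their scalar _n variants. A standalone sketch of what they ask the compiler for; the ldar/stlr mapping is the typical AArch64 code generation, not something this change mandates:

    #include <stdint.h>

    uint32_t LoadAcquire(uint32_t const * p)
    {
        return __atomic_load_n(p, __ATOMIC_ACQUIRE);   // typically ldar on AArch64
    }

    void StoreRelease(uint32_t * p, uint32_t v)
    {
        __atomic_store_n(p, v, __ATOMIC_RELEASE);      // typically stlr on AArch64
    }
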
    _ASSERTE(!pMT->IsArray());  // bunch of assumptions about arrays wrong.
+    // <TODO> @todo Only call MemoryBarrier() if needed.
+    // Reflection is a known use case where this is required.
+    // Unboxing is a use case where this should not be required.
+    // </TODO>
+    MemoryBarrier();
+
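+    // The barrier above takes over the release ordering that the per-size
+    // VolatileStore calls below used to provide; with a full barrier before
+    // the copy, the stores themselves can be plain assignments.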
    // Copy the bulk of the data, and any non-GC refs.
    switch (pMT->GetNumInstanceFieldBytes())
    {
    case 1:
-        VolatileStore((UINT8*)dest, *(UINT8*)src);
+        *(UINT8*)dest = *(UINT8*)src;
        break;
#ifndef ALIGN_ACCESS
        // we can hit an alignment fault if the value type has multiple
        // smaller fields.  Example: if there are two I4 fields, the
        // value class can be aligned to 4-byte boundaries, yet the
        // NumInstanceFieldBytes is 8
    case 2:
-        VolatileStore((UINT16*)dest, *(UINT16*)src);
+        *(UINT16*)dest = *(UINT16*)src;
        break;
    case 4:
-        VolatileStore((UINT32*)dest, *(UINT32*)src);
+        *(UINT32*)dest = *(UINT32*)src;
        break;
    case 8:
-        VolatileStore((UINT64*)dest, *(UINT64*)src);
+        *(UINT64*)dest = *(UINT64*)src;
        break;
#endif // !ALIGN_ACCESS
    default: