[NativeAOT] Using the same CastCache implementation as in CoreClr (#84430)
author    Vladimir Sadov <vsadov@microsoft.com>
          Fri, 7 Apr 2023 02:04:47 +0000 (19:04 -0700)
committer GitHub <noreply@github.com>
          Fri, 7 Apr 2023 02:04:47 +0000 (19:04 -0700)
* Getter

* different limits on debug/release

* tweaks

* remove now unnecessary CrstCastCache

* implement flushing

* move coreclr castcache to a separate file

* Unified CastCache implementation

* comments and cleanups

* couple more cleanups

* trivial implementation of the cast cache for the Test.Corlib

* use Numerics.BitOperations for bit math

12 files changed:
src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs
src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs
src/coreclr/nativeaot/Runtime/Crst.h
src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
src/coreclr/nativeaot/Runtime/startup.cpp
src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs [new file with mode: 0644]
src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
src/coreclr/vm/castcache.cpp
src/coreclr/vm/corelib.h
src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs [new file with mode: 0644]
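
The managed changes below all funnel through one pattern: probe the shared cache first and fall back to the uncached path only on MaybeCast. A minimal sketch of that three-way dispatch, with hypothetical stand-ins for CastCache.TryGet and the *_NoCacheLookup slow-path helpers:

    internal static class CastCacheCallerSketch
    {
        internal enum CastResult { CannotCast = 0, CanCast = 1, MaybeCast = 2 }

        // stand-ins: the real probe is CastCache.TryGet introduced by this commit,
        // and the slow path is one of the *_NoCacheLookup runtime helpers.
        private static CastResult TryGet(nuint source, nuint target) => CastResult.MaybeCast;
        private static object SlowPathIsInstance(object obj) => obj;

        internal static object IsInstance(nuint source, nuint target, object obj)
        {
            CastResult result = TryGet(source, target);
            if (result == CastResult.CanCast)
                return obj;                  // definite hit: the cast succeeds
            if (result == CastResult.CannotCast)
                return null;                 // definite hit: the cast fails
            return SlowPathIsInstance(obj);  // MaybeCast: miss; compute and cache
        }
    }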

index 818f9ea..435782b 100644 (file)
@@ -3,6 +3,7 @@
 
 using System.Diagnostics;
 using System.Numerics;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Threading;
 
@@ -10,148 +11,6 @@ namespace System.Runtime.CompilerServices
 {
     internal static unsafe class CastHelpers
     {
-        private static int[]? s_table;
-
-        [DebuggerDisplay("Source = {_source}; Target = {_targetAndResult & ~1}; Result = {_targetAndResult & 1}; VersionNum = {_version & ((1 << 29) - 1)}; Distance = {_version >> 29};")]
-        [StructLayout(LayoutKind.Sequential)]
-        private struct CastCacheEntry
-        {
-            // version has the following structure:
-            // [ distance:3bit |  versionNum:29bit ]
-            //
-            // distance is how many iterations the entry is from its ideal position.
-            // we use that for preemption.
-            //
-            // versionNum is a monotonically increasing numerical tag.
-            // A writer "claims" an entry by atomically incrementing the tag, so an odd number indicates an entry in progress.
-            // Upon completion of adding an entry the tag is incremented again, making it even. An even number indicates a complete entry.
-            //
-            // Readers read the version twice, before and after retrieving the entry.
-            // To have a usable entry both reads must yield the same even version.
-            //
-            internal int  _version;
-            internal nuint _source;
-            // pointers have unused lower bits due to alignment, we use one for the result
-            internal nuint _targetAndResult;
-        };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int KeyToBucket(ref int tableData, nuint source, nuint target)
-        {
-            // upper bits of addresses do not vary much, so to reduce loss due to cancelling out,
-            // we do `rotl(source, <half-size>) ^ target` for mixing inputs.
-            // then we use Fibonacci hashing to reduce the value to the desired size.
-
-            int hashShift = HashShift(ref tableData);
-#if TARGET_64BIT
-            ulong hash = BitOperations.RotateLeft((ulong)source, 32) ^ (ulong)target;
-            return (int)((hash * 11400714819323198485ul) >> hashShift);
-#else
-            uint hash = BitOperations.RotateLeft((uint)source, 16) ^ (uint)target;
-            return (int)((hash * 2654435769u) >> hashShift);
-#endif
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static ref int TableData(int[] table)
-        {
-            // element 0 is used for embedded aux data
-            return ref MemoryMarshal.GetArrayDataReference(table);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static ref CastCacheEntry Element(ref int tableData, int index)
-        {
-            // element 0 is used for embedded aux data, skip it
-            return ref Unsafe.Add(ref Unsafe.As<int, CastCacheEntry>(ref tableData), index + 1);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int HashShift(ref int tableData)
-        {
-            return tableData;
-        }
-
-        // TableMask is "size - 1"
-        // we need that more often than we need size
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int TableMask(ref int tableData)
-        {
-            return Unsafe.Add(ref tableData, 1);
-        }
-
-        private enum CastResult
-        {
-            CannotCast = 0,
-            CanCast = 1,
-            MaybeCast = 2
-        }
-
-        // NOTE!!
-        // This is a copy of the C++ implementation in castcache.cpp
-        // Keep the copies, if possible, in sync.
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static CastResult TryGet(nuint source, nuint target)
-        {
-            const int BUCKET_SIZE = 8;
-
-            // table is initialized and updated by native code that guarantees it is not null.
-            ref int tableData = ref TableData(s_table!);
-
-            int index = KeyToBucket(ref tableData, source, target);
-            for (int i = 0; i < BUCKET_SIZE;)
-            {
-                ref CastCacheEntry pEntry = ref Element(ref tableData, index);
-
-                // must read in this order: version -> [entry parts] -> version
-                // if version is odd or changes, the entry is inconsistent and thus ignored
-                int version = Volatile.Read(ref pEntry._version);
-                nuint entrySource = pEntry._source;
-
-                // mask the lower version bit to make it even.
-                // This way we can check if version is odd or changing in just one compare.
-                version &= ~1;
-
-                if (entrySource == source)
-                {
-                    nuint entryTargetAndResult = pEntry._targetAndResult;
-                    // target never has its lower bit set.
-                    // a matching entryTargetAndResult would then have the same bits, except for the lowest one, which is the result.
-                    entryTargetAndResult ^= target;
-                    if (entryTargetAndResult <= 1)
-                    {
-                        // make sure 'version' is loaded after 'source' and 'targetAndResult'
-                        //
-                        // We can either:
-                        // - use acquires for both _source and _targetAndResult or
-                        // - issue a load barrier before reading _version
-                        // benchmarks on available hardware show that use of a read barrier is cheaper.
-                        Interlocked.ReadMemoryBarrier();
-                        if (version != pEntry._version)
-                        {
-                            // oh, so close, the entry is in an inconsistent state.
-                            // it is either changing or has changed while we were reading.
-                            // treat it as a miss.
-                            break;
-                        }
-
-                        return (CastResult)entryTargetAndResult;
-                    }
-                }
-
-                if (version == 0)
-                {
-                    // the rest of the bucket is unclaimed, no point in searching further
-                    break;
-                }
-
-                // quadratic reprobe
-                i++;
-                index = (index + i) & TableMask(ref tableData);
-            }
-            return CastResult.MaybeCast;
-        }
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern object IsInstanceOfAny_NoCacheLookup(void* toTypeHnd, object obj);
 
@@ -177,7 +36,7 @@ namespace System.Runtime.CompilerServices
                 void* mt = RuntimeHelpers.GetMethodTable(obj);
                 if (mt != toTypeHnd)
                 {
-                    CastResult result = TryGet((nuint)mt, (nuint)toTypeHnd);
+                    CastResult result = CastCache.TryGet((nuint)mt, (nuint)toTypeHnd);
                     if (result == CastResult.CanCast)
                     {
                         // do nothing
@@ -327,7 +186,7 @@ namespace System.Runtime.CompilerServices
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static object? IsInstance_Helper(void* toTypeHnd, object obj)
         {
-            CastResult result = TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd);
+            CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd);
             if (result == CastResult.CanCast)
             {
                 return obj;
@@ -356,7 +215,7 @@ namespace System.Runtime.CompilerServices
                 void* mt = RuntimeHelpers.GetMethodTable(obj);
                 if (mt != toTypeHnd)
                 {
-                    result = TryGet((nuint)mt, (nuint)toTypeHnd);
+                    result = CastCache.TryGet((nuint)mt, (nuint)toTypeHnd);
                     if (result != CastResult.CanCast)
                     {
                         goto slowPath;
@@ -380,7 +239,7 @@ namespace System.Runtime.CompilerServices
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static object? ChkCast_Helper(void* toTypeHnd, object obj)
         {
-            CastResult result = TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd);
+            CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd);
             if (result == CastResult.CanCast)
             {
                 return obj;
@@ -597,7 +456,7 @@ namespace System.Runtime.CompilerServices
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static void StelemRef_Helper(ref object? element, void* elementType, object obj)
         {
-            CastResult result = TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)elementType);
+            CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)elementType);
             if (result == CastResult.CanCast)
             {
                 WriteBarrier(ref element, obj);
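
The KeyToBucket helper removed above (and re-added in the shared CastCache.cs at the end of this diff) mixes the two pointers with a rotate-xor and then a Fibonacci multiply-and-shift. A standalone 64-bit sketch of the same bit math, assuming size is a power of two greater than 1:

    using System.Numerics;

    internal static class FibonacciHashSketch
    {
        // the rotate-xor mixes the two pointers, the Fibonacci multiplier spreads
        // the bits, and the shift keeps only the top log2(size) bits.
        internal static int KeyToBucket(nuint source, nuint target, int size)
        {
            int hashShift = BitOperations.LeadingZeroCount((ulong)(size - 1)); // 64 - log2(size)
            ulong hash = BitOperations.RotateLeft((ulong)source, 32) ^ (ulong)target;
            return (int)((hash * 11400714819323198485ul) >> hashShift);
        }
    }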
index 261868c..251d521 100644 (file)
@@ -308,14 +308,6 @@ namespace System.Runtime
 
         [DllImport(Redhawk.BaseName)]
         [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvCdecl) })]
-        internal static extern void RhpAcquireCastCacheLock();
-
-        [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvCdecl) })]
-        internal static extern void RhpReleaseCastCacheLock();
-
-        [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvCdecl) })]
         internal static extern ulong RhpGetTickCount64();
 
         [DllImport(Redhawk.BaseName)]
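
The cast cache lock exports can be deleted because the unified cache publishes entries with a seqlock-style version word instead of a global lock. A compressed sketch of the writer side, simplified from TrySet in the shared CastCache.cs below (the field names match; the TryPublish helper itself is hypothetical):

    using System.Threading;

    internal struct CastCacheEntrySketch
    {
        internal uint _version;           // odd while a write is in progress
        internal nuint _source;
        internal nuint _targetAndResult;
    }

    internal static class LockFreePublishSketch
    {
        // claim the entry by moving its version to odd with a CAS; only the winner
        // writes the payload, then bumps the version back to even to publish.
        internal static bool TryPublish(ref CastCacheEntrySketch e, nuint source, nuint targetAndResult)
        {
            uint version = e._version & ~1u;   // expect a complete (even) entry
            if (Interlocked.CompareExchange(ref e._version, version + 1, version) != version)
                return false;                  // lost the race; caller reprobes

            e._source = source;
            e._targetAndResult = targetAndResult;
            Volatile.Write(ref e._version, version + 2);  // even again: readable
            return true;
        }
    }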
index 1142820..562f781 100644 (file)
@@ -4,7 +4,7 @@
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-
+using System.Threading;
 using Internal.Runtime;
 
 namespace System.Runtime
@@ -107,7 +107,7 @@ namespace System.Runtime
                 // parameters are compatible.
 
                 // NOTE: using general assignable path for the cache because of the cost of the variance checks
-                if (CastCache.AreTypesAssignableInternal(pObjType, pTargetType, AssignmentVariation.BoxedSource, null))
+                if (AreTypesAssignableInternal(pObjType, pTargetType, AssignmentVariation.BoxedSource, null))
                     return obj;
                 return null;
             }
@@ -207,7 +207,7 @@ namespace System.Runtime
                 }
             }
 
-            if (CastCache.AreTypesAssignableInternal(pObjType->RelatedParameterType, pTargetType->RelatedParameterType,
+            if (AreTypesAssignableInternal(pObjType->RelatedParameterType, pTargetType->RelatedParameterType,
                 AssignmentVariation.AllowSizeEquivalence, null))
             {
                 return obj;
@@ -246,7 +246,7 @@ namespace System.Runtime
 
             MethodTable* pObjType = obj.GetMethodTable();
 
-            if (CastCache.AreTypesAssignableInternal_SourceNotTarget_BoxedSource(pObjType, pTargetType, null))
+            if (AreTypesAssignableInternal_SourceNotTarget_BoxedSource(pObjType, pTargetType, null))
                 return obj;
 
             // If object type implements IDynamicInterfaceCastable then there's one more way to check whether it implements
@@ -441,7 +441,7 @@ namespace System.Runtime
                         //   class Foo : ICovariant<Bar> is ICovariant<IBar>
                         //   class Foo : ICovariant<IBar> is ICovariant<Object>
 
-                        if (!CastCache.AreTypesAssignableInternal(pSourceArgType, pTargetArgType, AssignmentVariation.Normal, pVisited))
+                        if (!AreTypesAssignableInternal(pSourceArgType, pTargetArgType, AssignmentVariation.Normal, pVisited))
                             return false;
 
                         break;
@@ -456,7 +456,7 @@ namespace System.Runtime
 
                         // This call is just like the call for Covariance above except true is passed
                         // to the fAllowSizeEquivalence parameter to allow the int/uint matching to work
-                        if (!CastCache.AreTypesAssignableInternal(pSourceArgType, pTargetArgType, AssignmentVariation.AllowSizeEquivalence, pVisited))
+                        if (!AreTypesAssignableInternal(pSourceArgType, pTargetArgType, AssignmentVariation.AllowSizeEquivalence, pVisited))
                             return false;
 
                         break;
@@ -471,7 +471,7 @@ namespace System.Runtime
                         //   class Foo : IContravariant<IBar> is IContravariant<Bar>
                         //   class Foo : IContravariant<Object> is IContravariant<IBar>
 
-                        if (!CastCache.AreTypesAssignableInternal(pTargetArgType, pSourceArgType, AssignmentVariation.Normal, pVisited))
+                        if (!AreTypesAssignableInternal(pTargetArgType, pSourceArgType, AssignmentVariation.Normal, pVisited))
                             return false;
 
                         break;
@@ -513,7 +513,7 @@ namespace System.Runtime
                 return AreTypesEquivalent(pSourceType, pNullableType);
             }
 
-            return CastCache.AreTypesAssignableInternal(pSourceType, pTargetType, AssignmentVariation.BoxedSource, null);
+            return AreTypesAssignableInternal(pSourceType, pTargetType, AssignmentVariation.BoxedSource, null);
         }
 
         // Internally callable version of the export method above. Has two additional flags:
@@ -521,7 +521,7 @@ namespace System.Runtime
         //                            compatible with Object, ValueType and Enum (if applicable)
         //  fAllowSizeEquivalence   : allow identically sized integral types and enums to be considered
         //                            equivalent (currently used only for array element types)
-        internal static unsafe bool AreTypesAssignableInternal(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation, EETypePairList* pVisited)
+        internal static unsafe bool AreTypesAssignableInternalUncached(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation, EETypePairList* pVisited)
         {
             bool fBoxedSource = ((variation & AssignmentVariation.BoxedSource) == AssignmentVariation.BoxedSource);
             bool fAllowSizeEquivalence = ((variation & AssignmentVariation.AllowSizeEquivalence) == AssignmentVariation.AllowSizeEquivalence);
@@ -585,7 +585,7 @@ namespace System.Runtime
                         // Note that using AreTypesAssignableInternal with AssignmentVariation.AllowSizeEquivalence
                         // here handles array covariance as well as IFoo[] -> Foo[] etc.  We are not using
                         // AssignmentVariation.BoxedSource because int[] is not assignable to object[].
-                        return CastCache.AreTypesAssignableInternal(pSourceType->RelatedParameterType,
+                        return AreTypesAssignableInternal(pSourceType->RelatedParameterType,
                             pTargetType->RelatedParameterType, AssignmentVariation.AllowSizeEquivalence, pVisited);
                     }
                 }
@@ -663,7 +663,7 @@ namespace System.Runtime
 
             MethodTable* pObjType = obj.GetMethodTable();
 
-            if (CastCache.AreTypesAssignableInternal_SourceNotTarget_BoxedSource(pObjType, pTargetType, null))
+            if (AreTypesAssignableInternal_SourceNotTarget_BoxedSource(pObjType, pTargetType, null))
                 return obj;
 
             // If object type implements IDynamicInterfaceCastable then there's one more way to check whether it implements
@@ -690,7 +690,7 @@ namespace System.Runtime
             Debug.Assert(array.GetMethodTable()->IsArray, "first argument must be an array");
 
             MethodTable* arrayElemType = array.GetMethodTable()->RelatedParameterType;
-            if (CastCache.AreTypesAssignableInternal(obj.GetMethodTable(), arrayElemType, AssignmentVariation.BoxedSource, null))
+            if (AreTypesAssignableInternal(obj.GetMethodTable(), arrayElemType, AssignmentVariation.BoxedSource, null))
                 return;
 
             // If object type implements IDynamicInterfaceCastable then there's one more way to check whether it implements
@@ -774,11 +774,11 @@ namespace System.Runtime
             if (elementType != obj.GetMethodTable())
                 goto notExactMatch;
 
-doWrite:
+        doWrite:
             InternalCalls.RhpAssignRef(ref element, obj);
             return;
 
-assigningNull:
+        assigningNull:
             element = null;
             return;
 
@@ -795,7 +795,7 @@ assigningNull:
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static unsafe void StelemRef_Helper(ref object element, MethodTable* elementType, object obj)
         {
-            if (CastCache.AreTypesAssignableInternal(obj.GetMethodTable(), elementType, AssignmentVariation.BoxedSource, null))
+            if (AreTypesAssignableInternal(obj.GetMethodTable(), elementType, AssignmentVariation.BoxedSource, null))
             {
                 InternalCalls.RhpAssignRef(ref element, obj);
             }
@@ -1034,304 +1034,64 @@ assigningNull:
             }
         }
 
-        // source type + target type + assignment variation -> true/false
-        [System.Runtime.CompilerServices.EagerStaticClassConstructionAttribute]
-        private static class CastCache
+        public static unsafe bool AreTypesAssignableInternal(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation, EETypePairList* pVisited)
         {
-            //
-            // Cache size parameters
-            //
-
-            // Start with a small cache size so that cache entries used only by one-time startup initialization
-            // will get flushed soon
-            private const int InitialCacheSize = 128; // MUST BE A POWER OF TWO
-            private const int DefaultCacheSize = 1024;
-            private const int MaximumCacheSize = 128 * 1024;
-
-            //
-            // Cache state
-            //
-            private static Entry[] s_cache = new Entry[InitialCacheSize];   // Initialize the cache eagerly to avoid null checks.
-            private static UnsafeGCHandle s_previousCache;
-            private static ulong s_tickCountOfLastOverflow = InternalCalls.RhpGetTickCount64();
-            private static int s_entries;
-            private static bool s_roundRobinFlushing;
-
-
-            private sealed class Entry
-            {
-                public Entry Next;
-                public Key Key;
-                public bool Result;     // @TODO: consider storing this bit in the Key -- there is room
-            }
-
-            private unsafe struct Key
-            {
-                private IntPtr _sourceTypeAndVariation;
-                private IntPtr _targetType;
-
-                public Key(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation)
-                {
-                    Debug.Assert((((long)pSourceType) & 3) == 0, "misaligned MethodTable!");
-                    Debug.Assert(((uint)variation) <= 3, "variation enum has an unexpectedly large value!");
-
-                    _sourceTypeAndVariation = (IntPtr)(((byte*)pSourceType) + ((int)variation));
-                    _targetType = (IntPtr)pTargetType;
-                }
-
-                private static int GetHashCode(IntPtr intptr)
-                {
-                    return unchecked((int)((long)intptr));
-                }
-
-                public int CalculateHashCode()
-                {
-                    return ((GetHashCode(_targetType) >> 4) ^ GetHashCode(_sourceTypeAndVariation));
-                }
-
-                public bool Equals(ref Key other)
-                {
-                    return (_sourceTypeAndVariation == other._sourceTypeAndVariation) && (_targetType == other._targetType);
-                }
-
-                public AssignmentVariation Variation
-                {
-                    get { return (AssignmentVariation)(unchecked((int)(long)_sourceTypeAndVariation) & 3); }
-                }
-
-                public MethodTable* SourceType { get { return (MethodTable*)(((long)_sourceTypeAndVariation) & ~3L); } }
-                public MethodTable* TargetType { get { return (MethodTable*)_targetType; } }
-            }
+            // Important special case -- it breaks infinite recursion
+            if (pSourceType == pTargetType)
+                return true;
 
-            public static unsafe bool AreTypesAssignableInternal(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation, EETypePairList* pVisited)
+            nuint sourceAndVariation = (nuint)pSourceType + (uint)variation;
+            CastResult result = CastCache.TryGet(sourceAndVariation, (nuint)(pTargetType));
+            if (result != CastResult.MaybeCast)
             {
-                // Important special case -- it breaks infinite recursion in CastCache itself!
-                if (pSourceType == pTargetType)
-                    return true;
-
-                Key key = new Key(pSourceType, pTargetType, variation);
-                Entry? entry = LookupInCache(s_cache, ref key);
-                if (entry == null)
-                    return CacheMiss(ref key, pVisited);
-
-                return entry.Result;
+                return result == CastResult.CanCast;
             }
 
-            // This method is an optimized and customized version of AreTypesAssignable that achieves better performance
-            // than AreTypesAssignableInternal through 2 significant changes
-            // 1. Removal of sourceType to targetType check (This property must be known before calling this function. At the time
-            //    of writing, this is true as it is only used if sourceType is from an object, and targetType is an interface.)
-            // 2. Force inlining (This particular variant is only used in a small number of dispatch scenarios that are particularly
-            //    high in performance impact.)
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static unsafe bool AreTypesAssignableInternal_SourceNotTarget_BoxedSource(MethodTable* pSourceType, MethodTable* pTargetType, EETypePairList* pVisited)
-            {
-                Debug.Assert(pSourceType != pTargetType, "target is source");
-                Key key = new Key(pSourceType, pTargetType, AssignmentVariation.BoxedSource);
-                Entry? entry = LookupInCache(s_cache, ref key);
-                if (entry == null)
-                    return CacheMiss(ref key, pVisited);
-
-                return entry.Result;
-            }
+            return CacheMiss(pSourceType, pTargetType, variation, pVisited);
+        }
 
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            private static Entry? LookupInCache(Entry[] cache, ref Key key)
+        // This method is an optimized and customized version of AreTypesAssignable that achieves better performance
+        // than AreTypesAssignableInternal through 2 significant changes
+        // 1. Removal of sourceType to targetType check (This property must be known before calling this function. At the time
+        //    of writing, this is true as it is only used if sourceType is from an object, and targetType is an interface.)
+        // 2. Force inlining (This particular variant is only used in a small number of dispatch scenarios that are particularly
+        //    high in performance impact.)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe bool AreTypesAssignableInternal_SourceNotTarget_BoxedSource(MethodTable* pSourceType, MethodTable* pTargetType, EETypePairList* pVisited)
+        {
+            Debug.Assert(pSourceType != pTargetType, "target is source");
+            nuint sourceAndVariation = (nuint)pSourceType + (int)AssignmentVariation.BoxedSource;
+            CastResult result = CastCache.TryGet(sourceAndVariation, (nuint)(pTargetType));
+            if (result != CastResult.MaybeCast)
             {
-                int entryIndex = key.CalculateHashCode() & (cache.Length - 1);
-                Entry entry = cache[entryIndex];
-                while (entry != null)
-                {
-                    if (entry.Key.Equals(ref key))
-                        break;
-                    entry = entry.Next;
-                }
-                return entry;
+                return result == CastResult.CanCast;
             }
 
-            private static unsafe bool CacheMiss(ref Key key, EETypePairList* pVisited)
-            {
-                //
-                // First, check if we previously visited the input type pair, to avoid infinite recursion
-                //
-                if (EETypePairList.Exists(pVisited, key.SourceType, key.TargetType))
-                    return false;
-
-                bool result = false;
-                bool previouslyCached = false;
-
-                //
-                // Try to find the entry in the previous version of the cache that is kept alive by weak reference
-                //
-                if (s_previousCache.IsAllocated)
-                {
-                    // Unchecked cast to avoid recursive dependency on array casting
-                    Entry[] previousCache = Unsafe.As<Entry[]>(s_previousCache.Target);
-                    if (previousCache != null)
-                    {
-                        Entry? previousEntry = LookupInCache(previousCache, ref key);
-                        if (previousEntry != null)
-                        {
-                            result = previousEntry.Result;
-                            previouslyCached = true;
-                        }
-                    }
-                }
-
-                //
-                // Call into the type cast code to calculate the result
-                //
-                if (!previouslyCached)
-                {
-                    EETypePairList newList = new EETypePairList(key.SourceType, key.TargetType, pVisited);
-                    result = TypeCast.AreTypesAssignableInternal(key.SourceType, key.TargetType, key.Variation, &newList);
-                }
-
-                //
-                // Update the cache under the lock
-                //
-                InternalCalls.RhpAcquireCastCacheLock();
-                try
-                {
-                    try
-                    {
-                        // Avoid duplicate entries
-                        Entry? existingEntry = LookupInCache(s_cache, ref key);
-                        if (existingEntry != null)
-                            return existingEntry.Result;
-
-                        // Resize cache as necessary
-                        Entry[] cache = ResizeCacheForNewEntryAsNecessary();
-
-                        int entryIndex = key.CalculateHashCode() & (cache.Length - 1);
-
-                        Entry newEntry = new Entry() { Key = key, Result = result, Next = cache[entryIndex] };
-
-                        // BEWARE: Array store check can lead to infinite recursion. We avoid this by making certain
-                        // that the cache trivially answers the case of equivalent types without triggering the cache
-                        // miss path. (See CastCache.AreTypesAssignableInternal)
-                        cache[entryIndex] = newEntry;
-                        return newEntry.Result;
-                    }
-                    catch (OutOfMemoryException)
-                    {
-                        // Entry allocation failed -- but we can still return the correct cast result.
-                        return result;
-                    }
-                }
-                finally
-                {
-                    InternalCalls.RhpReleaseCastCacheLock();
-                }
-            }
-
-            private static Entry[] ResizeCacheForNewEntryAsNecessary()
-            {
-                Entry[] cache = s_cache;
-
-                int entries = s_entries++;
-
-                // If the cache has spare space, we are done
-                if (2 * entries < cache.Length)
-                {
-                    if (s_roundRobinFlushing)
-                    {
-                        cache[2 * entries] = null;
-                        cache[2 * entries + 1] = null;
-                    }
-                    return cache;
-                }
-
-                //
-                // Now we have a cache that is overflowing with results. We need to decide whether to resize it or start
-                // flushing the old entries instead
-                //
-
-                // Start over counting the entries
-                s_entries = 0;
-
-                // See how long it has been since the last time the cache was overflowing
-                ulong tickCount = InternalCalls.RhpGetTickCount64();
-                int tickCountSinceLastOverflow = (int)(tickCount - s_tickCountOfLastOverflow);
-                s_tickCountOfLastOverflow = tickCount;
-
-                bool shrinkCache = false;
-                bool growCache = false;
-
-                if (cache.Length < DefaultCacheSize)
-                {
-                    // If the cache has not reached the default size, just grow it without thinking about it much
-                    growCache = true;
-                }
-                else
-                {
-                    if (tickCountSinceLastOverflow < cache.Length)
-                    {
-                        // We 'overflow' when 2*entries == cache.Length, so we have cache.Length / 2 entries that were
-                        // filled in tickCountSinceLastOverflow ms, which is 2ms/entry
-
-                        // If the fill rate of the cache is faster than ~2ms per entry, grow it
-                        if (cache.Length < MaximumCacheSize)
-                            growCache = true;
-                    }
-                    else
-                    if (tickCountSinceLastOverflow > cache.Length * 16)
-                    {
-                        // We 'overflow' when 2*entries == cache.Length, so we have ((cache.Length*16) / 2) entries that
-                        // were filled in tickCountSinceLastOverflow ms, which is 32ms/entry
-
-                        // If the fill rate of the cache is slower than 32ms per entry, shrink it
-                        if (cache.Length > DefaultCacheSize)
-                            shrinkCache = true;
-                    }
-                    // Otherwise, keep the current size and just keep flushing the entries round robin
-                }
+            return CacheMiss(pSourceType, pTargetType, AssignmentVariation.BoxedSource, pVisited);
+        }
 
-                Entry[]? newCache = null;
-                if (growCache || shrinkCache)
-                {
-                    try
-                    {
-                        newCache = new Entry[shrinkCache ? (cache.Length / 2) : (cache.Length * 2)];
-                    }
-                    catch (OutOfMemoryException)
-                    {
-                        // Failed to allocate a bigger/smaller cache.  That is fine, keep the old one.
-                    }
-                }
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private static unsafe bool CacheMiss(MethodTable* pSourceType, MethodTable* pTargetType, AssignmentVariation variation, EETypePairList* pVisited)
+        {
+            //
+            // First, check if we previously visited the input type pair, to avoid infinite recursion
+            //
+            if (EETypePairList.Exists(pVisited, pSourceType, pTargetType))
+                return false;
 
-                if (newCache != null)
-                {
-                    s_roundRobinFlushing = false;
+            //
+            // Call into the type cast code to calculate the result
+            //
+            EETypePairList newList = new EETypePairList(pSourceType, pTargetType, pVisited);
+            bool result = TypeCast.AreTypesAssignableInternalUncached(pSourceType, pTargetType, variation, &newList);
 
-                    // Keep the reference to the old cache in a weak handle. We will try to use it to avoid hitting the
-                    // cache miss path until the GC collects it.
-                    if (s_previousCache.IsAllocated)
-                    {
-                        s_previousCache.Target = cache;
-                    }
-                    else
-                    {
-                        try
-                        {
-                            s_previousCache = UnsafeGCHandle.Alloc(cache, GCHandleType.Weak);
-                        }
-                        catch (OutOfMemoryException)
-                        {
-                            // Failed to allocate the handle to utilize the old cache, that is fine, we will just miss
-                            // out on repopulating the new cache from the old cache.
-                            s_previousCache = default(UnsafeGCHandle);
-                        }
-                    }
+            //
+            // Update the cache
+            //
+            nuint sourceAndVariation = (nuint)pSourceType + (uint)variation;
+            CastCache.TrySet(sourceAndVariation, (nuint)pTargetType, result);
 
-                    return s_cache = newCache;
-                }
-                else
-                {
-                    s_roundRobinFlushing = true;
-                    return cache;
-                }
-            }
+            return result;
         }
     }
 }
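
Note how the new code forms the cache key: MethodTable pointers are at least 4-byte aligned, so the two AssignmentVariation bits can ride in the low bits of the source pointer, replacing the old Key struct. A minimal sketch (the enum values mirror the runtime's; Unpack is purely illustrative, the cache never needs it):

    internal enum AssignmentVariationSketch { Normal = 0, BoxedSource = 1, AllowSizeEquivalence = 2 }

    internal static class KeyPackingSketch
    {
        // pointer alignment guarantees the low 2 bits of a MethodTable* are zero,
        // so adding the variation never disturbs the address bits.
        internal static nuint Pack(nuint pSourceType, AssignmentVariationSketch variation)
            => pSourceType + (uint)variation;

        internal static (nuint Source, AssignmentVariationSketch Variation) Unpack(nuint key)
            => (key & ~(nuint)3, (AssignmentVariationSketch)(key & 3));
    }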
index 297a496..31e4300 100644 (file)
 enum CrstType
 {
     CrstHandleTable,
-    CrstDispatchCache,
     CrstAllocHeap,
-    CrstGenericInstHashtab,
-    CrstMemAccessMgr,
     CrstInterfaceDispatchGlobalLists,
     CrstStressLog,
     CrstRestrictedCallouts,
     CrstObjectiveCMarshalCallouts,
     CrstGcStressControl,
     CrstThreadStore,
-    CrstCastCache,
+    CrstThunkPool,
     CrstYieldProcessorNormalized,
     CrstEventPipe,
     CrstEventPipeConfig,
index ec5dbf6..e073885 100644 (file)
@@ -361,18 +361,6 @@ COOP_PINVOKE_HELPER(void*, RhGetUniversalTransitionThunk, ())
     return (void*)RhpUniversalTransition;
 }
 
-extern CrstStatic g_CastCacheLock;
-
-EXTERN_C NATIVEAOT_API void __cdecl RhpAcquireCastCacheLock()
-{
-    g_CastCacheLock.Enter();
-}
-
-EXTERN_C NATIVEAOT_API void __cdecl RhpReleaseCastCacheLock()
-{
-    g_CastCacheLock.Leave();
-}
-
 extern CrstStatic g_ThunkPoolLock;
 
 EXTERN_C NATIVEAOT_API void __cdecl RhpAcquireThunkPoolLock()
index 773991e..1ce5f40 100644 (file)
@@ -51,7 +51,6 @@ extern RhConfig * g_pRhConfig;
 EXTERN_C bool g_fHasFastFxsave;
 bool g_fHasFastFxsave = false;
 
-CrstStatic g_CastCacheLock;
 CrstStatic g_ThunkPoolLock;
 
 #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
@@ -170,10 +169,7 @@ static bool InitDLL(HANDLE hPalInstance)
         return false;
 #endif
 
-    if (!g_CastCacheLock.InitNoThrow(CrstType::CrstCastCache))
-        return false;
-
-    if (!g_ThunkPoolLock.InitNoThrow(CrstType::CrstCastCache))
+    if (!g_ThunkPoolLock.InitNoThrow(CrstType::CrstThunkPool))
         return false;
 
     return true;
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs
new file mode 100644 (file)
index 0000000..58de4e5
--- /dev/null
@@ -0,0 +1,22 @@
+namespace System.Runtime.CompilerServices
+{
+    internal enum CastResult
+    {
+        CannotCast = 0,
+        CanCast = 1,
+        MaybeCast = 2
+    }
+
+    // trivial implementation of the cast cache
+    internal static unsafe class CastCache
+    {
+        internal static CastResult TryGet(nuint source, nuint target)
+        {
+            return CastResult.MaybeCast;
+        }
+
+        internal static void TrySet(nuint source, nuint target, bool result)
+        {
+        }
+    }
+}
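
This stub keeps the shared TypeCast.cs compiling in Test.CoreLib: TryGet always answers MaybeCast, so every cast takes the uncached path, and TrySet discards the result. A hypothetical caller showing the contract (ComputeSlowPath stands in for the real type check):

    internal static class StubContractSketch
    {
        private static bool ComputeSlowPath(nuint s, nuint t) => false; // stand-in

        internal static bool AreAssignable(nuint source, nuint target)
        {
            CastResult cached = CastCache.TryGet(source, target);
            if (cached != CastResult.MaybeCast)
                return cached == CastResult.CanCast; // never taken with this stub

            bool result = ComputeSlowPath(source, target);
            CastCache.TrySet(source, target, result); // no-op in Test.CoreLib
            return result;
        }
    }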
index da62a5d..f4c83b5 100644 (file)
       <Link>System\Runtime\CompilerServices\Unsafe.cs</Link>
     </Compile>
     <Compile Include="Internal\Runtime\IDynamicInterfaceCastableSupport.cs" />
+    <Compile Include="System\Runtime\CompilerServices\CastCache.cs" />
     <Compile Include="System\Runtime\CompilerServices\ClassConstructorRunner.cs" />
     <Compile Include="System\Runtime\CompilerServices\StaticClassConstructionContext.cs" />
     <Compile Include="System\Runtime\InteropServices\InAttribute.cs" />
index d2cfa88..1e59f78 100644 (file)
@@ -124,7 +124,7 @@ void CastCache::Initialize()
     }
     CONTRACTL_END;
 
-    FieldDesc* pTableField = CoreLibBinder::GetField(FIELD__CASTHELPERS__TABLE);
+    FieldDesc* pTableField = CoreLibBinder::GetField(FIELD__CASTCACHE__TABLE);
 
     GCX_COOP();
     s_pTableRef = (BASEARRAYREF*)pTableField->GetCurrentStaticAddress();
index ed4163d..e325d8e 100644 (file)
@@ -1192,8 +1192,10 @@ DEFINE_CLASS(NULLABLE_COMPARER, CollectionsGeneric, NullableComparer`1)
 
 DEFINE_CLASS(INATTRIBUTE, Interop, InAttribute)
 
+DEFINE_CLASS(CASTCACHE, CompilerServices, CastCache)
+DEFINE_FIELD(CASTCACHE, TABLE, s_table)
+
 DEFINE_CLASS(CASTHELPERS, CompilerServices, CastHelpers)
-DEFINE_FIELD(CASTHELPERS, TABLE, s_table)
 DEFINE_METHOD(CASTHELPERS, ISINSTANCEOFANY,  IsInstanceOfAny,             SM_PtrVoid_Obj_RetObj)
 DEFINE_METHOD(CASTHELPERS, ISINSTANCEOFCLASS,IsInstanceOfClass,           SM_PtrVoid_Obj_RetObj)
 DEFINE_METHOD(CASTHELPERS, ISINSTANCEOFINTERFACE,  IsInstanceOfInterface, SM_PtrVoid_Obj_RetObj)
index 3d3d69d..4ec4f2d 100644 (file)
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\AsyncValueTaskMethodBuilder.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\AsyncValueTaskMethodBuilderT.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\AsyncVoidMethodBuilder.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\CastCache.cs" Condition="'$(FeatureCoreCLR)' == 'true' or '$(FeatureNativeAot)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\CallerArgumentExpressionAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\CallerFilePathAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\CallerLineNumberAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\IUnaryPlusOperators.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\IUnsignedNumber.cs" />
   </ItemGroup>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs
new file mode 100644 (file)
index 0000000..fe58141
--- /dev/null
@@ -0,0 +1,439 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace System.Runtime.CompilerServices
+{
+    internal enum CastResult
+    {
+        CannotCast = 0,
+        CanCast = 1,
+        MaybeCast = 2
+    }
+
+#if NATIVEAOT
+    [EagerStaticClassConstruction]
+#endif
+    internal static unsafe class CastCache
+    {
+
+#if CORECLR
+        // In CoreCLR the table is written to only on the native side.
+        // This is all we need to implement TryGet.
+        private static int[]? s_table;
+#else
+
+  #if DEBUG
+        private const int INITIAL_CACHE_SIZE = 8;    // MUST BE A POWER OF TWO
+        private const int MAXIMUM_CACHE_SIZE = 512;  // lower than in release builds, to make the limit easier to reach in tests.
+  #else
+        private const int INITIAL_CACHE_SIZE = 128;  // MUST BE A POWER OF TWO
+        private const int MAXIMUM_CACHE_SIZE = 4096; // 4096 * sizeof(CastCacheEntry) is 98304 bytes on 64bit. We will rarely need this much though.
+  #endif // DEBUG
+
+        private const int VERSION_NUM_SIZE = 29;
+        private const uint VERSION_NUM_MASK = (1 << VERSION_NUM_SIZE) - 1;
+
+        // A trivial 2-element table used for "flushing" the cache. Nothing is ever stored in this table.
+        // It is required that we are able to allocate this.
+        private static int[] s_sentinelTable = CreateCastCache(2, throwOnFail: true)!;
+
+        // when flushing, remember the last size.
+        private static int s_lastFlushSize = INITIAL_CACHE_SIZE;
+
+        // The actual storage.
+        // Initialize to the sentinel in DEBUG as if just flushed, to ensure the sentinel can be handled in TrySet.
+        private static int[] s_table =
+  #if !DEBUG
+            CreateCastCache(INITIAL_CACHE_SIZE) ??
+  #endif
+            s_sentinelTable;
+
+#endif // CORECLR
+
+        private const int BUCKET_SIZE = 8;
+
+        [StructLayout(LayoutKind.Sequential)]
+        private struct CastCacheEntry
+        {
+            // version has the following structure:
+            // [ distance:3bit |  versionNum:29bit ]
+            //
+            // distance is how many iterations the entry is from its ideal position.
+            // we use that for preemption.
+            //
+            // versionNum is a monotonically increasing numerical tag.
+            // A writer "claims" an entry by atomically incrementing the tag, so an odd number indicates an entry in progress.
+            // Upon completion of adding an entry the tag is incremented again, making it even. An even number indicates a complete entry.
+            //
+            // Readers read the version twice, before and after retrieving the entry.
+            // To have a usable entry both reads must yield the same even version.
+            //
+            internal uint _version;
+            internal nuint _source;
+            // pointers have unused lower bits due to alignment, we use one for the result
+            internal nuint _targetAndResult;
+
+            internal void SetEntry(nuint source, nuint target, bool result)
+            {
+                _source = source;
+                _targetAndResult = target | (nuint)(result ? 1 : 0);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static int KeyToBucket(ref int tableData, nuint source, nuint target)
+        {
+            // upper bits of addresses do not vary much, so to reduce loss due to cancelling out,
+            // we do `rotl(source, <half-size>) ^ target` for mixing inputs.
+            // then we use Fibonacci hashing to reduce the value to the desired size.
+
+            int hashShift = HashShift(ref tableData);
+#if TARGET_64BIT
+            ulong hash = BitOperations.RotateLeft((ulong)source, 32) ^ (ulong)target;
+            return (int)((hash * 11400714819323198485ul) >> hashShift);
+#else
+            uint hash = BitOperations.RotateLeft((uint)source, 16) ^ (uint)target;
+            return (int)((hash * 2654435769u) >> hashShift);
+#endif
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ref int TableData(int[] table)
+        {
+            // element 0 is used for embedded aux data
+            //
+            // AuxData: { hashShift, tableMask, victimCounter }
+            return ref Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref table.GetRawData(), (nint)sizeof(nint)));
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ref int HashShift(ref int tableData)
+        {
+            return ref tableData;
+        }
+
+        // TableMask is "size - 1"
+        // we need that more often than we need size
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ref int TableMask(ref int tableData)
+        {
+            return ref Unsafe.Add(ref tableData, 1);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ref uint VictimCounter(ref int tableData)
+        {
+            return ref Unsafe.As<int, uint>(ref Unsafe.Add(ref tableData, 2));
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ref CastCacheEntry Element(ref int tableData, int index)
+        {
+            // element 0 is used for embedded aux data, skip it
+            return ref Unsafe.Add(ref Unsafe.As<int, CastCacheEntry>(ref tableData), index + 1);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static CastResult TryGet(nuint source, nuint target)
+        {
+            // table is always initialized and is not null.
+            ref int tableData = ref TableData(s_table!);
+
+            int index = KeyToBucket(ref tableData, source, target);
+            for (int i = 0; i < BUCKET_SIZE;)
+            {
+                ref CastCacheEntry pEntry = ref Element(ref tableData, index);
+
+                // we must read in this order: version -> [entry parts] -> version
+                // if version is odd or changes, the entry is inconsistent and thus ignored
+                uint version = Volatile.Read(ref pEntry._version);
+
+#if CORECLR
+                // in CoreCLR we do ordinary reads of the entry parts and
+                // Interlocked.ReadMemoryBarrier() before reading the version
+                nuint entrySource = pEntry._source;
+#else
+                // must read this before reading the version again
+                nuint entrySource = Volatile.Read(ref pEntry._source);
+#endif
+
+                // mask the lower version bit to make it even.
+                // This way we can check if version is odd or changing in just one compare.
+                version &= unchecked((uint)~1);
+
+                if (entrySource == source)
+                {
+
+#if CORECLR
+                    // in CoreCLR we do ordinary reads of the entry parts and
+                    // Interlocked.ReadMemoryBarrier() before reading the version
+                    nuint entryTargetAndResult = pEntry._targetAndResult;
+#else
+                    // must read this before reading the version again
+                    nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
+#endif
+
+                    // target never has its lower bit set.
+                    // a matching entryTargetAndResult would then have the same bits, except for the lowest one, which is the result.
+                    entryTargetAndResult ^= target;
+                    if (entryTargetAndResult <= 1)
+                    {
+                        // make sure the second read of 'version' happens after reading 'source' and 'targetAndResult'
+                        //
+                        // We can either:
+                        // - use acquires for both _source and _targetAndResult or
+                        // - issue a load barrier before reading _version
+                        // benchmarks on available hardware (Jan 2020) show that use of a read barrier is cheaper.
+
+#if CORECLR
+                        Interlocked.ReadMemoryBarrier();
+#endif
+
+                        if (version != pEntry._version)
+                        {
+                            // oh, so close, the entry is in an inconsistent state.
+                            // it is either changing or has changed while we were reading.
+                            // treat it as a miss.
+                            break;
+                        }
+
+                        return (CastResult)entryTargetAndResult;
+                    }
+                }
+
+                if (version == 0)
+                {
+                    // the rest of the bucket is unclaimed, no point in searching further
+                    break;
+                }
+
+                // quadratic reprobe
+                i++;
+                index = (index + i) & TableMask(ref tableData);
+            }
+            return CastResult.MaybeCast;
+        }
+
+        // The rest is support for updating the cache.
+        // In CoreCLR the cache is only updated in native code.
+        //
+        // The following helpers must match the native implementations in castcache.h and castcache.cpp
+#if !CORECLR
+
+        // We generally do not want to throw OutOfMemoryException in casts, so on allocation failure return null unless throwOnFail is specified.
+        private static int[]? CreateCastCache(int size, bool throwOnFail = false)
+        {
+            // size must be at least 2
+            Debug.Assert(size > 1);
+            // size must be a power of two
+            Debug.Assert((size & (size - 1)) == 0);
+
+            int[]? table = null;
+            try
+            {
+                table = new int[(size + 1) * sizeof(CastCacheEntry) / sizeof(int)];
+            }
+            catch (OutOfMemoryException) when (!throwOnFail)
+            {
+            }
+
+            if (table == null)
+            {
+                size = INITIAL_CACHE_SIZE;
+                try
+                {
+                    table = new int[(size + 1) * sizeof(CastCacheEntry) / sizeof(int)];
+                }
+                catch (OutOfMemoryException)
+                {
+                }
+            }
+
+            if (table == null)
+            {
+                return table;
+            }
+
+            ref int tableData = ref TableData(table);
+
+            // set the table mask. We need it often and do not want to compute it each time.
+            TableMask(ref tableData) = size - 1;
+
+            // Fibonacci hash reduces the value into the desired range by shifting right by the number of leading zeroes in 'size - 1'
+            byte shift = (byte)BitOperations.LeadingZeroCount((nuint)(size - 1));
+            HashShift(ref tableData) = shift;
+
+            return table;
+        }
+
+        internal static void TrySet(nuint source, nuint target, bool result)
+        {
+            int bucket;
+            ref int tableData = ref *(int*)0;
+
+            do
+            {
+                tableData = ref TableData(s_table);
+                if (TableMask(ref tableData) == 1)
+                {
+                    // 2-element table is used as a sentinel.
+                    // we did not allocate a real table yet or have flushed it.
+                    // try replacing the table, but do not insert anything.
+                    MaybeReplaceCacheWithLarger(s_lastFlushSize);
+                    return;
+                }
+
+                bucket = KeyToBucket(ref tableData, source, target);
+                int index = bucket;
+                ref CastCacheEntry pEntry = ref Element(ref tableData, index);
+
+                for (int i = 0; i < BUCKET_SIZE;)
+                {
+                    // claim the entry if it is unused or more distant than us from its origin.
+                    // Note - someone familiar with Robin Hood hashing will notice that
+                    //        we do the opposite - we are "robbing the poor".
+                    //        The Robin Hood strategy improves average lookup in a lossless dictionary by reducing
+                    //        outliers via giving preference to more distant entries.
+                    //        What we have here is a lossy cache with outliers bounded by the bucket size.
+                    //        We improve average lookup by giving preference to the "richer" entries.
+                    //        If we used the Robin Hood strategy we could eventually end up with all
+                    //        entries in the table being maximally "poor".
+
+                    uint version = pEntry._version;
+
+                    // mask the lower version bit to make it even.
+                    // This way we will detect whether the version is changing (odd) or has changed (even, but different).
+                    version &= unchecked((uint)~1);
+
+                    if ((version & VERSION_NUM_MASK) >= (VERSION_NUM_MASK - 2))
+                    {
+                        // If exactly VERSION_NUM_MASK updates happen between here and publishing, we may not recognize a race.
+                        // It is extremely unlikely, but to not worry about the possibility, let's not allow the version to go this high and just get a new cache.
+                        // This will not happen often.
+                        FlushCurrentCache();
+                        return;
+                    }
+
+                    if (version == 0 || (version >> VERSION_NUM_SIZE) > i)
+                    {
+                        uint newVersion = ((uint)i << VERSION_NUM_SIZE) + (version & VERSION_NUM_MASK) + 1;
+                        uint versionOrig = Interlocked.CompareExchange(ref pEntry._version, newVersion, version);
+                        if (versionOrig == version)
+                        {
+                            pEntry.SetEntry(source, target, result);
+
+                            // entry is in inconsistent state and cannot be read or written to until we
+                            // update the version, which is the last thing we do here
+                            Volatile.Write(ref pEntry._version, newVersion + 1);
+                            return;
+                        }
+                        // someone snatched the entry. try the next one in the bucket.
+                    }
+
+                    if (pEntry._source == source && ((pEntry._targetAndResult ^ target) <= 1))
+                    {
+                        // looks like we already have an entry for this.
+                        // duplicate entries are harmless, but a bit of a waste.
+                        return;
+                    }
+
+                    // quadratic reprobe
+                    i++;
+                    index += i;
+                    pEntry = ref Element(ref tableData, index & TableMask(ref tableData));
+                }
+
+                // bucket is full.
+            } while (TryGrow(ref tableData));
+
+            // reread tableData after TryGrow.
+            tableData = ref TableData(s_table);
+
+            if (TableMask(ref tableData) == 1)
+            {
+                // do not insert into a sentinel.
+                return;
+            }
+
+            // pick a victim somewhat randomly within a bucket
+            // NB: ++ is not interlocked. We are ok if we lose counts here. It is just a number that changes.
+            uint victimDistance = VictimCounter(ref tableData)++ & (BUCKET_SIZE - 1);
+            // position the victim in a quadratic reprobe bucket
+            uint victim = (victimDistance * victimDistance + victimDistance) / 2;
+
+            {
+                ref CastCacheEntry pEntry = ref Element(ref tableData, (bucket + (int)victim) & TableMask(ref tableData));
+
+                uint version = pEntry._version;
+
+                // mask the lower version bit to make it even.
+                // This way we will detect whether the version is changing (odd) or has changed (even, but different).
+                version &= unchecked((uint)~1);
+
+                if ((version & VERSION_NUM_MASK) >= (VERSION_NUM_MASK - 2))
+                {
+                    // If exactly VERSION_NUM_MASK updates happen between here and publishing, we may not recognize a race.
+                    // It is extremely unlikely, but to not worry about the possibility, let's not allow the version to go this high and just get a new cache.
+                    // This will not happen often.
+                    FlushCurrentCache();
+                    return;
+                }
+
+                uint newVersion = (victimDistance << VERSION_NUM_SIZE) + (version & VERSION_NUM_MASK) + 1;
+                uint versionOrig = Interlocked.CompareExchange(ref pEntry._version, newVersion, version);
+
+                if (versionOrig == version)
+                {
+                    pEntry.SetEntry(source, target, result);
+                    Volatile.Write(ref pEntry._version, newVersion + 1);
+                }
+            }
+        }
+
+        private static int CacheElementCount(ref int tableData)
+        {
+            return TableMask(ref tableData) + 1;
+        }
+
+        private static void FlushCurrentCache()
+        {
+            ref int tableData = ref TableData(s_table);
+            int lastSize = CacheElementCount(ref tableData);
+            if (lastSize < INITIAL_CACHE_SIZE)
+                lastSize = INITIAL_CACHE_SIZE;
+
+            s_lastFlushSize = lastSize;
+            // flushing is just replacing the table with a sentinel.
+            s_table = s_sentinelTable;
+        }
+
+        private static bool MaybeReplaceCacheWithLarger(int size)
+        {
+            int[]? newTable = CreateCastCache(size);
+            if (newTable == null)
+            {
+                return false;
+            }
+
+            s_table = newTable;
+            return true;
+        }
+
+        private static bool TryGrow(ref int tableData)
+        {
+            int newSize = CacheElementCount(ref tableData) * 2;
+            if (newSize <= MAXIMUM_CACHE_SIZE)
+            {
+                return MaybeReplaceCacheWithLarger(newSize);
+            }
+
+            return false;
+        }
+#endif   // !CORECLR
+    }
+}
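
For reference, a worked example of the version-word arithmetic used above, with the [ distance:3 | versionNum:29 ] layout: claiming makes versionNum odd and records the reprobe distance, and publishing makes it even again.

    using System;

    internal static class VersionWordSketch
    {
        private const int VERSION_NUM_SIZE = 29;
        private const uint VERSION_NUM_MASK = (1u << VERSION_NUM_SIZE) - 1;

        internal static void Demo()
        {
            // a complete entry at reprobe distance 2, versionNum 6:
            uint version = (2u << VERSION_NUM_SIZE) | 6u;  // even -> readable
            // claiming bumps versionNum to 7 (odd -> write in progress):
            uint claimed = (2u << VERSION_NUM_SIZE) + (version & VERSION_NUM_MASK) + 1;
            // publishing bumps it to 8 (even -> readable again):
            uint published = claimed + 1;
            Console.WriteLine($"{version:x8} -> {claimed:x8} -> {published:x8}");  // 40000006 -> 40000007 -> 40000008
        }
    }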