Continue updating previous generic dictionaries after expansion (#40355)
authorTomáš Rylek <trylek@microsoft.com>
Fri, 7 Aug 2020 23:17:32 +0000 (01:17 +0200)
committerGitHub <noreply@github.com>
Fri, 7 Aug 2020 23:17:32 +0000 (01:17 +0200)
Fix perf regression after introduction of expandable generic
dictionaries. When we expand a generic dictionary, we should back
propagate the change to previously allocated shorter versions of
the same dictionary, otherwise already running JITted code may
continue referring to the "old version" of the dictionary implying
repeated slow path generic lookups.

Thanks

Tomas

src/coreclr/src/jit/importer.cpp
src/coreclr/src/vm/genericdict.cpp
src/coreclr/src/vm/genericdict.h
src/coreclr/src/vm/generics.cpp
src/coreclr/src/vm/genmeth.cpp
src/coreclr/src/vm/method.cpp
src/coreclr/src/vm/methodtable.cpp
src/coreclr/src/vm/methodtable.h
src/coreclr/src/vm/methodtable.inl
src/coreclr/src/vm/methodtablebuilder.cpp

index 5e575d1..463054b 100644 (file)
@@ -2117,11 +2117,18 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
                                       nullptr DEBUGARG("impRuntimeLookup indirectOffset"));
         }
 
+        // The last indirection could be subject to a size check (dynamic dictionary expansion)
+        bool isLastIndirectionWithSizeCheck =
+            ((i == pRuntimeLookup->indirections - 1) && (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK));
+
         if (i != 0)
         {
             slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
             slotPtrTree->gtFlags |= GTF_IND_NONFAULTING;
-            slotPtrTree->gtFlags |= GTF_IND_INVARIANT;
+            if (!isLastIndirectionWithSizeCheck)
+            {
+                slotPtrTree->gtFlags |= GTF_IND_INVARIANT;
+            }
         }
 
         if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset))
@@ -2131,8 +2138,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
 
         if (pRuntimeLookup->offsets[i] != 0)
         {
-            // The last indirection could be subject to a size check (dynamic dictionary expansion)
-            if (i == pRuntimeLookup->indirections - 1 && pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)
+            if (isLastIndirectionWithSizeCheck)
             {
                 lastIndOfTree = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                              nullptr DEBUGARG("impRuntimeLookup indirectOffset"));
index 1fb0584..37044ef 100644 (file)
@@ -71,26 +71,39 @@ DictionaryLayout* DictionaryLayout::Allocate(WORD              numSlots,
 
 //---------------------------------------------------------------------------------------
 //
-// Count the number of bytes that are required in a dictionary with the specified layout
+// Total number of bytes for a dictionary with the specified layout (including optional back pointer
+// used by expanded dictionaries). The pSlotSize argument is used to return the size
+// to be stored in the size slot of the dictionary (not including the optional back pointer).
 // 
 //static
 DWORD 
 DictionaryLayout::GetDictionarySizeFromLayout(
     DWORD                numGenericArgs, 
-    PTR_DictionaryLayout pDictLayout)
+    PTR_DictionaryLayout pDictLayout,
+    DWORD*               pSlotSize)
 {
     LIMITED_METHOD_DAC_CONTRACT;
     PRECONDITION(numGenericArgs > 0);
     PRECONDITION(CheckPointer(pDictLayout, NULL_OK));
+    PRECONDITION(CheckPointer(pSlotSize));
 
-    DWORD bytes = numGenericArgs * sizeof(TypeHandle);          // Slots for instantiation arguments
+    DWORD slotBytes = numGenericArgs * sizeof(TypeHandle); // Slots for instantiation arguments
+    DWORD extraAllocBytes = 0;
     if (pDictLayout != NULL)
     {
-        bytes += sizeof(TADDR);                                 // Slot for dictionary size
-        bytes += pDictLayout->m_numSlots * sizeof(TADDR);       // Slots for dictionary slots based on a dictionary layout
+        DWORD numSlots = VolatileLoadWithoutBarrier(&pDictLayout->m_numSlots);
+
+        slotBytes += sizeof(TADDR);                        // Slot for dictionary size
+        slotBytes += numSlots * sizeof(TADDR);             // Slots for dictionary slots based on a dictionary layout
+
+        if (numSlots > pDictLayout->m_numInitialSlots)
+        {
+            extraAllocBytes = sizeof(PTR_Dictionary);      // Slot for the back-pointer in expanded dictionaries
+        }
     }
 
-    return bytes;
+    *pSlotSize = slotBytes;
+    return slotBytes + extraAllocBytes;
 }
 
 #ifndef DACCESS_COMPILE
@@ -800,10 +813,11 @@ Dictionary* Dictionary::GetMethodDictionaryWithSizeCheck(MethodDesc* pMD, ULONG
             InstantiatedMethodDesc* pIMD = pMD->AsInstantiatedMethodDesc();
             _ASSERTE(pDictLayout != NULL && pDictLayout->GetMaxSlots() > 0);
 
-            DWORD expectedDictionarySize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout);
-            _ASSERT(currentDictionarySize < expectedDictionarySize);
+            DWORD expectedDictionarySlotSize;
+            DWORD expectedDictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout, &expectedDictionarySlotSize);
+            _ASSERT(currentDictionarySize < expectedDictionarySlotSize);
 
-            Dictionary* pNewDictionary = (Dictionary*)(void*)pIMD->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionarySize));
+            Dictionary* pNewDictionary = (Dictionary*)(void*)pIMD->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionaryAllocSize));
 
             // Copy old dictionary entry contents
             for (DWORD i = 0; i < currentDictionarySize / sizeof(DictionaryEntry); i++)
@@ -813,7 +827,8 @@ Dictionary* Dictionary::GetMethodDictionaryWithSizeCheck(MethodDesc* pMD, ULONG
             }
 
             DWORD* pSizeSlot = (DWORD*)(pNewDictionary + numGenericArgs);
-            *pSizeSlot = expectedDictionarySize;
+            *pSizeSlot = expectedDictionarySlotSize;
+            *pNewDictionary->GetBackPointerSlot(numGenericArgs) = pDictionary;
 
             // Publish the new dictionary slots to the type.
             FastInterlockExchangePointer(pIMD->m_pPerInstInfo.GetValuePtr(), pNewDictionary);
@@ -855,11 +870,12 @@ Dictionary* Dictionary::GetTypeDictionaryWithSizeCheck(MethodTable* pMT, ULONG s
             DictionaryLayout* pDictLayout = pMT->GetClass()->GetDictionaryLayout();
             _ASSERTE(pDictLayout != NULL && pDictLayout->GetMaxSlots() > 0);
 
-            DWORD expectedDictionarySize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout);
-            _ASSERT(currentDictionarySize < expectedDictionarySize);
+            DWORD expectedDictionarySlotSize;
+            DWORD expectedDictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout, &expectedDictionarySlotSize);
+            _ASSERT(currentDictionarySize < expectedDictionarySlotSize);
 
             // Expand type dictionary
-            Dictionary* pNewDictionary = (Dictionary*)(void*)pMT->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionarySize));
+            Dictionary* pNewDictionary = (Dictionary*)(void*)pMT->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionaryAllocSize));
 
             // Copy old dictionary entry contents
             for (DWORD i = 0; i < currentDictionarySize / sizeof(DictionaryEntry); i++)
@@ -869,7 +885,8 @@ Dictionary* Dictionary::GetTypeDictionaryWithSizeCheck(MethodTable* pMT, ULONG s
             }
 
             DWORD* pSizeSlot = (DWORD*)(pNewDictionary + numGenericArgs);
-            *pSizeSlot = expectedDictionarySize;
+            *pSizeSlot = expectedDictionarySlotSize;
+            *pNewDictionary->GetBackPointerSlot(numGenericArgs) = pDictionary;
 
             // Publish the new dictionary slots to the type.
             ULONG dictionaryIndex = pMT->GetNumDicts() - 1;
@@ -1490,12 +1507,46 @@ Dictionary::PopulateEntry(
 #if !defined(CROSSGEN_COMPILE)
         if (slotIndex != 0)
         {
-            Dictionary* pDictionary = (pMT != NULL) ?
-                GetTypeDictionaryWithSizeCheck(pMT, slotIndex) :
-                GetMethodDictionaryWithSizeCheck(pMD, slotIndex);
+            Dictionary* pDictionary;
+            DWORD numGenericArgs;
+            DictionaryLayout * pDictLayout;
+            if (pMT != NULL)
+            {
+                pDictionary = GetTypeDictionaryWithSizeCheck(pMT, slotIndex);
+                numGenericArgs = pMT->GetNumGenericArgs();
+                pDictLayout = pMT->GetClass()->GetDictionaryLayout();
+            }
+            else
+            {
+                pDictionary = GetMethodDictionaryWithSizeCheck(pMD, slotIndex);
+                numGenericArgs = pMD->GetNumGenericMethodArgs();
+                pDictLayout = pMD->GetDictionaryLayout();
+            }
+            DWORD minimumSizeOfDictionaryToPatch = (slotIndex + 1) * sizeof(DictionaryEntry *);
+            DWORD sizeOfInitialDictionary = (numGenericArgs + 1 + pDictLayout->GetNumInitialSlots()) * sizeof(DictionaryEntry *);
+
+            DictionaryEntry *slot = pDictionary->GetSlotAddr(0, slotIndex);
+            VolatileStoreWithoutBarrier(slot, (DictionaryEntry)result);
+            *ppSlot = slot;
 
-            VolatileStoreWithoutBarrier(pDictionary->GetSlotAddr(0, slotIndex), (DictionaryEntry)result);
-            *ppSlot = pDictionary->GetSlotAddr(0, slotIndex);
+            // Backpatch previous versions of the generic dictionary
+            DWORD dictionarySize = pDictionary->GetDictionarySlotsSize(numGenericArgs);
+            while (dictionarySize > sizeOfInitialDictionary)
+            {
+                pDictionary = *pDictionary->GetBackPointerSlot(numGenericArgs);
+                if (pDictionary == nullptr)
+                {
+                    // Initial dictionary allocated with higher number of slots than the initial layout slot count
+                    break;
+                }
+                dictionarySize = pDictionary->GetDictionarySlotsSize(numGenericArgs);
+                if (dictionarySize < minimumSizeOfDictionaryToPatch)
+                {
+                    // Previous dictionary is too short to patch, end iteration
+                    break;
+                }
+                VolatileStoreWithoutBarrier(pDictionary->GetSlotAddr(0, slotIndex), (DictionaryEntry)result);
+            }
         }
 #endif // !CROSSGEN_COMPILE
     }
index 8637144..43df310 100644 (file)
@@ -146,9 +146,11 @@ public:
     // Create an initial dictionary layout containing numSlots slots
     static DictionaryLayout* Allocate(WORD numSlots, LoaderAllocator *pAllocator, AllocMemTracker *pamTracker);
 
-    // Bytes used for this dictionary, which might be stored inline in
-    // another structure (e.g. MethodTable)
-    static DWORD GetDictionarySizeFromLayout(DWORD numGenericArgs, PTR_DictionaryLayout pDictLayout);
+    // Total number of bytes used for this dictionary, which might be stored inline in
+    // another structure (e.g. MethodTable). This may include the final back-pointer
+    // to previous dictionaries after dictionary expansion; pSlotSize is used to return
+    // the size to be stored in the "size slot" of the dictionary.
+    static DWORD GetDictionarySizeFromLayout(DWORD numGenericArgs, PTR_DictionaryLayout pDictLayout, DWORD *pSlotSize);
 
     static BOOL FindToken(MethodTable*                      pMT,
                           LoaderAllocator*                  pAllocator,
@@ -171,7 +173,7 @@ public:
     DWORD GetMaxSlots();
     DWORD GetNumInitialSlots();
     DWORD GetNumUsedSlots();
-
+    
     PTR_DictionaryEntryLayout GetEntryLayout(DWORD i)
     {
         LIMITED_METHOD_CONTRACT;
@@ -300,6 +302,12 @@ private:
         LIMITED_METHOD_CONTRACT;
         return VolatileLoadWithoutBarrier((DWORD*)GetSlotAddr(numGenericArgs, 0));
     }
+    
+    inline Dictionary **GetBackPointerSlot(DWORD numGenericArgs)
+    {
+        LIMITED_METHOD_CONTRACT;
+        return (Dictionary **)((uint8_t *)m_pEntries + GetDictionarySlotsSize(numGenericArgs));
+    }
 #endif // #ifndef DACCESS_COMPILE
 
 public:
index 4e95b52..b616b84 100644 (file)
@@ -266,14 +266,15 @@ ClassLoader::CreateTypeHandleForNonCanonicalGenericInstantiation(
     // creating this type. In other words: this type will have a smaller dictionary that its layout. This is not a
     // problem however because whenever we need to load a value from the dictionary of this type beyond its size, we
     // will expand the dictionary at that point.
-    DWORD cbInstAndDict = pOldMT->GetInstAndDictSize();
+    DWORD cbInstAndDictSlotSize;
+    DWORD cbInstAndDictAllocSize = pOldMT->GetInstAndDictSize(&cbInstAndDictSlotSize);
 
     // Allocate from the high frequence heap of the correct domain
     S_SIZE_T allocSize = safe_cbMT;
     allocSize += cbOptional;
     allocSize += cbIMap;
     allocSize += cbPerInst;
-    allocSize += cbInstAndDict;
+    allocSize += cbInstAndDictAllocSize;
 
     if (allocSize.IsOverflow())
     {
@@ -474,7 +475,7 @@ ClassLoader::CreateTypeHandleForNonCanonicalGenericInstantiation(
         _ASSERTE(pLayout->GetMaxSlots() > 0);
         PTR_Dictionary pDictionarySlots = pMT->GetPerInstInfo()[pOldMT->GetNumDicts() - 1].GetValue();
         DWORD* pSizeSlot = (DWORD*)(pDictionarySlots + ntypars);
-        *pSizeSlot = cbInstAndDict;
+        *pSizeSlot = cbInstAndDictSlotSize;
     }
 
     // Copy interface map across
index 80ffce4..3b67c67 100644 (file)
@@ -386,15 +386,17 @@ InstantiatedMethodDesc::NewInstantiatedMethodDesc(MethodTable *pExactMT,
                 {
                     SString name;
                     TypeString::AppendMethodDebug(name, pGenericMDescInRepMT);
-                    LOG((LF_JIT, LL_INFO1000, "GENERICS: Created new dictionary layout for dictionary of size %d for %S\n",
-                        DictionaryLayout::GetDictionarySizeFromLayout(pGenericMDescInRepMT->GetNumGenericMethodArgs(), pDL), name.GetUnicode()));
+                    DWORD dictionarySlotSize;
+                    DWORD dictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(pGenericMDescInRepMT->GetNumGenericMethodArgs(), pDL, &dictionarySlotSize);
+                    LOG((LF_JIT, LL_INFO1000, "GENERICS: Created new dictionary layout for dictionary of slot size %d / alloc size %d for %S\n",
+                        dictionarySlotSize, dictionaryAllocSize, name.GetUnicode()));
                 }
 #endif // _DEBUG
             }
 
             // Allocate space for the instantiation and dictionary
-            infoSize = DictionaryLayout::GetDictionarySizeFromLayout(methodInst.GetNumArgs(), pDL);
-            pInstOrPerInstInfo = (TypeHandle*)(void*)amt.Track(pAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(infoSize)));
+            DWORD allocSize = DictionaryLayout::GetDictionarySizeFromLayout(methodInst.GetNumArgs(), pDL, &infoSize);
+            pInstOrPerInstInfo = (TypeHandle*)(void*)amt.Track(pAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(allocSize)));
             for (DWORD i = 0; i < methodInst.GetNumArgs(); i++)
                 pInstOrPerInstInfo[i] = methodInst[i];
 
index 3fa6028..ed6f44e 100644 (file)
@@ -2664,11 +2664,12 @@ void MethodDesc::Save(DataImage *image)
 
     if (GetMethodDictionary())
     {
-        DWORD cBytes = DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericMethodArgs(), GetDictionaryLayout());
+        DWORD cSlotBytes;
+        DWORD cAllocBytes = DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericMethodArgs(), GetDictionaryLayout(), &cSlotBytes);
         void* pBytes = GetMethodDictionary()->AsPtr();
 
-        LOG((LF_ZAP, LL_INFO10000, "    MethodDesc::Save dictionary size %d\n", cBytes));
-        image->StoreStructure(pBytes, cBytes,
+        LOG((LF_ZAP, LL_INFO10000, "    MethodDesc::Save dictionary size %d\n", cSlotBytes));
+        image->StoreStructure(pBytes, cSlotBytes,
                             DataImage::ITEM_DICTIONARY_WRITEABLE);
     }
 
index 485b826..437162f 100644 (file)
@@ -4109,14 +4109,15 @@ void MethodTable::Save(DataImage *image, DWORD profilingFlags)
             fIsWriteable = FALSE;
         }
 
-
+        DWORD slotSize;
+        DWORD allocSize = GetInstAndDictSize(&slotSize);
         if (!fIsWriteable)
         {
-            image->StoreInternedStructure(pDictionary, GetInstAndDictSize(), DataImage::ITEM_DICTIONARY);
+            image->StoreInternedStructure(pDictionary, slotSize, DataImage::ITEM_DICTIONARY);
         }
         else
         {
-            image->StoreStructure(pDictionary, GetInstAndDictSize(), DataImage::ITEM_DICTIONARY_WRITEABLE);
+            image->StoreStructure(pDictionary, slotSize, DataImage::ITEM_DICTIONARY_WRITEABLE);
         }
     }
 
@@ -8888,7 +8889,9 @@ MethodTable::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
 
     if (GetDictionary() != NULL)
     {
-        DacEnumMemoryRegion(dac_cast<TADDR>(GetDictionary()), GetInstAndDictSize());
+        DWORD slotSize;
+        DWORD allocSize = GetInstAndDictSize(&slotSize);
+        DacEnumMemoryRegion(dac_cast<TADDR>(GetDictionary()), slotSize);
     }
 
     VtableIndirectionSlotIterator it = IterateVtableIndirectionSlots();
index 16f8d7d..6018c24 100644 (file)
@@ -3929,8 +3929,10 @@ private:
     inline DWORD GetInterfaceMapSize();
 
     // The instantiation/dictionary comes at the end of the MethodTable after
-    //  the interface map.
-    inline DWORD GetInstAndDictSize();
+    // the interface map. This is the total number of bytes used by the dictionary.
+    // The pSlotSize argument is used to return the size occupied by slots (not including
+    // the optional back pointer used when expanding dictionaries).
+    inline DWORD GetInstAndDictSize(DWORD *pSlotSize);
 
 private:
     // Helper template to compute the offsets at compile time
index a20e1b4..07b2c3c 100644 (file)
@@ -1092,14 +1092,14 @@ inline DWORD MethodTable::GetInterfaceMapSize()
 //==========================================================================================
 // These are the generic dictionaries themselves and are come after
 //  the interface map.  In principle they need not be inline in the method table.
-inline DWORD MethodTable::GetInstAndDictSize()
+inline DWORD MethodTable::GetInstAndDictSize(DWORD *pSlotSize)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
     if (!HasInstantiation())
-        return 0;
+        return *pSlotSize = 0;
     else
-        return DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericArgs(), GetClass()->GetDictionaryLayout());
+        return DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericArgs(), GetClass()->GetDictionaryLayout(), pSlotSize);
 }
 
 //==========================================================================================
index fc14d39..666bb3b 100644 (file)
@@ -10114,10 +10114,12 @@ MethodTableBuilder::SetupMethodTable2(
 
     EEClass *pClass = GetHalfBakedClass();
 
-    DWORD cbDict = bmtGenerics->HasInstantiation()
-                   ?  DictionaryLayout::GetDictionarySizeFromLayout(
-                          bmtGenerics->GetNumGenericArgs(), pClass->GetDictionaryLayout())
-                   : 0;
+    DWORD cbDictSlotSize = 0;
+    DWORD cbDictAllocSize = 0;
+    if (bmtGenerics->HasInstantiation())
+    {
+        cbDictAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(bmtGenerics->GetNumGenericArgs(), pClass->GetDictionaryLayout(), &cbDictSlotSize);
+    }
 
 #ifdef FEATURE_COLLECTIBLE_TYPES
     BOOL fCollectible = pLoaderModule->IsCollectible();
@@ -10150,7 +10152,7 @@ MethodTableBuilder::SetupMethodTable2(
                                    dwGCSize,
                                    bmtInterface->dwInterfaceMapSize,
                                    bmtGenerics->numDicts,
-                                   cbDict,
+                                   cbDictAllocSize,
                                    GetParentMethodTable(),
                                    GetClassLoader(),
                                    bmtAllocator,
@@ -10370,7 +10372,7 @@ MethodTableBuilder::SetupMethodTable2(
 
             PTR_Dictionary pDictionarySlots = pMT->GetPerInstInfo()[bmtGenerics->numDicts - 1].GetValue();
             DWORD* pSizeSlot = (DWORD*)(pDictionarySlots + bmtGenerics->GetNumGenericArgs());
-            *pSizeSlot = cbDict;
+            *pSizeSlot = cbDictSlotSize;
         }
     }