From: Tomáš Rylek Date: Fri, 7 Aug 2020 23:17:32 +0000 (+0200) Subject: Continue updating previous generic dictionaries after expansion (#40355) X-Git-Tag: submit/tizen/20210909.063632~6134 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1c7946bb0892005a58ec647e3a6d8f3514cdc6b;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Continue updating previous generic dictionaries after expansion (#40355) Fix perf regression after introduction of expandable generic dictionaries. When we expand a generic dictionary, we should back propagate the change to previously allocated shorter versions of the same dictionary, otherwise already running JITted code may continue referring to the "old version" of the dictionary implying repeated slow path generic lookups. Thanks Tomas --- diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 5e575d1..463054b 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -2117,11 +2117,18 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken nullptr DEBUGARG("impRuntimeLookup indirectOffset")); } + // The last indirection could be subject to a size check (dynamic dictionary expansion) + bool isLastIndirectionWithSizeCheck = + ((i == pRuntimeLookup->indirections - 1) && (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)); + if (i != 0) { slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + if (!isLastIndirectionWithSizeCheck) + { + slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + } } if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) @@ -2131,8 +2138,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken if (pRuntimeLookup->offsets[i] != 0) { - // The last indirection could be subject to a size check (dynamic dictionary expansion) - if (i == pRuntimeLookup->indirections - 1 && pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) + if (isLastIndirectionWithSizeCheck) { lastIndOfTree = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("impRuntimeLookup indirectOffset")); diff --git a/src/coreclr/src/vm/genericdict.cpp b/src/coreclr/src/vm/genericdict.cpp index 1fb0584..37044ef 100644 --- a/src/coreclr/src/vm/genericdict.cpp +++ b/src/coreclr/src/vm/genericdict.cpp @@ -71,26 +71,39 @@ DictionaryLayout* DictionaryLayout::Allocate(WORD numSlots, //--------------------------------------------------------------------------------------- // -// Count the number of bytes that are required in a dictionary with the specified layout +// Total number of bytes for a dictionary with the specified layout (including optional back pointer +// used by expanded dictionaries). The pSlotSize argument is used to return the size +// to be stored in the size slot of the dictionary (not including the optional back pointer). // //static DWORD DictionaryLayout::GetDictionarySizeFromLayout( DWORD numGenericArgs, - PTR_DictionaryLayout pDictLayout) + PTR_DictionaryLayout pDictLayout, + DWORD* pSlotSize) { LIMITED_METHOD_DAC_CONTRACT; PRECONDITION(numGenericArgs > 0); PRECONDITION(CheckPointer(pDictLayout, NULL_OK)); + PRECONDITION(CheckPointer(pSlotSize)); - DWORD bytes = numGenericArgs * sizeof(TypeHandle); // Slots for instantiation arguments + DWORD slotBytes = numGenericArgs * sizeof(TypeHandle); // Slots for instantiation arguments + DWORD extraAllocBytes = 0; if (pDictLayout != NULL) { - bytes += sizeof(TADDR); // Slot for dictionary size - bytes += pDictLayout->m_numSlots * sizeof(TADDR); // Slots for dictionary slots based on a dictionary layout + DWORD numSlots = VolatileLoadWithoutBarrier(&pDictLayout->m_numSlots); + + slotBytes += sizeof(TADDR); // Slot for dictionary size + slotBytes += numSlots * sizeof(TADDR); // Slots for dictionary slots based on a dictionary layout + + if (numSlots > pDictLayout->m_numInitialSlots) + { + extraAllocBytes = sizeof(PTR_Dictionary); // Slot for the back-pointer in expanded dictionaries + } } - return bytes; + *pSlotSize = slotBytes; + return slotBytes + extraAllocBytes; } #ifndef DACCESS_COMPILE @@ -800,10 +813,11 @@ Dictionary* Dictionary::GetMethodDictionaryWithSizeCheck(MethodDesc* pMD, ULONG InstantiatedMethodDesc* pIMD = pMD->AsInstantiatedMethodDesc(); _ASSERTE(pDictLayout != NULL && pDictLayout->GetMaxSlots() > 0); - DWORD expectedDictionarySize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout); - _ASSERT(currentDictionarySize < expectedDictionarySize); + DWORD expectedDictionarySlotSize; + DWORD expectedDictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout, &expectedDictionarySlotSize); + _ASSERT(currentDictionarySize < expectedDictionarySlotSize); - Dictionary* pNewDictionary = (Dictionary*)(void*)pIMD->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionarySize)); + Dictionary* pNewDictionary = (Dictionary*)(void*)pIMD->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionaryAllocSize)); // Copy old dictionary entry contents for (DWORD i = 0; i < currentDictionarySize / sizeof(DictionaryEntry); i++) @@ -813,7 +827,8 @@ Dictionary* Dictionary::GetMethodDictionaryWithSizeCheck(MethodDesc* pMD, ULONG } DWORD* pSizeSlot = (DWORD*)(pNewDictionary + numGenericArgs); - *pSizeSlot = expectedDictionarySize; + *pSizeSlot = expectedDictionarySlotSize; + *pNewDictionary->GetBackPointerSlot(numGenericArgs) = pDictionary; // Publish the new dictionary slots to the type. FastInterlockExchangePointer(pIMD->m_pPerInstInfo.GetValuePtr(), pNewDictionary); @@ -855,11 +870,12 @@ Dictionary* Dictionary::GetTypeDictionaryWithSizeCheck(MethodTable* pMT, ULONG s DictionaryLayout* pDictLayout = pMT->GetClass()->GetDictionaryLayout(); _ASSERTE(pDictLayout != NULL && pDictLayout->GetMaxSlots() > 0); - DWORD expectedDictionarySize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout); - _ASSERT(currentDictionarySize < expectedDictionarySize); + DWORD expectedDictionarySlotSize; + DWORD expectedDictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(numGenericArgs, pDictLayout, &expectedDictionarySlotSize); + _ASSERT(currentDictionarySize < expectedDictionarySlotSize); // Expand type dictionary - Dictionary* pNewDictionary = (Dictionary*)(void*)pMT->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionarySize)); + Dictionary* pNewDictionary = (Dictionary*)(void*)pMT->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(expectedDictionaryAllocSize)); // Copy old dictionary entry contents for (DWORD i = 0; i < currentDictionarySize / sizeof(DictionaryEntry); i++) @@ -869,7 +885,8 @@ Dictionary* Dictionary::GetTypeDictionaryWithSizeCheck(MethodTable* pMT, ULONG s } DWORD* pSizeSlot = (DWORD*)(pNewDictionary + numGenericArgs); - *pSizeSlot = expectedDictionarySize; + *pSizeSlot = expectedDictionarySlotSize; + *pNewDictionary->GetBackPointerSlot(numGenericArgs) = pDictionary; // Publish the new dictionary slots to the type. ULONG dictionaryIndex = pMT->GetNumDicts() - 1; @@ -1490,12 +1507,46 @@ Dictionary::PopulateEntry( #if !defined(CROSSGEN_COMPILE) if (slotIndex != 0) { - Dictionary* pDictionary = (pMT != NULL) ? - GetTypeDictionaryWithSizeCheck(pMT, slotIndex) : - GetMethodDictionaryWithSizeCheck(pMD, slotIndex); + Dictionary* pDictionary; + DWORD numGenericArgs; + DictionaryLayout * pDictLayout; + if (pMT != NULL) + { + pDictionary = GetTypeDictionaryWithSizeCheck(pMT, slotIndex); + numGenericArgs = pMT->GetNumGenericArgs(); + pDictLayout = pMT->GetClass()->GetDictionaryLayout(); + } + else + { + pDictionary = GetMethodDictionaryWithSizeCheck(pMD, slotIndex); + numGenericArgs = pMD->GetNumGenericMethodArgs(); + pDictLayout = pMD->GetDictionaryLayout(); + } + DWORD minimumSizeOfDictionaryToPatch = (slotIndex + 1) * sizeof(DictionaryEntry *); + DWORD sizeOfInitialDictionary = (numGenericArgs + 1 + pDictLayout->GetNumInitialSlots()) * sizeof(DictionaryEntry *); + + DictionaryEntry *slot = pDictionary->GetSlotAddr(0, slotIndex); + VolatileStoreWithoutBarrier(slot, (DictionaryEntry)result); + *ppSlot = slot; - VolatileStoreWithoutBarrier(pDictionary->GetSlotAddr(0, slotIndex), (DictionaryEntry)result); - *ppSlot = pDictionary->GetSlotAddr(0, slotIndex); + // Backpatch previous versions of the generic dictionary + DWORD dictionarySize = pDictionary->GetDictionarySlotsSize(numGenericArgs); + while (dictionarySize > sizeOfInitialDictionary) + { + pDictionary = *pDictionary->GetBackPointerSlot(numGenericArgs); + if (pDictionary == nullptr) + { + // Initial dictionary allocated with higher number of slots than the initial layout slot count + break; + } + dictionarySize = pDictionary->GetDictionarySlotsSize(numGenericArgs); + if (dictionarySize < minimumSizeOfDictionaryToPatch) + { + // Previous dictionary is too short to patch, end iteration + break; + } + VolatileStoreWithoutBarrier(pDictionary->GetSlotAddr(0, slotIndex), (DictionaryEntry)result); + } } #endif // !CROSSGEN_COMPILE } diff --git a/src/coreclr/src/vm/genericdict.h b/src/coreclr/src/vm/genericdict.h index 8637144..43df310 100644 --- a/src/coreclr/src/vm/genericdict.h +++ b/src/coreclr/src/vm/genericdict.h @@ -146,9 +146,11 @@ public: // Create an initial dictionary layout containing numSlots slots static DictionaryLayout* Allocate(WORD numSlots, LoaderAllocator *pAllocator, AllocMemTracker *pamTracker); - // Bytes used for this dictionary, which might be stored inline in - // another structure (e.g. MethodTable) - static DWORD GetDictionarySizeFromLayout(DWORD numGenericArgs, PTR_DictionaryLayout pDictLayout); + // Total number of bytes used for this dictionary, which might be stored inline in + // another structure (e.g. MethodTable). This may include the final back-pointer + // to previous dictionaries after dictionary expansion; pSlotSize is used to return + // the size to be stored in the "size slot" of the dictionary. + static DWORD GetDictionarySizeFromLayout(DWORD numGenericArgs, PTR_DictionaryLayout pDictLayout, DWORD *pSlotSize); static BOOL FindToken(MethodTable* pMT, LoaderAllocator* pAllocator, @@ -171,7 +173,7 @@ public: DWORD GetMaxSlots(); DWORD GetNumInitialSlots(); DWORD GetNumUsedSlots(); - + PTR_DictionaryEntryLayout GetEntryLayout(DWORD i) { LIMITED_METHOD_CONTRACT; @@ -300,6 +302,12 @@ private: LIMITED_METHOD_CONTRACT; return VolatileLoadWithoutBarrier((DWORD*)GetSlotAddr(numGenericArgs, 0)); } + + inline Dictionary **GetBackPointerSlot(DWORD numGenericArgs) + { + LIMITED_METHOD_CONTRACT; + return (Dictionary **)((uint8_t *)m_pEntries + GetDictionarySlotsSize(numGenericArgs)); + } #endif // #ifndef DACCESS_COMPILE public: diff --git a/src/coreclr/src/vm/generics.cpp b/src/coreclr/src/vm/generics.cpp index 4e95b52..b616b84 100644 --- a/src/coreclr/src/vm/generics.cpp +++ b/src/coreclr/src/vm/generics.cpp @@ -266,14 +266,15 @@ ClassLoader::CreateTypeHandleForNonCanonicalGenericInstantiation( // creating this type. In other words: this type will have a smaller dictionary that its layout. This is not a // problem however because whenever we need to load a value from the dictionary of this type beyond its size, we // will expand the dictionary at that point. - DWORD cbInstAndDict = pOldMT->GetInstAndDictSize(); + DWORD cbInstAndDictSlotSize; + DWORD cbInstAndDictAllocSize = pOldMT->GetInstAndDictSize(&cbInstAndDictSlotSize); // Allocate from the high frequence heap of the correct domain S_SIZE_T allocSize = safe_cbMT; allocSize += cbOptional; allocSize += cbIMap; allocSize += cbPerInst; - allocSize += cbInstAndDict; + allocSize += cbInstAndDictAllocSize; if (allocSize.IsOverflow()) { @@ -474,7 +475,7 @@ ClassLoader::CreateTypeHandleForNonCanonicalGenericInstantiation( _ASSERTE(pLayout->GetMaxSlots() > 0); PTR_Dictionary pDictionarySlots = pMT->GetPerInstInfo()[pOldMT->GetNumDicts() - 1].GetValue(); DWORD* pSizeSlot = (DWORD*)(pDictionarySlots + ntypars); - *pSizeSlot = cbInstAndDict; + *pSizeSlot = cbInstAndDictSlotSize; } // Copy interface map across diff --git a/src/coreclr/src/vm/genmeth.cpp b/src/coreclr/src/vm/genmeth.cpp index 80ffce4..3b67c67 100644 --- a/src/coreclr/src/vm/genmeth.cpp +++ b/src/coreclr/src/vm/genmeth.cpp @@ -386,15 +386,17 @@ InstantiatedMethodDesc::NewInstantiatedMethodDesc(MethodTable *pExactMT, { SString name; TypeString::AppendMethodDebug(name, pGenericMDescInRepMT); - LOG((LF_JIT, LL_INFO1000, "GENERICS: Created new dictionary layout for dictionary of size %d for %S\n", - DictionaryLayout::GetDictionarySizeFromLayout(pGenericMDescInRepMT->GetNumGenericMethodArgs(), pDL), name.GetUnicode())); + DWORD dictionarySlotSize; + DWORD dictionaryAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(pGenericMDescInRepMT->GetNumGenericMethodArgs(), pDL, &dictionarySlotSize); + LOG((LF_JIT, LL_INFO1000, "GENERICS: Created new dictionary layout for dictionary of slot size %d / alloc size %d for %S\n", + dictionarySlotSize, dictionaryAllocSize, name.GetUnicode())); } #endif // _DEBUG } // Allocate space for the instantiation and dictionary - infoSize = DictionaryLayout::GetDictionarySizeFromLayout(methodInst.GetNumArgs(), pDL); - pInstOrPerInstInfo = (TypeHandle*)(void*)amt.Track(pAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(infoSize))); + DWORD allocSize = DictionaryLayout::GetDictionarySizeFromLayout(methodInst.GetNumArgs(), pDL, &infoSize); + pInstOrPerInstInfo = (TypeHandle*)(void*)amt.Track(pAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(allocSize))); for (DWORD i = 0; i < methodInst.GetNumArgs(); i++) pInstOrPerInstInfo[i] = methodInst[i]; diff --git a/src/coreclr/src/vm/method.cpp b/src/coreclr/src/vm/method.cpp index 3fa6028..ed6f44e 100644 --- a/src/coreclr/src/vm/method.cpp +++ b/src/coreclr/src/vm/method.cpp @@ -2664,11 +2664,12 @@ void MethodDesc::Save(DataImage *image) if (GetMethodDictionary()) { - DWORD cBytes = DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericMethodArgs(), GetDictionaryLayout()); + DWORD cSlotBytes; + DWORD cAllocBytes = DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericMethodArgs(), GetDictionaryLayout(), &cSlotBytes); void* pBytes = GetMethodDictionary()->AsPtr(); - LOG((LF_ZAP, LL_INFO10000, " MethodDesc::Save dictionary size %d\n", cBytes)); - image->StoreStructure(pBytes, cBytes, + LOG((LF_ZAP, LL_INFO10000, " MethodDesc::Save dictionary size %d\n", cSlotBytes)); + image->StoreStructure(pBytes, cSlotBytes, DataImage::ITEM_DICTIONARY_WRITEABLE); } diff --git a/src/coreclr/src/vm/methodtable.cpp b/src/coreclr/src/vm/methodtable.cpp index 485b826..437162f 100644 --- a/src/coreclr/src/vm/methodtable.cpp +++ b/src/coreclr/src/vm/methodtable.cpp @@ -4109,14 +4109,15 @@ void MethodTable::Save(DataImage *image, DWORD profilingFlags) fIsWriteable = FALSE; } - + DWORD slotSize; + DWORD allocSize = GetInstAndDictSize(&slotSize); if (!fIsWriteable) { - image->StoreInternedStructure(pDictionary, GetInstAndDictSize(), DataImage::ITEM_DICTIONARY); + image->StoreInternedStructure(pDictionary, slotSize, DataImage::ITEM_DICTIONARY); } else { - image->StoreStructure(pDictionary, GetInstAndDictSize(), DataImage::ITEM_DICTIONARY_WRITEABLE); + image->StoreStructure(pDictionary, slotSize, DataImage::ITEM_DICTIONARY_WRITEABLE); } } @@ -8888,7 +8889,9 @@ MethodTable::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) if (GetDictionary() != NULL) { - DacEnumMemoryRegion(dac_cast(GetDictionary()), GetInstAndDictSize()); + DWORD slotSize; + DWORD allocSize = GetInstAndDictSize(&slotSize); + DacEnumMemoryRegion(dac_cast(GetDictionary()), slotSize); } VtableIndirectionSlotIterator it = IterateVtableIndirectionSlots(); diff --git a/src/coreclr/src/vm/methodtable.h b/src/coreclr/src/vm/methodtable.h index 16f8d7db..6018c24 100644 --- a/src/coreclr/src/vm/methodtable.h +++ b/src/coreclr/src/vm/methodtable.h @@ -3929,8 +3929,10 @@ private: inline DWORD GetInterfaceMapSize(); // The instantiation/dictionary comes at the end of the MethodTable after - // the interface map. - inline DWORD GetInstAndDictSize(); + // the interface map. This is the total number of bytes used by the dictionary. + // The pSlotSize argument is used to return the size occupied by slots (not including + // the optional back pointer used when expanding dictionaries). + inline DWORD GetInstAndDictSize(DWORD *pSlotSize); private: // Helper template to compute the offsets at compile time diff --git a/src/coreclr/src/vm/methodtable.inl b/src/coreclr/src/vm/methodtable.inl index a20e1b4..07b2c3c 100644 --- a/src/coreclr/src/vm/methodtable.inl +++ b/src/coreclr/src/vm/methodtable.inl @@ -1092,14 +1092,14 @@ inline DWORD MethodTable::GetInterfaceMapSize() //========================================================================================== // These are the generic dictionaries themselves and are come after // the interface map. In principle they need not be inline in the method table. -inline DWORD MethodTable::GetInstAndDictSize() +inline DWORD MethodTable::GetInstAndDictSize(DWORD *pSlotSize) { LIMITED_METHOD_DAC_CONTRACT; if (!HasInstantiation()) - return 0; + return *pSlotSize = 0; else - return DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericArgs(), GetClass()->GetDictionaryLayout()); + return DictionaryLayout::GetDictionarySizeFromLayout(GetNumGenericArgs(), GetClass()->GetDictionaryLayout(), pSlotSize); } //========================================================================================== diff --git a/src/coreclr/src/vm/methodtablebuilder.cpp b/src/coreclr/src/vm/methodtablebuilder.cpp index fc14d39..666bb3b 100644 --- a/src/coreclr/src/vm/methodtablebuilder.cpp +++ b/src/coreclr/src/vm/methodtablebuilder.cpp @@ -10114,10 +10114,12 @@ MethodTableBuilder::SetupMethodTable2( EEClass *pClass = GetHalfBakedClass(); - DWORD cbDict = bmtGenerics->HasInstantiation() - ? DictionaryLayout::GetDictionarySizeFromLayout( - bmtGenerics->GetNumGenericArgs(), pClass->GetDictionaryLayout()) - : 0; + DWORD cbDictSlotSize = 0; + DWORD cbDictAllocSize = 0; + if (bmtGenerics->HasInstantiation()) + { + cbDictAllocSize = DictionaryLayout::GetDictionarySizeFromLayout(bmtGenerics->GetNumGenericArgs(), pClass->GetDictionaryLayout(), &cbDictSlotSize); + } #ifdef FEATURE_COLLECTIBLE_TYPES BOOL fCollectible = pLoaderModule->IsCollectible(); @@ -10150,7 +10152,7 @@ MethodTableBuilder::SetupMethodTable2( dwGCSize, bmtInterface->dwInterfaceMapSize, bmtGenerics->numDicts, - cbDict, + cbDictAllocSize, GetParentMethodTable(), GetClassLoader(), bmtAllocator, @@ -10370,7 +10372,7 @@ MethodTableBuilder::SetupMethodTable2( PTR_Dictionary pDictionarySlots = pMT->GetPerInstInfo()[bmtGenerics->numDicts - 1].GetValue(); DWORD* pSizeSlot = (DWORD*)(pDictionarySlots + bmtGenerics->GetNumGenericArgs()); - *pSizeSlot = cbDict; + *pSizeSlot = cbDictSlotSize; } }