From: Jakob Botsch Nielsen Date: Mon, 19 Jun 2023 16:20:58 +0000 (+0200) Subject: JIT: Add some limits in physical promotion (#87729) X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~1527 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=feff67d5ca49b892fb00ce2415c25e2fd6ed6476;p=platform%2Fupstream%2Fdotnet%2Fruntime.git JIT: Add some limits in physical promotion (#87729) Add limits on how many fields we promote in each struct and how many total fields we promote. These limits are put in place to avoid pathological cases; we do not hit any of them over all of our SPMI collections (at least on win-x64). The limits were selected based on histograms of physical promotion stats over our collections. Here they are for some of the important collections: JitEnablePhysicalPromotion=1: benchmarks.run_pgo: ``` (Per context) How many fields are promoted: <= 0 ===> 1654 count ( 66% of total) 1 .. 1 ===> 206 count ( 75% of total) 2 .. 2 ===> 259 count ( 85% of total) 3 .. 3 ===> 127 count ( 90% of total) 4 .. 4 ===> 46 count ( 92% of total) 5 .. 5 ===> 30 count ( 93% of total) 6 .. 10 ===> 99 count ( 97% of total) 11 .. 15 ===> 17 count ( 98% of total) 16 .. 20 ===> 21 count ( 99% of total) 21 .. 30 ===> 16 count ( 99% of total) 31 .. 40 ===> 3 count (100% of total) 41 .. 50 ===> 0 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 200 ===> 0 count (100% of total) 201 .. 300 ===> 0 count (100% of total) 301 .. 400 ===> 0 count (100% of total) 401 .. 500 ===> 0 count (100% of total) 501 .. 700 ===> 0 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 980 count ( 39% of total) 1 .. 1 ===> 760 count ( 69% of total) 2 .. 2 ===> 423 count ( 86% of total) 3 .. 
3 ===> 73 count ( 89% of total) 4 .. 4 ===> 30 count ( 90% of total) 5 .. 5 ===> 46 count ( 92% of total) 6 .. 6 ===> 181 count ( 99% of total) 7 .. 8 ===> 2 count (100% of total) 9 .. 10 ===> 0 count (100% of total) 11 .. 15 ===> 0 count (100% of total) 16 .. 20 ===> 0 count (100% of total) 21 .. 30 ===> 0 count (100% of total) 31 .. 50 ===> 0 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 400 ===> 0 count (100% of total) ``` libraries.pmi: ``` (Per context) How many fields are promoted: <= 0 ===> 19677 count ( 76% of total) 1 .. 1 ===> 1717 count ( 83% of total) 2 .. 2 ===> 899 count ( 86% of total) 3 .. 3 ===> 616 count ( 89% of total) 4 .. 4 ===> 660 count ( 91% of total) 5 .. 5 ===> 285 count ( 92% of total) 6 .. 10 ===> 932 count ( 96% of total) 11 .. 15 ===> 377 count ( 98% of total) 16 .. 20 ===> 164 count ( 98% of total) 21 .. 30 ===> 165 count ( 99% of total) 31 .. 40 ===> 68 count ( 99% of total) 41 .. 50 ===> 28 count ( 99% of total) 51 .. 100 ===> 51 count ( 99% of total) 101 .. 150 ===> 14 count ( 99% of total) 151 .. 200 ===> 2 count ( 99% of total) 201 .. 300 ===> 0 count ( 99% of total) 301 .. 400 ===> 1 count (100% of total) 401 .. 500 ===> 0 count (100% of total) 501 .. 700 ===> 0 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 6218 count ( 31% of total) 1 .. 1 ===> 5075 count ( 57% of total) 2 .. 2 ===> 1440 count ( 65% of total) 3 .. 3 ===> 1859 count ( 74% of total) 4 .. 4 ===> 2682 count ( 88% of total) 5 .. 5 ===> 974 count ( 93% of total) 6 .. 6 ===> 624 count ( 96% of total) 7 .. 8 ===> 533 count ( 99% of total) 9 .. 10 ===> 37 count ( 99% of total) 11 .. 15 ===> 65 count ( 99% of total) 16 .. 20 ===> 2 count ( 99% of total) 21 .. 
30 ===> 0 count ( 99% of total) 31 .. 50 ===> 1 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 400 ===> 0 count (100% of total) ``` realworld: ``` (Per context) How many fields are promoted: <= 0 ===> 1807 count ( 54% of total) 1 .. 1 ===> 407 count ( 66% of total) 2 .. 2 ===> 230 count ( 73% of total) 3 .. 3 ===> 146 count ( 77% of total) 4 .. 4 ===> 122 count ( 81% of total) 5 .. 5 ===> 86 count ( 83% of total) 6 .. 10 ===> 218 count ( 90% of total) 11 .. 15 ===> 147 count ( 94% of total) 16 .. 20 ===> 59 count ( 96% of total) 21 .. 30 ===> 66 count ( 98% of total) 31 .. 40 ===> 19 count ( 98% of total) 41 .. 50 ===> 12 count ( 99% of total) 51 .. 100 ===> 14 count ( 99% of total) 101 .. 150 ===> 3 count ( 99% of total) 151 .. 200 ===> 2 count ( 99% of total) 201 .. 300 ===> 4 count ( 99% of total) 301 .. 400 ===> 0 count ( 99% of total) 401 .. 500 ===> 0 count ( 99% of total) 501 .. 700 ===> 2 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 2012 count ( 32% of total) 1 .. 1 ===> 1421 count ( 55% of total) 2 .. 2 ===> 499 count ( 63% of total) 3 .. 3 ===> 521 count ( 71% of total) 4 .. 4 ===> 565 count ( 80% of total) 5 .. 5 ===> 446 count ( 87% of total) 6 .. 6 ===> 503 count ( 96% of total) 7 .. 8 ===> 139 count ( 98% of total) 9 .. 10 ===> 71 count ( 99% of total) 11 .. 15 ===> 34 count ( 99% of total) 16 .. 20 ===> 3 count (100% of total) 21 .. 30 ===> 0 count (100% of total) 31 .. 50 ===> 0 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 
400 ===> 0 count (100% of total) ``` JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION: benchmarks.run_pgo: ``` (Per context) How many fields are promoted: <= 0 ===> 1654 count ( 66% of total) 1 .. 1 ===> 206 count ( 75% of total) 2 .. 2 ===> 259 count ( 85% of total) 3 .. 3 ===> 127 count ( 90% of total) 4 .. 4 ===> 46 count ( 92% of total) 5 .. 5 ===> 30 count ( 93% of total) 6 .. 10 ===> 99 count ( 97% of total) 11 .. 15 ===> 17 count ( 98% of total) 16 .. 20 ===> 21 count ( 99% of total) 21 .. 30 ===> 16 count ( 99% of total) 31 .. 40 ===> 3 count (100% of total) 41 .. 50 ===> 0 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 200 ===> 0 count (100% of total) 201 .. 300 ===> 0 count (100% of total) 301 .. 400 ===> 0 count (100% of total) 401 .. 500 ===> 0 count (100% of total) 501 .. 700 ===> 0 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 980 count ( 39% of total) 1 .. 1 ===> 760 count ( 69% of total) 2 .. 2 ===> 423 count ( 86% of total) 3 .. 3 ===> 73 count ( 89% of total) 4 .. 4 ===> 30 count ( 90% of total) 5 .. 5 ===> 46 count ( 92% of total) 6 .. 6 ===> 181 count ( 99% of total) 7 .. 8 ===> 2 count (100% of total) 9 .. 10 ===> 0 count (100% of total) 11 .. 15 ===> 0 count (100% of total) 16 .. 20 ===> 0 count (100% of total) 21 .. 30 ===> 0 count (100% of total) 31 .. 50 ===> 0 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 400 ===> 0 count (100% of total) ``` libraries.pmi: ``` (Per context) How many fields are promoted: <= 0 ===> 37477 count ( 48% of total) 1 .. 1 ===> 7987 count ( 58% of total) 2 .. 2 ===> 7292 count ( 68% of total) 3 .. 3 ===> 2988 count ( 72% of total) 4 .. 
4 ===> 4927 count ( 78% of total) 5 .. 5 ===> 1835 count ( 81% of total) 6 .. 10 ===> 7147 count ( 90% of total) 11 .. 15 ===> 2694 count ( 93% of total) 16 .. 20 ===> 1667 count ( 95% of total) 21 .. 30 ===> 1346 count ( 97% of total) 31 .. 40 ===> 758 count ( 98% of total) 41 .. 50 ===> 335 count ( 99% of total) 51 .. 100 ===> 510 count ( 99% of total) 101 .. 150 ===> 107 count ( 99% of total) 151 .. 200 ===> 34 count ( 99% of total) 201 .. 300 ===> 12 count ( 99% of total) 301 .. 400 ===> 2 count ( 99% of total) 401 .. 500 ===> 2 count (100% of total) 501 .. 700 ===> 0 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 102669 count ( 39% of total) 1 .. 1 ===> 57467 count ( 61% of total) 2 .. 2 ===> 69010 count ( 88% of total) 3 .. 3 ===> 11494 count ( 92% of total) 4 .. 4 ===> 16887 count ( 99% of total) 5 .. 5 ===> 1043 count ( 99% of total) 6 .. 6 ===> 618 count ( 99% of total) 7 .. 8 ===> 579 count ( 99% of total) 9 .. 10 ===> 57 count ( 99% of total) 11 .. 15 ===> 71 count ( 99% of total) 16 .. 20 ===> 5 count ( 99% of total) 21 .. 30 ===> 0 count ( 99% of total) 31 .. 50 ===> 1 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 400 ===> 0 count (100% of total) ``` realworld: ``` (Per context) How many fields are promoted: <= 0 ===> 5617 count ( 41% of total) 1 .. 1 ===> 1657 count ( 53% of total) 2 .. 2 ===> 1164 count ( 62% of total) 3 .. 3 ===> 522 count ( 66% of total) 4 .. 4 ===> 878 count ( 72% of total) 5 .. 5 ===> 472 count ( 76% of total) 6 .. 10 ===> 1383 count ( 86% of total) 11 .. 15 ===> 619 count ( 91% of total) 16 .. 20 ===> 335 count ( 93% of total) 21 .. 30 ===> 361 count ( 96% of total) 31 .. 40 ===> 165 count ( 97% of total) 41 .. 
50 ===> 101 count ( 98% of total) 51 .. 100 ===> 177 count ( 99% of total) 101 .. 150 ===> 38 count ( 99% of total) 151 .. 200 ===> 18 count ( 99% of total) 201 .. 300 ===> 12 count ( 99% of total) 301 .. 400 ===> 1 count ( 99% of total) 401 .. 500 ===> 1 count ( 99% of total) 501 .. 700 ===> 2 count (100% of total) 701 .. 1000 ===> 0 count (100% of total) 1001 .. 2000 ===> 0 count (100% of total) 2001 .. 3000 ===> 0 count (100% of total) 3001 .. 4000 ===> 0 count (100% of total) 4001 .. 5000 ===> 0 count (100% of total) (Per struct) How many fields are promoted: <= 0 ===> 23630 count ( 37% of total) 1 .. 1 ===> 13267 count ( 59% of total) 2 .. 2 ===> 16731 count ( 86% of total) 3 .. 3 ===> 4677 count ( 93% of total) 4 .. 4 ===> 2727 count ( 97% of total) 5 .. 5 ===> 492 count ( 98% of total) 6 .. 6 ===> 496 count ( 99% of total) 7 .. 8 ===> 150 count ( 99% of total) 9 .. 10 ===> 92 count ( 99% of total) 11 .. 15 ===> 41 count ( 99% of total) 16 .. 20 ===> 3 count ( 99% of total) 21 .. 30 ===> 3 count ( 99% of total) 31 .. 50 ===> 3 count (100% of total) 51 .. 100 ===> 0 count (100% of total) 101 .. 200 ===> 0 count (100% of total) 201 .. 400 ===> 0 count (100% of total) ``` --- diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index b211ed9..da79080 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -382,16 +382,22 @@ public: // lclNum - Local num for this struct local // aggregateInfo - [out] Pointer to aggregate info to create and insert replacements into. // - void PickPromotions(Compiler* comp, unsigned lclNum, AggregateInfo** aggregateInfo) + // Returns: + // Number of promotions picked. 
+ // + int PickPromotions(Compiler* comp, unsigned lclNum, AggregateInfo** aggregateInfo) { if (m_accesses.size() <= 0) { - return; + return 0; } + AggregateInfo*& agg = *aggregateInfo; + JITDUMP("Picking promotions for V%02u\n", lclNum); - assert(*aggregateInfo == nullptr); + assert(agg == nullptr); + int numReps = 0; for (size_t i = 0; i < m_accesses.size(); i++) { const Access& access = m_accesses[i]; @@ -406,15 +412,24 @@ public: continue; } - if (*aggregateInfo == nullptr) + if (agg == nullptr) { - *aggregateInfo = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); + agg = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); } - (*aggregateInfo)->Replacements.push_back(Replacement(access.Offset, access.AccessType)); + agg->Replacements.push_back(Replacement(access.Offset, access.AccessType)); + numReps++; + + if (agg->Replacements.size() >= PHYSICAL_PROMOTION_MAX_PROMOTIONS_PER_STRUCT) + { + JITDUMP(" Promoted %zu fields in V%02u; will not promote more\n", agg->Replacements.size(), + agg->LclNum); + break; + } } JITDUMP("\n"); + return numReps; } //------------------------------------------------------------------------ @@ -427,14 +442,24 @@ public: // lclNum - Local num for this struct local // aggregateInfo - [out] Pointer to aggregate info to create and insert replacements into. // - bool PickInducedPromotions(Compiler* comp, unsigned lclNum, AggregateInfo** aggregateInfo) + // Returns: + // Number of new promotions. 
+ // + int PickInducedPromotions(Compiler* comp, unsigned lclNum, AggregateInfo** aggregateInfo) { if (m_inducedAccesses.size() <= 0) { - return false; + return 0; + } + + AggregateInfo*& agg = *aggregateInfo; + + if ((agg != nullptr) && (agg->Replacements.size() >= PHYSICAL_PROMOTION_MAX_PROMOTIONS_PER_STRUCT)) + { + return 0; } - bool any = false; + int numReps = 0; JITDUMP("Picking induced promotions for V%02u\n", lclNum); for (PrimitiveAccess& inducedAccess : m_inducedAccesses) { @@ -483,24 +508,22 @@ public: } } - if (*aggregateInfo == nullptr) + if (agg == nullptr) { - *aggregateInfo = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); + agg = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); } size_t insertionIndex; - if ((*aggregateInfo)->Replacements.size() > 0) + if (agg->Replacements.size() > 0) { #ifdef DEBUG Replacement* overlapRep; - assert(!(*aggregateInfo) - ->OverlappingReplacements(inducedAccess.Offset, genTypeSize(inducedAccess.AccessType), - &overlapRep, nullptr)); + assert(!agg->OverlappingReplacements(inducedAccess.Offset, genTypeSize(inducedAccess.AccessType), + &overlapRep, nullptr)); #endif insertionIndex = - Promotion::BinarySearch((*aggregateInfo)->Replacements, - inducedAccess.Offset); + Promotion::BinarySearch(agg->Replacements, inducedAccess.Offset); assert((ssize_t)insertionIndex < 0); insertionIndex = ~insertionIndex; } @@ -509,13 +532,18 @@ public: insertionIndex = 0; } - (*aggregateInfo) - ->Replacements.insert((*aggregateInfo)->Replacements.begin() + insertionIndex, - Replacement(inducedAccess.Offset, inducedAccess.AccessType)); - any = true; + agg->Replacements.insert(agg->Replacements.begin() + insertionIndex, + Replacement(inducedAccess.Offset, inducedAccess.AccessType)); + numReps++; + + if (agg->Replacements.size() >= PHYSICAL_PROMOTION_MAX_PROMOTIONS_PER_STRUCT) + { + JITDUMP(" Promoted %zu fields in V%02u; will not promote more\n", agg->Replacements.size(), lclNum); 
+ break; + } } - return any; + return numReps; } //------------------------------------------------------------------------ @@ -897,7 +925,21 @@ public: unsigned numLocals = (unsigned)aggregates.size(); JITDUMP("Picking promotions\n"); - bool any = false; + int totalNumPromotions = 0; + // We limit the total number of promotions picked based on the tracking + // limit to avoid blowup in the superlinear liveness computation in + // pathological cases, and also because once we stop tracking the fields there is no benefit anymore. + // + // This logic could be improved by the use of ref counting to pick the + // most profitable fields to compute liveness for, but at the time of writing there + // is no example in the built-in SPMI collections that hits this limit. + // + // Note that we may go slightly over this as once we start picking + // replacement locals for a single struct we do not stop until we get + // to the next struct, but PHYSICAL_PROMOTION_MAX_PROMOTIONS_PER_STRUCT + // puts a limit on the number of promotions in each struct so this is + // sufficient to avoid the pathological cases. + const int maxTotalNumPromotions = JitConfig.JitMaxLocalsToTrack(); for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) { @@ -914,17 +956,22 @@ public: } #endif - uses->PickPromotions(m_compiler, lclNum, &aggregates[lclNum]); + totalNumPromotions += uses->PickPromotions(m_compiler, lclNum, &aggregates[lclNum]); - any |= aggregates[lclNum] != nullptr; + if (totalNumPromotions >= maxTotalNumPromotions) + { + JITDUMP("Promoted %d fields which is over our limit of %d; will not promote more\n", totalNumPromotions, + maxTotalNumPromotions); + break; + } } - if (!any) + if (totalNumPromotions <= 0) { return false; } - if (m_candidateStores.Height() > 0) + if ((m_candidateStores.Height() > 0) && (totalNumPromotions < maxTotalNumPromotions)) { // Now look for induced accesses due to assignment decomposition. 
@@ -974,7 +1021,7 @@ public: } } - bool any = false; + bool again = false; for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) { LocalUses* uses = m_uses[lclNum]; @@ -989,10 +1036,20 @@ public: } #endif - any |= uses->PickInducedPromotions(m_compiler, lclNum, &aggregates[lclNum]); + int numInducedProms = uses->PickInducedPromotions(m_compiler, lclNum, &aggregates[lclNum]); + again |= numInducedProms > 0; + + totalNumPromotions += numInducedProms; + if (totalNumPromotions >= maxTotalNumPromotions) + { + JITDUMP("Promoted %d fields and our limit is %d; will not promote more\n", totalNumPromotions, + maxTotalNumPromotions); + again = false; + break; + } } - if (!any) + if (!again) { break; } @@ -1067,11 +1124,9 @@ public: { // Aggregate is fully promoted, leave UnpromotedMin == UnpromotedMax to indicate this. } - - any = true; } - return any; + return totalNumPromotions > 0; } private: diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 3f58760..2e93b9f 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -7,6 +7,13 @@ #include "compiler.h" #include "vector.h" +// We limit the max number of fields that can be promoted in a single struct to +// avoid pathological cases (e.g. machine generated code). Furthermore, +// writebacks before struct uses introduce commas with nested trees for each +// field written back, so without a limit we could create arbitrarily deep +// trees. +const int PHYSICAL_PROMOTION_MAX_PROMOTIONS_PER_STRUCT = 64; + // Represents a single replacement of a (field) access into a struct local. struct Replacement {