From 9f389711a3ed282450f235df40b2d568d918d5b5 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 8 Mar 2016 15:13:56 -0800 Subject: [PATCH] Inline refactoring: move native callsite estimator Move the native call site estimation logic into `LegacyPolicy`. Rework the remaining per-callsite bonus multipliers so that they fit into the observation framework. Remove `hasNativeSizeEstimate` and instead check that the inline is a candidate and its observation is a discretionary inline. Fix header comment in `fgFindJumpTargets`. Commit migrated from https://github.com/dotnet/coreclr/commit/97c0fcb5bb5b85d45854dcd25e60020215ff6ef9 --- src/coreclr/src/jit/compiler.cpp | 3 +- src/coreclr/src/jit/compiler.h | 9 ++- src/coreclr/src/jit/flowgraph.cpp | 21 +++--- src/coreclr/src/jit/importer.cpp | 139 ++++------------------------------- src/coreclr/src/jit/inline.def | 1 + src/coreclr/src/jit/inline.h | 28 +++++-- src/coreclr/src/jit/inlinepolicy.cpp | 133 ++++++++++++++++++++++++++++++--- src/coreclr/src/jit/inlinepolicy.h | 32 ++++---- 8 files changed, 194 insertions(+), 172 deletions(-) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 2c61e89..987d65a 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -4869,7 +4869,8 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas assert(compNativeSizeEstimate != NATIVE_SIZE_INVALID); // Estimate the call site impact - int callsiteNativeSizeEstimate = impEstimateCallsiteNativeSize(methodInfo); + int callsiteNativeSizeEstimate = + prejitResult.determineCallsiteNativeSizeEstimate(methodInfo); // See if we're willing to pay for inlining this method impCanInlineNative(callsiteNativeSizeEstimate, diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index f1e947b..125cba0 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -3108,8 +3108,6 @@ private: static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr * lclVarTreeOut); - int impEstimateCallsiteNativeSize (CORINFO_METHOD_INFO * methInfo); - void impCanInlineNative(int callsiteNativeEstimate, int calleeNativeSizeEstimate, InlineInfo* pInlineInfo, @@ -7652,6 +7650,13 @@ public : bool compStressCompile(compStressArea stressArea, unsigned weightPercentage); +#ifdef DEBUG + bool compInlineStress() + { + return compStressCompile(STRESS_INLINE, 50); + } +#endif // DEBUG + bool compTailCallStress() { #ifdef DEBUG diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index f8ba54e..48a2609 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -4188,15 +4188,17 @@ private: // Arguments: // codeAddr - base address of the IL code buffer // codeSize - number of bytes in the IL code buffer -// jumpTarget - [OUT] xxx yuuu +// jumpTarget - [OUT] byte array for flagging jump targets // // Notes: -// // If inlining or prejitting the root, this method also makes // various observations about the method that factor into inline // decisions. It sets `compNativeSizeEstimate` as a side effect. // // May throw an exception if the IL is malformed. +// +// jumpTarget[N] is set to a JT_* value if IL offset N is a +// jump target in the method. void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, @@ -4780,8 +4782,10 @@ TOO_FAR: { compInlineResult->note(InlineObservation::CALLEE_END_OPCODE_SCAN); - // If we were estimating native code size, grab that data now. - if (compInlineResult->hasNativeSizeEstimate()) + // If the inline is viable and discretionary, we need to get + // an estimate for the callee native code size. + if (compInlineResult->isCandidate() + && (compInlineResult->getObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE)) { compNativeSizeEstimate = compInlineResult->determineNativeSizeEstimate(); noway_assert(compNativeSizeEstimate != NATIVE_SIZE_INVALID); @@ -4795,13 +4799,10 @@ TOO_FAR: // compCompileHelper. if (compIsForInlining()) { - // If the inlining decision was obvious from the size of the IL, - // it should have been made earlier. - InlineObservation obs = compInlineResult->getObservation(); - noway_assert(obs == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); - // Make an inlining decision based on the estimated native size. - int callsiteNativeSizeEstimate = impEstimateCallsiteNativeSize(&impInlineInfo->inlineCandidateInfo->methInfo); + int callsiteNativeSizeEstimate = + compInlineResult->determineCallsiteNativeSizeEstimate( + &impInlineInfo->inlineCandidateInfo->methInfo); impCanInlineNative(callsiteNativeSizeEstimate, compNativeSizeEstimate, diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index aa426f2..3122eef 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -73,7 +73,7 @@ void Compiler::impInit() #else impInlineSize = JitConfig.JitInlineSize(); - if (compStressCompile(STRESS_INLINE, 50)) + if (compInlineStress()) impInlineSize *= 10; if (impInlineSize > IMPLEMENTATION_MAX_INLINE_SIZE) @@ -15467,53 +15467,6 @@ BOOL Compiler::impIsAddressInLocal(GenTreePtr tree, GenTreePtr * lclVarTreeO } } -int Compiler::impEstimateCallsiteNativeSize(CORINFO_METHOD_INFO * methInfo) -{ - int callsiteSize = 55; // Direct call take 5 native bytes; indirect call takes 6 native bytes. - - bool hasThis = methInfo->args.hasThis(); - - if (hasThis) - { - callsiteSize += 30; // "mov" or "lea" - } - - CORINFO_ARG_LIST_HANDLE argLst = methInfo->args.args; - - for (unsigned i = (hasThis ? 1 : 0); i < methInfo->args.totalILArgs(); i++, argLst = info.compCompHnd->getArgNext(argLst)) - { - var_types sigType = (var_types) eeGetArgType(argLst, &methInfo->args); - - if (sigType == TYP_STRUCT) - { - typeInfo verType = verParseArgSigToTypeInfo(&methInfo->args, argLst); - - /* - - IN0028: 00009B lea EAX, bword ptr [EBP-14H] - IN0029: 00009E push dword ptr [EAX+4] - IN002a: 0000A1 push gword ptr [EAX] - IN002b: 0000A3 call [MyStruct.staticGetX2(struct):int] - - */ - - callsiteSize += 10; // "lea EAX, bword ptr [EBP-14H]" - - unsigned opsz = (unsigned)(roundUp(info.compCompHnd->getClassSize(verType.GetClassHandle()), sizeof(void*))); - unsigned slots = opsz / sizeof(void*); - - callsiteSize += slots * 20; // "push gword ptr [EAX+offs] " - } - else - { - callsiteSize += 30; // push by average takes 3 bytes. - } - } - - return callsiteSize; -} - - /***************************************************************************** */ @@ -15532,15 +15485,9 @@ void Compiler::impCanInlineNative(int callsiteNativeEstima // If this is a "forceinline" method, the JIT probably shouldn't have gone // to the trouble of estimating the native code size. Even if it did, it // shouldn't be relying on the result of this method. - assert(!(info.compFlags & CORINFO_FLG_FORCEINLINE)); + assert(inlineResult->getObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); -#ifdef DEBUG - if (verbose) - { - printf("\ncalleeNativeSizeEstimate=%d, callsiteNativeEstimate=%d.\n", - calleeNativeSizeEstimate, callsiteNativeEstimate); - } -#endif + JITDUMP("\ncalleeNativeSizeEstimate=%d, callsiteNativeEstimate=%d.\n", calleeNativeSizeEstimate, callsiteNativeEstimate); // Note if this method is an instance constructor if ((info.compFlags & CORINFO_FLG_CONSTRUCTOR) != 0 && @@ -15570,23 +15517,12 @@ void Compiler::impCanInlineNative(int callsiteNativeEstima #endif // FEATURE_SIMD - // Determine base multiplier given the various observations made so far. - double multiplier = inlineResult->determineMultiplier(); - - //Because it is an if ... else if, keep them in sorted order by multiplier. This ensures that we always - //get the maximum multipler. Also, pInlineInfo is null for static hints. Make sure that the first case - //always checks pInlineInfo and falls into it. + // Roughly classify callsite frequency. + InlineCallsiteFrequency frequency = InlineCallsiteFrequency::UNUSED; - //Now handle weight of calling block. if (!pInlineInfo || pInlineInfo->iciBlock->bbWeight >= BB_MAX_WEIGHT) { - //The calling block is very hot. We should inline very aggressively. - //If we're computing the static hint, we should be most conservative (i.e. highest weight). - multiplier += 3.0; -#ifdef DEBUG - if (verbose) - printf("\nmultiplier in hottest block increased: %g.", multiplier); -#endif + frequency = InlineCallsiteFrequency::HOT; } //No training data. Look for loop-like things. //We consider a recursive call loop-like. Do not give the inlining boost to the method itself. @@ -15594,84 +15530,37 @@ void Compiler::impCanInlineNative(int callsiteNativeEstima else if ((pInlineInfo->iciBlock->bbFlags & BBF_BACKWARD_JUMP) && (pInlineInfo->fncHandle != pInlineInfo->inlineCandidateInfo->ilCallerHandle)) { - multiplier += 3.0; -#ifdef DEBUG - if (verbose) - printf("\nmultiplier in loop increased: %g.", multiplier); -#endif + frequency = InlineCallsiteFrequency::LOOP; } else if ((pInlineInfo->iciBlock->bbFlags & BBF_PROF_WEIGHT) && (pInlineInfo->iciBlock->bbWeight > BB_ZERO_WEIGHT)) { - multiplier += 2.0; - //We hit this block while training. Give it some importance. -#ifdef DEBUG - if (verbose) - printf("\nmultiplier in hot block increased: %g.", multiplier); -#endif - + frequency = InlineCallsiteFrequency::WARM; } //Now modify the multiplier based on where we're called from. else if (pInlineInfo->iciBlock->isRunRarely() || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR)) { - // Basically only allow inlining that will shrink the - // X86 code size, so things like empty methods, field accessors - // rebound calls, literal constants, ... - - multiplier = 1.3; -#ifdef DEBUG - if (verbose) - printf("\nmultiplier for rare run blocks set to %g.", multiplier); -#endif + frequency = InlineCallsiteFrequency::RARE; } else { - multiplier += 1.3; -#ifdef DEBUG - if (verbose) - printf("\nmultiplier for boring block increased: %g.", multiplier); -#endif - + frequency = InlineCallsiteFrequency::BORING; } -#ifdef DEBUG - int additionalMultiplier = JitConfig.JitInlineAdditionalMultiplier(); - - if (additionalMultiplier!=0) - { - multiplier += additionalMultiplier; - if (verbose) - { - printf("\nmultiplier increased additionally by %d to %g.", additionalMultiplier, multiplier); - } - } + inlineResult->noteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast(frequency)); - if (compStressCompile(STRESS_INLINE, 50)) - { - multiplier += 10; - if (verbose) - { - printf("\nmultiplier increased by 10 to %g due to the stress mode.", multiplier); - } - } + // Determine multiplier given the various observations. + double multiplier = inlineResult->determineMultiplier(); // Note the various estimates we've obtained. - inlineResult->noteInt(InlineObservation::CALLEE_NATIVE_SIZE_ESTIMATE, calleeNativeSizeEstimate); inlineResult->noteInt(InlineObservation::CALLSITE_NATIVE_SIZE_ESTIMATE, callsiteNativeEstimate); inlineResult->noteDouble(InlineObservation::CALLSITE_BENEFIT_MULTIPLIER, multiplier); - -#endif int threshold = (int)(callsiteNativeEstimate * multiplier); -#ifdef DEBUG - if (verbose) - { - printf("\ncalleeNativeSizeEstimate=%d, threshold=%d.\n", calleeNativeSizeEstimate, threshold); - } -#endif + JITDUMP("\ncalleeNativeSizeEstimate=%d, threshold=%d.\n", calleeNativeSizeEstimate, threshold); #define NATIVE_CALL_SIZE_MULTIPLIER (10.0) if (calleeNativeSizeEstimate > threshold) diff --git a/src/coreclr/src/jit/inline.def b/src/coreclr/src/jit/inline.def index 9d2b07a..c2e6082 100644 --- a/src/coreclr/src/jit/inline.def +++ b/src/coreclr/src/jit/inline.def @@ -147,6 +147,7 @@ INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", INLINE_OBSERVATION(BENEFIT_MULTIPLIER, double, "benefit multiplier", INFORMATION, CALLSITE) INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE) INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE) +INLINE_OBSERVATION(FREQUENCY, int, "execution frequency", INFORMATION, CALLSITE) INLINE_OBSERVATION(NATIVE_SIZE_ESTIMATE, double, "native size estimate", INFORMATION, CALLSITE) INLINE_OBSERVATION(NATIVE_SIZE_ESTIMATE_OK, bool, "native size estimate ok", INFORMATION, CALLSITE) diff --git a/src/coreclr/src/jit/inline.h b/src/coreclr/src/jit/inline.h index 9a5a7bc..37e42ec 100644 --- a/src/coreclr/src/jit/inline.h +++ b/src/coreclr/src/jit/inline.h @@ -9,6 +9,7 @@ // // -- ENUMS -- // +// InlineCallFrequency - rough assessment of call site frequency // InlineDecision - overall decision made about an inline // InlineTarget - target of a particular observation // InlineImpact - impact of a particular observation @@ -101,6 +102,19 @@ const unsigned int MAX_INL_LCLS = 8; CORJIT_FLG_DEBUG_INFO \ ) +// InlineCallsiteFrequency gives a rough classification of how +// often a call site will be excuted at runtime. + +enum class InlineCallsiteFrequency +{ + UNUSED, // n/a + RARE, // once in a blue moon + BORING, // normal call site + WARM, // seen during profiling + LOOP, // in a loop + HOT // very frequent +}; + // InlineDecision describes the various states the jit goes through when // evaluating an inline candidate. It is distinct from CorInfoInline // because it must capture internal states that don't get reported back @@ -228,8 +242,8 @@ public: // Policy determinations virtual double determineMultiplier() = 0; - virtual bool hasNativeSizeEstimate() = 0; virtual int determineNativeSizeEstimate() = 0; + virtual int determineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo) = 0; // Policy policies virtual bool propagateNeverToRuntime() const = 0; @@ -365,18 +379,18 @@ public: return inlPolicy->determineMultiplier(); } - // Is there a native size estimate? - bool hasNativeSizeEstimate() - { - return inlPolicy->hasNativeSizeEstimate(); - } - // Determine the native size estimate for this inline int determineNativeSizeEstimate() { return inlPolicy->determineNativeSizeEstimate(); } + // Determine the native size estimate for this call site + int determineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo) + { + return inlPolicy->determineCallsiteNativeSizeEstimate(methodInfo); + } + // Ensure details of this inlining process are appropriately // reported when the result goes out of scope. ~InlineResult() diff --git a/src/coreclr/src/jit/inlinepolicy.cpp b/src/coreclr/src/jit/inlinepolicy.cpp index fd8dd82..84914eb 100644 --- a/src/coreclr/src/jit/inlinepolicy.cpp +++ b/src/coreclr/src/jit/inlinepolicy.cpp @@ -261,6 +261,12 @@ void LegacyPolicy::noteInt(InlineObservation obs, int value) break; } + case InlineObservation::CALLSITE_FREQUENCY: + assert(inlCallsiteFrequency == InlineCallsiteFrequency::UNUSED); + inlCallsiteFrequency = static_cast(value); + assert(inlCallsiteFrequency != InlineCallsiteFrequency::UNUSED); + break; + default: // Ignore all other information break; @@ -458,30 +464,133 @@ double LegacyPolicy::determineMultiplier() JITDUMP("\nInline candidate has const arg that feeds a conditional. Multiplier increased to %g.", multiplier); } - return multiplier; -} + switch (inlCallsiteFrequency) + { + case InlineCallsiteFrequency::RARE: + // Note this one is not additive, it uses '=' instead of '+=' + multiplier = 1.3; + JITDUMP("\nInline candidate callsite is rare. Multiplier limited to %g.", multiplier); + break; + case InlineCallsiteFrequency::BORING: + multiplier += 1.3; + JITDUMP("\nInline candidate callsite is boring. Multiplier increased to %g.", multiplier); + break; + case InlineCallsiteFrequency::WARM: + multiplier += 2.0; + JITDUMP("\nInline candidate callsite is warm. Multiplier increased to %g.", multiplier); + break; + case InlineCallsiteFrequency::LOOP: + multiplier += 3.0; + JITDUMP("\nInline candidate callsite is in a loop. Multiplier increased to %g.", multiplier); + break; + case InlineCallsiteFrequency::HOT: + multiplier += 3.0; + JITDUMP("\nInline candidate callsite is hot. Multiplier increased to %g.", multiplier); + break; + default: + assert(!"Unexpected callsite frequency"); + break; + } -//------------------------------------------------------------------------ -// hasNativeCodeSizeEstimate: did this policy estimate native code size -// -// Notes: -// Temporary scaffolding for refactoring work. +#ifdef DEBUG -bool LegacyPolicy::hasNativeSizeEstimate() -{ - return (inlStateMachine != nullptr); + int additionalMultiplier = JitConfig.JitInlineAdditionalMultiplier(); + + if (additionalMultiplier != 0) + { + multiplier += additionalMultiplier; + JITDUMP("\nmultiplier increased via JitInlineAdditonalMultiplier=%d to %g.", additionalMultiplier, multiplier); + } + + if (inlCompiler->compInlineStress()) + { + multiplier += 10; + JITDUMP("\nmultiplier increased via inline stress to %g.", multiplier); + } + +#endif // DEBUG + + return multiplier; } //------------------------------------------------------------------------ // determineNativeCodeSizeEstimate: return estimated native code size for // this inline candidate. // -// Notes: +// Notes: // Uses the results of a state machine model for discretionary -// candidates. Should not be needed for forcded or always +// candidates. Should not be needed for forced or always // candidates. int LegacyPolicy::determineNativeSizeEstimate() { + // Should be a discretionary candidate. + assert(inlObservation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); + // Should have a valid state machine estimate. + assert(inlNativeSizeEstimate != NATIVE_SIZE_INVALID); + return inlNativeSizeEstimate; } + +//------------------------------------------------------------------------ +// determineNativeCallsiteSizeEstimate: estimate native size for the +// callsite. +// +// Arguments: +// methInfo -- method info for the callee +// +// Notes: +// Estimates the native size (in bytes, scaled up by 10x) for the +// call site. While the quiality of the estimate here is questionable +// (especially for x64) it is being left as is for legacy compatibility. + +int LegacyPolicy::determineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methInfo) +{ + int callsiteSize = 55; // Direct call take 5 native bytes; indirect call takes 6 native bytes. + + bool hasThis = methInfo->args.hasThis(); + + if (hasThis) + { + callsiteSize += 30; // "mov" or "lea" + } + + CORINFO_ARG_LIST_HANDLE argLst = methInfo->args.args; + COMP_HANDLE comp = inlCompiler->info.compCompHnd; + + for (unsigned i = (hasThis ? 1 : 0); + i < methInfo->args.totalILArgs(); + i++, argLst = comp->getArgNext(argLst)) + { + var_types sigType = (var_types) inlCompiler->eeGetArgType(argLst, &methInfo->args); + + if (sigType == TYP_STRUCT) + { + typeInfo verType = inlCompiler->verParseArgSigToTypeInfo(&methInfo->args, argLst); + + /* + + IN0028: 00009B lea EAX, bword ptr [EBP-14H] + IN0029: 00009E push dword ptr [EAX+4] + IN002a: 0000A1 push gword ptr [EAX] + IN002b: 0000A3 call [MyStruct.staticGetX2(struct):int] + + */ + + callsiteSize += 10; // "lea EAX, bword ptr [EBP-14H]" + + // NB sizeof (void*) fails to convey intent when cross-jitting. + + unsigned opsz = (unsigned)(roundUp(comp->getClassSize(verType.GetClassHandle()), sizeof(void*))); + unsigned slots = opsz / sizeof(void*); + + callsiteSize += slots * 20; // "push gword ptr [EAX+offs] " + } + else + { + callsiteSize += 30; // push by average takes 3 bytes. + } + } + + return callsiteSize; +} diff --git a/src/coreclr/src/jit/inlinepolicy.h b/src/coreclr/src/jit/inlinepolicy.h index 45faba9..a1c159a 100644 --- a/src/coreclr/src/jit/inlinepolicy.h +++ b/src/coreclr/src/jit/inlinepolicy.h @@ -44,6 +44,7 @@ public: , inlStateMachine(nullptr) , inlCodeSize(0) , inlNativeSizeEstimate(NATIVE_SIZE_INVALID) + , inlCallsiteFrequency(InlineCallsiteFrequency::UNUSED) , inlIsForceInline(false) , inlIsForceInlineKnown(false) , inlIsInstanceCtor(false) @@ -68,7 +69,7 @@ public: // Policy determinations double determineMultiplier() override; int determineNativeSizeEstimate() override; - bool hasNativeSizeEstimate() override; + int determineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo) override; // Policy policies bool propagateNeverToRuntime() const override { return true; } @@ -89,20 +90,21 @@ private: const unsigned MAX_BASIC_BLOCKS = 5; // Data members - Compiler* inlCompiler; - CodeSeqSM* inlStateMachine; - unsigned inlCodeSize; - int inlNativeSizeEstimate; - bool inlIsForceInline :1; - bool inlIsForceInlineKnown :1; - bool inlIsInstanceCtor :1; - bool inlIsFromPromotableValueClass :1; - bool inlHasSimd :1; - bool inlLooksLikeWrapperMethod :1; - bool inlArgFeedsConstantTest :1; - bool inlMethodIsMostlyLoadStore :1; - bool inlArgFeedsRangeCheck :1; - bool inlConstantFeedsConstantTest :1; + Compiler* inlCompiler; + CodeSeqSM* inlStateMachine; + unsigned inlCodeSize; + int inlNativeSizeEstimate; + InlineCallsiteFrequency inlCallsiteFrequency; + bool inlIsForceInline :1; + bool inlIsForceInlineKnown :1; + bool inlIsInstanceCtor :1; + bool inlIsFromPromotableValueClass :1; + bool inlHasSimd :1; + bool inlLooksLikeWrapperMethod :1; + bool inlArgFeedsConstantTest :1; + bool inlMethodIsMostlyLoadStore :1; + bool inlArgFeedsRangeCheck :1; + bool inlConstantFeedsConstantTest :1; }; #endif // _INLINE_POLICY_H_ -- 2.7.4