// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Compiler                                         XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#include "hostallocator.h"
#include "ssabuilder.h"
#include "rangecheck.h"
#include "stacklevelsetter.h"
#include "jittelemetry.h"
#include "patchpointinfo.h"
#include "jitstd/algorithm.h"

extern ICorJitHost* g_jitHost;
unsigned Compiler::jitTotalMethodCompiled = 0;

#if defined(DEBUG)
LONG Compiler::jitNestingLevel = 0;
#endif // defined(DEBUG)

bool                Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList            = nullptr;

bool                Compiler::s_pJitDisasmIncludeAssembliesListInitialized = false;
AssemblyNamesList2* Compiler::s_pJitDisasmIncludeAssembliesList            = nullptr;

bool       Compiler::s_pJitFunctionFileInitialized = false;
MethodSet* Compiler::s_pJitMethodSet               = nullptr;

#ifdef CONFIGURABLE_ARM_ABI
// static
bool GlobalJitOptions::compFeatureHfa          = false;
LONG GlobalJitOptions::compUseSoftFPConfigured = 0;
#endif // CONFIGURABLE_ARM_ABI
/*****************************************************************************
 *
 *  Little helpers to grab the current cycle counter value; this is done
 *  differently based on target architecture, host toolchain, etc. The
 *  main thing is to keep the overhead absolutely minimal; in fact, on
 *  x86/x64 we use RDTSC even though it's not thread-safe; GetThreadCycles
 *  (which is monotonic) is just too expensive.
 */
#ifdef FEATURE_JIT_METHOD_PERF

#if defined(HOST_X86) || defined(HOST_AMD64)

#if defined(_MSC_VER)

#include <intrin.h>
inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
{
    *cycleOut = __rdtsc();
    return true;
}

#elif defined(__GNUC__)

inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
{
    uint32_t hi, lo;
    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    *cycleOut = (static_cast<unsigned __int64>(hi) << 32) | static_cast<unsigned __int64>(lo);
    return true;
}

#else // neither _MSC_VER nor __GNUC__

// The following *might* work - might as well try.
#define _our_GetThreadCycles(cp) GetThreadCycles(cp)

#endif // neither _MSC_VER nor __GNUC__

#elif defined(HOST_ARM) || defined(HOST_ARM64)

// If this doesn't work please see ../gc/gc.cpp for additional ARM
// info (and possible solutions).
#define _our_GetThreadCycles(cp) GetThreadCycles(cp)

#else // not x86/x64 and not ARM

// Don't know what this target is, but let's give it a try; if
// someone really wants to make this work, please add the right
// code here.
#define _our_GetThreadCycles(cp) GetThreadCycles(cp)

#endif // which host OS
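
// Illustrative usage sketch (not part of this file): bracketing a region with
// the helper above. 'start', 'stop', and 'DoWork' are hypothetical names used
// only for this example; the RDTSC deltas are raw, unscaled cycle counts.
//
//     unsigned __int64 start = 0;
//     unsigned __int64 stop  = 0;
//     if (_our_GetThreadCycles(&start))
//     {
//         DoWork();                    // region being measured
//         _our_GetThreadCycles(&stop);
//         unsigned __int64 elapsed = stop - start;
//     }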
const BYTE genTypeSizes[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz,
#include "typelist.h"
#undef DEF_TP
};

const BYTE genTypeAlignments[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) al,
#include "typelist.h"
#undef DEF_TP
};

const BYTE genTypeStSzs[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) st,
#include "typelist.h"
#undef DEF_TP
};

const BYTE genActualTypes[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) jitType,
#include "typelist.h"
#undef DEF_TP
};

#endif // FEATURE_JIT_METHOD_PERF
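
// The tables above use the "X macro" pattern: typelist.h expands one DEF_TP
// entry per JIT type, and each table redefines DEF_TP to select a single
// column of that list. A minimal self-contained sketch of the same
// column-extraction idea (hypothetical MYTYPES/PICK_SIZE names, not part of
// the JIT):
//
//     #define MYTYPES(X) X(T_BYTE, 1) X(T_INT, 4) X(T_LONG, 8)
//     #define PICK_SIZE(name, size) size,
//     static const unsigned char mySizes[] = {MYTYPES(PICK_SIZE)}; // {1, 4, 8}
//     #undef PICK_SIZE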
/*****************************************************************************/
inline unsigned getCurTime()
{
    SYSTEMTIME tim;

    GetLocalTime(&tim);

    return (((tim.wHour * 60) + tim.wMinute) * 60 + tim.wSecond) * 1000 + tim.wMilliseconds;
}

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************/

static FILE* jitSrcFilePtr;

static unsigned jitCurSrcLine;

void Compiler::JitLogEE(unsigned level, const char* fmt, ...)
{
    va_list args;

    if (verbose)
    {
        va_start(args, fmt);
        vflogf(jitstdout(), fmt, args);
        va_end(args);
    }

    va_start(args, fmt);
    vlogf(level, fmt, args);
    va_end(args);
}

#endif // DEBUG

/*****************************************************************************/
#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS

static unsigned genMethodCnt;  // total number of methods JIT'ted
unsigned        genMethodICnt; // number of interruptible methods
unsigned        genMethodNCnt; // number of non-interruptible methods
static unsigned genSmallMethodsNeedingExtraMemoryCnt = 0;

#endif

/*****************************************************************************/
#if MEASURE_NODE_SIZE
NodeSizeStats genNodeSizeStats;
NodeSizeStats genNodeSizeStatsPerFunc;

unsigned  genTreeNcntHistBuckets[] = {10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 0};
Histogram genTreeNcntHist(genTreeNcntHistBuckets);

unsigned  genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000, 1000000, 0};
Histogram genTreeNsizHist(genTreeNsizHistBuckets);
#endif // MEASURE_NODE_SIZE

/*****************************************************************************/
#if MEASURE_MEM_ALLOC

unsigned  memAllocHistBuckets[] = {64, 128, 192, 256, 512, 1024, 4096, 8192, 0};
Histogram memAllocHist(memAllocHistBuckets);
unsigned  memUsedHistBuckets[] = {16, 32, 64, 128, 192, 256, 512, 1024, 4096, 8192, 0};
Histogram memUsedHist(memUsedHistBuckets);

#endif // MEASURE_MEM_ALLOC
/*****************************************************************************
 *
 *  Variables to keep track of total code amounts.
 */

#if DISPLAY_SIZES

size_t grossVMsize;   // Total IL code size
size_t grossNCsize;   // Native code + data size
size_t totalNCsize;   // Native code + data + GC info size (TODO-Cleanup: GC info size only accurate for JIT32_GCENCODER)
size_t gcHeaderISize; // GC header      size: interruptible methods
size_t gcPtrMapISize; // GC pointer map size: interruptible methods
size_t gcHeaderNSize; // GC header      size: non-interruptible methods
size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods

#endif // DISPLAY_SIZES
/*****************************************************************************
 *
 *  Variables to keep track of argument counts.
 */

#if CALL_ARG_STATS

unsigned argTotalCalls;
unsigned argHelperCalls;
unsigned argStaticCalls;
unsigned argNonVirtualCalls;
unsigned argVirtualCalls;

unsigned argTotalArgs; // total number of args for all calls (including objectPtr)
unsigned argTotalDWordArgs;
unsigned argTotalLongArgs;
unsigned argTotalFloatArgs;
unsigned argTotalDoubleArgs;

unsigned argTotalRegArgs;
unsigned argTotalTemps;
unsigned argTotalLclVar;
unsigned argTotalDeferred;
unsigned argTotalConst;

unsigned argTotalObjPtr;

unsigned argMaxTempsPerMethod;

unsigned  argCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
Histogram argCntTable(argCntBuckets);

unsigned  argDWordCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
Histogram argDWordCntTable(argDWordCntBuckets);

unsigned  argDWordLngCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
Histogram argDWordLngCntTable(argDWordLngCntBuckets);

unsigned  argTempsCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
Histogram argTempsCntTable(argTempsCntBuckets);

#endif // CALL_ARG_STATS
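
// Illustrative sketch of how these tables are fed and dumped (hypothetical
// call sites; Histogram::record and Histogram::dump are the existing APIs).
// Each bucket array is terminated by a trailing 0 sentinel, and record(n)
// bumps the bucket covering n:
//
//     argCntTable.record(callArgCount); // e.g. a 3-argument call lands in the "3" bucket
//     argCntTable.dump(jitstdout());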
/*****************************************************************************
 *
 *  Variables to keep track of basic block counts.
 */

#if COUNT_BASIC_BLOCKS

//          --------------------------------------------------
//          Basic block count frequency table:
//          --------------------------------------------------
//              <=         1 ===>  26872 count ( 56% of total)
//               2 ..      2 ===>    669 count ( 58% of total)
//               3 ..      3 ===>   4687 count ( 68% of total)
//               4 ..      5 ===>   5101 count ( 78% of total)
//               6 ..     10 ===>   5575 count ( 90% of total)
//              11 ..     20 ===>   3028 count ( 97% of total)
//              21 ..     50 ===>   1108 count ( 99% of total)
//              51 ..    100 ===>    182 count ( 99% of total)
//             101 ..   1000 ===>     34 count (100% of total)
//            1001 ..  10000 ===>      0 count (100% of total)
//          --------------------------------------------------

unsigned  bbCntBuckets[] = {1, 2, 3, 5, 10, 20, 50, 100, 1000, 10000, 0};
Histogram bbCntTable(bbCntBuckets);

/* Histogram for the IL opcode size of methods with a single basic block */

unsigned  bbSizeBuckets[] = {1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0};
Histogram bbOneBBSizeTable(bbSizeBuckets);

unsigned  domsChangedIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram domsChangedIterationTable(domsChangedIterationBuckets);

unsigned  computeReachabilitySetsIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram computeReachabilitySetsIterationTable(computeReachabilitySetsIterationBuckets);

unsigned  computeReachabilityIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram computeReachabilityIterationTable(computeReachabilityIterationBuckets);

#endif // COUNT_BASIC_BLOCKS
/*****************************************************************************
 *
 *  Used by optFindNaturalLoops to gather statistical information such as
 *   - total number of natural loops
 *   - number of loops with 1, 2, ... exit conditions
 *   - number of loops that have an iterator (for-like)
 *   - number of loops that have a constant iterator
 */

#if COUNT_LOOPS

unsigned totalLoopMethods;        // counts the total number of methods that have natural loops
unsigned maxLoopsPerMethod;       // counts the maximum number of loops a method has
unsigned totalLoopOverflows;      // # of methods that identified more loops than we can represent
unsigned totalLoopCount;          // counts the total number of natural loops
unsigned totalUnnatLoopCount;     // counts the total number of (not-necessarily natural) loops
unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
unsigned iterLoopCount;           // counts the # of loops with an iterator (for-like)
unsigned constIterLoopCount;      // counts the # of loops with a constant iterator (for-like)
bool     hasMethodLoops;          // flag to keep track if we already counted a method as having loops
unsigned loopsThisMethod;         // counts the number of loops in the current method
bool     loopOverflowThisMethod;  // True if we exceeded the max # of loops in the method.

/* Histogram for number of loops in a method */

unsigned  loopCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0};
Histogram loopCountTable(loopCountBuckets);

/* Histogram for number of loop exits */

unsigned  loopExitCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 0};
Histogram loopExitCountTable(loopExitCountBuckets);

#endif // COUNT_LOOPS
//------------------------------------------------------------------------
// getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types
//
// Arguments:
//    gcType    - an enum value that originally came from an element
//                of the BYTE[] returned from getClassGClayout()
//
// Return Value:
//    The corresponding enum value from the JIT's var_types
//
// Notes:
//    The gcLayout of each field of a struct is returned from getClassGClayout()
//    as a BYTE[], but each BYTE element is actually a CorInfoGCType value.
//    Note that when we 'know' there is only one element in this array,
//    the JIT will often pass the address of a single BYTE instead of a BYTE[].
//
var_types Compiler::getJitGCType(BYTE gcType)
{
    var_types     result      = TYP_UNKNOWN;
    CorInfoGCType corInfoType = (CorInfoGCType)gcType;

    if (corInfoType == TYPE_GC_NONE)
    {
        result = TYP_I_IMPL;
    }
    else if (corInfoType == TYPE_GC_REF)
    {
        result = TYP_REF;
    }
    else if (corInfoType == TYPE_GC_BYREF)
    {
        result = TYP_BYREF;
    }
    else
    {
        noway_assert(!"Bad value of 'gcType'");
    }
    return result;
}
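
// Worked example (illustrative): for a struct laid out as {object, int},
// getClassGClayout() reports the bytes {TYPE_GC_REF, TYPE_GC_NONE}, which this
// helper maps element-by-element to {TYP_REF, TYP_I_IMPL}.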
//---------------------------------------------------------------------------
// isTrivialPointerSizedStruct:
//    Check if the given struct type contains only one pointer-sized integer value type
//
// Arguments:
//    clsHnd - the handle for the struct type.
//
// Return Value:
//    true if the given struct type contains only one pointer-sized integer value type,
//    false otherwise.
//
bool Compiler::isTrivialPointerSizedStruct(CORINFO_CLASS_HANDLE clsHnd) const
{
    assert(info.compCompHnd->isValueClass(clsHnd));
    if (info.compCompHnd->getClassSize(clsHnd) != TARGET_POINTER_SIZE)
    {
        return false;
    }
    for (;;)
    {
        // every class in the chain must be a value type with exactly one instance field
        if (!info.compCompHnd->isValueClass(clsHnd) || info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
        {
            return false;
        }

        CORINFO_CLASS_HANDLE* pClsHnd   = &clsHnd;
        CORINFO_FIELD_HANDLE  fldHnd    = info.compCompHnd->getFieldInClass(clsHnd, 0);
        CorInfoType           fieldType = info.compCompHnd->getFieldType(fldHnd, pClsHnd);

        var_types vt = JITtype2varType(fieldType);

        if (fieldType == CORINFO_TYPE_VALUECLASS)
        {
            // Drill into the single struct field and check it as well.
            clsHnd = *pClsHnd;
        }
        else if (varTypeIsI(vt) && !varTypeIsGC(vt))
        {
            return true;
        }
        else
        {
            return false;
        }
    }
}
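
// For example (hypothetical C# types on a 64-bit target): struct A { nint V; }
// is trivially pointer-sized (true); struct B { A Inner; } also qualifies via
// the chain walk; struct C { object O; } does not, since its field is
// GC-tracked rather than a plain pointer-sized integer.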
//---------------------------------------------------------------------------
// isNativePrimitiveStructType:
//    Check if the given struct type is an intrinsic type that should be treated as though
//    it is not a struct at the unmanaged ABI boundary.
//
// Arguments:
//    clsHnd - the handle for the struct type.
//
// Return Value:
//    true if the given struct type should be treated as a primitive for unmanaged calls,
//    false otherwise.
//
bool Compiler::isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd)
{
    if (!isIntrinsicType(clsHnd))
    {
        return false;
    }
    const char* namespaceName = nullptr;
    const char* typeName      = getClassNameFromMetadata(clsHnd, &namespaceName);

    if (strcmp(namespaceName, "System.Runtime.InteropServices") != 0)
    {
        return false;
    }

    return strcmp(typeName, "CLong") == 0 || strcmp(typeName, "CULong") == 0 || strcmp(typeName, "NFloat") == 0;
}
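
// These three types (System.Runtime.InteropServices.CLong/CULong/NFloat) each
// wrap a single native-width primitive (C 'long', 'unsigned long', and a
// native 'float'/'double', respectively), so at an unmanaged call boundary
// they are passed and returned exactly like the primitive they wrap rather
// than as a one-field struct.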
//-----------------------------------------------------------------------------
// getPrimitiveTypeForStruct:
//     Get the "primitive" type that is used for a struct
//     of size 'structSize'.
//     We examine 'clsHnd' to check the GC layout of the struct and
//     return TYP_REF for structs that simply wrap an object.
//     If the struct is a one element HFA/HVA, we will return the
//     proper floating point or vector type.
//
// Arguments:
//    structSize - the size of the struct type, cannot be zero
//    clsHnd     - the handle for the struct type, used when we may have
//                 an HFA or need the GC layout for an object ref.
//    isVarArg   - whether this struct is a varargs argument (used to ignore
//                 HFA classification for Arm64 Windows varargs)
//
// Return Value:
//    The primitive type (i.e. byte, short, int, long, ref, float, double)
//    used to pass or return structs of this size.
//    If we shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
//
// Notes:
//    For 32-bit targets (X86/ARM32) the 64-bit TYP_LONG type is not
//    considered a primitive type by this method.
//    So a struct that wraps a 'long' is passed and returned in the
//    same way as any other 8-byte struct.
//    For ARM32 if we have an HFA struct that wraps a 64-bit double
//    we will return TYP_DOUBLE.
//    For vector calling conventions, a vector is considered a "primitive"
//    type, as it is passed in a single register.
//
var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg)
{
    assert(structSize != 0);

    var_types useType = TYP_UNKNOWN;

    // Start by determining if we have an HFA/HVA with a single element.
    if (GlobalJitOptions::compFeatureHfa)
    {
        // On Arm64 Windows, varargs methods do not classify arguments as HFA types;
        // they must be treated as if they are not HFAs.
        if (!(TargetArchitecture::IsArm64 && TargetOS::IsWindows && isVarArg))
        {
            switch (structSize)
            {
                case 4:
                case 8:
#ifdef TARGET_ARM64
                case 16:
#endif // TARGET_ARM64
                {
                    var_types hfaType = GetHfaType(clsHnd);
                    // We're only interested in the case where the struct size is equal to the size of the hfaType.
                    if (varTypeIsValidHfaType(hfaType))
                    {
                        if (genTypeSize(hfaType) == structSize)
                        {
                            useType = hfaType;
                        }
                        else
                        {
                            return TYP_UNKNOWN;
                        }
                    }
                }
            }

            if (useType != TYP_UNKNOWN)
            {
                return useType;
            }
        }
    }

    // Now deal with non-HFA/HVA structs.
    switch (structSize)
    {
        case 1:
            useType = TYP_UBYTE;
            break;

        case 2:
            useType = TYP_USHORT;
            break;

#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI)
        case 3:
            useType = TYP_INT;
            break;

#endif // !TARGET_XARCH || UNIX_AMD64_ABI

#ifdef TARGET_64BIT
        case 4:
            // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT.
            useType = TYP_INT;
            break;

#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI)
        case 5:
        case 6:
        case 7:
            useType = TYP_I_IMPL;
            break;

#endif // !TARGET_XARCH || UNIX_AMD64_ABI
#endif // TARGET_64BIT

        case TARGET_POINTER_SIZE:
        {
            BYTE gcPtr = 0;
            // Check if this pointer-sized struct is wrapping a GC object
            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
            useType = getJitGCType(gcPtr);
        }
        break;

        default:
            useType = TYP_UNKNOWN;
            break;
    }

    return useType;
}
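
// Worked example (illustrative): on a 64-bit target, an 8-byte struct wrapping
// a single 'object' field hits the TARGET_POINTER_SIZE case, getClassGClayout()
// reports TYPE_GC_REF, and the result is TYP_REF. On arm64, a struct
// {float x; float y;} is an HFA whose element size (4 bytes) does not equal
// the struct size (8 bytes), so the HFA path returns TYP_UNKNOWN and the
// struct is not treated as a primitive here.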
//-----------------------------------------------------------------------------
// getArgTypeForStruct:
//     Get the type that is used to pass values of the given struct type.
//     If you have already retrieved the struct size then it should be
//     passed as the optional fourth argument, as this allows us to avoid
//     an extra call to getClassSize(clsHnd)
//
// Arguments:
//    clsHnd       - the handle for the struct type
//    wbPassStruct - An "out" argument with information about how
//                   the struct is to be passed
//    isVarArg     - is vararg, used to ignore HFA types for Arm64 windows varargs
//    structSize   - the size of the struct type,
//                   or zero if we should call getClassSize(clsHnd)
//
// Return Value:
//    For wbPassStruct you can pass a 'nullptr' and nothing will be written
//    or returned for that out parameter.
//    When *wbPassStruct is SPK_PrimitiveType this method's return value
//    is the primitive type used to pass the struct.
//    When *wbPassStruct is SPK_ByReference this method's return value
//    is always TYP_UNKNOWN and the struct type is passed by reference to a copy.
//    When *wbPassStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
//    is always TYP_STRUCT and the struct type is passed by value either
//    using multiple registers or on the stack.
//
// Assumptions:
//    The size must be the size of the given type.
//    The given class handle must be for a value type (struct).
//
// Notes:
//    When the clsHnd is a one element HFA type we return the appropriate
//    floating point primitive type and *wbPassStruct is SPK_PrimitiveType.
//    If there are two or more elements in the HFA type then this method's
//    return value is TYP_STRUCT and *wbPassStruct is SPK_ByValueAsHfa.
//
var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
                                        structPassingKind*   wbPassStruct,
                                        bool                 isVarArg,
                                        unsigned             structSize)
{
    var_types         useType         = TYP_UNKNOWN;
    structPassingKind howToPassStruct = SPK_Unknown; // We must change this before we return

    assert(structSize != 0);

    // Determine if we can pass the struct as a primitive type.
    // Note that on x86 we only pass specific pointer-sized structs that satisfy isTrivialPointerSizedStruct checks.
#ifndef TARGET_X86
#ifdef UNIX_AMD64_ABI

    // An 8-byte struct may need to be passed in a floating point register
    // So we always consult the struct "Classifier" routine
    //
    SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
    eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);

    if (structDesc.passedInRegisters && (structDesc.eightByteCount != 1))
    {
        // We can't pass this as a primitive type.
    }
    else if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
    {
        // If this is passed as a floating type, use that.
        // Otherwise, we'll use the general case - we don't want to use the "EightByteType"
        // directly, because it returns `TYP_INT` for any integral type <= 4 bytes, and
        // we need to preserve small types.
        useType = GetEightByteType(structDesc, 0);
    }
    else
#endif // UNIX_AMD64_ABI

        // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES,
        // so we can skip calling getPrimitiveTypeForStruct when we
        // have a struct that is larger than that.
        if (structSize <= MAX_PASS_SINGLEREG_BYTES)
        {
            // We set the "primitive" useType based upon the structSize
            // and also examine the clsHnd to see if it is an HFA of count one
            useType = getPrimitiveTypeForStruct(structSize, clsHnd, isVarArg);
        }
#else // TARGET_X86
    if (isTrivialPointerSizedStruct(clsHnd))
    {
        useType = TYP_I_IMPL;
    }
#endif // !TARGET_X86

    // Did we change this struct type into a simple "primitive" type?
    //
    if (useType != TYP_UNKNOWN)
    {
        // Yes, we should use the "primitive" type in 'useType'
        howToPassStruct = SPK_PrimitiveType;
    }
    else // We can't replace the struct with a "primitive" type
    {
        // See if we can pass this struct by value, possibly in multiple registers
        // or if we should pass it by reference to a copy
        //
        if (structSize <= MAX_PASS_MULTIREG_BYTES)
        {
            // Structs that are HFA/HVA's are passed by value in multiple registers.
            // On Arm64 Windows, varargs methods do not classify arguments as HFA/HVA types;
            // they must be treated as if they are not HFA/HVA types.
            var_types hfaType;
            if (TargetArchitecture::IsArm64 && TargetOS::IsWindows && isVarArg)
            {
                hfaType = TYP_UNDEF;
            }
            else
            {
                hfaType = GetHfaType(clsHnd);
            }
            if (varTypeIsValidHfaType(hfaType))
            {
                // HFAs of count one should have been handled by getPrimitiveTypeForStruct
                assert(GetHfaCount(clsHnd) >= 2);

                // Set 'howToPassStruct' and 'useType' to indicate that this is passed by value
                // as an HFA using multiple registers.
                // (When all of the parameter registers are used, the stack will be used.)
                howToPassStruct = SPK_ByValueAsHfa;
                useType         = TYP_STRUCT;
            }
            else // Not an HFA struct type
            {
#ifdef UNIX_AMD64_ABI
                // The case of (structDesc.eightByteCount == 1) should have already been handled
                if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
                {
                    // Set 'howToPassStruct' and 'useType' to indicate that this is passed by value
                    // in multiple registers.
                    // (When all of the parameter registers are used, the stack will be used.)
                    howToPassStruct = SPK_ByValue;
                    useType         = TYP_STRUCT;
                }
                else
                {
                    assert(structDesc.eightByteCount == 0);
                    // Otherwise we pass this struct by reference to a copy.
                    // Set 'howToPassStruct' and 'useType' to indicate that this is passed using one
                    // register (by reference to a copy).
                    howToPassStruct = SPK_ByReference;
                    useType         = TYP_UNKNOWN;
                }

#elif defined(TARGET_ARM64)

                // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
                assert(structSize > TARGET_POINTER_SIZE);

                // On ARM64 structs that are 9-16 bytes are passed by value in multiple registers
                //
                if (structSize <= (TARGET_POINTER_SIZE * 2))
                {
                    // Set 'howToPassStruct' and 'useType' to indicate that this is passed by value
                    // in multiple registers.
                    // (When all of the parameter registers are used, the stack will be used.)
                    howToPassStruct = SPK_ByValue;
                    useType         = TYP_STRUCT;
                }
                else // a structSize that is 17-32 bytes in size
                {
                    // Otherwise we pass this struct by reference to a copy.
                    // Set 'howToPassStruct' and 'useType' to indicate that this is passed using one
                    // register (by reference to a copy).
                    howToPassStruct = SPK_ByReference;
                    useType         = TYP_UNKNOWN;
                }

#elif defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

                // Otherwise we pass this struct by value on the stack.
                // Set 'howToPassStruct' and 'useType' to indicate that this is passed by value
                // according to the X86/ARM32 ABI.
                // On LOONGARCH64 a struct that is 1-16 bytes is passed by value in one or two registers.
                howToPassStruct = SPK_ByValue;
                useType         = TYP_STRUCT;

#else //  TARGET_XXX

                noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");

#endif //  TARGET_XXX
            }
        }
        else // (structSize > MAX_PASS_MULTIREG_BYTES)
        {
            // We have a (large) struct that can't be replaced with a "primitive" type
            // and can't be passed in multiple registers
            CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(TARGET_X86) || defined(TARGET_ARM) || defined(UNIX_AMD64_ABI)

            // Otherwise we pass this struct by value on the stack.
            // Set 'howToPassStruct' and 'useType' to indicate that this is passed by value
            // according to the X86/ARM32 ABI.
            howToPassStruct = SPK_ByValue;
            useType         = TYP_STRUCT;

#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

            // Otherwise we pass this struct by reference to a copy.
            // Set 'howToPassStruct' and 'useType' to indicate that this is passed using one register
            // (by reference to a copy).
            howToPassStruct = SPK_ByReference;
            useType         = TYP_UNKNOWN;

#else //  TARGET_XXX

            noway_assert(!"Unhandled TARGET in getArgTypeForStruct");

#endif //  TARGET_XXX
        }
    }

    // 'howToPassStruct' must be set to one of the valid values before we return
    assert(howToPassStruct != SPK_Unknown);
    if (wbPassStruct != nullptr)
    {
        *wbPassStruct = howToPassStruct;
    }

    return useType;
}
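
// Worked examples (illustrative, arm64): an 8-byte struct wrapping a 'double'
// comes back from getPrimitiveTypeForStruct as TYP_DOUBLE => SPK_PrimitiveType;
// a 16-byte struct {long a; long b;} => SPK_ByValue/TYP_STRUCT in two integer
// registers; a 24-byte non-HFA struct exceeds TARGET_POINTER_SIZE * 2 =>
// SPK_ByReference, i.e. the caller makes a copy and passes its address in a
// single register.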
//-----------------------------------------------------------------------------
// getReturnTypeForStruct:
//     Get the type that is used to return values of the given struct type.
//     If you have already retrieved the struct size then it should be
//     passed as the optional third argument, as this allows us to avoid
//     an extra call to getClassSize(clsHnd)
//
// Arguments:
//    clsHnd         - the handle for the struct type
//    callConv       - the calling convention of the function
//                     that returns this struct.
//    wbReturnStruct - An "out" argument with information about how
//                     the struct is to be returned
//    structSize     - the size of the struct type,
//                     or zero if we should call getClassSize(clsHnd)
//
// Return Value:
//    For wbReturnStruct you can pass a 'nullptr' and nothing will be written
//    or returned for that out parameter.
//    When *wbReturnStruct is SPK_PrimitiveType this method's return value
//    is the primitive type used to return the struct.
//    When *wbReturnStruct is SPK_ByReference this method's return value
//    is always TYP_UNKNOWN and the struct type is returned using a return buffer.
//    When *wbReturnStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
//    is always TYP_STRUCT and the struct type is returned using multiple registers.
//
// Assumptions:
//    The size must be the size of the given type.
//    The given class handle must be for a value type (struct).
//
// Notes:
//    When the clsHnd is a one element HFA type then this method's return
//    value is the appropriate floating point primitive type and
//    *wbReturnStruct is SPK_PrimitiveType.
//    If there are two or more elements in the HFA type and the target supports
//    multireg return types then the return value is TYP_STRUCT and
//    *wbReturnStruct is SPK_ByValueAsHfa.
//    Additionally if there are two or more elements in the HFA type and
//    the target doesn't support multireg return types then it is treated
//    as if it wasn't an HFA type.
//    About returning TYP_STRUCT:
//    Whenever this method's return value is TYP_STRUCT it always means
//    that multiple registers are used to return this struct.
//
var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE     clsHnd,
                                           CorInfoCallConvExtension callConv,
                                           structPassingKind*       wbReturnStruct /* = nullptr */,
                                           unsigned                 structSize /* = 0 */)
{
    var_types         useType             = TYP_UNKNOWN;
    structPassingKind howToReturnStruct   = SPK_Unknown; // We must change this before we return
    bool              canReturnInRegister = true;

    assert(clsHnd != NO_CLASS_HANDLE);

    if (structSize == 0)
    {
        structSize = info.compCompHnd->getClassSize(clsHnd);
    }
    assert(structSize > 0);

#ifdef UNIX_AMD64_ABI
    // An 8-byte struct may need to be returned in a floating point register
    // So we always consult the struct "Classifier" routine
    //
    SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
    eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);

    if (structDesc.eightByteCount == 1)
    {
        assert(structSize <= sizeof(double));
        assert(structDesc.passedInRegisters);

        if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
        {
            // If this is returned as a floating type, use that.
            // Otherwise, leave as TYP_UNKNOWN and we'll sort things out below.
            useType           = GetEightByteType(structDesc, 0);
            howToReturnStruct = SPK_PrimitiveType;
        }
    }
    else
    {
        // Return classification is not always size based...
        canReturnInRegister = structDesc.passedInRegisters;
        if (!canReturnInRegister)
        {
            assert(structDesc.eightByteCount == 0);
            howToReturnStruct = SPK_ByReference;
            useType           = TYP_UNKNOWN;
        }
    }
#elif defined(TARGET_X86)
    if (callConv != CorInfoCallConvExtension::Managed && !isNativePrimitiveStructType(clsHnd))
    {
        canReturnInRegister = false;
        howToReturnStruct   = SPK_ByReference;
        useType             = TYP_UNKNOWN;
    }
#elif defined(TARGET_LOONGARCH64)
    if (structSize <= (TARGET_POINTER_SIZE * 2))
    {
        uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd);

        if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
        {
            howToReturnStruct = SPK_PrimitiveType;
            useType           = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT;
        }
        else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE))
        {
            howToReturnStruct = SPK_ByValue;
            useType           = TYP_STRUCT;
        }
    }
#elif defined(TARGET_RISCV64)
    if (structSize <= (TARGET_POINTER_SIZE * 2))
    {
        uint32_t floatFieldFlags = info.compCompHnd->getRISCV64PassStructInRegisterFlags(clsHnd);

        if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
        {
            howToReturnStruct = SPK_PrimitiveType;
            useType           = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT;
        }
        else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE))
        {
            howToReturnStruct = SPK_ByValue;
            useType           = TYP_STRUCT;
        }
    }
#endif

    if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) &&
        !isNativePrimitiveStructType(clsHnd))
    {
        canReturnInRegister = false;
        howToReturnStruct   = SPK_ByReference;
        useType             = TYP_UNKNOWN;
    }

    // Check for cases where a small struct is returned in a register
    // via a primitive type.
    //
    // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
    // so we can skip calling getPrimitiveTypeForStruct when we
    // have a struct that is larger than that.
    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
    {
        // We set the "primitive" useType based upon the structSize
        // and also examine the clsHnd to see if it is an HFA of count one
        //
        // The ABI for struct returns in varargs methods is the same as in the normal case,
        // so pass false for isVararg
        useType = getPrimitiveTypeForStruct(structSize, clsHnd, /*isVararg=*/false);

        if (useType != TYP_UNKNOWN)
        {
            if (structSize == genTypeSize(useType))
            {
                // Currently: 1, 2, 4, or 8 byte structs
                howToReturnStruct = SPK_PrimitiveType;
            }
            else
            {
                // Currently: 3, 5, 6, or 7 byte structs
                assert(structSize < genTypeSize(useType));
                howToReturnStruct = SPK_EnclosingType;
            }
        }
    }

    // Note this handles an odd case when FEATURE_MULTIREG_RET is disabled and HFAs are enabled
    //
    // getPrimitiveTypeForStruct will return TYP_UNKNOWN for a struct that is an HFA of two floats
    // because when HFAs are enabled, normally we would use two FP registers to pass or return it
    //
    // But if we don't have support for multiple register return types, we have to change this.
    // Since what we have is an 8-byte struct (float + float) we change useType to TYP_I_IMPL
    // so that the struct is returned instead using an 8-byte integer register.
    //
    if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
    {
        useType           = TYP_I_IMPL;
        howToReturnStruct = SPK_PrimitiveType;
    }

    // Did we change this struct type into a simple "primitive" type?
    if (useType != TYP_UNKNOWN)
    {
        // If so, we should have already set howToReturnStruct, too.
        assert(howToReturnStruct != SPK_Unknown);
    }
    else if (canReturnInRegister) // We can't replace the struct with a "primitive" type
    {
        // See if we can return this struct by value, possibly in multiple registers
        // or if we should return it using a return buffer register
        //
        if ((FEATURE_MULTIREG_RET == 1) && (structSize <= MAX_RET_MULTIREG_BYTES))
        {
            // Structs that are HFAs are returned in multiple registers
            if (IsHfa(clsHnd))
            {
                // HFAs of count one should have been handled by getPrimitiveTypeForStruct
                assert(GetHfaCount(clsHnd) >= 2);

                // Set 'howToReturnStruct' and 'useType' to indicate that this is returned by value
                // as an HFA using multiple registers.
                howToReturnStruct = SPK_ByValueAsHfa;
                useType           = TYP_STRUCT;
            }
            else // Not an HFA struct type
            {
#ifdef UNIX_AMD64_ABI
                // The cases of (structDesc.eightByteCount == 1) and (structDesc.eightByteCount == 0)
                // should have already been handled
                assert(structDesc.eightByteCount > 1);
                // Set 'howToReturnStruct' and 'useType' to indicate that this is returned by value
                // in multiple registers.
                howToReturnStruct = SPK_ByValue;
                useType           = TYP_STRUCT;
                assert(structDesc.passedInRegisters == true);

#elif defined(TARGET_ARM64)

                // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
                assert(structSize > TARGET_POINTER_SIZE);

                // On ARM64 structs that are 9-16 bytes are returned by value in multiple registers
                //
                if (structSize <= (TARGET_POINTER_SIZE * 2))
                {
                    // Set 'howToReturnStruct' and 'useType' to indicate that this is returned by value
                    // in multiple registers.
                    howToReturnStruct = SPK_ByValue;
                    useType           = TYP_STRUCT;
                }
                else // a structSize that is 17-32 bytes in size
                {
                    // Otherwise we return this struct using a return buffer.
                    // Set 'howToReturnStruct' and 'useType' to indicate that this is returned using a
                    // return buffer register (reference to a return buffer).
                    howToReturnStruct = SPK_ByReference;
                    useType           = TYP_UNKNOWN;
                }

#elif defined(TARGET_X86)

                // Only 8-byte structs are returned in multiple registers.
                // We also only support multireg struct returns on x86 to match the native calling convention.
                // So return 8-byte structs only when the calling convention is a native calling convention.
                if (structSize == MAX_RET_MULTIREG_BYTES && callConv != CorInfoCallConvExtension::Managed)
                {
                    // Set 'howToReturnStruct' and 'useType' to indicate that this is returned by value
                    // in multiple registers.
                    howToReturnStruct = SPK_ByValue;
                    useType           = TYP_STRUCT;
                }
                else
                {
                    // Otherwise we return this struct using a return buffer.
                    // Set 'howToReturnStruct' and 'useType' to indicate that this is returned using a
                    // return buffer register (reference to a return buffer).
                    howToReturnStruct = SPK_ByReference;
                    useType           = TYP_UNKNOWN;
                }

#elif defined(TARGET_ARM)

                // Otherwise we return this struct using a return buffer.
                // Set 'howToReturnStruct' and 'useType' to indicate that this is returned using a
                // return buffer register (reference to a return buffer).
                howToReturnStruct = SPK_ByReference;
                useType           = TYP_UNKNOWN;

#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

                // On LOONGARCH64/RISCV64 structs that are 1-16 bytes are returned by value in one or two registers
                howToReturnStruct = SPK_ByValue;
                useType           = TYP_STRUCT;

#else //  TARGET_XXX

                noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");

#endif //  TARGET_XXX
            }
        }
        else // (structSize > MAX_RET_MULTIREG_BYTES) || (FEATURE_MULTIREG_RET == 0)
        {
            // We have a (large) struct that can't be replaced with a "primitive" type
            // and can't be returned in multiple registers
            //
            // We return this struct using a return buffer register.
            // Set 'howToReturnStruct' and 'useType' to indicate that this is returned using a
            // return buffer register (reference to a return buffer).
            howToReturnStruct = SPK_ByReference;
            useType           = TYP_UNKNOWN;
        }
    }

    // 'howToReturnStruct' must be set to one of the valid values before we return
    assert(howToReturnStruct != SPK_Unknown);
    if (wbReturnStruct != nullptr)
    {
        *wbReturnStruct = howToReturnStruct;
    }

    return useType;
}
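
// Worked examples (illustrative, arm64): struct {double a; double b;} is an
// HFA of count two, so it is returned SPK_ByValueAsHfa in two FP registers;
// struct {long a; long b;} (16 bytes) is returned SPK_ByValue in two integer
// registers; a 24-byte struct is returned SPK_ByReference, i.e. the caller
// allocates a return buffer and passes its address.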
///////////////////////////////////////////////////////////////////////////////
//
// MEASURE_NOWAY: code to measure and rank dynamic occurrences of noway_assert.
// (Just the appearances of noway_assert, whether the assert is true or false.)
// This might help characterize the cost of noway_assert in non-DEBUG builds,
// or determine which noway_assert should be simple DEBUG-only asserts.
//
///////////////////////////////////////////////////////////////////////////////

#if MEASURE_NOWAY

struct FileLine
{
    char*    m_file;
    unsigned m_line;
    char*    m_condStr;

    FileLine() : m_file(nullptr), m_line(0), m_condStr(nullptr)
    {
    }

    FileLine(const char* file, unsigned line, const char* condStr) : m_line(line)
    {
        size_t newSize = (strlen(file) + 1) * sizeof(char);
        m_file         = HostAllocator::getHostAllocator().allocate<char>(newSize);
        strcpy_s(m_file, newSize, file);

        newSize   = (strlen(condStr) + 1) * sizeof(char);
        m_condStr = HostAllocator::getHostAllocator().allocate<char>(newSize);
        strcpy_s(m_condStr, newSize, condStr);
    }

    FileLine(const FileLine& other)
    {
        m_file    = other.m_file;
        m_line    = other.m_line;
        m_condStr = other.m_condStr;
    }

    // GetHashCode() and Equals() are needed by JitHashTable

    static unsigned GetHashCode(FileLine fl)
    {
        assert(fl.m_file != nullptr);
        unsigned code = fl.m_line;
        for (const char* p = fl.m_file; *p != '\0'; p++)
        {
            code += *p;
        }
        // Could also add condStr.
        return code;
    }

    static bool Equals(FileLine fl1, FileLine fl2)
    {
        return (fl1.m_line == fl2.m_line) && (0 == strcmp(fl1.m_file, fl2.m_file));
    }
};

typedef JitHashTable<FileLine, FileLine, size_t, HostAllocator> FileLineToCountMap;
FileLineToCountMap* NowayAssertMap;
void Compiler::RecordNowayAssert(const char* filename, unsigned line, const char* condStr)
{
    if (NowayAssertMap == nullptr)
    {
        NowayAssertMap = new (HostAllocator::getHostAllocator()) FileLineToCountMap(HostAllocator::getHostAllocator());
    }
    FileLine fl(filename, line, condStr);
    size_t*  pCount = NowayAssertMap->LookupPointer(fl);
    if (pCount == nullptr)
    {
        NowayAssertMap->Set(fl, 1);
    }
    else
    {
        ++(*pCount);
    }
}

void RecordNowayAssertGlobal(const char* filename, unsigned line, const char* condStr)
{
    if ((JitConfig.JitMeasureNowayAssert() == 1) && (JitTls::GetCompiler() != nullptr))
    {
        JitTls::GetCompiler()->RecordNowayAssert(filename, line, condStr);
    }
}
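
// Illustrative sketch (hypothetical macro, not the real definition in the JIT
// headers): a MEASURE_NOWAY build can route every noway_assert site through
// the global recorder above before evaluating the condition:
//
//     #define noway_assert(cond)                                              \
//         do                                                                  \
//         {                                                                   \
//             RecordNowayAssertGlobal(__FILE__, __LINE__, #cond);             \
//             if (!(cond))                                                    \
//             {                                                               \
//                 /* fail the compilation (noWayAssertBody in the real JIT) */\
//             }                                                               \
//         } while (0)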
struct NowayAssertCountMap
{
    size_t   count;
    FileLine fl;

    NowayAssertCountMap() : count(0)
    {
    }

    struct compare
    {
        bool operator()(const NowayAssertCountMap& elem1, const NowayAssertCountMap& elem2)
        {
            return (ssize_t)elem2.count < (ssize_t)elem1.count; // sort in descending order
        }
    };
};

void DisplayNowayAssertMap()
{
    if (NowayAssertMap != nullptr)
    {
        FILE* fout;

        LPCWSTR strJitMeasureNowayAssertFile = JitConfig.JitMeasureNowayAssertFile();
        if (strJitMeasureNowayAssertFile != nullptr)
        {
            fout = _wfopen(strJitMeasureNowayAssertFile, W("a"));
            if (fout == nullptr)
            {
                fprintf(jitstdout(), "Failed to open JitMeasureNowayAssertFile \"%ws\"\n",
                        strJitMeasureNowayAssertFile);
                return;
            }
        }
        else
        {
            fout = jitstdout();
        }

        // Iterate noway assert map, create sorted table by occurrence, dump it.
        unsigned             count = NowayAssertMap->GetCount();
        NowayAssertCountMap* nacp  = new NowayAssertCountMap[count];
        unsigned             i     = 0;

        for (FileLineToCountMap::Node* const iter : FileLineToCountMap::KeyValueIteration(NowayAssertMap))
        {
            nacp[i].count = iter->GetValue();
            nacp[i].fl    = iter->GetKey();
            ++i;
        }

        jitstd::sort(nacp, nacp + count, NowayAssertCountMap::compare());

        if (fout == jitstdout())
        {
            // Don't output the header if writing to a file, since we'll be appending to existing dumps in that case.
            fprintf(fout, "\nnoway_assert counts:\n");
            fprintf(fout, "count, file, line, text\n");
        }

        for (i = 0; i < count; i++)
        {
            fprintf(fout, "%u, %s, %u, \"%s\"\n", (unsigned)nacp[i].count, nacp[i].fl.m_file, nacp[i].fl.m_line,
                    nacp[i].fl.m_condStr);
        }

        if (fout != jitstdout())
        {
            fclose(fout);
        }
    }
}

#endif // MEASURE_NOWAY
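
// Sample of the emitted ranking (hypothetical numbers, matching the fprintf
// format above):
//
//     noway_assert counts:
//     count, file, line, text
//     1024, morph.cpp, 4242, "op2 != nullptr"
//     311, lower.cpp, 1777, "varTypeIsIntegral(node)"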
#if MEASURE_BLOCK_SIZE
size_t genFlowNodeSize;
size_t genFlowNodeCnt;
#endif // MEASURE_BLOCK_SIZE

/*****************************************************************************/
// We keep track of methods we've already compiled.

/*****************************************************************************
 *  Declare the statics
 */

LONG Compiler::s_compMethodsCount = 0; // to produce unique label names

#if MEASURE_MEM_ALLOC
/* static */
bool Compiler::s_dspMemStats = false;
#endif // MEASURE_MEM_ALLOC

#ifndef PROFILING_SUPPORTED
const bool Compiler::Options::compNoPInvokeInlineCB = false;
#endif // !PROFILING_SUPPORTED
/*****************************************************************************
 *
 *  One time initialization code
 */

/* static */
void Compiler::compStartup()
{
#if DISPLAY_SIZES
    grossVMsize = grossNCsize = totalNCsize = 0;
#endif // DISPLAY_SIZES

    /* Initialize the table of tree node sizes */

    GenTree::InitNodeSize();

#ifdef JIT32_GCENCODER
    // Initialize the GC encoder lookup table
    GCInfo::gcInitEncoderLookupTable();
#endif

    /* Initialize the emitter */

    emitter::emitInit();

    // Static vars of ValueNumStore
    ValueNumStore::ValidateValueNumStoreStatics();

    compDisplayStaticSizes();
}
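
// Usage note (illustrative): the JIT host invokes these one-time hooks around
// the lifetime of the JIT, pairing compStartup() with compShutdown() below;
// per-method state is set up separately in compInit(). A hypothetical host
// sequence:
//
//     Compiler::compStartup();
//     // ... compile many methods ...
//     Compiler::compShutdown();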
/*****************************************************************************
 *
 *  One time finalization code
 */

/* static */
void Compiler::compShutdown()
{
    if (s_pAltJitExcludeAssembliesList != nullptr)
    {
        s_pAltJitExcludeAssembliesList->~AssemblyNamesList2(); // call the destructor
        s_pAltJitExcludeAssembliesList = nullptr;
    }

    if (s_pJitDisasmIncludeAssembliesList != nullptr)
    {
        s_pJitDisasmIncludeAssembliesList->~AssemblyNamesList2(); // call the destructor
        s_pJitDisasmIncludeAssembliesList = nullptr;
    }

#if MEASURE_NOWAY
    DisplayNowayAssertMap();
#endif // MEASURE_NOWAY

    /* Shut down the emitter */

    emitter::emitDone();

#if defined(DEBUG) || defined(INLINE_DATA)
    // Finish reading and/or writing inline xml
    if (JitConfig.JitInlineDumpXmlFile() != nullptr)
    {
        FILE* file = _wfopen(JitConfig.JitInlineDumpXmlFile(), W("a"));
        if (file != nullptr)
        {
            InlineStrategy::FinalizeXml(file);
            fclose(file);
        }
        else
        {
            InlineStrategy::FinalizeXml();
        }
    }
#endif // defined(DEBUG) || defined(INLINE_DATA)

#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
    if (genMethodCnt == 0)
    {
        return;
    }
#endif

#if NODEBASH_STATS
    GenTree::ReportOperBashing(jitstdout());
#endif

#ifdef FEATURE_JIT_METHOD_PERF
    if (compJitTimeLogFilename != nullptr)
    {
        FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
        if (jitTimeLogFile != nullptr)
        {
            CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
            fclose(jitTimeLogFile);
        }
    }

    JitTimer::Shutdown();
#endif // FEATURE_JIT_METHOD_PERF
#if COUNT_AST_OPERS

    // Add up all the counts so that we can show percentages of total
    unsigned totalCount = 0;
    for (unsigned op = 0; op < GT_COUNT; op++)
    {
        totalCount += GenTree::s_gtNodeCounts[op];
    }

    if (totalCount > 0)
    {
        struct OperInfo
        {
            unsigned   Count;
            unsigned   Size;
            genTreeOps Oper;
        };

        OperInfo opers[GT_COUNT];
        for (unsigned op = 0; op < GT_COUNT; op++)
        {
            opers[op] = {GenTree::s_gtNodeCounts[op], GenTree::s_gtTrueSizes[op], static_cast<genTreeOps>(op)};
        }

        jitstd::sort(opers, opers + ArrLen(opers), [](const OperInfo& l, const OperInfo& r) {
            // Sort in descending order; the comparison must be strict ('>', not '>=')
            // to form a valid strict weak ordering for the sort.
            return l.Count > r.Count;
        });

        unsigned remainingCount      = totalCount;
        unsigned remainingCountLarge = 0;
        unsigned remainingCountSmall = 0;

        unsigned countLarge = 0;
        unsigned countSmall = 0;

        jitprintf("\nGenTree operator counts (approximate):\n\n");

        for (OperInfo oper : opers)
        {
            unsigned size       = oper.Size;
            unsigned count      = oper.Count;
            double   percentage = 100.0 * count / totalCount;

            if (size > TREE_NODE_SZ_SMALL)
            {
                countLarge += count;
            }
            else
            {
                countSmall += count;
            }

            // Let's not show anything below a threshold
            if (percentage >= 0.5)
            {
                jitprintf("    GT_%-17s   %7u (%4.1lf%%) %3u bytes each\n", GenTree::OpName(oper.Oper), count,
                          percentage, size);
                remainingCount -= count;
            }
            else
            {
                if (size > TREE_NODE_SZ_SMALL)
                {
                    remainingCountLarge += count;
                }
                else
                {
                    remainingCountSmall += count;
                }
            }
        }

        if (remainingCount > 0)
        {
            jitprintf("    All other GT_xxx ...   %7u (%4.1lf%%) ... %4.1lf%% small + %4.1lf%% large\n",
                      remainingCount, 100.0 * remainingCount / totalCount, 100.0 * remainingCountSmall / totalCount,
                      100.0 * remainingCountLarge / totalCount);
        }
        jitprintf("    -----------------------------------------------------\n");
        jitprintf("    Total    .......   %11u --ALL-- ... %4.1lf%% small + %4.1lf%% large\n", totalCount,
                  100.0 * countSmall / totalCount, 100.0 * countLarge / totalCount);
    }

#endif // COUNT_AST_OPERS
#if DISPLAY_SIZES

    if (grossVMsize && grossNCsize)
    {
        jitprintf("\n");
        jitprintf("--------------------------------------\n");
        jitprintf("Function and GC info size stats\n");
        jitprintf("--------------------------------------\n");

        jitprintf("[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, grossNCsize, Target::g_tgtCPUName,
                  100 * grossNCsize / grossVMsize, "Total (excluding GC info)");

        jitprintf("[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, totalNCsize, Target::g_tgtCPUName,
                  100 * totalNCsize / grossVMsize, "Total (including GC info)");

        if (gcHeaderISize || gcHeaderNSize)
        {
            jitprintf("GC tables   : [%7uI,%7uN] %7u byt (%u%% of IL, %u%% of %s).\n", gcHeaderISize + gcPtrMapISize,
                      gcHeaderNSize + gcPtrMapNSize, totalNCsize - grossNCsize,
                      100 * (totalNCsize - grossNCsize) / grossVMsize, 100 * (totalNCsize - grossNCsize) / grossNCsize,
                      Target::g_tgtCPUName);

            jitprintf("GC headers  : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcHeaderISize,
                      gcHeaderNSize, gcHeaderISize + gcHeaderNSize, (float)gcHeaderISize / (genMethodICnt + 0.001),
                      (float)gcHeaderNSize / (genMethodNCnt + 0.001),
                      (float)(gcHeaderISize + gcHeaderNSize) / genMethodCnt);

            jitprintf("GC ptr maps : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcPtrMapISize,
                      gcPtrMapNSize, gcPtrMapISize + gcPtrMapNSize, (float)gcPtrMapISize / (genMethodICnt + 0.001),
                      (float)gcPtrMapNSize / (genMethodNCnt + 0.001),
                      (float)(gcPtrMapISize + gcPtrMapNSize) / genMethodCnt);
        }
        else
        {
            jitprintf("GC tables take up %u bytes (%u%% of instr, %u%% of %6s code).\n", totalNCsize - grossNCsize,
                      100 * (totalNCsize - grossNCsize) / grossVMsize, 100 * (totalNCsize - grossNCsize) / grossNCsize,
                      Target::g_tgtCPUName);
        }
    }

#ifdef DOUBLE_ALIGN
    jitprintf("%u out of %u methods generated with double-aligned stack\n", Compiler::s_lvaDoubleAlignedProcsCount,
              genMethodCnt);
#endif

#endif // DISPLAY_SIZES
#if CALL_ARG_STATS
    compDispCallArgStats(jitstdout());
#endif

#if COUNT_BASIC_BLOCKS
    jitprintf("--------------------------------------------------\n");
    jitprintf("Basic block count frequency table:\n");
    jitprintf("--------------------------------------------------\n");
    bbCntTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

    jitprintf("\n");

    jitprintf("--------------------------------------------------\n");
    jitprintf("IL method size frequency table for methods with a single basic block:\n");
    jitprintf("--------------------------------------------------\n");
    bbOneBBSizeTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

    jitprintf("--------------------------------------------------\n");
    jitprintf("fgComputeDoms `while (change)` iterations:\n");
    jitprintf("--------------------------------------------------\n");
    domsChangedIterationTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

    jitprintf("--------------------------------------------------\n");
    jitprintf("fgComputeReachabilitySets `while (change)` iterations:\n");
    jitprintf("--------------------------------------------------\n");
    computeReachabilitySetsIterationTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

    jitprintf("--------------------------------------------------\n");
    jitprintf("fgComputeReachability `while (change)` iterations:\n");
    jitprintf("--------------------------------------------------\n");
    computeReachabilityIterationTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

#endif // COUNT_BASIC_BLOCKS
#if COUNT_LOOPS

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("Loop stats\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("Total number of methods with loops is %5u\n", totalLoopMethods);
    jitprintf("Total number of loops is %5u\n", totalLoopCount);
    jitprintf("Maximum number of loops per method is %5u\n", maxLoopsPerMethod);
    jitprintf("# of methods overflowing nat loop table is %5u\n", totalLoopOverflows);
    jitprintf("Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount);
    jitprintf("# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows);
    jitprintf("Total number of loops with an iterator is %5u\n", iterLoopCount);
    jitprintf("Total number of loops with a constant iterator is %5u\n", constIterLoopCount);

    jitprintf("--------------------------------------------------\n");
    jitprintf("Loop count frequency table:\n");
    jitprintf("--------------------------------------------------\n");
    loopCountTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");
    jitprintf("Loop exit count frequency table:\n");
    jitprintf("--------------------------------------------------\n");
    loopExitCountTable.dump(jitstdout());
    jitprintf("--------------------------------------------------\n");

#endif // COUNT_LOOPS
#if MEASURE_NODE_SIZE

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("GenTree node allocation stats\n");
    jitprintf("---------------------------------------------------\n");

    jitprintf("Allocated %6I64u tree nodes (%7I64u bytes total, avg %4I64u bytes per method)\n",
              genNodeSizeStats.genTreeNodeCnt, genNodeSizeStats.genTreeNodeSize,
              genNodeSizeStats.genTreeNodeSize / genMethodCnt);

    jitprintf("Allocated %7I64u bytes of unused tree node space (%3.2f%%)\n",
              genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize,
              (float)(100 * (genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize)) /
                  genNodeSizeStats.genTreeNodeSize);

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("Distribution of per-method GenTree node counts:\n");
    genTreeNcntHist.dump(jitstdout());

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("Distribution of per-method GenTree node allocations (in bytes):\n");
    genTreeNsizHist.dump(jitstdout());

#endif // MEASURE_NODE_SIZE
#if MEASURE_BLOCK_SIZE

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("BasicBlock and FlowEdge/BasicBlockList allocation stats\n");
    jitprintf("---------------------------------------------------\n");

    jitprintf("Allocated %6u basic blocks (%7u bytes total, avg %4u bytes per method)\n", BasicBlock::s_Count,
              BasicBlock::s_Size, BasicBlock::s_Size / genMethodCnt);
    jitprintf("Allocated %6u flow nodes (%7u bytes total, avg %4u bytes per method)\n", genFlowNodeCnt,
              genFlowNodeSize, genFlowNodeSize / genMethodCnt);

#endif // MEASURE_BLOCK_SIZE

#if MEASURE_MEM_ALLOC

    if (s_dspMemStats)
    {
        jitprintf("\nAll allocations:\n");
        ArenaAllocator::dumpAggregateMemStats(jitstdout());

        jitprintf("\nLargest method:\n");
        ArenaAllocator::dumpMaxMemStats(jitstdout());

        jitprintf("\n");
        jitprintf("---------------------------------------------------\n");
        jitprintf("Distribution of total memory allocated per method (in KB):\n");
        memAllocHist.dump(jitstdout());

        jitprintf("\n");
        jitprintf("---------------------------------------------------\n");
        jitprintf("Distribution of total memory used per method (in KB):\n");
        memUsedHist.dump(jitstdout());
    }

#endif // MEASURE_MEM_ALLOC
#if LOOP_HOIST_STATS
#ifdef DEBUG // Always display loop stats in retail
    if (JitConfig.DisplayLoopHoistStats() != 0)
#endif // DEBUG
    {
        PrintAggregateLoopHoistStats(jitstdout());
    }
#endif // LOOP_HOIST_STATS

#if TRACK_ENREG_STATS
    if (JitConfig.JitEnregStats() != 0)
    {
        s_enregisterStats.Dump(jitstdout());
    }
#endif // TRACK_ENREG_STATS
#if MEASURE_PTRTAB_SIZE

    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("GC pointer table stats\n");
    jitprintf("---------------------------------------------------\n");

    jitprintf("Reg pointer descriptor size (internal): %8u (avg %4u per method)\n", GCInfo::s_gcRegPtrDscSize,
              GCInfo::s_gcRegPtrDscSize / genMethodCnt);

    jitprintf("Total pointer table size: %8u (avg %4u per method)\n", GCInfo::s_gcTotalPtrTabSize,
              GCInfo::s_gcTotalPtrTabSize / genMethodCnt);

#endif // MEASURE_PTRTAB_SIZE

#if MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES

    if (genMethodCnt != 0)
    {
        jitprintf("\n");
        jitprintf("A total of %6u methods compiled", genMethodCnt);
#if DISPLAY_SIZES
        if (genMethodICnt || genMethodNCnt)
        {
            jitprintf(" (%u interruptible, %u non-interruptible)", genMethodICnt, genMethodNCnt);
        }
#endif // DISPLAY_SIZES
        jitprintf("\n");
    }

#endif // MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES

#if EMITTER_STATS
    emitterStats(jitstdout());
#endif

#if MEASURE_FATAL
    jitprintf("\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("Fatal errors stats\n");
    jitprintf("---------------------------------------------------\n");
    jitprintf("   badCode:             %u\n", fatal_badCode);
    jitprintf("   noWay:               %u\n", fatal_noWay);
    jitprintf("   implLimitation:      %u\n", fatal_implLimitation);
    jitprintf("   NOMEM:               %u\n", fatal_NOMEM);
    jitprintf("   noWayAssertBody:     %u\n", fatal_noWayAssertBody);
#ifdef DEBUG
    jitprintf("   noWayAssertBodyArgs: %u\n", fatal_noWayAssertBodyArgs);
#endif // DEBUG
    jitprintf("   NYI:                 %u\n", fatal_NYI);
#endif // MEASURE_FATAL
}
/*****************************************************************************
 *  Display static data structure sizes.
 */

/* static */
void Compiler::compDisplayStaticSizes()
{
#if MEASURE_NODE_SIZE
    GenTree::DumpNodeSizes();
#endif

#if EMITTER_STATS
    emitterStaticStats();
#endif
}
1766 /*****************************************************************************
1770 void Compiler::compInit(ArenaAllocator* pAlloc,
1771 CORINFO_METHOD_HANDLE methodHnd,
1772 COMP_HANDLE compHnd,
1773 CORINFO_METHOD_INFO* methodInfo,
1774 InlineInfo* inlineInfo)
1777 compArenaAllocator = pAlloc;
1779 // Inlinee Compile object will only be allocated when needed for the 1st time.
1780 InlineeCompiler = nullptr;
1782 // Set the inline info.
1783 impInlineInfo = inlineInfo;
1784 info.compCompHnd = compHnd;
1785 info.compMethodHnd = methodHnd;
1786 info.compMethodInfo = methodInfo;
1787 info.compClassHnd = compHnd->getMethodClass(methodHnd);
1790 bRangeAllowStress = false;
1792 // set this early so we can use it without relying on random memory values
1793 verbose = compIsForInlining() ? impInlineInfo->InlinerCompiler->verbose : false;
1795 compNumStatementLinksTraversed = 0;
1796 compPoisoningAnyImplicitByrefs = false;
1799 #if defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS || DUMP_GC_TABLES
1800 // Initialize the method name and related info, as it is used early in determining whether to
1801 // apply stress modes, and which ones to apply.
1802 // Note that even allocating memory can invoke the stress mechanism, so ensure that both
1803 // 'compMethodName' and 'compFullName' are either null or valid before we allocate.
1804 // (The stress mode checks references these prior to checking bRangeAllowStress.)
1806 info.compMethodName = nullptr;
1807 info.compClassName = nullptr;
1808 info.compFullName = nullptr;
1810 info.compMethodName = eeGetMethodName(methodHnd);
1811 info.compClassName = eeGetClassName(info.compClassHnd);
1812 info.compFullName = eeGetMethodFullName(methodHnd);
1813 info.compPerfScore = 0.0;
1815 info.compMethodSuperPMIIndex = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), -1);
1816 #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS
1818 #if defined(DEBUG) || defined(INLINE_DATA)
1819 info.compMethodHashPrivate = 0;
1820 #endif // defined(DEBUG) || defined(INLINE_DATA)
1823 // Opt-in to jit stress based on method hash ranges.
1825 // Note the default (with JitStressRange not set) is that all
1826 // methods will be subject to stress.
1827 static ConfigMethodRange fJitStressRange;
1828 fJitStressRange.EnsureInit(JitConfig.JitStressRange());
1829 assert(!fJitStressRange.Error());
1830 bRangeAllowStress = fJitStressRange.Contains(info.compMethodHash());
1833 eeInfoInitialized = false;
1835 compDoAggressiveInlining = false;
1837 if (compIsForInlining())
1839 m_inlineStrategy = nullptr;
1840 compInlineResult = inlineInfo->inlineResult;
1844 m_inlineStrategy = new (this, CMK_Inlining) InlineStrategy(this);
1845 compInlineResult = nullptr;
1848 // Initialize this to the first phase to run.
1849 mostRecentlyActivePhase = PHASE_PRE_IMPORT;
1851 // Initially, no phase checks are active, and all dumps are enabled.
1852 activePhaseChecks = PhaseChecks::CHECK_NONE;
1853 activePhaseDumps = PhaseDumps::DUMP_ALL;
1855 #ifdef FEATURE_TRACELOGGING
1856 // Make sure JIT telemetry is initialized as soon as allocations can be made
1857 // but no later than a point where noway_asserts can be thrown.
1858 // 1. JIT telemetry could allocate some objects internally.
1859 // 2. NowayAsserts are tracked through telemetry.
1860 // Note: JIT telemetry could gather data when the compiler is not fully initialized,
1861 // so initialize any compiler variables you use for telemetry first.
1862 assert((unsigned)PHASE_PRE_IMPORT == 0);
1863 info.compILCodeSize = 0;
1864 info.compMethodHnd = nullptr;
1865 compJitTelemetry.Initialize(this);
1872 if (!compIsForInlining())
1874 codeGen = getCodeGenerator(this);
1876 compVarScopeMap = nullptr;
1878 // If this method were a real constructor for Compiler, these would
1879 // become method initializations.
1880 impPendingBlockMembers = JitExpandArray<BYTE>(getAllocator());
1881 impSpillCliquePredMembers = JitExpandArray<BYTE>(getAllocator());
1882 impSpillCliqueSuccMembers = JitExpandArray<BYTE>(getAllocator());
1884 new (&genIPmappings, jitstd::placement_t()) jitstd::list<IPmappingDsc>(getAllocator(CMK_DebugInfo));
1885 new (&genRichIPmappings, jitstd::placement_t()) jitstd::list<RichIPMapping>(getAllocator(CMK_DebugOnly));
1887 lvMemoryPerSsaData = SsaDefArray<SsaMemDef>();
1890 // Initialize all the per-method statistics gathering data structures.
1895 #if LOOP_HOIST_STATS
1896 m_loopsConsidered = 0;
1897 m_curLoopHasHoistedExpression = false;
1898 m_loopsWithHoistedExpressions = 0;
1899 m_totalHoistedExpressions = 0;
1900 #endif // LOOP_HOIST_STATS
1901 #if MEASURE_NODE_SIZE
1902 genNodeSizeStatsPerFunc.Init();
1903 #endif // MEASURE_NODE_SIZE
1910 compJmpOpUsed = false;
1911 compLongUsed = false;
1912 compTailCallUsed = false;
1913 compTailPrefixSeen = false;
1914 compMayConvertTailCallToLoop = false;
1915 compLocallocSeen = false;
1916 compLocallocUsed = false;
1917 compLocallocOptimized = false;
1918 compQmarkRationalized = false;
1919 compQmarkUsed = false;
1920 compFloatingPointUsed = false;
1922 compSuppressedZeroInit = false;
1924 compNeedsGSSecurityCookie = false;
1925 compGSReorderStackLayout = false;
1927 compGeneratingProlog = false;
1928 compGeneratingEpilog = false;
1929 compGeneratingUnwindProlog = false;
1930 compGeneratingUnwindEpilog = false;
1932 compPostImportationCleanupDone = false;
1933 compLSRADone = false;
1934 compRationalIRForm = false;
1937 compCodeGenDone = false;
1938 opts.compMinOptsIsUsed = false;
1940 opts.compMinOptsIsSet = false;
1942 // Used by fgFindJumpTargets for inlining heuristics.
1943 opts.instrCount = 0;
1945 // Used to track when we should consider running EarlyProp
1947 optNoReturnCallCount = 0;
1950 m_nodeTestData = nullptr;
1951 m_loopHoistCSEClass = FIRST_LOOP_HOIST_CSE_CLASS;
1953 m_switchDescMap = nullptr;
1954 m_blockToEHPreds = nullptr;
1955 m_fieldSeqStore = nullptr;
1956 m_refAnyClass = nullptr;
1957 for (MemoryKind memoryKind : allMemoryKinds())
1959 m_memorySsaMap[memoryKind] = nullptr;
1963 if (!compIsForInlining())
1965 compDoComponentUnitTestsOnce();
1970 m_outlinedCompositeSsaNums = nullptr;
1971 m_nodeToLoopMemoryBlockMap = nullptr;
1972 m_signatureToLookupInfoMap = nullptr;
1973 fgSsaPassesCompleted = 0;
1974 fgSsaChecksEnabled = false;
1975 fgVNPassesCompleted = 0;
1977 // check that HelperCallProperties are initialized
1979 assert(s_helperCallProperties.IsPure(CORINFO_HELP_GETSHARED_GCSTATIC_BASE));
1980 assert(!s_helperCallProperties.IsPure(CORINFO_HELP_GETFIELDOBJ)); // quick sanity check
1982 // We start with the flow graph in tree-order
1983 fgOrder = FGOrderTree;
1985 m_classLayoutTable = nullptr;
1988 m_simdHandleCache = nullptr;
1989 #endif // FEATURE_SIMD
1991 compUsesThrowHelper = false;
1993 m_preferredInitCctor = CORINFO_HELP_UNDEF;
1996 /*****************************************************************************
2001 void Compiler::compDone()
2005 void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
2006 void** ppIndirection) /* OUT */
2010 if (info.compMatchedVM)
2012 addr = info.compCompHnd->getHelperFtn(ftnNum, ppIndirection);
2016 // If we don't have a matched VM, we won't get valid results when asking for a helper function.
2017 addr = UlongToPtr(0xCA11CA11); // "callcall"
2023 unsigned Compiler::compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd)
2025 var_types sigType = genActualType(JITtype2varType(cit));
2027 sigSize = genTypeSize(sigType);
2028 if (cit == CORINFO_TYPE_VALUECLASS)
2030 sigSize = info.compCompHnd->getClassSize(clsHnd);
2032 else if (cit == CORINFO_TYPE_REFANY)
2034 sigSize = 2 * TARGET_POINTER_SIZE;
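// Worked examples (illustrative, on a 64-bit target where TARGET_POINTER_SIZE == 8):
//   compGetTypeSize(CORINFO_TYPE_INT, ...)       -> 4  (genTypeSize of the actual type)
//   compGetTypeSize(CORINFO_TYPE_VALUECLASS, h)  -> info.compCompHnd->getClassSize(h)
//   compGetTypeSize(CORINFO_TYPE_REFANY, h)      -> 16 (2 * TARGET_POINTER_SIZE)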
2040 static bool DidComponentUnitTests = false;
2042 void Compiler::compDoComponentUnitTestsOnce()
2044 if (!JitConfig.RunComponentUnitTests())
2049 if (!DidComponentUnitTests)
2051 DidComponentUnitTests = true;
2052 ValueNumStore::RunTests(this);
2053 BitSetSupport::TestSuite(getAllocatorDebugOnly());
2057 //------------------------------------------------------------------------
2058 // compGetJitDefaultFill:
2061 // An unsigned char value used to initialize memory allocated by the JIT.
2062 // The default value is taken from DOTNET_JitDefaultFill; if that is not set,
2063 // the value will be 0xdd. When JitStress is active, a random value based
2064 // on the method hash is used.
2067 // Note that we can't use small values like zero, because we have some
2068 // asserts that can fire for such values.
2071 unsigned char Compiler::compGetJitDefaultFill(Compiler* comp)
2073 unsigned char defaultFill = (unsigned char)JitConfig.JitDefaultFill();
2075 if (comp != nullptr && comp->compStressCompile(STRESS_GENERIC_VARN, 50))
2078 temp = comp->info.compMethodHash();
2079 temp = (temp >> 16) ^ temp;
2080 temp = (temp >> 8) ^ temp;
2082 // Asserts like this: assert(!IsUninitialized(stkLvl));
2083 // mean that small values for defaultFill are problematic,
2084 // so we make the value larger in that case.
2090 // Make a misaligned pointer value to reduce the probability of getting a valid value and firing
2091 // assert(!IsUninitialized(pointer)).
2094 defaultFill = (unsigned char)temp;
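// Worked example of the hash fold above (illustrative): for a method hash of
// 0x12345678,
//   temp = 0x12345678 ^ (0x12345678 >> 16) = 0x1234444C
//   temp = 0x1234444C ^ (0x1234444C >> 8)  = 0x12267008
// giving a fill byte of 0x08 before the "make it larger" adjustment described
// above kicks in.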
2100 /*****************************************************************************/
2102 VarName Compiler::compVarName(regNumber reg, bool isFloatReg)
2106 assert(genIsValidFloatReg(reg));
2110 assert(genIsValidReg(reg));
2113 if ((info.compVarScopesCount > 0) && compCurBB && opts.varNames)
2118 /* Look for the matching register */
2119 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
2121 /* If the variable is not in a register, or not in the register we're looking for, quit. */
2122 /* Also, if it is a compiler generated variable (i.e. slot# > info.compVarScopesCount), don't bother. */
2123 if ((varDsc->lvRegister != 0) && (varDsc->GetRegNum() == reg) &&
2124 (varDsc->lvSlotNum < info.compVarScopesCount))
2126 /* check if variable in that register is live */
2127 if (VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
2129 /* variable is live - find the corresponding slot */
2130 VarScopeDsc* varScope =
2131 compFindLocalVar(varDsc->lvSlotNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
2134 return varScope->vsdName;
2146 const char* Compiler::compRegVarName(regNumber reg, bool displayVar, bool isFloatReg)
2149 isFloatReg = genIsValidFloatReg(reg);
2153 if (displayVar && (reg != REG_NA))
2155 VarName varName = compVarName(reg, isFloatReg);
2159 const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
2160 static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when we have 2
2161 // consecutive calls before printing
2162 static int index = 0; // for circular index into the name array
2164 index = (index + 1) % 2; // circular reuse of index
2165 sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "%s'%s'", getRegName(reg), VarNameToStr(varName));
2167 return nameVarReg[index];
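// Why two buffers: a caller may produce two names in one statement, e.g.
// (illustrative) printf("%s -> %s", compRegVarName(srcReg, true),
// compRegVarName(dstReg, true)); with a single static buffer the second call
// would clobber the first result before anything is printed.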
2172 /* no debug info required or no variable in that register
2173 -> return standard name */
2175 return getRegName(reg);
2178 const char* Compiler::compRegNameForSize(regNumber reg, size_t size)
2180 if (size == 0 || size >= 4)
2182 return compRegVarName(reg, true);
2187 const char * sizeNames[][2] =
2194 { "spl", "sp" }, // ESP
2195 { "bpl", "bp" }, // EBP
2196 { "sil", "si" }, // ESI
2197 { "dil", "di" }, // EDI
2206 #endif // TARGET_AMD64
2210 assert(isByteReg(reg));
2211 assert(genRegMask(reg) & RBM_BYTE_REGS);
2212 assert(size == 1 || size == 2);
2214 return sizeNames[reg][size - 1];
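// Usage sketch (illustrative; assumes the usual x64 register ordering into
// sizeNames): compRegNameForSize(REG_ESI, 1) would return "sil" and
// compRegNameForSize(REG_ESI, 2) would return "si". Sizes 0 and >= 4 fall
// back to compRegVarName above.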
2218 const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
2223 for (i = 0, t = info.compVarScopes; i < info.compVarScopesCount; i++, t++)
2225 if (t->vsdVarNum != varNum)
2230 if (offs >= t->vsdLifeBeg && offs < t->vsdLifeEnd)
2232 return VarNameToStr(t->vsdName);
2240 /*****************************************************************************/
2242 void Compiler::compSetProcessor()
2245 // NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codeman.cpp
2248 const JitFlags& jitFlags = *opts.jitFlags;
2251 // Processor specific optimizations
2253 CLANG_FORMAT_COMMENT_ANCHOR;
2255 CORINFO_InstructionSetFlags instructionSetFlags = jitFlags.GetInstructionSetFlags();
2256 opts.compSupportsISA.Reset();
2257 opts.compSupportsISAReported.Reset();
2258 opts.compSupportsISAExactly.Reset();
2260 // The VM will set the ISA flags depending on actual hardware support
2261 // and any config switches specified by the user. The exception
2262 // here is certain "artificial ISAs" such as Vector64/128/256, which
2263 // don't actually exist in hardware. The JIT is in charge of adding those and ensuring
2264 // the total sum of flags is still valid.
2265 #if defined(TARGET_XARCH)
2266 // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits
2267 uint32_t preferredVectorBitWidth = (ReinterpretHexAsDecimal(JitConfig.PreferredVectorBitWidth()) / 128) * 128;
2268 uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8;
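// Worked example: DOTNET_PreferredVectorBitWidth=512 is read by the config
// system as 0x512 and reinterpreted back to decimal 512, i.e. 64 bytes; a
// (hypothetical) value of 200 would round down to (200 / 128) * 128 = 128
// bits, i.e. 16 bytes.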
2270 if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE))
2272 instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
2275 if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX))
2277 instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
2280 // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
2281 // These have been shipped together historically and at the time of this writing
2282 // there exists no hardware which doesn't support the entire feature set. To simplify
2283 // the overall JIT implementation, we currently require the entire set of ISAs to be
2284 // supported and disable AVX512 support otherwise.
2286 if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F))
2288 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
2289 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
2290 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
2291 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
2292 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
2293 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
2294 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
2295 assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
2297 instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
2299 if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
2301 // Some architectures can experience frequency throttling when
2302 // executing 512-bit width instructions. To account for this we set the
2303 // default preferred vector width to 256-bits in some scenarios. Power
2304 // users can override this with `DOTNET_PreferredVectorBitWidth=512` to
2305 // allow using such instructions where hardware support is available.
2307 // Do not condition this based on stress mode as it makes the support
2308 // reported inconsistent across methods and breaks expectations/functionality
2310 preferredVectorByteLength = 256 / 8;
2314 opts.preferredVectorByteLength = preferredVectorByteLength;
2315 #elif defined(TARGET_ARM64)
2316 if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd))
2318 instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
2319 instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
2321 #endif // TARGET_ARM64
2323 assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags)));
2324 opts.setSupportedISAs(instructionSetFlags);
2327 if (!compIsForInlining())
2329 if (canUseVexEncoding())
2331 codeGen->GetEmitter()->SetUseVEXEncoding(true);
2332 // Assume at first that each JITted method does not contain any AVX instructions
2333 codeGen->GetEmitter()->SetContainsAVX(false);
2334 codeGen->GetEmitter()->SetContains256bitOrMoreAVX(false);
2336 if (canUseEvexEncoding())
2338 codeGen->GetEmitter()->SetUseEvexEncoding(true);
2339 // TODO-XArch-AVX512 : Revisit other flags to be set once avx512 instructions are added.
2342 #endif // TARGET_XARCH
2345 bool Compiler::notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported) const
2347 const char* isaString = InstructionSetToString(isa);
2348 JITDUMP("Notify VM instruction set (%s) %s be supported.\n", isaString, supported ? "must" : "must not");
2349 return info.compCompHnd->notifyInstructionSetUsage(isa, supported);
2352 #ifdef PROFILING_SUPPORTED
2353 // A dummy routine to receive Enter/Leave/Tailcall profiler callbacks.
2354 // These are used when DOTNET_JitEltHookEnabled=1
2356 void DummyProfilerELTStub(UINT_PTR ProfilerHandle, UINT_PTR callerSP)
2360 #else //! TARGET_AMD64
2361 void DummyProfilerELTStub(UINT_PTR ProfilerHandle)
2365 #endif //! TARGET_AMD64
2367 #endif // PROFILING_SUPPORTED
2369 bool Compiler::compShouldThrowOnNoway(
2370 #ifdef FEATURE_TRACELOGGING
2371 const char* filename, unsigned line
2375 #ifdef FEATURE_TRACELOGGING
2376 compJitTelemetry.NotifyNowayAssert(filename, line);
2379 // In min opts, we don't want the noway assert to go through the exception
2380 // path. Instead we want it to just silently fall through to codegen.
2382 return !opts.MinOpts();
2385 // ConfigInteger does not offer an option for decimal flags. Any numbers are interpreted as hex.
2386 // I could add the decimal option to ConfigInteger or I could write a function to reinterpret this
2387 // value as the user intended.
2388 unsigned ReinterpretHexAsDecimal(unsigned in)
2390 // ex: in: 0x100 returns: 100
2391 unsigned result = 0;
2402 unsigned digit = in % 16;
2405 result += digit * index;
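// Self-contained sketch of the full conversion (illustrative; assumes every
// hex digit is 0-9, which the real conversion also relies on):
//
//   unsigned ReinterpretHexAsDecimalSketch(unsigned in)
//   {
//       unsigned result = 0;
//       for (unsigned index = 1; in != 0; in >>= 4, index *= 10)
//       {
//           result += (in % 16) * index; // consume one hex digit as a decimal digit
//       }
//       return result; // e.g. 0x100 -> 100, 0x123 -> 123
//   }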
2411 void Compiler::compInitOptions(JitFlags* jitFlags)
2415 if (compIsForInlining())
2417 // The following flags are lost when inlining. (They are removed in
2418 // Compiler::fgInvokeInlineeCompiler().)
2419 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
2420 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR_IF_LOOPS));
2421 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
2422 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
2423 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE));
2424 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_TRACK_TRANSITIONS));
2427 opts.jitFlags = jitFlags;
2428 opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
2430 if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) ||
2431 jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2433 opts.compFlags = CLFLG_MINOPT;
2435 // Don't optimize .cctors (except prejit) or if we're an inlinee
2436 else if (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) &&
2437 !compIsForInlining())
2439 opts.compFlags = CLFLG_MINOPT;
2442 // Default value is to generate a blend of size and speed optimizations
2444 opts.compCodeOpt = BLENDED_CODE;
2446 // If the EE sets SIZE_OPT or if we are compiling a Class constructor
2447 // we will optimize for code size at the expense of speed
2449 if (jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
2451 opts.compCodeOpt = SMALL_CODE;
2454 // If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
2456 else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT) ||
2457 (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1) && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)))
2459 opts.compCodeOpt = FAST_CODE;
2460 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT));
2463 //-------------------------------------------------------------------------
2465 opts.compDbgCode = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE);
2466 opts.compDbgInfo = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO);
2467 opts.compDbgEnC = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC);
2470 opts.compJitAlignLoopAdaptive = JitConfig.JitAlignLoopAdaptive() == 1;
2471 opts.compJitAlignLoopBoundary = (unsigned short)JitConfig.JitAlignLoopBoundary();
2472 opts.compJitAlignLoopMinBlockWeight = (unsigned short)JitConfig.JitAlignLoopMinBlockWeight();
2474 opts.compJitAlignLoopForJcc = JitConfig.JitAlignLoopForJcc() == 1;
2475 opts.compJitAlignLoopMaxCodeSize = (unsigned short)JitConfig.JitAlignLoopMaxCodeSize();
2476 opts.compJitHideAlignBehindJmp = JitConfig.JitHideAlignBehindJmp() == 1;
2477 opts.compJitOptimizeStructHiddenBuffer = JitConfig.JitOptimizeStructHiddenBuffer() == 1;
2478 opts.compJitUnrollLoopMaxIterationCount = (unsigned short)JitConfig.JitUnrollLoopMaxIterationCount();
2480 opts.compJitAlignLoopAdaptive = true;
2481 opts.compJitAlignLoopBoundary = DEFAULT_ALIGN_LOOP_BOUNDARY;
2482 opts.compJitAlignLoopMinBlockWeight = DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT;
2483 opts.compJitAlignLoopMaxCodeSize = DEFAULT_MAX_LOOPSIZE_FOR_ALIGN;
2484 opts.compJitHideAlignBehindJmp = true;
2485 opts.compJitOptimizeStructHiddenBuffer = true;
2486 opts.compJitUnrollLoopMaxIterationCount = DEFAULT_UNROLL_LOOP_MAX_ITERATION_COUNT;
2490 if (opts.compJitAlignLoopAdaptive)
2492 // For adaptive alignment, padding limit is equal to the max instruction encoding
2493 // size which is 15 bytes. Hence (32 >> 1) - 1 = 15 bytes.
2494 opts.compJitAlignPaddingLimit = (opts.compJitAlignLoopBoundary >> 1) - 1;
2498 // For non-adaptive alignment, padding limit is 1 less than the alignment boundary
2500 opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary - 1;
2503 if (opts.compJitAlignLoopAdaptive)
2505 // For adaptive alignment, the padding limit is half the alignment boundary,
2506 // because all instructions are 4 bytes long. Hence (32 >> 1) = 16 bytes.
2507 opts.compJitAlignPaddingLimit = (opts.compJitAlignLoopBoundary >> 1);
2511 // For non-adaptive alignment, the padding limit is the alignment boundary itself.
2512 opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary;
2516 assert(isPow2(opts.compJitAlignLoopBoundary));
2518 // The minimum encoding size for Arm64 is 4 bytes.
2519 assert(opts.compJitAlignLoopBoundary >= 4);
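// Worked examples with the default 32-byte boundary:
//   xarch, adaptive:      (32 >> 1) - 1 = 15 bytes of padding at most
//   xarch, non-adaptive:  32 - 1       = 31 bytes
//   arm64, adaptive:      (32 >> 1)    = 16 bytes
//   arm64, non-adaptive:  32 bytes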
2522 #if REGEN_SHORTCUTS || REGEN_CALLPAT
2523 // We never want to have debugging enabled when regenerating GC encoding patterns
2524 opts.compDbgCode = false;
2525 opts.compDbgInfo = false;
2526 opts.compDbgEnC = false;
2532 opts.dspOrder = false;
2534 // Optionally suppress inliner compiler instance dumping.
2536 if (compIsForInlining())
2538 if (JitConfig.JitDumpInlinePhases() > 0)
2540 verbose = impInlineInfo->InlinerCompiler->verbose;
2550 codeGen->setVerbose(false);
2552 verboseTrees = verbose && shouldUseVerboseTrees();
2553 verboseSsa = verbose && shouldUseVerboseSsa();
2554 asciiTrees = shouldDumpASCIITrees();
2555 opts.dspDiffable = compIsForInlining() ? impInlineInfo->InlinerCompiler->opts.dspDiffable : false;
2559 opts.altJit = false;
2561 #if defined(LATE_DISASM) && !defined(DEBUG)
2562 // For non-debug builds with the late disassembler built in, we currently always do late disassembly
2563 // (we have no way to determine when not to, since we don't have class/method names).
2564 // In the DEBUG case, this is initialized to false, below.
2565 opts.doLateDisasm = true;
2570 const JitConfigValues::MethodSet* pfAltJit;
2571 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2573 pfAltJit = &JitConfig.AltJitNgen();
2577 pfAltJit = &JitConfig.AltJit();
2580 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
2582 if (pfAltJit->contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2587 unsigned altJitLimit = ReinterpretHexAsDecimal(JitConfig.AltJitLimit());
2588 if (altJitLimit > 0 && Compiler::jitTotalMethodCompiled >= altJitLimit)
2590 opts.altJit = false;
2596 const char* altJitVal;
2597 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2599 altJitVal = JitConfig.AltJitNgen().list();
2603 altJitVal = JitConfig.AltJit().list();
2606 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
2608 // In release mode, you either get all methods or no methods. You must use "*" as the parameter, or we ignore
2609 // it. You don't get to give a regular expression of methods to match.
2610 // (Partially, this is because we haven't computed and stored the method and class name except in debug, and it
2611 // might be expensive to do so.)
2612 if ((altJitVal != nullptr) && (strcmp(altJitVal, "*") == 0))
2620 // Take care of DOTNET_AltJitExcludeAssemblies.
2623 // First, initialize the AltJitExcludeAssemblies list, but only do it once.
2624 if (!s_pAltJitExcludeAssembliesListInitialized)
2626 const WCHAR* wszAltJitExcludeAssemblyList = JitConfig.AltJitExcludeAssemblies();
2627 if (wszAltJitExcludeAssemblyList != nullptr)
2629 // NOTE: The Assembly name list is allocated in the process heap, not in the no-release heap, which is
2631 // reclaimed for every compilation. This is ok because we only allocate once, due to the static.
2632 s_pAltJitExcludeAssembliesList = new (HostAllocator::getHostAllocator())
2633 AssemblyNamesList2(wszAltJitExcludeAssemblyList, HostAllocator::getHostAllocator());
2635 s_pAltJitExcludeAssembliesListInitialized = true;
2638 if (s_pAltJitExcludeAssembliesList != nullptr)
2640 // We have an exclusion list. See if this method is in an assembly that is on the list.
2641 // Note that we check this for every method, since we might inline across modules, and
2642 // if the inlinee module is on the list, we don't want to use the altjit for it.
2643 const char* methodAssemblyName = info.compCompHnd->getAssemblyName(
2644 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
2645 if (s_pAltJitExcludeAssembliesList->IsInList(methodAssemblyName))
2647 opts.altJit = false;
2654 bool altJitConfig = !pfAltJit->isEmpty();
2656 bool verboseDump = false;
2658 if (!altJitConfig || opts.altJit)
2660 // We should only enable 'verboseDump' when we are actually compiling a matching method
2661 // and not enable it when we are just considering inlining a matching method.
2663 if (!compIsForInlining())
2665 if (JitConfig.JitDump().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2669 unsigned jitHashDumpVal = (unsigned)JitConfig.JitHashDump();
2670 if ((jitHashDumpVal != (DWORD)-1) && (jitHashDumpVal == info.compMethodHash()))
2677 // Optionally suppress dumping Tier0 jit requests.
2679 if (verboseDump && jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2681 verboseDump = (JitConfig.JitDumpTier0() > 0);
2684 // Optionally suppress dumping except for a specific OSR jit request.
2686 const int dumpAtOSROffset = JitConfig.JitDumpAtOSROffset();
2688 if (verboseDump && (dumpAtOSROffset != -1))
2690 if (jitFlags->IsSet(JitFlags::JIT_FLAG_OSR))
2692 verboseDump = (((IL_OFFSET)dumpAtOSROffset) == info.compILEntry);
2696 verboseDump = false;
2707 setUsesSIMDTypes(false);
2708 #endif // FEATURE_SIMD
2710 lvaEnregEHVars = (compEnregLocals() && JitConfig.EnableEHWriteThru());
2711 lvaEnregMultiRegVars = (compEnregLocals() && JitConfig.EnableMultiRegLocals());
2713 #if FEATURE_TAILCALL_OPT
2714 // By default opportunistic tail call optimization is enabled.
2715 // Recognition is done in the importer so this must be set for
2716 // inlinees as well.
2717 opts.compTailCallOpt = true;
2718 #endif // FEATURE_TAILCALL_OPT
2720 #if FEATURE_FASTTAILCALL
2721 // By default fast tail calls are enabled.
2722 opts.compFastTailCalls = true;
2723 #endif // FEATURE_FASTTAILCALL
2727 fgPgoSchema = nullptr;
2728 fgPgoData = nullptr;
2729 fgPgoSchemaCount = 0;
2730 fgPgoQueryResult = E_FAIL;
2731 fgPgoFailReason = nullptr;
2732 fgPgoSource = ICorJitInfo::PgoSource::Unknown;
2733 fgPgoHaveWeights = false;
2735 if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
2737 fgPgoQueryResult = info.compCompHnd->getPgoInstrumentationResults(info.compMethodHnd, &fgPgoSchema,
2738 &fgPgoSchemaCount, &fgPgoData, &fgPgoSource);
2740 // A failed result that also has a non-NULL fgPgoSchema
2741 // indicates that the ILSize for the method no longer matches
2742 // the ILSize for the method when profile data was collected.
2744 // We will discard the IBC data in this case.
2746 if (FAILED(fgPgoQueryResult))
2748 fgPgoFailReason = (fgPgoSchema != nullptr) ? "No matching PGO data" : "No PGO data";
2749 fgPgoData = nullptr;
2750 fgPgoSchema = nullptr;
2752 // Optionally, disable use of profile data.
2754 else if (JitConfig.JitDisablePgo() > 0)
2756 fgPgoFailReason = "PGO data available, but JitDisablePgo > 0";
2757 fgPgoQueryResult = E_FAIL;
2758 fgPgoData = nullptr;
2759 fgPgoSchema = nullptr;
2760 fgPgoDisabled = true;
2763 // Optionally, enable use of profile data for only some methods.
2767 static ConfigMethodRange JitEnablePgoRange;
2768 JitEnablePgoRange.EnsureInit(JitConfig.JitEnablePgoRange());
2770 // Base this decision on the root method hash, so a method either sees all available
2771 // profile data (including that for inlinees), or none of it.
2773 const unsigned hash = impInlineRoot()->info.compMethodHash();
2774 if (!JitEnablePgoRange.Contains(hash))
2776 fgPgoFailReason = "PGO data available, but method hash NOT within JitEnablePgoRange";
2777 fgPgoQueryResult = E_FAIL;
2778 fgPgoData = nullptr;
2779 fgPgoSchema = nullptr;
2780 fgPgoDisabled = true;
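// For example (with the same assumed hex-range syntax as other
// ConfigMethodRange knobs): DOTNET_JitEnablePgoRange=0-7FFFFFFF would keep
// profile data only for root methods whose hash lands in the lower half of
// the 32-bit hash space.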
2784 // A successful result implies a non-NULL fgPgoSchema
2786 if (SUCCEEDED(fgPgoQueryResult))
2788 assert(fgPgoSchema != nullptr);
2790 for (UINT32 i = 0; i < fgPgoSchemaCount; i++)
2792 ICorJitInfo::PgoInstrumentationKind kind = fgPgoSchema[i].InstrumentationKind;
2793 if (kind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ||
2794 kind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount ||
2795 kind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount ||
2796 kind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount)
2798 fgPgoHaveWeights = true;
2804 // A failed result implies a NULL fgPgoSchema
2805 // see implementation of Compiler::fgHaveProfileData()
2807 if (FAILED(fgPgoQueryResult))
2809 assert(fgPgoSchema == nullptr);
2814 if (compIsForInlining())
2819 // The rest of the opts fields that we initialize here
2820 // should only be used when we generate code for the method.
2821 // They should not be used when importing or inlining.
2822 CLANG_FORMAT_COMMENT_ANCHOR;
2824 #if FEATURE_TAILCALL_OPT
2825 opts.compTailCallLoopOpt = true;
2826 #endif // FEATURE_TAILCALL_OPT
2828 opts.genFPorder = true;
2829 opts.genFPopt = true;
2831 opts.instrCount = 0;
2832 opts.lvRefCount = 0;
2834 #ifdef PROFILING_SUPPORTED
2835 opts.compJitELTHookEnabled = false;
2836 #endif // PROFILING_SUPPORTED
2838 #if defined(TARGET_ARM64)
2839 // 0 is default: use the appropriate frame type based on the function.
2840 opts.compJitSaveFpLrWithCalleeSavedRegisters = 0;
2841 #endif // defined(TARGET_ARM64)
2843 opts.disAsm = false;
2844 opts.disDiffable = false;
2845 opts.dspDiffable = false;
2846 opts.disAlignment = false;
2847 opts.disCodeBytes = false;
2849 opts.dspInstrs = false;
2850 opts.dspLines = false;
2851 opts.varNames = false;
2852 opts.dmpHex = false;
2853 opts.disAsmSpilled = false;
2854 opts.disAddr = false;
2855 opts.dspCode = false;
2856 opts.dspEHTable = false;
2857 opts.dspDebugInfo = false;
2858 opts.dspGCtbls = false;
2859 opts.disAsm2 = false;
2860 opts.dspUnwind = false;
2861 opts.compLongAddress = false;
2862 opts.optRepeat = false;
2865 opts.doLateDisasm = false;
2866 #endif // LATE_DISASM
2868 compDebugBreak = false;
2870 // If we have a non-empty AltJit config then we change all of these other
2871 // config values to refer only to the AltJit.
2873 if (!altJitConfig || opts.altJit)
2875 bool disEnabled = true;
2877 // Setup assembly name list for disassembly, if not already set up.
2878 if (!s_pJitDisasmIncludeAssembliesListInitialized)
2880 const WCHAR* assemblyNameList = JitConfig.JitDisasmAssemblies();
2881 if (assemblyNameList != nullptr)
2883 s_pJitDisasmIncludeAssembliesList = new (HostAllocator::getHostAllocator())
2884 AssemblyNamesList2(assemblyNameList, HostAllocator::getHostAllocator());
2886 s_pJitDisasmIncludeAssembliesListInitialized = true;
2889 // If we have an assembly name list for disassembly, also check this method's assembly.
2890 if (s_pJitDisasmIncludeAssembliesList != nullptr && !s_pJitDisasmIncludeAssembliesList->IsEmpty())
2892 const char* assemblyName = info.compCompHnd->getAssemblyName(
2893 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
2895 if (!s_pJitDisasmIncludeAssembliesList->IsInList(assemblyName))
2903 if ((JitConfig.JitOrder() & 1) == 1)
2905 opts.dspOrder = true;
2908 if (JitConfig.JitGCDump().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2910 opts.dspGCtbls = true;
2913 if (JitConfig.JitDisasm().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2918 if (JitConfig.JitDisasmSpilled())
2920 opts.disAsmSpilled = true;
2923 if (JitConfig.JitUnwindDump().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2925 opts.dspUnwind = true;
2928 if (JitConfig.JitEHDump().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2930 opts.dspEHTable = true;
2933 if (JitConfig.JitDebugDump().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2935 opts.dspDebugInfo = true;
2938 if (opts.disAsm && JitConfig.JitDisasmWithGC())
2940 opts.disasmWithGC = true;
2944 if (JitConfig.JitLateDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2945 opts.doLateDisasm = true;
2946 #endif // LATE_DISASM
2948 // This one applies to both Ngen/Jit Disasm output: DOTNET_JitDasmWithAddress=1
2949 if (JitConfig.JitDasmWithAddress() != 0)
2951 opts.disAddr = true;
2953 if (JitConfig.JitLongAddress() != 0)
2955 opts.compLongAddress = true;
2958 if (JitConfig.JitOptRepeat().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2960 opts.optRepeat = true;
2966 opts.dspCode = true;
2967 opts.dspEHTable = true;
2968 opts.dspGCtbls = true;
2969 opts.disAsm2 = true;
2970 opts.dspUnwind = true;
2972 verboseTrees = shouldUseVerboseTrees();
2973 verboseSsa = shouldUseVerboseSsa();
2974 codeGen->setVerbose(true);
2977 treesBeforeAfterMorph = (JitConfig.TreesBeforeAfterMorph() == 1);
2978 morphNum = 0; // Initialize the morphed-trees counting.
2980 expensiveDebugCheckLevel = JitConfig.JitExpensiveDebugCheckLevel();
2981 if (expensiveDebugCheckLevel == 0)
2983 // If we're in a stress mode that modifies the flowgraph, make 1 the default.
2984 if (fgStressBBProf() || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
2986 expensiveDebugCheckLevel = 1;
2992 printf("****** START compiling %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
2993 printf("Generating code for %s %s\n", Target::g_tgtPlatformName(), Target::g_tgtCPUName);
2994 printf(""); // in our logic this causes a flush
2997 if (JitConfig.JitBreak().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
2999 assert(!"JitBreak reached");
3002 unsigned jitHashBreakVal = (unsigned)JitConfig.JitHashBreak();
3003 if ((jitHashBreakVal != (DWORD)-1) && (jitHashBreakVal == info.compMethodHash()))
3005 assert(!"JitHashBreak reached");
3009 JitConfig.JitDebugBreak().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args) ||
3010 JitConfig.JitBreak().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
3012 compDebugBreak = true;
3015 memset(compActiveStressModes, 0, sizeof(compActiveStressModes));
3017 // Read the function list, if it has not already been read, and if such a list exists.
3018 if (!s_pJitFunctionFileInitialized)
3020 const WCHAR* functionFileName = JitConfig.JitFunctionFile();
3021 if (functionFileName != nullptr)
3024 new (HostAllocator::getHostAllocator()) MethodSet(functionFileName, HostAllocator::getHostAllocator());
3026 s_pJitFunctionFileInitialized = true;
3029 if (JitConfig.JitDisasm().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
3039 if (JitConfig.JitDisasmTesting())
3041 opts.disTesting = true;
3043 if (JitConfig.JitDisasmWithAlignmentBoundaries())
3045 opts.disAlignment = true;
3047 if (JitConfig.JitDisasmWithCodeBytes())
3049 opts.disCodeBytes = true;
3051 if (JitConfig.JitDisasmDiffable())
3053 opts.disDiffable = true;
3054 opts.dspDiffable = true;
3058 // These are left for backward compatibility, to be removed
3060 if (JitConfig.JitDasmWithAlignmentBoundaries())
3062 opts.disAlignment = true;
3064 if (JitConfig.JitDiffableDasm())
3066 opts.disDiffable = true;
3067 opts.dspDiffable = true;
3071 //-------------------------------------------------------------------------
3074 assert(!codeGen->isGCTypeFixed());
3075 opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
3078 #if defined(DEBUG) && defined(TARGET_XARCH)
3081 STACK_CHECK_ON_RETURN = 0x1,
3082 STACK_CHECK_ON_CALL = 0x2,
3083 STACK_CHECK_ALL = 0x3
3086 DWORD dwJitStackChecks = JitConfig.JitStackChecks();
3087 if (compStressCompile(STRESS_GENERIC_VARN, 5))
3089 dwJitStackChecks = STACK_CHECK_ALL;
3091 opts.compStackCheckOnRet = (dwJitStackChecks & DWORD(STACK_CHECK_ON_RETURN)) != 0;
3092 #if defined(TARGET_X86)
3093 opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
3094 #endif // defined(TARGET_X86)
3095 #endif // defined(DEBUG) && defined(TARGET_XARCH)
3097 #if MEASURE_MEM_ALLOC
3098 s_dspMemStats = (JitConfig.DisplayMemStats() != 0);
3101 #ifdef PROFILING_SUPPORTED
3102 opts.compNoPInvokeInlineCB = jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_NO_PINVOKE_INLINE);
3104 // Cache the profiler handle
3105 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE))
3109 info.compCompHnd->GetProfilingHandle(&hookNeeded, &compProfilerMethHnd, &indirected);
3110 compProfilerHookNeeded = !!hookNeeded;
3111 compProfilerMethHndIndirected = !!indirected;
3115 compProfilerHookNeeded = false;
3116 compProfilerMethHnd = nullptr;
3117 compProfilerMethHndIndirected = false;
3120 // Honour DOTNET_JitELTHookEnabled or STRESS_PROFILER_CALLBACKS stress mode
3121 // only if VM has not asked us to generate profiler hooks in the first place.
3122 // That is, override VM only if it hasn't asked for a profiler callback for this method.
3123 // Don't run this stress mode when pre-JITing, as we would need to emit a relocation
3124 // for the call to the fake ELT hook, which wouldn't make sense, as we can't store that
3125 // in the pre-JIT image.
3126 if (!compProfilerHookNeeded)
3128 if ((JitConfig.JitELTHookEnabled() != 0) ||
3129 (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && compStressCompile(STRESS_PROFILER_CALLBACKS, 5)))
3131 opts.compJitELTHookEnabled = true;
3135 // TBD: Exclude PInvoke stubs
3136 if (opts.compJitELTHookEnabled)
3138 #if defined(DEBUG) // We currently only know if we're running under SuperPMI in DEBUG
3139 // We don't want to get spurious SuperPMI asm diffs because profile stress kicks in and we use
3140 // the address of `DummyProfilerELTStub` in the JIT binary, without relocation. So just use
3141 // a fixed address in this case. It's SuperPMI replay, so the generated code won't be run.
3142 if (RunningSuperPmiReplay())
3145 static_assert_no_msg(sizeof(void*) == 8);
3146 compProfilerMethHnd = (void*)0x0BADF00DBEADCAFE;
3148 static_assert_no_msg(sizeof(void*) == 4);
3149 compProfilerMethHnd = (void*)0x0BADF00D;
3155 compProfilerMethHnd = (void*)DummyProfilerELTStub;
3157 compProfilerMethHndIndirected = false;
3160 #endif // PROFILING_SUPPORTED
3162 #if FEATURE_TAILCALL_OPT
3163 const WCHAR* strTailCallOpt = JitConfig.TailCallOpt();
3164 if (strTailCallOpt != nullptr)
3166 opts.compTailCallOpt = (UINT)_wtoi(strTailCallOpt) != 0;
3169 if (JitConfig.TailCallLoopOpt() == 0)
3171 opts.compTailCallLoopOpt = false;
3175 #if FEATURE_FASTTAILCALL
3176 if (JitConfig.FastTailCalls() == 0)
3178 opts.compFastTailCalls = false;
3180 #endif // FEATURE_FASTTAILCALL
3182 #ifdef CONFIGURABLE_ARM_ABI
3183 opts.compUseSoftFP = jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI);
3184 unsigned int softFPConfig = opts.compUseSoftFP ? 2 : 1;
3185 unsigned int oldSoftFPConfig =
3186 InterlockedCompareExchange(&GlobalJitOptions::compUseSoftFPConfigured, softFPConfig, 0);
3187 if (oldSoftFPConfig != softFPConfig && oldSoftFPConfig != 0)
3189 // There are no current scenarios where the abi can change during the lifetime of a process
3190 // that uses the JIT. If such a change occurs, either compFeatureHfa will need to change to a TLS static
3191 // or we will need to have some means to reset the flag safely.
3192 NO_WAY("SoftFP ABI setting changed during lifetime of process");
3195 GlobalJitOptions::compFeatureHfa = !opts.compUseSoftFP;
3196 #elif defined(ARM_SOFTFP) && defined(TARGET_ARM)
3197 // Armel is unconditionally enabled in the JIT. Verify that the VM side agrees.
3198 assert(jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI));
3199 #elif defined(TARGET_ARM)
3200 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI));
3201 #endif // CONFIGURABLE_ARM_ABI
3203 opts.compScopeInfo = opts.compDbgInfo;
3206 codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
3207 info.compMethodInfo->args.pSig);
3210 //-------------------------------------------------------------------------
3212 opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
3214 bool enableFakeSplitting = false;
3217 enableFakeSplitting = JitConfig.JitFakeProcedureSplitting();
3219 #if defined(TARGET_XARCH)
3220 // Whether encoding of absolute addr as PC-rel offset is enabled
3221 opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
3225 opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting;
3227 #ifdef FEATURE_CFI_SUPPORT
3228 // Hot/cold splitting is not being tested on NativeAOT.
3229 if (generateCFIUnwindCodes())
3231 opts.compProcedureSplitting = false;
3233 #endif // FEATURE_CFI_SUPPORT
3235 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
3236 opts.compProcedureSplitting = false;
3237 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
3240 opts.compProcedureSplittingEH = opts.compProcedureSplitting;
3243 if (opts.compProcedureSplitting)
3245 // Note that opts.compDbgCode is true under ngen for checked assemblies!
3246 opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting;
3249 // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
3250 // This is useful for debugging on a checked build. Note that we still only do procedure
3251 // splitting in the zapper.
3252 if (JitConfig.JitForceProcedureSplitting().contains(info.compMethodHnd, info.compClassHnd,
3253 &info.compMethodInfo->args))
3255 opts.compProcedureSplitting = true;
3258 // JitNoProcedureSplitting will always disable procedure splitting.
3259 if (JitConfig.JitNoProcedureSplitting().contains(info.compMethodHnd, info.compClassHnd,
3260 &info.compMethodInfo->args))
3262 opts.compProcedureSplitting = false;
3265 // JitNoProcedureSplittingEH will disable procedure splitting in functions with EH.
3266 if (JitConfig.JitNoProcedureSplittingEH().contains(info.compMethodHnd, info.compClassHnd,
3267 &info.compMethodInfo->args))
3269 opts.compProcedureSplittingEH = false;
3275 opts.compCollect64BitCounts = JitConfig.JitCollect64BitCounts() != 0;
3278 if (JitConfig.JitRandomlyCollect64BitCounts() != 0)
3281 rng.Init(info.compMethodHash() ^ JitConfig.JitRandomlyCollect64BitCounts() ^ 0x3485e20e);
3282 opts.compCollect64BitCounts = rng.Next(2) == 0;
3286 opts.compCollect64BitCounts = false;
3291 // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
3292 if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
3294 compMaxUncheckedOffsetForNullObject = (size_t)JitConfig.JitMaxUncheckedOffset();
3297 printf("STRESS_NULL_OBJECT_CHECK: compMaxUncheckedOffsetForNullObject=0x%X\n",
3298 compMaxUncheckedOffsetForNullObject);
3304 // If we are compiling for a specific tier, make that very obvious in the output.
3305 // Note that we don't expect multiple TIER flags to be set at one time, but there
3306 // is nothing preventing that.
3307 if (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
3309 printf("OPTIONS: Tier-0 compilation (set DOTNET_TieredCompilation=0 to disable)\n");
3311 if (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
3313 printf("OPTIONS: Tier-1 compilation\n");
3315 if (compSwitchedToOptimized)
3317 printf("OPTIONS: Tier-0 compilation, switched to FullOpts\n");
3319 if (compSwitchedToMinOpts)
3321 printf("OPTIONS: Tier-1/FullOpts compilation, switched to MinOpts\n");
3324 if (jitFlags->IsSet(JitFlags::JIT_FLAG_OSR))
3326 printf("OPTIONS: OSR variant with entry point 0x%x\n", info.compILEntry);
3329 printf("OPTIONS: compCodeOpt = %s\n",
3330 (opts.compCodeOpt == BLENDED_CODE)
3332 : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
3333 : (opts.compCodeOpt == FAST_CODE) ? "FAST_CODE" : "UNKNOWN_CODE");
3335 printf("OPTIONS: compDbgCode = %s\n", dspBool(opts.compDbgCode));
3336 printf("OPTIONS: compDbgInfo = %s\n", dspBool(opts.compDbgInfo));
3337 printf("OPTIONS: compDbgEnC = %s\n", dspBool(opts.compDbgEnC));
3338 printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
3339 printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
3341 if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
3343 printf("OPTIONS: optimizer should use profile data\n");
3346 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
3348 printf("OPTIONS: Jit invoked for ngen\n");
3353 #ifdef PROFILING_SUPPORTED
3354 #ifdef UNIX_AMD64_ABI
3355 if (compIsProfilerHookNeeded())
3357 opts.compNeedToAlignFrame = true;
3359 #endif // UNIX_AMD64_ABI
3362 #if defined(DEBUG) && defined(TARGET_ARM64)
3363 if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash()))
3365 opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
3367 #endif // defined(DEBUG) && defined(TARGET_ARM64)
3369 #if defined(TARGET_AMD64)
3370 rbmAllFloat = RBM_ALLFLOAT_INIT;
3371 rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
3372 cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
3374 if (canUseEvexEncoding())
3376 rbmAllFloat |= RBM_HIGHFLOAT;
3377 rbmFltCalleeTrash |= RBM_HIGHFLOAT;
3378 cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
3380 #endif // TARGET_AMD64
3382 #if defined(TARGET_XARCH)
3383 rbmAllMask = RBM_ALLMASK_INIT;
3384 rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT;
3385 cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT;
3387 if (canUseEvexEncoding())
3389 rbmAllMask |= RBM_ALLMASK_EVEX;
3390 rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX;
3391 cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK_EVEX;
3394 // Make sure we copy the register info and initialize the
3395 // trash regs after the underlying fields are initialized
3397 const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{
3398 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr,
3399 #include "typelist.h"
3402 memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT);
3404 codeGen->CopyRegisterInfo();
3405 #endif // TARGET_XARCH
3410 bool Compiler::compJitHaltMethod()
3412 /* This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code */
3413 /* Note that these two "Jit" environment variables also work for ngen images */
3415 if (JitConfig.JitHalt().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
3420 /* Use this Hash variant when there are a lot of methods with the same name and different signatures */
3422 unsigned fJitHashHaltVal = (unsigned)JitConfig.JitHashHalt();
3423 if ((fJitHashHaltVal != (unsigned)-1) && (fJitHashHaltVal == info.compMethodHash()))
3431 /*****************************************************************************
3432 * Should we use a "stress-mode" for the given stressArea? We have different
3433 * areas to allow the areas to be mixed in different combinations in
3434 * different methods.
3435 * 'weight' indicates how often (as a percentage) the area should be stressed.
3436 * It should reflect the usefulness:overhead ratio.
3439 const LPCWSTR Compiler::s_compStressModeNamesW[STRESS_COUNT + 1] = {
3440 #define STRESS_MODE(mode) W("STRESS_") W(#mode),
3446 const char* Compiler::s_compStressModeNames[STRESS_COUNT + 1] = {
3447 #define STRESS_MODE(mode) "STRESS_" #mode,
3453 //------------------------------------------------------------------------
3454 // compStressCompile: determine if a stress mode should be enabled
3457 // stressArea - stress mode to possibly enable
3458 // weight - percent of time this mode should be turned on
3459 // (range 0 to 100); weight 0 effectively disables
3462 // true if this stress mode is enabled
3465 // Methods may be excluded from stress via name or hash.
3467 // Particular stress modes may be disabled or forcibly enabled.
3469 // With JitStress=2, some stress modes are enabled regardless of weight;
3470 // these modes are the ones after COUNT_VARN in the enumeration.
3472 // For other modes or for nonzero JitStress values, stress will be
3473 // enabled selectively for roughly weight% of methods.
3475 bool Compiler::compStressCompile(compStressArea stressArea, unsigned weight)
3477 // This can be called early, before info is fully set up.
3478 if ((info.compMethodName == nullptr) || (info.compFullName == nullptr))
3483 // Inlinees defer to the root method for stress, so that we can
3484 // more easily isolate methods that cause stress failures.
3485 if (compIsForInlining())
3487 return impInlineRoot()->compStressCompile(stressArea, weight);
3490 const bool doStress = compStressCompileHelper(stressArea, weight);
3492 if (doStress && !compActiveStressModes[stressArea])
3496 printf("\n\n*** JitStress: %s ***\n\n", s_compStressModeNames[stressArea]);
3498 compActiveStressModes[stressArea] = 1;
3504 //------------------------------------------------------------------------
3505 // compStressAreaHash: Get (or compute) a hash code for a stress area.
3508 // stressArea - stress mode
3511 // A hash code for the specific stress area.
3513 unsigned Compiler::compStressAreaHash(compStressArea area)
3515 static LONG s_hashCodes[STRESS_COUNT];
3516 assert(static_cast<unsigned>(area) < ArrLen(s_hashCodes));
3518 unsigned result = (unsigned)s_hashCodes[area];
3521 result = HashStringA(s_compStressModeNames[area]);
3527 InterlockedExchange(&s_hashCodes[area], (LONG)result);
3533 //------------------------------------------------------------------------
3534 // compStressCompileHelper: helper to determine if a stress mode should be enabled
3537 // stressArea - stress mode to possibly enable
3538 // weight - percent of time this mode should be turned on
3539 // (range 0 to 100); weight 0 effectively disables
3542 // true if this stress mode is enabled
3545 // See compStressCompile
3547 bool Compiler::compStressCompileHelper(compStressArea stressArea, unsigned weight)
3549 if (!bRangeAllowStress)
3554 if (!JitConfig.JitStressOnly().isEmpty() &&
3555 !JitConfig.JitStressOnly().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
3560 // Does user explicitly prevent using this STRESS_MODE through the command line?
3561 const WCHAR* strStressModeNamesNot = JitConfig.JitStressModeNamesNot();
3562 if ((strStressModeNamesNot != nullptr) &&
3563 (u16_strstr(strStressModeNamesNot, s_compStressModeNamesW[stressArea]) != nullptr))
3568 // Does user explicitly set this STRESS_MODE through the command line?
3569 const WCHAR* strStressModeNames = JitConfig.JitStressModeNames();
3570 if (strStressModeNames != nullptr)
3572 if (u16_strstr(strStressModeNames, s_compStressModeNamesW[stressArea]) != nullptr)
3577 // This stress mode name did not match anything in the stress
3578 // mode allowlist. If the user has requested that only listed modes
3579 // be enabled, don't allow this stress mode to turn on.
3580 const bool onlyEnableMode = JitConfig.JitStressModeNamesOnly() != 0;
3588 // 0: No stress (Except when explicitly set in DOTNET_JitStressModeNames)
3589 // !=2: Vary stress. Performance will be slightly/moderately degraded
3590 // 2: Check-all stress. Performance will be REALLY horrible
3591 const int stressLevel = getJitStressLevel();
3593 assert(weight <= MAX_STRESS_WEIGHT);
3595 // Check for boundary conditions
3596 if (stressLevel == 0 || weight == 0)
3601 // Should we allow unlimited stress?
3602 if ((stressArea > STRESS_COUNT_VARN) && (stressLevel == 2))
3607 if (weight == MAX_STRESS_WEIGHT)
3612 // Get a hash which can be compared with 'weight'
3613 assert(stressArea != 0);
3614 const unsigned hash = (info.compMethodHash() ^ compStressAreaHash(stressArea) ^ stressLevel) % MAX_STRESS_WEIGHT;
3616 assert(hash < MAX_STRESS_WEIGHT && weight <= MAX_STRESS_WEIGHT);
3617 return (hash < weight);
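// Illustrative arithmetic (assuming MAX_STRESS_WEIGHT is 100): a weight of 30
// enables the area for roughly 30% of methods, decided deterministically as
//
//   bool enabled = ((methodHash ^ areaHash ^ stressLevel) % 100) < 30;
//
// so re-jitting the same method at the same stress level repeats the choice,
// keeping stress failures reproducible.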
3620 //------------------------------------------------------------------------
3621 // compPromoteFewerStructs: helper to determine if the local
3622 // should not be promoted under a stress mode.
3625 // lclNum - local number to test
3628 // true if this local should not be promoted.
3631 // Reject ~50% of the potential promotions if STRESS_PROMOTE_FEWER_STRUCTS is active.
3633 bool Compiler::compPromoteFewerStructs(unsigned lclNum)
3635 bool rejectThisPromo = false;
3636 const bool promoteLess = compStressCompile(STRESS_PROMOTE_FEWER_STRUCTS, 50);
3640 rejectThisPromo = (((info.compMethodHash() ^ lclNum) & 1) == 0);
3642 return rejectThisPromo;
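// Example: with a method hash of 0x1234, local #3 gives (0x1234 ^ 3) & 1 == 1
// and keeps its promotion, while local #4 gives (0x1234 ^ 4) & 1 == 0 and is
// rejected; on average half of the candidates survive.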
3645 //------------------------------------------------------------------------
3646 // dumpRegMask: display a register mask. For well-known sets of registers, display a well-known token instead of
3647 // a potentially large number of registers.
3650 // regs - The set of registers to display
3652 void Compiler::dumpRegMask(regMaskTP regs) const
3654 if (regs == RBM_ALLINT)
3658 else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
3660 printf("[allIntButFP]");
3662 else if (regs == RBM_ALLFLOAT)
3664 printf("[allFloat]");
3666 else if (regs == RBM_ALLDOUBLE)
3668 printf("[allDouble]");
3678 void Compiler::compInitDebuggingInfo()
3683 printf("*************** In compInitDebuggingInfo() for %s\n", info.compFullName);
3687 /*-------------------------------------------------------------------------
3689 * Get hold of the local variable records, if there are any
3692 info.compVarScopesCount = 0;
3694 if (opts.compScopeInfo)
3699 compInitVarScopeMap();
3701 if (opts.compScopeInfo || opts.compDbgCode)
3703 compInitScopeLists();
3706 if (opts.compDbgCode && (info.compVarScopesCount > 0))
3708 /* Create a new empty basic block. fgExtendDbgLifetimes() may add
3709 initialization of variables which are in scope right from the
3710 start of the (real) first BB (and therefore artificially marked
3711 as alive) into this block.
3714 fgEnsureFirstBBisScratch();
3716 fgNewStmtAtEnd(fgFirstBB, gtNewNothingNode());
3718 JITDUMP("Debuggable code - Add new %s to perform initialization of variables\n", fgFirstBB->dspToString());
3720 /*-------------------------------------------------------------------------
3722 * Read the stmt-offsets table and the line-number table
3725 info.compStmtOffsetsImplicit = ICorDebugInfo::NO_BOUNDARIES;
3727 // We can only report debug info for EnC at places where the stack is empty;
3728 // actually, at places where there are no live temps. Otherwise, we won't be able
3729 // to map between the old and the new versions correctly as we won't have
3730 // any info for the live temps.
3732 assert(!opts.compDbgEnC || !opts.compDbgInfo ||
3733 0 == (info.compStmtOffsetsImplicit & ~ICorDebugInfo::STACK_EMPTY_BOUNDARIES));
3735 info.compStmtOffsetsCount = 0;
3737 if (opts.compDbgInfo)
3739 /* Get hold of the line# records, if there are any */
3746 printf("info.compStmtOffsetsCount = %d\n", info.compStmtOffsetsCount);
3747 printf("info.compStmtOffsetsImplicit = %04Xh", info.compStmtOffsetsImplicit);
3749 if (info.compStmtOffsetsImplicit)
3752 if (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES)
3754 printf("STACK_EMPTY ");
3756 if (info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES)
3760 if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
3762 printf("CALL_SITE ");
3767 IL_OFFSET* pOffs = info.compStmtOffsets;
3768 for (unsigned i = 0; i < info.compStmtOffsetsCount; i++, pOffs++)
3770 printf("%02d) IL_%04Xh\n", i, *pOffs);
3777 void Compiler::compSetOptimizationLevel()
3779 bool theMinOptsValue;
3780 #pragma warning(suppress : 4101)
3781 unsigned jitMinOpts;
3783 if (compIsForInlining())
3785 theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
3789 theMinOptsValue = false;
3791 if (opts.compFlags == CLFLG_MINOPT)
3793 JITLOG((LL_INFO100, "CLFLG_MINOPT set for method %s\n", info.compFullName));
3794 theMinOptsValue = true;
3798 jitMinOpts = JitConfig.JitMinOpts();
3800 if (!theMinOptsValue && (jitMinOpts > 0))
3802 // jitTotalMethodCompiled does not include the method that is being compiled now, so add 1.
3803 unsigned methodCount = Compiler::jitTotalMethodCompiled + 1;
3804 unsigned methodCountMask = methodCount & 0xFFF;
3805 unsigned kind = (jitMinOpts & 0xF000000) >> 24;
3809 if (jitMinOpts <= methodCount)
3813 printf(" Optimizations disabled by JitMinOpts and methodCount\n");
3815 theMinOptsValue = true;
3820 unsigned firstMinopts = (jitMinOpts >> 12) & 0xFFF;
3821 unsigned secondMinopts = (jitMinOpts >> 0) & 0xFFF;
3823 if ((firstMinopts == methodCountMask) || (secondMinopts == methodCountMask))
3827 printf("0xD: Optimizations disabled by JitMinOpts and methodCountMask\n");
3829 theMinOptsValue = true;
3835 unsigned startMinopts = (jitMinOpts >> 12) & 0xFFF;
3836 unsigned endMinopts = (jitMinOpts >> 0) & 0xFFF;
3838 if ((startMinopts <= methodCountMask) && (endMinopts >= methodCountMask))
3842 printf("0xE: Optimizations disabled by JitMinOpts and methodCountMask\n");
3844 theMinOptsValue = true;
3850 unsigned bitsZero = (jitMinOpts >> 12) & 0xFFF;
3851 unsigned bitsOne = (jitMinOpts >> 0) & 0xFFF;
3853 if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
3857 printf("0xF: Optimizations disabled by JitMinOpts and methodCountMask\n");
3859 theMinOptsValue = true;
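// Putting the kinds together (values are hex; methodCountMask is the low 12
// bits of the 1-based method count; examples are illustrative):
//   DOTNET_JitMinOpts=5        -> MinOpts once the method count reaches 5
//   DOTNET_JitMinOpts=D100200  -> MinOpts for method numbers 0x100 and 0x200 (mod 0x1000)
//   DOTNET_JitMinOpts=E100200  -> MinOpts for method numbers 0x100..0x200 (mod 0x1000)
//   DOTNET_JitMinOpts=FF00001  -> MinOpts when bits 0xF00 are clear and bit 0x001 is set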
3866 if (!theMinOptsValue)
3868 if (JitConfig.JitMinOptsName().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
3870 theMinOptsValue = true;
3875 static ConfigMethodRange s_onlyOptimizeRange;
3876 s_onlyOptimizeRange.EnsureInit(JitConfig.JitOnlyOptimizeRange());
3878 if (!theMinOptsValue && !s_onlyOptimizeRange.IsEmpty())
3880 unsigned methHash = info.compMethodHash();
3881 theMinOptsValue = !s_onlyOptimizeRange.Contains(methHash);
3885 if (compStressCompile(STRESS_MIN_OPTS, 5))
3887 theMinOptsValue = true;
3889 // For PREJIT we never drop down to MinOpts
3890 // unless CLFLG_MINOPT is set
3891 else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
3893 if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
3895 JITLOG((LL_INFO10, "IL Code Size exceeded, using MinOpts for method %s\n", info.compFullName));
3896 theMinOptsValue = true;
3898 else if ((unsigned)JitConfig.JitMinOptsInstrCount() < opts.instrCount)
3900 JITLOG((LL_INFO10, "IL instruction count exceeded, using MinOpts for method %s\n", info.compFullName));
3901 theMinOptsValue = true;
3903 else if ((unsigned)JitConfig.JitMinOptsBbCount() < fgBBcount)
3905 JITLOG((LL_INFO10, "Basic Block count exceeded, using MinOpts for method %s\n", info.compFullName));
3906 theMinOptsValue = true;
3908 else if ((unsigned)JitConfig.JitMinOptsLvNumCount() < lvaCount)
3910 JITLOG((LL_INFO10, "Local Variable Num count exceeded, using MinOpts for method %s\n", info.compFullName));
3911 theMinOptsValue = true;
3913 else if ((unsigned)JitConfig.JitMinOptsLvRefCount() < opts.lvRefCount)
3915 JITLOG((LL_INFO10, "Local Variable Ref count exceeded, using MinOpts for method %s\n", info.compFullName));
3916 theMinOptsValue = true;
3918 if (theMinOptsValue == true)
3920 JITLOG((LL_INFO10000, "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
3921 "%3d,%3d for method %s\n",
3922 info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
3923 if (JitConfig.JitBreakOnMinOpts() != 0)
3925 assert(!"MinOpts enabled");
3930 // In retail builds, check whether we should force MinOpts due to the complexity of the method.
3931 // For PREJIT we never drop down to MinOpts
3932 // unless CLFLG_MINOPT is set
3933 if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
3934 ((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
3935 (DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
3936 (DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
3938 theMinOptsValue = true;
3942 JITLOG((LL_INFO10000,
3943 "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count %3d,%3d for method %s\n",
3944 info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
3947 // The code in this #if has been useful in debugging loop cloning issues, by
3948 // allowing selective enablement of the loop cloning optimization according to
3951 if (!theMinOptsValue)
3953 unsigned methHash = info.compMethodHash();
3954 char* lostr = getenv("opthashlo");
3955 unsigned methHashLo = 0;
3958 sscanf_s(lostr, "%x", &methHashLo);
3959 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
3961 char* histr = getenv("opthashhi");
3962 unsigned methHashHi = UINT32_MAX;
3965 sscanf_s(histr, "%x", &methHashHi);
3966 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
3968 if (methHash < methHashLo || methHash > methHashHi)
3970 theMinOptsValue = true;
3974 printf("Doing optimization in %s (0x%x).\n", info.compFullName, methHash);
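// A sketch of how this opthashlo/opthashhi window is typically used to bisect
// a bad optimization (illustrative only; the values below are hypothetical):
#if 0 // illustrative sketch
// $ export opthashlo=0x20000000
// $ export opthashhi=0x2fffffff
// Only methods whose hash falls inside [opthashlo, opthashhi] stay optimized;
// everything else silently drops to MinOpts. Repeatedly halving the window
// isolates the single method whose optimized codegen misbehaves.
static bool keepOptimized(unsigned methHash, unsigned lo, unsigned hi)
{
    return (methHash >= lo) && (methHash <= hi); // mirrors the check above
}
#endif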
3982 // Set the MinOpts value
3983 opts.SetMinOpts(theMinOptsValue);
3985 // Notify the VM if MinOpts is being used when not requested
3986 if (theMinOptsValue && !compIsForInlining() && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) &&
3987 !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) && !opts.compDbgCode)
3989 info.compCompHnd->setMethodAttribs(info.compMethodHnd, CORINFO_FLG_SWITCHED_TO_MIN_OPT);
3990 opts.jitFlags->Clear(JitFlags::JIT_FLAG_TIER1);
3991 compSwitchedToMinOpts = true;
3995 if (verbose && !compIsForInlining())
3997 printf("OPTIONS: opts.MinOpts() == %s\n", opts.MinOpts() ? "true" : "false");
4001 /* Control the optimizations */
4003 if (opts.OptimizationDisabled())
4005 opts.compFlags &= ~CLFLG_MAXOPT;
4006 opts.compFlags |= CLFLG_MINOPT;
4008 lvaEnregEHVars &= compEnregLocals();
4009 lvaEnregMultiRegVars &= compEnregLocals();
4012 if (!compIsForInlining())
4014 codeGen->setFramePointerRequired(false);
4015 codeGen->setFrameRequired(false);
4017 if (opts.OptimizationDisabled())
4019 codeGen->setFrameRequired(true);
4022 #if !defined(TARGET_AMD64)
4023 // The VM sets JitFlags::JIT_FLAG_FRAMED for two reasons: (1) the DOTNET_JitFramed variable is set, or
4024 // (2) the function is marked "noinline". The reason for #2 is that people mark functions
4025 // noinline to ensure they show up in a stack walk. But for AMD64, we don't need a frame
4026 // pointer for the frame to show up in a stack walk.
4027 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_FRAMED))
4028 codeGen->setFrameRequired(true);
4031 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !IsTargetAbi(CORINFO_NATIVEAOT_ABI))
4033 // The JIT doesn't currently support loop alignment for prejitted images outside NativeAOT.
4034 // (The JIT doesn't know the final address of the code, hence
4035 // it can't align code based on unknown addresses.)
4037 codeGen->SetAlignLoops(false); // loop alignment not supported for prejitted code
4041 codeGen->SetAlignLoops(JitConfig.JitAlignLoops() == 1);
4045 fgCanRelocateEHRegions = true;
4048 #if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
4049 // Function compRsvdRegCheck:
4050 // given a curState to use for calculating the total frame size,
4051 // it will return true if REG_OPT_RSVD should be reserved so
4052 // that it can be used to form large offsets when accessing stack-
4053 // based LclVars, including both the incoming and outgoing argument areas.
4055 // The method advances the frame layout state to curState by calling
4056 // lvaFrameSize(curState).
4058 bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
4060 // Always do the layout even if returning early. Callers might
4061 // depend on us to do the layout.
4062 unsigned frameSize = lvaFrameSize(curState);
4064 "compRsvdRegCheck\n"
4065 " frame size = %6d\n"
4066 " compArgSize = %6d\n",
4067 frameSize, compArgSize);
4071 // Have a recovery path in case we fail to reserve REG_OPT_RSVD and go
4072 // over the limit of SP and FP offset ranges due to large
4074 JITDUMP(" Returning true (MinOpts)\n\n");
4078 unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
4079 if (compFloatingPointUsed)
4081 calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
4083 calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters
4085 noway_assert(frameSize >= calleeSavedRegMaxSz);
4087 #if defined(TARGET_ARM64)
4089 // TODO-ARM64-CQ: update this!
4090 JITDUMP(" Returning true (ARM64)\n\n");
4091 return true; // just always assume we'll need it, for now
4093 #elif defined(TARGET_RISCV64)
4094 JITDUMP(" Returning true (RISCV64)\n\n");
4095 return true; // just always assume we'll need it, for now
4101 // ... high addresses ...
4102 //                   frame contents          size
4103 //                   ----------------------  ------------------------
4104 //                   inArgs                  compArgSize (includes prespill)
4108 //         R11 --->  R11                     REGSIZE_BYTES
4109 //                   callee saved regs       CALLEE_SAVED_REG_MAXSZ   (32 bytes)
4110 //                   optional saved fp regs  CALLEE_SAVED_FLOAT_MAXSZ (64 bytes)
4112 //                   incl. TEMPS             MAX_SPILL_TEMP_SIZE
4115 // ... low addresses ...
4117 // When codeGen->isFramePointerRequired is true, R11 will be established as a frame pointer.
4118 // We can then use R11 to access incoming args with positive offsets, and LclVars with
4119 // negative offsets.
4121 // In functions with EH, in the non-funclet (or main) region, even though we will have a
4122 // frame pointer, we can use SP with positive offsets to access any or all locals or arguments
4123 // that we can reach with SP-relative encodings. The funclet region might require the reserved
4124 // register, since it must use offsets from R11 to access the parent frame.
4126 unsigned maxR11PositiveEncodingOffset = compFloatingPointUsed ? 0x03FC : 0x0FFF;
4127 JITDUMP(" maxR11PositiveEncodingOffset = %6d\n", maxR11PositiveEncodingOffset);
4129 // Floating point load/store instructions (VLDR/VSTR) can address up to -0x3FC from R11, but we
4130 // don't know whether there are no integer locals at all, or whether the integer locals never need
4131 // large negative offsets, so we must use the integer max negative offset, which is a
4132 // smaller (absolute value) number.
4133 unsigned maxR11NegativeEncodingOffset = 0x00FF; // This is a negative offset from R11.
4134 JITDUMP(" maxR11NegativeEncodingOffset = %6d\n", maxR11NegativeEncodingOffset);
4136 // -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
4137 unsigned maxR11PositiveOffset = compArgSize + (2 * REGSIZE_BYTES) - 1;
4138 JITDUMP(" maxR11PositiveOffset = %6d\n", maxR11PositiveOffset);
4140 // The value is positive, but represents a negative offset from R11.
4141 // frameSize includes callee-saved space for R11 and LR, which are at non-negative offsets from R11
4142 // (+0 and +4, respectively), so don't include those in the max possible negative offset.
4143 assert(frameSize >= (2 * REGSIZE_BYTES));
4144 unsigned maxR11NegativeOffset = frameSize - (2 * REGSIZE_BYTES);
4145 JITDUMP(" maxR11NegativeOffset = %6d\n", maxR11NegativeOffset);
4147 if (codeGen->isFramePointerRequired())
4149 if (maxR11NegativeOffset > maxR11NegativeEncodingOffset)
4151 JITDUMP(" Returning true (frame required and maxR11NegativeOffset)\n\n");
4154 if (maxR11PositiveOffset > maxR11PositiveEncodingOffset)
4156 JITDUMP(" Returning true (frame required and maxR11PositiveOffset)\n\n");
4161 // Now consider the SP based frame case. Note that we will use SP based offsets to access the stack in R11 based
4162 // frames in the non-funclet main code area.
4164 unsigned maxSPPositiveEncodingOffset = compFloatingPointUsed ? 0x03FC : 0x0FFF;
4165 JITDUMP(" maxSPPositiveEncodingOffset = %6d\n", maxSPPositiveEncodingOffset);
4167 // -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
4168 assert(compArgSize + frameSize > 0);
4169 unsigned maxSPPositiveOffset = compArgSize + frameSize - 1;
4171 if (codeGen->isFramePointerUsed())
4173 // We have a frame pointer, so we can use it to access part of the stack, even if SP can't reach those parts.
4174 // We will still generate SP-relative offsets if SP can reach.
4176 // First, check that the stack between R11 and SP can be fully reached, either via negative offset from FP
4177 // or positive offset from SP. Don't count stored R11 or LR, which are reached from positive offsets from FP.
4179 unsigned maxSPLocalsCombinedOffset = frameSize - (2 * REGSIZE_BYTES) - 1;
4180 JITDUMP(" maxSPLocalsCombinedOffset = %6d\n", maxSPLocalsCombinedOffset);
4182 if (maxSPLocalsCombinedOffset > maxSPPositiveEncodingOffset)
4185 unsigned maxRemainingLocalsCombinedOffset = maxSPLocalsCombinedOffset - maxSPPositiveEncodingOffset;
4186 JITDUMP(" maxRemainingLocalsCombinedOffset = %6d\n", maxRemainingLocalsCombinedOffset);
4188 if (maxRemainingLocalsCombinedOffset > maxR11NegativeEncodingOffset)
4190 JITDUMP(" Returning true (frame pointer exists; R11 and SP can't reach entire stack between them)\n\n");
4194 // Otherwise, yes, we can address the remaining parts of the locals frame with negative offsets from R11.
4197 // Check whether either R11 or SP can access the arguments.
4198 if ((maxR11PositiveOffset > maxR11PositiveEncodingOffset) &&
4199 (maxSPPositiveOffset > maxSPPositiveEncodingOffset))
4201 JITDUMP(" Returning true (frame pointer exists; R11 and SP can't reach all arguments)\n\n");
4207 if (maxSPPositiveOffset > maxSPPositiveEncodingOffset)
4209 JITDUMP(" Returning true (no frame pointer exists; SP can't reach all of frame)\n\n");
4214 // We won't need to reserve REG_OPT_RSVD.
4216 JITDUMP(" Returning false\n\n");
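// Condensed model of the checks above (illustrative sketch, not compiled; it
// collapses the combined R11+SP reachability case into the two simple gates,
// so it is an approximation of the real logic, not a replacement for it):
#if 0 // illustrative sketch
static bool needsReservedReg(unsigned frameSize, unsigned argSize, bool floatUsed, bool fpRequired)
{
    const unsigned REGSZ     = 4;                           // ARM32 register size in bytes
    const unsigned maxPosEnc = floatUsed ? 0x03FC : 0x0FFF; // positive reach (VLDR vs LDR)
    const unsigned maxNegEnc = 0x00FF;                      // negative reach from R11

    const unsigned maxR11Pos = argSize + (2 * REGSZ) - 1;   // args above saved R11/LR
    const unsigned maxR11Neg = frameSize - (2 * REGSZ);     // locals below R11
    const unsigned maxSPPos  = argSize + frameSize - 1;     // whole frame above SP

    if (fpRequired)
    {
        // R11 must reach both the locals below it and the args above it.
        return (maxR11Neg > maxNegEnc) || (maxR11Pos > maxPosEnc);
    }
    return maxSPPos > maxPosEnc; // otherwise SP alone must reach the whole frame
}
#endif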
4218 #endif // TARGET_ARM
4220 #endif // TARGET_ARMARCH || TARGET_RISCV64
4222 //------------------------------------------------------------------------
4223 // compGetTieringName: get a string describing tiered compilation settings
4227 // wantShortName - true if a short name is ok (say for using in file names)
4230 // String describing tiering decisions for this method, including cases
4231 // where the jit codegen will differ from what the runtime requested.
4233 const char* Compiler::compGetTieringName(bool wantShortName) const
4235 const bool tier0 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0);
4236 const bool tier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
4237 const bool instrumenting = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR);
4239 if (!opts.compMinOptsIsSet)
4241 // If 'compMinOptsIsSet' is not set, just return here. Otherwise, if this method is called
4242 // from assertAbort(), we would recursively call assert while trying to get MinOpts(),
4243 // and eventually overflow the stack.
4244 return "Optimization-Level-Not-Yet-Set";
4247 assert(!tier0 || !tier1); // We don't expect multiple TIER flags to be set at one time.
4251 return instrumenting ? "Instrumented Tier0" : "Tier0";
4257 return instrumenting ? "Instrumented Tier1-OSR" : "Tier1-OSR";
4261 return instrumenting ? "Instrumented Tier1" : "Tier1";
4264 else if (opts.OptimizationEnabled())
4266 if (compSwitchedToOptimized)
4268 return wantShortName ? "Tier0-FullOpts" : "Tier-0 switched to FullOpts";
4275 else if (opts.MinOpts())
4277 if (compSwitchedToMinOpts)
4279 if (compSwitchedToOptimized)
4281 return wantShortName ? "Tier0-FullOpts-MinOpts" : "Tier-0 switched to FullOpts, then to MinOpts";
4285 return wantShortName ? "Tier0-MinOpts" : "Tier-0 switched to MinOpts";
4293 else if (opts.compDbgCode)
4299 return wantShortName ? "Unknown" : "Unknown optimization level";
4303 //------------------------------------------------------------------------
4304 // compGetPgoSourceName: get a string describing PGO source
4307 // String describing the PGO source (e.g. Dynamic, Static, etc.)
4309 const char* Compiler::compGetPgoSourceName() const
4311 switch (fgPgoSource)
4313 case ICorJitInfo::PgoSource::Static:
4314 return "Static PGO";
4315 case ICorJitInfo::PgoSource::Dynamic:
4316 return "Dynamic PGO";
4317 case ICorJitInfo::PgoSource::Blend:
4318 return "Blended PGO";
4319 case ICorJitInfo::PgoSource::Text:
4320 return "Textual PGO";
4321 case ICorJitInfo::PgoSource::Sampling:
4322 return "Sample-based PGO";
4323 case ICorJitInfo::PgoSource::IBC:
4324 return "Classic IBC";
4325 case ICorJitInfo::PgoSource::Synthesis:
4326 return "Synthesized PGO";
4328 return "Unknown PGO";
4332 //------------------------------------------------------------------------
4333 // compGetStressMessage: get a string describing jitstress capability
4337 // An empty string if stress is not enabled, else a string describing
4338 // if this method is subject to stress or is excluded by name or hash.
4340 const char* Compiler::compGetStressMessage() const
4342 // Add note about stress where appropriate
4343 const char* stressMessage = "";
4346 // Is stress enabled via mode name or level?
4347 if ((JitConfig.JitStressModeNames() != nullptr) || (getJitStressLevel() > 0))
4349 // Is the method being jitted excluded from stress via range?
4350 if (bRangeAllowStress)
4352 // Or is it excluded via name?
4353 if (!JitConfig.JitStressOnly().isEmpty() ||
4354 !JitConfig.JitStressOnly().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
4356 // Not excluded -- stress can happen
4357 stressMessage = " JitStress";
4361 stressMessage = " NoJitStress(Only)";
4366 stressMessage = " NoJitStress(Range)";
4371 return stressMessage;
4374 void Compiler::compFunctionTraceStart()
4377 if (compIsForInlining())
4382 if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
4384 LONG newJitNestingLevel = InterlockedIncrement(&Compiler::jitNestingLevel);
4385 if (newJitNestingLevel <= 0)
4387 printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
4390 for (LONG i = 0; i < newJitNestingLevel - 1; i++)
4394 printf("{ Start Jitting Method %4d %s (MethodHash=%08x) %s\n", Compiler::jitTotalMethodCompiled,
4395 info.compFullName, info.compMethodHash(),
4396 compGetTieringName()); /* } editor brace matching workaround for this printf */
4401 void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI)
4404 assert(!compIsForInlining());
4406 if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
4408 LONG newJitNestingLevel = InterlockedDecrement(&Compiler::jitNestingLevel);
4409 if (newJitNestingLevel < 0)
4411 printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
4414 for (LONG i = 0; i < newJitNestingLevel; i++)
4419 // Note: this count is incorrect if we are compiling several methods at the same time.
4420 unsigned methodNumber = Compiler::jitTotalMethodCompiled - 1;
4422 /* { editor brace-matching workaround for following printf */
4423 printf("} Jitted Method %4d at" FMT_ADDR "method %s size %08x%s%s\n", methodNumber, DBG_ADDR(methodCodePtr),
4424 info.compFullName, methodCodeSize, isNYI ? " NYI" : "", opts.altJit ? " altjit" : "");
4429 //------------------------------------------------------------------------
4430 // BeginPhase: begin execution of a phase
4433 // phase - the phase that is about to begin
4435 void Compiler::BeginPhase(Phases phase)
4437 mostRecentlyActivePhase = phase;
4440 //------------------------------------------------------------------------
4441 // EndPhase: finish execution of a phase
4444 // phase - the phase that has just finished
4446 void Compiler::EndPhase(Phases phase)
4448 #if defined(FEATURE_JIT_METHOD_PERF)
4449 if (pCompJitTimer != nullptr)
4451 pCompJitTimer->EndPhase(this, phase);
4455 mostRecentlyActivePhase = phase;
4458 //------------------------------------------------------------------------
4459 // compCompile: run phases needed for compilation
4462 // methodCodePtr [OUT] - address of generated code
4463 // methodCodeSize [OUT] - size of the generated code (hot + cold sections)
4464 // compileFlags [IN] - flags controlling jit behavior
4467 // This is the most interesting 'toplevel' function in the JIT. It goes through the operations of
4468 // importing, morphing, optimizations and code generation. This is called from the EE through the
4469 // code:CILJit::compileMethod function.
4471 // For an overview of the structure of the JIT, see:
4472 // https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md
4474 // Also called for inlinees, though they will only be run through the first few phases.
4476 void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags)
4478 compFunctionTraceStart();
4480 // Enable flow graph checks
4481 activePhaseChecks |= PhaseChecks::CHECK_FG;
4483 // Prepare for importation
4485 auto preImportPhase = [this]() {
4486 if (compIsForInlining())
4488 // Notify root instance that an inline attempt is about to import IL
4489 impInlineRoot()->m_inlineStrategy->NoteImport();
4494 VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
4496 // The temp holding the secret stub argument is used by fgImport() when importing the intrinsic.
4497 if (info.compPublishStubParam)
4499 assert(lvaStubArgumentVar == BAD_VAR_NUM);
4500 lvaStubArgumentVar = lvaGrabTempWithImplicitUse(false DEBUGARG("stub argument"));
4501 lvaGetDesc(lvaStubArgumentVar)->lvType = TYP_I_IMPL;
4502 // TODO-CQ: there is no need to mark it as doNotEnreg. There are no stores for this local
4503 // before codegen so liveness and LSRA mark it as "liveIn" and always allocate a stack slot for it.
4504 // However, it would be better to process it like other argument locals and keep it in
4505 // a reg for the whole method without spilling to the stack when possible.
4506 lvaSetVarDoNotEnregister(lvaStubArgumentVar DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr));
4509 DoPhase(this, PHASE_PRE_IMPORT, preImportPhase);
4511 // If we're going to instrument code, we may need to prepare before
4512 // we import. Also do this before we read in any profile data.
4514 if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
4516 DoPhase(this, PHASE_IBCPREP, &Compiler::fgPrepareToInstrumentMethod);
4519 // Incorporate profile data.
4521 // Note: the importer is sensitive to block weights, so this has
4522 // to happen before importation.
4524 activePhaseChecks |= PhaseChecks::CHECK_PROFILE;
4525 DoPhase(this, PHASE_INCPROFILE, &Compiler::fgIncorporateProfileData);
4526 activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
4528 // If we are doing OSR, update flow to initially reach the appropriate IL offset.
4532 fgFixEntryFlowForOSR();
4535 // Enable the post-phase checks that use internal logic to decide when checking makes sense.
4537 activePhaseChecks |=
4538 PhaseChecks::CHECK_EH | PhaseChecks::CHECK_LOOPS | PhaseChecks::CHECK_UNIQUE | PhaseChecks::CHECK_LINKED_LOCALS;
4540 // Import: convert the instrs in each basic block to a tree based intermediate representation
4542 DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport);
4544 // If this is a failed inline attempt, we're done.
4546 if (compIsForInlining() && compInlineResult->IsFailure())
4548 #ifdef FEATURE_JIT_METHOD_PERF
4549 if (pCompJitTimer != nullptr)
4551 #if MEASURE_CLRAPI_CALLS
4552 EndPhase(PHASE_CLR_API);
4554 pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false);
4561 // If instrumenting, add block and class probes.
4563 if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
4565 DoPhase(this, PHASE_IBCINSTR, &Compiler::fgInstrumentMethod);
4568 // Expand any patchpoints
4570 DoPhase(this, PHASE_PATCHPOINTS, &Compiler::fgTransformPatchpoints);
4572 // Transform indirect calls that require control flow expansion.
4574 DoPhase(this, PHASE_INDXCALL, &Compiler::fgTransformIndirectCalls);
4576 // Cleanup un-imported BBs, cleanup un-imported or
4577 // partially imported try regions, add OSR step blocks.
4579 DoPhase(this, PHASE_POST_IMPORT, &Compiler::fgPostImportationCleanup);
4581 // If we're importing for inlining, we're done.
4582 if (compIsForInlining())
4585 #ifdef FEATURE_JIT_METHOD_PERF
4586 if (pCompJitTimer != nullptr)
4588 #if MEASURE_CLRAPI_CALLS
4589 EndPhase(PHASE_CLR_API);
4591 pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false);
4598 // At this point in the phase list, all the inlinee phases have
4599 // been run, and inlinee compiles have exited, so we should only
4600 // get this far if we are jitting the root method.
4601 noway_assert(!compIsForInlining());
4603 // Prepare for the morph phases
4605 DoPhase(this, PHASE_MORPH_INIT, &Compiler::fgMorphInit);
4607 // Inline callee methods into this root method
4609 DoPhase(this, PHASE_MORPH_INLINE, &Compiler::fgInline);
4611 // Record "start" values for post-inlining cycles and elapsed time.
4612 RecordStateAtEndOfInlining();
4614 // Transform each GT_ALLOCOBJ node into either an allocation helper call or
4615 // local variable allocation on the stack.
4616 ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
4618 if (compObjectStackAllocation() && opts.OptimizationEnabled())
4620 objectAllocator.EnableObjectStackAllocation();
4623 objectAllocator.Run();
4625 // Add any internal blocks/trees we may need
4627 DoPhase(this, PHASE_MORPH_ADD_INTERNAL, &Compiler::fgAddInternal);
4629 // Remove empty try regions
4631 DoPhase(this, PHASE_EMPTY_TRY, &Compiler::fgRemoveEmptyTry);
4633 // Remove empty finally regions
4635 DoPhase(this, PHASE_EMPTY_FINALLY, &Compiler::fgRemoveEmptyFinally);
4637 // Streamline chains of finally invocations
4639 DoPhase(this, PHASE_MERGE_FINALLY_CHAINS, &Compiler::fgMergeFinallyChains);
4641 // Clone code in finallys to reduce overhead for non-exceptional paths
4643 DoPhase(this, PHASE_CLONE_FINALLY, &Compiler::fgCloneFinally);
4645 #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4647 // Update finally target flags after EH optimizations
4649 DoPhase(this, PHASE_UPDATE_FINALLY_FLAGS, &Compiler::fgUpdateFinallyTargetFlags);
4651 #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4656 unsigned methHash = info.compMethodHash();
4657 char* lostr = getenv("JitEHWTHashLo");
4658 unsigned methHashLo = 0;
4660 if (lostr != nullptr)
4662 sscanf_s(lostr, "%x", &methHashLo);
4665 char* histr = getenv("JitEHWTHashHi");
4666 unsigned methHashHi = UINT32_MAX;
4667 if (histr != nullptr)
4669 sscanf_s(histr, "%x", &methHashHi);
4672 if (methHash < methHashLo || methHash > methHashHi)
4674 lvaEnregEHVars = false;
4678 printf("Enregistering EH Vars for method %s, hash = 0x%x.\n", info.compFullName, info.compMethodHash());
4679 printf(""); // flush
4682 if (lvaEnregMultiRegVars)
4684 unsigned methHash = info.compMethodHash();
4685 char* lostr = getenv("JitMultiRegHashLo");
4686 unsigned methHashLo = 0;
4688 if (lostr != nullptr)
4690 sscanf_s(lostr, "%x", &methHashLo);
4693 char* histr = getenv("JitMultiRegHashHi");
4694 unsigned methHashHi = UINT32_MAX;
4695 if (histr != nullptr)
4697 sscanf_s(histr, "%x", &methHashHi);
4700 if (methHash < methHashLo || methHash > methHashHi)
4702 lvaEnregMultiRegVars = false;
4706 printf("Enregistering MultiReg Vars for method %s, hash = 0x%x.\n", info.compFullName,
4707 info.compMethodHash());
4708 printf(""); // flush
4713 // Do some flow-related optimizations
4715 if (opts.OptimizationEnabled())
4719 DoPhase(this, PHASE_TAIL_MERGE, &Compiler::fgTailMerge);
4721 // Merge common throw blocks
4723 DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows);
4725 // Run an early flow graph simplification pass
4727 DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase);
4730 // Promote struct locals
4732 DoPhase(this, PHASE_PROMOTE_STRUCTS, &Compiler::fgPromoteStructs);
4734 // Enable early ref counting of locals
4736 lvaRefCountState = RCS_EARLY;
4738 if (opts.OptimizationEnabled())
4740 fgNodeThreading = NodeThreading::AllLocals;
4743 // Figure out what locals are address-taken.
4745 DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals);
4747 // Do an early pass of liveness for forward sub and morph. This data is
4748 // valid until after morph.
4750 DoPhase(this, PHASE_EARLY_LIVENESS, &Compiler::fgEarlyLiveness);
4752 // Run a simple forward substitution pass.
4754 DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub);
4756 // Promote struct locals based on primitive access patterns
4758 DoPhase(this, PHASE_PHYSICAL_PROMOTION, &Compiler::PhysicalPromotion);
4760 // Expose candidates for implicit byref last-use copy elision.
4761 DoPhase(this, PHASE_IMPBYREF_COPY_OMISSION, &Compiler::fgMarkImplicitByRefCopyOmissionCandidates);
4763 // Locals tree list is no longer kept valid.
4764 fgNodeThreading = NodeThreading::None;
4766 // Apply the type update to implicit byref parameters; also choose (based on address-exposed
4767 // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard.
4769 DoPhase(this, PHASE_MORPH_IMPBYREF, &Compiler::fgRetypeImplicitByRefArgs);
4772 // Now that locals have been marked address-taken and implicit-byref, we can safely apply stress.
4774 fgStress64RsltMul();
4777 // Morph the trees in all the blocks of the method
4779 auto morphGlobalPhase = [this]() {
4780 unsigned prevBBCount = fgBBcount;
4783 // Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args
4784 fgMarkDemotedImplicitByRefArgs();
4785 lvaRefCountState = RCS_INVALID;
4786 fgLocalVarLivenessDone = false;
4788 // Decide the kind of code we want to generate
4791 fgExpandQmarkNodes();
4794 compCurBB = nullptr;
4797 // If we needed to create any new BasicBlocks then renumber the blocks
4798 if (fgBBcount > prevBBCount)
4804 activePhaseChecks |= PhaseChecks::CHECK_IR;
4806 DoPhase(this, PHASE_MORPH_GLOBAL, morphGlobalPhase);
4808 // GS security checks for unsafe buffers
4810 DoPhase(this, PHASE_GS_COOKIE, &Compiler::gsPhase);
4812 // Compute the block and edge weights
4814 DoPhase(this, PHASE_COMPUTE_EDGE_WEIGHTS, &Compiler::fgComputeBlockAndEdgeWeights);
4816 #if defined(FEATURE_EH_FUNCLETS)
4818 // Create funclets from the EH handlers.
4820 DoPhase(this, PHASE_CREATE_FUNCLETS, &Compiler::fgCreateFunclets);
4822 #endif // FEATURE_EH_FUNCLETS
4824 if (opts.OptimizationEnabled())
4828 DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops);
4830 // Run some flow graph optimizations (but don't reorder)
4832 DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow);
4834 // Second pass of tail merge
4836 DoPhase(this, PHASE_TAIL_MERGE2, &Compiler::fgTailMerge);
4838 // Compute reachability sets and dominators.
4840 DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability);
4842 // Scale block weights and mark run rarely blocks.
4844 DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights);
4846 // Discover and classify natural loops (e.g. mark iterative loops as such). This also marks loop blocks
4847 // and sets bbWeight to the loop nesting levels.
4849 DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase);
4851 // Clone loops with optimization opportunities, and choose one based on dynamic condition evaluation.
4853 DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops);
4857 DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops);
4859 // Clear loop table info that is not used after this point, and might become invalid.
4861 DoPhase(this, PHASE_CLEAR_LOOP_INFO, &Compiler::optClearLoopIterInfo);
4865 fgDebugCheckLinks();
4868 // Morph multi-dimensional array operations.
4869 // (Consider deferring all array operation morphing, including single-dimensional array ops,
4870 // from global morph to here, so cloning doesn't have to deal with morphed forms.)
4872 DoPhase(this, PHASE_MORPH_MDARR, &Compiler::fgMorphArrayOps);
4874 // Create the variable table (and compute variable ref counts)
4876 DoPhase(this, PHASE_MARK_LOCAL_VARS, &Compiler::lvaMarkLocalVars);
4878 // IMPORTANT, after this point, locals are ref counted.
4879 // However, ref counts are not kept incrementally up to date.
4880 assert(lvaLocalVarRefCounted());
4882 // Figure out the order in which operators are to be evaluated
4884 DoPhase(this, PHASE_FIND_OPER_ORDER, &Compiler::fgFindOperOrder);
4886 // Weave the tree lists. Anyone who modifies the tree shapes after
4887 // this point is responsible for calling fgSetStmtSeq() to keep the
4888 // nodes properly linked.
4890 DoPhase(this, PHASE_SET_BLOCK_ORDER, &Compiler::fgSetBlockOrder);
4892 fgNodeThreading = NodeThreading::AllTrees;
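// At this point the full per-statement tree lists are threaded. The node
// threading lifecycle driven by this method is, in order:
//   None -> AllLocals (for early liveness / forward sub)
//        -> None      (global morph rebuilds the trees)
//        -> AllTrees  (here, after fgSetBlockOrder)
//        -> LIR       (after rationalization, below)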
4894 // At this point we know if we are fully interruptible or not
4895 if (opts.OptimizationEnabled())
4898 bool doEarlyProp = true;
4899 bool doValueNum = true;
4900 bool doLoopHoisting = true;
4901 bool doCopyProp = true;
4902 bool doBranchOpt = true;
4904 bool doAssertionProp = true;
4905 bool doVNBasedIntrinExpansion = true;
4906 bool doRangeAnalysis = true;
4907 bool doVNBasedDeadStoreRemoval = true;
4910 #if defined(OPT_CONFIG)
4911 doSsa = (JitConfig.JitDoSsa() != 0);
4912 doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
4913 doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
4914 doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
4915 doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
4916 doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0);
4918 doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
4919 doVNBasedIntrinExpansion = doValueNum;
4920 doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
4921 doVNBasedDeadStoreRemoval = doValueNum && (JitConfig.JitDoVNBasedDeadStoreRemoval() != 0);
4925 iterations = JitConfig.JitOptRepeatCount();
4927 #endif // defined(OPT_CONFIG)
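// The OPT_CONFIG knobs above form a dependency chain: disabling an upstream
// phase transitively disables everything that consumes its output. An
// illustrative sketch (getFlag is a hypothetical stand-in for the JitConfig
// reads above):
#if 0 // illustrative sketch
static void dependencyChainExample()
{
    bool doSsa           = getFlag("JitDoSsa");
    bool doValueNum      = doSsa && getFlag("JitDoValueNumber");    // VN needs SSA
    bool doCopyProp      = doValueNum && getFlag("JitDoCopyProp");  // needs VN
    bool doAssertionProp = doValueNum && getFlag("JitDoAssertionProp");
    bool doRangeAnalysis = doAssertionProp && getFlag("JitDoRangeAnalysis");

    // e.g. DOTNET_JitDoSsa=0 turns off all five booleans above at once.
}
#endif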
4929 while (iterations > 0)
4933 // Build up SSA form for the IR
4935 DoPhase(this, PHASE_BUILD_SSA, &Compiler::fgSsaBuild);
4939 // At least do local var liveness; lowering depends on this.
4940 fgLocalVarLiveness();
4945 // Propagate array length and rewrite getType() method call
4947 DoPhase(this, PHASE_EARLY_PROP, &Compiler::optEarlyProp);
4952 // Value number the trees
4954 DoPhase(this, PHASE_VALUE_NUMBER, &Compiler::fgValueNumber);
4959 // Hoist invariant code out of loops
4961 DoPhase(this, PHASE_HOIST_LOOP_CODE, &Compiler::optHoistLoopCode);
4966 // Perform VN based copy propagation
4968 DoPhase(this, PHASE_VN_COPY_PROP, &Compiler::optVnCopyProp);
4973 // Optimize redundant branches
4975 DoPhase(this, PHASE_OPTIMIZE_BRANCHES, &Compiler::optRedundantBranches);
4980 // Remove common sub-expressions
4982 DoPhase(this, PHASE_OPTIMIZE_VALNUM_CSES, &Compiler::optOptimizeCSEs);
4985 // Assertion prop can do arbitrary statement remorphing, which
4986 // can clone code and disrupt our simpleminded SSA accounting.
4988 // So, disable the SSA checks.
4990 if (fgSsaChecksEnabled)
4992 JITDUMP("Disabling SSA checking before assertion prop\n");
4993 fgSsaChecksEnabled = false;
4996 if (doAssertionProp)
4998 // Assertion propagation
5000 DoPhase(this, PHASE_ASSERTION_PROP_MAIN, &Compiler::optAssertionPropMain);
5003 if (doVNBasedIntrinExpansion)
5005 // Expand some intrinsics based on VN data
5007 DoPhase(this, PHASE_VN_BASED_INTRINSIC_EXPAND, &Compiler::fgVNBasedIntrinsicExpansion);
5010 if (doRangeAnalysis)
5012 // Bounds check elimination via range analysis
5014 DoPhase(this, PHASE_OPTIMIZE_INDEX_CHECKS, &Compiler::rangeCheckPhase);
5017 if (doVNBasedDeadStoreRemoval)
5019 // Note: this invalidates SSA and value numbers on tree nodes.
5021 DoPhase(this, PHASE_VN_BASED_DEAD_STORE_REMOVAL, &Compiler::optVNBasedDeadStoreRemoval);
5026 // update the flowgraph if we modified it during the optimization phase
5028 // Note: this invalidates loops, dominators and reachability
5030 DoPhase(this, PHASE_OPT_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase);
5032 // Recompute the edge weight if we have modified the flow graph
5034 DoPhase(this, PHASE_COMPUTE_EDGE_WEIGHTS2, &Compiler::fgComputeEdgeWeights);
5037 // Iterate if requested, resetting annotations first.
5038 if (--iterations == 0)
5042 ResetOptAnnotations();
5043 RecomputeLoopInfo();
5047 // Dominator and reachability sets are no longer valid.
5048 // The loop table is no longer valid.
5049 fgDomsComputed = false;
5050 optLoopTableValid = false;
5051 optLoopsRequirePreHeaders = false;
5054 DoPhase(this, PHASE_STRESS_SPLIT_TREE, &Compiler::StressSplitTree);
5057 // Expand runtime lookups (an optimization but we'd better run it in tier0 too)
5058 DoPhase(this, PHASE_EXPAND_RTLOOKUPS, &Compiler::fgExpandRuntimeLookups);
5060 // Partially inline static initializations
5061 DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit);
5063 // Expand thread local access
5064 DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
5067 DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
5069 if (opts.OptimizationEnabled())
5071 // Optimize boolean conditions
5073 DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools);
5077 DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);
5079 // Optimize block order
5081 DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
5084 // Determine start of cold region if we are hot/cold splitting
5086 DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
5089 // Stash the current estimate of the function's size if necessary.
5092 compSizeEstimate = 0;
5093 compCycleEstimate = 0;
5094 for (BasicBlock* const block : Blocks())
5096 for (Statement* const stmt : block->Statements())
5098 compSizeEstimate += stmt->GetCostSz();
5099 compCycleEstimate += stmt->GetCostEx();
5105 // rationalize trees
5106 Rationalizer rat(this); // PHASE_RATIONALIZE
5109 fgNodeThreading = NodeThreading::LIR;
5111 // Here we do "simple lowering". When the RyuJIT backend works for all
5112 // platforms, this will be part of the more general lowering phase. For now, though, we do a separate
5113 // pass of "final lowering." We must do this before (final) liveness analysis, because this creates
5114 // range check throw blocks, in which the liveness must be correct.
5116 DoPhase(this, PHASE_SIMPLE_LOWERING, &Compiler::fgSimpleLowering);
5118 // Enable this to gather statistical data such as
5119 // call and register argument info, flowgraph and loop info, etc.
5123 if (compLocallocUsed)
5125 // We reserve REG_SAVED_LOCALLOC_SP to store SP on entry for stack unwinding
5126 codeGen->regSet.rsMaskResvd |= RBM_SAVED_LOCALLOC_SP;
5128 #endif // TARGET_ARM
5130 // Assign registers to variables, etc.
5132 // Create LinearScan before Lowering, so that Lowering can call LinearScan methods
5133 // for determining whether locals are register candidates and (for xarch) whether
5134 // a node is a containable memory op.
5135 m_pLinearScan = getLinearScanAllocator(this);
5139 m_pLowering = new (this, CMK_LSRA) Lowering(this, m_pLinearScan); // PHASE_LOWERING
5142 if (!compMacOsArm64Abi())
5144 // Set stack levels; this information is necessary for x86
5145 // but on other platforms it is used only in asserts.
5146 // TODO: do not run it in release on other platforms, see https://github.com/dotnet/runtime/issues/42673.
5147 StackLevelSetter stackLevelSetter(this);
5148 stackLevelSetter.Run();
5151 // We cannot add any new tracked variables after this point.
5152 lvaTrackedFixed = true;
5154 // Now that lowering is completed we can proceed to perform register allocation
5156 auto linearScanPhase = [this]() { m_pLinearScan->doLinearScan(); };
5157 DoPhase(this, PHASE_LINEAR_SCAN, linearScanPhase);
5159 // Copied from rpPredictRegUse()
5160 SetFullPtrRegMapRequired(codeGen->GetInterruptible() || !codeGen->isFramePointerUsed());
5162 #if FEATURE_LOOP_ALIGN
5163 // Place loop alignment instructions
5164 DoPhase(this, PHASE_ALIGN_LOOPS, &Compiler::placeLoopAlignInstructions);
5167 // The common phase checks and dumps are no longer relevant past this point.
5169 activePhaseChecks = PhaseChecks::CHECK_NONE;
5170 activePhaseDumps = PhaseDumps::DUMP_NONE;
5173 codeGen->genGenerateCode(methodCodePtr, methodCodeSize);
5175 #if TRACK_LSRA_STATS
5176 if (JitConfig.DisplayLsraStats() == 2)
5178 m_pLinearScan->dumpLsraStatsCsv(jitstdout());
5180 #endif // TRACK_LSRA_STATS
5182 // We're done -- set the active phase to the last phase
5183 // (which isn't really a phase)
5184 mostRecentlyActivePhase = PHASE_POST_EMIT;
5186 #ifdef FEATURE_JIT_METHOD_PERF
5189 #if MEASURE_CLRAPI_CALLS
5190 EndPhase(PHASE_CLR_API);
5192 EndPhase(PHASE_POST_EMIT);
5194 pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, true);
5198 // Generate PatchpointInfo
5199 generatePatchpointInfo();
5201 RecordStateAtEndOfCompilation();
5203 #ifdef FEATURE_TRACELOGGING
5204 compJitTelemetry.NotifyEndOfCompilation();
5207 unsigned methodsCompiled = (unsigned)InterlockedIncrement((LONG*)&Compiler::jitTotalMethodCompiled);
5209 if (JitConfig.JitDisasmSummary() && !compIsForInlining())
5211 char osrBuffer[20] = {0};
5214 // Tiering name already includes "OSR"; we just want the IL offset.
5215 sprintf_s(osrBuffer, 20, " @0x%x", info.compILEntry);
5219 const char* fullName = info.compFullName;
5221 const char* fullName =
5222 eeGetMethodFullName(info.compMethodHnd, /* includeReturnType */ false, /* includeThisSpecifier */ false);
5225 char debugPart[128] = {0};
5226 INDEBUG(sprintf_s(debugPart, 128, ", hash=0x%08x%s", info.compMethodHash(), compGetStressMessage()));
5228 const bool hasProf = fgHaveProfileData();
5229 printf("%4d: JIT compiled %s [%s%s%s%s, IL size=%u, code size=%u%s]\n", methodsCompiled, fullName,
5230 compGetTieringName(), osrBuffer, hasProf ? " with " : "", hasProf ? compGetPgoSourceName() : "",
5231 info.compILCodeSize, *methodCodeSize, debugPart);
5234 compFunctionTraceEnd(*methodCodePtr, *methodCodeSize, false);
5235 JITDUMP("Method code size: %d\n", (unsigned)(*methodCodeSize));
5237 #if FUNC_INFO_LOGGING
5238 if (compJitFuncInfoFile != nullptr)
5240 assert(!compIsForInlining());
5241 #ifdef DEBUG // We only have access to info.compFullName in DEBUG builds.
5242 fprintf(compJitFuncInfoFile, "%s\n", info.compFullName);
5244 fprintf(compJitFuncInfoFile, " %s\n", eeGetMethodFullName(info.compMethodHnd));
5246 fprintf(compJitFuncInfoFile, ""); // in our logic this causes a flush
5248 #endif // FUNC_INFO_LOGGING
5251 #if FEATURE_LOOP_ALIGN
5253 //------------------------------------------------------------------------
5254 // placeLoopAlignInstructions: determine where to place alignment padding
5257 // Suitable phase status
5260 // Iterate over all the blocks and determine
5261 // the best position to place the 'align' instruction. Inserting 'align'
5262 // instructions after an unconditional branch is preferred over inserting
5263 // in the block before the loop. In case there are multiple blocks
5264 // having 'jmp', the one that has lower weight is preferred.
5265 // If the block having 'jmp' is hotter than the block before the loop,
5266 // the align will still be placed after 'jmp' because the processor should
5267 // be smart enough to not fetch extra instructions beyond the jmp.
5269 PhaseStatus Compiler::placeLoopAlignInstructions()
5271 // Add align only if there were any loops that needed alignment
5272 if (loopAlignCandidates == 0)
5274 return PhaseStatus::MODIFIED_NOTHING;
5277 JITDUMP("Inside placeLoopAlignInstructions for %d loops.\n", loopAlignCandidates);
5279 bool madeChanges = false;
5280 weight_t minBlockSoFar = BB_MAX_WEIGHT;
5281 BasicBlock* bbHavingAlign = nullptr;
5282 BasicBlock::loopNumber currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP;
5283 bool visitedLoopNum[BasicBlock::MAX_LOOP_NUM];
5284 memset(visitedLoopNum, false, sizeof(visitedLoopNum));
5287 unsigned visitedBlockForLoopNum[BasicBlock::MAX_LOOP_NUM];
5288 memset(visitedBlockForLoopNum, 0, sizeof(visitedBlockForLoopNum));
5291 if ((fgFirstBB != nullptr) && fgFirstBB->isLoopAlign())
5293 // Adding an align instruction in the prolog is not supported,
5294 // hence just remove that loop from our list.
5295 fgFirstBB->unmarkLoopAlign(this DEBUG_ARG("prolog block"));
5299 int loopsToProcess = loopAlignCandidates;
5301 for (BasicBlock* const block : Blocks())
5303 if (currentAlignedLoopNum != BasicBlock::NOT_IN_LOOP)
5305 // We've been processing blocks within an aligned loop. Are we out of that loop now?
5306 if (currentAlignedLoopNum != block->bbNatLoopNum)
5308 currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP;
5312 // If there is an unconditional jump (which is not part of a callf/always pair)
5313 if (opts.compJitHideAlignBehindJmp && (block->bbJumpKind == BBJ_ALWAYS) && !block->isBBCallAlwaysPairTail())
5315 // Track the lower weight blocks
5316 if (block->bbWeight < minBlockSoFar)
5318 if (currentAlignedLoopNum == BasicBlock::NOT_IN_LOOP)
5320 // Ok to insert align instruction in this block because it is not part of any aligned loop.
5321 minBlockSoFar = block->bbWeight;
5322 bbHavingAlign = block;
5323 JITDUMP(FMT_BB ", bbWeight=" FMT_WT " ends with unconditional 'jmp' \n", block->bbNum,
5329 if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign()))
5331 // Loop alignment is disabled for cold blocks
5332 assert((block->bbFlags & BBF_COLD) == 0);
5333 BasicBlock* const loopTop = block->bbNext;
5334 bool isSpecialCallFinally = block->isBBCallAlwaysPairTail();
5335 bool unmarkedLoopAlign = false;
5337 #if FEATURE_EH_CALLFINALLY_THUNKS
5338 if (block->bbJumpKind == BBJ_CALLFINALLY)
5340 // It must be a retless BBJ_CALLFINALLY if we get here.
5341 assert(!block->isBBCallAlwaysPair());
5343 // In the case of FEATURE_EH_CALLFINALLY_THUNKS, we can't put the align instruction in a retless
5344 // BBJ_CALLFINALLY either, because it alters the "cloned finally" region reported to the VM.
5345 // In the x86 case (the only !FEATURE_EH_CALLFINALLY_THUNKS that supports retless
5346 // BBJ_CALLFINALLY), we allow it.
5347 isSpecialCallFinally = true;
5349 #endif // FEATURE_EH_CALLFINALLY_THUNKS
5351 if (isSpecialCallFinally)
5353 // There are two special cases:
5354 // 1. If the block before the loop start is a retless BBJ_CALLFINALLY with
5355 // FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment because it will affect reported EH
5357 // 2. If the previous block is the BBJ_ALWAYS of a BBJ_CALLFINALLY/BBJ_ALWAYS pair, then we
5358 // can't add alignment because we can't add instructions in that block. In the
5359 // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the reported EH, as above.
5360 // Currently, we don't align loops for these cases.
5362 loopTop->unmarkLoopAlign(this DEBUG_ARG("block before loop is special callfinally/always block"));
5364 unmarkedLoopAlign = true;
5366 else if ((loopTop->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) && visitedLoopNum[loopTop->bbNatLoopNum])
5370 sprintf_s(buffer, 100, "loop block " FMT_BB " appears before top of loop",
5371 visitedBlockForLoopNum[loopTop->bbNatLoopNum]);
5374 // In some odd cases we may see blocks within the loop before we see the
5375 // top block of the loop. Just bail on aligning such loops.
5378 loopTop->unmarkLoopAlign(this DEBUG_ARG(buffer));
5380 unmarkedLoopAlign = true;
5383 if (!unmarkedLoopAlign)
5385 if (bbHavingAlign == nullptr)
5387 // If a jmp was not found, then the block before the loop start is where the align instruction will be added.
5389 bbHavingAlign = block;
5390 JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n",
5391 block->bbNum, loopTop->bbNum);
5395 JITDUMP("Marking " FMT_BB
5396 " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB "\n",
5397 bbHavingAlign->bbNum, loopTop->bbNum);
5401 bbHavingAlign->bbFlags |= BBF_HAS_ALIGN;
5404 minBlockSoFar = BB_MAX_WEIGHT;
5405 bbHavingAlign = nullptr;
5406 currentAlignedLoopNum = loopTop->bbNatLoopNum;
5408 if (--loopsToProcess == 0)
5414 if (block->bbNatLoopNum != BasicBlock::NOT_IN_LOOP)
5417 if (!visitedLoopNum[block->bbNatLoopNum])
5419 // Record the first block for which bbNatLoopNum was seen for
5420 // debugging purposes.
5421 visitedBlockForLoopNum[block->bbNatLoopNum] = block->bbNum;
5424 // If this block is part of a loop, mark the loopNum as visited.
5425 visitedLoopNum[block->bbNatLoopNum] = true;
5429 assert(loopsToProcess == 0);
5431 return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
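// Worked example of the placement preference described above (block weights
// are hypothetical):
//
//   BB01 (weight 4) ends in BBJ_ALWAYS    <-- candidate jmp, weight 4
//   BB02 (weight 1) ends in BBJ_ALWAYS    <-- candidate jmp, weight 1 (chosen)
//   BB03 (weight 8) falls through to...
//   BB04 aligned loop top
//
// The align padding is emitted at the end of BB02: it is the lowest-weight
// block ending in an unconditional jmp before the loop, so the padding bytes
// sit after a 'jmp' that the processor never fetches past, instead of in the
// hotter fall-through block BB03.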
5435 //------------------------------------------------------------------------
5436 // StressSplitTree: A phase that stresses the gtSplitTree function.
5439 // Suitable phase status
5442 // Stress is applied on a function-by-function basis
5444 PhaseStatus Compiler::StressSplitTree()
5446 if (compStressCompile(STRESS_SPLIT_TREES_RANDOMLY, 10))
5448 SplitTreesRandomly();
5449 return PhaseStatus::MODIFIED_EVERYTHING;
5452 if (compStressCompile(STRESS_SPLIT_TREES_REMOVE_COMMAS, 10))
5454 SplitTreesRemoveCommas();
5455 return PhaseStatus::MODIFIED_EVERYTHING;
5458 return PhaseStatus::MODIFIED_NOTHING;
5461 //------------------------------------------------------------------------
5462 // SplitTreesRandomly: Split all statements at a random location.
5464 void Compiler::SplitTreesRandomly()
5468 rng.Init(info.compMethodHash() ^ 0x077cc4d4);
5470 // Splitting creates a lot of new locals. Set a limit on how many we end up creating here.
5471 unsigned maxLvaCount = max(lvaCount * 2, 50000);
5473 for (BasicBlock* block : Blocks())
5475 for (Statement* stmt : block->NonPhiStatements())
5478 for (GenTree* tree : stmt->TreeList())
5480 if (tree->OperIs(GT_JTRUE)) // Due to relop invariant
5488 int splitTree = rng.Next(numTrees);
5489 for (GenTree* tree : stmt->TreeList())
5491 if (tree->OperIs(GT_JTRUE))
5496 JITDUMP("Splitting " FMT_STMT " at [%06u]\n", stmt->GetID(), dspTreeID(tree));
5499 if (gtSplitTree(block, stmt, tree, &newStmt, &use))
5501 while ((newStmt != nullptr) && (newStmt != stmt))
5503 fgMorphStmtBlockOps(block, newStmt);
5504 newStmt = newStmt->GetNextStmt();
5507 fgMorphStmtBlockOps(block, stmt);
5508 gtUpdateStmtSideEffects(stmt);
5517 if (lvaCount > maxLvaCount)
5519 JITDUMP("Created too many locals (at %u) -- stopping\n", lvaCount);
5527 //------------------------------------------------------------------------
5528 // SplitTreesRemoveCommas: Split trees to remove all commas.
5530 void Compiler::SplitTreesRemoveCommas()
5532 // Splitting creates a lot of new locals. Set a limit on how many we end up creating here.
5533 unsigned maxLvaCount = max(lvaCount * 2, 50000);
5535 for (BasicBlock* block : Blocks())
5537 Statement* stmt = block->FirstNonPhiDef();
5538 while (stmt != nullptr)
5540 Statement* nextStmt = stmt->GetNextStmt();
5541 for (GenTree* tree : stmt->TreeList())
5543 if (!tree->OperIs(GT_COMMA))
5548 // Supporting this weird construct would require additional
5549 // handling; we would need to move the comma into the
5550 // next node in execution order. We don't see this in practice, so just
5552 assert(!tree->IsReverseOp());
5554 JITDUMP("Removing COMMA [%06u]\n", dspTreeID(tree));
5557 gtSplitTree(block, stmt, tree, &newStmt, &use);
5558 GenTree* op1SideEffects = nullptr;
5559 gtExtractSideEffList(tree->gtGetOp1(), &op1SideEffects);
5561 if (op1SideEffects != nullptr)
5563 Statement* op1Stmt = fgNewStmtFromTree(op1SideEffects);
5564 fgInsertStmtBefore(block, stmt, op1Stmt);
5565 if (newStmt == nullptr)
5571 *use = tree->gtGetOp2();
5573 for (Statement* cur = newStmt; (cur != nullptr) && (cur != stmt); cur = cur->GetNextStmt())
5575 fgMorphStmtBlockOps(block, cur);
5578 fgMorphStmtBlockOps(block, stmt);
5579 gtUpdateStmtSideEffects(stmt);
5581 if (lvaCount > maxLvaCount)
5583 JITDUMP("Created too many locals (at %u) -- stopping\n", lvaCount);
5587 // Morphing block ops can introduce commas (and the original
5588 // statement can also have more commas left). Proceed from the
5589 // earliest newly introduced statement.
5590 nextStmt = newStmt != nullptr ? newStmt : stmt;
5598 for (BasicBlock* block : Blocks())
5600 for (Statement* stmt : block->NonPhiStatements())
5602 for (GenTree* tree : stmt->TreeList())
5604 assert(!tree->OperIs(GT_COMMA));
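// The comma-removal transformation above, in tree form (illustrative):
//
//   STMT:  use(COMMA(op1, op2))
//     =>   STMT': <op1's side effects, split out as a preceding statement>
//          STMT:  use(op2)
//
// e.g. V00 = COMMA(CALL helper, LCL_VAR V01) becomes
//          CALL helper
//          V00 = LCL_VAR V01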
5610 //------------------------------------------------------------------------
5611 // generatePatchpointInfo: allocate and fill in patchpoint info data,
5612 // and report it to the VM
5614 void Compiler::generatePatchpointInfo()
5616 if (!doesMethodHavePatchpoints() && !doesMethodHavePartialCompilationPatchpoints())
5618 // Nothing to report
5622 // Patchpoints are only found in Tier0 code, which is unoptimized, and so
5623 // should always have a frame pointer.
5624 assert(codeGen->isFramePointerUsed());
5626 // Allocate patchpoint info storage from runtime, and fill in initial bits of data.
5627 const unsigned patchpointInfoSize = PatchpointInfo::ComputeSize(info.compLocalsCount);
5628 PatchpointInfo* const patchpointInfo = (PatchpointInfo*)info.compCompHnd->allocateArray(patchpointInfoSize);
5630 // Patchpoint offsets always refer to "virtual frame offsets".
5632 // For x64 this falls out because Tier0 frames are always FP frames, and so the FP-relative
5633 // offset is what we want.
5635 // For arm64, if the frame pointer is not at the top of the frame, we need to adjust the
5637 CLANG_FORMAT_COMMENT_ANCHOR;
5639 #if defined(TARGET_AMD64)
5640 // We add +TARGET_POINTER_SIZE here to account for the slot that Jit_Patchpoint
5641 // creates when it simulates calling the OSR method (the "pseudo return address" slot).
5642 // This is effectively a new slot at the bottom of the Tier0 frame.
5644 const int totalFrameSize = codeGen->genTotalFrameSize() + TARGET_POINTER_SIZE;
5645 const int offsetAdjust = 0;
5646 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5647 // SP is not manipulated by calls so no frame size adjustment needed.
5648 // Local offsets may need adjusting if FP is at the bottom of the frame.
5650 const int totalFrameSize = codeGen->genTotalFrameSize();
5651 const int offsetAdjust = codeGen->genSPtoFPdelta() - totalFrameSize;
5653 NYI("patchpoint info generation");
5654 const int offsetAdjust = 0;
5655 const int totalFrameSize = 0;
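// Worked example of the adjustment above (hypothetical arm64 numbers): with
// genTotalFrameSize() = 0x90 and genSPtoFPdelta() = 0x80, offsetAdjust is
// 0x80 - 0x90 = -0x10, so a local homed at FP+0x28 is reported at virtual
// offset 0x28 - 0x10 = 0x18. On x64, offsetAdjust is 0 and only the frame
// size is biased, by the pseudo return address slot noted above.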
5658 patchpointInfo->Initialize(info.compLocalsCount, totalFrameSize);
5660 JITDUMP("--OSR--- Total Frame Size %d, local offset adjust is %d\n", patchpointInfo->TotalFrameSize(),
5663 // We record offsets for all the "locals" here. Could restrict
5664 // this to just the IL locals with some extra logic, and save a bit of space,
5665 // but would need to adjust all consumers, too.
5666 for (unsigned lclNum = 0; lclNum < info.compLocalsCount; lclNum++)
5668 // If there are shadowed params, the patchpoint info should refer to the shadow copy.
5670 unsigned varNum = lclNum;
5672 if (gsShadowVarInfo != nullptr)
5674 unsigned const shadowNum = gsShadowVarInfo[lclNum].shadowCopy;
5675 if (shadowNum != BAD_VAR_NUM)
5677 assert(shadowNum < lvaCount);
5678 assert(shadowNum >= info.compLocalsCount);
5684 LclVarDsc* const varDsc = lvaGetDesc(varNum);
5686 // We expect all these to have stack homes, and be FP relative
5687 assert(varDsc->lvOnFrame);
5688 assert(varDsc->lvFramePointerBased);
5690 // Record FramePtr relative offset (no localloc yet)
5691 // Note if IL stream contained an address-of that potentially leads to exposure.
5692 // That bit of IL might be skipped by OSR partial importation.
5693 const bool isExposed = varDsc->lvHasLdAddrOp;
5694 patchpointInfo->SetOffsetAndExposure(lclNum, varDsc->GetStackOffset() + offsetAdjust, isExposed);
5696 JITDUMP("--OSR-- V%02u is at virtual offset %d%s%s\n", lclNum, patchpointInfo->Offset(lclNum),
5697 patchpointInfo->IsExposed(lclNum) ? " (exposed)" : "", (varNum != lclNum) ? " (shadowed)" : "");
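// e.g. if GS analysis shadowed parameter V03 into V12 (hypothetical numbers),
// the loop above records V12's frame offset under lclNum 3, so the OSR method
// picks up the shadow copy, which holds the live value, rather than the
// original parameter home.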
5702 if (lvaReportParamTypeArg())
5704 const int offset = lvaCachedGenericContextArgOffset();
5705 patchpointInfo->SetGenericContextArgOffset(offset + offsetAdjust);
5706 JITDUMP("--OSR-- cached generic context virtual offset is %d\n", patchpointInfo->GenericContextArgOffset());
5709 if (lvaKeepAliveAndReportThis())
5711 const int offset = lvaCachedGenericContextArgOffset();
5712 patchpointInfo->SetKeptAliveThisOffset(offset + offsetAdjust);
5713 JITDUMP("--OSR-- kept-alive this virtual offset is %d\n", patchpointInfo->KeptAliveThisOffset());
5716 if (compGSReorderStackLayout)
5718 assert(lvaGSSecurityCookie != BAD_VAR_NUM);
5719 LclVarDsc* const varDsc = lvaGetDesc(lvaGSSecurityCookie);
5720 patchpointInfo->SetSecurityCookieOffset(varDsc->GetStackOffset() + offsetAdjust);
5721 JITDUMP("--OSR-- security cookie V%02u virtual offset is %d\n", lvaGSSecurityCookie,
5722 patchpointInfo->SecurityCookieOffset());
5725 if (lvaMonAcquired != BAD_VAR_NUM)
5727 LclVarDsc* const varDsc = lvaGetDesc(lvaMonAcquired);
5728 patchpointInfo->SetMonitorAcquiredOffset(varDsc->GetStackOffset() + offsetAdjust);
5729 JITDUMP("--OSR-- monitor acquired V%02u virtual offset is %d\n", lvaMonAcquired,
5730 patchpointInfo->MonitorAcquiredOffset());
5733 #if defined(TARGET_AMD64)
5734 // Record callee save registers.
5735 // Currently only needed for x64.
5737 regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5738 rsPushRegs |= RBM_FPBASE;
5739 patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs);
5740 JITDUMP("--OSR-- Tier0 callee saves: ");
5741 JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters()));
5745 // Register this with the runtime.
5746 info.compCompHnd->setPatchpointInfo(patchpointInfo);
5749 //------------------------------------------------------------------------
5750 // ResetOptAnnotations: Clear annotations produced during global optimizations.
5753 // The intent of this method is to clear any information typically assumed
5754 // to be set only once; it is used between iterations when JitOptRepeat is
5757 void Compiler::ResetOptAnnotations()
5759 assert(opts.optRepeat);
5760 assert(JitConfig.JitOptRepeatCount() > 0);
5763 m_blockToEHPreds = nullptr;
5764 fgSsaPassesCompleted = 0;
5765 fgVNPassesCompleted = 0;
5766 fgSsaChecksEnabled = false;
5768 for (BasicBlock* const block : Blocks())
5770 for (Statement* const stmt : block->Statements())
5772 for (GenTree* const tree : stmt->TreeList())
5775 tree->ClearAssertion();
5776 tree->gtCSEnum = NO_CSE;
5782 //------------------------------------------------------------------------
5783 // RecomputeLoopInfo: Recompute loop annotations between opt-repeat iterations.
5786 // The intent of this method is to update loop structure annotations, and those
5787 // they depend on; these annotations may have become stale during optimization,
5788 // and need to be up-to-date before running another iteration of optimizations.
5790 void Compiler::RecomputeLoopInfo()
5792 assert(opts.optRepeat);
5793 assert(JitConfig.JitOptRepeatCount() > 0);
5794 // Recompute reachability sets, dominators, and loops.
5796 fgDomsComputed = false;
5797 fgComputeReachability();
5798 optSetBlockWeights();
5799 // Rebuild the loop tree annotations themselves
5803 /*****************************************************************************/
5804 void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
5808 /*****************************************************************************/
5811 void* forceFrameJIT; // used to force a frame; useful for fastchecked debugging
5813 bool Compiler::skipMethod()
5815 static ConfigMethodRange fJitRange;
5816 fJitRange.EnsureInit(JitConfig.JitRange());
5817 assert(!fJitRange.Error());
5819 // Normally JitConfig.JitRange() is null, meaning we don't want to skip
5820 // jitting any methods.
5822 // So, the logic below relies on the fact that a null range string
5823 // passed to ConfigMethodRange represents the set of all methods.
5825 if (!fJitRange.Contains(info.compMethodHash()))
5830 if (JitConfig.JitExclude().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
5835 if (!JitConfig.JitInclude().isEmpty() &&
5836 !JitConfig.JitInclude().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
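// The three gates above combine as follows (illustrative summary): a method
// is jitted only if its hash is inside DOTNET_JitRange (a null range means
// "all methods"), it is not on DOTNET_JitExclude, and, when DOTNET_JitInclude
// is non-empty, it appears on that include list. Failing any gate skips it:
#if 0 // illustrative sketch; inRange/excluded/included are hypothetical stand-ins
bool skip = !inRange(hash) || excluded(method) || (includeListSet && !included(method));
#endif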
5846 /*****************************************************************************/
5848 int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
5849 void** methodCodePtr,
5850 uint32_t* methodCodeSize,
5851 JitFlags* compileFlags)
5853 // compInit should have set these already.
5854 noway_assert(info.compMethodInfo != nullptr);
5855 noway_assert(info.compCompHnd != nullptr);
5856 noway_assert(info.compMethodHnd != nullptr);
5858 #ifdef FEATURE_JIT_METHOD_PERF
5859 static bool checkedForJitTimeLog = false;
5861 pCompJitTimer = nullptr;
5863 if (!checkedForJitTimeLog)
5865 // Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
5866 // retail builds. Do not call the regular Config helper here as it would pull
5867 // in a copy of the config parser into the clrjit.dll.
5868 InterlockedCompareExchangeT(&Compiler::compJitTimeLogFilename,
5869 (LPCWSTR)info.compCompHnd->getJitTimeLogFilename(), NULL);
5871 // At a process or module boundary clear the file and start afresh.
5872 JitTimer::PrintCsvHeader();
5874 checkedForJitTimeLog = true;
5876 if ((Compiler::compJitTimeLogFilename != nullptr) || (JitTimeLogCsv() != nullptr))
5878 pCompJitTimer = JitTimer::Create(this, info.compMethodInfo->ILCodeSize);
5880 #endif // FEATURE_JIT_METHOD_PERF
5883 Compiler* me = this;
5884 forceFrameJIT = (void*)&me; // let us see the this pointer in fastchecked build
5887 #if FUNC_INFO_LOGGING
5888 LPCWSTR tmpJitFuncInfoFilename = JitConfig.JitFuncInfoFile();
5890 if (tmpJitFuncInfoFilename != nullptr)
5892 LPCWSTR oldFuncInfoFileName =
5893 InterlockedCompareExchangeT(&compJitFuncInfoFilename, tmpJitFuncInfoFilename, NULL);
5894 if (oldFuncInfoFileName == nullptr)
5896 assert(compJitFuncInfoFile == nullptr);
5897 compJitFuncInfoFile = _wfopen(compJitFuncInfoFilename, W("a"));
5898 if (compJitFuncInfoFile == nullptr)
5900 #if defined(DEBUG) && !defined(HOST_UNIX) // no 'perror' in the PAL
5901 perror("Failed to open JitFuncInfoLogFile");
5902 #endif // defined(DEBUG) && !defined(HOST_UNIX)
5906 #endif // FUNC_INFO_LOGGING
5908 // if (s_compMethodsCount==0) setvbuf(jitstdout(), NULL, _IONBF, 0);
5910 if (compIsForInlining())
5912 compileFlags->Clear(JitFlags::JIT_FLAG_OSR);
5913 info.compILEntry = 0;
5914 info.compPatchpointInfo = nullptr;
5916 else if (compileFlags->IsSet(JitFlags::JIT_FLAG_OSR))
5918 // Fetch OSR info from the runtime
5919 info.compPatchpointInfo = info.compCompHnd->getOSRInfo(&info.compILEntry);
5920 assert(info.compPatchpointInfo != nullptr);
5923 #if defined(TARGET_ARM64)
5924 compFrameInfo = {0};
5927 virtualStubParamInfo = new (this, CMK_Unknown) VirtualStubParamInfo(IsTargetAbi(CORINFO_NATIVEAOT_ABI));
5929 // compMatchedVM is set to true if both CPU/ABI and OS match the execution engine requirements
5931 // Do we have a matched VM? Or are we "abusing" the VM to help us do JIT work (such as using an x86 native VM
5932 // with an ARM-targeting "altjit")?
5933 // Match CPU/ABI for compMatchedVM
5934 info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
5936 // Match OS for compMatchedVM
5937 CORINFO_EE_INFO* eeInfo = eeGetEEInfo();
5939 #ifdef TARGET_OS_RUNTIMEDETERMINED
5940 noway_assert(TargetOS::OSSettingConfigured);
5943 if (TargetOS::IsMacOS)
5945 info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_MACOS);
5947 else if (TargetOS::IsUnix)
5949 if (TargetArchitecture::IsX64)
5951 // MacOS x64 uses the Unix jit variant in crossgen2, not a special jit
5952 info.compMatchedVM =
5953 info.compMatchedVM && ((eeInfo->osType == CORINFO_UNIX) || (eeInfo->osType == CORINFO_MACOS));
5957 info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_UNIX);
5960 else if (TargetOS::IsWindows)
5962 info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_WINNT);
5965 // If we are not compiling for a matched VM, then we are getting JIT flags that don't match our target
5966 // architecture. The two main examples here are an ARM targeting altjit hosted on x86 and an ARM64
5967 // targeting altjit hosted on x64. (Though with cross-bitness work, the host doesn't necessarily need
5968 // to be of the same bitness.) In these cases, we need to fix up the JIT flags to be appropriate for
5969 // the target, as the VM's expected target may overlap bit flags with different meaning to our target.
5970 // Note that it might be better to do this immediately when setting the JIT flags in CILJit::compileMethod()
5971 // (when JitFlags::SetFromFlags() is called), but this is close enough. (To move this logic to
5972 // CILJit::compileMethod() would require moving the info.compMatchedVM computation there as well.)
5974 if (!info.compMatchedVM)
5976 CORINFO_InstructionSetFlags instructionSetFlags;
5978 // We need to assume, by default, that all flags coming from the VM are invalid.
5979 instructionSetFlags.Reset();
5981 // We then add each available instruction set for the target architecture provided
5982 // that the corresponding JitConfig switch hasn't explicitly asked for it to be
5983 // disabled. This allows us to default to "everything" supported for altjit scenarios
5984 // while also still allowing instruction set opt-out providing users with the ability
5985 // to, for example, see and debug ARM64 codegen for any desired CPU configuration without
5986 // needing to have the hardware in question.
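// For example (hypothetical configuration): when running the ARM64 altjit on an x64
// host, setting DOTNET_EnableArm64AdvSimd=0 keeps InstructionSet_AdvSimd out of the
// defaults added below, making it easy to inspect the non-SIMD codegen paths.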
5988 #if defined(TARGET_ARM64)
5989 if (JitConfig.EnableHWIntrinsic() != 0)
5991 instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
5994 if (JitConfig.EnableArm64AdvSimd() != 0)
5996 instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd);
5999 if (JitConfig.EnableArm64Aes() != 0)
6001 instructionSetFlags.AddInstructionSet(InstructionSet_Aes);
6004 if (JitConfig.EnableArm64Crc32() != 0)
6006 instructionSetFlags.AddInstructionSet(InstructionSet_Crc32);
6009 if (JitConfig.EnableArm64Dp() != 0)
6011 instructionSetFlags.AddInstructionSet(InstructionSet_Dp);
6014 if (JitConfig.EnableArm64Rdm() != 0)
6016 instructionSetFlags.AddInstructionSet(InstructionSet_Rdm);
6019 if (JitConfig.EnableArm64Sha1() != 0)
6021 instructionSetFlags.AddInstructionSet(InstructionSet_Sha1);
6024 if (JitConfig.EnableArm64Sha256() != 0)
6026 instructionSetFlags.AddInstructionSet(InstructionSet_Sha256);
6029 if (JitConfig.EnableArm64Atomics() != 0)
6031 instructionSetFlags.AddInstructionSet(InstructionSet_Atomics);
6034 if (JitConfig.EnableArm64Dczva() != 0)
6036 instructionSetFlags.AddInstructionSet(InstructionSet_Dczva);
6038 #elif defined(TARGET_XARCH)
6039 if (JitConfig.EnableHWIntrinsic() != 0)
6041 instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);
6044 if (JitConfig.EnableSSE() != 0)
6046 instructionSetFlags.AddInstructionSet(InstructionSet_SSE);
6049 if (JitConfig.EnableSSE2() != 0)
6051 instructionSetFlags.AddInstructionSet(InstructionSet_SSE2);
6054 if ((JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE3_4() != 0))
6056 instructionSetFlags.AddInstructionSet(InstructionSet_SSE3);
6059 if (JitConfig.EnableSSSE3() != 0)
6061 instructionSetFlags.AddInstructionSet(InstructionSet_SSSE3);
6064 if (JitConfig.EnableSSE41() != 0)
6066 instructionSetFlags.AddInstructionSet(InstructionSet_SSE41);
6069 if (JitConfig.EnableSSE42() != 0)
6071 instructionSetFlags.AddInstructionSet(InstructionSet_SSE42);
6074 if (JitConfig.EnableAVX() != 0)
6076 instructionSetFlags.AddInstructionSet(InstructionSet_AVX);
6079 if (JitConfig.EnableAVX2() != 0)
6081 instructionSetFlags.AddInstructionSet(InstructionSet_AVX2);
6084 if (JitConfig.EnableAES() != 0)
6086 instructionSetFlags.AddInstructionSet(InstructionSet_AES);
6089 if (JitConfig.EnableBMI1() != 0)
6091 instructionSetFlags.AddInstructionSet(InstructionSet_BMI1);
6094 if (JitConfig.EnableBMI2() != 0)
6096 instructionSetFlags.AddInstructionSet(InstructionSet_BMI2);
6099 if (JitConfig.EnableFMA() != 0)
6101 instructionSetFlags.AddInstructionSet(InstructionSet_FMA);
6104 if (JitConfig.EnableLZCNT() != 0)
6106 instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT);
6109 if (JitConfig.EnablePCLMULQDQ() != 0)
6111 instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ);
6114 if (JitConfig.EnablePOPCNT() != 0)
6116 instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT);
6119 if (JitConfig.EnableAVXVNNI() != 0)
6121 instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI);
6124 if (JitConfig.EnableAVX512F() != 0)
6126 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F);
6129 if (JitConfig.EnableAVX512F_VL() != 0)
6131 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL);
6134 if (JitConfig.EnableAVX512BW() != 0)
6136 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW);
6139 if (JitConfig.EnableAVX512BW_VL() != 0)
6141 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL);
6144 if (JitConfig.EnableAVX512CD() != 0)
6146 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD);
6149 if (JitConfig.EnableAVX512CD_VL() != 0)
6151 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL);
6154 if (JitConfig.EnableAVX512DQ() != 0)
6156 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ);
6159 if (JitConfig.EnableAVX512DQ_VL() != 0)
6161 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL);
6164 if (JitConfig.EnableAVX512VBMI() != 0)
6166 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI);
6169 if (JitConfig.EnableAVX512VBMI_VL() != 0)
6171 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI_VL);
6175 // These calls are important and explicitly ordered to ensure that the flags are correct in
6176 // the face of missing or removed instruction sets. Without them, we might end up with incorrect
6177 // downstream checks.
6179 instructionSetFlags.Set64BitInstructionSetVariants();
6180 instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags);
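// As a sketch of the invariant: if EnableAVX2 was left on above while EnableAVX was
// explicitly zeroed, EnsureInstructionSetFlagsAreValid is expected to strip AVX2 as
// well, since AVX2 implies AVX.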
6182 compileFlags->SetInstructionSetFlags(instructionSetFlags);
6185 compMaxUncheckedOffsetForNullObject = eeGetEEInfo()->maxUncheckedOffsetForNullObject;
6187 // Set the context for token lookup.
6188 if (compIsForInlining())
6190 impTokenLookupContextHandle = impInlineInfo->tokenLookupContextHandle;
6192 assert(impInlineInfo->inlineCandidateInfo->clsHandle == info.compClassHnd);
6193 assert(impInlineInfo->inlineCandidateInfo->clsAttr == info.compCompHnd->getClassAttribs(info.compClassHnd));
6194 // printf("%x != %x\n", impInlineInfo->inlineCandidateInfo->clsAttr,
6195 // info.compCompHnd->getClassAttribs(info.compClassHnd));
6196 info.compClassAttr = impInlineInfo->inlineCandidateInfo->clsAttr;
6200 impTokenLookupContextHandle = METHOD_BEING_COMPILED_CONTEXT();
6202 info.compClassAttr = info.compCompHnd->getClassAttribs(info.compClassHnd);
6206 if (JitConfig.EnableExtraSuperPmiQueries())
6208 // This call to getClassModule/getModuleAssembly/getAssemblyName fails in crossgen2 due to these
6209 // APIs being unimplemented. So disable this extra info for pre-jit mode. See
6210 // https://github.com/dotnet/runtime/issues/48888.
6212 // Ditto for some of the class name queries for generic params.
6214 if (!compileFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
6216 // Get the assembly name, to aid finding any particular SuperPMI method context function
6217 (void)info.compCompHnd->getAssemblyName(
6218 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
6220 // Fetch class names for the method's generic parameters.
6222 CORINFO_SIG_INFO sig;
6223 info.compCompHnd->getMethodSig(info.compMethodHnd, &sig, nullptr);
6225 const unsigned classInst = sig.sigInst.classInstCount;
6228 for (unsigned i = 0; i < classInst; i++)
6230 eeGetClassName(sig.sigInst.classInst[i]);
6234 const unsigned methodInst = sig.sigInst.methInstCount;
6237 for (unsigned i = 0; i < methodInst; i++)
6239 eeGetClassName(sig.sigInst.methInst[i]);
6246 info.compProfilerCallback = false; // Assume false until we are told to hook this method.
6249 if (!compIsForInlining())
6251 JitTls::GetLogEnv()->setCompiler(this);
6254 // Have we been told to be more selective in our Jitting?
6257 if (compIsForInlining())
6259 compInlineResult->NoteFatal(InlineObservation::CALLEE_MARKED_AS_SKIPPED);
6261 return CORJIT_SKIPPED;
6266 /* Set up an error trap */
6272 CORINFO_MODULE_HANDLE classPtr;
6273 COMP_HANDLE compHnd;
6274 CORINFO_METHOD_INFO* methodInfo;
6275 void** methodCodePtr;
6276 uint32_t* methodCodeSize;
6277 JitFlags* compileFlags;
6282 param.classPtr = classPtr;
6283 param.compHnd = info.compCompHnd;
6284 param.methodInfo = info.compMethodInfo;
6285 param.methodCodePtr = methodCodePtr;
6286 param.methodCodeSize = methodCodeSize;
6287 param.compileFlags = compileFlags;
6288 param.result = CORJIT_INTERNALERROR;
6290 setErrorTrap(info.compCompHnd, Param*, pParam, &param) // ERROR TRAP: Start normal block
6293 pParam->pThis->compCompileHelper(pParam->classPtr, pParam->compHnd, pParam->methodInfo,
6294 pParam->methodCodePtr, pParam->methodCodeSize, pParam->compileFlags);
6296 finallyErrorTrap() // ERROR TRAP: The following block handles errors
6300 if (compIsForInlining())
6305 /* Tell the emitter that we're done with this function */
6307 GetEmitter()->emitEndCG();
6312 endErrorTrap() // ERROR TRAP: End
6314 return param.result;
6317 #if defined(DEBUG) || defined(INLINE_DATA)
6318 //------------------------------------------------------------------------
6319 // compMethodHash: get hash code for currently jitted method
6322 // Hash based on method's full name
6324 unsigned Compiler::Info::compMethodHash() const
6326 if (compMethodHashPrivate == 0)
6328 // compMethodHashPrivate = compCompHnd->getMethodHash(compMethodHnd);
6329 assert(compFullName != nullptr);
6330 assert(*compFullName != 0);
6331 COUNT_T hash = HashStringA(compFullName); // Use compFullName to generate the hash, as it contains the signature
6333 compMethodHashPrivate = hash;
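// Hashing the full name (rather than a handle) keeps the hash stable across runs
// and hosts, which is what hash-range configs such as JitRange and
// JitEnableOsrRange rely on.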
6335 return compMethodHashPrivate;
6338 //------------------------------------------------------------------------
6339 // compMethodHash: get hash code for specified method
6342 // methodHnd - method of interest
6345 // Hash based on method's full name
6347 unsigned Compiler::compMethodHash(CORINFO_METHOD_HANDLE methodHnd)
6349 // If this is the root method, delegate to the caching version
6351 if (methodHnd == info.compMethodHnd)
6353 return info.compMethodHash();
6356 // Else compute from scratch. Might consider caching this too.
6358 unsigned methodHash = 0;
6359 const char* calleeName = eeGetMethodFullName(methodHnd);
6361 if (calleeName != nullptr)
6363 methodHash = HashStringA(calleeName);
6367 methodHash = info.compCompHnd->getMethodHash(methodHnd);
6373 #endif // defined(DEBUG) || defined(INLINE_DATA)
6375 void Compiler::compCompileFinish()
6377 #if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
6381 #if MEASURE_MEM_ALLOC
6383 compArenaAllocator->finishMemStats();
6384 memAllocHist.record((unsigned)((compArenaAllocator->getTotalBytesAllocated() + 1023) / 1024));
6385 memUsedHist.record((unsigned)((compArenaAllocator->getTotalBytesUsed() + 1023) / 1024));
6389 if (s_dspMemStats || verbose)
6391 printf("\nAllocations for %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
6392 compArenaAllocator->dumpMemStats(jitstdout());
6395 #endif // MEASURE_MEM_ALLOC
6397 #if LOOP_HOIST_STATS
6398 AddLoopHoistStats();
6399 #endif // LOOP_HOIST_STATS
6401 #if MEASURE_NODE_SIZE
6402 genTreeNcntHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeCnt));
6403 genTreeNsizHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeSize));
6407 // Small methods should fit in ArenaAllocator::getDefaultPageSize(), or else
6408 // we should bump up ArenaAllocator::getDefaultPageSize()
6410 if ((info.compILCodeSize <= 32) && // Is it a reasonably small method?
6411 (info.compNativeCodeSize < 512) && // Some trivial methods generate huge native code. eg. pushing a single huge struct
6413 (impInlinedCodeSize <= 128) && // Is the inlining reasonably bounded?
6414 // Small methods cannot meaningfully have a big number of locals
6415 // or arguments. We always track arguments at the start of
6416 // the prolog which requires memory
6417 (info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
6418 (getJitStressLevel() == 0) && // We need extra memory for stress
6419 !opts.optRepeat && // We need extra memory to repeat opts
6420 !compArenaAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for DirectAlloc
6422 // Factor of 2x is because data-structures are bigger under DEBUG
6423 (compArenaAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
6424 // RyuJIT backend needs memory tuning! TODO-Cleanup: remove this case when memory tuning is complete.
6425 (compArenaAllocator->getTotalBytesAllocated() > (10 * ArenaAllocator::getDefaultPageSize())) &&
6426 !verbose) // We allocate lots of memory to convert sets to strings for JitDump
6428 genSmallMethodsNeedingExtraMemoryCnt++;
6430 // Less than 1% of all methods should run into this.
6431 // We cannot be more strict as there are always degenerate cases where we
6432 // would need extra memory (like huge structs as locals - see lvaSetStruct()).
6433 assert((genMethodCnt < 500) || (genSmallMethodsNeedingExtraMemoryCnt < (genMethodCnt / 100)));
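// Worked example: after 10,000 compiled methods, the assert tolerates at most 99
// small methods that needed more than one arena page (10,000 / 100 == 100, and the
// comparison is strict).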
6437 #if defined(DEBUG) || defined(INLINE_DATA)
6439 m_inlineStrategy->DumpData();
6441 if (JitConfig.JitInlineDumpXmlFile() != nullptr)
6443 FILE* file = _wfopen(JitConfig.JitInlineDumpXmlFile(), W("a"));
6444 if (file != nullptr)
6446 m_inlineStrategy->DumpXml(file);
6451 m_inlineStrategy->DumpXml();
6456 m_inlineStrategy->DumpXml();
6464 // mdMethodDef __stdcall CEEInfo::getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod)
6465 mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
6467 static bool headerPrinted = false;
6471 headerPrinted = true;
6472 printf(" | Profiled | Method | Method has | calls | Num |LclV |AProp| CSE | Perf |bytes | %3s codesize| \n", Target::g_tgtCPUName);
6473 printf(" mdToken | CNT | RGN | Hash | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Score | IL | HOT | CLD | method name \n");
6474 printf("---------+------+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-----+\n");
6475 // 06001234 | 1234 | HOT | 0f1e2d3c | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 1234.56 | 145 | 1234 | 123 | System.Example(int)
6479 printf("%08X | ", currentMethodToken);
6481 if (fgHaveProfileWeights())
6483 if (fgCalledCount < 1000)
6485 printf("%4.0f | ", fgCalledCount);
6487 else if (fgCalledCount < 1000000)
6489 printf("%3.0fK | ", fgCalledCount / 1000);
6493 printf("%3.0fM | ", fgCalledCount / 1000000);
6501 CorInfoRegionKind regionKind = info.compMethodInfo->regionKind;
6507 else if (regionKind == CORINFO_REGION_NONE)
6511 else if (regionKind == CORINFO_REGION_HOT)
6515 else if (regionKind == CORINFO_REGION_COLD)
6519 else if (regionKind == CORINFO_REGION_JIT)
6528 printf("%08x | ", info.compMethodHash());
6530 if (compHndBBtabCount > 0)
6539 if (rpFrameType == FT_EBP_FRAME)
6541 printf("%3s | ", STR_FPBASE);
6543 else if (rpFrameType == FT_ESP_FRAME)
6545 printf("%3s | ", STR_SPBASE);
6548 else if (rpFrameType == FT_DOUBLE_ALIGN_FRAME)
6553 else // (rpFrameType == FT_NOT_SET)
6567 printf(" %3d |", optCallCount);
6568 printf(" %3d |", optIndirectCallCount);
6569 printf(" %3d |", fgBBcountAtCodegen);
6570 printf(" %3d |", lvaCount);
6574 printf(" MinOpts |");
6578 printf(" %3d |", optAssertionCount);
6579 printf(" %3d |", optCSEcount);
6582 if (info.compPerfScore < 9999.995)
6584 printf(" %7.2f |", info.compPerfScore);
6588 printf(" %7.0f |", info.compPerfScore);
6591 printf(" %4d |", info.compMethodInfo->ILCodeSize);
6592 printf(" %5d |", info.compTotalHotCodeSize);
6593 printf(" %3d |", info.compTotalColdCodeSize);
6595 printf(" %s\n", eeGetMethodFullName(info.compMethodHnd));
6596 printf(""); // in our logic this causes a flush
6601 printf("****** DONE compiling %s\n", info.compFullName);
6602 printf(""); // in our logic this causes a flush
6605 #if TRACK_ENREG_STATS
6606 for (unsigned i = 0; i < lvaCount; ++i)
6608 const LclVarDsc* varDsc = lvaGetDesc(i);
6610 if (varDsc->lvRefCnt() != 0)
6612 s_enregisterStats.RecordLocal(varDsc);
6615 #endif // TRACK_ENREG_STATS
6617 // Only call _DbgBreakCheck when we are jitting, not when we are ngen-ing
6618 // For ngen the int3 or breakpoint instruction will be right at the
6619 // start of the ngen method and we will stop when we execute it.
6621 if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
6623 if (compJitHaltMethod())
6625 #if !defined(HOST_UNIX)
6626 // TODO-UNIX: re-enable this when we have an OS that supports a pop-up dialog
6628 // Don't do an assert, but just put up the dialog box so we get just-in-time debugger
6629 // launching. When you hit 'retry' it will continue and naturally stop at the INT 3
6630 // that the JIT put in the code
6631 _DbgBreakCheck(__FILE__, __LINE__, "JitHalt");
6638 #ifdef PSEUDORANDOM_NOP_INSERTION
6639 // This is the zlib adler32 checksum; the source came from the Windows code base.
6641 #define BASE 65521L // largest prime smaller than 65536
6643 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
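// Adler-32 keeps two running sums: s1 = 1 + b0 + b1 + ... and s2 = the sum of the
// successive s1 values, both mod BASE; the result packs them as (s2 << 16) | s1.
// NMAX (5552) bounds how many bytes may be accumulated before the mod must be
// applied to keep the 32-bit sums from overflowing.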
6645 #define DO1(buf, i) \
6650 #define DO2(buf, i) \
6653 #define DO4(buf, i) \
6656 #define DO8(buf, i) \
6663 unsigned adler32(unsigned adler, char* buf, unsigned int len)
6665 unsigned int s1 = adler & 0xffff;
6666 unsigned int s2 = (adler >> 16) & 0xffff;
6674 k = len < NMAX ? len : NMAX;
6691 return (s2 << 16) | s1;
6695 unsigned getMethodBodyChecksum(_In_z_ char* code, int size)
6697 #ifdef PSEUDORANDOM_NOP_INSERTION
6698 return adler32(0, code, size);
6704 int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
6705 COMP_HANDLE compHnd,
6706 CORINFO_METHOD_INFO* methodInfo,
6707 void** methodCodePtr,
6708 uint32_t* methodCodeSize,
6709 JitFlags* compileFlags)
6711 CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
6713 info.compCode = methodInfo->ILCode;
6714 info.compILCodeSize = methodInfo->ILCodeSize;
6715 info.compILImportSize = 0;
6717 if (info.compILCodeSize == 0)
6719 BADCODE("code size is zero");
6722 if (compIsForInlining())
6725 unsigned methAttr_Old = impInlineInfo->inlineCandidateInfo->methAttr;
6726 unsigned methAttr_New = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
6727 unsigned flagsToIgnore = CORINFO_FLG_DONT_INLINE | CORINFO_FLG_FORCEINLINE;
6728 assert((methAttr_Old & (~flagsToIgnore)) == (methAttr_New & (~flagsToIgnore)));
6731 info.compFlags = impInlineInfo->inlineCandidateInfo->methAttr;
6732 compInlineContext = impInlineInfo->inlineContext;
6736 info.compFlags = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
6737 #ifdef PSEUDORANDOM_NOP_INSERTION
6738 info.compChecksum = getMethodBodyChecksum((char*)methodInfo->ILCode, methodInfo->ILCodeSize);
6740 compInlineContext = m_inlineStrategy->GetRootContext();
6743 compSwitchedToOptimized = false;
6744 compSwitchedToMinOpts = false;
6746 // compInitOptions will set the correct verbose flag.
6748 compInitOptions(compileFlags);
6750 if (!compIsForInlining() && !opts.altJit && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
6752 // We're an altjit, but the DOTNET_AltJit configuration did not say to compile this method, so skip it.
6754 return CORJIT_SKIPPED;
6761 printf("IL to import:\n");
6762 dumpILRange(info.compCode, info.compILCodeSize);
6767 // Check for DOTNET_AggressiveInlining
6768 if (JitConfig.JitAggressiveInlining())
6770 compDoAggressiveInlining = true;
6773 if (compDoAggressiveInlining)
6775 info.compFlags |= CORINFO_FLG_FORCEINLINE;
6780 // Check for ForceInline stress.
6781 if (compStressCompile(STRESS_FORCE_INLINE, 0))
6783 info.compFlags |= CORINFO_FLG_FORCEINLINE;
6786 if (compIsForInlining())
6788 JITLOG((LL_INFO100000, "\nINLINER impTokenLookupContextHandle for %s is 0x%p.\n",
6789 eeGetMethodFullName(info.compMethodHnd), dspPtr(impTokenLookupContextHandle)));
6794 impCanReimport = compStressCompile(STRESS_CHK_REIMPORT, 15);
6796 /* Initialize a bunch of global values */
6798 info.compScopeHnd = classPtr;
6799 info.compXcptnsCount = methodInfo->EHcount;
6800 info.compMaxStack = methodInfo->maxStack;
6801 compHndBBtab = nullptr;
6802 compHndBBtabCount = 0;
6803 compHndBBtabAllocCount = 0;
6805 info.compNativeCodeSize = 0;
6806 info.compTotalHotCodeSize = 0;
6807 info.compTotalColdCodeSize = 0;
6808 info.compHandleHistogramProbeCount = 0;
6810 compHasBackwardJump = false;
6811 compHasBackwardJumpInHandler = false;
6814 compCurBB = nullptr;
6817 // Reset node and block ID counters
6819 compStatementID = 0;
6820 compBasicBlockID = 0;
6824 info.compNeedsConsecutiveRegisters = false;
6827 /* Initialize the emitter */
6829 if (!compIsForInlining())
6831 codeGen->GetEmitter()->emitBegCG(this, compHnd);
6834 info.compIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
6836 info.compPublishStubParam = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM);
6838 info.compHasNextCallRetAddr = false;
6840 if (opts.IsReversePInvoke())
6843 info.compCallConv = info.compCompHnd->getUnmanagedCallConv(methodInfo->ftn, nullptr, &unused);
6844 info.compArgOrder = Target::g_tgtUnmanagedArgOrder;
6848 info.compCallConv = CorInfoCallConvExtension::Managed;
6849 info.compArgOrder = Target::g_tgtArgOrder;
6852 info.compIsVarArgs = false;
6854 switch (methodInfo->args.getCallConv())
6856 case CORINFO_CALLCONV_NATIVEVARARG:
6857 case CORINFO_CALLCONV_VARARG:
6858 info.compIsVarArgs = true;
6864 info.compRetType = JITtype2varType(methodInfo->args.retType);
6865 if (info.compRetType == TYP_STRUCT)
6867 info.compRetType = impNormStructType(methodInfo->args.retTypeClass);
6870 info.compUnmanagedCallCountWithGCTransition = 0;
6871 info.compLvFrameListRoot = BAD_VAR_NUM;
6873 info.compInitMem = ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0);
6875 /* Allocate the local variable table */
6879 compInitDebuggingInfo();
6881 // If we are an altjit and have patchpoint info, we might need to tweak the frame size
6882 // so it's plausible for the altjit architecture.
6884 if (!info.compMatchedVM && compileFlags->IsSet(JitFlags::JIT_FLAG_OSR))
6886 assert(info.compLocalsCount == info.compPatchpointInfo->NumberOfLocals());
6887 const int totalFrameSize = info.compPatchpointInfo->TotalFrameSize();
6889 int frameSizeUpdate = 0;
6891 #if defined(TARGET_AMD64)
6892 if ((totalFrameSize % 16) != 8)
6894 frameSizeUpdate = 8;
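// E.g., a Tier0 frame of 80 bytes (80 % 16 == 0) is bumped to 88 so that it
// lands on the 8 mod 16 layout the AMD64 OSR frame math expects.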
6896 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6897 if ((totalFrameSize & 0xf) != 0)
6899 frameSizeUpdate = 8;
6902 if (frameSizeUpdate != 0)
6904 JITDUMP("Mismatched altjit + OSR -- updating tier0 frame size from %d to %d\n", totalFrameSize,
6905 totalFrameSize + frameSizeUpdate);
6907 // Allocate a local copy with altered frame size.
6909 const unsigned patchpointInfoSize = PatchpointInfo::ComputeSize(info.compLocalsCount);
6910 PatchpointInfo* const newInfo =
6911 (PatchpointInfo*)getAllocator(CMK_Unknown).allocate<char>(patchpointInfoSize);
6913 newInfo->Initialize(info.compLocalsCount, totalFrameSize + frameSizeUpdate);
6914 newInfo->Copy(info.compPatchpointInfo);
6916 // Swap it in place.
6918 info.compPatchpointInfo = newInfo;
6923 if (compIsForInlining())
6925 compBasicBlockID = impInlineInfo->InlinerCompiler->compBasicBlockID;
6929 const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
6931 if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
6933 // We're prejitting the root method. We also will analyze it as
6934 // a potential inline candidate.
6935 InlineResult prejitResult(this, methodHnd, "prejit");
6937 // Profile data allows us to avoid early "too many IL bytes" outs.
6938 prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE_WEIGHTS, fgHaveSufficientProfileWeights());
6940 // Do the initial inline screen.
6941 impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);
6943 // Temporarily install the prejitResult as the
6944 // compInlineResult so it's available to fgFindJumpTargets
6945 // and can accumulate more observations as the IL is scanned.
6948 // We don't pass prejitResult in as a parameter to avoid
6949 // potential aliasing confusion -- the other call to
6950 // fgFindBasicBlocks may have set up compInlineResult and
6951 // the code in fgFindJumpTargets references that data
6952 // member extensively.
6953 assert(compInlineResult == nullptr);
6954 assert(impInlineInfo == nullptr);
6955 compInlineResult = &prejitResult;
6957 // Find the basic blocks. We must do this regardless of
6958 // inlineability, since we are prejitting this method.
6960 // This will also update the status of this method as
6961 // an inline candidate.
6962 fgFindBasicBlocks();
6964 // Undo the temporary setup.
6965 assert(compInlineResult == &prejitResult);
6966 compInlineResult = nullptr;
6968 // If still a viable, discretionary inline, assess profitability.
6970 if (prejitResult.IsDiscretionaryCandidate())
6972 prejitResult.DetermineProfitability(methodInfo);
6975 m_inlineStrategy->NotePrejitDecision(prejitResult);
6977 // Handle the results of the inline analysis.
6978 if (prejitResult.IsFailure())
6980 // This method is a bad inlinee according to our
6981 // analysis. We will let the InlineResult destructor
6982 // mark it as noinline in the prejit image to save the jit some work.
6985 // This decision better not be context-dependent.
6986 assert(prejitResult.IsNever());
6990 // This looks like a viable inline candidate. Since
6991 // we're not actually inlining, don't report anything.
6992 prejitResult.SetSuccessResult(INLINE_PREJIT_SUCCESS);
6997 // We are jitting the root method, or inlining.
6998 fgFindBasicBlocks();
7001 // If we're inlining and the candidate is bad, bail out.
7002 if (compDonotInline())
7007 // We may decide to optimize this method,
7008 // to avoid spending a long time stuck in Tier0 code.
7010 if (fgCanSwitchToOptimized())
7012 // We only expect to be able to do this at Tier0.
7014 assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));
7016 // Normal tiering should bail us out of Tier0 tail call induced loops.
7017 // So keep these methods in Tier0 if we're gathering PGO data.
7018 // If we're not gathering PGO, then switch these to optimized to
7019 // minimize the number of tail call helper stubs we might need.
7020 // Reconsider this if/when we're able to share those stubs.
7022 // Honor the config setting that tells the jit to
7023 // always optimize methods with loops.
7025 // If neither of those apply, and OSR is enabled, the jit may still
7026 // decide to optimize, if there's something in the method that
7027 // OSR currently cannot handle, or we're optionally suppressing
7028 // OSR by method hash.
7030 const char* reason = nullptr;
7032 if (compTailPrefixSeen && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
7034 reason = "tail.call and not BBINSTR";
7036 else if (compHasBackwardJump && ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0))
7041 if (compHasBackwardJump && (reason == nullptr) && (JitConfig.TC_OnStackReplacement() > 0))
7043 bool canEscapeViaOSR = compCanHavePatchpoints(&reason);
7046 if (canEscapeViaOSR)
7048 // Optionally disable OSR by method hash. This will force any
7049 // method that might otherwise get trapped in Tier0 to be optimized.
7051 static ConfigMethodRange JitEnableOsrRange;
7052 JitEnableOsrRange.EnsureInit(JitConfig.JitEnableOsrRange());
7053 const unsigned hash = impInlineRoot()->info.compMethodHash();
7054 if (!JitEnableOsrRange.Contains(hash))
7056 canEscapeViaOSR = false;
7057 reason = "OSR disabled by JitEnableOsrRange";
7062 if (canEscapeViaOSR)
7064 JITDUMP("\nOSR enabled for this method\n");
7065 if (compHasBackwardJump && !compTailPrefixSeen &&
7066 opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR_IF_LOOPS) && opts.IsTier0())
7068 assert((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) == 0);
7069 opts.jitFlags->Set(JitFlags::JIT_FLAG_BBINSTR);
7070 JITDUMP("\nEnabling instrumentation for this method so OSR'd version will have a profile.\n");
7075 JITDUMP("\nOSR disabled for this method: %s\n", reason);
7076 assert(reason != nullptr);
7080 if (reason != nullptr)
7082 fgSwitchToOptimized(reason);
7086 compSetOptimizationLevel();
7088 #if COUNT_BASIC_BLOCKS
7089 bbCntTable.record(fgBBcount);
7093 bbOneBBSizeTable.record(methodInfo->ILCodeSize);
7095 #endif // COUNT_BASIC_BLOCKS
7100 printf("Basic block list for '%s'\n", info.compFullName);
7101 fgDispBasicBlocks();
7107 /* Give the function a unique number */
7109 if (opts.disAsm || verbose)
7111 compMethodID = ~info.compMethodHash() & 0xffff;
7115 compMethodID = InterlockedIncrement(&s_compMethodsCount);
7119 if (compIsForInlining())
7121 compInlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS, fgBBcount);
7123 if (compInlineResult->IsFailure())
7130 if (compIsForInlining())
7132 compGenTreeID = impInlineInfo->InlinerCompiler->compGenTreeID;
7133 compStatementID = impInlineInfo->InlinerCompiler->compStatementID;
7137 compCompile(methodCodePtr, methodCodeSize, compileFlags);
7140 if (compIsForInlining())
7142 impInlineInfo->InlinerCompiler->compGenTreeID = compGenTreeID;
7143 impInlineInfo->InlinerCompiler->compStatementID = compStatementID;
7144 impInlineInfo->InlinerCompiler->compBasicBlockID = compBasicBlockID;
7150 if (compDonotInline())
7152 // Verify we have only one inline result in play.
7153 assert(impInlineInfo->inlineResult == compInlineResult);
7156 if (!compIsForInlining())
7158 compCompileFinish();
7160 // Did we just compile for a target architecture that the VM isn't expecting? If so, the VM
7161 // can't use the generated code (and we had better be an AltJit!).
7163 if (!info.compMatchedVM)
7165 return CORJIT_SKIPPED;
7169 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT) && JitConfig.RunAltJitCode() == 0)
7171 return CORJIT_SKIPPED;
7180 //------------------------------------------------------------------------
7181 // compFindLocalVarLinear: Linear search for variable's scope containing offset.
7184 // varNum The variable number to search for in the array of scopes.
7185 // offs The offset value which should occur within the life of the variable.
7188 // VarScopeDsc* of a matching variable that contains the offset within its life
7189 // begin and life end, or nullptr when no match is found.
7192 // Linear search for a matching variable whose life begin and end contain
7194 // the given offset; returns NULL if one couldn't be found.
7197 // Usually called for scope count = 4. Could be called for values up to 8.
7199 VarScopeDsc* Compiler::compFindLocalVarLinear(unsigned varNum, unsigned offs)
7201 for (unsigned i = 0; i < info.compVarScopesCount; i++)
7203 VarScopeDsc* dsc = &info.compVarScopes[i];
7204 if ((dsc->vsdVarNum == varNum) && (dsc->vsdLifeBeg <= offs) && (dsc->vsdLifeEnd > offs))
7212 //------------------------------------------------------------------------
7213 // compFindLocalVar: Search for variable's scope containing offset.
7216 // varNum The variable number to search for in the array of scopes.
7217 // offs The offset value which should occur within the life of the variable.
7220 // VarScopeDsc* of a matching variable that contains the offset within its life
7221 // begin and life end,
7222 // or NULL if one couldn't be found.
7225 // Linear search for matching variables with their life begin and end containing
7226 // the offset only when the scope count is < MAX_LINEAR_FIND_LCL_SCOPELIST,
7227 // else use the hashtable lookup.
7229 VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned offs)
7231 if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
7233 return compFindLocalVarLinear(varNum, offs);
7237 VarScopeDsc* ret = compFindLocalVar(varNum, offs, offs);
7238 assert(ret == compFindLocalVarLinear(varNum, offs));
7243 //------------------------------------------------------------------------
7244 // compFindLocalVar: Search for variable's scope containing offset.
7247 // varNum The variable number to search for in the array of scopes.
7248 // lifeBeg The life begin of the variable's scope
7249 // lifeEnd The life end of the variable's scope
7252 // VarScopeDsc* of a matching variable that contains the offset within its life
7253 // begin and life end, or NULL if one couldn't be found.
7256 // Following are the steps used:
7257 // 1. Index into the hashtable using varNum.
7258 // 2. Iterate through the linked list at index varNum to find a matching var scope.
7261 VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd)
7263 assert(compVarScopeMap != nullptr);
7265 VarScopeMapInfo* info;
7266 if (compVarScopeMap->Lookup(varNum, &info))
7268 VarScopeListNode* list = info->head;
7269 while (list != nullptr)
7271 if ((list->data->vsdLifeBeg <= lifeBeg) && (list->data->vsdLifeEnd > lifeEnd))
7281 //-------------------------------------------------------------------------
7282 // compInitVarScopeMap: Create a scope map so scopes can be looked up by varNum
7285 // Map.K => Map.V :: varNum => List(ScopeDsc)
7287 // Create a scope map that can be indexed by varNum and iterated
7288 // over its values to look for a matching scope when given an offs or
7289 // lifeBeg and lifeEnd.
7292 // 1. Build the map only when we think linear search is slow, i.e., when the
7293 // scope count reaches MAX_LINEAR_FIND_LCL_SCOPELIST.
7294 // 2. Linked list preserves original array order.
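// As a sketch (hypothetical offsets), a var with two scopes ends up mapped as:
//    varNum 3 => [ {lifeBeg 0x04, lifeEnd 0x20}, {lifeBeg 0x28, lifeEnd 0x40} ]
// with list order matching info.compVarScopes.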
7296 void Compiler::compInitVarScopeMap()
7298 if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
7303 assert(compVarScopeMap == nullptr);
7305 compVarScopeMap = new (getAllocator()) VarNumToScopeDscMap(getAllocator());
7307 // 599 is prime, to limit huge allocations; e.g., duplicated scopes on a single var.
7308 compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599));
7310 for (unsigned i = 0; i < info.compVarScopesCount; ++i)
7312 unsigned varNum = info.compVarScopes[i].vsdVarNum;
7314 VarScopeListNode* node = VarScopeListNode::Create(&info.compVarScopes[i], getAllocator());
7316 // Index by varNum and if the list exists append "node" to the "list".
7317 VarScopeMapInfo* info;
7318 if (compVarScopeMap->Lookup(varNum, &info))
7320 info->tail->next = node;
7323 // Create a new list.
7326 info = VarScopeMapInfo::Create(node, getAllocator());
7327 compVarScopeMap->Set(varNum, info);
7332 struct genCmpLocalVarLifeBeg
7334 bool operator()(const VarScopeDsc* elem1, const VarScopeDsc* elem2)
7336 return elem1->vsdLifeBeg < elem2->vsdLifeBeg;
7340 struct genCmpLocalVarLifeEnd
7342 bool operator()(const VarScopeDsc* elem1, const VarScopeDsc* elem2)
7344 return elem1->vsdLifeEnd < elem2->vsdLifeEnd;
7348 inline void Compiler::compInitScopeLists()
7350 if (info.compVarScopesCount == 0)
7352 compEnterScopeList = compExitScopeList = nullptr;
7356 // Populate the 'compEnterScopeList' and 'compExitScopeList' lists
7358 compEnterScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
7359 compExitScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
7361 for (unsigned i = 0; i < info.compVarScopesCount; i++)
7363 compEnterScopeList[i] = compExitScopeList[i] = &info.compVarScopes[i];
7366 jitstd::sort(compEnterScopeList, compEnterScopeList + info.compVarScopesCount, genCmpLocalVarLifeBeg());
7367 jitstd::sort(compExitScopeList, compExitScopeList + info.compVarScopesCount, genCmpLocalVarLifeEnd());
7370 void Compiler::compResetScopeLists()
7372 if (info.compVarScopesCount == 0)
7377 assert(compEnterScopeList && compExitScopeList);
7379 compNextEnterScope = compNextExitScope = 0;
7382 VarScopeDsc* Compiler::compGetNextEnterScope(unsigned offs, bool scan)
7384 assert(info.compVarScopesCount);
7385 assert(compEnterScopeList && compExitScopeList);
7387 if (compNextEnterScope < info.compVarScopesCount)
7389 assert(compEnterScopeList[compNextEnterScope]);
7390 unsigned nextEnterOff = compEnterScopeList[compNextEnterScope]->vsdLifeBeg;
7391 assert(scan || (offs <= nextEnterOff));
7395 if (offs == nextEnterOff)
7397 return compEnterScopeList[compNextEnterScope++];
7402 if (nextEnterOff <= offs)
7404 return compEnterScopeList[compNextEnterScope++];
7412 VarScopeDsc* Compiler::compGetNextExitScope(unsigned offs, bool scan)
7414 assert(info.compVarScopesCount);
7415 assert(compEnterScopeList && compExitScopeList);
7417 if (compNextExitScope < info.compVarScopesCount)
7419 assert(compExitScopeList[compNextExitScope]);
7420 unsigned nextExitOffs = compExitScopeList[compNextExitScope]->vsdLifeEnd;
7421 assert(scan || (offs <= nextExitOffs));
7425 if (offs == nextExitOffs)
7427 return compExitScopeList[compNextExitScope++];
7432 if (nextExitOffs <= offs)
7434 return compExitScopeList[compNextExitScope++];
7442 // Calls the enter/exit callback functions for scopes whose boundaries lie
7443 // between the current position of the scope lists and 'offset',
7444 // ordered by instr offset.
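// For instance (hypothetical offsets): with pending enter offsets {2, 5} and exit
// offsets {4, 8}, advancing to offset 6 fires enterScopeFn(2), exitScopeFn(4), then
// enterScopeFn(5), keeping the two sorted lists in lockstep.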
7446 void Compiler::compProcessScopesUntil(unsigned offset,
7448 void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
7449 void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*))
7451 assert(offset != BAD_IL_OFFSET);
7452 assert(inScope != nullptr);
7454 bool foundExit = false, foundEnter = true;
7456 VarScopeDsc* nextExitScope = nullptr;
7457 VarScopeDsc* nextEnterScope = nullptr;
7458 unsigned offs = offset, curEnterOffs = 0;
7460 goto START_FINDING_SCOPES;
7462 // We need to determine the scopes which are open for the current block.
7463 // This loop walks over the missing blocks between the current and the
7464 // previous block, keeping the enter and exit offsets in lockstep.
7468 foundExit = foundEnter = false;
7472 (this->*exitScopeFn)(inScope, nextExitScope);
7473 nextExitScope = nullptr;
7477 offs = nextEnterScope ? nextEnterScope->vsdLifeBeg : offset;
7479 while ((scope = compGetNextExitScope(offs, true)) != nullptr)
7483 if (!nextEnterScope || scope->vsdLifeEnd > nextEnterScope->vsdLifeBeg)
7485 // We overshot the last found Enter scope. Save the scope for later
7486 // and find an entering scope
7488 nextExitScope = scope;
7492 (this->*exitScopeFn)(inScope, scope);
7497 (this->*enterScopeFn)(inScope, nextEnterScope);
7498 curEnterOffs = nextEnterScope->vsdLifeBeg;
7499 nextEnterScope = nullptr;
7503 offs = nextExitScope ? nextExitScope->vsdLifeEnd : offset;
7505 START_FINDING_SCOPES:
7507 while ((scope = compGetNextEnterScope(offs, true)) != nullptr)
7511 if ((nextExitScope && scope->vsdLifeBeg >= nextExitScope->vsdLifeEnd) || (scope->vsdLifeBeg > curEnterOffs))
7513 // We overshot the last found exit scope. Save the scope for later
7514 // and find an exiting scope
7516 nextEnterScope = scope;
7520 (this->*enterScopeFn)(inScope, scope);
7524 curEnterOffs = scope->vsdLifeBeg;
7527 } while (foundExit || foundEnter);
7532 void Compiler::compDispScopeLists()
7536 printf("Local variable scopes = %d\n", info.compVarScopesCount);
7538 if (info.compVarScopesCount)
7540 printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
7543 printf("Sorted by enter scope:\n");
7544 for (i = 0; i < info.compVarScopesCount; i++)
7546 VarScopeDsc* varScope = compEnterScopeList[i];
7548 printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
7549 VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
7550 varScope->vsdLifeBeg, varScope->vsdLifeEnd);
7552 if (compNextEnterScope == i)
7554 printf(" <-- next enter scope");
7560 printf("Sorted by exit scope:\n");
7561 for (i = 0; i < info.compVarScopesCount; i++)
7563 VarScopeDsc* varScope = compExitScopeList[i];
7565 printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
7566 VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
7567 varScope->vsdLifeBeg, varScope->vsdLifeEnd);
7569 if (compNextExitScope == i)
7571 printf(" <-- next exit scope");
7578 void Compiler::compDispLocalVars()
7580 printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
7582 if (info.compVarScopesCount > 0)
7584 printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
7587 for (unsigned i = 0; i < info.compVarScopesCount; i++)
7589 VarScopeDsc* varScope = &info.compVarScopes[i];
7590 printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh\n", i, varScope->vsdVarNum, varScope->vsdLVnum,
7591 VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
7592 varScope->vsdLifeBeg, varScope->vsdLifeEnd);
7598 /*****************************************************************************/
7600 #if MEASURE_CLRAPI_CALLS
7602 struct WrapICorJitInfo : public ICorJitInfo
7604 //------------------------------------------------------------------------
7605 // WrapICorJitInfo::makeOne: allocate an instance of WrapICorJitInfo
7608 // alloc - the allocator to get memory from for the instance
7609 // compile - the compiler instance
7610 // compHndRef - the ICorJitInfo handle from the EE; the caller's
7611 // copy may be replaced with a "wrapper" instance
7614 // If the config flags indicate that ICorJitInfo should be wrapped,
7615 // we return the "wrapper" instance; otherwise we return "nullptr".
7617 static WrapICorJitInfo* makeOne(ArenaAllocator* alloc, Compiler* compiler, COMP_HANDLE& compHndRef /* INOUT */)
7619 WrapICorJitInfo* wrap = nullptr;
7621 if (JitConfig.JitEECallTimingInfo() != 0)
7623 // It's too early to use the default allocator, so we do this
7624 // in two steps to be safe (the constructor doesn't need to do
7625 // anything except fill in the vtable pointer, so we let the placement new below take care of that).
7627 void* inst = alloc->allocateMemory(roundUp(sizeof(WrapICorJitInfo)));
7628 if (inst != nullptr)
7630 // If you get a build error here due to 'WrapICorJitInfo' being
7631 // an abstract class, it's very likely that the wrapper bodies
7632 // in ICorJitInfo_wrapper_generated.hpp are no longer in sync with
7633 // the EE interface; please be kind and update the header file.
7634 wrap = new (inst, jitstd::placement_t()) WrapICorJitInfo();
7636 wrap->wrapComp = compiler;
7638 // Save the real handle and replace it with our wrapped version.
7639 wrap->wrapHnd = compHndRef;
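// The INOUT compHndRef then ends up pointing at the wrapper, so every subsequent
// EE call from the compiler is routed through the timing shims generated in
// ICorJitInfo_wrapper_generated.hpp.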
7649 COMP_HANDLE wrapHnd; // the "real thing"
7652 #include "ICorJitInfo_wrapper_generated.hpp"
7655 #endif // MEASURE_CLRAPI_CALLS
7657 /*****************************************************************************/
7659 // Compile a single method
7661 int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
7662 CORINFO_MODULE_HANDLE classPtr,
7663 COMP_HANDLE compHnd,
7664 CORINFO_METHOD_INFO* methodInfo,
7665 void** methodCodePtr,
7666 uint32_t* methodCodeSize,
7667 JitFlags* compileFlags,
7668 void* inlineInfoPtr)
7671 // A non-NULL inlineInfo means we are compiling the inlinee method.
7673 InlineInfo* inlineInfo = (InlineInfo*)inlineInfoPtr;
7675 bool jitFallbackCompile = false;
7677 int result = CORJIT_INTERNALERROR;
7679 ArenaAllocator* pAlloc = nullptr;
7680 ArenaAllocator alloc;
7682 #if MEASURE_CLRAPI_CALLS
7683 WrapICorJitInfo* wrapCLR = nullptr;
7688 // Use inliner's memory allocator when compiling the inlinee.
7689 pAlloc = inlineInfo->InlinerCompiler->compGetArenaAllocator();
7702 ArenaAllocator* pAlloc;
7703 bool jitFallbackCompile;
7705 CORINFO_METHOD_HANDLE methodHnd;
7706 CORINFO_MODULE_HANDLE classPtr;
7707 COMP_HANDLE compHnd;
7708 CORINFO_METHOD_INFO* methodInfo;
7709 void** methodCodePtr;
7710 uint32_t* methodCodeSize;
7711 JitFlags* compileFlags;
7712 InlineInfo* inlineInfo;
7713 #if MEASURE_CLRAPI_CALLS
7714 WrapICorJitInfo* wrapCLR;
7719 param.pComp = nullptr;
7720 param.pAlloc = pAlloc;
7721 param.jitFallbackCompile = jitFallbackCompile;
7722 param.methodHnd = methodHnd;
7723 param.classPtr = classPtr;
7724 param.compHnd = compHnd;
7725 param.methodInfo = methodInfo;
7726 param.methodCodePtr = methodCodePtr;
7727 param.methodCodeSize = methodCodeSize;
7728 param.compileFlags = compileFlags;
7729 param.inlineInfo = inlineInfo;
7730 #if MEASURE_CLRAPI_CALLS
7731 param.wrapCLR = nullptr;
7733 param.result = result;
7735 setErrorTrap(compHnd, Param*, pParamOuter, &param)
7737 setErrorTrap(nullptr, Param*, pParam, pParamOuter)
7739 if (pParam->inlineInfo)
7741 // Lazily create the inlinee compiler object
7742 if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
7744 pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
7745 (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
7748 // Use the inlinee compiler object
7749 pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
7751 // memset(pParam->pComp, 0xEE, sizeof(Compiler));
7756 // Allocate and construct the inliner compiler object
7757 pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
7760 #if MEASURE_CLRAPI_CALLS
7761 pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
7764 // push this compiler on the stack (TLS)
7765 pParam->pComp->prevCompiler = JitTls::GetCompiler();
7766 JitTls::SetCompiler(pParam->pComp);
7768 // PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
7769 #if defined(_PREFAST_) || defined(_PREFIX_)
7770 PREFIX_ASSUME(pParam->pComp != NULL);
7772 assert(pParam->pComp != nullptr);
7775 pParam->pComp->compInit(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo,
7776 pParam->inlineInfo);
7779 pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
7782 // Now generate the code
7783 pParam->result = pParam->pComp->compCompile(pParam->classPtr, pParam->methodCodePtr, pParam->methodCodeSize,
7784 pParam->compileFlags);
7788 Compiler* pCompiler = pParamOuter->pComp;
7790 // If OOM is thrown when allocating memory for a pComp, we will end up here.
7791 // In that case, pComp (and hence pCompiler) will be nullptr.
7793 if (pCompiler != nullptr)
7795 pCompiler->info.compCode = nullptr;
7797 // pop the compiler off the TLS stack only if it was linked above
7798 assert(JitTls::GetCompiler() == pCompiler);
7799 JitTls::SetCompiler(pCompiler->prevCompiler);
7802 if (pParamOuter->inlineInfo == nullptr)
7804 // Free up the allocator we were using
7805 pParamOuter->pAlloc->destroy();
7812 // If we were looking at an inlinee...
7813 if (inlineInfo != nullptr)
7815 // Note that we failed to compile the inlinee, and that
7816 // there's no point trying to inline it again anywhere else.
7817 inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
7819 param.result = __errc;
7823 result = param.result;
7826 (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR || result == CORJIT_IMPLLIMITATION) &&
7827 !jitFallbackCompile)
7829 // If we failed the JIT, reattempt with debuggable code.
7830 jitFallbackCompile = true;
7832 // Update the flags for 'safer' code generation.
7833 compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
7834 compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
7835 compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
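// The compilation is then reattempted once with these safer flags; minopts
// codegen exercises far fewer optimization paths, so it is much more likely
// to succeed.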
7843 #if defined(UNIX_AMD64_ABI)
7845 // GetTypeFromClassificationAndSizes:
7846 // Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
7849 // classType: classification type
7850 // size: size of the eightbyte.
7853 var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
7855 var_types type = TYP_UNKNOWN;
7858 case SystemVClassificationTypeInteger:
7877 assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
7880 case SystemVClassificationTypeIntegerReference:
7883 case SystemVClassificationTypeIntegerByRef:
7886 case SystemVClassificationTypeSSE:
7897 assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
7902 assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
7909 //-------------------------------------------------------------------
7910 // GetEightByteType: Returns the type of eightbyte slot of a struct
7913 // structDesc - struct classification description.
7914 // slotNum - eightbyte slot number for the struct.
7917 // type of the eightbyte slot of the struct
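// For example, a struct { double d; int i; } classifies under the SysV ABI as
// { SSE, Integer } with eightbyte sizes { 8, 4 }, so slot 0 yields TYP_DOUBLE and
// slot 1 yields TYP_INT (an illustrative case; the classification comes from the VM).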
7920 var_types Compiler::GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
7923 var_types eightByteType = TYP_UNDEF;
7924 unsigned len = structDesc.eightByteSizes[slotNum];
7926 switch (structDesc.eightByteClassifications[slotNum])
7928 case SystemVClassificationTypeInteger:
7929 // See typelist.h for jit type definition.
7930 // All the types of size <= 4 bytes are of jit type TYP_INT.
7931 if (structDesc.eightByteSizes[slotNum] <= 4)
7933 eightByteType = TYP_INT;
7935 else if (structDesc.eightByteSizes[slotNum] <= 8)
7937 eightByteType = TYP_LONG;
7941 assert(false && "GetEightByteType Invalid Integer classification type.");
7944 case SystemVClassificationTypeIntegerReference:
7945 assert(len == REGSIZE_BYTES);
7946 eightByteType = TYP_REF;
7948 case SystemVClassificationTypeIntegerByRef:
7949 assert(len == REGSIZE_BYTES);
7950 eightByteType = TYP_BYREF;
7952 case SystemVClassificationTypeSSE:
7953 if (structDesc.eightByteSizes[slotNum] <= 4)
7955 eightByteType = TYP_FLOAT;
7957 else if (structDesc.eightByteSizes[slotNum] <= 8)
7959 eightByteType = TYP_DOUBLE;
7963 assert(false && "GetEightByteType Invalid SSE classification type.");
7967 assert(false && "GetEightByteType Invalid classification type.");
7971 return eightByteType;
7974 //------------------------------------------------------------------------------------------------------
7975 // GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
7978 // 'structDesc' - struct description
7979 // 'type0' - out param; returns the type of the first eightbyte.
7980 // 'type1' - out param; returns the type of the second eightbyte.
7981 // 'offset0' - out param; returns the offset of the first eightbyte.
7982 // 'offset1' - out param; returns the offset of the second eightbyte.
7985 void Compiler::GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
7988 unsigned __int8* offset0,
7989 unsigned __int8* offset1)
7991 *offset0 = structDesc.eightByteOffsets[0];
7992 *offset1 = structDesc.eightByteOffsets[1];
7994 *type0 = TYP_UNKNOWN;
7995 *type1 = TYP_UNKNOWN;
7997 // Set the first eightbyte data
7998 if (structDesc.eightByteCount >= 1)
8000 *type0 = GetEightByteType(structDesc, 0);
8003 // Set the second eightbyte data
8004 if (structDesc.eightByteCount == 2)
8006 *type1 = GetEightByteType(structDesc, 1);
8010 //------------------------------------------------------------------------------------------------------
8011 // GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
8014 // 'typeHnd' - type handle
8015 // 'type0' - out param; returns the type of the first eightbyte.
8016 // 'type1' - out param; returns the type of the second eightbyte.
8017 // 'offset0' - out param; returns the offset of the first eightbyte.
8018 // 'offset1' - out param; returns the offset of the second eightbyte.
8020 void Compiler::GetStructTypeOffset(CORINFO_CLASS_HANDLE typeHnd,
8023 unsigned __int8* offset0,
8024 unsigned __int8* offset1)
8026 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
8027 eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
8028 assert(structDesc.passedInRegisters);
8029 GetStructTypeOffset(structDesc, type0, type1, offset0, offset1);
8032 #endif // defined(UNIX_AMD64_ABI)
8034 /*****************************************************************************/
8035 /*****************************************************************************/
8038 Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
8040 NodeToIntMap* reachable = new (getAllocatorDebugOnly()) NodeToIntMap(getAllocatorDebugOnly());
8042 if (m_nodeTestData == nullptr)
8047 // Otherwise, iterate.
8049 for (BasicBlock* const block : Blocks())
8051 for (Statement* const stmt : block->NonPhiStatements())
8053 for (GenTree* const tree : stmt->TreeList())
8055 TestLabelAndNum tlAndN;
8057 // For call nodes, translate late args to what they stand for.
8058 if (tree->OperGet() == GT_CALL)
8060 GenTreeCall* call = tree->AsCall();
8062 for (CallArg& arg : call->gtArgs.Args())
8064 GenTree* argNode = arg.GetNode();
8065 if (GetNodeTestData()->Lookup(argNode, &tlAndN))
8067 reachable->Set(argNode, 0);
8073 if (GetNodeTestData()->Lookup(tree, &tlAndN))
8075 reachable->Set(tree, 0);
8083 void Compiler::TransferTestDataToNode(GenTree* from, GenTree* to)
8085 TestLabelAndNum tlAndN;
8086 // We can't currently associate multiple annotations with a single node.
8087 // If we need to, we can fix this...
8089 // If the table is null, don't create it just to do the lookup, which would fail...
8090 if (m_nodeTestData != nullptr && GetNodeTestData()->Lookup(from, &tlAndN))
8092 assert(!GetNodeTestData()->Lookup(to, &tlAndN));
8095 TestLabelAndNum tlAndNTo;
8096 assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
8098 GetNodeTestData()->Remove(from);
8099 GetNodeTestData()->Set(to, tlAndN);
8106 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8107 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8111 XX Functions for the stand-alone version of the JIT XX
8113 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8114 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8117 /*****************************************************************************/
8118 void codeGeneratorCodeSizeBeg()
8122 /*****************************************************************************
8124 * Used for counting pointer assignments.
8127 /*****************************************************************************/
8128 void codeGeneratorCodeSizeEnd()
8131 /*****************************************************************************
8133 * Gather statistics - mainly used for the standalone JIT.
8134 * Enable the various #ifdef's to get the information you need.
8137 void Compiler::compJitStats()
8141 /* Method types and argument statistics */
8143 #endif // CALL_ARG_STATS
8148 /*****************************************************************************
8150 * Gather statistics about method calls and arguments
8153 void Compiler::compCallArgStats()
8157 unsigned argDWordNum;
8163 unsigned regArgDeferred;
8164 unsigned regArgTemp;
8166 unsigned regArgLclVar;
8167 unsigned regArgConst;
8169 unsigned argTempsThisMethod = 0;
8171 assert(fgStmtListThreaded);
8173 for (BasicBlock* const block : Blocks())
8175 for (Statement* const stmt : block->Statements())
8177 for (GenTree* const call : stmt->TreeList())
8179 if (call->gtOper != GT_CALL)
8182 argNum = regArgNum = regArgDeferred = regArgTemp = regArgConst = regArgLclVar = argDWordNum =
8183 argLngNum = argFltNum = argDblNum = 0;
8187 if (call->AsCall()->gtCallThisArg == nullptr)
8189 if (call->AsCall()->gtCallType == CT_HELPER)
8200 /* We have a 'this' pointer */
8208 if (call->AsCall()->IsVirtual())
8210 /* virtual function */
8215 argNonVirtualCalls++;
8222 argTempsCntTable.record(argTempsThisMethod);
8224 if (argMaxTempsPerMethod < argTempsThisMethod)
8226 argMaxTempsPerMethod = argTempsThisMethod;
8231 void Compiler::compDispCallArgStats(FILE* fout)
8233 if (argTotalCalls == 0)
8236 fprintf(fout, "\n");
8237 fprintf(fout, "--------------------------------------------------\n");
8238 fprintf(fout, "Call stats\n");
8239 fprintf(fout, "--------------------------------------------------\n");
8240 fprintf(fout, "Total # of calls = %d, calls / method = %.3f\n\n", argTotalCalls,
8241 (float)argTotalCalls / genMethodCnt);
8243 fprintf(fout, "Percentage of helper calls = %4.2f %%\n", (float)(100 * argHelperCalls) / argTotalCalls);
8244 fprintf(fout, "Percentage of static calls = %4.2f %%\n", (float)(100 * argStaticCalls) / argTotalCalls);
8245 fprintf(fout, "Percentage of virtual calls = %4.2f %%\n", (float)(100 * argVirtualCalls) / argTotalCalls);
8246 fprintf(fout, "Percentage of non-virtual calls = %4.2f %%\n\n", (float)(100 * argNonVirtualCalls) / argTotalCalls);
8248 fprintf(fout, "Average # of arguments per call = %.2f%%\n\n", (float)argTotalArgs / argTotalCalls);
8250 fprintf(fout, "Percentage of DWORD arguments = %.2f %%\n", (float)(100 * argTotalDWordArgs) / argTotalArgs);
8251 fprintf(fout, "Percentage of LONG arguments = %.2f %%\n", (float)(100 * argTotalLongArgs) / argTotalArgs);
8252 fprintf(fout, "Percentage of FLOAT arguments = %.2f %%\n", (float)(100 * argTotalFloatArgs) / argTotalArgs);
8253 fprintf(fout, "Percentage of DOUBLE arguments = %.2f %%\n\n", (float)(100 * argTotalDoubleArgs) / argTotalArgs);
8255 if (argTotalRegArgs == 0)
8259 fprintf(fout, "Total deferred arguments = %d \n", argTotalDeferred);
8261 fprintf(fout, "Total temp arguments = %d \n\n", argTotalTemps);
8263 fprintf(fout, "Total 'this' arguments = %d \n", argTotalObjPtr);
8264 fprintf(fout, "Total local var arguments = %d \n", argTotalLclVar);
8265 fprintf(fout, "Total constant arguments = %d \n\n", argTotalConst);
8268 fprintf(fout, "\nRegister Arguments:\n\n");
8270 fprintf(fout, "Percentage of deferred arguments = %.2f %%\n", (float)(100 * argTotalDeferred) / argTotalRegArgs);
8271 fprintf(fout, "Percentage of temp arguments = %.2f %%\n\n", (float)(100 * argTotalTemps) / argTotalRegArgs);
8273 fprintf(fout, "Maximum # of temps per method = %d\n\n", argMaxTempsPerMethod);
8275 fprintf(fout, "Percentage of ObjPtr arguments = %.2f %%\n", (float)(100 * argTotalObjPtr) / argTotalRegArgs);
8276 // fprintf(fout, "Percentage of global arguments = %.2f %%\n", (float)(100 * argTotalDWordGlobEf) /
8277 // argTotalRegArgs);
8278 fprintf(fout, "Percentage of constant arguments = %.2f %%\n", (float)(100 * argTotalConst) / argTotalRegArgs);
8279 fprintf(fout, "Percentage of lcl var arguments = %.2f %%\n\n", (float)(100 * argTotalLclVar) / argTotalRegArgs);
8281 fprintf(fout, "--------------------------------------------------\n");
8282 fprintf(fout, "Argument count frequency table (includes ObjPtr):\n");
8283 fprintf(fout, "--------------------------------------------------\n");
8284 argCntTable.dump(fout);
8285 fprintf(fout, "--------------------------------------------------\n");
8287 fprintf(fout, "--------------------------------------------------\n");
8288 fprintf(fout, "DWORD argument count frequency table (w/o LONG):\n");
8289 fprintf(fout, "--------------------------------------------------\n");
8290 argDWordCntTable.dump(fout);
8291 fprintf(fout, "--------------------------------------------------\n");
8293 fprintf(fout, "--------------------------------------------------\n");
8294 fprintf(fout, "Temps count frequency table (per method):\n");
8295 fprintf(fout, "--------------------------------------------------\n");
8296 argTempsCntTable.dump(fout);
8297 fprintf(fout, "--------------------------------------------------\n");
8300 fprintf(fout, "--------------------------------------------------\n");
8301 fprintf(fout, "DWORD argument count frequency table (w/ LONG):\n");
8302 fprintf(fout, "--------------------------------------------------\n");
8303 argDWordLngCntTable.dump(fout);
8304 fprintf(fout, "--------------------------------------------------\n");
8308 #endif // CALL_ARG_STATS
8310 // JIT time end to end, and by phases.
8312 #ifdef FEATURE_JIT_METHOD_PERF
8314 CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
8315 CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
8316 #if MEASURE_CLRAPI_CALLS
8317 double JitTimer::s_cyclesPerSec = CachedCyclesPerSecond();
8319 #endif // FEATURE_JIT_METHOD_PERF
8321 #if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
8322 const char* PhaseNames[] = {
8323 #define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent, measureIR) string_nm,
8324 #include "compphases.h"
8327 const char* PhaseEnums[] = {
8328 #define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent, measureIR) #enum_nm,
8329 #include "compphases.h"
8332 #endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
8334 #ifdef FEATURE_JIT_METHOD_PERF
8335 bool PhaseHasChildren[] = {
8336 #define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent, measureIR) hasChildren,
8337 #include "compphases.h"
8340 int PhaseParent[] = {
8341 #define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent, measureIR) parent,
8342 #include "compphases.h"
8345 bool PhaseReportsIRSize[] = {
8346 #define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent, measureIR) measureIR,
8347 #include "compphases.h"
8350 CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
8351 : m_byteCodeBytes(byteCodeBytes)
8353 , m_parentPhaseEndSlop(0)
8354 , m_timerFailure(false)
8355 #if MEASURE_CLRAPI_CALLS
8356 , m_allClrAPIcalls(0)
8357 , m_allClrAPIcycles(0)
8360 for (int i = 0; i < PHASE_NUMBER_OF; i++)
8362 m_invokesByPhase[i] = 0;
8363 m_cyclesByPhase[i] = 0;
8364 #if MEASURE_CLRAPI_CALLS
8365 m_CLRinvokesByPhase[i] = 0;
8366 m_CLRcyclesByPhase[i] = 0;
8370 #if MEASURE_CLRAPI_CALLS
8371 assert(ArrLen(m_perClrAPIcalls) == API_ICorJitInfo_Names::API_COUNT);
8372 assert(ArrLen(m_perClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
8373 assert(ArrLen(m_maxClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
8374 for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
8376 m_perClrAPIcalls[i] = 0;
8377 m_perClrAPIcycles[i] = 0;
8378 m_maxClrAPIcycles[i] = 0;
8383 bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
8385 return false; // info.m_byteCodeBytes < 10;
8388 //------------------------------------------------------------------------
8389 // CompTimeSummaryInfo::AddInfo: Record timing info from one compile.
8392 // info - The timing information to record.
8393 // includePhases - If "true", the per-phase info in "info" is valid,
8394 // which means that a "normal" compile has ended; if
8395 // the value is "false" we are recording the results
8396 // of a partial compile (typically an import-only run
8397 // on behalf of the inliner) in which case the phase
8398 info is not valid and so we only record EE call overhead.
8400 void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info, bool includePhases)
8402 if (info.m_timerFailure)
8404 return; // Don't update if there was a failure.
8407 CritSecHolder timeLock(s_compTimeSummaryLock);
8411 bool includeInFiltered = IncludedInFilteredData(info);
8415 // Update the totals and maxima.
8416 m_total.m_byteCodeBytes += info.m_byteCodeBytes;
8417 m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
8418 m_total.m_totalCycles += info.m_totalCycles;
8419 m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
8421 #if MEASURE_CLRAPI_CALLS
8422 // Update the CLR-API values.
8423 m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
8424 m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
8425 m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
8426 m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
8429 if (includeInFiltered)
8431 m_numFilteredMethods++;
8432 m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
8433 m_filtered.m_totalCycles += info.m_totalCycles;
8434 m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
8437 for (int i = 0; i < PHASE_NUMBER_OF; i++)
8439 m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
8440 m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
8442 #if MEASURE_CLRAPI_CALLS
8443 m_total.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
8444 m_total.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
8447 if (includeInFiltered)
8449 m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
8450 m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
8451 #if MEASURE_CLRAPI_CALLS
8452 m_filtered.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
8453 m_filtered.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
8456 m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
8458 #if MEASURE_CLRAPI_CALLS
8459 m_maximum.m_CLRcyclesByPhase[i] = max(m_maximum.m_CLRcyclesByPhase[i], info.m_CLRcyclesByPhase[i]);
8462 m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
8463 m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
8465 #if MEASURE_CLRAPI_CALLS
8470 // Update the "global" CLR-API values.
8471 m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
8472 m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
8473 m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
8474 m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
8476 // Update the per-phase CLR-API values.
8477 m_total.m_invokesByPhase[PHASE_CLR_API] += info.m_allClrAPIcalls;
8478 m_maximum.m_invokesByPhase[PHASE_CLR_API] =
8479 max(m_maximum.m_invokesByPhase[PHASE_CLR_API], info.m_allClrAPIcalls);
8480 m_total.m_cyclesByPhase[PHASE_CLR_API] += info.m_allClrAPIcycles;
8481 m_maximum.m_cyclesByPhase[PHASE_CLR_API] =
8482 max(m_maximum.m_cyclesByPhase[PHASE_CLR_API], info.m_allClrAPIcycles);
8485 for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
8487 m_total.m_perClrAPIcalls[i] += info.m_perClrAPIcalls[i];
8488 m_maximum.m_perClrAPIcalls[i] = max(m_maximum.m_perClrAPIcalls[i], info.m_perClrAPIcalls[i]);
8490 m_total.m_perClrAPIcycles[i] += info.m_perClrAPIcycles[i];
8491 m_maximum.m_perClrAPIcycles[i] = max(m_maximum.m_perClrAPIcycles[i], info.m_perClrAPIcycles[i]);
8493 m_maximum.m_maxClrAPIcycles[i] = max(m_maximum.m_maxClrAPIcycles[i], info.m_maxClrAPIcycles[i]);
8499 LPCWSTR Compiler::compJitTimeLogFilename = nullptr;
8501 void CompTimeSummaryInfo::Print(FILE* f)
8508 double countsPerSec = CachedCyclesPerSecond();
8509 if (countsPerSec == 0.0)
8511 fprintf(f, "Processor does not have a high-frequency timer.\n");
8515 double totTime_ms = 0.0;
8517 fprintf(f, "JIT Compilation time report:\n");
8518 fprintf(f, " Compiled %d methods.\n", m_numMethods);
8519 if (m_numMethods != 0)
8521 fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
8522 m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
8523 totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
8524 fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
8526 fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
8527 ((double)m_maximum.m_totalCycles / countsPerSec) * 1000.0);
8528 fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
8529 ((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
8531 const char* extraHdr1 = "";
8532 const char* extraHdr2 = "";
8533 #if MEASURE_CLRAPI_CALLS
8534 bool extraInfo = (JitConfig.JitEECallTimingInfo() != 0);
8537 extraHdr1 = " CLRs/meth % in CLR";
8538 extraHdr2 = "-----------------------";
8542 fprintf(f, "\n Total time by phases:\n");
8543 fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)%s\n",
8545 fprintf(f, " ---------------------------------------------------------------------------------------%s\n",
8548 // Ensure that at least the names array and the Phases enum have the same number of entries:
8549 assert(ArrLen(PhaseNames) == PHASE_NUMBER_OF);
8550 for (int i = 0; i < PHASE_NUMBER_OF; i++)
8552 double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
8553 double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
8555 #if MEASURE_CLRAPI_CALLS
8556 // Skip showing CLR API call info if we didn't collect any
8557 if (i == PHASE_CLR_API && !extraInfo)
8561 // Indent nested phases, according to depth.
8562 int ancPhase = PhaseParent[i];
8563 while (ancPhase != -1)
8566 ancPhase = PhaseParent[ancPhase];
8568 fprintf(f, " %-30s %6.2f %10.2f %9.3f %8.2f%% %8.3f", PhaseNames[i],
8569 ((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
8570 ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
8573 #if MEASURE_CLRAPI_CALLS
8574 if (extraInfo && i != PHASE_CLR_API)
8576 double nest_tot_ms = (((double)m_total.m_CLRcyclesByPhase[i]) / countsPerSec) * 1000.0;
8577 double nest_percent = nest_tot_ms * 100.0 / totTime_ms;
8578 double calls_per_fn = ((double)m_total.m_CLRinvokesByPhase[i]) / ((double)m_numMethods);
8580 if (nest_percent > 0.1 || calls_per_fn > 10)
8581 fprintf(f, " %5.1f %8.2f%%", calls_per_fn, nest_percent);
8587 // Show slop if it's over a certain percentage of the total
8588 double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms;
8589 if (pslop_pct >= 1.0)
8591 fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
8592 "%3.1f%% of total.\n\n",
8593 m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct);
8596 if (m_numFilteredMethods > 0)
8598 fprintf(f, " Compiled %d methods that meet the filter requirement.\n", m_numFilteredMethods);
8599 fprintf(f, " Compiled %d bytecodes total (%8.2f avg).\n", m_filtered.m_byteCodeBytes,
8600 (double)m_filtered.m_byteCodeBytes / (double)m_numFilteredMethods);
8601 double totTime_ms = ((double)m_filtered.m_totalCycles / countsPerSec) * 1000.0;
8602 fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_filtered.m_totalCycles / 1000000.0),
8604 fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
8605 ((double)m_filtered.m_totalCycles) / 1000000.0 / (double)m_numFilteredMethods,
8606 totTime_ms / (double)m_numFilteredMethods);
8608 fprintf(f, " Total time by phases:\n");
8609 fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total\n");
8610 fprintf(f, " --------------------------------------------------------------------------------------\n");
8611 // Ensure that at least the names array and the Phases enum have the same number of entries:
8612 assert(ArrLen(PhaseNames) == PHASE_NUMBER_OF);
8613 for (int i = 0; i < PHASE_NUMBER_OF; i++)
8615 double phase_tot_ms = (((double)m_filtered.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
8616 // Indent nested phases, according to depth.
8617 int ancPhase = PhaseParent[i];
8618 while (ancPhase != -1)
8621 ancPhase = PhaseParent[ancPhase];
8623 fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%%\n", PhaseNames[i],
8624 ((double)m_filtered.m_invokesByPhase[i]) / ((double)m_numFilteredMethods),
8625 ((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
8626 (phase_tot_ms * 100.0 / totTime_ms));
8629 double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec;
8632 fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
8633 "%3.1f%% of total.\n\n",
8634 m_filtered.m_parentPhaseEndSlop / 1000000.0, fslop_ms);
8638 #if MEASURE_CLRAPI_CALLS
8639 if (m_total.m_allClrAPIcalls > 0 && m_total.m_allClrAPIcycles > 0)
8642 if (m_totMethods > 0)
8643 fprintf(f, " Imported %u methods.\n\n", m_numMethods + m_totMethods);
8645 fprintf(f, " CLR API # calls total time max time avg time %% "
8647 fprintf(f, " -------------------------------------------------------------------------------");
8648 fprintf(f, "---------------------\n");
8650 static const char* APInames[] = {
8651 #define DEF_CLR_API(name) #name,
8652 #include "ICorJitInfo_names_generated.h"
8655 unsigned shownCalls = 0;
8656 double shownMillis = 0.0;
8658 unsigned checkedCalls = 0;
8659 double checkedMillis = 0.0;
8662 for (unsigned pass = 0; pass < 2; pass++)
8664 for (unsigned i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
8666 unsigned calls = m_total.m_perClrAPIcalls[i];
8670 unsigned __int64 cycles = m_total.m_perClrAPIcycles[i];
8671 double millis = 1000.0 * cycles / countsPerSec;
8673 // Don't show the small fry to keep the results manageable
8676 // We always show the following API because it is always called
8677 // exactly once for each method and its body is the simplest one
8678 // possible (it just returns an integer constant), and therefore
8679 // it can be used to measure the overhead of adding the CLR API
8680 // timing code. Roughly speaking, on a 3GHz x64 box the overhead
8681 // per call should be around 40 ns when using RDTSC, compared to
8682 // about 140 ns when using GetThreadCycles() under Windows.
8683 if (i != API_ICorJitInfo_Names::API_getExpectedTargetArchitecture)
8687 // In the first pass we just compute the totals.
8690 shownCalls += m_total.m_perClrAPIcalls[i];
8691 shownMillis += millis;
8695 unsigned __int32 maxcyc = m_maximum.m_maxClrAPIcycles[i];
8696 double max_ms = 1000.0 * maxcyc / countsPerSec;
8698 fprintf(f, " %-40s", APInames[i]); // API name
8699 fprintf(f, " %8u %9.1f ms", calls, millis); // #calls, total time
8700 fprintf(f, " %8.1f ms %8.1f ns", max_ms, 1000000.0 * millis / calls); // max, avg time
8701 fprintf(f, " %5.1f%%\n", 100.0 * millis / shownMillis); // % of total
8704 checkedCalls += m_total.m_perClrAPIcalls[i];
8705 checkedMillis += millis;
8711 assert(checkedCalls == shownCalls);
8712 assert(checkedMillis == shownMillis);
8715 if (shownCalls > 0 || shownMillis > 0)
8717 fprintf(f, " -------------------------");
8718 fprintf(f, "---------------------------------------------------------------------------\n");
8719 fprintf(f, " Total for calls shown above %8u %10.1f ms", shownCalls, shownMillis);
8720 if (totTime_ms > 0.0)
8721 fprintf(f, " (%4.1lf%% of overall JIT time)", shownMillis * 100.0 / totTime_ms);
8731 JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
8733 #if MEASURE_CLRAPI_CALLS
8734 m_CLRcallInvokes = 0;
8735 m_CLRcallCycles = 0;
8739 m_lastPhase = (Phases)-1;
8740 #if MEASURE_CLRAPI_CALLS
8741 m_CLRcallAPInum = -1;
8745 unsigned __int64 threadCurCycles;
8746 if (_our_GetThreadCycles(&threadCurCycles))
8748 m_start = threadCurCycles;
8749 m_curPhaseStart = threadCurCycles;
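// A rough lifecycle sketch (illustrative; PHASE_XYZ is a placeholder for a
// real member of the Phases enum, and the calls are made by the JIT driver):
//
//   JitTimer timer(info.compILCodeSize);  // begin timing at the first phase
//   ... run a phase ...
//   timer.EndPhase(compiler, PHASE_XYZ);  // credit the elapsed cycles to it
//   ...
//   timer.Terminate(compiler, CompTimeSummaryInfo::s_compTimeSummary, true);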
8753 void JitTimer::EndPhase(Compiler* compiler, Phases phase)
8756 // We currently re-run some phases, so the following assert doesn't hold.
8757 // assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
8759 unsigned __int64 threadCurCycles;
8760 if (_our_GetThreadCycles(&threadCurCycles))
8762 unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
8764 // If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
8765 // Credit the duration to "slop", the total of which should be very small.
8766 if (PhaseHasChildren[phase])
8768 m_info.m_parentPhaseEndSlop += phaseCycles;
8772 // It is a leaf phase. Credit duration to it.
8773 m_info.m_invokesByPhase[phase]++;
8774 m_info.m_cyclesByPhase[phase] += phaseCycles;
8776 #if MEASURE_CLRAPI_CALLS
8777 // Record the CLR API timing info as well.
8778 m_info.m_CLRinvokesByPhase[phase] += m_CLRcallInvokes;
8779 m_info.m_CLRcyclesByPhase[phase] += m_CLRcallCycles;
8782 // Credit the phase's ancestors, if any.
8783 int ancPhase = PhaseParent[phase];
8784 while (ancPhase != -1)
8786 m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
8787 ancPhase = PhaseParent[ancPhase];
8790 #if MEASURE_CLRAPI_CALLS
8791 const Phases lastPhase = PHASE_CLR_API;
8793 const Phases lastPhase = PHASE_NUMBER_OF;
8795 if (phase + 1 == lastPhase)
8797 m_info.m_totalCycles = (threadCurCycles - m_start);
8801 m_curPhaseStart = threadCurCycles;
8805 if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[phase])
8807 m_info.m_nodeCountAfterPhase[phase] = compiler->fgMeasureIR();
8811 m_info.m_nodeCountAfterPhase[phase] = 0;
8816 m_lastPhase = phase;
8818 #if MEASURE_CLRAPI_CALLS
8819 m_CLRcallInvokes = 0;
8820 m_CLRcallCycles = 0;
8824 #if MEASURE_CLRAPI_CALLS
8826 //------------------------------------------------------------------------
8827 // JitTimer::CLRApiCallEnter: Start the stopwatch for an EE call.
8830 // apix - The API index - an "enum API_ICorJitInfo_Names" value.
8833 void JitTimer::CLRApiCallEnter(unsigned apix)
8835 assert(m_CLRcallAPInum == -1); // Nested calls not allowed
8836 m_CLRcallAPInum = apix;
8838 // If we can't get the cycles, we'll just ignore this call
8839 if (!_our_GetThreadCycles(&m_CLRcallStart))
8843 //------------------------------------------------------------------------
8844 // JitTimer::CLRApiCallLeave: compute / record time spent in an EE call.
8847 // apix - The API's "enum API_ICorJitInfo_Names" value; this value
8848 // should match the value passed to the most recent call to
8849 // "CLRApiCallEnter" (i.e. these must come as matched pairs),
8850 // and they also may not nest.
8853 void JitTimer::CLRApiCallLeave(unsigned apix)
8855 // Make sure we're actually inside a measured CLR call.
8856 assert(m_CLRcallAPInum != -1);
8857 m_CLRcallAPInum = -1;
8859 // Ignore this one if we don't have a valid starting counter.
8860 if (m_CLRcallStart != 0)
8862 if (JitConfig.JitEECallTimingInfo() != 0)
8864 unsigned __int64 threadCurCycles;
8865 if (_our_GetThreadCycles(&threadCurCycles))
8867 // Compute the cycles spent in the call.
8868 threadCurCycles -= m_CLRcallStart;
8870 // Add the cycles to the 'phase' and bump its use count.
8871 m_info.m_cyclesByPhase[PHASE_CLR_API] += threadCurCycles;
8872 m_info.m_invokesByPhase[PHASE_CLR_API] += 1;
8874 // Add the values to the "per API" info.
8875 m_info.m_allClrAPIcycles += threadCurCycles;
8876 m_info.m_allClrAPIcalls += 1;
8878 m_info.m_perClrAPIcalls[apix] += 1;
8879 m_info.m_perClrAPIcycles[apix] += threadCurCycles;
8880 m_info.m_maxClrAPIcycles[apix] = max(m_info.m_maxClrAPIcycles[apix], (unsigned __int32)threadCurCycles);
8882 // Subtract the cycles from the enclosing phase by bumping its start time
8883 m_curPhaseStart += threadCurCycles;
8885 // Update the running totals.
8886 m_CLRcallInvokes += 1;
8887 m_CLRcallCycles += threadCurCycles;
8894 assert(m_CLRcallAPInum == -1); // No longer in this API call; it was cleared on entry.
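// A minimal pairing sketch (illustrative; 'pJitTimer' is a placeholder for
// however the caller reaches the compiler's JitTimer). EE interface wrappers
// bracket each ICorJitInfo call with a matched, non-nested pair:
//
//   pJitTimer->CLRApiCallEnter(API_ICorJitInfo_Names::API_getExpectedTargetArchitecture);
//   ... make the underlying ICorJitInfo call ...
//   pJitTimer->CLRApiCallLeave(API_ICorJitInfo_Names::API_getExpectedTargetArchitecture);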
8898 #endif // MEASURE_CLRAPI_CALLS
8900 CritSecObject JitTimer::s_csvLock;
8902 // It's expensive to constantly open and close the file, so open it once and close it
8903 // when the process exits. This should be accessed under the s_csvLock.
8904 FILE* JitTimer::s_csvFile = nullptr;
8906 LPCWSTR Compiler::JitTimeLogCsv()
8908 LPCWSTR jitTimeLogCsv = JitConfig.JitTimeLogCsv();
8909 return jitTimeLogCsv;
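// Note (an assumption about the hosting environment): JitConfig values such
// as JitTimeLogCsv are typically supplied via the runtime's configuration
// mechanism (e.g., a DOTNET_JitTimeLogCsv-style environment variable) and
// name the file to which the per-method CSV rows are appended.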
8912 void JitTimer::PrintCsvHeader()
8914 LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
8915 if (jitTimeLogCsv == nullptr)
8920 CritSecHolder csvLock(s_csvLock);
8922 if (s_csvFile == nullptr)
8924 s_csvFile = _wfopen(jitTimeLogCsv, W("a"));
8926 if (s_csvFile != nullptr)
8928 // Seek to the end of the file so that `ftell` doesn't lie to us on Windows
8929 fseek(s_csvFile, 0, SEEK_END);
8931 // Write the header if the file is empty
8932 if (ftell(s_csvFile) == 0)
8934 fprintf(s_csvFile, "\"Method Name\",");
8935 fprintf(s_csvFile, "\"Assembly or SPMI Index\",");
8936 fprintf(s_csvFile, "\"IL Bytes\",");
8937 fprintf(s_csvFile, "\"Basic Blocks\",");
8938 fprintf(s_csvFile, "\"Min Opts\",");
8939 fprintf(s_csvFile, "\"Loops\",");
8940 fprintf(s_csvFile, "\"Loops Cloned\",");
8941 #if FEATURE_LOOP_ALIGN
8943 fprintf(s_csvFile, "\"Alignment Candidates\",");
8944 fprintf(s_csvFile, "\"Loops Aligned\",");
8946 #endif // FEATURE_LOOP_ALIGN
8947 for (int i = 0; i < PHASE_NUMBER_OF; i++)
8949 fprintf(s_csvFile, "\"%s\",", PhaseNames[i]);
8950 if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
8952 fprintf(s_csvFile, "\"Node Count After %s\",", PhaseNames[i]);
8956 InlineStrategy::DumpCsvHeader(s_csvFile);
8958 fprintf(s_csvFile, "\"Executable Code Bytes\",");
8959 fprintf(s_csvFile, "\"GC Info Bytes\",");
8960 fprintf(s_csvFile, "\"Total Bytes Allocated\",");
8961 fprintf(s_csvFile, "\"Total Cycles\",");
8962 fprintf(s_csvFile, "\"CPS\"\n");
8969 void JitTimer::PrintCsvMethodStats(Compiler* comp)
8971 LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
8972 if (jitTimeLogCsv == nullptr)
8977 // eeGetMethodFullName uses locks, so don't enter crit sec before this call.
8978 #if defined(DEBUG) || defined(LATE_DISASM)
8979 // If we have already computed the name (e.g., because we're generating the CSV
8980 // for a DEBUG build, presumably not for the timing info), just reuse it.
8981 const char* methName = comp->info.compFullName;
8983 const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
8986 // Try to access the SPMI index to report in the data set.
8988 // If the jit is not hosted under SPMI this will return the
8989 // default value of zero.
8991 // Query the jit host directly here instead of going via the
8992 // config cache, since value will change for each method.
8993 int index = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), -1);
8995 CritSecHolder csvLock(s_csvLock);
8997 if (s_csvFile == nullptr)
9002 fprintf(s_csvFile, "\"%s\",", methName);
9005 fprintf(s_csvFile, "%d,", index);
9009 const char* methodAssemblyName = comp->info.compCompHnd->getAssemblyName(
9010 comp->info.compCompHnd->getModuleAssembly(comp->info.compCompHnd->getClassModule(comp->info.compClassHnd)));
9011 fprintf(s_csvFile, "\"%s\",", methodAssemblyName);
9013 fprintf(s_csvFile, "%u,", comp->info.compILCodeSize);
9014 fprintf(s_csvFile, "%u,", comp->fgBBcount);
9015 fprintf(s_csvFile, "%u,", comp->opts.MinOpts());
9016 fprintf(s_csvFile, "%u,", comp->optLoopCount);
9017 fprintf(s_csvFile, "%u,", comp->optLoopsCloned);
9018 #if FEATURE_LOOP_ALIGN
9020 fprintf(s_csvFile, "%u,", comp->loopAlignCandidates);
9021 fprintf(s_csvFile, "%u,", comp->loopsAligned);
9023 #endif // FEATURE_LOOP_ALIGN
9024 unsigned __int64 totCycles = 0;
9025 for (int i = 0; i < PHASE_NUMBER_OF; i++)
9027 if (!PhaseHasChildren[i])
9029 totCycles += m_info.m_cyclesByPhase[i];
9031 fprintf(s_csvFile, "%llu,", m_info.m_cyclesByPhase[i]);
9033 if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
9035 fprintf(s_csvFile, "%u,", m_info.m_nodeCountAfterPhase[i]);
9039 comp->m_inlineStrategy->DumpCsvData(s_csvFile);
9041 fprintf(s_csvFile, "%u,", comp->info.compNativeCodeSize);
9042 fprintf(s_csvFile, "%zu,", comp->compInfoBlkSize);
9043 fprintf(s_csvFile, "%zu,", comp->compGetArenaAllocator()->getTotalBytesAllocated());
9044 fprintf(s_csvFile, "%llu,", m_info.m_totalCycles);
9045 fprintf(s_csvFile, "%f\n", CachedCyclesPerSecond());
9050 // Perform process shutdown actions.
9053 void JitTimer::Shutdown()
9055 CritSecHolder csvLock(s_csvLock);
9056 if (s_csvFile != nullptr)
9062 // Completes the timing of the current method, and adds it to "sum".
9063 void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases)
9067 PrintCsvMethodStats(comp);
9070 sum.AddInfo(m_info, includePhases);
9072 #endif // FEATURE_JIT_METHOD_PERF
9074 #if LOOP_HOIST_STATS
9076 CritSecObject Compiler::s_loopHoistStatsLock; // Default constructor.
9077 unsigned Compiler::s_loopsConsidered = 0;
9078 unsigned Compiler::s_loopsWithHoistedExpressions = 0;
9079 unsigned Compiler::s_totalHoistedExpressions = 0;
9082 void Compiler::PrintAggregateLoopHoistStats(FILE* f)
9085 fprintf(f, "---------------------------------------------------\n");
9086 fprintf(f, "Loop hoisting stats\n");
9087 fprintf(f, "---------------------------------------------------\n");
9089 double pctWithHoisted = 0.0;
9090 if (s_loopsConsidered > 0)
9092 pctWithHoisted = 100.0 * (double(s_loopsWithHoistedExpressions) / double(s_loopsConsidered));
9094 double exprsPerLoopWithExpr = 0.0;
9095 if (s_loopsWithHoistedExpressions > 0)
9097 exprsPerLoopWithExpr = double(s_totalHoistedExpressions) / double(s_loopsWithHoistedExpressions);
9099 fprintf(f, "Considered %d loops. Of these, we hoisted expressions out of %d (%6.2f%%).\n", s_loopsConsidered,
9100 s_loopsWithHoistedExpressions, pctWithHoisted);
9101 fprintf(f, " A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
9102 s_totalHoistedExpressions, exprsPerLoopWithExpr);
9105 void Compiler::AddLoopHoistStats()
9107 CritSecHolder statsLock(s_loopHoistStatsLock);
9109 s_loopsConsidered += m_loopsConsidered;
9110 s_loopsWithHoistedExpressions += m_loopsWithHoistedExpressions;
9111 s_totalHoistedExpressions += m_totalHoistedExpressions;
9114 void Compiler::PrintPerMethodLoopHoistStats()
9116 double pctWithHoisted = 0.0;
9117 if (m_loopsConsidered > 0)
9119 pctWithHoisted = 100.0 * (double(m_loopsWithHoistedExpressions) / double(m_loopsConsidered));
9121 double exprsPerLoopWithExpr = 0.0;
9122 if (m_loopsWithHoistedExpressions > 0)
9124 exprsPerLoopWithExpr = double(m_totalHoistedExpressions) / double(m_loopsWithHoistedExpressions);
9126 printf("Considered %d loops. Of these, we hoisted expressions out of %d (%5.2f%%).\n", m_loopsConsidered,
9127 m_loopsWithHoistedExpressions, pctWithHoisted);
9128 printf(" A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
9129 m_totalHoistedExpressions, exprsPerLoopWithExpr);
9131 #endif // LOOP_HOIST_STATS
9133 //------------------------------------------------------------------------
9134 // RecordStateAtEndOfInlining: capture timing data (if enabled) after
9135 // inlining has completed.
9138 // Records data needed for SQM and inlining data dumps. Should be
9139 // called after inlining is complete. (We do this after inlining
9140 // because this marks the last point at which the JIT is likely to
9141 // cause type-loading and class initialization).
9143 void Compiler::RecordStateAtEndOfInlining()
9145 #if defined(DEBUG) || defined(INLINE_DATA)
9147 m_compCyclesAtEndOfInlining = 0;
9148 m_compTickCountAtEndOfInlining = 0;
9149 bool b = CycleTimer::GetThreadCyclesS(&m_compCyclesAtEndOfInlining);
9152 return; // We don't have a thread cycle counter.
9154 m_compTickCountAtEndOfInlining = GetTickCount();
9156 #endif // defined(DEBUG) || defined(INLINE_DATA)
9159 //------------------------------------------------------------------------
9160 // RecordStateAtEndOfCompilation: capture timing data (if enabled) after
9161 // compilation is completed.
9163 void Compiler::RecordStateAtEndOfCompilation()
9165 #if defined(DEBUG) || defined(INLINE_DATA)
9169 unsigned __int64 compCyclesAtEnd;
9170 bool b = CycleTimer::GetThreadCyclesS(&compCyclesAtEnd);
9173 return; // We don't have a thread cycle counter.
9175 assert(compCyclesAtEnd >= m_compCyclesAtEndOfInlining);
9177 m_compCycles = compCyclesAtEnd - m_compCyclesAtEndOfInlining;
9179 #endif // defined(DEBUG) || defined(INLINE_DATA)
9182 #if FUNC_INFO_LOGGING
9184 LPCWSTR Compiler::compJitFuncInfoFilename = nullptr;
9187 FILE* Compiler::compJitFuncInfoFile = nullptr;
9188 #endif // FUNC_INFO_LOGGING
9192 // dumpConvertedVarSet() dumps a varset whose bits are tracked variable indices:
9193 // we convert the indices to variable numbers, sort them, and print them as
9194 // variable numbers. To do this, we use a temporary set indexed by variable number.
9195 // We can't use the "all varset" type because it is still size-limited, and might
9196 // not be big enough to handle all possible variable numbers.
9197 void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars)
9199 BYTE* pVarNumSet; // trivial set: one byte per varNum, 0 means not in set, 1 means in set.
9201 size_t varNumSetBytes = comp->lvaCount * sizeof(BYTE);
9202 pVarNumSet = (BYTE*)_alloca(varNumSetBytes);
9203 memset(pVarNumSet, 0, varNumSetBytes); // empty the set
9205 VarSetOps::Iter iter(comp, vars);
9206 unsigned varIndex = 0;
9207 while (iter.NextElem(&varIndex))
9209 unsigned varNum = comp->lvaTrackedIndexToLclNum(varIndex);
9210 pVarNumSet[varNum] = 1; // This varNum is in the set
9215 for (size_t varNum = 0; varNum < comp->lvaCount; varNum++)
9217 if (pVarNumSet[varNum] == 1)
9223 printf("V%02u", varNum);
9230 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9231 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9233 XX Debugging helpers XX
9235 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9236 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9239 /*****************************************************************************/
9240 /* The following functions are intended to be called from the debugger, to dump
9241 * various data structures.
9243 * The versions that start with 'c' take a Compiler* as the first argument.
9244 * The versions that start with 'd' use the Compiler* stashed in thread-local storage (JitTls), so don't require a Compiler*.
9247 * cBlock, dBlock : Display a basic block (call fgTableDispBasicBlock()).
9248 * cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
9249 * cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
9250 * "V" means "verbose", and will dump all the trees.
9251 * cStmt, dStmt : Display a Statement (call gtDispStmt()).
9252 * cTree, dTree : Display a tree (call gtDispTree()).
9253 * cTreeLIR, dTreeLIR : Display a tree in LIR form (call gtDispLIRNode()).
9254 * cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
9255 * cEH, dEH : Display the EH handler table (call fgDispHandlerTab()).
9256 * cVar, dVar : Display a local variable given its number (call lvaDumpEntry()).
9257 * cVarDsc, dVarDsc : Display a local variable given a LclVarDsc* (call lvaDumpEntry()).
9258 * cVars, dVars : Display the local variable table (call lvaTableDump()).
9259 * cVarsFinal, dVarsFinal : Display the local variable table (call lvaTableDump(FINAL_FRAME_LAYOUT)).
9260 * cBlockPreds, dBlockPreds : Display a block's predecessors (call block->dspPreds()).
9261 * cBlockSuccs, dBlockSuccs : Display a block's successors (call block->dspSuccs(compiler)).
9262 * cReach, dReach : Display all block reachability (call fgDispReach()).
9263 * cDoms, dDoms : Display all block dominators (call fgDispDoms()).
9264 * cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
9265 * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
9266 * indices. These are converted to variable numbers and sorted. (Calls
9267 * dumpConvertedVarSet()).
9268 * cLoop, dLoop : Display the blocks of a loop, including the trees.
9269 * cTreeFlags, dTreeFlags : Display tree flags
9271 * The following don't require a Compiler* to work:
9272 * dRegMask : Display a regMaskTP (call dspRegMask(mask)).
9273 * dBlockList : Display a BasicBlockList*.
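 *
 * For example, from a debugger immediate window one might evaluate (an
 * illustrative session; any Compiler*/block/tree in scope will do):
 *
 *      dBlock(block)       - dump one block via the thread's Compiler
 *      cTree(comp, tree)   - dump a tree given an explicit Compiler*
 *      dTreeFlags(tree)    - dump the flags set on a tree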
9276 void cBlock(Compiler* comp, BasicBlock* block)
9278 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9279 printf("===================================================================== *Block %u\n", sequenceNumber++);
9280 comp->fgTableDispBasicBlock(block);
9283 void cBlocks(Compiler* comp)
9285 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9286 printf("===================================================================== *Blocks %u\n", sequenceNumber++);
9287 comp->fgDispBasicBlocks();
9290 void cBlocksV(Compiler* comp)
9292 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9293 printf("===================================================================== *BlocksV %u\n", sequenceNumber++);
9294 comp->fgDispBasicBlocks(true);
9297 void cStmt(Compiler* comp, Statement* statement)
9299 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9300 printf("===================================================================== *Stmt %u\n", sequenceNumber++);
9301 comp->gtDispStmt(statement, ">>>");
9304 void cTree(Compiler* comp, GenTree* tree)
9306 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9307 printf("===================================================================== *Tree %u\n", sequenceNumber++);
9308 comp->gtDispTree(tree, nullptr, ">>>");
9311 void cTreeLIR(Compiler* comp, GenTree* tree)
9313 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9314 printf("===================================================================== *TreeLIR %u\n", sequenceNumber++);
9315 comp->gtDispLIRNode(tree);
9318 void cTrees(Compiler* comp)
9320 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9321 printf("===================================================================== *Trees %u\n", sequenceNumber++);
9322 comp->fgDumpTrees(comp->fgFirstBB, nullptr);
9325 void cEH(Compiler* comp)
9327 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9328 printf("===================================================================== *EH %u\n", sequenceNumber++);
9329 comp->fgDispHandlerTab();
9332 void cVar(Compiler* comp, unsigned lclNum)
9334 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9335 printf("===================================================================== *Var %u\n", sequenceNumber++);
9336 comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
9339 void cVarDsc(Compiler* comp, LclVarDsc* varDsc)
9341 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9342 printf("===================================================================== *VarDsc %u\n", sequenceNumber++);
9343 unsigned lclNum = comp->lvaGetLclNum(varDsc);
9344 comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
9347 void cVars(Compiler* comp)
9349 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9350 printf("===================================================================== *Vars %u\n", sequenceNumber++);
9351 comp->lvaTableDump();
9354 void cVarsFinal(Compiler* comp)
9356 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9357 printf("===================================================================== *VarsFinal %u\n", sequenceNumber++);
9358 comp->lvaTableDump(Compiler::FINAL_FRAME_LAYOUT);
9361 void cBlockPreds(Compiler* comp, BasicBlock* block)
9363 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9364 printf("===================================================================== *BlockPreds %u\n", sequenceNumber++);
9368 void cBlockSuccs(Compiler* comp, BasicBlock* block)
9370 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9371 printf("===================================================================== *BlockSuccs %u\n", sequenceNumber++);
9372 block->dspSuccs(comp);
9375 void cReach(Compiler* comp)
9377 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9378 printf("===================================================================== *Reach %u\n", sequenceNumber++);
9379 comp->fgDispReach();
9382 void cDoms(Compiler* comp)
9384 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9385 printf("===================================================================== *Doms %u\n", sequenceNumber++);
9389 void cLiveness(Compiler* comp)
9391 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9392 printf("===================================================================== *Liveness %u\n", sequenceNumber++);
9393 comp->fgDispBBLiveness();
9396 void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
9398 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9399 printf("===================================================================== *CVarSet %u\n", sequenceNumber++);
9400 dumpConvertedVarSet(comp, vars);
9401 printf("\n"); // dumpConvertedVarSet() doesn't emit a trailing newline
9404 void cLoop(Compiler* comp, unsigned loopNum)
9406 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9407 printf("===================================================================== *Loop %u\n", sequenceNumber++);
9408 comp->optPrintLoopInfo(loopNum, /* verbose */ true);
9412 void cLoopPtr(Compiler* comp, const Compiler::LoopDsc* loop)
9414 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9415 printf("===================================================================== *LoopPtr %u\n", sequenceNumber++);
9416 comp->optPrintLoopInfo(loop, /* verbose */ true);
9420 void cLoops(Compiler* comp)
9422 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9423 printf("===================================================================== *Loops %u\n", sequenceNumber++);
9424 comp->optPrintLoopTable();
9427 void dBlock(BasicBlock* block)
9429 cBlock(JitTls::GetCompiler(), block);
9434 cBlocks(JitTls::GetCompiler());
9439 cBlocksV(JitTls::GetCompiler());
9442 void dStmt(Statement* statement)
9444 cStmt(JitTls::GetCompiler(), statement);
9447 void dTree(GenTree* tree)
9449 cTree(JitTls::GetCompiler(), tree);
9452 void dTreeLIR(GenTree* tree)
9454 cTreeLIR(JitTls::GetCompiler(), tree);
9457 void dTreeRange(GenTree* first, GenTree* last)
9459 Compiler* comp = JitTls::GetCompiler();
9460 GenTree* cur = first;
9463 cTreeLIR(comp, cur);
9473 cTrees(JitTls::GetCompiler());
9478 cEH(JitTls::GetCompiler());
9481 void dVar(unsigned lclNum)
9483 cVar(JitTls::GetCompiler(), lclNum);
9486 void dVarDsc(LclVarDsc* varDsc)
9488 cVarDsc(JitTls::GetCompiler(), varDsc);
9493 cVars(JitTls::GetCompiler());
9498 cVarsFinal(JitTls::GetCompiler());
9501 void dBlockPreds(BasicBlock* block)
9503 cBlockPreds(JitTls::GetCompiler(), block);
9506 void dBlockSuccs(BasicBlock* block)
9508 cBlockSuccs(JitTls::GetCompiler(), block);
9513 cReach(JitTls::GetCompiler());
9518 cDoms(JitTls::GetCompiler());
9523 cLiveness(JitTls::GetCompiler());
9526 void dCVarSet(VARSET_VALARG_TP vars)
9528 cCVarSet(JitTls::GetCompiler(), vars);
9531 void dLoop(unsigned loopNum)
9533 cLoop(JitTls::GetCompiler(), loopNum);
9536 void dLoopPtr(const Compiler::LoopDsc* loop)
9538 cLoopPtr(JitTls::GetCompiler(), loop);
9543 cLoops(JitTls::GetCompiler());
9546 void dRegMask(regMaskTP mask)
9548 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
9549 printf("===================================================================== dRegMask %u\n", sequenceNumber++);
9551 printf("\n"); // dspRegMask() doesn't emit a trailing newline
9554 void dBlockList(BasicBlockList* list)
9556 printf("WorkList: ");
9557 while (list != nullptr)
9559 printf(FMT_BB " ", list->block->bbNum);
9565 // Global variables available in debug mode, set by the debug APIs below for finding
9566 // Trees, Stmts, and/or Blocks using an id or bbNum.
9567 // They can be used in a watch window, or as a way to get the address of fields for data breakpoints.
9571 BasicBlock* dbTreeBlock;
9572 BasicBlock* dbBlock;
9574 // Debug APIs for finding Trees, Stmts, and/or Blocks.
9575 // As a side effect, they set the debug variables above.
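// For example (illustrative): "dFindTree(42)" walks all blocks and statements
// looking for the tree whose gtTreeID is 42 and, on success, also records the
// containing block in dbTreeBlock.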
9577 GenTree* dFindTree(GenTree* tree, unsigned id)
9579 if (tree == nullptr)
9584 if (tree->gtTreeID == id)
9590 GenTree* child = nullptr;
9591 tree->VisitOperands([&child, id](GenTree* operand) -> GenTree::VisitResult {
9592 child = dFindTree(operand, id);
9593 return (child != nullptr) ? GenTree::VisitResult::Abort : GenTree::VisitResult::Continue;
9599 GenTree* dFindTree(unsigned id)
9601 Compiler* comp = JitTls::GetCompiler();
9604 dbTreeBlock = nullptr;
9607 for (BasicBlock* const block : comp->Blocks())
9609 for (Statement* const stmt : block->Statements())
9611 tree = dFindTree(stmt->GetRootNode(), id);
9612 if (tree != nullptr)
9614 dbTreeBlock = block;
9623 Statement* dFindStmt(unsigned id)
9625 Compiler* comp = JitTls::GetCompiler();
9629 unsigned stmtId = 0;
9630 for (BasicBlock* const block : comp->Blocks())
9632 for (Statement* const stmt : block->Statements())
9646 BasicBlock* dFindBlock(unsigned bbNum)
9648 Compiler* comp = JitTls::GetCompiler();
9649 BasicBlock* block = nullptr;
9652 for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
9654 if (block->bbNum == bbNum)
9664 Compiler::LoopDsc* dFindLoop(unsigned loopNum)
9666 Compiler* comp = JitTls::GetCompiler();
9668 if (loopNum >= comp->optLoopCount)
9670 printf("loopNum %u out of range\n", loopNum);
9674 return &comp->optLoopTable[loopNum];
9677 void cTreeFlags(Compiler* comp, GenTree* tree)
9681 if (tree->gtFlags != 0)
9683 chars += printf("flags=");
9686 CLANG_FORMAT_COMMENT_ANCHOR;
9689 if (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE)
9691 chars += printf("[NODE_LARGE]");
9693 if (tree->gtDebugFlags & GTF_DEBUG_NODE_SMALL)
9695 chars += printf("[NODE_SMALL]");
9697 if (tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)
9699 chars += printf("[MORPHED]");
9701 #endif // defined(DEBUG)
9703 if (tree->gtFlags & GTF_COLON_COND)
9705 chars += printf("[COLON_COND]");
9710 genTreeOps op = tree->OperGet();
9716 case GT_STORE_LCL_FLD:
9717 case GT_STORE_LCL_VAR:
9718 if (tree->gtFlags & GTF_VAR_DEF)
9720 chars += printf("[VAR_DEF]");
9722 if (tree->gtFlags & GTF_VAR_USEASG)
9724 chars += printf("[VAR_USEASG]");
9726 if (tree->gtFlags & GTF_VAR_ITERATOR)
9728 chars += printf("[VAR_ITERATOR]");
9730 if (tree->gtFlags & GTF_VAR_MOREUSES)
9732 chars += printf("[VAR_MOREUSES]");
9734 if (!comp->lvaGetDesc(tree->AsLclVarCommon())->lvPromoted)
9736 if (tree->gtFlags & GTF_VAR_DEATH)
9738 chars += printf("[VAR_DEATH]");
9743 if (tree->gtFlags & GTF_VAR_FIELD_DEATH0)
9745 chars += printf("[VAR_FIELD_DEATH0]");
9748 if (tree->gtFlags & GTF_VAR_FIELD_DEATH1)
9750 chars += printf("[VAR_FIELD_DEATH1]");
9752 if (tree->gtFlags & GTF_VAR_FIELD_DEATH2)
9754 chars += printf("[VAR_FIELD_DEATH2]");
9756 if (tree->gtFlags & GTF_VAR_FIELD_DEATH3)
9758 chars += printf("[VAR_FIELD_DEATH3]");
9760 if (tree->gtFlags & GTF_VAR_EXPLICIT_INIT)
9762 chars += printf("[VAR_EXPLICIT_INIT]");
9765 if (tree->gtDebugFlags & GTF_DEBUG_VAR_CSE_REF)
9767 chars += printf("[VAR_CSE_REF]");
9776 if (tree->gtFlags & GTF_INX_RNGCHK)
9778 chars += printf("[INX_RNGCHK]");
9785 if (tree->gtFlags & GTF_IND_VOLATILE)
9787 chars += printf("[IND_VOLATILE]");
9789 if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP)
9791 chars += printf("[IND_TGT_NOT_HEAP]");
9793 if (tree->gtFlags & GTF_IND_TGT_HEAP)
9795 chars += printf("[IND_TGT_HEAP]");
9797 if (tree->gtFlags & GTF_IND_REQ_ADDR_IN_REG)
9799 chars += printf("[IND_REQ_ADDR_IN_REG]");
9801 if (tree->gtFlags & GTF_IND_UNALIGNED)
9803 chars += printf("[IND_UNALIGNED]");
9805 if (tree->gtFlags & GTF_IND_INVARIANT)
9807 chars += printf("[IND_INVARIANT]");
9809 if (tree->gtFlags & GTF_IND_NONNULL)
9811 chars += printf("[IND_NONNULL]");
9813 if (tree->gtFlags & GTF_IND_INITCLASS)
9815 chars += printf("[IND_INITCLASS]");
9820 #if !defined(TARGET_64BIT)
9824 if (tree->gtFlags & GTF_MUL_64RSLT)
9826 chars += printf("[64RSLT]");
9828 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9830 chars += printf("[ADDRMODE_NO_CSE]");
9836 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9838 chars += printf("[ADDRMODE_NO_CSE]");
9844 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9846 chars += printf("[ADDRMODE_NO_CSE]");
9861 if (tree->gtFlags & GTF_RELOP_NAN_UN)
9863 chars += printf("[RELOP_NAN_UN]");
9865 if (tree->gtFlags & GTF_RELOP_JMP_USED)
9867 chars += printf("[RELOP_JMP_USED]");
9873 if (tree->gtFlags & GTF_QMARK_CAST_INSTOF)
9875 chars += printf("[QMARK_CAST_INSTOF]");
9881 if (tree->gtFlags & GTF_BOX_VALUE)
9883 chars += printf("[BOX_VALUE]");
9886 if (tree->gtFlags & GTF_BOX_CLONED)
9888 chars += printf("[BOX_CLONED]");
9893 if (tree->gtFlags & GTF_ARR_ADDR_NONNULL)
9895 chars += printf("[ARR_ADDR_NONNULL]");
9901 GenTreeFlags handleKind = (tree->gtFlags & GTF_ICON_HDL_MASK);
9906 case GTF_ICON_SCOPE_HDL:
9908 chars += printf("[ICON_SCOPE_HDL]");
9911 case GTF_ICON_CLASS_HDL:
9913 chars += printf("[ICON_CLASS_HDL]");
9916 case GTF_ICON_METHOD_HDL:
9918 chars += printf("[ICON_METHOD_HDL]");
9921 case GTF_ICON_FIELD_HDL:
9923 chars += printf("[ICON_FIELD_HDL]");
9926 case GTF_ICON_STATIC_HDL:
9928 chars += printf("[ICON_STATIC_HDL]");
9931 case GTF_ICON_STR_HDL:
9933 chars += printf("[ICON_STR_HDL]");
9936 case GTF_ICON_OBJ_HDL:
9938 chars += printf("[ICON_OBJ_HDL]");
9941 case GTF_ICON_CONST_PTR:
9943 chars += printf("[ICON_CONST_PTR]");
9946 case GTF_ICON_GLOBAL_PTR:
9948 chars += printf("[ICON_GLOBAL_PTR]");
9951 case GTF_ICON_VARG_HDL:
9953 chars += printf("[ICON_VARG_HDL]");
9956 case GTF_ICON_PINVKI_HDL:
9958 chars += printf("[ICON_PINVKI_HDL]");
9961 case GTF_ICON_TOKEN_HDL:
9963 chars += printf("[ICON_TOKEN_HDL]");
9966 case GTF_ICON_TLS_HDL:
9968 chars += printf("[ICON_TLD_HDL]");
9971 case GTF_ICON_FTN_ADDR:
9973 chars += printf("[ICON_FTN_ADDR]");
9976 case GTF_ICON_CIDMID_HDL:
9978 chars += printf("[ICON_CIDMID_HDL]");
9981 case GTF_ICON_BBC_PTR:
9983 chars += printf("[ICON_BBC_PTR]");
9986 case GTF_ICON_STATIC_BOX_PTR:
9988 chars += printf("[GTF_ICON_STATIC_BOX_PTR]");
9991 case GTF_ICON_STATIC_ADDR_PTR:
9993 chars += printf("[GTF_ICON_STATIC_ADDR_PTR]");
9997 assert(!"a forgotten handle flag");
10005 case GT_STORE_DYN_BLK:
10007 if (tree->gtFlags & GTF_IND_VOLATILE)
10009 chars += printf("[IND_VOLATILE]");
10011 if (tree->gtFlags & GTF_IND_UNALIGNED)
10013 chars += printf("[IND_UNALIGNED]");
10019 if (tree->gtFlags & GTF_CALL_UNMANAGED)
10021 chars += printf("[CALL_UNMANAGED]");
10023 if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
10025 chars += printf("[CALL_INLINE_CANDIDATE]");
10027 if (!tree->AsCall()->IsVirtual())
10029 chars += printf("[CALL_NONVIRT]");
10031 if (tree->AsCall()->IsVirtualVtable())
10033 chars += printf("[CALL_VIRT_VTABLE]");
10035 if (tree->AsCall()->IsVirtualStub())
10037 chars += printf("[CALL_VIRT_STUB]");
10039 if (tree->gtFlags & GTF_CALL_NULLCHECK)
10041 chars += printf("[CALL_NULLCHECK]");
10043 if (tree->gtFlags & GTF_CALL_POP_ARGS)
10045 chars += printf("[CALL_POP_ARGS]");
10047 if (tree->gtFlags & GTF_CALL_HOISTABLE)
10049 chars += printf("[CALL_HOISTABLE]");
10052 // More flags associated with calls.
10055 GenTreeCall* call = tree->AsCall();
10057 if (call->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL)
10059 chars += printf("[CALL_M_EXPLICIT_TAILCALL]");
10061 if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL)
10063 chars += printf("[CALL_M_TAILCALL]");
10065 if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
10067 chars += printf("[CALL_M_RETBUFFARG]");
10069 if (call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV)
10071 chars += printf("[CALL_M_DELEGATE_INV]");
10073 if (call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK)
10075 chars += printf("[CALL_M_NOGCCHECK]");
10077 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
10079 chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
10082 if (call->IsVirtualStub())
10084 if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
10086 chars += printf("[CALL_M_VIRTSTUB_REL_INDIRECT]");
10089 else if (!call->IsVirtual())
10091 if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
10093 chars += printf("[CALL_M_NONVIRT_SAME_THIS]");
10097 if (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)
10099 chars += printf("[CALL_M_FRAME_VAR_DEATH]");
10101 if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_JIT_HELPER)
10103 chars += printf("[CALL_M_TAILCALL_VIA_JIT_HELPER]");
10105 #if FEATURE_TAILCALL_OPT
10106 if (call->gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL)
10108 chars += printf("[CALL_M_IMPLICIT_TAILCALL]");
10111 if (call->gtCallMoreFlags & GTF_CALL_M_PINVOKE)
10113 chars += printf("[CALL_M_PINVOKE]");
10116 if (call->IsFatPointerCandidate())
10118 chars += printf("[CALL_FAT_POINTER_CANDIDATE]");
10121 if (call->IsGuarded())
10123 chars += printf("[CALL_GUARDED]");
10126 if (call->IsExpRuntimeLookup())
10128 chars += printf("[CALL_EXP_RUNTIME_LOOKUP]");
10135 GenTreeFlags flags = (tree->gtFlags & (~(GTF_COMMON_MASK | GTF_OVERFLOW)));
10138 chars += printf("[%08X]", flags);
10146 if (tree->gtFlags & GTF_ASG)
10148 chars += printf("[ASG]");
10150 if (tree->gtFlags & GTF_CALL)
10152 chars += printf("[CALL]");
10160 if (tree->gtFlags & GTF_OVERFLOW)
10162 chars += printf("[OVERFLOW]");
10168 if (tree->gtFlags & GTF_EXCEPT)
10170 chars += printf("[EXCEPT]");
10172 if (tree->gtFlags & GTF_GLOB_REF)
10174 chars += printf("[GLOB_REF]");
10176 if (tree->gtFlags & GTF_ORDER_SIDEEFF)
10178 chars += printf("[ORDER_SIDEEFF]");
10180 if (tree->gtFlags & GTF_REVERSE_OPS)
10182 if (op != GT_LCL_VAR)
10184 chars += printf("[REVERSE_OPS]");
10187 if (tree->gtFlags & GTF_SPILLED)
10189 chars += printf("[SPILLED_OPER]");
10191 #if FEATURE_SET_FLAGS
10192 if (tree->gtFlags & GTF_SET_FLAGS)
10194 if ((op != GT_IND) && (op != GT_STOREIND))
10196 chars += printf("[ZSF_SET_FLAGS]");
10200 if (tree->gtFlags & GTF_IND_NONFAULTING)
10202 if (tree->OperIsIndirOrArrMetaData())
10204 chars += printf("[IND_NONFAULTING]");
10207 if (tree->gtFlags & GTF_MAKE_CSE)
10209 chars += printf("[MAKE_CSE]");
10211 if (tree->gtFlags & GTF_DONT_CSE)
10213 chars += printf("[DONT_CSE]");
10215 if (tree->gtFlags & GTF_BOOLEAN)
10217 chars += printf("[BOOLEAN]");
10219 if (tree->gtFlags & GTF_UNSIGNED)
10221 chars += printf("[SMALL_UNSIGNED]");
10223 if (tree->gtFlags & GTF_SPILL)
10225 chars += printf("[SPILL]");
10227 if (tree->gtFlags & GTF_REUSE_REG_VAL)
10229 if (op == GT_CNS_INT)
10231 chars += printf("[REUSE_REG_VAL]");
10237 void dTreeFlags(GenTree* tree)
10239 cTreeFlags(JitTls::GetCompiler(), tree);
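//
// Illustrative use (a sketch, not from this file): dTreeFlags is meant to be
// called from a native debugger against the thread's current compiler, e.g.
//
//   (lldb) expr dTreeFlags(tree)
//   [CALL][EXCEPT][GLOB_REF]
//
// The output shown is hypothetical; the actual flags printed depend on the
// node and are decoded by cTreeFlags above.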
10244 #if VARSET_COUNTOPS
10246 BitSetSupport::BitSetOpCounter Compiler::m_varsetOpCounter("VarSetOpCounts.log");
10248 #if ALLVARSET_COUNTOPS
10250 BitSetSupport::BitSetOpCounter Compiler::m_allvarsetOpCounter("AllVarSetOpCounts.log");
10254 HelperCallProperties Compiler::s_helperCallProperties;
10256 /*****************************************************************************/
10257 /*****************************************************************************/
10259 //------------------------------------------------------------------------
10261 // killGCRefs: given a tree node, return whether it requires all GC refs
10262 // to be spilled from callee-saved registers.
10265 //    tree - the tree to query
10268 //    true - tree kills GC refs held in callee-saved registers
10269 //    false - tree does not affect GC refs held in callee-saved registers
10270 bool Compiler::killGCRefs(GenTree* tree)
10272 if (tree->IsCall())
10274 GenTreeCall* call = tree->AsCall();
10275 if (call->IsUnmanaged())
10280 if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_JIT_PINVOKE_BEGIN))
10282 assert(opts.ShouldUsePInvokeHelpers());
10286 else if (tree->OperIs(GT_START_PREEMPTGC))
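//
// Illustrative sketch (a hypothetical call site, not code from this file):
// conceptually, the register allocator consults this query when modeling kills:
//
//   if (compiler->killGCRefs(node))
//   {
//       // do not keep GC refs live in callee-saved registers across `node`;
//       // spill them instead
//   }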
10294 //------------------------------------------------------------------------
10295 // lvaIsOSRLocal: check if this local var is one that requires special
10296 // treatment for OSR compilations.
10299 // varNum - variable of interest
10302 // true - this is an OSR compile and this local requires special treatment
10303 // false - not an OSR compile, or not an interesting local for OSR
10305 bool Compiler::lvaIsOSRLocal(unsigned varNum)
10307 LclVarDsc* const varDsc = lvaGetDesc(varNum);
10312 if (varDsc->lvIsOSRLocal)
10314 // Sanity check for promoted fields of OSR locals.
10316 if (varNum >= info.compLocalsCount)
10318 assert(varDsc->lvIsStructField);
10319 assert(varDsc->lvParentLcl < info.compLocalsCount);
10325 assert(!varDsc->lvIsOSRLocal);
10329 return varDsc->lvIsOSRLocal;
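//
// Illustrative sketch (hypothetical call site): callers use this query to give
// such locals special frame placement, e.g.
//
//   if (lvaIsOSRLocal(lclNum))
//   {
//       // the local lives in the original (Tier0) method frame, so address it
//       // there instead of assigning it a fresh slot in the OSR frame
//   }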
10332 //------------------------------------------------------------------------------
10333 // gtTypeForNullCheck: helper to get the smallest type that is still correct for a nullcheck
10336 // tree - the node for nullcheck;
10338 var_types Compiler::gtTypeForNullCheck(GenTree* tree)
10340 static const var_types s_typesBySize[] = {TYP_UNDEF, TYP_BYTE, TYP_SHORT, TYP_UNDEF, TYP_INT,
10341 TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_LONG};
10343 if (!varTypeIsStruct(tree))
10345 #if defined(TARGET_XARCH)
10346 // Just an optimization for XARCH - smaller mov
10347 if (genTypeSize(tree) == 8) { return TYP_INT; }
#endif // TARGET_XARCH
10353 assert((genTypeSize(tree) < ARRAY_SIZE(s_typesBySize)) && (s_typesBySize[genTypeSize(tree)] != TYP_UNDEF));
10354 return s_typesBySize[genTypeSize(tree)];
10356 // for the rest (struct types): probe a single byte to avoid potential access violation exceptions (AVEs)
return TYP_BYTE;
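//
// For illustration (a sketch derived from the code above, with hypothetical
// node names): the mapping picks the narrowest profitable probe, e.g.
//
//   gtTypeForNullCheck(intNode);    // 4-byte scalar -> TYP_INT
//   gtTypeForNullCheck(longNode);   // 8-byte scalar -> TYP_INT on xarch
//                                   //                  (smaller mov), else TYP_LONG
//   gtTypeForNullCheck(structNode); // struct        -> TYP_BYTE (single-byte probe)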
10360 //------------------------------------------------------------------------------
10361 // gtChangeOperToNullCheck: helper to change tree oper to a NULLCHECK.
10364 // tree - the node to change;
10365 // block - basic block of the node.
10368 // This function should not be called after lowering on platforms that do not support
10369 // emitting NULLCHECK nodes, such as arm32. Use `Lowering::TransformUnusedIndirection`
10370 // instead, which handles that case and calls this function when appropriate.
10372 void Compiler::gtChangeOperToNullCheck(GenTree* tree, BasicBlock* block)
10374 assert(tree->OperIs(GT_IND, GT_BLK));
10375 tree->ChangeOper(GT_NULLCHECK);
10376 tree->ChangeType(gtTypeForNullCheck(tree));
10377 tree->SetIndirExceptionFlags(this);
10378 block->bbFlags |= BBF_HAS_NULLCHECK;
10379 optMethodFlags |= OMF_HAS_NULLCHECK;
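//
// Illustrative use (a sketch, not a call site from this file): an indirection
// whose value is unused can be reduced to just its null probe:
//
//   // `ind` is an unused GT_IND in `block`
//   gtChangeOperToNullCheck(ind, block);
//   // `ind` is now a GT_NULLCHECK of the narrow type chosen by gtTypeForNullCheck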
10383 //------------------------------------------------------------------------------
10384 // devirtualizationDetailToString: describe the detailed devirtualization reason
10387 // detail - detail to describe
10390 // descriptive string
10392 const char* Compiler::devirtualizationDetailToString(CORINFO_DEVIRTUALIZATION_DETAIL detail)
10396 case CORINFO_DEVIRTUALIZATION_UNKNOWN:
return "unknown";
10398 case CORINFO_DEVIRTUALIZATION_SUCCESS:
return "success";
10400 case CORINFO_DEVIRTUALIZATION_FAILED_CANON:
10401 return "object class was canonical";
10402 case CORINFO_DEVIRTUALIZATION_FAILED_COM:
10403 return "object class was com";
10404 case CORINFO_DEVIRTUALIZATION_FAILED_CAST:
10405 return "object class could not be cast to interface class";
10406 case CORINFO_DEVIRTUALIZATION_FAILED_LOOKUP:
10407 return "interface method could not be found";
10408 case CORINFO_DEVIRTUALIZATION_FAILED_DIM:
10409 return "interface method was default interface method";
10410 case CORINFO_DEVIRTUALIZATION_FAILED_SUBCLASS:
10411 return "object not subclass of base class";
10412 case CORINFO_DEVIRTUALIZATION_FAILED_SLOT:
10413 return "virtual method installed via explicit override";
10414 case CORINFO_DEVIRTUALIZATION_FAILED_BUBBLE:
10415 return "devirtualization crossed version bubble";
10416 case CORINFO_DEVIRTUALIZATION_MULTIPLE_IMPL:
10417 return "object class has multiple implementations of interface";
10418 case CORINFO_DEVIRTUALIZATION_FAILED_BUBBLE_CLASS_DECL:
10419 return "decl method is defined on class and decl method not in version bubble, and decl method not in "
10420 "type closest to version bubble";
10421 case CORINFO_DEVIRTUALIZATION_FAILED_BUBBLE_INTERFACE_DECL:
10422 return "decl method is defined on interface and not in version bubble, and implementation type not "
10423 "entirely defined in bubble";
10424 case CORINFO_DEVIRTUALIZATION_FAILED_BUBBLE_IMPL:
10425 return "object class not defined within version bubble";
10426 case CORINFO_DEVIRTUALIZATION_FAILED_BUBBLE_IMPL_NOT_REFERENCEABLE:
10427 return "object class cannot be referenced from R2R code due to missing tokens";
10428 case CORINFO_DEVIRTUALIZATION_FAILED_DUPLICATE_INTERFACE:
10429 return "crossgen2 virtual method algorithm and runtime algorithm differ in the presence of duplicate "
10430 "interface implementations";
10431 case CORINFO_DEVIRTUALIZATION_FAILED_DECL_NOT_REPRESENTABLE:
10432 return "Decl method cannot be represented in R2R image";
10433 case CORINFO_DEVIRTUALIZATION_FAILED_TYPE_EQUIVALENCE:
10434 return "Support for type equivalence in devirtualization is not yet implemented in crossgen2";
10436 return "undefined";
10439 #endif // defined(DEBUG)
10441 #if TRACK_ENREG_STATS
10442 Compiler::EnregisterStats Compiler::s_enregisterStats;
10444 void Compiler::EnregisterStats::RecordLocal(const LclVarDsc* varDsc)
10446 m_totalNumberOfVars++;
10447 if (varDsc->TypeGet() == TYP_STRUCT)
10449 m_totalNumberOfStructVars++;
10451 if (!varDsc->lvDoNotEnregister)
10453 m_totalNumberOfEnregVars++;
10454 if (varDsc->TypeGet() == TYP_STRUCT)
10456 m_totalNumberOfStructEnregVars++;
10461 switch (varDsc->GetDoNotEnregReason())
10463 case DoNotEnregisterReason::AddrExposed:
10466 case DoNotEnregisterReason::HiddenBufferStructArg:
10467 m_hiddenStructArg++;
10469 case DoNotEnregisterReason::DontEnregStructs:
10470 m_dontEnregStructs++;
10472 case DoNotEnregisterReason::NotRegSizeStruct:
10473 m_notRegSizeStruct++;
10475 case DoNotEnregisterReason::LocalField:
10478 case DoNotEnregisterReason::VMNeedsStackAddr:
10479 m_VMNeedsStackAddr++;
10481 case DoNotEnregisterReason::LiveInOutOfHandler:
10482 m_liveInOutHndlr++;
10484 case DoNotEnregisterReason::BlockOp:
10487 case DoNotEnregisterReason::IsStructArg:
10490 case DoNotEnregisterReason::DepField:
10493 case DoNotEnregisterReason::NoRegVars:
10496 case DoNotEnregisterReason::MinOptsGC:
10499 #if !defined(TARGET_64BIT)
10500 case DoNotEnregisterReason::LongParamField:
10501 m_longParamField++;
10504 #ifdef JIT32_GCENCODER
10505 case DoNotEnregisterReason::PinningRef:
10509 case DoNotEnregisterReason::LclAddrNode:
10513 case DoNotEnregisterReason::CastTakesAddr:
10517 case DoNotEnregisterReason::StoreBlkSrc:
10521 case DoNotEnregisterReason::SwizzleArg:
10525 case DoNotEnregisterReason::BlockOpRet:
10529 case DoNotEnregisterReason::ReturnSpCheck:
10533 case DoNotEnregisterReason::CallSpCheck:
10537 case DoNotEnregisterReason::SimdUserForcesDep:
10538 m_simdUserForcesDep++;
10546 if (varDsc->GetDoNotEnregReason() == DoNotEnregisterReason::AddrExposed)
10548 // We can't `assert(IsAddressExposed())` because `fgAdjustForAddressExposedOrWrittenThis`
10549 // does not clear `m_doNotEnregReason` on `this`.
10550 switch (varDsc->GetAddrExposedReason())
10552 case AddressExposedReason::PARENT_EXPOSED:
10556 case AddressExposedReason::TOO_CONSERVATIVE:
10557 m_tooConservative++;
10560 case AddressExposedReason::ESCAPE_ADDRESS:
10564 case AddressExposedReason::WIDE_INDIR:
10568 case AddressExposedReason::OSR_EXPOSED:
10572 case AddressExposedReason::STRESS_LCL_FLD:
10576 case AddressExposedReason::DISPATCH_RET_BUF:
10577 m_dispatchRetBuf++;
10580 case AddressExposedReason::STRESS_POISON_IMPLICIT_BYREFS:
10581 m_stressPoisonImplicitByrefs++;
10584 case AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY:
10585 m_externallyVisibleImplicitly++;
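//
// Illustrative sketch (hypothetical call site): the stats are recorded once per
// local, e.g. by walking the local table at the end of compilation:
//
//   for (unsigned lclNum = 0; lclNum < comp->lvaCount; lclNum++)
//   {
//       Compiler::s_enregisterStats.RecordLocal(comp->lvaGetDesc(lclNum));
//   }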
10596 void Compiler::EnregisterStats::Dump(FILE* fout) const
10598 const unsigned totalNumberOfNotStructVars =
10599 s_enregisterStats.m_totalNumberOfVars - s_enregisterStats.m_totalNumberOfStructVars;
10600 const unsigned totalNumberOfNotStructEnregVars =
10601 s_enregisterStats.m_totalNumberOfEnregVars - s_enregisterStats.m_totalNumberOfStructEnregVars;
10602 const unsigned notEnreg = s_enregisterStats.m_totalNumberOfVars - s_enregisterStats.m_totalNumberOfEnregVars;
10604 fprintf(fout, "\nLocals enregistration statistics:\n");
10605 if (m_totalNumberOfVars == 0)
10607 fprintf(fout, "No locals to report.\n");
10610 fprintf(fout, "total number of locals: %d, number of enregistered: %d, notEnreg: %d, ratio: %.2f\n",
10611 m_totalNumberOfVars, m_totalNumberOfEnregVars, m_totalNumberOfVars - m_totalNumberOfEnregVars,
10612 (float)m_totalNumberOfEnregVars / m_totalNumberOfVars);
10614 if (m_totalNumberOfStructVars != 0)
10616 fprintf(fout, "total number of struct locals: %d, number of enregistered: %d, notEnreg: %d, ratio: %.2f\n",
10617 m_totalNumberOfStructVars, m_totalNumberOfStructEnregVars,
10618 m_totalNumberOfStructVars - m_totalNumberOfStructEnregVars,
10619 (float)m_totalNumberOfStructEnregVars / m_totalNumberOfStructVars);
10622 const unsigned numberOfPrimitiveLocals = totalNumberOfNotStructVars - totalNumberOfNotStructEnregVars;
10623 if (numberOfPrimitiveLocals != 0)
10625 fprintf(fout, "total number of primitive locals: %d, number of enregistered: %d, notEnreg: %d, ratio: %.2f\n",
10626 totalNumberOfNotStructVars, totalNumberOfNotStructEnregVars, numberOfPrimitiveLocals,
10627 (float)totalNumberOfNotStructEnregVars / totalNumberOfNotStructVars);
10632 fprintf(fout, "All locals are enregistered.\n");
10636 #define PRINT_STATS(stat, total) \
10639 fprintf(fout, #stat " %d, ratio: %.2f\n", stat, (float)stat / total);
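//
// For example (illustrative expansion): PRINT_STATS(m_addrExposed, notEnreg)
// stringizes the stat name and expands to roughly
//
//   fprintf(fout, "m_addrExposed %d, ratio: %.2f\n", m_addrExposed, (float)m_addrExposed / notEnreg);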
10642 PRINT_STATS(m_addrExposed, notEnreg);
10643 PRINT_STATS(m_hiddenStructArg, notEnreg);
10644 PRINT_STATS(m_dontEnregStructs, notEnreg);
10645 PRINT_STATS(m_notRegSizeStruct, notEnreg);
10646 PRINT_STATS(m_localField, notEnreg);
10647 PRINT_STATS(m_VMNeedsStackAddr, notEnreg);
10648 PRINT_STATS(m_liveInOutHndlr, notEnreg);
10649 PRINT_STATS(m_blockOp, notEnreg);
10650 PRINT_STATS(m_structArg, notEnreg);
10651 PRINT_STATS(m_depField, notEnreg);
10652 PRINT_STATS(m_noRegVars, notEnreg);
10653 PRINT_STATS(m_minOptsGC, notEnreg);
10654 #if !defined(TARGET_64BIT)
10655 PRINT_STATS(m_longParamField, notEnreg);
10656 #endif // !TARGET_64BIT
10657 #ifdef JIT32_GCENCODER
10658 PRINT_STATS(m_PinningRef, notEnreg);
10659 #endif // JIT32_GCENCODER
10660 PRINT_STATS(m_lclAddrNode, notEnreg);
10661 PRINT_STATS(m_castTakesAddr, notEnreg);
10662 PRINT_STATS(m_storeBlkSrc, notEnreg);
10663 PRINT_STATS(m_swizzleArg, notEnreg);
10664 PRINT_STATS(m_blockOpRet, notEnreg);
10665 PRINT_STATS(m_returnSpCheck, notEnreg);
10666 PRINT_STATS(m_callSpCheck, notEnreg);
10667 PRINT_STATS(m_simdUserForcesDep, notEnreg);
10669 fprintf(fout, "\nAddr exposed details:\n");
10670 if (m_addrExposed == 0)
10672 fprintf(fout, "\nNo address exposed locals to report.\n");
10676 PRINT_STATS(m_parentExposed, m_addrExposed);
10677 PRINT_STATS(m_tooConservative, m_addrExposed);
10678 PRINT_STATS(m_escapeAddress, m_addrExposed);
10679 PRINT_STATS(m_wideIndir, m_addrExposed);
10680 PRINT_STATS(m_osrExposed, m_addrExposed);
10681 PRINT_STATS(m_stressLclFld, m_addrExposed);
10682 PRINT_STATS(m_dispatchRetBuf, m_addrExposed);
10683 PRINT_STATS(m_stressPoisonImplicitByrefs, m_addrExposed);
10684 PRINT_STATS(m_externallyVisibleImplicitly, m_addrExposed);
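//
// Illustrative output shape (made-up numbers):
//
//   Locals enregistration statistics:
//   total number of locals: 100, number of enregistered: 80, notEnreg: 20, ratio: 0.80
//   m_addrExposed 12, ratio: 0.60
//
//   Addr exposed details:
//   m_escapeAddress 12, ratio: 1.00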
10686 #endif // TRACK_ENREG_STATS