1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
4 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
16 #include "hostallocator.h"
18 #include "ssabuilder.h"
20 #include "rangecheck.h"
22 #include "stacklevelsetter.h"
23 #include "jittelemetry.h"
24 #include "patchpointinfo.h"
25 #include "jitstd/algorithm.h"
// Fixed character-column positions used when formatting IR dump output.
28 // Column settings for COMPlus_JitDumpIR. We could (or should) make these programmable.
29 #define COLUMN_OPCODE 30
30 #define COLUMN_OPERANDS (COLUMN_OPCODE + 25)
31 #define COLUMN_KINDS 110
32 #define COLUMN_FLAGS (COLUMN_KINDS + 32)
// Definitions of Compiler static members: debug-only counters plus
// lazily-initialized assembly/method filter lists (the guarding #if lines
// for the DEBUG-only members are elided in this view of the file).
36 unsigned Compiler::jitTotalMethodCompiled = 0;
37 #endif // defined(DEBUG)
40 LONG Compiler::jitNestingLevel = 0;
41 #endif // defined(DEBUG)
// One-time-initialized list of assemblies excluded from the alt jit.
44 bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
45 AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr;
// One-time-initialized list of assemblies whose disassembly is requested.
49 bool Compiler::s_pJitDisasmIncludeAssembliesListInitialized = false;
50 AssemblyNamesList2* Compiler::s_pJitDisasmIncludeAssembliesList = nullptr;
// One-time-initialized method set read from the JitFunctionFile config.
53 bool Compiler::s_pJitFunctionFileInitialized = false;
54 MethodSet* Compiler::s_pJitMethodSet = nullptr;
57 #ifdef CONFIGURABLE_ARM_ABI
// When the ARM ABI is configurable, HFA support and soft-FP selection are
// runtime options rather than compile-time constants.
59 bool GlobalJitOptions::compFeatureHfa = false;
60 LONG GlobalJitOptions::compUseSoftFPConfigured = 0;
61 #endif // CONFIGURABLE_ARM_ABI
63 /*****************************************************************************
65 * Little helpers to grab the current cycle counter value; this is done
66 * differently based on target architecture, host toolchain, etc. The
67 * main thing is to keep the overhead absolutely minimal; in fact, on
68 * x86/x64 we use RDTSC even though it's not thread-safe; GetThreadCycles
69 * (which is monotonic) is just too expensive.
71 #ifdef FEATURE_JIT_METHOD_PERF
// _our_GetThreadCycles: read a raw cycle counter with minimal overhead.
// Implementation is chosen per host architecture and toolchain; the x86/x64
// paths use RDTSC directly (fast but not thread-safe; see the comment above).
73 #if defined(HOST_X86) || defined(HOST_AMD64)
// MSVC-intrinsic path (the #if _MSC_VER line is elided in this view).
78 inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
80 *cycleOut = __rdtsc();
84 #elif defined(__GNUC__)
// GCC/Clang path: inline-asm RDTSC, combining EDX:EAX into one 64-bit value.
86 inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
89 __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
90 *cycleOut = (static_cast<unsigned __int64>(hi) << 32) | static_cast<unsigned __int64>(lo);
94 #else // neither _MSC_VER nor __GNUC__
96 // The following *might* work - might as well try.
97 #define _our_GetThreadCycles(cp) GetThreadCycles(cp)
101 #elif defined(HOST_ARM) || defined(HOST_ARM64)
103 // If this doesn't work please see ../gc/gc.cpp for additional ARM
104 // info (and possible solutions).
105 #define _our_GetThreadCycles(cp) GetThreadCycles(cp)
107 #else // not x86/x64 and not ARM
109 // Don't know what this target is, but let's give it a try; if
110 // someone really wants to make this work, please add the right
112 #define _our_GetThreadCycles(cp) GetThreadCycles(cp)
114 #endif // which host OS
116 #endif // FEATURE_JIT_METHOD_PERF
117 /*****************************************************************************/
// getCurTime: return the current local time of day folded into a single
// unsigned millisecond count (hour/minute/second/millisecond combined).
// NOTE(review): 'tim' looks like a SYSTEMTIME populated by an elided
// GetLocalTime-style call — confirm against the full source.
118 inline unsigned getCurTime()
124 return (((tim.wHour * 60) + tim.wMinute) * 60 + tim.wSecond) * 1000 + tim.wMilliseconds;
127 /*****************************************************************************/
129 /*****************************************************************************/
// State for the source-line dumping helpers below: the open source file and
// the line the file cursor is currently positioned at.
131 static FILE* jitSrcFilePtr;
133 static unsigned jitCurSrcLine;
// JitLogEE: printf-style logging entry point. Writes the formatted message
// to the JIT's stdout stream (vflogf) and forwards it with its verbosity
// level to the host/EE logger (vlogf); the va_list setup and the conditions
// selecting each sink are elided in this view.
135 void Compiler::JitLogEE(unsigned level, const char* fmt, ...)
142 vflogf(jitstdout, fmt, args);
147 vlogf(level, fmt, args);
// compDspSrcLinesByLineNum: print source lines from jitSrcFilePtr up to
// (and including) 'line', prefixing each with "; ". 'seek' appears to
// suppress printing while advancing the cursor — confirm against full source.
151 void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek)
// Already positioned at the requested line: nothing to do.
158 if (jitCurSrcLine == line)
// Target is behind the cursor: rewind the file to the beginning.
163 if (jitCurSrcLine > line)
170 if (fseek(jitSrcFilePtr, 0, SEEK_SET) != 0)
172 printf("Compiler::compDspSrcLinesByLineNum: fseek returned an error.\n");
// Read lines until the target line is reached; stop early on EOF/error.
187 if (!fgets(temp, sizeof(temp), jitSrcFilePtr))
// Strip the trailing newline so our own '\n' is the only terminator.
198 if (llen && temp[llen - 1] == '\n')
203 printf("; %s\n", temp);
204 } while (++jitCurSrcLine < line);
212 /*****************************************************************************/
// compDspSrcLinesByNativeIP: given a native code offset, walk the
// IP-to-IL mapping list and display the source lines corresponding to the
// code just emitted. Uses function-local statics to resume the walk across
// successive calls during a single method's code generation.
214 void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
216 static IPmappingDsc* nextMappingDsc;
217 static unsigned lastLine;
// First call (condition elided): prime the cursor from the head of the
// mapping list and show the first line plus a few lines of leading context.
226 if (genIPmappingList)
228 nextMappingDsc = genIPmappingList;
229 lastLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
231 unsigned firstLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
// Back up 5 lines for context, clamping at line 0.
233 unsigned earlierLine = (firstLine < 5) ? 0 : firstLine - 5;
235 compDspSrcLinesByLineNum(earlierLine, true); // display previous 5 lines
236 compDspSrcLinesByLineNum(firstLine, false);
240 nextMappingDsc = nullptr;
// Advance through mapping entries whose native offset has been passed.
248 UNATIVE_OFFSET offset = nextMappingDsc->ipmdNativeLoc.CodeOffset(GetEmitter());
252 IL_OFFSET nextOffs = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
// Moving forward in the source: just print up to the new line.
254 if (lastLine < nextOffs)
256 compDspSrcLinesByLineNum(nextOffs);
260 // This offset corresponds to a previous line. Rewind to that line
262 compDspSrcLinesByLineNum(nextOffs - 2, true);
263 compDspSrcLinesByLineNum(nextOffs);
267 nextMappingDsc = nextMappingDsc->ipmdNext;
272 /*****************************************************************************/
275 /*****************************************************************************/
276 #if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
// Process-wide method counters ('I' = interruptible, 'N' = non-interruptible).
278 static unsigned genMethodCnt; // total number of methods JIT'ted
279 unsigned genMethodICnt; // number of interruptible methods
280 unsigned genMethodNCnt; // number of non-interruptible methods
281 static unsigned genSmallMethodsNeedingExtraMemoryCnt = 0;
285 /*****************************************************************************/
286 #if MEASURE_NODE_SIZE
287 NodeSizeStats genNodeSizeStats;
288 NodeSizeStats genNodeSizeStatsPerFunc;
// Histogram bucket arrays; the trailing 0 appears to act as the list
// terminator for Histogram — confirm against the Histogram implementation.
290 unsigned genTreeNcntHistBuckets[] = {10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 0};
291 Histogram genTreeNcntHist(genTreeNcntHistBuckets);
293 unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000, 1000000, 0};
294 Histogram genTreeNsizHist(genTreeNsizHistBuckets);
295 #endif // MEASURE_NODE_SIZE
297 /*****************************************************************************/
298 #if MEASURE_MEM_ALLOC
// Histograms of allocation-request sizes and actual memory used.
300 unsigned memAllocHistBuckets[] = {64, 128, 192, 256, 512, 1024, 4096, 8192, 0};
301 Histogram memAllocHist(memAllocHistBuckets);
302 unsigned memUsedHistBuckets[] = {16, 32, 64, 128, 192, 256, 512, 1024, 4096, 8192, 0};
303 Histogram memUsedHist(memUsedHistBuckets);
305 #endif // MEASURE_MEM_ALLOC
307 /*****************************************************************************
309 * Variables to keep track of total code amounts.
// Accumulated across all methods compiled by this process.
314 size_t grossVMsize; // Total IL code size
315 size_t grossNCsize; // Native code + data size
316 size_t totalNCsize; // Native code + data + GC info size (TODO-Cleanup: GC info size only accurate for JIT32_GCENCODER)
317 size_t gcHeaderISize; // GC header size: interruptible methods
318 size_t gcPtrMapISize; // GC pointer map size: interruptible methods
319 size_t gcHeaderNSize; // GC header size: non-interruptible methods
320 size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods
322 #endif // DISPLAY_SIZES
324 /*****************************************************************************
326 * Variables to keep track of argument counts.
// Call-site counters, bucketed by call kind.
331 unsigned argTotalCalls;
332 unsigned argHelperCalls;
333 unsigned argStaticCalls;
334 unsigned argNonVirtualCalls;
335 unsigned argVirtualCalls;
// Argument counters, bucketed by argument type.
337 unsigned argTotalArgs; // total number of args for all calls (including objectPtr)
338 unsigned argTotalDWordArgs;
339 unsigned argTotalLongArgs;
340 unsigned argTotalFloatArgs;
341 unsigned argTotalDoubleArgs;
// Argument counters, bucketed by how the argument is materialized.
343 unsigned argTotalRegArgs;
344 unsigned argTotalTemps;
345 unsigned argTotalLclVar;
346 unsigned argTotalDeferred;
347 unsigned argTotalConst;
349 unsigned argTotalObjPtr;
350 unsigned argTotalGTF_ASGinArgs;
352 unsigned argMaxTempsPerMethod;
// Histograms of per-call argument counts (trailing 0 terminates the buckets).
354 unsigned argCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
355 Histogram argCntTable(argCntBuckets);
357 unsigned argDWordCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
358 Histogram argDWordCntTable(argDWordCntBuckets);
360 unsigned argDWordLngCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
361 Histogram argDWordLngCntTable(argDWordLngCntBuckets);
363 unsigned argTempsCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
364 Histogram argTempsCntTable(argTempsCntBuckets);
366 #endif // CALL_ARG_STATS
368 /*****************************************************************************
370 * Variables to keep track of basic block counts.
373 #if COUNT_BASIC_BLOCKS
// Sample output from a previous run, kept as documentation of the
// typical distribution of basic-block counts per method.
375 // --------------------------------------------------
376 // Basic block count frequency table:
377 // --------------------------------------------------
378 // <= 1 ===> 26872 count ( 56% of total)
379 // 2 .. 2 ===> 669 count ( 58% of total)
380 // 3 .. 3 ===> 4687 count ( 68% of total)
381 // 4 .. 5 ===> 5101 count ( 78% of total)
382 // 6 .. 10 ===> 5575 count ( 90% of total)
383 // 11 .. 20 ===> 3028 count ( 97% of total)
384 // 21 .. 50 ===> 1108 count ( 99% of total)
385 // 51 .. 100 ===> 182 count ( 99% of total)
386 // 101 .. 1000 ===> 34 count (100% of total)
387 // 1001 .. 10000 ===> 0 count (100% of total)
388 // --------------------------------------------------
390 unsigned bbCntBuckets[] = {1, 2, 3, 5, 10, 20, 50, 100, 1000, 10000, 0};
391 Histogram bbCntTable(bbCntBuckets);
393 /* Histogram for the IL opcode size of methods with a single basic block */
395 unsigned bbSizeBuckets[] = {1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0};
396 Histogram bbOneBBSizeTable(bbSizeBuckets);
398 #endif // COUNT_BASIC_BLOCKS
400 /*****************************************************************************
402 * Used by optFindNaturalLoops to gather statistical information such as
403 * - total number of natural loops
404 * - number of loops with 1, 2, ... exit conditions
405 * - number of loops that have an iterator (for like)
406 * - number of loops that have a constant iterator
// Process-wide accumulators, plus per-method scratch state (hasMethodLoops,
// loopsThisMethod, loopOverflowThisMethod) reset for each compiled method.
411 unsigned totalLoopMethods; // counts the total number of methods that have natural loops
412 unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
413 unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
414 unsigned totalLoopCount; // counts the total number of natural loops
415 unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
416 unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
417 unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
418 unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter < const)
419 unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
420 bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
421 unsigned loopsThisMethod; // counts the number of loops in the current method
422 bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
424 /* Histogram for number of loops in a method */
426 unsigned loopCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0};
427 Histogram loopCountTable(loopCountBuckets);
429 /* Histogram for number of loop exits */
431 unsigned loopExitCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 0};
432 Histogram loopExitCountTable(loopExitCountBuckets);
434 #endif // COUNT_LOOPS
436 //------------------------------------------------------------------------
437 // getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types
440 // gcType - an enum value that originally came from an element
441 // of the BYTE[] returned from getClassGClayout()
444 // The corresponding enum value from the JIT's var_types
447 // The gcLayout of each field of a struct is returned from getClassGClayout()
448 // as a BYTE[] but each BYTE element is actually a CorInfoGCType value
449 // Note when we 'know' that there is only one element in this array
450 // the JIT will often pass the address of a single BYTE, instead of a BYTE[]
453 var_types Compiler::getJitGCType(BYTE gcType)
// Default to TYP_UNKNOWN; one of the branches below (bodies elided in this
// view) sets 'result' for the recognized GC categories.
455 var_types result = TYP_UNKNOWN;
// Reinterpret the raw byte from getClassGClayout() as the VM's GC enum.
456 CorInfoGCType corInfoType = (CorInfoGCType)gcType;
458 if (corInfoType == TYPE_GC_NONE)
462 else if (corInfoType == TYPE_GC_REF)
466 else if (corInfoType == TYPE_GC_BYREF)
// Any other value means the GC layout data from the VM is corrupt.
472 noway_assert(!"Bad value of 'gcType'");
478 //---------------------------------------------------------------------------
479 // isTrivialPointerSizedStruct:
480 // Check if the given struct type contains only one pointer-sized integer value type
483 // clsHnd - the handle for the struct type.
486 // true if the given struct type contains only one pointer-sized integer value type,
489 bool Compiler::isTrivialPointerSizedStruct(CORINFO_CLASS_HANDLE clsHnd) const
// Caller must pass a value class; non-pointer-sized structs never qualify.
491 assert(info.compCompHnd->isValueClass(clsHnd));
492 if (info.compCompHnd->getClassSize(clsHnd) != TARGET_POINTER_SIZE)
// Drill down through nested single-field structs (looping construct elided
// in this view): each level must itself be a single-field value class.
498 // all of class chain must be of value type and must have only one field
499 if (!info.compCompHnd->isValueClass(clsHnd) || info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
// getFieldType writes the field's class handle back through pClsHnd,
// which presumably feeds the next iteration of the descent — confirm.
504 CORINFO_CLASS_HANDLE* pClsHnd = &clsHnd;
505 CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
506 CorInfoType fieldType = info.compCompHnd->getFieldType(fldHnd, pClsHnd);
508 var_types vt = JITtype2varType(fieldType);
// A nested struct means we keep descending; a non-GC pointer-sized
// integer is the success case.
510 if (fieldType == CORINFO_TYPE_VALUECLASS)
514 else if (varTypeIsI(vt) && !varTypeIsGC(vt))
526 //---------------------------------------------------------------------------
527 // isNativePrimitiveStructType:
528 // Check if the given struct type is an intrinsic type that should be treated as though
529 // it is not a struct at the unmanaged ABI boundary.
532 // clsHnd - the handle for the struct type.
535 // true if the given struct type should be treated as a primitive for unmanaged calls,
538 bool Compiler::isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd)
// Only intrinsic types can qualify; bail out early otherwise.
540 if (!isIntrinsicType(clsHnd))
// Fetch the type's name and namespace from metadata for matching below.
544 const char* namespaceName = nullptr;
545 const char* typeName = getClassNameFromMetadata(clsHnd, &namespaceName);
547 if (strcmp(namespaceName, "System.Runtime.InteropServices") != 0)
// The recognized interop wrapper types that map to a native primitive.
552 return strcmp(typeName, "CLong") == 0 || strcmp(typeName, "CULong") == 0 || strcmp(typeName, "NFloat") == 0;
555 //-----------------------------------------------------------------------------
556 // getPrimitiveTypeForStruct:
557 // Get the "primitive" type that is used for a struct
558 // of size 'structSize'.
559 // We examine 'clsHnd' to check the GC layout of the struct and
560 // return TYP_REF for structs that simply wrap an object.
561 // If the struct is a one element HFA/HVA, we will return the
562 // proper floating point or vector type.
565 // structSize - the size of the struct type, cannot be zero
566 // clsHnd - the handle for the struct type, used when may have
567 // an HFA or if we need the GC layout for an object ref.
570 // The primitive type (i.e. byte, short, int, long, ref, float, double)
571 // used to pass or return structs of this size.
572 // If we shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
574 // For 32-bit targets (X86/ARM32) the 64-bit TYP_LONG type is not
575 // considered a primitive type by this method.
576 // So a struct that wraps a 'long' is passed and returned in the
577 // same way as any other 8-byte struct
578 // For ARM32 if we have an HFA struct that wraps a 64-bit double
579 // we will return TYP_DOUBLE.
580 // For vector calling conventions, a vector is considered a "primitive"
581 // type, as it is passed in a single register.
583 var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg)
// A zero struct size is a caller error (see the header comment above).
585 assert(structSize != 0);
// TYP_UNKNOWN signals "no primitive replacement" to the caller.
587 var_types useType = TYP_UNKNOWN;
589 // Start by determining if we have an HFA/HVA with a single element.
590 if (GlobalJitOptions::compFeatureHfa)
592 #if defined(TARGET_WINDOWS) && defined(TARGET_ARM64)
593 // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated
594 // as if they are not HFA types.
596 #endif // defined(TARGET_WINDOWS) && defined(TARGET_ARM64)
604 #endif // TARGET_ARM64
606 var_types hfaType = GetHfaType(clsHnd);
607 // We're only interested in the case where the struct size is equal to the size of the hfaType.
608 if (varTypeIsValidHfaType(hfaType))
610 if (genTypeSize(hfaType) == structSize)
621 if (useType != TYP_UNKNOWN)
628 // Now deal with non-HFA/HVA structs.
// The size-based dispatch below (its switch/case framing is elided in this
// view) maps small struct sizes to integral types per target ABI.
639 #if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI)
644 #endif // !TARGET_XARCH || UNIX_AMD64_ABI
648 // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT.
652 #if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI)
656 useType = TYP_I_IMPL;
659 #endif // !TARGET_XARCH || UNIX_AMD64_ABI
660 #endif // TARGET_64BIT
662 case TARGET_POINTER_SIZE:
665 // Check if this pointer-sized struct is wrapping a GC object
666 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr)
667 useType = getJitGCType(gcPtr);
// Any other size: no primitive replacement is possible.
672 useType = TYP_UNKNOWN;
679 }
679 //-----------------------------------------------------------------------------
680 // getArgTypeForStruct:
681 // Get the type that is used to pass values of the given struct type.
682 // If you have already retrieved the struct size then it should be
683 // passed as the optional fourth argument, as this allows us to avoid
684 // an extra call to getClassSize(clsHnd)
687 // clsHnd - the handle for the struct type
688 // wbPassStruct - An "out" argument with information about how
689 // the struct is to be passed
690 // isVarArg - is vararg, used to ignore HFA types for Arm64 windows varargs
691 // structSize - the size of the struct type,
692 // or zero if we should call getClassSize(clsHnd)
695 // For wbPassStruct you can pass a 'nullptr' and nothing will be written
696 // or returned for that out parameter.
697 // When *wbPassStruct is SPK_PrimitiveType this method's return value
698 // is the primitive type used to pass the struct.
699 // When *wbPassStruct is SPK_ByReference this method's return value
700 // is always TYP_UNKNOWN and the struct type is passed by reference to a copy
701 // When *wbPassStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
702 // is always TYP_STRUCT and the struct type is passed by value either
703 // using multiple registers or on the stack.
706 // The size must be the size of the given type.
707 // The given class handle must be for a value type (struct).
711 // When the clsHnd is a one element HFA type we return the appropriate
712 // floating point primitive type and *wbPassStruct is SPK_PrimitiveType
713 // If there are two or more elements in the HFA type then this method's
714 // return value is TYP_STRUCT and *wbPassStruct is SPK_ByValueAsHfa
716 var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
717 structPassingKind* wbPassStruct,
721 var_types useType = TYP_UNKNOWN;
// Every path below must replace SPK_Unknown; the assert near the end of the
// function enforces this invariant.
722 structPassingKind howToPassStruct = SPK_Unknown; // We must change this before we return
724 assert(structSize != 0);
726 // Determine if we can pass the struct as a primitive type.
727 // Note that on x86 we only pass specific pointer-sized structs that satisfy isTrivialPointerSizedStruct checks.
729 #ifdef UNIX_AMD64_ABI
731 // An 8-byte struct may need to be passed in a floating point register
732 // So we always consult the struct "Classifier" routine
734 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
735 eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
737 if (structDesc.passedInRegisters && (structDesc.eightByteCount != 1))
739 // We can't pass this as a primitive type.
741 else if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
743 // If this is passed as a floating type, use that.
744 // Otherwise, we'll use the general case - we don't want to use the "EightByteType"
745 // directly, because it returns `TYP_INT` for any integral type <= 4 bytes, and
746 // we need to preserve small types.
747 useType = GetEightByteType(structDesc, 0);
750 #endif // UNIX_AMD64_ABI
752 // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES,
753 // so we can skip calling getPrimitiveTypeForStruct when we
754 // have a struct that is larger than that.
756 if (structSize <= MAX_PASS_SINGLEREG_BYTES)
758 // We set the "primitive" useType based upon the structSize
759 // and also examine the clsHnd to see if it is an HFA of count one
760 useType = getPrimitiveTypeForStruct(structSize, clsHnd, isVarArg)
763 if (isTrivialPointerSizedStruct(clsHnd))
765 useType = TYP_I_IMPL;
767 #endif // !TARGET_X86
769 // Did we change this struct type into a simple "primitive" type?
771 if (useType != TYP_UNKNOWN)
773 // Yes, we should use the "primitive" type in 'useType'
774 howToPassStruct = SPK_PrimitiveType;
776 else // We can't replace the struct with a "primitive" type
778 // See if we can pass this struct by value, possibly in multiple registers
779 // or if we should pass it by reference to a copy
781 if (structSize <= MAX_PASS_MULTIREG_BYTES)
783 // Structs that are HFA/HVA's are passed by value in multiple registers.
784 // Arm64 Windows VarArg methods arguments will not classify HFA/HVA types, they will need to be treated
785 // as if they are not HFA/HVA types.
787 #if defined(TARGET_WINDOWS) && defined(TARGET_ARM64)
793 #endif // defined(TARGET_WINDOWS) && defined(TARGET_ARM64)
795 hfaType = GetHfaType(clsHnd);
797 if (varTypeIsValidHfaType(hfaType))
799 // HFA's of count one should have been handled by getPrimitiveTypeForStruct
800 assert(GetHfaCount(clsHnd) >= 2);
802 // setup wbPassType and useType indicate that this is passed by value as an HFA
803 // using multiple registers
804 // (when all of the parameters registers are used, then the stack will be used)
805 howToPassStruct = SPK_ByValueAsHfa;
806 useType = TYP_STRUCT;
808 else // Not an HFA struct type
// Per-target ABI handling for non-HFA structs small enough for multireg.
811 #ifdef UNIX_AMD64_ABI
812 // The case of (structDesc.eightByteCount == 1) should have already been handled
813 if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
815 // setup wbPassType and useType indicate that this is passed by value in multiple registers
816 // (when all of the parameters registers are used, then the stack will be used)
817 howToPassStruct = SPK_ByValue;
818 useType = TYP_STRUCT;
822 assert(structDesc.eightByteCount == 0);
823 // Otherwise we pass this struct by reference to a copy
824 // setup wbPassType and useType indicate that this is passed using one register
825 // (by reference to a copy)
826 howToPassStruct = SPK_ByReference;
827 useType = TYP_UNKNOWN;
830 #elif defined(TARGET_ARM64)
832 // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
833 assert(structSize > TARGET_POINTER_SIZE);
835 // On ARM64 structs that are 9-16 bytes are passed by value in multiple registers
837 if (structSize <= (TARGET_POINTER_SIZE * 2))
839 // setup wbPassType and useType indicate that this is passed by value in multiple registers
840 // (when all of the parameters registers are used, then the stack will be used)
841 howToPassStruct = SPK_ByValue;
842 useType = TYP_STRUCT;
844 else // a structSize that is 17-32 bytes in size
846 // Otherwise we pass this struct by reference to a copy
847 // setup wbPassType and useType indicate that this is passed using one register
848 // (by reference to a copy)
849 howToPassStruct = SPK_ByReference;
850 useType = TYP_UNKNOWN;
853 #elif defined(TARGET_X86) || defined(TARGET_ARM)
855 // Otherwise we pass this struct by value on the stack
856 // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI
857 howToPassStruct = SPK_ByValue;
858 useType = TYP_STRUCT;
862 noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");
867 else // (structSize > MAX_PASS_MULTIREG_BYTES)
869 // We have a (large) struct that can't be replaced with a "primitive" type
870 // and can't be passed in multiple registers
871 CLANG_FORMAT_COMMENT_ANCHOR;
873 #if defined(TARGET_X86) || defined(TARGET_ARM) || defined(UNIX_AMD64_ABI)
875 // Otherwise we pass this struct by value on the stack
876 // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI
877 howToPassStruct = SPK_ByValue;
878 useType = TYP_STRUCT;
880 #elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
882 // Otherwise we pass this struct by reference to a copy
883 // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy)
884 howToPassStruct = SPK_ByReference;
885 useType = TYP_UNKNOWN;
889 noway_assert(!"Unhandled TARGET in getArgTypeForStruct");
895 // 'howToPassStruct' must be set to one of the valid values before we return
896 assert(howToPassStruct != SPK_Unknown);
// The out-parameter is optional; only write through it when provided.
897 if (wbPassStruct != nullptr)
899 *wbPassStruct = howToPassStruct;
905 //-----------------------------------------------------------------------------
906 // getReturnTypeForStruct:
907 // Get the type that is used to return values of the given struct type.
908 // If you have already retrieved the struct size then it should be
909 // passed as the optional third argument, as this allows us to avoid
910 // an extra call to getClassSize(clsHnd)
913 // clsHnd - the handle for the struct type
914 // callConv - the calling convention of the function
915 // that returns this struct.
916 // wbReturnStruct - An "out" argument with information about how
917 // the struct is to be returned
918 // structSize - the size of the struct type,
919 // or zero if we should call getClassSize(clsHnd)
922 // For wbReturnStruct you can pass a 'nullptr' and nothing will be written
923 // or returned for that out parameter.
924 // When *wbReturnStruct is SPK_PrimitiveType this method's return value
925 // is the primitive type used to return the struct.
926 // When *wbReturnStruct is SPK_ByReference this method's return value
927 // is always TYP_UNKNOWN and the struct type is returned using a return buffer
928 // When *wbReturnStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
929 // is always TYP_STRUCT and the struct type is returned using multiple registers.
932 // The size must be the size of the given type.
933 // The given class handle must be for a value type (struct).
937 // When the clsHnd is a one element HFA type then this method's return
938 // value is the appropriate floating point primitive type and
939 // *wbReturnStruct is SPK_PrimitiveType.
940 // If there are two or more elements in the HFA type and the target supports
941 // multireg return types then the return value is TYP_STRUCT and
942 // *wbReturnStruct is SPK_ByValueAsHfa.
943 // Additionally if there are two or more elements in the HFA type and
944 // the target doesn't support multireg return types then it is treated
945 // as if it wasn't an HFA type.
946 // About returning TYP_STRUCT:
947 // Whenever this method's return value is TYP_STRUCT it always means
948 // that multiple registers are used to return this struct.
950 var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
951 CorInfoCallConvExtension callConv,
952 structPassingKind* wbReturnStruct /* = nullptr */,
953 unsigned structSize /* = 0 */)
955 var_types useType = TYP_UNKNOWN;
956 structPassingKind howToReturnStruct = SPK_Unknown; // We must change this before we return
957 bool canReturnInRegister = true;
959 assert(clsHnd != NO_CLASS_HANDLE);
963 structSize = info.compCompHnd->getClassSize(clsHnd);
965 assert(structSize > 0);
967 #ifdef UNIX_AMD64_ABI
968 // An 8-byte struct may need to be returned in a floating point register
969 // So we always consult the struct "Classifier" routine
971 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
972 eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
974 if (structDesc.eightByteCount == 1)
976 assert(structSize <= sizeof(double));
977 assert(structDesc.passedInRegisters);
979 if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
981 // If this is returned as a floating type, use that.
982 // Otherwise, leave as TYP_UNKONWN and we'll sort things out below.
983 useType = GetEightByteType(structDesc, 0);
984 howToReturnStruct = SPK_PrimitiveType;
989 // Return classification is not always size based...
990 canReturnInRegister = structDesc.passedInRegisters;
991 if (!canReturnInRegister)
993 assert(structDesc.eightByteCount == 0);
994 howToReturnStruct = SPK_ByReference;
995 useType = TYP_UNKNOWN;
999 if (callConv != CorInfoCallConvExtension::Managed && !isNativePrimitiveStructType(clsHnd))
1001 canReturnInRegister = false;
1002 howToReturnStruct = SPK_ByReference;
1003 useType = TYP_UNKNOWN;
1005 #elif defined(TARGET_WINDOWS) && !defined(TARGET_ARM)
1006 if (callConvIsInstanceMethodCallConv(callConv) && !isNativePrimitiveStructType(clsHnd))
1008 canReturnInRegister = false;
1009 howToReturnStruct = SPK_ByReference;
1010 useType = TYP_UNKNOWN;
1014 // Check for cases where a small struct is returned in a register
1015 // via a primitive type.
1017 // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
1018 // so we can skip calling getPrimitiveTypeForStruct when we
1019 // have a struct that is larger than that.
1020 if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
1022 // We set the "primitive" useType based upon the structSize
1023 // and also examine the clsHnd to see if it is an HFA of count one
1025 // The ABI for struct returns in varArg methods, is same as the normal case,
1026 // so pass false for isVararg
1027 useType = getPrimitiveTypeForStruct(structSize, clsHnd, /*isVararg=*/false);
1029 if (useType != TYP_UNKNOWN)
1031 if (structSize == genTypeSize(useType))
1033 // Currently: 1, 2, 4, or 8 byte structs
1034 howToReturnStruct = SPK_PrimitiveType;
1038 // Currently: 3, 5, 6, or 7 byte structs
1039 assert(structSize < genTypeSize(useType));
1040 howToReturnStruct = SPK_EnclosingType;
1046 // Note this handles an odd case when FEATURE_MULTIREG_RET is disabled and HFAs are enabled
1048 // getPrimitiveTypeForStruct will return TYP_UNKNOWN for a struct that is an HFA of two floats
1049 // because when HFA are enabled, normally we would use two FP registers to pass or return it
1051 // But if we don't have support for multiple register return types, we have to change this.
1052 // Since what we have is an 8-byte struct (float + float) we change useType to TYP_I_IMPL
1053 // so that the struct is returned instead using an 8-byte integer register.
1055 if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
1057 useType = TYP_I_IMPL;
1058 howToReturnStruct = SPK_PrimitiveType;
1062 // Did we change this struct type into a simple "primitive" type?
1063 if (useType != TYP_UNKNOWN)
1065 // If so, we should have already set howToReturnStruct, too.
1066 assert(howToReturnStruct != SPK_Unknown);
1068 else if (canReturnInRegister) // We can't replace the struct with a "primitive" type
1070 // See if we can return this struct by value, possibly in multiple registers
1071 // or if we should return it using a return buffer register
1073 if ((FEATURE_MULTIREG_RET == 1) && (structSize <= MAX_RET_MULTIREG_BYTES))
1075 // Structs that are HFA's are returned in multiple registers
1078 // HFA's of count one should have been handled by getPrimitiveTypeForStruct
1079 assert(GetHfaCount(clsHnd) >= 2);
1081 // setup wbPassType and useType indicate that this is returned by value as an HFA
1082 // using multiple registers
1083 howToReturnStruct = SPK_ByValueAsHfa;
1084 useType = TYP_STRUCT;
1086 else // Not an HFA struct type
1089 #ifdef UNIX_AMD64_ABI
1091 // The cases of (structDesc.eightByteCount == 1) and (structDesc.eightByteCount == 0)
1092 // should have already been handled
1093 assert(structDesc.eightByteCount > 1);
1094 // setup wbPassType and useType indicate that this is returned by value in multiple registers
1095 howToReturnStruct = SPK_ByValue;
1096 useType = TYP_STRUCT;
1097 assert(structDesc.passedInRegisters == true);
1099 #elif defined(TARGET_ARM64)
1101 // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
1102 assert(structSize > TARGET_POINTER_SIZE);
1104 // On ARM64 structs that are 9-16 bytes are returned by value in multiple registers
1106 if (structSize <= (TARGET_POINTER_SIZE * 2))
1108 // setup wbPassType and useType indicate that this is return by value in multiple registers
1109 howToReturnStruct = SPK_ByValue;
1110 useType = TYP_STRUCT;
1112 else // a structSize that is 17-32 bytes in size
1114 // Otherwise we return this struct using a return buffer
1115 // setup wbPassType and useType indicate that this is returned using a return buffer register
1116 // (reference to a return buffer)
1117 howToReturnStruct = SPK_ByReference;
1118 useType = TYP_UNKNOWN;
1120 #elif defined(TARGET_X86)
1122 // Only 8-byte structs are return in multiple registers.
1123 // We also only support multireg struct returns on x86 to match the native calling convention.
1124 // So return 8-byte structs only when the calling convention is a native calling convention.
1125 if (structSize == MAX_RET_MULTIREG_BYTES && callConv != CorInfoCallConvExtension::Managed)
1127 // setup wbPassType and useType indicate that this is return by value in multiple registers
1128 howToReturnStruct = SPK_ByValue;
1129 useType = TYP_STRUCT;
1133 // Otherwise we return this struct using a return buffer
1134 // setup wbPassType and useType indicate that this is returned using a return buffer register
1135 // (reference to a return buffer)
1136 howToReturnStruct = SPK_ByReference;
1137 useType = TYP_UNKNOWN;
1139 #elif defined(TARGET_ARM)
1141 // Otherwise we return this struct using a return buffer
1142 // setup wbPassType and useType indicate that this is returned using a return buffer register
1143 // (reference to a return buffer)
1144 howToReturnStruct = SPK_ByReference;
1145 useType = TYP_UNKNOWN;
1149 noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");
1151 #endif // TARGET_XXX
1154 else // (structSize > MAX_RET_MULTIREG_BYTES) || (FEATURE_MULTIREG_RET == 0)
1156 // We have a (large) struct that can't be replaced with a "primitive" type
1157 // and can't be returned in multiple registers
1159 // We return this struct using a return buffer register
1160 // setup wbPassType and useType indicate that this is returned using a return buffer register
1161 // (reference to a return buffer)
1162 howToReturnStruct = SPK_ByReference;
1163 useType = TYP_UNKNOWN;
1167 // 'howToReturnStruct' must be set to one of the valid values before we return
1168 assert(howToReturnStruct != SPK_Unknown);
1169 if (wbReturnStruct != nullptr)
1171 *wbReturnStruct = howToReturnStruct;
1177 ///////////////////////////////////////////////////////////////////////////////
1179 // MEASURE_NOWAY: code to measure and rank dynamic occurrences of noway_assert.
1180 // (Just the appearances of noway_assert, whether the assert is true or false.)
1181 // This might help characterize the cost of noway_assert in non-DEBUG builds,
1182 // or determine which noway_assert should be simple DEBUG-only asserts.
1184 ///////////////////////////////////////////////////////////////////////////////
1194 FileLine() : m_file(nullptr), m_line(0), m_condStr(nullptr)
1198 FileLine(const char* file, unsigned line, const char* condStr) : m_line(line)
1200 size_t newSize = (strlen(file) + 1) * sizeof(char);
1201 m_file = HostAllocator::getHostAllocator().allocate<char>(newSize);
1202 strcpy_s(m_file, newSize, file);
1204 newSize = (strlen(condStr) + 1) * sizeof(char);
1205 m_condStr = HostAllocator::getHostAllocator().allocate<char>(newSize);
1206 strcpy_s(m_condStr, newSize, condStr);
1209 FileLine(const FileLine& other)
1211 m_file = other.m_file;
1212 m_line = other.m_line;
1213 m_condStr = other.m_condStr;
1216 // GetHashCode() and Equals() are needed by JitHashTable
1218 static unsigned GetHashCode(FileLine fl)
1220 assert(fl.m_file != nullptr);
1221 unsigned code = fl.m_line;
1222 for (const char* p = fl.m_file; *p != '\0'; p++)
1226 // Could also add condStr.
1230 static bool Equals(FileLine fl1, FileLine fl2)
1232 return (fl1.m_line == fl2.m_line) && (0 == strcmp(fl1.m_file, fl2.m_file));
// Map from a noway_assert's (file, line, condStr) key to its dynamic hit count.
// Lazily created (host-allocated) by Compiler::RecordNowayAssert.
1236 typedef JitHashTable<FileLine, FileLine, size_t, HostAllocator> FileLineToCountMap;
1237 FileLineToCountMap* NowayAssertMap;
// Compiler::RecordNowayAssert: record one dynamic occurrence of a noway_assert.
// Lazily allocates NowayAssertMap from the host allocator on first use, then
// records the (file, line, condStr) key. Only the first-occurrence path
// (Set(fl, 1)) is visible in this excerpt; presumably an existing entry's
// count is incremented in the elided else branch -- TODO confirm against full source.
1239 void Compiler::RecordNowayAssert(const char* filename, unsigned line, const char* condStr)
1241 if (NowayAssertMap == nullptr)
1243 NowayAssertMap = new (HostAllocator::getHostAllocator()) FileLineToCountMap(HostAllocator::getHostAllocator());
1245 FileLine fl(filename, line, condStr);
1246 size_t* pCount = NowayAssertMap->LookupPointer(fl);
1247 if (pCount == nullptr)
1249 NowayAssertMap->Set(fl, 1);
// Free-function entry point used by the noway_assert machinery: forwards to
// the thread's current Compiler, but only when the JitMeasureNowayAssert
// config knob is 1 and a compiler instance is active on this thread.
1257 void RecordNowayAssertGlobal(const char* filename, unsigned line, const char* condStr)
1259 if ((JitConfig.JitMeasureNowayAssert() == 1) && (JitTls::GetCompiler() != nullptr))
1261 JitTls::GetCompiler()->RecordNowayAssert(filename, line, condStr);
// Pairs a FileLine with its occurrence count so the map contents can be
// flattened into an array for sorting. The nested comparison functor orders
// entries by descending count, so the hottest noway_asserts are listed first.
1265 struct NowayAssertCountMap
1270 NowayAssertCountMap() : count(0)
1276 bool operator()(const NowayAssertCountMap& elem1, const NowayAssertCountMap& elem2)
1278 return (ssize_t)elem2.count < (ssize_t)elem1.count; // sort in descending order
// Dump the accumulated noway_assert counts, sorted by descending occurrence.
// Output goes to jitstdout unless JitMeasureNowayAssertFile names a file, in
// which case results are appended there (and the CSV-style header is skipped,
// since the file may already contain earlier dumps).
1283 void DisplayNowayAssertMap()
1285 if (NowayAssertMap != nullptr)
1289 LPCWSTR strJitMeasureNowayAssertFile = JitConfig.JitMeasureNowayAssertFile();
1290 if (strJitMeasureNowayAssertFile != nullptr)
1292 fout = _wfopen(strJitMeasureNowayAssertFile, W("a"));
1293 if (fout == nullptr)
1295 fprintf(jitstdout, "Failed to open JitMeasureNowayAssertFile \"%ws\"\n", strJitMeasureNowayAssertFile);
1304 // Iterate noway assert map, create sorted table by occurrence, dump it.
1305 unsigned count = NowayAssertMap->GetCount();
1306 NowayAssertCountMap* nacp = new NowayAssertCountMap[count];
1309 for (FileLineToCountMap::KeyIterator iter = NowayAssertMap->Begin(), end = NowayAssertMap->End();
1310 !iter.Equal(end); ++iter)
1312 nacp[i].count = iter.GetValue();
1313 nacp[i].fl = iter.Get();
1317 jitstd::sort(nacp, nacp + count, NowayAssertCountMap::compare());
1319 if (fout == jitstdout)
1321 // Don't output the header if writing to a file, since we'll be appending to existing dumps in that case.
1322 fprintf(fout, "\nnoway_assert counts:\n");
1323 fprintf(fout, "count, file, line, text\n");
1326 for (i = 0; i < count; i++)
1328 fprintf(fout, "%u, %s, %u, \"%s\"\n", nacp[i].count, nacp[i].fl.m_file, nacp[i].fl.m_line,
1329 nacp[i].fl.m_condStr);
1332 if (fout != jitstdout)
1340 #endif // MEASURE_NOWAY
1342 /*****************************************************************************
1343 * variables to keep track of how many iterations we go in a dataflow pass
1348 unsigned CSEiterCount; // counts the # of iteration for the CSE dataflow
1349 unsigned CFiterCount; // counts the # of iteration for the Const Folding dataflow
1351 #endif // DATAFLOW_ITER
1353 #if MEASURE_BLOCK_SIZE
1354 size_t genFlowNodeSize;
1355 size_t genFlowNodeCnt;
1356 #endif // MEASURE_BLOCK_SIZE
1358 /*****************************************************************************/
1359 // We keep track of methods we've already compiled.
1361 /*****************************************************************************
1362 * Declare the statics
1367 LONG Compiler::s_compMethodsCount = 0; // to produce unique label names
1370 #if MEASURE_MEM_ALLOC
1372 bool Compiler::s_dspMemStats = false;
1375 #ifndef PROFILING_SUPPORTED
1376 const bool Compiler::Options::compNoPInvokeInlineCB = false;
1379 /*****************************************************************************
1381 * One time initialization code
// Compiler::compStartup: one-time, process-wide JIT initialization.
// Zeroes the size-tracking counters (DISPLAY_SIZES builds), initializes the
// GenTree node-size table, the JIT32 GC encoder lookup table (when that
// encoder is configured), the emitter, and ValueNumStore statics, and then
// dumps the sizes of static data structures to jitstdout.
1385 void Compiler::compStartup()
1388 grossVMsize = grossNCsize = totalNCsize = 0;
1389 #endif // DISPLAY_SIZES
1391 /* Initialize the table of tree node sizes */
1393 GenTree::InitNodeSize();
1395 #ifdef JIT32_GCENCODER
1396 // Initialize the GC encoder lookup table
1398 GCInfo::gcInitEncoderLookupTable();
1401 /* Initialize the emitter */
1403 emitter::emitInit();
1405 // Static vars of ValueNumStore
1406 ValueNumStore::InitValueNumStoreStatics();
1408 compDisplayStaticSizes(jitstdout);
1411 /*****************************************************************************
1413 * One time finalization code
// Compiler::compShutdown: one-time, process-wide JIT finalization.
// Destroys the altjit-exclude and disasm-include assembly-name lists, dumps
// the noway_assert measurement map (MEASURE_NOWAY builds), shuts down the
// emitter, finalizes inline XML, and -- under the various measurement #if
// configurations -- prints accumulated statistics (GenTree operator counts,
// code/GC-info sizes, basic-block and loop tables, node/block/memory
// allocation stats, GC pointer table sizes, and fatal-error counts) to
// jitstdout or the configured log file.
1417 void Compiler::compShutdown()
1419 if (s_pAltJitExcludeAssembliesList != nullptr)
1421 s_pAltJitExcludeAssembliesList->~AssemblyNamesList2(); // call the destructor
1422 s_pAltJitExcludeAssembliesList = nullptr;
1426 if (s_pJitDisasmIncludeAssembliesList != nullptr)
1428 s_pJitDisasmIncludeAssembliesList->~AssemblyNamesList2(); // call the destructor
1429 s_pJitDisasmIncludeAssembliesList = nullptr;
1434 DisplayNowayAssertMap();
1435 #endif // MEASURE_NOWAY
1437 /* Shut down the emitter */
1439 emitter::emitDone();
1441 #if defined(DEBUG) || defined(INLINE_DATA)
1442 // Finish reading and/or writing inline xml
1443 InlineStrategy::FinalizeXml();
1444 #endif // defined(DEBUG) || defined(INLINE_DATA)
1446 #if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
1447 if (genMethodCnt == 0)
1454 GenTree::ReportOperBashing(jitstdout);
1457 // Where should we write our statistics output?
1458 FILE* fout = jitstdout;
1460 #ifdef FEATURE_JIT_METHOD_PERF
1461 if (compJitTimeLogFilename != nullptr)
1463 FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
1464 if (jitTimeLogFile != nullptr)
1466 CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
1467 fclose(jitTimeLogFile);
1470 #endif // FEATURE_JIT_METHOD_PERF
// GenTree operator count report (COUNT_AST_OPERS builds): totals first so
// per-operator lines can show percentages of the whole.
1474 // Add up all the counts so that we can show percentages of total
1476 for (unsigned op = 0; op < GT_COUNT; op++)
1477 gtc += GenTree::s_gtNodeCounts[op];
1481 unsigned rem_total = gtc;
1482 unsigned rem_large = 0;
1483 unsigned rem_small = 0;
1485 unsigned tot_large = 0;
1486 unsigned tot_small = 0;
1488 fprintf(fout, "\nGenTree operator counts (approximate):\n\n");
1490 for (unsigned op = 0; op < GT_COUNT; op++)
1492 unsigned siz = GenTree::s_gtTrueSizes[op];
1493 unsigned cnt = GenTree::s_gtNodeCounts[op];
1494 double pct = 100.0 * cnt / gtc;
1496 if (siz > TREE_NODE_SZ_SMALL)
1501 // Let's not show anything below a threshold
1504 fprintf(fout, " GT_%-17s %7u (%4.1lf%%) %3u bytes each\n", GenTree::OpName((genTreeOps)op), cnt,
1510 if (siz > TREE_NODE_SZ_SMALL)
1518 fprintf(fout, " All other GT_xxx ... %7u (%4.1lf%%) ... %4.1lf%% small + %4.1lf%% large\n", rem_total,
1519 100.0 * rem_total / gtc, 100.0 * rem_small / gtc, 100.0 * rem_large / gtc);
1521 fprintf(fout, " -----------------------------------------------------\n");
1522 fprintf(fout, " Total ....... %11u --ALL-- ... %4.1lf%% small + %4.1lf%% large\n", gtc,
1523 100.0 * tot_small / gtc, 100.0 * tot_large / gtc);
1524 fprintf(fout, "\n");
1527 #endif // COUNT_AST_OPERS
// Function/GC-info size statistics (DISPLAY_SIZES builds).
1531 if (grossVMsize && grossNCsize)
1533 fprintf(fout, "\n");
1534 fprintf(fout, "--------------------------------------\n");
1535 fprintf(fout, "Function and GC info size stats\n");
1536 fprintf(fout, "--------------------------------------\n");
1538 fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, grossNCsize, Target::g_tgtCPUName,
1539 100 * grossNCsize / grossVMsize, "Total (excluding GC info)");
1541 fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, totalNCsize, Target::g_tgtCPUName,
1542 100 * totalNCsize / grossVMsize, "Total (including GC info)");
1544 if (gcHeaderISize || gcHeaderNSize)
1546 fprintf(fout, "\n");
1548 fprintf(fout, "GC tables : [%7uI,%7uN] %7u byt (%u%% of IL, %u%% of %s).\n",
1549 gcHeaderISize + gcPtrMapISize, gcHeaderNSize + gcPtrMapNSize, totalNCsize - grossNCsize,
1550 100 * (totalNCsize - grossNCsize) / grossVMsize, 100 * (totalNCsize - grossNCsize) / grossNCsize,
1551 Target::g_tgtCPUName);
1553 fprintf(fout, "GC headers : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcHeaderISize,
1554 gcHeaderNSize, gcHeaderISize + gcHeaderNSize, (float)gcHeaderISize / (genMethodICnt + 0.001),
1555 (float)gcHeaderNSize / (genMethodNCnt + 0.001),
1556 (float)(gcHeaderISize + gcHeaderNSize) / genMethodCnt);
1558 fprintf(fout, "GC ptr maps : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcPtrMapISize,
1559 gcPtrMapNSize, gcPtrMapISize + gcPtrMapNSize, (float)gcPtrMapISize / (genMethodICnt + 0.001),
1560 (float)gcPtrMapNSize / (genMethodNCnt + 0.001),
1561 (float)(gcPtrMapISize + gcPtrMapNSize) / genMethodCnt);
1565 fprintf(fout, "\n");
1567 fprintf(fout, "GC tables take up %u bytes (%u%% of instr, %u%% of %6s code).\n",
1568 totalNCsize - grossNCsize, 100 * (totalNCsize - grossNCsize) / grossVMsize,
1569 100 * (totalNCsize - grossNCsize) / grossNCsize, Target::g_tgtCPUName);
1574 fprintf(fout, "%u out of %u methods generated with double-aligned stack\n",
1575 Compiler::s_lvaDoubleAlignedProcsCount, genMethodCnt);
1580 #endif // DISPLAY_SIZES
1583 compDispCallArgStats(fout);
1586 #if COUNT_BASIC_BLOCKS
1587 fprintf(fout, "--------------------------------------------------\n");
1588 fprintf(fout, "Basic block count frequency table:\n");
1589 fprintf(fout, "--------------------------------------------------\n");
1590 bbCntTable.dump(fout);
1591 fprintf(fout, "--------------------------------------------------\n");
1593 fprintf(fout, "\n");
1595 fprintf(fout, "--------------------------------------------------\n");
1596 fprintf(fout, "IL method size frequency table for methods with a single basic block:\n");
1597 fprintf(fout, "--------------------------------------------------\n");
1598 bbOneBBSizeTable.dump(fout);
1599 fprintf(fout, "--------------------------------------------------\n");
1600 #endif // COUNT_BASIC_BLOCKS
// Loop statistics (COUNT_LOOPS builds).
1604 fprintf(fout, "\n");
1605 fprintf(fout, "---------------------------------------------------\n");
1606 fprintf(fout, "Loop stats\n");
1607 fprintf(fout, "---------------------------------------------------\n");
1608 fprintf(fout, "Total number of methods with loops is %5u\n", totalLoopMethods);
1609 fprintf(fout, "Total number of loops is %5u\n", totalLoopCount);
1610 fprintf(fout, "Maximum number of loops per method is %5u\n", maxLoopsPerMethod);
1611 fprintf(fout, "# of methods overflowing nat loop table is %5u\n", totalLoopOverflows);
1612 fprintf(fout, "Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount);
1613 fprintf(fout, "# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows);
1614 fprintf(fout, "Total number of loops with an iterator is %5u\n", iterLoopCount);
1615 fprintf(fout, "Total number of loops with a simple iterator is %5u\n", simpleTestLoopCount);
1616 fprintf(fout, "Total number of loops with a constant iterator is %5u\n", constIterLoopCount);
1618 fprintf(fout, "--------------------------------------------------\n");
1619 fprintf(fout, "Loop count frequency table:\n");
1620 fprintf(fout, "--------------------------------------------------\n");
1621 loopCountTable.dump(fout);
1622 fprintf(fout, "--------------------------------------------------\n");
1623 fprintf(fout, "Loop exit count frequency table:\n");
1624 fprintf(fout, "--------------------------------------------------\n");
1625 loopExitCountTable.dump(fout);
1626 fprintf(fout, "--------------------------------------------------\n");
1628 #endif // COUNT_LOOPS
1632 fprintf(fout, "---------------------------------------------------\n");
1633 fprintf(fout, "Total number of iterations in the CSE dataflow loop is %5u\n", CSEiterCount);
1634 fprintf(fout, "Total number of iterations in the CF dataflow loop is %5u\n", CFiterCount);
1636 #endif // DATAFLOW_ITER
1638 #if MEASURE_NODE_SIZE
1640 fprintf(fout, "\n");
1641 fprintf(fout, "---------------------------------------------------\n");
1642 fprintf(fout, "GenTree node allocation stats\n");
1643 fprintf(fout, "---------------------------------------------------\n");
1645 fprintf(fout, "Allocated %6I64u tree nodes (%7I64u bytes total, avg %4I64u bytes per method)\n",
1646 genNodeSizeStats.genTreeNodeCnt, genNodeSizeStats.genTreeNodeSize,
1647 genNodeSizeStats.genTreeNodeSize / genMethodCnt);
1649 fprintf(fout, "Allocated %7I64u bytes of unused tree node space (%3.2f%%)\n",
1650 genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize,
1651 (float)(100 * (genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize)) /
1652 genNodeSizeStats.genTreeNodeSize);
1654 fprintf(fout, "\n");
1655 fprintf(fout, "---------------------------------------------------\n");
1656 fprintf(fout, "Distribution of per-method GenTree node counts:\n");
1657 genTreeNcntHist.dump(fout);
1659 fprintf(fout, "\n");
1660 fprintf(fout, "---------------------------------------------------\n");
1661 fprintf(fout, "Distribution of per-method GenTree node allocations (in bytes):\n");
1662 genTreeNsizHist.dump(fout);
1664 #endif // MEASURE_NODE_SIZE
1666 #if MEASURE_BLOCK_SIZE
1668 fprintf(fout, "\n");
1669 fprintf(fout, "---------------------------------------------------\n");
1670 fprintf(fout, "BasicBlock and flowList/BasicBlockList allocation stats\n");
1671 fprintf(fout, "---------------------------------------------------\n");
1673 fprintf(fout, "Allocated %6u basic blocks (%7u bytes total, avg %4u bytes per method)\n", BasicBlock::s_Count,
1674 BasicBlock::s_Size, BasicBlock::s_Size / genMethodCnt);
1675 fprintf(fout, "Allocated %6u flow nodes (%7u bytes total, avg %4u bytes per method)\n", genFlowNodeCnt,
1676 genFlowNodeSize, genFlowNodeSize / genMethodCnt);
1678 #endif // MEASURE_BLOCK_SIZE
1680 #if MEASURE_MEM_ALLOC
1684 fprintf(fout, "\nAll allocations:\n");
1685 ArenaAllocator::dumpAggregateMemStats(jitstdout);
1687 fprintf(fout, "\nLargest method:\n");
1688 ArenaAllocator::dumpMaxMemStats(jitstdout);
1690 fprintf(fout, "\n");
1691 fprintf(fout, "---------------------------------------------------\n");
1692 fprintf(fout, "Distribution of total memory allocated per method (in KB):\n");
1693 memAllocHist.dump(fout);
1695 fprintf(fout, "\n");
1696 fprintf(fout, "---------------------------------------------------\n");
1697 fprintf(fout, "Distribution of total memory used per method (in KB):\n");
1698 memUsedHist.dump(fout);
1701 #endif // MEASURE_MEM_ALLOC
1703 #if LOOP_HOIST_STATS
1704 #ifdef DEBUG // Always display loop stats in retail
1705 if (JitConfig.DisplayLoopHoistStats() != 0)
1708 PrintAggregateLoopHoistStats(jitstdout);
1710 #endif // LOOP_HOIST_STATS
1712 #if MEASURE_PTRTAB_SIZE
1714 fprintf(fout, "\n");
1715 fprintf(fout, "---------------------------------------------------\n");
1716 fprintf(fout, "GC pointer table stats\n");
1717 fprintf(fout, "---------------------------------------------------\n");
1719 fprintf(fout, "Reg pointer descriptor size (internal): %8u (avg %4u per method)\n", GCInfo::s_gcRegPtrDscSize,
1720 GCInfo::s_gcRegPtrDscSize / genMethodCnt);
1722 fprintf(fout, "Total pointer table size: %8u (avg %4u per method)\n", GCInfo::s_gcTotalPtrTabSize,
1723 GCInfo::s_gcTotalPtrTabSize / genMethodCnt);
1725 #endif // MEASURE_PTRTAB_SIZE
1727 #if MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
1729 if (genMethodCnt != 0)
1731 fprintf(fout, "\n");
1732 fprintf(fout, "A total of %6u methods compiled", genMethodCnt);
1734 if (genMethodICnt || genMethodNCnt)
1736 fprintf(fout, " (%u interruptible, %u non-interruptible)", genMethodICnt, genMethodNCnt);
1738 #endif // DISPLAY_SIZES
1739 fprintf(fout, ".\n");
1742 #endif // MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
// Fatal-error counters (MEASURE_FATAL builds).
1749 fprintf(fout, "\n");
1750 fprintf(fout, "---------------------------------------------------\n");
1751 fprintf(fout, "Fatal errors stats\n");
1752 fprintf(fout, "---------------------------------------------------\n");
1753 fprintf(fout, " badCode: %u\n", fatal_badCode);
1754 fprintf(fout, " noWay: %u\n", fatal_noWay);
1755 fprintf(fout, " implLimitation: %u\n", fatal_implLimitation);
1756 fprintf(fout, " NOMEM: %u\n", fatal_NOMEM);
1757 fprintf(fout, " noWayAssertBody: %u\n", fatal_noWayAssertBody);
1759 fprintf(fout, " noWayAssertBodyArgs: %u\n", fatal_noWayAssertBodyArgs);
1761 fprintf(fout, " NYI: %u\n", fatal_NYI);
1762 #endif // MEASURE_FATAL
1765 /*****************************************************************************
1766 * Display static data structure sizes.
// Compiler::compDisplayStaticSizes: dump the sizes of the JIT's static data
// structures (GenTree node sizes, emitter statics) to the given stream; each
// section is only active under its corresponding measurement #if.
1770 void Compiler::compDisplayStaticSizes(FILE* fout)
1772 #if MEASURE_NODE_SIZE
1773 GenTree::DumpNodeSizes(fout);
1777 emitterStaticStats(fout);
1781 /*****************************************************************************
// Compiler::compInit: per-compilation initialization -- effectively the
// "constructor" for a Compiler instance. Records the arena allocator and the
// EE handles, initializes the method/class names and stress opt-in state
// (DEBUG/LATE_DISASM), selects inlinee vs. root-compile strategy based on
// 'inlineInfo', and resets the many per-method flags, counters, and cached
// maps to their default values.
//
// Arguments:
//    pAlloc     - arena allocator for this compilation
//    methodHnd  - handle of the method being compiled
//    compHnd    - the JIT-EE interface handle
//    methodInfo - IL and signature info for the method
//    inlineInfo - non-null iff this Compiler is compiling an inlinee
1785 void Compiler::compInit(ArenaAllocator* pAlloc,
1786 CORINFO_METHOD_HANDLE methodHnd,
1787 COMP_HANDLE compHnd,
1788 CORINFO_METHOD_INFO* methodInfo,
1789 InlineInfo* inlineInfo)
1792 compArenaAllocator = pAlloc;
1794 // Inlinee Compile object will only be allocated when needed for the 1st time.
1795 InlineeCompiler = nullptr;
1797 // Set the inline info.
1798 impInlineInfo = inlineInfo;
1799 info.compCompHnd = compHnd;
1800 info.compMethodHnd = methodHnd;
1801 info.compMethodInfo = methodInfo;
1804 bRangeAllowStress = false;
1807 #if defined(DEBUG) || defined(LATE_DISASM)
1808 // Initialize the method name and related info, as it is used early in determining whether to
1809 // apply stress modes, and which ones to apply.
1810 // Note that even allocating memory can invoke the stress mechanism, so ensure that both
1811 // 'compMethodName' and 'compFullName' are either null or valid before we allocate.
1812 // (The stress mode checks references these prior to checking bRangeAllowStress.)
1814 info.compMethodName = nullptr;
1815 info.compClassName = nullptr;
1816 info.compFullName = nullptr;
1818 const char* classNamePtr;
1819 const char* methodName;
1821 methodName = eeGetMethodName(methodHnd, &classNamePtr);
1822 unsigned len = (unsigned)roundUp(strlen(classNamePtr) + 1);
1823 info.compClassName = getAllocator(CMK_DebugOnly).allocate<char>(len);
1824 info.compMethodName = methodName;
1825 strcpy_s((char*)info.compClassName, len, classNamePtr);
1827 info.compFullName = eeGetMethodFullName(methodHnd);
1828 info.compPerfScore = 0.0;
1829 #endif // defined(DEBUG) || defined(LATE_DISASM)
1831 #if defined(DEBUG) || defined(INLINE_DATA)
1832 info.compMethodHashPrivate = 0;
1833 #endif // defined(DEBUG) || defined(INLINE_DATA)
1836 // Opt-in to jit stress based on method hash ranges.
1838 // Note the default (with JitStressRange not set) is that all
1839 // methods will be subject to stress.
1840 static ConfigMethodRange fJitStressRange;
1841 fJitStressRange.EnsureInit(JitConfig.JitStressRange());
1842 assert(!fJitStressRange.Error());
1843 bRangeAllowStress = fJitStressRange.Contains(info.compMethodHash());
1846 eeInfoInitialized = false;
1848 compDoAggressiveInlining = false;
// Inlinee compilers share the root's inline strategy; only a root compile
// creates a new InlineStrategy.
1850 if (compIsForInlining())
1852 m_inlineStrategy = nullptr;
1853 compInlineResult = inlineInfo->inlineResult;
1857 m_inlineStrategy = new (this, CMK_Inlining) InlineStrategy(this);
1858 compInlineResult = nullptr;
1861 // Initialize this to the first phase to run.
1862 mostRecentlyActivePhase = PHASE_PRE_IMPORT;
1864 // Initially, no phase checks are active.
1865 activePhaseChecks = PhaseChecks::CHECK_NONE;
1867 #ifdef FEATURE_TRACELOGGING
1868 // Make sure JIT telemetry is initialized as soon as allocations can be made
1869 // but no later than a point where noway_asserts can be thrown.
1870 // 1. JIT telemetry could allocate some objects internally.
1871 // 2. NowayAsserts are tracked through telemetry.
1872 // Note: JIT telemetry could gather data when compiler is not fully initialized.
1873 // So you have to initialize the compiler variables you use for telemetry.
1874 assert((unsigned)PHASE_PRE_IMPORT == 0);
1875 info.compILCodeSize = 0;
1876 info.compMethodHnd = nullptr;
1877 compJitTelemetry.Initialize(this);
1883 if (!compIsForInlining())
1885 codeGen = getCodeGenerator(this);
1889 compVarScopeMap = nullptr;
1891 // If this method were a real constructor for Compiler, these would
1892 // become method initializations.
1893 impPendingBlockMembers = JitExpandArray<BYTE>(getAllocator());
1894 impSpillCliquePredMembers = JitExpandArray<BYTE>(getAllocator());
1895 impSpillCliqueSuccMembers = JitExpandArray<BYTE>(getAllocator());
1897 lvMemoryPerSsaData = SsaDefArray<SsaMemDef>();
1900 // Initialize all the per-method statistics gathering data structures.
1905 #if LOOP_HOIST_STATS
1906 m_loopsConsidered = 0;
1907 m_curLoopHasHoistedExpression = false;
1908 m_loopsWithHoistedExpressions = 0;
1909 m_totalHoistedExpressions = 0;
1910 #endif // LOOP_HOIST_STATS
1911 #if MEASURE_NODE_SIZE
1912 genNodeSizeStatsPerFunc.Init();
1913 #endif // MEASURE_NODE_SIZE
// Reset per-method "feature used" flags and phase-progress flags.
1920 compJmpOpUsed = false;
1921 compLongUsed = false;
1922 compTailCallUsed = false;
1923 compLocallocUsed = false;
1924 compLocallocOptimized = false;
1925 compQmarkRationalized = false;
1926 compQmarkUsed = false;
1927 compFloatingPointUsed = false;
1928 compUnsafeCastUsed = false;
1930 compSuppressedZeroInit = false;
1932 compNeedsGSSecurityCookie = false;
1933 compGSReorderStackLayout = false;
1935 compGeneratingProlog = false;
1936 compGeneratingEpilog = false;
1938 compLSRADone = false;
1939 compRationalIRForm = false;
1942 compCodeGenDone = false;
1943 opts.compMinOptsIsUsed = false;
1945 opts.compMinOptsIsSet = false;
1947 // Used by fgFindJumpTargets for inlining heuristics.
1948 opts.instrCount = 0;
1950 // Used to track when we should consider running EarlyProp
1952 optNoReturnCallCount = 0;
1955 m_nodeTestData = nullptr;
1956 m_loopHoistCSEClass = FIRST_LOOP_HOIST_CSE_CLASS;
1958 m_switchDescMap = nullptr;
1959 m_blockToEHPreds = nullptr;
1960 m_fieldSeqStore = nullptr;
1961 m_zeroOffsetFieldMap = nullptr;
1962 m_arrayInfoMap = nullptr;
1963 m_refAnyClass = nullptr;
1964 for (MemoryKind memoryKind : allMemoryKinds())
1966 m_memorySsaMap[memoryKind] = nullptr;
1970 if (!compIsForInlining())
1972 compDoComponentUnitTestsOnce();
1977 m_opAsgnVarDefSsaNums = nullptr;
1978 fgSsaPassesCompleted = 0;
1979 fgVNPassesCompleted = 0;
1981 // check that HelperCallProperties are initialized
1983 assert(s_helperCallProperties.IsPure(CORINFO_HELP_GETSHARED_GCSTATIC_BASE));
1984 assert(!s_helperCallProperties.IsPure(CORINFO_HELP_GETFIELDOBJ)); // quick sanity check
1986 // We start with the flow graph in tree-order
1987 fgOrder = FGOrderTree;
1989 m_classLayoutTable = nullptr;
1992 m_simdHandleCache = nullptr;
1993 #endif // FEATURE_SIMD
1995 compUsesThrowHelper = false;
1998 /*****************************************************************************
// Compiler::compDone: per-compilation teardown hook (body elided in this excerpt).
2003 void Compiler::compDone()
// Compiler::compGetHelperFtn: look up the entry point (or, via *ppIndirection,
// an indirection cell) for a JIT helper. When running as an altjit against a
// mismatched VM the real address would be meaningless, so a recognizable dummy
// value 0xCA11CA11 ("callcall") is returned instead.
2007 void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
2008 void** ppIndirection) /* OUT */
2012 if (info.compMatchedVM)
2014 addr = info.compCompHnd->getHelperFtn(ftnNum, ppIndirection);
2018 // If we don't have a matched VM, we won't get valid results when asking for a helper function.
2019 addr = UlongToPtr(0xCA11CA11); // "callcall"
// Compiler::compGetTypeSize: size in bytes of a signature type. Value classes
// ask the EE for the class size, REFANY (TypedReference) occupies two
// pointers, and all other types use the size of their actualized VM type.
2025 unsigned Compiler::compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd)
2027 var_types sigType = genActualType(JITtype2varType(cit));
2029 sigSize = genTypeSize(sigType);
2030 if (cit == CORINFO_TYPE_VALUECLASS)
2032 sigSize = info.compCompHnd->getClassSize(clsHnd);
2034 else if (cit == CORINFO_TYPE_REFANY)
2036 sigSize = 2 * TARGET_POINTER_SIZE;
// Tracks whether the one-shot component self-tests have already run in this process.
2042 static bool DidComponentUnitTests = false;
// Compiler::compDoComponentUnitTestsOnce: run the opt-in component self-tests
// (value numbering, bitsets) at most once per process, gated on the
// RunComponentUnitTests config flag.
2044 void Compiler::compDoComponentUnitTestsOnce()
2046 if (!JitConfig.RunComponentUnitTests())
2051 if (!DidComponentUnitTests)
2053 DidComponentUnitTests = true;
2054 ValueNumStore::RunTests(this);
2055 BitSetSupport::TestSuite(getAllocatorDebugOnly());
2059 //------------------------------------------------------------------------
2060 // compGetJitDefaultFill:
2063 // An unsigned char value used to initialize memory allocated by the JIT.
2064 // The default value is taken from COMPLUS_JitDefaultFill; if it is not set
2065 // the value will be 0xdd. When JitStress is active a random value based
2066 // on the method hash is used.
2069 // Note that we can't use small values like zero, because we have some
2070 // asserts that can fire for such values.
// Choose the byte used to fill JIT-allocated memory: normally the configured
// JitDefaultFill value, but under stress a pseudo-random value derived from
// the method hash (xor-folded down toward one byte) so uninitialized-use bugs
// surface with varied patterns.
2073 unsigned char Compiler::compGetJitDefaultFill(Compiler* comp)
2075 unsigned char defaultFill = (unsigned char)JitConfig.JitDefaultFill();
2077 if (comp != nullptr && comp->compStressCompile(STRESS_GENERIC_VARN, 50))
2080 temp = comp->info.compMethodHash();
2081 temp = (temp >> 16) ^ temp;
2082 temp = (temp >> 8) ^ temp;
2084 // asserts like this: assert(!IsUninitialized(stkLvl));
2085 // mean that small values for defaultFill are problematic
2086 // so we make the value larger in that case.
2092 // Make a misaligned pointer value to reduce probability of getting a valid value and firing
2093 // assert(!IsUninitialized(pointer)).
2096 defaultFill = (unsigned char)temp;
2104 /*****************************************************************************/
2106 /*****************************************************************************/
// Compiler::compVarName: map a register back to the source-level variable name
// currently live in it, for debug dumps. Scans the local-variable table for a
// live, user-visible local assigned to 'reg' (skipping compiler temps, whose
// slot numbers exceed compVarScopesCount) and returns its scope name; only
// attempted when variable names are available (opts.varNames) and we are
// positioned inside a basic block.
2108 VarName Compiler::compVarName(regNumber reg, bool isFloatReg)
2112 assert(genIsValidFloatReg(reg));
2116 assert(genIsValidReg(reg));
2119 if ((info.compVarScopesCount > 0) && compCurBB && opts.varNames)
2124 /* Look for the matching register */
2125 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
2127 /* If the variable is not in a register, or not in the register we're looking for, quit. */
2128 /* Also, if it is a compiler generated variable (i.e. slot# > info.compVarScopesCount), don't bother. */
2129 if ((varDsc->lvRegister != 0) && (varDsc->GetRegNum() == reg) &&
2130 (varDsc->IsFloatRegType() || !isFloatReg) && (varDsc->lvSlotNum < info.compVarScopesCount))
2132 /* check if variable in that register is live */
2133 if (VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
2135 /* variable is live - find the corresponding slot */
2136 VarScopeDsc* varScope =
2137 compFindLocalVar(varDsc->lvSlotNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
2140 return varScope->vsdName;
// Compiler::compRegVarName: printable name for a register. When 'displayVar'
// is set and a source variable is live in the register, the variable's name is
// appended, e.g. "rax'myVar'". Two alternating static buffers are used so two
// consecutive calls can both be passed to a single printf before either result
// is consumed (not thread-safe, debug-dump use only).
2150 const char* Compiler::compRegVarName(regNumber reg, bool displayVar, bool isFloatReg)
2154 isFloatReg = genIsValidFloatReg(reg);
2157 if (displayVar && (reg != REG_NA))
2159 VarName varName = compVarName(reg, isFloatReg);
2163 const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
2164 static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when have 2
2165 // consecutive calls before printing
2166 static int index = 0; // for circular index into the name array
2168 index = (index + 1) % 2; // circular reuse of index
2169 sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "%s'%s'", getRegName(reg, isFloatReg),
2170 VarNameToStr(varName));
2172 return nameVarReg[index];
2176 /* no debug info required or no variable in that register
2177 -> return standard name */
2179 return getRegName(reg, isFloatReg);
// Compiler::compRegNameForSize: register name for a given access size. Sizes
// 0 and >= 4 use the full (possibly variable-annotated) name; sizes 1 and 2
// index a lookup table of byte/word sub-register names (e.g. "al"/"ax").
// Only byte-addressable registers are valid for the small sizes.
2182 const char* Compiler::compRegNameForSize(regNumber reg, size_t size)
2184 if (size == 0 || size >= 4)
2186 return compRegVarName(reg, true);
2191 const char * sizeNames[][2] =
2198 { "spl", "sp" }, // ESP
2199 { "bpl", "bp" }, // EBP
2200 { "sil", "si" }, // ESI
2201 { "dil", "di" }, // EDI
2210 #endif // TARGET_AMD64
2214 assert(isByteReg(reg));
2215 assert(genRegMask(reg) & RBM_BYTE_REGS);
2216 assert(size == 1 || size == 2);
2218 return sizeNames[reg][size - 1];
// Compiler::compFPregVarName: printable name for an x87-style FP stack
// register, formatted "ST(n)". 'displayVar' appears unused in the visible
// code -- TODO confirm against the full source. Two alternating static
// buffers allow two consecutive calls within a single printf (not
// thread-safe, debug-dump use only).
2221 const char* Compiler::compFPregVarName(unsigned fpReg, bool displayVar)
2223 const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
2224 static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when have 2 consecutive calls
2226 static int index = 0; // for circular index into the name array
2228 index = (index + 1) % 2; // circular reuse of index
2230 /* no debug info required or no variable in that register
2231 -> return standard name */
2233 sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "ST(%d)", fpReg);
2234 return nameVarReg[index];
// compLocalVarName: return the debug-info name of local variable 'varNum' whose scope
// covers IL offset 'offs', by linearly scanning the info.compVarScopes table.
// Returns the matching scope's name; the not-found return path is outside this
// sampled view (presumably nullptr — TODO confirm against the full source).
2237 const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
2242 for (i = 0, t = info.compVarScopes; i < info.compVarScopesCount; i++, t++)
// Skip scope entries for other variables.
2244 if (t->vsdVarNum != varNum)
// Half-open lifetime check: [vsdLifeBeg, vsdLifeEnd).
2249 if (offs >= t->vsdLifeBeg && offs < t->vsdLifeEnd)
2251 return VarNameToStr(t->vsdName);
2258 /*****************************************************************************/
2260 /*****************************************************************************/
// compSetProcessor: record the target CPU family in info.genCPU and compute the set of
// instruction sets (ISAs) this compilation may use: start from the VM-reported hardware
// flags, add the dummy VectorNN ISAs, then strip any ISA the COMPlus_/JitConfig knobs
// disable, validate implications, and store the result via opts.setSupportedISAs.
// NOTE(review): this view is sampled — many #if openers and braces are not visible.
2262 void Compiler::compSetProcessor()
2265 // NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codeman.cpp
2268 const JitFlags& jitFlags = *opts.jitFlags;
// --- Target CPU identification, per compile target ---
2270 #if defined(TARGET_ARM)
2271 info.genCPU = CPU_ARM;
2272 #elif defined(TARGET_ARM64)
2273 info.genCPU = CPU_ARM64;
2274 #elif defined(TARGET_AMD64)
2275 info.genCPU = CPU_X64;
2276 #elif defined(TARGET_X86)
2277 if (jitFlags.IsSet(JitFlags::JIT_FLAG_TARGET_P4))
2278 info.genCPU = CPU_X86_PENTIUM_4;
2280 info.genCPU = CPU_X86;
2284 // Processor specific optimizations
2286 CLANG_FORMAT_COMMENT_ANCHOR;
// CMOV is unconditionally available on x64; on x86 it depends on a VM flag,
// and stress mode can randomly disable it to exercise the non-CMOV path.
2289 opts.compUseCMOV = true;
2290 #elif defined(TARGET_X86)
2291 opts.compUseCMOV = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_CMOV);
2293 if (opts.compUseCMOV)
2294 opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
2297 #endif // TARGET_X86
2299 // The VM will set the ISA flags depending on actual hardware support.
2300 // We then select which ISAs to leave enabled based on the JIT config.
2301 // The exception to this is the dummy Vector64/128/256 ISAs, which must be added explicitly.
2302 CORINFO_InstructionSetFlags instructionSetFlags = jitFlags.GetInstructionSetFlags();
2303 opts.compSupportsISA = 0;
2304 opts.compSupportsISAReported = 0;
2305 opts.compSupportsISAExactly = 0;
// --- xarch: dummy vector ISAs plus per-ISA COMPlus_Enable* opt-out knobs ---
2308 if (JitConfig.EnableHWIntrinsic())
2310 // Dummy ISAs for simplifying the JIT code
2311 instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
2312 instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
2315 if (!JitConfig.EnableSSE())
2317 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE);
// The *_X64 variants carry the 64-bit-only forms and are removed together
// with their base ISA (64-bit target only — guarded by #ifdefs not visible here).
2319 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE_X64);
2323 if (!JitConfig.EnableSSE2())
2325 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE2);
2327 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE2_X64);
2331 if (!JitConfig.EnableAES())
2333 instructionSetFlags.RemoveInstructionSet(InstructionSet_AES);
2336 if (!JitConfig.EnablePCLMULQDQ())
2338 instructionSetFlags.RemoveInstructionSet(InstructionSet_PCLMULQDQ);
2341 // We need to additionally check that COMPlus_EnableSSE3_4 is set, as that
2342 // is a prexisting config flag that controls the SSE3+ ISAs
2343 if (!JitConfig.EnableSSE3() || !JitConfig.EnableSSE3_4())
2345 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE3);
2348 if (!JitConfig.EnableSSSE3())
2350 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSSE3);
2353 if (!JitConfig.EnableSSE41())
2355 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE41);
2357 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE41_X64);
2361 if (!JitConfig.EnableSSE42())
2363 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE42);
2365 instructionSetFlags.RemoveInstructionSet(InstructionSet_SSE42_X64);
2369 if (!JitConfig.EnablePOPCNT())
2371 instructionSetFlags.RemoveInstructionSet(InstructionSet_POPCNT);
2373 instructionSetFlags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
2377 if (!JitConfig.EnableAVX())
2379 instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX);
2382 if (!JitConfig.EnableFMA())
2384 instructionSetFlags.RemoveInstructionSet(InstructionSet_FMA);
2387 if (!JitConfig.EnableAVX2())
2389 instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2);
2392 if (!JitConfig.EnableLZCNT())
2394 instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT);
2396 instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT_X64);
2397 #endif // TARGET_AMD64
2400 if (!JitConfig.EnableBMI1())
2402 instructionSetFlags.RemoveInstructionSet(InstructionSet_BMI1);
2404 instructionSetFlags.RemoveInstructionSet(InstructionSet_BMI1_X64);
2405 #endif // TARGET_AMD64
2408 if (!JitConfig.EnableBMI2())
2410 instructionSetFlags.RemoveInstructionSet(InstructionSet_BMI2);
2412 instructionSetFlags.RemoveInstructionSet(InstructionSet_BMI2_X64);
2413 #endif // TARGET_AMD64
2416 #endif // TARGET_XARCH
// --- arm64: dummy vector ISAs plus per-ISA COMPlus_EnableArm64* opt-out knobs ---
2417 #if defined(TARGET_ARM64)
2418 if (JitConfig.EnableHWIntrinsic())
2420 // Dummy ISAs for simplifying the JIT code
2421 instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
2422 instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
2425 if (!JitConfig.EnableArm64Aes())
2427 instructionSetFlags.RemoveInstructionSet(InstructionSet_Aes);
2430 if (!JitConfig.EnableArm64Atomics())
2432 instructionSetFlags.RemoveInstructionSet(InstructionSet_Atomics);
2435 if (!JitConfig.EnableArm64Crc32())
2437 instructionSetFlags.RemoveInstructionSet(InstructionSet_Crc32);
2438 instructionSetFlags.RemoveInstructionSet(InstructionSet_Crc32_Arm64);
2441 if (!JitConfig.EnableArm64Sha1())
2443 instructionSetFlags.RemoveInstructionSet(InstructionSet_Sha1);
2446 if (!JitConfig.EnableArm64Sha256())
2448 instructionSetFlags.RemoveInstructionSet(InstructionSet_Sha256);
2451 if (!JitConfig.EnableArm64AdvSimd())
2453 instructionSetFlags.RemoveInstructionSet(InstructionSet_AdvSimd);
2454 instructionSetFlags.RemoveInstructionSet(InstructionSet_AdvSimd_Arm64);
// Enforce ISA implication rules (e.g. removing a base ISA removes dependents),
// then publish the final set on opts.
2458 instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags);
2459 opts.setSupportedISAs(instructionSetFlags);
// Root compilations own the emitter; configure VEX encoding state up front.
2462 if (!compIsForInlining())
2464 if (canUseVexEncoding())
2466 codeGen->GetEmitter()->SetUseVEXEncoding(true);
2467 // Assume each JITted method does not contain AVX instruction at first
2468 codeGen->GetEmitter()->SetContainsAVX(false);
2469 codeGen->GetEmitter()->SetContains256bitAVX(false);
2472 #endif // TARGET_XARCH
// notifyInstructionSetUsage: inform the VM (EE) that the generated code requires
// 'isa' to be supported (supported == true) or requires it to be absent
// (supported == false). Returns the VM's answer from the JIT-EE interface call.
2475 bool Compiler::notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported) const
2477 const char* isaString = InstructionSetToString(isa);
2478 JITDUMP("Notify VM instruction set (%s) %s be supported.\n", isaString, supported ? "must" : "must not");
2479 return info.compCompHnd->notifyInstructionSetUsage(isa, supported);
2482 #ifdef PROFILING_SUPPORTED
2483 // A Dummy routine to receive Enter/Leave/Tailcall profiler callbacks.
2484 // These are used when complus_JitEltHookEnabled=1
// Two arities: the AMD64 ELT convention also passes the caller's SP.
// Bodies are empty/no-op in the visible view — the stub exists only so a valid
// callback address can be installed when the fake ELT hook stress mode is on.
2486 void DummyProfilerELTStub(UINT_PTR ProfilerHandle, UINT_PTR callerSP)
2490 #else //! TARGET_AMD64
2491 void DummyProfilerELTStub(UINT_PTR ProfilerHandle)
2495 #endif //! TARGET_AMD64
2497 #endif // PROFILING_SUPPORTED
// compShouldThrowOnNoway: decide whether a noway_assert should raise an exception
// (true) or be silently tolerated so codegen can proceed (false). In MinOpts we
// suppress the throw. With FEATURE_TRACELOGGING the assert site is also reported
// to telemetry before deciding.
2499 bool Compiler::compShouldThrowOnNoway(
2500 #ifdef FEATURE_TRACELOGGING
2501 const char* filename, unsigned line
2505 #ifdef FEATURE_TRACELOGGING
2506 compJitTelemetry.NotifyNowayAssert(filename, line);
2509 // In min opts, we don't want the noway assert to go through the exception
2510 // path. Instead we want it to just silently go through codegen for
2512 return !opts.MinOpts();
2515 // ConfigInteger does not offer an option for decimal flags. Any numbers are interpreted as hex.
2516 // I could add the decimal option to ConfigInteger or I could write a function to reinterpret this
2517 // value as the user intended.
// ReinterpretHexAsDecimal: treat each hex nibble of 'in' as a decimal digit,
// e.g. 0x100 -> 100. Nibbles are peeled with % 16 and accumulated against a
// decimal place-value multiplier ('index'); the loop scaffolding (index init/step,
// in /= 16) sits on lines not visible in this sampled view.
2518 unsigned ReinterpretHexAsDecimal(unsigned in)
2520 // ex: in: 0x100 returns: 100
2521 unsigned result = 0;
2532 unsigned digit = in % 16;
2535 result += digit * index;
// compInitOptions: initialize the whole 'opts' structure (and related compiler state)
// for this compilation from the EE-supplied jitFlags plus the COMPlus_/JitConfig knobs:
// optimization level, code-size/speed blend, debug flags, loop alignment, altjit
// selection, dump/disasm switches, PGO data retrieval, profiler hooks, tail calls,
// soft-FP ABI, relocations and procedure splitting. Statement order matters: later
// settings override earlier defaults.
// NOTE(review): this view is sampled — many braces/else branches and #if openers are
// on lines not visible here; comments annotate only what is shown.
2541 void Compiler::compInitOptions(JitFlags* jitFlags)
2543 #ifdef UNIX_AMD64_ABI
2544 opts.compNeedToAlignFrame = false;
2545 #endif // UNIX_AMD64_ABI
// Zero the whole opts struct first; everything below is layered on top of that.
2546 memset(&opts, 0, sizeof(opts));
2548 if (compIsForInlining())
2550 // The following flags are lost when inlining. (They are removed in
2551 // Compiler::fgInvokeInlineeCompiler().)
2552 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
2553 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
2554 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
2555 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO));
2556 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE));
2557 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_TRACK_TRANSITIONS));
2560 opts.jitFlags = jitFlags;
2561 opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
// --- Optimization level: MinOpts for debug code / tier0, and for non-prejit .cctors ---
2563 if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) ||
2564 jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2566 opts.compFlags = CLFLG_MINOPT;
2568 // Don't optimize .cctors (except prejit) or if we're an inlinee
2569 else if (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) &&
2570 !compIsForInlining())
2572 opts.compFlags = CLFLG_MINOPT;
2575 // Default value is to generate a blend of size and speed optimizations
2577 opts.compCodeOpt = BLENDED_CODE;
2579 // If the EE sets SIZE_OPT or if we are compiling a Class constructor
2580 // we will optimize for code size at the expense of speed
2582 if (jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
2584 opts.compCodeOpt = SMALL_CODE;
2587 // If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
2589 else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT) ||
2590 (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1) && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)))
2592 opts.compCodeOpt = FAST_CODE;
2593 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT));
2596 //-------------------------------------------------------------------------
// --- Debug code / debug info / Edit-and-Continue flags straight from the EE ---
2598 opts.compDbgCode = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE);
2599 opts.compDbgInfo = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO);
2600 opts.compDbgEnC = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC);
// --- Loop alignment configuration (configurable path vs. built-in defaults) ---
2603 opts.compJitAlignLoopAdaptive = JitConfig.JitAlignLoopAdaptive() == 1;
2604 opts.compJitAlignLoopBoundary = (unsigned short)JitConfig.JitAlignLoopBoundary();
2605 opts.compJitAlignLoopMinBlockWeight = (unsigned short)JitConfig.JitAlignLoopMinBlockWeight();
2607 opts.compJitAlignLoopForJcc = JitConfig.JitAlignLoopForJcc() == 1;
2608 opts.compJitAlignLoopMaxCodeSize = (unsigned short)JitConfig.JitAlignLoopMaxCodeSize();
2610 opts.compJitAlignLoopAdaptive = true;
2611 opts.compJitAlignLoopBoundary = DEFAULT_ALIGN_LOOP_BOUNDARY;
2612 opts.compJitAlignLoopMinBlockWeight = DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT;
2614 if (opts.compJitAlignLoopAdaptive)
// Adaptive: padding budget is (boundary / 2) - 1; otherwise up to boundary - 1.
2616 opts.compJitAlignPaddingLimit = (opts.compJitAlignLoopBoundary >> 1) - 1;
2620 opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary - 1;
2623 assert(isPow2(opts.compJitAlignLoopBoundary));
2625 #if REGEN_SHORTCUTS || REGEN_CALLPAT
2626 // We never want to have debugging enabled when regenerating GC encoding patterns
2627 opts.compDbgCode = false;
2628 opts.compDbgInfo = false;
2629 opts.compDbgEnC = false;
// --- Verbosity: inlinees inherit the root compiler's verbose setting ---
2635 opts.dspOrder = false;
2636 if (compIsForInlining())
2638 verbose = impInlineInfo->InlinerCompiler->verbose;
2643 codeGen->setVerbose(false);
2645 verboseTrees = verbose && shouldUseVerboseTrees();
2646 verboseSsa = verbose && shouldUseVerboseSsa();
2647 asciiTrees = shouldDumpASCIITrees();
2648 opts.dspDiffable = compIsForInlining() ? impInlineInfo->InlinerCompiler->opts.dspDiffable : false;
// --- AltJit selection: is this method compiled by the alternate JIT? ---
2651 opts.altJit = false;
2653 #if defined(LATE_DISASM) && !defined(DEBUG)
2654 // For non-debug builds with the late disassembler built in, we currently always do late disassembly
2655 // (we have no way to determine when not to, since we don't have class/method names).
2656 // In the DEBUG case, this is initialized to false, below.
2657 opts.doLateDisasm = true;
// DEBUG path: match method name against COMPlus_AltJit / COMPlus_AltJitNgen method set.
2662 const JitConfigValues::MethodSet* pfAltJit;
2663 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2665 pfAltJit = &JitConfig.AltJitNgen();
2669 pfAltJit = &JitConfig.AltJit();
2672 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
2674 if (pfAltJit->contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
// COMPlus_AltJitLimit caps how many methods the altjit compiles (value read as
// hex then reinterpreted as decimal).
2679 unsigned altJitLimit = ReinterpretHexAsDecimal(JitConfig.AltJitLimit());
2680 if (altJitLimit > 0 && Compiler::jitTotalMethodCompiled >= altJitLimit)
2682 opts.altJit = false;
// Release path: only "*" (all methods) is honored, since method/class names
// are not computed in release builds.
2688 const char* altJitVal;
2689 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2691 altJitVal = JitConfig.AltJitNgen().list();
2695 altJitVal = JitConfig.AltJit().list();
2698 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
2700 // In release mode, you either get all methods or no methods. You must use "*" as the parameter, or we ignore
2701 // it. You don't get to give a regular expression of methods to match.
2702 // (Partially, this is because we haven't computed and stored the method and class name except in debug, and it
2703 // might be expensive to do so.)
2704 if ((altJitVal != nullptr) && (strcmp(altJitVal, "*") == 0))
2712 // Take care of COMPlus_AltJitExcludeAssemblies.
2715 // First, initialize the AltJitExcludeAssemblies list, but only do it once.
2716 if (!s_pAltJitExcludeAssembliesListInitialized)
2718 const WCHAR* wszAltJitExcludeAssemblyList = JitConfig.AltJitExcludeAssemblies();
2719 if (wszAltJitExcludeAssemblyList != nullptr)
2721 // NOTE: The Assembly name list is allocated in the process heap, not in the no-release heap, which is
2723 // for every compilation. This is ok because we only allocate once, due to the static.
2724 s_pAltJitExcludeAssembliesList = new (HostAllocator::getHostAllocator())
2725 AssemblyNamesList2(wszAltJitExcludeAssemblyList, HostAllocator::getHostAllocator());
2727 s_pAltJitExcludeAssembliesListInitialized = true;
2730 if (s_pAltJitExcludeAssembliesList != nullptr)
2732 // We have an exclusion list. See if this method is in an assembly that is on the list.
2733 // Note that we check this for every method, since we might inline across modules, and
2734 // if the inlinee module is on the list, we don't want to use the altjit for it.
2735 const char* methodAssemblyName = info.compCompHnd->getAssemblyName(
2736 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
2737 if (s_pAltJitExcludeAssembliesList->IsInList(methodAssemblyName))
2739 opts.altJit = false;
// A non-empty AltJit config redirects all the dump/debug knobs below to the altjit only.
2746 bool altJitConfig = !pfAltJit->isEmpty();
2748 // If we have a non-empty AltJit config then we change all of these other
2749 // config values to refer only to the AltJit. Otherwise, a lot of COMPlus_* variables
2750 // would apply to both the altjit and the normal JIT, but we only care about
2751 // debugging the altjit if the COMPlus_AltJit configuration is set.
2753 if (compIsForImportOnly() && (!altJitConfig || opts.altJit))
2755 if (JitConfig.JitImportBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2757 assert(!"JitImportBreak reached");
// --- Verbose-dump decision (JitDump/NgenDump, by name or method hash) ---
2761 bool verboseDump = false;
2763 if (!altJitConfig || opts.altJit)
2765 LPCWSTR dumpIRFormat = nullptr;
2767 // We should only enable 'verboseDump' when we are actually compiling a matching method
2768 // and not enable it when we are just considering inlining a matching method.
2770 if (!compIsForInlining())
2772 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2774 if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2778 unsigned ngenHashDumpVal = (unsigned)JitConfig.NgenHashDump();
2779 if ((ngenHashDumpVal != (DWORD)-1) && (ngenHashDumpVal == info.compMethodHash()))
2786 if (JitConfig.JitDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2790 unsigned jitHashDumpVal = (unsigned)JitConfig.JitHashDump();
2791 if ((jitHashDumpVal != (DWORD)-1) && (jitHashDumpVal == info.compMethodHash()))
2806 // Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
2807 featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
2808 setUsesSIMDTypes(false);
2809 #endif // FEATURE_SIMD
// Enregistration of EH-live and multi-reg locals is gated on CLFLG_REGVAR plus config.
2811 lvaEnregEHVars = (((opts.compFlags & CLFLG_REGVAR) != 0) && JitConfig.EnableEHWriteThru());
2812 lvaEnregMultiRegVars = (((opts.compFlags & CLFLG_REGVAR) != 0) && JitConfig.EnableMultiRegLocals());
2814 if (compIsForImportOnly())
// --- Tail call defaults (importer needs these set for inlinees too) ---
2819 #if FEATURE_TAILCALL_OPT
2820 // By default opportunistic tail call optimization is enabled.
2821 // Recognition is done in the importer so this must be set for
2822 // inlinees as well.
2823 opts.compTailCallOpt = true;
2824 #endif // FEATURE_TAILCALL_OPT
2826 #if FEATURE_FASTTAILCALL
2827 // By default fast tail calls are enabled.
2828 opts.compFastTailCalls = true;
2829 #endif // FEATURE_FASTTAILCALL
// --- PGO: fetch instrumentation results from the VM when BBOPT is set ---
2833 fgPgoSchema = nullptr;
2834 fgPgoData = nullptr;
2835 fgPgoSchemaCount = 0;
2836 fgPgoQueryResult = E_FAIL;
2837 fgPgoFailReason = nullptr;
2839 if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
2841 fgPgoQueryResult = info.compCompHnd->getPgoInstrumentationResults(info.compMethodHnd, &fgPgoSchema,
2842 &fgPgoSchemaCount, &fgPgoData);
2844 // a failed result that also has a non-NULL fgPgoSchema
2845 // indicates that the ILSize for the method no longer matches
2846 // the ILSize for the method when profile data was collected.
2848 // We will discard the IBC data in this case
2850 if (FAILED(fgPgoQueryResult))
2852 fgPgoFailReason = (fgPgoSchema != nullptr) ? "No matching PGO data" : "No PGO data";
2853 fgPgoData = nullptr;
2854 fgPgoSchema = nullptr;
2856 // Optionally, discard the profile data.
2858 else if (JitConfig.JitDisablePGO() != 0)
2860 fgPgoFailReason = "PGO data available, but JitDisablePGO != 0";
2861 fgPgoQueryResult = E_FAIL;
2862 fgPgoData = nullptr;
2863 fgPgoSchema = nullptr;
2867 // A successful result implies a non-NULL fgPgoSchema
2869 if (SUCCEEDED(fgPgoQueryResult))
2871 assert(fgPgoSchema != nullptr);
2874 // A failed result implies a NULL fgPgoSchema
2875 // see implementation of Compiler::fgHaveProfileData()
2877 if (FAILED(fgPgoQueryResult))
2879 assert(fgPgoSchema == nullptr);
// Inlinees stop here: the remaining fields are only meaningful for root codegen.
2884 if (compIsForInlining())
2889 // The rest of the opts fields that we initialize here
2890 // should only be used when we generate code for the method
2891 // They should not be used when importing or inlining
2892 CLANG_FORMAT_COMMENT_ANCHOR;
2894 #if FEATURE_TAILCALL_OPT
2895 opts.compTailCallLoopOpt = true;
2896 #endif // FEATURE_TAILCALL_OPT
2898 opts.genFPorder = true;
2899 opts.genFPopt = true;
2901 opts.instrCount = 0;
2902 opts.lvRefCount = 0;
2904 #ifdef PROFILING_SUPPORTED
2905 opts.compJitELTHookEnabled = false;
2906 #endif // PROFILING_SUPPORTED
2908 #if defined(TARGET_ARM64)
2909 // 0 is default: use the appropriate frame type based on the function.
2910 opts.compJitSaveFpLrWithCalleeSavedRegisters = 0;
2911 #endif // defined(TARGET_ARM64)
// --- DEBUG-only dump/disasm switch defaults, then per-knob overrides below ---
2914 opts.dspInstrs = false;
2915 opts.dspLines = false;
2916 opts.varNames = false;
2917 opts.dmpHex = false;
2918 opts.disAsm = false;
2919 opts.disAsmSpilled = false;
2920 opts.disDiffable = false;
2921 opts.disAddr = false;
2922 opts.dspCode = false;
2923 opts.dspEHTable = false;
2924 opts.dspDebugInfo = false;
2925 opts.dspGCtbls = false;
2926 opts.disAsm2 = false;
2927 opts.dspUnwind = false;
2928 opts.compLongAddress = false;
2929 opts.optRepeat = false;
2932 opts.doLateDisasm = false;
2933 #endif // LATE_DISASM
2935 compDebugBreak = false;
2937 // If we have a non-empty AltJit config then we change all of these other
2938 // config values to refer only to the AltJit.
2940 if (!altJitConfig || opts.altJit)
// Prejit (ngen) uses the Ngen* family of knobs; normal JIT uses the Jit* family.
2942 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
2944 if ((JitConfig.NgenOrder() & 1) == 1)
2946 opts.dspOrder = true;
2949 if (JitConfig.NgenGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2951 opts.dspGCtbls = true;
2954 if (JitConfig.NgenDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2958 if (JitConfig.NgenDisasm().contains("SPILLED", nullptr, nullptr))
2960 opts.disAsmSpilled = true;
2963 if (JitConfig.NgenUnwindDump().contains(info.compMethodName, info.compClassName,
2964 &info.compMethodInfo->args))
2966 opts.dspUnwind = true;
2969 if (JitConfig.NgenEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2971 opts.dspEHTable = true;
2974 if (JitConfig.NgenDebugDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
2976 opts.dspDebugInfo = true;
2981 bool disEnabled = true;
2983 // Setup assembly name list for disassembly, if not already set up.
2984 if (!s_pJitDisasmIncludeAssembliesListInitialized)
2986 const WCHAR* assemblyNameList = JitConfig.JitDisasmAssemblies();
2987 if (assemblyNameList != nullptr)
2989 s_pJitDisasmIncludeAssembliesList = new (HostAllocator::getHostAllocator())
2990 AssemblyNamesList2(assemblyNameList, HostAllocator::getHostAllocator());
2992 s_pJitDisasmIncludeAssembliesListInitialized = true;
2995 // If we have an assembly name list for disassembly, also check this method's assembly.
2996 if (s_pJitDisasmIncludeAssembliesList != nullptr && !s_pJitDisasmIncludeAssembliesList->IsEmpty())
2998 const char* assemblyName = info.compCompHnd->getAssemblyName(
2999 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
3001 if (!s_pJitDisasmIncludeAssembliesList->IsInList(assemblyName))
3009 if ((JitConfig.JitOrder() & 1) == 1)
3011 opts.dspOrder = true;
3014 if (JitConfig.JitGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3016 opts.dspGCtbls = true;
3019 if (JitConfig.JitDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3024 if (JitConfig.JitDisasm().contains("SPILLED", nullptr, nullptr))
3026 opts.disAsmSpilled = true;
3029 if (JitConfig.JitUnwindDump().contains(info.compMethodName, info.compClassName,
3030 &info.compMethodInfo->args))
3032 opts.dspUnwind = true;
3035 if (JitConfig.JitEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3037 opts.dspEHTable = true;
3040 if (JitConfig.JitDebugDump().contains(info.compMethodName, info.compClassName,
3041 &info.compMethodInfo->args))
3043 opts.dspDebugInfo = true;
3047 if (opts.disAsm && JitConfig.JitDisasmWithGC())
3049 opts.disasmWithGC = true;
3053 if (JitConfig.JitLateDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3054 opts.doLateDisasm = true;
3055 #endif // LATE_DISASM
3057 // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDiffableDasm=1
3058 if (JitConfig.DiffableDasm() != 0)
3060 opts.disDiffable = true;
3061 opts.dspDiffable = true;
3064 // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDasmWithAddress=1
3065 if (JitConfig.JitDasmWithAddress() != 0)
3067 opts.disAddr = true;
3070 if (JitConfig.JitLongAddress() != 0)
3072 opts.compLongAddress = true;
3075 if (JitConfig.JitOptRepeat().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3077 opts.optRepeat = true;
// verboseDump implies the full set of dump switches below.
3083 opts.dspCode = true;
3084 opts.dspEHTable = true;
3085 opts.dspGCtbls = true;
3086 opts.disAsm2 = true;
3087 opts.dspUnwind = true;
3089 verboseTrees = shouldUseVerboseTrees();
3090 verboseSsa = shouldUseVerboseSsa();
3091 codeGen->setVerbose(true);
3094 treesBeforeAfterMorph = (JitConfig.TreesBeforeAfterMorph() == 1);
3095 morphNum = 0; // Initialize the morphed-trees counting.
3097 expensiveDebugCheckLevel = JitConfig.JitExpensiveDebugCheckLevel();
3098 if (expensiveDebugCheckLevel == 0)
3100 // If we're in a stress mode that modifies the flowgraph, make 1 the default.
3101 if (fgStressBBProf() || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3103 expensiveDebugCheckLevel = 1;
3109 printf("****** START compiling %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
3110 printf("Generating code for %s %s\n", Target::g_tgtPlatformName, Target::g_tgtCPUName);
3111 printf(""); // in our logic this causes a flush
// --- JitBreak / JitHashBreak / JitDebugBreak: assert or set compDebugBreak ---
3114 if (JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3116 assert(!"JitBreak reached");
3119 unsigned jitHashBreakVal = (unsigned)JitConfig.JitHashBreak();
3120 if ((jitHashBreakVal != (DWORD)-1) && (jitHashBreakVal == info.compMethodHash()))
3122 assert(!"JitHashBreak reached");
3126 JitConfig.JitDebugBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args) ||
3127 JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3129 compDebugBreak = true;
3132 memset(compActiveStressModes, 0, sizeof(compActiveStressModes));
3134 // Read function list, if not already read, and there exists such a list.
3135 if (!s_pJitFunctionFileInitialized)
3137 const WCHAR* functionFileName = JitConfig.JitFunctionFile();
3138 if (functionFileName != nullptr)
3141 new (HostAllocator::getHostAllocator()) MethodSet(functionFileName, HostAllocator::getHostAllocator());
3143 s_pJitFunctionFileInitialized = true;
3148 //-------------------------------------------------------------------------
// --- GC checks and stack checks (DEBUG stress knobs) ---
3151 assert(!codeGen->isGCTypeFixed());
3152 opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
3155 #if defined(DEBUG) && defined(TARGET_XARCH)
3158 STACK_CHECK_ON_RETURN = 0x1,
3159 STACK_CHECK_ON_CALL = 0x2,
3160 STACK_CHECK_ALL = 0x3
3163 DWORD dwJitStackChecks = JitConfig.JitStackChecks();
3164 if (compStressCompile(STRESS_GENERIC_VARN, 5))
3166 dwJitStackChecks = STACK_CHECK_ALL;
3168 opts.compStackCheckOnRet = (dwJitStackChecks & DWORD(STACK_CHECK_ON_RETURN)) != 0;
3169 #if defined(TARGET_X86)
3170 opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
3171 #endif // defined(TARGET_X86)
3172 #endif // defined(DEBUG) && defined(TARGET_XARCH)
3174 #if MEASURE_MEM_ALLOC
3175 s_dspMemStats = (JitConfig.DisplayMemStats() != 0);
// --- Profiler hooks: cache the profiler handle, optional fake ELT hook stress ---
3178 #ifdef PROFILING_SUPPORTED
3179 opts.compNoPInvokeInlineCB = jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_NO_PINVOKE_INLINE);
3181 // Cache the profiler handle
3182 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE))
3186 info.compCompHnd->GetProfilingHandle(&hookNeeded, &compProfilerMethHnd, &indirected);
3187 compProfilerHookNeeded = !!hookNeeded;
3188 compProfilerMethHndIndirected = !!indirected;
3192 compProfilerHookNeeded = false;
3193 compProfilerMethHnd = nullptr;
3194 compProfilerMethHndIndirected = false;
3197 // Honour COMPlus_JitELTHookEnabled or STRESS_PROFILER_CALLBACKS stress mode
3198 // only if VM has not asked us to generate profiler hooks in the first place.
3199 // That is, override VM only if it hasn't asked for a profiler callback for this method.
3200 // Don't run this stress mode when pre-JITing, as we would need to emit a relocation
3201 // for the call to the fake ELT hook, which wouldn't make sense, as we can't store that
3202 // in the pre-JIT image.
3203 if (!compProfilerHookNeeded)
3205 if ((JitConfig.JitELTHookEnabled() != 0) ||
3206 (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && compStressCompile(STRESS_PROFILER_CALLBACKS, 5)))
3208 opts.compJitELTHookEnabled = true;
3212 // TBD: Exclude PInvoke stubs
3213 if (opts.compJitELTHookEnabled)
3215 compProfilerMethHnd = (void*)DummyProfilerELTStub;
3216 compProfilerMethHndIndirected = false;
3219 #endif // PROFILING_SUPPORTED
// --- Tail call config overrides ---
3221 #if FEATURE_TAILCALL_OPT
3222 const WCHAR* strTailCallOpt = JitConfig.TailCallOpt();
3223 if (strTailCallOpt != nullptr)
3225 opts.compTailCallOpt = (UINT)_wtoi(strTailCallOpt) != 0;
3228 if (JitConfig.TailCallLoopOpt() == 0)
3230 opts.compTailCallLoopOpt = false;
3234 #if FEATURE_FASTTAILCALL
3235 if (JitConfig.FastTailCalls() == 0)
3237 opts.compFastTailCalls = false;
3239 #endif // FEATURE_FASTTAILCALL
// --- ARM soft-FP ABI: process-wide consistency enforced via interlocked sentinel ---
3241 #ifdef CONFIGURABLE_ARM_ABI
3242 opts.compUseSoftFP = jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI);
3243 unsigned int softFPConfig = opts.compUseSoftFP ? 2 : 1;
3244 unsigned int oldSoftFPConfig =
3245 InterlockedCompareExchange(&GlobalJitOptions::compUseSoftFPConfigured, softFPConfig, 0);
3246 if (oldSoftFPConfig != softFPConfig && oldSoftFPConfig != 0)
3248 // There are no current scenarios where the abi can change during the lifetime of a process
3249 // that uses the JIT. If such a change occurs, either compFeatureHfa will need to change to a TLS static
3250 // or we will need to have some means to reset the flag safely.
3251 NO_WAY("SoftFP ABI setting changed during lifetime of process");
3254 GlobalJitOptions::compFeatureHfa = !opts.compUseSoftFP;
3255 #elif defined(ARM_SOFTFP)
3256 // Armel is unconditionally enabled in the JIT. Verify that the VM side agrees.
3257 assert(jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI));
3259 assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SOFTFP_ABI));
3260 #endif // CONFIGURABLE_ARM_ABI
3262 opts.compScopeInfo = opts.compDbgInfo;
3265 codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
3266 info.compMethodInfo->args.pSig);
3269 //-------------------------------------------------------------------------
// --- Relocations and hot/cold procedure splitting ---
3271 opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
3274 #if defined(TARGET_XARCH)
3275 // Whether encoding of absolute addr as PC-rel offset is enabled
3276 opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
3280 opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT);
3283 // TODO-ARM64-NYI: enable hot/cold splitting
3284 opts.compProcedureSplitting = false;
3285 #endif // TARGET_ARM64
3288 opts.compProcedureSplittingEH = opts.compProcedureSplitting;
3291 if (opts.compProcedureSplitting)
3293 // Note that opts.compdbgCode is true under ngen for checked assemblies!
3294 opts.compProcedureSplitting = !opts.compDbgCode;
3297 // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
3298 // This is useful for debugging on a checked build. Note that we still only do procedure
3299 // splitting in the zapper.
3300 if (JitConfig.JitForceProcedureSplitting().contains(info.compMethodName, info.compClassName,
3301 &info.compMethodInfo->args))
3303 opts.compProcedureSplitting = true;
3306 // JitNoProcedureSplitting will always disable procedure splitting.
3307 if (JitConfig.JitNoProcedureSplitting().contains(info.compMethodName, info.compClassName,
3308 &info.compMethodInfo->args))
3310 opts.compProcedureSplitting = false;
3313 // JitNoProcedureSplittingEH will disable procedure splitting in functions with EH.
3314 if (JitConfig.JitNoProcedureSplittingEH().contains(info.compMethodName, info.compClassName,
3315 &info.compMethodInfo->args))
3317 opts.compProcedureSplittingEH = false;
3323 // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
3324 if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
3326 compMaxUncheckedOffsetForNullObject = (size_t)JitConfig.JitMaxUncheckedOffset();
3329 printf("STRESS_NULL_OBJECT_CHECK: compMaxUncheckedOffsetForNullObject=0x%X\n",
3330 compMaxUncheckedOffsetForNullObject);
// --- Verbose summary of the chosen options (DEBUG dump path) ---
3336 // If we are compiling for a specific tier, make that very obvious in the output.
3337 // Note that we don't expect multiple TIER flags to be set at one time, but there
3338 // is nothing preventing that.
3339 if (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
3341 printf("OPTIONS: Tier-0 compilation (set COMPlus_TieredCompilation=0 to disable)\n");
3343 if (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
3345 printf("OPTIONS: Tier-1 compilation\n");
3347 if (compSwitchedToOptimized)
3349 printf("OPTIONS: Tier-0 compilation, switched to FullOpts\n");
3351 if (compSwitchedToMinOpts)
3353 printf("OPTIONS: Tier-1/FullOpts compilation, switched to MinOpts\n");
3356 if (jitFlags->IsSet(JitFlags::JIT_FLAG_OSR))
3358 printf("OPTIONS: OSR variant with entry point 0x%x\n", info.compILEntry);
3361 printf("OPTIONS: compCodeOpt = %s\n",
3362 (opts.compCodeOpt == BLENDED_CODE)
3364 : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
3365 : (opts.compCodeOpt == FAST_CODE) ? "FAST_CODE" : "UNKNOWN_CODE");
3367 printf("OPTIONS: compDbgCode = %s\n", dspBool(opts.compDbgCode));
3368 printf("OPTIONS: compDbgInfo = %s\n", dspBool(opts.compDbgInfo));
3369 printf("OPTIONS: compDbgEnC = %s\n", dspBool(opts.compDbgEnC));
3370 printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
3371 printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
3373 if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
3375 printf("OPTIONS: optimized using profile data\n");
3378 if (fgPgoFailReason != nullptr)
3380 printf("OPTIONS: %s\n", fgPgoFailReason);
3383 if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
3385 printf("OPTIONS: Jit invoked for ngen\n");
3390 #ifdef PROFILING_SUPPORTED
3391 #ifdef UNIX_AMD64_ABI
3392 if (compIsProfilerHookNeeded())
3394 opts.compNeedToAlignFrame = true;
3396 #endif // UNIX_AMD64_ABI
3399 #if defined(DEBUG) && defined(TARGET_ARM64)
3400 if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash()))
3402 opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
3404 #endif // defined(DEBUG) && defined(TARGET_ARM64)
// compJitHaltMethod: decide whether to plant a breakpoint (INS_BREAKPOINT) at the
// start of the generated code for this method so a developer can step into it.
// NOTE(review): this listing is elided — the braces and return statements between
// the visible lines are missing; only the visible checks are documented here.
3409 bool Compiler::compJitHaltMethod()
3411 /* This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code */
3412 /* Note that this these two "Jit" environment variables also work for ngen images */
// First check: match this method by name/class/signature against the JitHalt config list.
3414 if (JitConfig.JitHalt().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3419 /* Use this Hash variant when there are a lot of method with the same name and different signatures */
// (unsigned)-1 is the "not configured" sentinel for JitHashHalt; otherwise halt when
// the configured hash matches this method's hash.
3421 unsigned fJitHashHaltVal = (unsigned)JitConfig.JitHashHalt();
3422 if ((fJitHashHaltVal != (unsigned)-1) && (fJitHashHaltVal == info.compMethodHash()))
3430 /*****************************************************************************
3431 * Should we use a "stress-mode" for the given stressArea. We have different
3432 * areas to allow the areas to be mixed in different combinations in
3433 * different methods.
3434 * 'weight' indicates how often (as a percentage) the area should be stressed.
3435 * It should reflect the usefulness:overhead ratio.
// Printable names for each stress mode, generated via the STRESS_MODE x-macro;
// the array has STRESS_COUNT + 1 entries (one extra for the terminating COUNT slot).
// NOTE(review): the macro-expansion lines between the visible lines are elided.
3438 const LPCWSTR Compiler::s_compStressModeNames[STRESS_COUNT + 1] = {
3439 #define STRESS_MODE(mode) W("STRESS_") W(#mode),
3445 //------------------------------------------------------------------------
3446 // compStressCompile: determine if a stress mode should be enabled
3449 //    stressArea - stress mode to possibly enable
3450 //    weight - percent of time this mode should be turned on
3451 //    (range 0 to 100); weight 0 effectively disables
3454 //    true if this stress mode is enabled
3457 //    Methods may be excluded from stress via name or hash.
3459 //    Particular stress modes may be disabled or forcibly enabled.
3461 //    With JitStress=2, some stress modes are enabled regardless of weight;
3462 //    these modes are the ones after COUNT_VARN in the enumeration.
3464 //    For other modes or for nonzero JitStress values, stress will be
3465 //    enabled selectively for roughly weight% of methods.
3467 bool Compiler::compStressCompile(compStressArea stressArea, unsigned weight)
// Guard: stress decisions need the method identity, which may not be set up yet.
3469 // This can be called early, before info is fully set up.
3470 if ((info.compMethodName == nullptr) || (info.compFullName == nullptr))
3475 // Inlinees defer to the root method for stress, so that we can
3476 // more easily isolate methods that cause stress failures.
3477 if (compIsForInlining())
3479 return impInlineRoot()->compStressCompile(stressArea, weight);
// The actual decision is delegated to compStressCompileHelper; this wrapper
// additionally records (and, in the elided dump path, announces) the first
// activation of each stress area in compActiveStressModes.
3482 const bool doStress = compStressCompileHelper(stressArea, weight);
3484 if (doStress && !compActiveStressModes[stressArea])
3488 printf("\n\n*** JitStress: %ws ***\n\n", s_compStressModeNames[stressArea]);
3490 compActiveStressModes[stressArea] = 1;
3496 //------------------------------------------------------------------------
3497 // compStressCompileHelper: helper to determine if a stress mode should be enabled
3500 //    stressArea - stress mode to possibly enable
3501 //    weight - percent of time this mode should be turned on
3502 //    (range 0 to 100); weight 0 effectively disables
3505 //    true if this stress mode is enabled
3508 //    See compStressCompile
3510 bool Compiler::compStressCompileHelper(compStressArea stressArea, unsigned weight)
// Excluded by the JitStressRange config? (bRangeAllowStress computed elsewhere.)
3512 if (!bRangeAllowStress)
// If JitStressOnly is set, stress only the listed methods.
3517 if (!JitConfig.JitStressOnly().isEmpty() &&
3518 !JitConfig.JitStressOnly().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3523 // Does user explicitly prevent using this STRESS_MODE through the command line?
3524 const WCHAR* strStressModeNamesNot = JitConfig.JitStressModeNamesNot();
3525 if ((strStressModeNamesNot != nullptr) &&
3526 (wcsstr(strStressModeNamesNot, s_compStressModeNames[stressArea]) != nullptr))
3531 // Does user explicitly set this STRESS_MODE through the command line?
3532 const WCHAR* strStressModeNames = JitConfig.JitStressModeNames();
3533 if (strStressModeNames != nullptr)
3535 if (wcsstr(strStressModeNames, s_compStressModeNames[stressArea]) != nullptr)
3540 // This stress mode name did not match anything in the stress
3541 // mode allowlist. If user has requested only enable mode,
3542 // don't allow this stress mode to turn on.
3543 const bool onlyEnableMode = JitConfig.JitStressModeNamesOnly() != 0;
3551 // 0:   No stress (Except when explicitly set in complus_JitStressModeNames)
3552 // !=2: Vary stress. Performance will be slightly/moderately degraded
3553 // 2:   Check-all stress. Performance will be REALLY horrible
3554 const int stressLevel = getJitStressLevel();
3556 assert(weight <= MAX_STRESS_WEIGHT);
3558 // Check for boundary conditions
3559 if (stressLevel == 0 || weight == 0)
3564 // Should we allow unlimited stress ?
3565 if ((stressArea > STRESS_COUNT_VARN) && (stressLevel == 2))
// weight == MAX_STRESS_WEIGHT means "always on" once the earlier gates pass.
3570 if (weight == MAX_STRESS_WEIGHT)
3575 // Get a hash which can be compared with 'weight'
3576 assert(stressArea != 0);
// Deterministic pseudo-random selection: mix the method hash with the area and
// level so different areas/levels stress different (roughly weight%) method sets.
3577 const unsigned hash = (info.compMethodHash() ^ stressArea ^ stressLevel) % MAX_STRESS_WEIGHT;
3579 assert(hash < MAX_STRESS_WEIGHT && weight <= MAX_STRESS_WEIGHT);
3580 return (hash < weight);
3583 //------------------------------------------------------------------------
3584 // compPromoteFewerStructs: helper to determine if the local
3585 // should not be promoted under a stress mode.
3588 //    lclNum - local number to test
3591 //    true if this local should not be promoted.
3594 //    Reject ~50% of the potential promotions if STRESS_PROMOTE_FEWER_STRUCTS is active.
3596 bool Compiler::compPromoteFewerStructs(unsigned lclNum)
3598 bool       rejectThisPromo = false;
3599 const bool promoteLess     = compStressCompile(STRESS_PROMOTE_FEWER_STRUCTS, 50);
// Deterministic per-local coin flip: mix the method hash with the local number
// and reject when the low bit is 0 (~half of candidates).
3603 rejectThisPromo = (((info.compMethodHash() ^ lclNum) & 1) == 0);
3605 return rejectThisPromo;
// compInitDebuggingInfo: set up debug-info state for the root compilation —
// local-variable scope records, and the statement-offsets (sequence point) table.
// Only called for root compilations, never for inlinees.
// NOTE(review): this listing is elided; braces and some statements between the
// visible lines are missing.
3610 void Compiler::compInitDebuggingInfo()
3612 assert(!compIsForInlining());
3617 printf("*************** In compInitDebuggingInfo() for %s\n", info.compFullName);
3621 /*-------------------------------------------------------------------------
3623 * Get hold of the local variable records, if there are any
3626 info.compVarScopesCount = 0;
3628 if (opts.compScopeInfo)
3633 compInitVarScopeMap();
3635 if (opts.compScopeInfo || opts.compDbgCode)
3637 compInitScopeLists();
3640 if (opts.compDbgCode && (info.compVarScopesCount > 0))
3642 /* Create a new empty basic block. fgExtendDbgLifetimes() may add
3643 initialization of variables which are in scope right from the
3644 start of the (real) first BB (and therefore artificially marked
3645 as alive) into this block.
3648 fgEnsureFirstBBisScratch();
// Keep the scratch block non-empty so it isn't removed before it can host the inits.
3650 fgNewStmtAtEnd(fgFirstBB, gtNewNothingNode());
3652 JITDUMP("Debuggable code - Add new %s to perform initialization of variables\n", fgFirstBB->dspToString());
3655 /*-------------------------------------------------------------------------
3657 * Read the stmt-offsets table and the line-number table
3660 info.compStmtOffsetsImplicit = ICorDebugInfo::NO_BOUNDARIES;
3662 // We can only report debug info for EnC at places where the stack is empty.
3663 // Actually, at places where there are not live temps. Else, we won't be able
3664 // to map between the old and the new versions correctly as we won't have
3665 // any info for the live temps.
3667 assert(!opts.compDbgEnC || !opts.compDbgInfo ||
3668 0 == (info.compStmtOffsetsImplicit & ~ICorDebugInfo::STACK_EMPTY_BOUNDARIES));
3670 info.compStmtOffsetsCount = 0;
3672 if (opts.compDbgInfo)
3674 /* Get hold of the line# records, if there are any */
// Dump the implicit-boundary flags and the explicit IL offset table (debug dump path).
3681 printf("info.compStmtOffsetsCount    = %d\n", info.compStmtOffsetsCount);
3682 printf("info.compStmtOffsetsImplicit = %04Xh", info.compStmtOffsetsImplicit);
3684 if (info.compStmtOffsetsImplicit)
3687 if (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES)
3689 printf("STACK_EMPTY ");
3691 if (info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES)
3695 if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
3697 printf("CALL_SITE ");
3702 IL_OFFSET* pOffs = info.compStmtOffsets;
3703 for (unsigned i = 0; i < info.compStmtOffsetsCount; i++, pOffs++)
3705 printf("%02d) IL_%04Xh\n", i, *pOffs);
// compSetOptimizationLevel: decide whether this compilation runs at MinOpts or
// full optimization, based on inliner state, jit flags, config knobs
// (JitMinOpts*, stress), and method-size heuristics; then propagate the decision
// into opts, the jit flags, and codegen frame requirements.
// NOTE(review): this listing is elided; braces/else arms between visible lines
// are missing, so nesting is inferred and should be confirmed against the file.
3712 void Compiler::compSetOptimizationLevel()
3714 bool theMinOptsValue;
3715 #pragma warning(suppress : 4101)
3716 unsigned jitMinOpts;
// Inlinees inherit the optimization level of the method they are inlined into.
3718 if (compIsForInlining())
3720 theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
3724 theMinOptsValue = false;
3726 if (opts.compFlags == CLFLG_MINOPT)
3728 JITLOG((LL_INFO100, "CLFLG_MINOPT set for method %s\n", info.compFullName));
3729 theMinOptsValue = true;
// JitMinOpts encodes both a "kind" selector (bits 24-27) and kind-specific
// operands in the low bits; the kinds below select methods by count, by exact
// count mask, by count range, or by bit pattern.
3733 jitMinOpts = JitConfig.JitMinOpts();
3735 if (!theMinOptsValue && (jitMinOpts > 0))
3737 // jitTotalMethodCompiled does not include the method that is being compiled now, so make +1.
3738 unsigned methodCount     = Compiler::jitTotalMethodCompiled + 1;
3739 unsigned methodCountMask = methodCount & 0xFFF;
3740 unsigned kind            = (jitMinOpts & 0xF000000) >> 24;
// Default kind: MinOpts for the first 'jitMinOpts' methods compiled.
3744 if (jitMinOpts <= methodCount)
3748 printf(" Optimizations disabled by JitMinOpts and methodCount\n");
3750 theMinOptsValue = true;
// Kind 0xD: MinOpts when the masked method count equals either 12-bit operand.
3755 unsigned firstMinopts  = (jitMinOpts >> 12) & 0xFFF;
3756 unsigned secondMinopts = (jitMinOpts >> 0) & 0xFFF;
3758 if ((firstMinopts == methodCountMask) || (secondMinopts == methodCountMask))
3762 printf("0xD: Optimizations disabled by JitMinOpts and methodCountMask\n");
3764 theMinOptsValue = true;
// Kind 0xE: MinOpts when the masked method count falls inside [start, end].
3770 unsigned startMinopts = (jitMinOpts >> 12) & 0xFFF;
3771 unsigned endMinopts   = (jitMinOpts >> 0) & 0xFFF;
3773 if ((startMinopts <= methodCountMask) && (endMinopts >= methodCountMask))
3777 printf("0xE: Optimizations disabled by JitMinOpts and methodCountMask\n");
3779 theMinOptsValue = true;
// Kind 0xF: MinOpts when the masked count has all 'bitsOne' bits set and all
// 'bitsZero' bits clear.
3785 unsigned bitsZero = (jitMinOpts >> 12) & 0xFFF;
3786 unsigned bitsOne  = (jitMinOpts >> 0) & 0xFFF;
3788 if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
3792 printf("0xF: Optimizations disabled by JitMinOpts and methodCountMask\n");
3794 theMinOptsValue = true;
3801 if (!theMinOptsValue)
// Per-method-name override: JitMinOptsName forces MinOpts for matching methods.
3803 if (JitConfig.JitMinOptsName().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
3805 theMinOptsValue = true;
3810 // The code in this #if can be used to debug optimization issues according to method hash.
3811 // To use, uncomment, rebuild and set environment variables minoptshashlo and minoptshashhi.
3813 unsigned methHash = info.compMethodHash();
3814 char* lostr = getenv("minoptshashlo");
3815 unsigned methHashLo = 0;
3816 if (lostr != nullptr)
3818 sscanf_s(lostr, "%x", &methHashLo);
3819 char* histr = getenv("minoptshashhi");
3820 unsigned methHashHi = UINT32_MAX;
3821 if (histr != nullptr)
3823 sscanf_s(histr, "%x", &methHashHi);
3824 if (methHash >= methHashLo && methHash <= methHashHi)
3826 printf("MinOpts for method %s, hash = %08x.\n",
3827 info.compFullName, methHash);
3828 printf(""); // in our logic this causes a flush
3829 theMinOptsValue = true;
// Stress: force MinOpts for ~5% of methods when STRESS_MIN_OPTS is active.
3836 if (compStressCompile(STRESS_MIN_OPTS, 5))
3838 theMinOptsValue = true;
3840 // For PREJIT we never drop down to MinOpts
3841 // unless unless CLFLG_MINOPT is set
3842 else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
// Size/complexity thresholds (config-driven) that trip MinOpts for large methods.
3844 if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
3846 JITLOG((LL_INFO10, "IL Code Size exceeded, using MinOpts for method %s\n", info.compFullName));
3847 theMinOptsValue = true;
3849 else if ((unsigned)JitConfig.JitMinOptsInstrCount() < opts.instrCount)
3851 JITLOG((LL_INFO10, "IL instruction count exceeded, using MinOpts for method %s\n", info.compFullName));
3852 theMinOptsValue = true;
3854 else if ((unsigned)JitConfig.JitMinOptsBbCount() < fgBBcount)
3856 JITLOG((LL_INFO10, "Basic Block count exceeded, using MinOpts for method %s\n", info.compFullName));
3857 theMinOptsValue = true;
3859 else if ((unsigned)JitConfig.JitMinOptsLvNumCount() < lvaCount)
3861 JITLOG((LL_INFO10, "Local Variable Num count exceeded, using MinOpts for method %s\n", info.compFullName));
3862 theMinOptsValue = true;
3864 else if ((unsigned)JitConfig.JitMinOptsLvRefCount() < opts.lvRefCount)
3866 JITLOG((LL_INFO10, "Local Variable Ref count exceeded, using MinOpts for method %s\n", info.compFullName));
3867 theMinOptsValue = true;
3869 if (theMinOptsValue == true)
3871 JITLOG((LL_INFO10000, "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
3872 "%3d,%3d for method %s\n",
3873 info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
3874 if (JitConfig.JitBreakOnMinOpts() != 0)
3876 assert(!"MinOpts enabled");
3881 // Retail check if we should force Minopts due to the complexity of the method
3882 // For PREJIT we never drop down to MinOpts
3883 // unless unless CLFLG_MINOPT is set
3884 if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
3885 ((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
3886 (DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
3887 (DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
3889 theMinOptsValue = true;
3893 JITLOG((LL_INFO10000,
3894 "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count %3d,%3d for method %s\n",
3895 info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
3898 // The code in this #if has been useful in debugging loop cloning issues, by
3899 // enabling selective enablement of the loop cloning optimization according to
3902 if (!theMinOptsValue)
3904 unsigned methHash = info.compMethodHash();
3905 char* lostr = getenv("opthashlo");
3906 unsigned methHashLo = 0;
3909 sscanf_s(lostr, "%x", &methHashLo);
3910 // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
3912 char* histr = getenv("opthashhi");
3913 unsigned methHashHi = UINT32_MAX;
3916 sscanf_s(histr, "%x", &methHashHi);
3917 // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
3919 if (methHash < methHashLo || methHash > methHashHi)
3921 theMinOptsValue = true;
3925 printf("Doing optimization in  in %s (0x%x).\n", info.compFullName, methHash);
3933 // Set the MinOpts value
3934 opts.SetMinOpts(theMinOptsValue);
3936 // Notify the VM if MinOpts is being used when not requested
// If we chose MinOpts but the runtime asked for an optimized (non-Tier0,
// non-MinOpt, non-debug) compile, tell the VM and drop the TIER1 flag.
3937 if (theMinOptsValue && !compIsForInlining() && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) &&
3938 !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) && !opts.compDbgCode)
3940 info.compCompHnd->setMethodAttribs(info.compMethodHnd, CORINFO_FLG_SWITCHED_TO_MIN_OPT);
3941 opts.jitFlags->Clear(JitFlags::JIT_FLAG_TIER1);
3942 compSwitchedToMinOpts = true;
3946 if (verbose && !compIsForInlining())
3948 printf("OPTIONS: opts.MinOpts() == %s\n", opts.MinOpts() ? "true" : "false");
3952 /* Control the optimizations */
3954 if (opts.OptimizationDisabled())
3956 opts.compFlags &= ~CLFLG_MAXOPT;
3957 opts.compFlags |= CLFLG_MINOPT;
3960 if (!compIsForInlining())
3962 codeGen->setFramePointerRequired(false);
3963 codeGen->setFrameRequired(false);
3965 if (opts.OptimizationDisabled())
3967 codeGen->setFrameRequired(true);
3970 #if !defined(TARGET_AMD64)
3971 // The VM sets JitFlags::JIT_FLAG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
3972 // (2) the function is marked "noinline". The reason for #2 is that people mark functions
3973 // noinline to ensure the show up on in a stack walk. But for AMD64, we don't need a frame
3974 // pointer for the frame to show up in stack walk.
3975 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_FRAMED))
3976 codeGen->setFrameRequired(true);
3979 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
3981 // The JIT doesn't currently support loop alignment for prejitted images.
3982 // (The JIT doesn't know the final address of the code, hence
3983 // it can't align code based on unknown addresses.)
3985 codeGen->SetAlignLoops(false); // loop alignment not supported for prejitted code
3989 codeGen->SetAlignLoops(JitConfig.JitAlignLoops() == 1);
3994 // A single JitStress=1 Linux ARM32 test fails when we expand virtual calls early
3995 // JIT\HardwareIntrinsics\General\Vector128_1\Vector128_1_ro
// Two alternatives (platform-conditional per the surrounding #if, elided here):
// expand early only when the knob is exactly 2, vs. whenever it is nonzero.
3997 opts.compExpandCallsEarly = (JitConfig.JitExpandCallsEarly() == 2);
3999 opts.compExpandCallsEarly = (JitConfig.JitExpandCallsEarly() != 0);
4002 fgCanRelocateEHRegions = true;
4005 #ifdef TARGET_ARMARCH
4006 // Function compRsvdRegCheck:
4007 //  given a curState to use for calculating the total frame size
4008 //  it will return true if the REG_OPT_RSVD should be reserved so
4009 //  that it can be use to form large offsets when accessing stack
4010 //  based LclVar including both incoming and out going argument areas.
4012 //  The method advances the frame layout state to curState by calling
4013 //  lvaFrameSize(curState).
4015 bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
4017 // Always do the layout even if returning early. Callers might
4018 // depend on us to do the layout.
4019 unsigned frameSize = lvaFrameSize(curState);
4021 "compRsvdRegCheck\n"
4022 "  frame size  = %6d\n"
4023 "  compArgSize = %6d\n",
4024 frameSize, compArgSize);
4028 // Have a recovery path in case we fail to reserve REG_OPT_RSVD and go
4029 // over the limit of SP and FP offset ranges due to large
4031 JITDUMP(" Returning true (MinOpts)\n\n");
// Conservative upper bound on callee-saved space; FP registers only count
// when the method actually uses floating point. LR is always pushed.
4035 unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
4036 if (compFloatingPointUsed)
4038 calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
4040 calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR.  See genPushCalleeSavedRegisters
4042 noway_assert(frameSize >= calleeSavedRegMaxSz);
4044 #if defined(TARGET_ARM64)
4046 // TODO-ARM64-CQ: update this!
4047 JITDUMP(" Returning true (ARM64)\n\n");
4048 return true; // just always assume we'll need it, for now
// ----- TARGET_ARM (32-bit) path below: frame layout diagram and encoding-range
// checks to decide whether R11/SP immediate offsets can reach everything. -----
4054 //  ... high addresses ...
4055 //                   frame contents       size
4056 //                   -------------------  ------------------------
4057 //                   inArgs               compArgSize (includes prespill)
4061 //  R11  --->        R11                  REGSIZE_BYTES
4062 //                   callee saved regs    CALLEE_SAVED_REG_MAXSZ   (32 bytes)
4063 //                   optional saved fp regs CALLEE_SAVED_FLOAT_MAXSZ (64 bytes)
4065 //                   incl. TEMPS          MAX_SPILL_TEMP_SIZE
4068 //  ... low addresses ...
4070 // When codeGen->isFramePointerRequired is true, R11 will be established as a frame pointer.
4071 // We can then use R11 to access incoming args with positive offsets, and LclVars with
4072 // negative offsets.
4074 // In functions with EH, in the non-funclet (or main) region, even though we will have a
4075 // frame pointer, we can use SP with positive offsets to access any or all locals or arguments
4076 // that we can reach with SP-relative encodings. The funclet region might require the reserved
4077 // register, since it must use offsets from R11 to access the parent frame.
// VLDR/VSTR immediates max out at 0x3FC; integer LDR/STR reach 0xFFF.
4079 unsigned maxR11PositiveEncodingOffset = compFloatingPointUsed ? 0x03FC : 0x0FFF;
4080 JITDUMP("  maxR11PositiveEncodingOffset     = %6d\n", maxR11PositiveEncodingOffset);
4082 // Floating point load/store instructions (VLDR/VSTR) can address up to -0x3FC from R11, but we
4083 // don't know if there are either no integer locals, or if we don't need large negative offsets
4084 // for the integer locals, so we must use the integer max negative offset, which is a
4085 // smaller (absolute value) number.
4086 unsigned maxR11NegativeEncodingOffset = 0x00FF; // This is a negative offset from R11.
4087 JITDUMP("  maxR11NegativeEncodingOffset     = %6d\n", maxR11NegativeEncodingOffset);
4089 // -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
4090 unsigned maxR11PositiveOffset = compArgSize + (2 * REGSIZE_BYTES) - 1;
4091 JITDUMP("  maxR11PositiveOffset             = %6d\n", maxR11PositiveOffset);
4093 // The value is positive, but represents a negative offset from R11.
4094 // frameSize includes callee-saved space for R11 and LR, which are at non-negative offsets from R11
4095 // (+0 and +4, respectively), so don't include those in the max possible negative offset.
4096 assert(frameSize >= (2 * REGSIZE_BYTES));
4097 unsigned maxR11NegativeOffset = frameSize - (2 * REGSIZE_BYTES);
4098 JITDUMP("  maxR11NegativeOffset             = %6d\n", maxR11NegativeOffset);
// Frame-pointer-required case: reserve the register if R11-relative encodings
// cannot reach either the locals (negative) or the incoming args (positive).
4100 if (codeGen->isFramePointerRequired())
4102 if (maxR11NegativeOffset > maxR11NegativeEncodingOffset)
4104 JITDUMP(" Returning true (frame required and maxR11NegativeOffset)\n\n");
4107 if (maxR11PositiveOffset > maxR11PositiveEncodingOffset)
4109 JITDUMP(" Returning true (frame required and maxR11PositiveOffset)\n\n");
4114 // Now consider the SP based frame case. Note that we will use SP based offsets to access the stack in R11 based
4115 // frames in the non-funclet main code area.
4117 unsigned maxSPPositiveEncodingOffset = compFloatingPointUsed ? 0x03FC : 0x0FFF;
4118 JITDUMP("  maxSPPositiveEncodingOffset      = %6d\n", maxSPPositiveEncodingOffset);
4120 // -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
4121 assert(compArgSize + frameSize > 0);
4122 unsigned maxSPPositiveOffset = compArgSize + frameSize - 1;
4124 if (codeGen->isFramePointerUsed())
4126 // We have a frame pointer, so we can use it to access part of the stack, even if SP can't reach those parts.
4127 // We will still generate SP-relative offsets if SP can reach.
4129 // First, check that the stack between R11 and SP can be fully reached, either via negative offset from FP
4130 // or positive offset from SP. Don't count stored R11 or LR, which are reached from positive offsets from FP.
4132 unsigned maxSPLocalsCombinedOffset = frameSize - (2 * REGSIZE_BYTES) - 1;
4133 JITDUMP("  maxSPLocalsCombinedOffset        = %6d\n", maxSPLocalsCombinedOffset);
4135 if (maxSPLocalsCombinedOffset > maxSPPositiveEncodingOffset)
4138 unsigned maxRemainingLocalsCombinedOffset = maxSPLocalsCombinedOffset - maxSPPositiveEncodingOffset;
4139 JITDUMP("  maxRemainingLocalsCombinedOffset = %6d\n", maxRemainingLocalsCombinedOffset);
4141 if (maxRemainingLocalsCombinedOffset > maxR11NegativeEncodingOffset)
4143 JITDUMP(" Returning true (frame pointer exists; R11 and SP can't reach entire stack between them)\n\n");
4147 // Otherwise, yes, we can address the remaining parts of the locals frame with negative offsets from R11.
4150 // Check whether either R11 or SP can access the arguments.
4151 if ((maxR11PositiveOffset > maxR11PositiveEncodingOffset) &&
4152 (maxSPPositiveOffset > maxSPPositiveEncodingOffset))
4154 JITDUMP(" Returning true (frame pointer exists; R11 and SP can't reach all arguments)\n\n");
// No frame pointer: SP alone must reach the entire frame.
4160 if (maxSPPositiveOffset > maxSPPositiveEncodingOffset)
4162 JITDUMP(" Returning true (no frame pointer exists; SP can't reach all of frame)\n\n");
4167 // We won't need to reserve REG_OPT_RSVD.
4169 JITDUMP(" Returning false\n\n");
4171 #endif // TARGET_ARM
4173 #endif // TARGET_ARMARCH
4175 //------------------------------------------------------------------------
4176 // compGetTieringName: get a string describing tiered compilation settings
4180 //    wantShortName - true if a short name is ok (say for using in file names)
4183 //    String describing tiering decisions for this method, including cases
4184 //    where the jit codegen will differ from what the runtime requested.
// NOTE(review): elided listing — several branch bodies and returns (e.g. the
// plain Tier0/Tier1 cases between the visible lines) are missing from this view.
4186 const char* Compiler::compGetTieringName(bool wantShortName) const
4188 const bool tier0 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0);
4189 const bool tier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
4190 assert(!tier0 || !tier1); // We don't expect multiple TIER flags to be set at one time.
4198 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_OSR))
4207 else if (opts.OptimizationEnabled())
// compSwitchedToOptimized: a Tier-0 request that the jit upgraded to FullOpts.
4209 if (compSwitchedToOptimized)
4211 return wantShortName ? "Tier0-FullOpts" : "Tier-0 switched to FullOpts";
4218 else if (opts.MinOpts())
// compSwitchedToMinOpts: the jit downgraded an optimized request to MinOpts;
// both switches can apply (FullOpts first, then MinOpts).
4220 if (compSwitchedToMinOpts)
4222 if (compSwitchedToOptimized)
4224 return wantShortName ? "Tier0-FullOpts-MinOpts" : "Tier-0 switched to FullOpts, then to MinOpts";
4228 return wantShortName ? "Tier0-MinOpts" : "Tier-0 switched MinOpts";
4236 else if (opts.compDbgCode)
// Fallthrough when no tier/opt/debug classification matched.
4242 return wantShortName ? "Unknown" : "Unknown optimization level";
4246 //------------------------------------------------------------------------
4247 // compGetStressMessage: get a string describing jitstress capability
4251 //    An empty string if stress is not enabled, else a string describing
4252 //    if this method is subject to stress or is excluded by name or hash.
4254 const char* Compiler::compGetStressMessage() const
4256 // Add note about stress where appropriate
4257 const char* stressMessage = "";
4260 // Is stress enabled via mode name or level?
4261 if ((JitConfig.JitStressModeNames() != nullptr) || (getJitStressLevel() > 0))
4263 // Is the method being jitted excluded from stress via range?
4264 if (bRangeAllowStress)
4266 // Or is it excluded via name?
4267 if (!JitConfig.JitStressOnly().isEmpty() ||
4268 !JitConfig.JitStressOnly().contains(info.compMethodName, info.compClassName,
4269 &info.compMethodInfo->args))
4271 // Not excluded -- stress can happen
4272 stressMessage = " JitStress";
// Excluded by the JitStressOnly name filter.
4276 stressMessage = " NoJitStress(Only)";
// Excluded by the stress hash range (bRangeAllowStress == false).
4281 stressMessage = " NoJitStress(Range)";
4286 return stressMessage;
// compFunctionTraceStart: when COMPlus_JitFunctionTrace is enabled (and not in
// diff-able disasm mode), print an indented "{ Start Jitting ..." line and bump
// the shared nesting-level counter. No-op for inlinees.
4289 void Compiler::compFunctionTraceStart()
4292 if (compIsForInlining())
4297 if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
// InterlockedIncrement: the nesting level is shared across jitting threads.
4299 LONG newJitNestingLevel = InterlockedIncrement(&Compiler::jitNestingLevel);
4300 if (newJitNestingLevel <= 0)
4302 printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
// Indent proportionally to nesting depth (loop body elided in this listing).
4305 for (LONG i = 0; i < newJitNestingLevel - 1; i++)
4309 printf("{ Start Jitting Method %4d %s (MethodHash=%08x) %s\n", Compiler::jitTotalMethodCompiled,
4310 info.compFullName, info.compMethodHash(),
4311 compGetTieringName()); /* } editor brace matching workaround for this printf */
// compFunctionTraceEnd: counterpart to compFunctionTraceStart — prints the
// "} Jitted Method ..." line with the code address/size and decrements the
// shared nesting-level counter. Root compilations only.
4316 void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI)
4319 assert(!compIsForInlining());
4321 if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
4323 LONG newJitNestingLevel = InterlockedDecrement(&Compiler::jitNestingLevel);
4324 if (newJitNestingLevel < 0)
4326 printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
4329 for (LONG i = 0; i < newJitNestingLevel; i++)
4334 // Note: that is incorrect if we are compiling several methods at the same time.
4335 unsigned methodNumber = Compiler::jitTotalMethodCompiled - 1;
4337 /* { editor brace-matching workaround for following printf */
4338 printf("} Jitted Method %4d at" FMT_ADDR "method %s size %08x%s%s\n", methodNumber, DBG_ADDR(methodCodePtr),
4339 info.compFullName, methodCodeSize, isNYI ? " NYI" : (compIsForImportOnly() ? " import only" : ""),
4340 opts.altJit ? " altjit" : "");
4345 //------------------------------------------------------------------------
4346 // BeginPhase: begin execution of a phase
4349 //    phase - the phase that is about to begin
4351 void Compiler::BeginPhase(Phases phase)
// Record the phase so diagnostics can report where the compiler currently is.
4353 mostRecentlyActivePhase = phase;
4356 //------------------------------------------------------------------------
4357 // EndPhase: finish execution of a phase
4360 //    phase - the phase that has just finished
4362 void Compiler::EndPhase(Phases phase)
// When per-phase timing is compiled in, close out this phase's timer.
4364 #if defined(FEATURE_JIT_METHOD_PERF)
4365 if (pCompJitTimer != nullptr)
4367 pCompJitTimer->EndPhase(this, phase);
// The just-finished phase remains the "most recently active" one until the
// next BeginPhase call.
4371 mostRecentlyActivePhase = phase;
4374 //------------------------------------------------------------------------
4375 // compCompile: run phases needed for compilation
4378 // methodCodePtr [OUT] - address of generated code
4379 // methodCodeSize [OUT] - size of the generated code (hot + cold setions)
4380 // compileFlags [IN] - flags controlling jit behavior
4383 // This is the most interesting 'toplevel' function in the JIT. It goes through the operations of
4384 // importing, morphing, optimizations and code generation. This is called from the EE through the
4385 // code:CILJit::compileMethod function.
4387 // For an overview of the structure of the JIT, see:
4388 // https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md
4390 // Also called for inlinees, though they will only be run through the first few phases.
4392 void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags)
4394 // Prepare for importation
4396 auto preImportPhase = [this]() {
4397 if (compIsForInlining())
4399 // Notify root instance that an inline attempt is about to import IL
4400 impInlineRoot()->m_inlineStrategy->NoteImport();
4405 VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
4407 // The temp holding the secret stub argument is used by fgImport() when importing the intrinsic.
4408 if (info.compPublishStubParam)
4410 assert(lvaStubArgumentVar == BAD_VAR_NUM);
4411 lvaStubArgumentVar = lvaGrabTempWithImplicitUse(false DEBUGARG("stub argument"));
4412 lvaTable[lvaStubArgumentVar].lvType = TYP_I_IMPL;
4415 DoPhase(this, PHASE_PRE_IMPORT, preImportPhase);
4417 compFunctionTraceStart();
4419 // Incorporate profile data.
4421 // Note: the importer is sensitive to block weights, so this has
4422 // to happen before importation.
4424 DoPhase(this, PHASE_INCPROFILE, &Compiler::fgIncorporateProfileData);
4426 // If we're going to instrument code, we may need to prepare before
4429 if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
4431 DoPhase(this, PHASE_IBCPREP, &Compiler::fgPrepareToInstrumentMethod);
4434 // Import: convert the instrs in each basic block to a tree based intermediate representation
4436 DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport);
4438 // If instrumenting, add block and class probes.
4440 if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
4442 DoPhase(this, PHASE_IBCINSTR, &Compiler::fgInstrumentMethod);
4445 // Transform indirect calls that require control flow expansion.
4447 DoPhase(this, PHASE_INDXCALL, &Compiler::fgTransformIndirectCalls);
4449 // Expand any patchpoints
4451 DoPhase(this, PHASE_PATCHPOINTS, &Compiler::fgTransformPatchpoints);
4453 // PostImportPhase: cleanup inlinees
4455 auto postImportPhase = [this]() {
4457 // If this is a viable inline candidate
4458 if (compIsForInlining() && !compDonotInline())
4460 // Filter out unimported BBs
4461 fgRemoveEmptyBlocks();
4463 // Update type of return spill temp if we have gathered
4464 // better info when importing the inlinee, and the return
4465 // spill temp is single def.
4466 if (fgNeedReturnSpillTemp())
4468 CORINFO_CLASS_HANDLE retExprClassHnd = impInlineInfo->retExprClassHnd;
4469 if (retExprClassHnd != nullptr)
4471 LclVarDsc* returnSpillVarDsc = lvaGetDesc(lvaInlineeReturnSpillTemp);
4473 if (returnSpillVarDsc->lvSingleDef)
4475 lvaUpdateClass(lvaInlineeReturnSpillTemp, retExprClassHnd,
4476 impInlineInfo->retExprClassHndIsExact);
4482 DoPhase(this, PHASE_POST_IMPORT, postImportPhase);
4484 // If we're importing for inlining, we're done.
4485 if (compIsForInlining())
4488 #ifdef FEATURE_JIT_METHOD_PERF
4489 if (pCompJitTimer != nullptr)
4491 #if MEASURE_CLRAPI_CALLS
4492 EndPhase(PHASE_CLR_API);
4494 pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false);
4501 // At this point in the phase list, all the inlinee phases have
4502 // been run, and inlinee compiles have exited, so we should only
4503 // get this far if we are jitting the root method.
4504 noway_assert(!compIsForInlining());
4506 // Maybe the caller was not interested in generating code
4507 if (compIsForImportOnly())
4509 compFunctionTraceEnd(nullptr, 0, false);
4514 // If we aren't yet supporting EH in a compiler bring-up, remove as many EH handlers as possible, so
4515 // we can pass tests that contain try/catch EH, but don't actually throw any exceptions.
4517 #endif // !FEATURE_EH
4519 // We could allow ESP frames. Just need to reserve space for
4520 // pushing EBP if the method becomes an EBP-frame after an edit.
4521 // Note that requiring a EBP Frame disallows double alignment. Thus if we change this
4522 // we either have to disallow double alignment for E&C some other way or handle it in EETwain.
4524 if (opts.compDbgEnC)
4526 codeGen->setFramePointerRequired(true);
4528 // We don't care about localloc right now. If we do support it,
4529 // EECodeManager::FixContextForEnC() needs to handle it smartly
4530 // in case the localloc was actually executed.
4532 // compLocallocUsed = true;
4535 // Start phases that are broadly called morphing, and includes
4536 // global morph, as well as other phases that massage the trees so
4537 // that we can generate code out of them.
4539 auto morphInitPhase = [this]() {
4541 // Initialize the BlockSet epoch
4542 NewBasicBlockEpoch();
4544 fgOutgoingArgTemps = nullptr;
4546 // Insert call to class constructor as the first basic block if
4547 // we were asked to do so.
4548 if (info.compCompHnd->initClass(nullptr /* field */, nullptr /* method */,
4549 impTokenLookupContextHandle /* context */) &
4550 CORINFO_INITCLASS_USE_HELPER)
4552 fgEnsureFirstBBisScratch();
4553 fgNewStmtAtBeg(fgFirstBB, fgInitThisClass());
4557 if (opts.compGcChecks)
4559 for (unsigned i = 0; i < info.compArgsCount; i++)
4561 if (lvaTable[i].TypeGet() == TYP_REF)
4563 // confirm that the argument is a GC pointer (for debugging (GC stress))
4564 GenTree* op = gtNewLclvNode(i, TYP_REF);
4565 GenTreeCall::Use* args = gtNewCallArgs(op);
4566 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args);
4568 fgEnsureFirstBBisScratch();
4569 fgNewStmtAtEnd(fgFirstBB, op);
4573 printf("\ncompGcChecks tree:\n");
4581 #if defined(DEBUG) && defined(TARGET_XARCH)
4582 if (opts.compStackCheckOnRet)
4584 lvaReturnSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck"));
4585 lvaTable[lvaReturnSpCheck].lvType = TYP_I_IMPL;
4587 #endif // defined(DEBUG) && defined(TARGET_XARCH)
4589 #if defined(DEBUG) && defined(TARGET_X86)
4590 if (opts.compStackCheckOnCall)
4592 lvaCallSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck"));
4593 lvaTable[lvaCallSpCheck].lvType = TYP_I_IMPL;
4595 #endif // defined(DEBUG) && defined(TARGET_X86)
4597 // Filter out unimported BBs
4598 fgRemoveEmptyBlocks();
4600 DoPhase(this, PHASE_MORPH_INIT, morphInitPhase);
4603 // Inliner could add basic blocks. Check that the flowgraph data is up-to-date
4604 fgDebugCheckBBlist(false, false);
4607 // Inline callee methods into this root method
4609 DoPhase(this, PHASE_MORPH_INLINE, &Compiler::fgInline);
4611 // Record "start" values for post-inlining cycles and elapsed time.
4612 RecordStateAtEndOfInlining();
4614 // Transform each GT_ALLOCOBJ node into either an allocation helper call or
4615 // local variable allocation on the stack.
4616 ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
4618 if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled())
4620 objectAllocator.EnableObjectStackAllocation();
4623 objectAllocator.Run();
4625 // Add any internal blocks/trees we may need
4627 DoPhase(this, PHASE_MORPH_ADD_INTERNAL, &Compiler::fgAddInternal);
4629 // Remove empty try regions
4631 DoPhase(this, PHASE_EMPTY_TRY, &Compiler::fgRemoveEmptyTry);
4633 // Remove empty finally regions
4635 DoPhase(this, PHASE_EMPTY_FINALLY, &Compiler::fgRemoveEmptyFinally);
4637 // Streamline chains of finally invocations
4639 DoPhase(this, PHASE_MERGE_FINALLY_CHAINS, &Compiler::fgMergeFinallyChains);
4641 // Clone code in finallys to reduce overhead for non-exceptional paths
4643 DoPhase(this, PHASE_CLONE_FINALLY, &Compiler::fgCloneFinally);
4645 #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4647 // Update finally target flags after EH optimizations
4649 DoPhase(this, PHASE_UPDATE_FINALLY_FLAGS, &Compiler::fgUpdateFinallyTargetFlags);
4651 #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4656 unsigned methHash = info.compMethodHash();
4657 char* lostr = getenv("JitEHWTHashLo");
4658 unsigned methHashLo = 0;
4660 if (lostr != nullptr)
4662 sscanf_s(lostr, "%x", &methHashLo);
4665 char* histr = getenv("JitEHWTHashHi");
4666 unsigned methHashHi = UINT32_MAX;
4667 if (histr != nullptr)
4669 sscanf_s(histr, "%x", &methHashHi);
4672 if (methHash < methHashLo || methHash > methHashHi)
4674 lvaEnregEHVars = false;
4678 printf("Enregistering EH Vars for method %s, hash = 0x%x.\n", info.compFullName, info.compMethodHash());
4679 printf(""); // flush
4682 if (lvaEnregMultiRegVars)
4684 unsigned methHash = info.compMethodHash();
4685 char* lostr = getenv("JitMultiRegHashLo");
4686 unsigned methHashLo = 0;
4688 if (lostr != nullptr)
4690 sscanf_s(lostr, "%x", &methHashLo);
4693 char* histr = getenv("JitMultiRegHashHi");
4694 unsigned methHashHi = UINT32_MAX;
4695 if (histr != nullptr)
4697 sscanf_s(histr, "%x", &methHashHi);
4700 if (methHash < methHashLo || methHash > methHashHi)
4702 lvaEnregMultiRegVars = false;
4706 printf("Enregistering MultiReg Vars for method %s, hash = 0x%x.\n", info.compFullName,
4707 info.compMethodHash());
4708 printf(""); // flush
4713 // Compute bbNum, bbRefs and bbPreds
4715 // This is the first time full (not cheap) preds will be computed.
4716 // And, if we have profile data, we can now check integrity.
4718 // From this point on the flowgraph information such as bbNum,
4719 // bbRefs or bbPreds has to be kept updated.
4721 auto computePredsPhase = [this]() {
4722 JITDUMP("\nRenumbering the basic blocks for fgComputePred\n");
4724 noway_assert(!fgComputePredsDone);
4727 DoPhase(this, PHASE_COMPUTE_PREDS, computePredsPhase);
4729 // Now that we have pred lists, do some flow-related optimizations
4731 if (opts.OptimizationEnabled())
4733 // Merge common throw blocks
4735 DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows);
4737 // Run an early flow graph simplification pass
4739 auto earlyUpdateFlowGraphPhase = [this]() {
4740 const bool doTailDup = false;
4741 fgUpdateFlowGraph(doTailDup);
4743 DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, earlyUpdateFlowGraphPhase);
4746 // Promote struct locals
4748 auto promoteStructsPhase = [this]() {
4750 // For x64 and ARM64 we need to mark irregular parameters
4751 lvaRefCountState = RCS_EARLY;
4752 fgResetImplicitByRefRefCount();
4756 DoPhase(this, PHASE_PROMOTE_STRUCTS, promoteStructsPhase);
4758 // Figure out what locals are address-taken.
4760 DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals);
4762 // Apply the type update to implicit byref parameters; also choose (based on address-exposed
4763 // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard.
4765 DoPhase(this, PHASE_MORPH_IMPBYREF, &Compiler::fgRetypeImplicitByRefArgs);
4768 // Now that locals have address-taken and implicit byref marked, we can safely apply stress.
4770 fgStress64RsltMul();
4773 // Morph the trees in all the blocks of the method
4775 auto morphGlobalPhase = [this]() {
4776 unsigned prevBBCount = fgBBcount;
4779 // Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args
4780 fgMarkDemotedImplicitByRefArgs();
4781 lvaRefCountState = RCS_INVALID;
4783 #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4784 if (fgNeedToAddFinallyTargetBits)
4786 // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back.
4787 fgAddFinallyTargetFlags();
4788 fgNeedToAddFinallyTargetBits = false;
4790 #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
4792 // Decide the kind of code we want to generate
4795 fgExpandQmarkNodes();
4798 compCurBB = nullptr;
4801 // If we needed to create any new BasicBlocks then renumber the blocks
4802 if (fgBBcount > prevBBCount)
4807 // We can now enable all phase checking
4808 activePhaseChecks = PhaseChecks::CHECK_ALL;
4810 DoPhase(this, PHASE_MORPH_GLOBAL, morphGlobalPhase);
4812 // GS security checks for unsafe buffers
4814 auto gsPhase = [this]() {
4815 unsigned prevBBCount = fgBBcount;
4816 if (getNeedsGSSecurityCookie())
4818 gsGSChecksInitCookie();
4820 if (compGSReorderStackLayout)
4822 gsCopyShadowParams();
4825 // If we needed to create any new BasicBlocks then renumber the blocks
4826 if (fgBBcount > prevBBCount)
4833 JITDUMP("No GS security needed\n");
4836 DoPhase(this, PHASE_GS_COOKIE, gsPhase);
4838 // Compute the block and edge weights
4840 DoPhase(this, PHASE_COMPUTE_EDGE_WEIGHTS, &Compiler::fgComputeBlockAndEdgeWeights);
4842 #if defined(FEATURE_EH_FUNCLETS)
4844 // Create funclets from the EH handlers.
4846 DoPhase(this, PHASE_CREATE_FUNCLETS, &Compiler::fgCreateFunclets);
4848 #endif // FEATURE_EH_FUNCLETS
4850 if (opts.OptimizationEnabled())
4854 DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops);
4856 // Optimize block order
4858 DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
4860 // Compute reachability sets and dominators.
4862 DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability);
4864 // Discover and classify natural loops
4865 // (e.g. mark iterative loops as such). Also marks loop blocks
4866 // and sets bbWeight to the loop nesting levels
4868 DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoops);
4870 // Clone loops with optimization opportunities, and
4871 // choose the one based on dynamic condition evaluation.
4873 DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops);
4877 DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops);
4881 fgDebugCheckLinks();
4884 // Create the variable table (and compute variable ref counts)
4886 DoPhase(this, PHASE_MARK_LOCAL_VARS, &Compiler::lvaMarkLocalVars);
4888 // IMPORTANT, after this point, locals are ref counted.
4889 // However, ref counts are not kept incrementally up to date.
4890 assert(lvaLocalVarRefCounted());
4892 if (opts.OptimizationEnabled())
4894 // Optimize boolean conditions
4896 DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools);
4898 // optOptimizeBools() might have changed the number of blocks; the dominators/reachability might be bad.
4901 // Figure out the order in which operators are to be evaluated
4903 DoPhase(this, PHASE_FIND_OPER_ORDER, &Compiler::fgFindOperOrder);
4905 // Weave the tree lists. Anyone who modifies the tree shapes after
4906 // this point is responsible for calling fgSetStmtSeq() to keep the
4907 // nodes properly linked.
4908 // This can create GC poll calls, and create new BasicBlocks (without updating dominators/reachability).
4910 DoPhase(this, PHASE_SET_BLOCK_ORDER, &Compiler::fgSetBlockOrder);
4912 // At this point we know if we are fully interruptible or not
4913 if (opts.OptimizationEnabled())
4916 bool doEarlyProp = true;
4917 bool doValueNum = true;
4918 bool doLoopHoisting = true;
4919 bool doCopyProp = true;
4920 bool doBranchOpt = true;
4921 bool doAssertionProp = true;
4922 bool doRangeAnalysis = true;
4925 #if defined(OPT_CONFIG)
4926 doSsa = (JitConfig.JitDoSsa() != 0);
4927 doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
4928 doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
4929 doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
4930 doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
4931 doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0);
4932 doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
4933 doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
4937 iterations = JitConfig.JitOptRepeatCount();
4939 #endif // defined(OPT_CONFIG)
4941 while (iterations > 0)
4945 // Build up SSA form for the IR
4947 DoPhase(this, PHASE_BUILD_SSA, &Compiler::fgSsaBuild);
4952 // Propagate array length and rewrite getType() method call
4954 DoPhase(this, PHASE_EARLY_PROP, &Compiler::optEarlyProp);
4959 // Value number the trees
4961 DoPhase(this, PHASE_VALUE_NUMBER, &Compiler::fgValueNumber);
4966 // Hoist invariant code out of loops
4968 DoPhase(this, PHASE_HOIST_LOOP_CODE, &Compiler::optHoistLoopCode);
4973 // Perform VN based copy propagation
4975 DoPhase(this, PHASE_VN_COPY_PROP, &Compiler::optVnCopyProp);
4980 DoPhase(this, PHASE_OPTIMIZE_BRANCHES, &Compiler::optRedundantBranches);
4984 // Remove common sub-expressions
4986 DoPhase(this, PHASE_OPTIMIZE_VALNUM_CSES, &Compiler::optOptimizeCSEs);
4987 #endif // FEATURE_ANYCSE
4990 if (doAssertionProp)
4992 // Assertion propagation
4994 DoPhase(this, PHASE_ASSERTION_PROP_MAIN, &Compiler::optAssertionPropMain);
4997 if (doRangeAnalysis)
4999 auto rangePhase = [this]() {
5000 RangeCheck rc(this);
5001 rc.OptimizeRangeChecks();
5004 // Bounds check elimination via range analysis
5006 DoPhase(this, PHASE_OPTIMIZE_INDEX_CHECKS, rangePhase);
5008 #endif // ASSERTION_PROP
5012 // update the flowgraph if we modified it during the optimization phase
5014 auto optUpdateFlowGraphPhase = [this]() {
5015 const bool doTailDup = false;
5016 fgUpdateFlowGraph(doTailDup);
5018 DoPhase(this, PHASE_OPT_UPDATE_FLOW_GRAPH, optUpdateFlowGraphPhase);
5020 // Recompute the edge weight if we have modified the flow graph
5022 DoPhase(this, PHASE_COMPUTE_EDGE_WEIGHTS2, &Compiler::fgComputeEdgeWeights);
5025 // Iterate if requested, resetting annotations first.
5026 if (--iterations == 0)
5030 ResetOptAnnotations();
5031 RecomputeLoopInfo();
5036 // Check if we need to add the Quirk for the PPP backward compat issue
5037 compQuirkForPPPflag = compQuirkForPPP();
5041 DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
5043 // Determine start of cold region if we are hot/cold splitting
5045 DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
5048 fgDebugCheckLinks(compStressCompile(STRESS_REMORPH_TREES, 50));
5050 // Stash the current estimate of the function's size if necessary.
5053 compSizeEstimate = 0;
5054 compCycleEstimate = 0;
5055 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
5057 for (Statement* stmt : block->Statements())
5059 compSizeEstimate += stmt->GetCostSz();
5060 compCycleEstimate += stmt->GetCostEx();
5066 // rationalize trees
5067 Rationalizer rat(this); // PHASE_RATIONALIZE
5070 // Here we do "simple lowering". When the RyuJIT backend works for all
5071 // platforms, this will be part of the more general lowering phase. For now, though, we do a separate
5072 // pass of "final lowering." We must do this before (final) liveness analysis, because this creates
5073 // range check throw blocks, in which the liveness must be correct.
5075 DoPhase(this, PHASE_SIMPLE_LOWERING, &Compiler::fgSimpleLowering);
5078 fgDebugCheckBBlist();
5079 fgDebugCheckLinks();
5082 // Enable this to gather statistical data such as
5083 // call and register argument info, flowgraph and loop info, etc.
5087 if (compLocallocUsed)
5089 // We reserve REG_SAVED_LOCALLOC_SP to store SP on entry for stack unwinding
5090 codeGen->regSet.rsMaskResvd |= RBM_SAVED_LOCALLOC_SP;
5092 #endif // TARGET_ARM
5094 // Assign registers to variables, etc.
5096 ///////////////////////////////////////////////////////////////////////////////
5097 // Dominator and reachability sets are no longer valid. They haven't been
5098 // maintained up to here, and shouldn't be used (unless recomputed).
5099 ///////////////////////////////////////////////////////////////////////////////
5100 fgDomsComputed = false;
5102 // Create LinearScan before Lowering, so that Lowering can call LinearScan methods
5103 // for determining whether locals are register candidates and (for xarch) whether
5104 // a node is a containable memory op.
5105 m_pLinearScan = getLinearScanAllocator(this);
5109 m_pLowering = new (this, CMK_LSRA) Lowering(this, m_pLinearScan); // PHASE_LOWERING
5112 #if !defined(OSX_ARM64_ABI)
5113 // Set stack levels; this information is necessary for x86
5114 // but on other platforms it is used only in asserts.
5115 // TODO: do not run it in release on other platforms, see https://github.com/dotnet/runtime/issues/42673.
5116 StackLevelSetter stackLevelSetter(this);
5117 stackLevelSetter.Run();
5118 #endif // !OSX_ARM64_ABI
5120 // We cannot add any new tracked variables after this point.
5121 lvaTrackedFixed = true;
5123 // Now that lowering is completed we can proceed to perform register allocation
5125 auto linearScanPhase = [this]() { m_pLinearScan->doLinearScan(); };
5126 DoPhase(this, PHASE_LINEAR_SCAN, linearScanPhase);
5128 // Copied from rpPredictRegUse()
5129 SetFullPtrRegMapRequired(codeGen->GetInterruptible() || !codeGen->isFramePointerUsed());
5132 fgDebugCheckLinks();
5136 codeGen->genGenerateCode(methodCodePtr, methodCodeSize);
5138 // We're done -- set the active phase to the last phase
5139 // (which isn't really a phase)
5140 mostRecentlyActivePhase = PHASE_POST_EMIT;
5142 #ifdef FEATURE_JIT_METHOD_PERF
5145 #if MEASURE_CLRAPI_CALLS
5146 EndPhase(PHASE_CLR_API);
5148 EndPhase(PHASE_POST_EMIT);
5150 pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, true);
5154 // Generate PatchpointInfo
5155 generatePatchpointInfo();
5157 RecordStateAtEndOfCompilation();
5159 #ifdef FEATURE_TRACELOGGING
5160 compJitTelemetry.NotifyEndOfCompilation();
5164 ++Compiler::jitTotalMethodCompiled;
5165 #endif // defined(DEBUG)
5167 compFunctionTraceEnd(*methodCodePtr, *methodCodeSize, false);
5168 JITDUMP("Method code size: %d\n", (unsigned)(*methodCodeSize));
5170 #if FUNC_INFO_LOGGING
5171 if (compJitFuncInfoFile != nullptr)
5173 assert(!compIsForInlining());
5174 #ifdef DEBUG // We only have access to info.compFullName in DEBUG builds.
5175 fprintf(compJitFuncInfoFile, "%s\n", info.compFullName);
5177 fprintf(compJitFuncInfoFile, " %s\n", eeGetMethodFullName(info.compMethodHnd));
5179 fprintf(compJitFuncInfoFile, ""); // in our logic this causes a flush
5181 #endif // FUNC_INFO_LOGGING
5184 //------------------------------------------------------------------------
5185 // generatePatchpointInfo: allocate and fill in patchpoint info data,
5186 // and report it to the VM
5188 void Compiler::generatePatchpointInfo()
// Early out: only methods that actually contain patchpoints report OSR info.
5190 if (!doesMethodHavePatchpoints())
5192 // Nothing to report
5196 // Patchpoints are only found in Tier0 code, which is unoptimized, and so
5197 // should always have frame pointer.
5198 assert(codeGen->isFramePointerUsed());
5200 // Allocate patchpoint info storage from runtime, and fill in initial bits of data.
// Size is driven by the IL locals count; the runtime owns this allocation.
5201 const unsigned patchpointInfoSize = PatchpointInfo::ComputeSize(info.compLocalsCount);
5202 PatchpointInfo* const patchpointInfo = (PatchpointInfo*)info.compCompHnd->allocateArray(patchpointInfoSize);
5204 // The +TARGET_POINTER_SIZE here is to account for the extra slot the runtime
5205 // creates when it simulates calling the OSR method (the "pseudo return address" slot).
5206 patchpointInfo->Initialize(info.compLocalsCount, codeGen->genSPtoFPdelta() + TARGET_POINTER_SIZE);
5208 JITDUMP("--OSR--- FP-SP delta is %d\n", patchpointInfo->FpToSpDelta());
5210 // We record offsets for all the "locals" here. Could restrict
5211 // this to just the IL locals with some extra logic, and save a bit of space,
5212 // but would need to adjust all consumers, too.
5213 for (unsigned lclNum = 0; lclNum < info.compLocalsCount; lclNum++)
5215 LclVarDsc* const varDsc = lvaGetDesc(lclNum);
5217 // We expect all these to have stack homes, and be FP relative
5218 assert(varDsc->lvOnFrame);
5219 assert(varDsc->lvFramePointerBased);
5221 // Record FramePtr relative offset (no localloc yet)
5222 patchpointInfo->SetOffset(lclNum, varDsc->GetStackOffset());
5224 // Note if IL stream contained an address-of that potentially leads to exposure.
5225 // This bit of IL may be skipped by OSR partial importation.
5226 if (varDsc->lvHasLdAddrOp)
5228 patchpointInfo->SetIsExposed(lclNum);
5231 JITDUMP("--OSR-- V%02u is at offset %d%s\n", lclNum, patchpointInfo->Offset(lclNum),
5232 patchpointInfo->IsExposed(lclNum) ? " (exposed)" : "");
// Record the generic context arg location (caller-SP relative) when it must be reported.
5237 if (lvaReportParamTypeArg() || lvaKeepAliveAndReportThis())
5239 const int offset = lvaToCallerSPRelativeOffset(lvaCachedGenericContextArgOffset(), true);
5240 patchpointInfo->SetGenericContextArgOffset(offset);
5241 JITDUMP("--OSR-- cached generic context offset is CallerSP %d\n", patchpointInfo->GenericContextArgOffset());
// Record the kept-alive 'this' location (FP relative) when required.
5244 if (lvaKeepAliveAndReportThis())
5246 const int offset = lvaCachedGenericContextArgOffset();
5247 patchpointInfo->SetKeptAliveThisOffset(offset);
5248 JITDUMP("--OSR-- kept-alive this offset is FP %d\n", patchpointInfo->KeptAliveThisOffset());
// Record the GS security cookie location if the stack layout was GS-reordered.
5251 if (compGSReorderStackLayout)
5253 assert(lvaGSSecurityCookie != BAD_VAR_NUM);
5254 LclVarDsc* const varDsc = lvaGetDesc(lvaGSSecurityCookie);
5255 patchpointInfo->SetSecurityCookieOffset(varDsc->GetStackOffset());
5256 JITDUMP("--OSR-- security cookie V%02u offset is FP %d\n", lvaGSSecurityCookie,
5257 patchpointInfo->SecurityCookieOffset());
5260 // Register this with the runtime.
5261 info.compCompHnd->setPatchpointInfo(patchpointInfo);
5264 //------------------------------------------------------------------------
5265 // ResetOptAnnotations: Clear annotations produced during global optimizations.
5268 // The intent of this method is to clear any information typically assumed
5269 // to be set only once; it is used between iterations when JitOptRepeat is
5272 void Compiler::ResetOptAnnotations()
// Only meaningful under JitOptRepeat; both config knobs must agree.
5274 assert(opts.optRepeat);
5275 assert(JitConfig.JitOptRepeatCount() > 0);
// Drop per-iteration caches so the next opt pass rebuilds them from scratch.
5278 m_opAsgnVarDefSsaNums = nullptr;
5279 m_blockToEHPreds = nullptr;
5280 fgSsaPassesCompleted = 0;
5281 fgVNPassesCompleted = 0;
// Walk every tree node in every statement and clear per-node optimization
// annotations (assertions, CSE candidate marks) left by the previous iteration.
5283 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
5285 for (Statement* stmt : block->Statements())
5287 for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext)
5290 tree->ClearAssertion();
5291 tree->gtCSEnum = NO_CSE;
5297 //------------------------------------------------------------------------
5298 // RecomputeLoopInfo: Recompute loop annotations between opt-repeat iterations.
5301 // The intent of this method is to update loop structure annotations, and those
5302 // they depend on; these annotations may have become stale during optimization,
5303 // and need to be up-to-date before running another iteration of optimizations.
5305 void Compiler::RecomputeLoopInfo()
// Only meaningful under JitOptRepeat; both config knobs must agree.
5307 assert(opts.optRepeat);
5308 assert(JitConfig.JitOptRepeatCount() > 0);
5309 // Recompute reachability sets, dominators, and loops.
// Invalidate dominators and clear per-block loop flags so the recompute is clean.
5311 fgDomsComputed = false;
5312 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
5314 block->bbFlags &= ~BBF_LOOP_FLAGS;
5316 fgComputeReachability();
5317 // Rebuild the loop tree annotations themselves
5321 /*****************************************************************************/
5322 void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
5327 // Check if we need to add the Quirk for the PPP backward compat issue.
5328 // This Quirk addresses a compatibility issue between the new RyuJit and the previous JIT64.
5329 // A backward compatibility issue called 'PPP' exists where a PInvoke call passes a 32-byte struct
5330 // into a native API which basically writes 48 bytes of data into the struct.
5331 // With the stack frame layout used by the RyuJIT the extra 16 bytes written corrupts a
5332 // caller saved register and this leads to an A/V in the calling method.
5333 // The older JIT64 jit compiler just happened to have a different stack layout and/or
5334 // caller saved register set so that it didn't hit the A/V in the caller.
5335 // By increasing the amount of stack allocated for the struct by 32 bytes we can fix this.
5337 // Return true if we actually perform the Quirk, otherwise return false
5339 bool Compiler::compQuirkForPPP()
5342 { // We require that there are exactly two locals
5346 if (compTailCallUsed)
5347 { // Don't try this quirk if a tail call was used
// Track whether we saw the outgoing-arg-space local and the candidate struct.
5351 bool hasOutArgs = false;
5352 LclVarDsc* varDscExposedStruct = nullptr;
5357 /* Look for struct locals that are address taken */
5358 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5360 if (varDsc->lvIsParam) // It can't be a parameter
5365 // We require that the OutgoingArg space lclVar exists
5366 if (lclNum == lvaOutgoingArgSpaceVar)
5368 hasOutArgs = true; // Record that we saw it
5372 // Look for a 32-byte address exposed Struct and record its varDsc
5373 if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvAddrExposed && (varDsc->lvExactSize == 32))
5375 varDscExposedStruct = varDsc;
5379 // We only perform the Quirk when there are two locals
5380 // one of them is a address exposed struct of size 32
5381 // and the other is the outgoing arg space local
5383 if (hasOutArgs && (varDscExposedStruct != nullptr))
5388 printf("\nAdding a backwards compatibility quirk for the 'PPP' issue\n");
5392 // Increase the exact size of this struct by 32 bytes
5393 // This fixes the PPP backward compat issue
5394 varDscExposedStruct->lvExactSize += 32;
5396 // The struct is now 64 bytes.
5397 // We're on x64 so this should be 8 pointer slots.
5398 assert((varDscExposedStruct->lvExactSize / TARGET_POINTER_SIZE) == 8);
// Swap in the special quirk layout so GC info matches the enlarged size.
5400 varDscExposedStruct->SetLayout(
5401 varDscExposedStruct->GetLayout()->GetPPPQuirkLayout(getAllocator(CMK_ClassLayout)));
5407 #endif // TARGET_AMD64
5409 /*****************************************************************************/
5412 void* forceFrameJIT; // used to force to frame & useful for fastchecked debugging
// skipMethod: decide whether this method should be skipped (not jitted),
// driven by the JitRange / JitExclude / JitInclude config settings.
// NOTE(review): the return statements for the three checks are elided in this
// excerpt; presumably each failing check returns true — confirm against the file.
5414 bool Compiler::skipMethod()
// Lazily-initialized, process-wide range; EnsureInit is a no-op after first call.
5416 static ConfigMethodRange fJitRange;
5417 fJitRange.EnsureInit(JitConfig.JitRange());
5418 assert(!fJitRange.Error());
5420 // Normally JitConfig.JitRange() is null, we don't want to skip
5421 // jitting any methods.
5423 // So, the logic below relies on the fact that a null range string
5424 // passed to ConfigMethodRange represents the set of all methods.
5426 if (!fJitRange.Contains(info.compMethodHash()))
// Skip methods explicitly excluded by name/class/signature.
5431 if (JitConfig.JitExclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
// If an include list is given, skip anything not on it.
5436 if (!JitConfig.JitInclude().isEmpty() &&
5437 !JitConfig.JitInclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
5447 /*****************************************************************************/
5449 int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
5450 void** methodCodePtr,
5451 uint32_t* methodCodeSize,
5452 JitFlags* compileFlags)
5454 // compInit should have set these already.
5455 noway_assert(info.compMethodInfo != nullptr);
5456 noway_assert(info.compCompHnd != nullptr);
5457 noway_assert(info.compMethodHnd != nullptr);
5459 #ifdef FEATURE_JIT_METHOD_PERF
5460 static bool checkedForJitTimeLog = false;
5462 pCompJitTimer = nullptr;
5464 if (!checkedForJitTimeLog)
5466 // Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
5467 // retail builds. Do not call the regular Config helper here as it would pull
5468 // in a copy of the config parser into the clrjit.dll.
5469 InterlockedCompareExchangeT(&Compiler::compJitTimeLogFilename,
5470 (LPCWSTR)info.compCompHnd->getJitTimeLogFilename(), NULL);
5472 // At a process or module boundary clear the file and start afresh.
5473 JitTimer::PrintCsvHeader();
5475 checkedForJitTimeLog = true;
5477 if ((Compiler::compJitTimeLogFilename != nullptr) || (JitTimeLogCsv() != nullptr))
5479 pCompJitTimer = JitTimer::Create(this, info.compMethodInfo->ILCodeSize);
5481 #endif // FEATURE_JIT_METHOD_PERF
5484 Compiler* me = this;
5485 forceFrameJIT = (void*)&me; // let us see the this pointer in fastchecked build
5486 // set this early so we can use it without relying on random memory values
5487 verbose = compIsForInlining() ? impInlineInfo->InlinerCompiler->verbose : false;
5490 #if FUNC_INFO_LOGGING
5491 LPCWSTR tmpJitFuncInfoFilename = JitConfig.JitFuncInfoFile();
5493 if (tmpJitFuncInfoFilename != nullptr)
5495 LPCWSTR oldFuncInfoFileName =
5496 InterlockedCompareExchangeT(&compJitFuncInfoFilename, tmpJitFuncInfoFilename, NULL);
5497 if (oldFuncInfoFileName == nullptr)
5499 assert(compJitFuncInfoFile == nullptr);
5500 compJitFuncInfoFile = _wfopen(compJitFuncInfoFilename, W("a"));
5501 if (compJitFuncInfoFile == nullptr)
5503 #if defined(DEBUG) && !defined(HOST_UNIX) // no 'perror' in the PAL
5504 perror("Failed to open JitFuncInfoLogFile");
5505 #endif // defined(DEBUG) && !defined(HOST_UNIX)
5509 #endif // FUNC_INFO_LOGGING
5511 // if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
5513 if (compIsForInlining())
5515 compileFlags->Clear(JitFlags::JIT_FLAG_OSR);
5516 info.compILEntry = 0;
5517 info.compPatchpointInfo = nullptr;
5519 else if (compileFlags->IsSet(JitFlags::JIT_FLAG_OSR))
5521 // Fetch OSR info from the runtime
5522 info.compPatchpointInfo = info.compCompHnd->getOSRInfo(&info.compILEntry);
5523 assert(info.compPatchpointInfo != nullptr);
5526 virtualStubParamInfo = new (this, CMK_Unknown) VirtualStubParamInfo(IsTargetAbi(CORINFO_CORERT_ABI));
5528 // compMatchedVM is set to true if both CPU/ABI and OS are matching the execution engine requirements
5530 // Do we have a matched VM? Or are we "abusing" the VM to help us do JIT work (such as using an x86 native VM
5531 // with an ARM-targeting "altjit").
5532 // Match CPU/ABI for compMatchedVM
5533 info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
5535 // Match OS for compMatchedVM
5536 CORINFO_EE_INFO* eeInfo = eeGetEEInfo();
5538 info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_UNIX);
5540 info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_WINNT);
5543 // If we are not compiling for a matched VM, then we are getting JIT flags that don't match our target
5544 // architecture. The two main examples here are an ARM targeting altjit hosted on x86 and an ARM64
5545 // targeting altjit hosted on x64. (Though with cross-bitness work, the host doesn't necessarily need
5546 // to be of the same bitness.) In these cases, we need to fix up the JIT flags to be appropriate for
5547 // the target, as the VM's expected target may overlap bit flags with different meaning to our target.
5548 // Note that it might be better to do this immediately when setting the JIT flags in CILJit::compileMethod()
5549 // (when JitFlags::SetFromFlags() is called), but this is close enough. (To move this logic to
5550 // CILJit::compileMethod() would require moving the info.compMatchedVM computation there as well.)
5552 if (!info.compMatchedVM)
5554 #if defined(TARGET_ARM)
5556 // Currently nothing needs to be done. There are no ARM flags that conflict with other flags.
5558 #endif // defined(TARGET_ARM)
5560 #if defined(TARGET_ARM64)
5562 // The x86/x64 architecture capabilities flags overlap with the ARM64 ones. Set a reasonable architecture
5563 // target default. Currently this is disabling all ARM64 architecture features except FP and SIMD, but this
5564 // should be altered to possibly enable all of them, when they are known to all work.
5566 CORINFO_InstructionSetFlags defaultArm64Flags;
5567 defaultArm64Flags.AddInstructionSet(InstructionSet_ArmBase);
5568 defaultArm64Flags.AddInstructionSet(InstructionSet_AdvSimd);
5569 defaultArm64Flags.Set64BitInstructionSetVariants();
5570 compileFlags->SetInstructionSetFlags(defaultArm64Flags);
5571 #endif // defined(TARGET_ARM64)
5574 compMaxUncheckedOffsetForNullObject = eeGetEEInfo()->maxUncheckedOffsetForNullObject;
5576 // Set the context for token lookup.
5577 if (compIsForInlining())
5579 impTokenLookupContextHandle = impInlineInfo->tokenLookupContextHandle;
5581 assert(impInlineInfo->inlineCandidateInfo->clsHandle == info.compCompHnd->getMethodClass(info.compMethodHnd));
5582 info.compClassHnd = impInlineInfo->inlineCandidateInfo->clsHandle;
5584 assert(impInlineInfo->inlineCandidateInfo->clsAttr == info.compCompHnd->getClassAttribs(info.compClassHnd));
5585 // printf("%x != %x\n", impInlineInfo->inlineCandidateInfo->clsAttr,
5586 // info.compCompHnd->getClassAttribs(info.compClassHnd));
5587 info.compClassAttr = impInlineInfo->inlineCandidateInfo->clsAttr;
5591 impTokenLookupContextHandle = METHOD_BEING_COMPILED_CONTEXT();
5593 info.compClassHnd = info.compCompHnd->getMethodClass(info.compMethodHnd);
5594 info.compClassAttr = info.compCompHnd->getClassAttribs(info.compClassHnd);
5598 if (JitConfig.EnableExtraSuperPmiQueries())
5600 // This call to getClassModule/getModuleAssembly/getAssemblyName fails in crossgen2 due to these
5601 // APIs being unimplemented. So disable this extra info for pre-jit mode. See
5602 // https://github.com/dotnet/runtime/issues/48888.
5603 if (!compileFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
5605 // Get the assembly name, to aid finding any particular SuperPMI method context function
5606 (void)info.compCompHnd->getAssemblyName(
5607 info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
5612 info.compProfilerCallback = false; // Assume false until we are told to hook this method.
5614 #if defined(DEBUG) || defined(LATE_DISASM)
5615 const char* classNamePtr;
5617 info.compMethodName = eeGetMethodName(info.compMethodHnd, &classNamePtr);
5618 unsigned len = (unsigned)roundUp(strlen(classNamePtr) + 1);
5619 info.compClassName = getAllocator(CMK_DebugOnly).allocate<char>(len);
5620 strcpy_s((char*)info.compClassName, len, classNamePtr);
5622 info.compFullName = eeGetMethodFullName(info.compMethodHnd);
5623 info.compPerfScore = 0.0;
5624 #endif // defined(DEBUG) || defined(LATE_DISASM)
5627 if (!compIsForInlining())
5629 JitTls::GetLogEnv()->setCompiler(this);
5632 // Have we been told to be more selective in our Jitting?
5635 if (compIsForInlining())
5637 compInlineResult->NoteFatal(InlineObservation::CALLEE_MARKED_AS_SKIPPED);
5639 return CORJIT_SKIPPED;
5644 // Set this before the first 'BADCODE'
5645 // Skip verification where possible
5646 //.tiVerificationNeeded = !compileFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION);
5647 assert(compileFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION));
5649 assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified.
5651 /* Setup an error trap */
5657 CORINFO_MODULE_HANDLE classPtr;
5658 COMP_HANDLE compHnd;
5659 CORINFO_METHOD_INFO* methodInfo;
5660 void** methodCodePtr;
5661 uint32_t* methodCodeSize;
5662 JitFlags* compileFlags;
5667 param.classPtr = classPtr;
5668 param.compHnd = info.compCompHnd;
5669 param.methodInfo = info.compMethodInfo;
5670 param.methodCodePtr = methodCodePtr;
5671 param.methodCodeSize = methodCodeSize;
5672 param.compileFlags = compileFlags;
5673 param.result = CORJIT_INTERNALERROR;
5675 setErrorTrap(info.compCompHnd, Param*, pParam, ¶m) // ERROR TRAP: Start normal block
5678 pParam->pThis->compCompileHelper(pParam->classPtr, pParam->compHnd, pParam->methodInfo,
5679 pParam->methodCodePtr, pParam->methodCodeSize, pParam->compileFlags);
5681 finallyErrorTrap() // ERROR TRAP: The following block handles errors
5685 if (compIsForInlining())
5690 /* Tell the emitter that we're done with this function */
5692 GetEmitter()->emitEndCG();
5697 endErrorTrap() // ERROR TRAP: End
5699 return param.result;
5702 #if defined(DEBUG) || defined(INLINE_DATA)
5703 //------------------------------------------------------------------------
5704 // compMethodHash: get hash code for currently jitted method
5707 // Hash based on method's full name
5709 unsigned Compiler::Info::compMethodHash() const
5711 if (compMethodHashPrivate == 0)
5713 // compMethodHashPrivate = compCompHnd->getMethodHash(compMethodHnd);
5714 assert(compFullName != nullptr);
5715 assert(*compFullName != 0);
5716 COUNT_T hash = HashStringA(compFullName); // Use compFullName to generate the hash, as it contains the signature
5718 compMethodHashPrivate = hash;
5720 return compMethodHashPrivate;
5723 //------------------------------------------------------------------------
5724 // compMethodHash: get hash code for specified method
5727 // methodHnd - method of interest
5730 // Hash based on method's full name
5732 unsigned Compiler::compMethodHash(CORINFO_METHOD_HANDLE methodHnd)
5734 // If this is the root method, delegate to the caching version
5736 if (methodHnd == info.compMethodHnd)
5738 return info.compMethodHash();
5741 // Else compute from scratch. Might consider caching this too.
5743 unsigned methodHash = 0;
5744 const char* calleeName = eeGetMethodFullName(methodHnd);
5746 if (calleeName != nullptr)
5748 methodHash = HashStringA(calleeName);
5752 methodHash = info.compCompHnd->getMethodHash(methodHnd);
5758 #endif // defined(DEBUG) || defined(INLINE_DATA)
// compCompileFinish: post-compilation bookkeeping for one method. Under the
// various measurement/DEBUG build flags this folds this method's statistics
// into global histograms, checks that small methods stayed within the arena
// page budget, prints a one-line per-method summary row, and optionally
// triggers a debugger break when the JitHalt config matches this method.
void Compiler::compCompileFinish()
#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
#if MEASURE_MEM_ALLOC
// Record arena allocator usage for this method (KB granularity, rounded up).
compArenaAllocator->finishMemStats();
memAllocHist.record((unsigned)((compArenaAllocator->getTotalBytesAllocated() + 1023) / 1024));
memUsedHist.record((unsigned)((compArenaAllocator->getTotalBytesUsed() + 1023) / 1024));
if (s_dspMemStats || verbose)
printf("\nAllocations for %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
compArenaAllocator->dumpMemStats(jitstdout);
#endif // MEASURE_MEM_ALLOC
#if LOOP_HOIST_STATS
AddLoopHoistStats();
#endif // LOOP_HOIST_STATS
#if MEASURE_NODE_SIZE
genTreeNcntHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeCnt));
genTreeNsizHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeSize));
// Small methods should fit in ArenaAllocator::getDefaultPageSize(), or else
// we should bump up ArenaAllocator::getDefaultPageSize()
if ((info.compILCodeSize <= 32) &&     // Is it a reasonably small method?
(info.compNativeCodeSize < 512) &&     // Some trivial methods generate huge native code. eg. pushing a single huge
(impInlinedCodeSize <= 128) &&         // Is the inlining reasonably bounded?
// Small methods cannot meaningfully have a big number of locals
// or arguments. We always track arguments at the start of
// the prolog which requires memory
(info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
(getJitStressLevel() == 0) &&          // We need extra memory for stress
!opts.optRepeat &&                     // We need extra memory to repeat opts
!compArenaAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for
// Factor of 2x is because data-structures are bigger under DEBUG
(compArenaAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
// RyuJIT backend needs memory tuning! TODO-Cleanup: remove this case when memory tuning is complete.
(compArenaAllocator->getTotalBytesAllocated() > (10 * ArenaAllocator::getDefaultPageSize())) &&
!verbose) // We allocate lots of memory to convert sets to strings for JitDump
genSmallMethodsNeedingExtraMemoryCnt++;
// Less than 1% of all methods should run into this.
// We cannot be more strict as there are always degenerate cases where we
// would need extra memory (like huge structs as locals - see lvaSetStruct()).
assert((genMethodCnt < 500) || (genSmallMethodsNeedingExtraMemoryCnt < (genMethodCnt / 100)));
#if defined(DEBUG) || defined(INLINE_DATA)
// Report this method's inlining decisions to the inline data/XML logs.
m_inlineStrategy->DumpData();
m_inlineStrategy->DumpXml();
// mdMethodDef __stdcall CEEInfo::getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod)
mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
// Recover the entry-point basic-block count from the PGO schema, if present.
unsigned profCallCount = 0;
if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
bool foundEntrypointBasicBlockCount = false;
for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++)
if ((fgPgoSchema[iSchema].InstrumentationKind ==
ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) &&
(fgPgoSchema[iSchema].ILOffset == 0))
foundEntrypointBasicBlockCount = true;
profCallCount = *(uint32_t*)(fgPgoData + fgPgoSchema[iSchema].Offset);
assert(foundEntrypointBasicBlockCount);
// Print the summary-table header only once per process.
static bool headerPrinted = false;
headerPrinted = true;
printf(" | Profiled | Method | Method has | calls | Num |LclV |AProp| CSE | Perf |bytes | %3s codesize| \n", Target::g_tgtCPUName);
printf(" mdToken | CNT | RGN | Hash | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Score | IL | HOT | CLD | method name \n");
printf("---------+------+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-----+\n");
// 06001234 | 1234 | HOT | 0f1e2d3c | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 1234.56 | 145 | 1234 | 123 | System.Example(int)
printf("%08X | ", currentMethodToken);
// Profiled call count column: scale to K/M to keep the column narrow.
if (fgHaveProfileData())
if (profCallCount <= 9999)
printf("%4d | ", profCallCount);
else if (profCallCount <= 999500)
printf("%3dK | ", (profCallCount + 500) / 1000);
printf("%3dM | ", (profCallCount + 500000) / 1000000);
// Region column (hot/cold/jitted).
CorInfoRegionKind regionKind = info.compMethodInfo->regionKind;
else if (regionKind == CORINFO_REGION_NONE)
else if (regionKind == CORINFO_REGION_HOT)
else if (regionKind == CORINFO_REGION_COLD)
else if (regionKind == CORINFO_REGION_JIT)
printf("%08x | ", info.compMethodHash());
if (compHndBBtabCount > 0)
// Frame type column (frame pointer / stack pointer / double-aligned).
if (rpFrameType == FT_EBP_FRAME)
printf("%3s | ", STR_FPBASE);
else if (rpFrameType == FT_ESP_FRAME)
printf("%3s | ", STR_SPBASE);
else if (rpFrameType == FT_DOUBLE_ALIGN_FRAME)
else // (rpFrameType == FT_NOT_SET)
// Per-method counters: calls, indirect calls, blocks, locals.
printf(" %3d |", optCallCount);
printf(" %3d |", optIndirectCallCount);
printf(" %3d |", fgBBcountAtCodegen);
printf(" %3d |", lvaCount);
printf(" MinOpts |");
printf(" %3d |", optAssertionCount);
printf(" %3d |", optCSEcount);
printf(" %3d |", 0);
#endif // FEATURE_ANYCSE
// Perf score column: switch precision so very large scores still fit.
if (info.compPerfScore < 9999.995)
printf(" %7.2f |", info.compPerfScore);
printf(" %7.0f |", info.compPerfScore);
printf(" %4d |", info.compMethodInfo->ILCodeSize);
printf(" %5d |", info.compTotalHotCodeSize);
printf(" %3d |", info.compTotalColdCodeSize);
printf(" %s\n", eeGetMethodFullName(info.compMethodHnd));
printf(""); // in our logic this causes a flush
printf("****** DONE compiling %s\n", info.compFullName);
printf(""); // in our logic this causes a flush
// Only call _DbgBreakCheck when we are jitting, not when we are ngen-ing
// For ngen the int3 or breakpoint instruction will be right at the
// start of the ngen method and we will stop when we execute it.
if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
if (compJitHaltMethod())
#if !defined(HOST_UNIX)
// TODO-UNIX: re-enable this when we have an OS that supports a pop-up dialog
// Don't do an assert, but just put up the dialog box so we get just-in-time debugger
// launching. When you hit 'retry' it will continue and naturally stop at the INT 3
// that the JIT put in the code
_DbgBreakCheck(__FILE__, __LINE__, "JitHalt");
#ifdef PSEUDORANDOM_NOP_INSERTION
// this is zlib adler32 checksum. source came from windows base
#define BASE 65521L // largest prime smaller than 65536
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
// Unrolled accumulation steps: DO8 = two DO4 = four DO2 = eight DO1 updates
// of the (s1, s2) running sums over consecutive bytes of 'buf'.
#define DO1(buf, i) \
#define DO2(buf, i) \
#define DO4(buf, i) \
#define DO8(buf, i) \
// adler32: fold 'len' bytes at 'buf' into the running Adler-32 checksum
// 'adler' (pass 0 to start a new checksum). The low 16 bits hold the byte
// sum s1, the high 16 bits the positional sum s2; both are kept mod BASE.
unsigned adler32(unsigned adler, char* buf, unsigned int len)
unsigned int s1 = adler & 0xffff;
unsigned int s2 = (adler >> 16) & 0xffff;
// Process at most NMAX bytes per modulo reduction to avoid overflow.
k = len < NMAX ? len : NMAX;
return (s2 << 16) | s1;
// getMethodBodyChecksum: checksum of the method's IL body; used to seed
// pseudo-random NOP insertion so placement is stable per method. Only
// computes the Adler-32 value when PSEUDORANDOM_NOP_INSERTION is enabled.
unsigned getMethodBodyChecksum(__in_z char* code, int size)
#ifdef PSEUDORANDOM_NOP_INSERTION
return adler32(0, code, size);
//------------------------------------------------------------------------
// compCompileHelper: core of one method compilation (root method or
// inlinee), run under the error trap set up by the caller. Caches the IL,
// initializes options and per-method state, finds basic blocks, picks the
// optimization level, and drives compCompile to produce native code.
//
// NOTE(review): parameter roles below are inferred from usage in this
// function; confirm against the declaration in the header.
//    classPtr       - scope (module) handle used for token lookup
//    compHnd        - the EE's ICorJitInfo interface handle
//    methodInfo     - IL code, signature, and options for the method
//    methodCodePtr  - [out] receives the generated native code
//    methodCodeSize - [out] receives the generated code size
//    compileFlags   - JIT flags for this compilation
//
// Returns a CorJitResult: CORJIT_SKIPPED when the altjit declines the
// method or the VM/target mismatch makes the code unusable.
int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
COMP_HANDLE compHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
uint32_t* methodCodeSize,
JitFlags* compileFlags)
CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
info.compCode = methodInfo->ILCode;
info.compILCodeSize = methodInfo->ILCodeSize;
info.compILImportSize = 0;
if (info.compILCodeSize == 0)
BADCODE("code size is zero");
if (compIsForInlining())
// Sanity check: the attributes recorded at inline-candidate time must
// still match what the EE reports now, ignoring the inline-hint flags.
unsigned methAttr_Old = impInlineInfo->inlineCandidateInfo->methAttr;
unsigned methAttr_New = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
unsigned flagsToIgnore = CORINFO_FLG_DONT_INLINE | CORINFO_FLG_FORCEINLINE;
assert((methAttr_Old & (~flagsToIgnore)) == (methAttr_New & (~flagsToIgnore)));
info.compFlags = impInlineInfo->inlineCandidateInfo->methAttr;
info.compFlags = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
#ifdef PSEUDORANDOM_NOP_INSERTION
info.compChecksum = getMethodBodyChecksum((char*)methodInfo->ILCode, methodInfo->ILCodeSize);
// No tier switch has happened yet for this compilation.
compSwitchedToOptimized = false;
compSwitchedToMinOpts = false;
// compInitOptions will set the correct verbose flag.
compInitOptions(compileFlags);
if (!compIsForInlining() && !opts.altJit && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
// We're an altjit, but the COMPlus_AltJit configuration did not say to compile this method,
// so skip it.
return CORJIT_SKIPPED;
printf("IL to import:\n");
dumpILRange(info.compCode, info.compILCodeSize);
// Check for COMPlus_AggressiveInlining
if (JitConfig.JitAggressiveInlining())
compDoAggressiveInlining = true;
if (compDoAggressiveInlining)
info.compFlags |= CORINFO_FLG_FORCEINLINE;
// Check for ForceInline stress.
if (compStressCompile(STRESS_FORCE_INLINE, 0))
info.compFlags |= CORINFO_FLG_FORCEINLINE;
if (compIsForInlining())
JITLOG((LL_INFO100000, "\nINLINER impTokenLookupContextHandle for %s is 0x%p.\n",
eeGetMethodFullName(info.compMethodHnd), dspPtr(impTokenLookupContextHandle)));
if (tiVerificationNeeded)
JITLOG((LL_INFO10000, "tiVerificationNeeded initially set to true for %s\n", info.compFullName));
/* Since tiVerificationNeeded can be turned off in the middle of
compiling a method, and it might have caused blocks to be queued up
for reimporting, impCanReimport can be used to check for reimporting. */
impCanReimport = (tiVerificationNeeded || compStressCompile(STRESS_CHK_REIMPORT, 15));
/* Initialize a bunch of global values */
info.compScopeHnd = classPtr;
info.compXcptnsCount = methodInfo->EHcount;
info.compMaxStack = methodInfo->maxStack;
compHndBBtab = nullptr;
compHndBBtabCount = 0;
compHndBBtabAllocCount = 0;
info.compNativeCodeSize = 0;
info.compTotalHotCodeSize = 0;
info.compTotalColdCodeSize = 0;
info.compClassProbeCount = 0;
compHasBackwardJump = false;
compCurBB = nullptr;
// Reset node and block ID counter
compStatementID = 0;
compBasicBlockID = 0;
/* Initialize emitter */
if (!compIsForInlining())
codeGen->GetEmitter()->emitBegCG(this, compHnd);
info.compIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
info.compPublishStubParam = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM);
info.compHasNextCallRetAddr = false;
// Calling convention / argument order: reverse P/Invoke stubs use the
// unmanaged convention, everything else the managed one.
if (opts.IsReversePInvoke())
info.compCallConv = info.compCompHnd->getUnmanagedCallConv(methodInfo->ftn, nullptr, &unused);
info.compArgOrder = Target::g_tgtUnmanagedArgOrder;
info.compCallConv = CorInfoCallConvExtension::Managed;
info.compArgOrder = Target::g_tgtArgOrder;
info.compIsVarArgs = false;
switch (methodInfo->args.getCallConv())
case CORINFO_CALLCONV_NATIVEVARARG:
case CORINFO_CALLCONV_VARARG:
info.compIsVarArgs = true;
info.compRetNativeType = info.compRetType = JITtype2varType(methodInfo->args.retType);
info.compUnmanagedCallCountWithGCTransition = 0;
info.compLvFrameListRoot = BAD_VAR_NUM;
info.compInitMem = ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0);
/* Allocate the local variable table */
if (!compIsForInlining())
compInitDebuggingInfo();
if (compIsForInlining())
// Share the inliner's block ID counter so IDs stay unique across the tree.
compBasicBlockID = impInlineInfo->InlinerCompiler->compBasicBlockID;
const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
// We're prejitting the root method. We also will analyze it as
// a potential inline candidate.
InlineResult prejitResult(this, methodHnd, "prejit");
// Do the initial inline screen.
impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);
// Temporarily install the prejitResult as the
// compInlineResult so it's available to fgFindJumpTargets
// and can accumulate more observations as the IL is
// scanned.
// We don't pass prejitResult in as a parameter to avoid
// potential aliasing confusion -- the other call to
// fgFindBasicBlocks may have set up compInlineResult and
// the code in fgFindJumpTargets references that data
// member extensively.
assert(compInlineResult == nullptr);
assert(impInlineInfo == nullptr);
compInlineResult = &prejitResult;
// Find the basic blocks. We must do this regardless of
// inlineability, since we are prejitting this method.
// This will also update the status of this method as
// an inline candidate.
fgFindBasicBlocks();
// Undo the temporary setup.
assert(compInlineResult == &prejitResult);
compInlineResult = nullptr;
// If still a viable, discretionary inline, assess
// profitability.
if (prejitResult.IsDiscretionaryCandidate())
prejitResult.DetermineProfitability(methodInfo);
m_inlineStrategy->NotePrejitDecision(prejitResult);
// Handle the results of the inline analysis.
if (prejitResult.IsFailure())
// This method is a bad inlinee according to our
// analysis. We will let the InlineResult destructor
// mark it as noinline in the prejit image to save the
// jit some work.
// This decision better not be context-dependent.
assert(prejitResult.IsNever());
// This looks like a viable inline candidate. Since
// we're not actually inlining, don't report anything.
prejitResult.SetReported();
// We are jitting the root method, or inlining.
fgFindBasicBlocks();
// If we're inlining and the candidate is bad, bail out.
if (compDonotInline())
if (compHasBackwardJump && (info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0 && fgCanSwitchToOptimized())
// Method likely has a loop, switch to the OptimizedTier to avoid spending too much time running slower code
fgSwitchToOptimized();
compSetOptimizationLevel();
#if COUNT_BASIC_BLOCKS
bbCntTable.record(fgBBcount);
bbOneBBSizeTable.record(methodInfo->ILCodeSize);
#endif // COUNT_BASIC_BLOCKS
printf("Basic block list for '%s'\n", info.compFullName);
fgDispBasicBlocks();
/* Give the function a unique number */
if (opts.disAsm || verbose)
compMethodID = ~info.compMethodHash() & 0xffff;
compMethodID = InterlockedIncrement(&s_compMethodsCount);
if (compIsForInlining())
compInlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS, fgBBcount);
if (compInlineResult->IsFailure())
if ((JitConfig.DumpJittedMethods() == 1) && !compIsForInlining())
printf("Compiling %4d %s::%s, IL size = %u, hash=0x%08x %s%s%s\n", Compiler::jitTotalMethodCompiled,
info.compClassName, info.compMethodName, info.compILCodeSize, info.compMethodHash(),
compGetTieringName(), opts.IsOSR() ? " OSR" : "", compGetStressMessage());
if (compIsForInlining())
// Inlinee borrows the inliner's node/statement ID counters...
compGenTreeID = impInlineInfo->InlinerCompiler->compGenTreeID;
compStatementID = impInlineInfo->InlinerCompiler->compStatementID;
compCompile(methodCodePtr, methodCodeSize, compileFlags);
if (compIsForInlining())
// ...and hands the advanced counters back when compilation finishes.
impInlineInfo->InlinerCompiler->compGenTreeID = compGenTreeID;
impInlineInfo->InlinerCompiler->compStatementID = compStatementID;
impInlineInfo->InlinerCompiler->compBasicBlockID = compBasicBlockID;
if (compDonotInline())
// Verify we have only one inline result in play.
assert(impInlineInfo->inlineResult == compInlineResult);
if (!compIsForInlining())
compCompileFinish();
// Did we just compile for a target architecture that the VM isn't expecting? If so, the VM
// can't use the generated code (and we better be an AltJit!).
if (!info.compMatchedVM)
return CORJIT_SKIPPED;
if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT) && JitConfig.RunAltJitCode() == 0)
return CORJIT_SKIPPED;
6446 //------------------------------------------------------------------------
6447 // compFindLocalVarLinear: Linear search for variable's scope containing offset.
6450 // varNum The variable number to search for in the array of scopes.
6451 // offs The offset value which should occur within the life of the variable.
6454 // VarScopeDsc* of a matching variable that contains the offset within its life
6455 // begin and life end or nullptr when there is no match found.
6458 // Linear search for matching variables with their life begin and end containing
6460 // or NULL if one couldn't be found.
6463 // Usually called for scope count = 4. Could be called for values upto 8.
6465 VarScopeDsc* Compiler::compFindLocalVarLinear(unsigned varNum, unsigned offs)
6467 for (unsigned i = 0; i < info.compVarScopesCount; i++)
6469 VarScopeDsc* dsc = &info.compVarScopes[i];
6470 if ((dsc->vsdVarNum == varNum) && (dsc->vsdLifeBeg <= offs) && (dsc->vsdLifeEnd > offs))
6478 //------------------------------------------------------------------------
6479 // compFindLocalVar: Search for variable's scope containing offset.
6482 // varNum The variable number to search for in the array of scopes.
6483 // offs The offset value which should occur within the life of the variable.
6486 // VarScopeDsc* of a matching variable that contains the offset within its life
6487 // begin and life end.
6488 // or NULL if one couldn't be found.
6491 // Linear search for matching variables with their life begin and end containing
6492 // the offset only when the scope count is < MAX_LINEAR_FIND_LCL_SCOPELIST,
6493 // else use the hashtable lookup.
6495 VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned offs)
6497 if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
6499 return compFindLocalVarLinear(varNum, offs);
6503 VarScopeDsc* ret = compFindLocalVar(varNum, offs, offs);
6504 assert(ret == compFindLocalVarLinear(varNum, offs));
6509 //------------------------------------------------------------------------
6510 // compFindLocalVar: Search for variable's scope containing offset.
6513 // varNum The variable number to search for in the array of scopes.
6514 // lifeBeg The life begin of the variable's scope
6515 // lifeEnd The life end of the variable's scope
6518 // VarScopeDsc* of a matching variable that contains the offset within its life
6519 // begin and life end, or NULL if one couldn't be found.
6522 // Following are the steps used:
6523 // 1. Index into the hashtable using varNum.
6524 // 2. Iterate through the linked list at index varNum to find a matching
6527 VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd)
6529 assert(compVarScopeMap != nullptr);
6531 VarScopeMapInfo* info;
6532 if (compVarScopeMap->Lookup(varNum, &info))
6534 VarScopeListNode* list = info->head;
6535 while (list != nullptr)
6537 if ((list->data->vsdLifeBeg <= lifeBeg) && (list->data->vsdLifeEnd > lifeEnd))
6547 //-------------------------------------------------------------------------
6548 // compInitVarScopeMap: Create a scope map so it can be looked up by varNum
6551 // Map.K => Map.V :: varNum => List(ScopeDsc)
6553 // Create a scope map that can be indexed by varNum and can be iterated
6554 // on it's values to look for matching scope when given an offs or
6555 // lifeBeg and lifeEnd.
6558 // 1. Build the map only when we think linear search is slow, i.e.,
6559 // MAX_LINEAR_FIND_LCL_SCOPELIST is large.
6560 // 2. Linked list preserves original array order.
6562 void Compiler::compInitVarScopeMap()
6564 if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
6569 assert(compVarScopeMap == nullptr);
6571 compVarScopeMap = new (getAllocator()) VarNumToScopeDscMap(getAllocator());
6573 // 599 prime to limit huge allocations; for ex: duplicated scopes on single var.
6574 compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599));
6576 for (unsigned i = 0; i < info.compVarScopesCount; ++i)
6578 unsigned varNum = info.compVarScopes[i].vsdVarNum;
6580 VarScopeListNode* node = VarScopeListNode::Create(&info.compVarScopes[i], getAllocator());
6582 // Index by varNum and if the list exists append "node" to the "list".
6583 VarScopeMapInfo* info;
6584 if (compVarScopeMap->Lookup(varNum, &info))
6586 info->tail->next = node;
6589 // Create a new list.
6592 info = VarScopeMapInfo::Create(node, getAllocator());
6593 compVarScopeMap->Set(varNum, info);
6598 struct genCmpLocalVarLifeBeg
6600 bool operator()(const VarScopeDsc* elem1, const VarScopeDsc* elem2)
6602 return elem1->vsdLifeBeg < elem2->vsdLifeBeg;
6606 struct genCmpLocalVarLifeEnd
6608 bool operator()(const VarScopeDsc* elem1, const VarScopeDsc* elem2)
6610 return elem1->vsdLifeEnd < elem2->vsdLifeEnd;
6614 inline void Compiler::compInitScopeLists()
6616 if (info.compVarScopesCount == 0)
6618 compEnterScopeList = compExitScopeList = nullptr;
6622 // Populate the 'compEnterScopeList' and 'compExitScopeList' lists
6624 compEnterScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
6625 compExitScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
6627 for (unsigned i = 0; i < info.compVarScopesCount; i++)
6629 compEnterScopeList[i] = compExitScopeList[i] = &info.compVarScopes[i];
6632 jitstd::sort(compEnterScopeList, compEnterScopeList + info.compVarScopesCount, genCmpLocalVarLifeBeg());
6633 jitstd::sort(compExitScopeList, compExitScopeList + info.compVarScopesCount, genCmpLocalVarLifeEnd());
6636 void Compiler::compResetScopeLists()
6638 if (info.compVarScopesCount == 0)
6643 assert(compEnterScopeList && compExitScopeList);
6645 compNextEnterScope = compNextExitScope = 0;
6648 VarScopeDsc* Compiler::compGetNextEnterScope(unsigned offs, bool scan)
6650 assert(info.compVarScopesCount);
6651 assert(compEnterScopeList && compExitScopeList);
6653 if (compNextEnterScope < info.compVarScopesCount)
6655 assert(compEnterScopeList[compNextEnterScope]);
6656 unsigned nextEnterOff = compEnterScopeList[compNextEnterScope]->vsdLifeBeg;
6657 assert(scan || (offs <= nextEnterOff));
6661 if (offs == nextEnterOff)
6663 return compEnterScopeList[compNextEnterScope++];
6668 if (nextEnterOff <= offs)
6670 return compEnterScopeList[compNextEnterScope++];
6678 VarScopeDsc* Compiler::compGetNextExitScope(unsigned offs, bool scan)
6680 assert(info.compVarScopesCount);
6681 assert(compEnterScopeList && compExitScopeList);
6683 if (compNextExitScope < info.compVarScopesCount)
6685 assert(compExitScopeList[compNextExitScope]);
6686 unsigned nextExitOffs = compExitScopeList[compNextExitScope]->vsdLifeEnd;
6687 assert(scan || (offs <= nextExitOffs));
6691 if (offs == nextExitOffs)
6693 return compExitScopeList[compNextExitScope++];
6698 if (nextExitOffs <= offs)
6700 return compExitScopeList[compNextExitScope++];
// The function will call the callback functions for scopes with boundaries
// at instrs from the current status of the scope lists to 'offset',
// ordered by instrs.
//
// 'enterScopeFn' fires for each scope whose life begins and 'exitScopeFn'
// for each scope whose life ends, walking the enter-sorted and exit-sorted
// lists in lockstep so the callbacks are delivered in IL-offset order.
void Compiler::compProcessScopesUntil(unsigned offset,
void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*))
assert(offset != BAD_IL_OFFSET);
assert(inScope != nullptr);
bool foundExit = false, foundEnter = true;
// Scopes deferred from a previous pass because the other list had to
// catch up first.
VarScopeDsc* nextExitScope = nullptr;
VarScopeDsc* nextEnterScope = nullptr;
unsigned offs = offset, curEnterOffs = 0;
goto START_FINDING_SCOPES;
// We need to determine the scopes which are open for the current block.
// This loop walks over the missing blocks between the current and the
// previous block, keeping the enter and exit offsets in lockstep.
foundExit = foundEnter = false;
// Deliver the exit scope deferred from the previous iteration, if any.
(this->*exitScopeFn)(inScope, nextExitScope);
nextExitScope = nullptr;
offs = nextEnterScope ? nextEnterScope->vsdLifeBeg : offset;
while ((scope = compGetNextExitScope(offs, true)) != nullptr)
if (!nextEnterScope || scope->vsdLifeEnd > nextEnterScope->vsdLifeBeg)
// We overshot the last found Enter scope. Save the scope for later
// and find an entering scope
nextExitScope = scope;
(this->*exitScopeFn)(inScope, scope);
// Deliver the enter scope deferred from the previous iteration, if any.
(this->*enterScopeFn)(inScope, nextEnterScope);
curEnterOffs = nextEnterScope->vsdLifeBeg;
nextEnterScope = nullptr;
offs = nextExitScope ? nextExitScope->vsdLifeEnd : offset;
START_FINDING_SCOPES:
while ((scope = compGetNextEnterScope(offs, true)) != nullptr)
if ((nextExitScope && scope->vsdLifeBeg >= nextExitScope->vsdLifeEnd) || (scope->vsdLifeBeg > curEnterOffs))
// We overshot the last found exit scope. Save the scope for later
// and find an exiting scope
nextEnterScope = scope;
(this->*enterScopeFn)(inScope, scope);
curEnterOffs = scope->vsdLifeBeg;
} while (foundExit || foundEnter);
// compDispScopeLists: DEBUG dump of the enter-sorted and exit-sorted local
// variable scope lists, marking where the iteration cursors currently point.
void Compiler::compDispScopeLists()
printf("Local variable scopes = %d\n", info.compVarScopesCount);
if (info.compVarScopesCount)
printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
// Dump the list sorted by life-begin offset, flagging the next-enter cursor.
printf("Sorted by enter scope:\n");
for (i = 0; i < info.compVarScopesCount; i++)
VarScopeDsc* varScope = compEnterScopeList[i];
printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
varScope->vsdLifeBeg, varScope->vsdLifeEnd);
if (compNextEnterScope == i)
printf(" <-- next enter scope");
// Dump the list sorted by life-end offset, flagging the next-exit cursor.
printf("Sorted by exit scope:\n");
for (i = 0; i < info.compVarScopesCount; i++)
VarScopeDsc* varScope = compExitScopeList[i];
printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
varScope->vsdLifeBeg, varScope->vsdLifeEnd);
if (compNextExitScope == i)
printf(" <-- next exit scope");
6844 void Compiler::compDispLocalVars()
6846 printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
6848 if (info.compVarScopesCount > 0)
6850 printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
6853 for (unsigned i = 0; i < info.compVarScopesCount; i++)
6855 VarScopeDsc* varScope = &info.compVarScopes[i];
6856 printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh\n", i, varScope->vsdVarNum, varScope->vsdLVnum,
6857 VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
6858 varScope->vsdLifeBeg, varScope->vsdLifeEnd);
6864 /*****************************************************************************/
6866 #if MEASURE_CLRAPI_CALLS
// WrapICorJitInfo: timing wrapper around the EE's ICorJitInfo. When
// COMPlus_JitEECallTimingInfo is set, makeOne() allocates an instance from
// the compiler's arena, records the real EE handle in wrapHnd, and replaces
// the caller's handle so every EE callback is routed through the wrapper
// bodies generated by ICorJitInfo_API_wrapper.hpp (which time each call).
6868 struct WrapICorJitInfo : public ICorJitInfo
6870 //------------------------------------------------------------------------
6871 // WrapICorJitInfo::makeOne: allocate an instance of WrapICorJitInfo
6874 // alloc - the allocator to get memory from for the instance
6875 // compile - the compiler instance
6876 // compHndRef - the ICorJitInfo handle from the EE; the caller's
6877 // copy may be replaced with a "wrapper" instance
6880 // If the config flags indicate that ICorJitInfo should be wrapped,
6881 // we return the "wrapper" instance; otherwise we return "nullptr".
6883 static WrapICorJitInfo* makeOne(ArenaAllocator* alloc, Compiler* compiler, COMP_HANDLE& compHndRef /* INOUT */)
6885 WrapICorJitInfo* wrap = nullptr;
6887 if (JitConfig.JitEECallTimingInfo() != 0)
6889 // It's too early to use the default allocator, so we do this
6890 // in two steps to be safe (the constructor doesn't need to do
6891 // anything except fill in the vtable pointer, so we let the
// Raw arena allocation + placement-new: construction cannot fail separately.
6893 void* inst = alloc->allocateMemory(roundUp(sizeof(WrapICorJitInfo)));
6894 if (inst != nullptr)
6896 // If you get a build error here due to 'WrapICorJitInfo' being
6897 // an abstract class, it's very likely that the wrapper bodies
6898 // in ICorJitInfo_API_wrapper.hpp are no longer in sync with
6899 // the EE interface; please be kind and update the header file.
6900 wrap = new (inst, jitstd::placement_t()) WrapICorJitInfo();
6902 wrap->wrapComp = compiler;
6904 // Save the real handle and replace it with our wrapped version.
6905 wrap->wrapHnd = compHndRef;
6915 COMP_HANDLE wrapHnd; // the "real thing"
// The wrapper method bodies (one per ICorJitInfo API) are textually included
// into the struct here.
6918 #include "ICorJitInfo_API_wrapper.hpp"
6921 #endif // MEASURE_CLRAPI_CALLS
6923 /*****************************************************************************/
6925 // Compile a single method
// jitNativeCode: top-level entry to compile a single method (or an inlinee,
// when inlineInfoPtr is non-null). Sets up the arena allocator and Compiler
// instance, runs compCompile under nested error traps, and on certain
// recoverable failures retries once with "safer" (min-opts) flags.
// Returns a CORJIT_* status; native code and size come back through
// methodCodePtr / methodCodeSize.
6927 int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
6928 CORINFO_MODULE_HANDLE classPtr,
6929 COMP_HANDLE compHnd,
6930 CORINFO_METHOD_INFO* methodInfo,
6931 void** methodCodePtr,
6932 uint32_t* methodCodeSize,
6933 JitFlags* compileFlags,
6934 void* inlineInfoPtr)
6937 // A non-NULL inlineInfo means we are compiling the inlinee method.
6939 InlineInfo* inlineInfo = (InlineInfo*)inlineInfoPtr;
6941 bool jitFallbackCompile = false;
6943 int result = CORJIT_INTERNALERROR;
6945 ArenaAllocator* pAlloc = nullptr;
6946 ArenaAllocator alloc;
6948 #if MEASURE_CLRAPI_CALLS
6949 WrapICorJitInfo* wrapCLR = nullptr;
6954 // Use inliner's memory allocator when compiling the inlinee.
6955 pAlloc = inlineInfo->InlinerCompiler->compGetArenaAllocator();
// Fields of the local 'Param' struct marshalled across the setErrorTrap
// boundary (the struct declaration itself is elided in this listing).
6968 ArenaAllocator* pAlloc;
6969 bool jitFallbackCompile;
6971 CORINFO_METHOD_HANDLE methodHnd;
6972 CORINFO_MODULE_HANDLE classPtr;
6973 COMP_HANDLE compHnd;
6974 CORINFO_METHOD_INFO* methodInfo;
6975 void** methodCodePtr;
6976 uint32_t* methodCodeSize;
6977 JitFlags* compileFlags;
6978 InlineInfo* inlineInfo;
6979 #if MEASURE_CLRAPI_CALLS
6980 WrapICorJitInfo* wrapCLR;
// Copy locals into the param block so the error-trap lambda/filter can use them.
6985 param.pComp = nullptr;
6986 param.pAlloc = pAlloc;
6987 param.jitFallbackCompile = jitFallbackCompile;
6988 param.methodHnd = methodHnd;
6989 param.classPtr = classPtr;
6990 param.compHnd = compHnd;
6991 param.methodInfo = methodInfo;
6992 param.methodCodePtr = methodCodePtr;
6993 param.methodCodeSize = methodCodeSize;
6994 param.compileFlags = compileFlags;
6995 param.inlineInfo = inlineInfo;
6996 #if MEASURE_CLRAPI_CALLS
6997 param.wrapCLR = nullptr;
6999 param.result = result;
// NOTE(review): "¶m" below is mojibake for "&param" (the "&para"
// prefix was decoded as the HTML entity for the pilcrow). Should be fixed in
// the real source: setErrorTrap(compHnd, Param*, pParamOuter, &param)
7001 setErrorTrap(compHnd, Param*, pParamOuter, ¶m)
7003 setErrorTrap(nullptr, Param*, pParam, pParamOuter)
7005 if (pParam->inlineInfo)
7007 // Lazily create the inlinee compiler object
7008 if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
7010 pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
7011 (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
7014 // Use the inlinee compiler object
7015 pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
7017 // memset(pParam->pComp, 0xEE, sizeof(Compiler));
7022 // Allocate create the inliner compiler object
7023 pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
7026 #if MEASURE_CLRAPI_CALLS
7027 pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
7030 // push this compiler on the stack (TLS)
7031 pParam->pComp->prevCompiler = JitTls::GetCompiler();
7032 JitTls::SetCompiler(pParam->pComp);
7034 // PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
7035 #if defined(_PREFAST_) || defined(_PREFIX_)
7036 PREFIX_ASSUME(pParam->pComp != NULL);
7038 assert(pParam->pComp != nullptr);
7041 pParam->pComp->compInit(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo,
7042 pParam->inlineInfo);
7045 pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
7048 // Now generate the code
7049 pParam->result = pParam->pComp->compCompile(pParam->classPtr, pParam->methodCodePtr, pParam->methodCodeSize,
7050 pParam->compileFlags);
// Cleanup path (runs whether compCompile succeeded or the trap fired):
// unlink the compiler from TLS and, for a top-level (non-inlinee) compile,
// destroy the arena we created.
7054 Compiler* pCompiler = pParamOuter->pComp;
7056 // If OOM is thrown when allocating memory for a pComp, we will end up here.
7057 // For this case, pComp and also pCompiler will be a nullptr
7059 if (pCompiler != nullptr)
7061 pCompiler->info.compCode = nullptr;
7063 // pop the compiler off the TLS stack only if it was linked above
7064 assert(JitTls::GetCompiler() == pCompiler);
7065 JitTls::SetCompiler(pCompiler->prevCompiler);
7068 if (pParamOuter->inlineInfo == nullptr)
7070 // Free up the allocator we were using
7071 pParamOuter->pAlloc->destroy();
7078 // If we were looking at an inlinee....
7079 if (inlineInfo != nullptr)
7081 // Note that we failed to compile the inlinee, and that
7082 // there's no point trying to inline it again anywhere else.
7083 inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
7085 param.result = __errc;
7089 result = param.result;
// One-shot fallback: on internal/recoverable/impl-limitation errors, retry
// the whole compile with MIN_OPT set and the size/speed-opt flags cleared.
7092 (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR || result == CORJIT_IMPLLIMITATION) &&
7093 !jitFallbackCompile)
7095 // If we failed the JIT, reattempt with debuggable code.
7096 jitFallbackCompile = true;
7098 // Update the flags for 'safer' code generation.
7099 compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
7100 compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
7101 compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
7109 #if defined(UNIX_AMD64_ABI)
7111 // GetTypeFromClassificationAndSizes:
7112 // Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
7115 // classType: classification type
7116 // size: size of the eightbyte.
// Maps a SysV AMD64 eightbyte classification + byte size to a JIT var_types
// value; asserts (and falls through to TYP_UNKNOWN) on unexpected
// classification/size combinations. The per-size cases inside each
// classification are elided in this listing.
7119 var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
7121 var_types type = TYP_UNKNOWN;
7124 case SystemVClassificationTypeInteger:
7143 assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
7146 case SystemVClassificationTypeIntegerReference:
7149 case SystemVClassificationTypeIntegerByRef:
7152 case SystemVClassificationTypeSSE:
7163 assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
7168 assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
7175 //-------------------------------------------------------------------
7176 // GetEightByteType: Returns the type of eightbyte slot of a struct
7179 // structDesc - struct classification description.
7180 // slotNum - eightbyte slot number for the struct.
7183 // type of the eightbyte slot of the struct
// Returns the JIT type for one eightbyte slot of a SysV AMD64 struct-passing
// descriptor: Integer slots map to TYP_INT/TYP_LONG by size, reference/byref
// slots to TYP_REF/TYP_BYREF (must be exactly REGSIZE_BYTES), SSE slots to
// TYP_FLOAT/TYP_DOUBLE by size. Returns TYP_UNDEF (with an assert) for
// anything unexpected.
7186 var_types Compiler::GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
7189 var_types eightByteType = TYP_UNDEF;
7190 unsigned len = structDesc.eightByteSizes[slotNum];
7192 switch (structDesc.eightByteClassifications[slotNum])
7194 case SystemVClassificationTypeInteger:
7195 // See typelist.h for jit type definition.
7196 // All the types of size < 4 bytes are of jit type TYP_INT.
7197 if (structDesc.eightByteSizes[slotNum] <= 4)
7199 eightByteType = TYP_INT;
7201 else if (structDesc.eightByteSizes[slotNum] <= 8)
7203 eightByteType = TYP_LONG;
7207 assert(false && "GetEightByteType Invalid Integer classification type.");
7210 case SystemVClassificationTypeIntegerReference:
7211 assert(len == REGSIZE_BYTES);
7212 eightByteType = TYP_REF;
7214 case SystemVClassificationTypeIntegerByRef:
7215 assert(len == REGSIZE_BYTES);
7216 eightByteType = TYP_BYREF;
7218 case SystemVClassificationTypeSSE:
7219 if (structDesc.eightByteSizes[slotNum] <= 4)
7221 eightByteType = TYP_FLOAT;
7223 else if (structDesc.eightByteSizes[slotNum] <= 8)
7225 eightByteType = TYP_DOUBLE;
7229 assert(false && "GetEightByteType Invalid SSE classification type.");
7233 assert(false && "GetEightByteType Invalid classification type.");
7237 return eightByteType;
7240 //------------------------------------------------------------------------------------------------------
7241 // GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
7244 // 'structDesc' - struct description
7245 // 'type0' - out param; returns the type of the first eightbyte.
7246 // 'type1' - out param; returns the type of the second eightbyte.
7247 // 'offset0' - out param; returns the offset of the first eightbyte.
7248 // 'offset1' - out param; returns the offset of the second eightbyte.
// Fills in the types and offsets of up to two eightbytes from a SysV AMD64
// struct-passing descriptor. Offsets are always copied from the descriptor;
// types default to TYP_UNKNOWN and are resolved via GetEightByteType only for
// the eightbytes the descriptor actually contains (eightByteCount >= 1 / == 2).
// The type0/type1 parameter declarations are elided in this listing.
7251 void Compiler::GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
7254 unsigned __int8* offset0,
7255 unsigned __int8* offset1)
7257 *offset0 = structDesc.eightByteOffsets[0];
7258 *offset1 = structDesc.eightByteOffsets[1];
7260 *type0 = TYP_UNKNOWN;
7261 *type1 = TYP_UNKNOWN;
7263 // Set the first eightbyte data
7264 if (structDesc.eightByteCount >= 1)
7266 *type0 = GetEightByteType(structDesc, 0);
7269 // Set the second eight byte data
7270 if (structDesc.eightByteCount == 2)
7272 *type1 = GetEightByteType(structDesc, 1);
7276 //------------------------------------------------------------------------------------------------------
7277 // GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
7280 // 'typeHnd' - type handle
7281 // 'type0' - out param; returns the type of the first eightbyte.
7282 // 'type1' - out param; returns the type of the second eightbyte.
7283 // 'offset0' - out param; returns the offset of the first eightbyte.
7284 // 'offset1' - out param; returns the offset of the second eightbyte.
// Convenience overload: queries the EE for the struct's register-passing
// descriptor (asserting the struct is in fact passed in registers) and
// forwards to the descriptor-based GetStructTypeOffset above.
7286 void Compiler::GetStructTypeOffset(CORINFO_CLASS_HANDLE typeHnd,
7289 unsigned __int8* offset0,
7290 unsigned __int8* offset1)
7292 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
7293 eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
7294 assert(structDesc.passedInRegisters);
7295 GetStructTypeOffset(structDesc, type0, type1, offset0, offset1);
7298 #endif // defined(UNIX_AMD64_ABI)
7300 /*****************************************************************************/
7301 /*****************************************************************************/
// Builds a map of all GenTree nodes with node-test-data annotations that are
// still reachable by walking every statement of every basic block. For call
// nodes, late args are translated back to the argument they stand for so
// their annotations are kept too. Returns an empty map when m_nodeTestData
// is null.
7304 Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
7306 NodeToIntMap* reachable = new (getAllocatorDebugOnly()) NodeToIntMap(getAllocatorDebugOnly());
7308 if (m_nodeTestData == nullptr)
7313 // Otherwise, iterate.
7315 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
7317 for (Statement* stmt = block->FirstNonPhiDef(); stmt != nullptr; stmt = stmt->GetNextStmt())
7319 for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext)
7321 TestLabelAndNum tlAndN;
7323 // For call nodes, translate late args to what they stand for.
7324 if (tree->OperGet() == GT_CALL)
7326 GenTreeCall* call = tree->AsCall();
// NOTE(review): the declaration/increment of the arg index 'i' used by
// GetArgNode(i) below is elided in this listing.
7328 for (GenTreeCall::Use& use : call->Args())
7330 if ((use.GetNode()->gtFlags & GTF_LATE_ARG) != 0)
7332 // Find the corresponding late arg.
7333 GenTree* lateArg = call->fgArgInfo->GetArgNode(i);
7334 if (GetNodeTestData()->Lookup(lateArg, &tlAndN))
7336 reachable->Set(lateArg, 0);
// Any annotated node reached by the walk is recorded as reachable.
7343 if (GetNodeTestData()->Lookup(tree, &tlAndN))
7345 reachable->Set(tree, 0);
// Moves a node-test-data annotation from 'from' to 'to'. No-op when the test
// data table doesn't exist or 'from' has no annotation; asserts that 'to' is
// not already annotated (a node can carry at most one annotation).
7353 void Compiler::TransferTestDataToNode(GenTree* from, GenTree* to)
7355 TestLabelAndNum tlAndN;
7356 // We can't currently associate multiple annotations with a single node.
7357 // If we need to, we can fix this...
7359 // If the table is null, don't create it just to do the lookup, which would fail...
7360 if (m_nodeTestData != nullptr && GetNodeTestData()->Lookup(from, &tlAndN))
// NOTE(review): the assert below and the tlAndNTo assert a few lines later
// check the same condition twice (duplicated comment included) — one of the
// two looks like copy/paste residue and could be removed.
7362 assert(!GetNodeTestData()->Lookup(to, &tlAndN));
7363 // We can't currently associate multiple annotations with a single node.
7364 // If we need to, we can fix this...
7365 TestLabelAndNum tlAndNTo;
7366 assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
// Re-home the annotation: remove from 'from', attach to 'to'.
7368 GetNodeTestData()->Remove(from);
7369 GetNodeTestData()->Set(to, tlAndN);
7376 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7377 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7381 XX Functions for the stand-alone version of the JIT . XX
7383 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7384 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7387 /*****************************************************************************/
7388 void codeGeneratorCodeSizeBeg() // code-size bracketing stub; body elided/empty in this listing
7392 /*****************************************************************************
7394 * Used for counting pointer assignments.
7397 /*****************************************************************************/
7398 void codeGeneratorCodeSizeEnd() // code-size bracketing stub; body elided/empty in this listing
7401 /*****************************************************************************
7403 * Gather statistics - mainly used for the standalone
7404 * Enable various #ifdef's to get the information you need
// compJitStats: standalone-JIT statistics hook; dispatches to the stats
// gatherers enabled by #ifdef (only the CALL_ARG_STATS region is visible here).
7407 void Compiler::compJitStats()
7411 /* Method types and argument statistics */
7413 #endif // CALL_ARG_STATS
7418 /*****************************************************************************
7420 * Gather statistics about method calls and arguments
// compCallArgStats: walks every GT_CALL node in the method (requires the
// statement list to be threaded) and accumulates per-call argument counters
// (register args, deferred/temp/const/lclvar args, dword/long/float/double
// arg counts, 'this'-pointer and virtual/non-virtual call counts) into the
// global argument-statistics tables. Many counter updates are elided in
// this listing.
7423 void Compiler::compCallArgStats()
7430 unsigned argDWordNum;
7436 unsigned regArgDeferred;
7437 unsigned regArgTemp;
7439 unsigned regArgLclVar;
7440 unsigned regArgConst;
7442 unsigned argTempsThisMethod = 0;
7444 assert(fgStmtListThreaded);
7446 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
7448 for (Statement* stmt : block->Statements())
7450 for (GenTree* call = stmt->GetTreeList(); call != nullptr; call = call->gtNext)
7452 if (call->gtOper != GT_CALL)
// Reset the per-call counters before classifying this call's arguments.
7457 regArgNum = regArgDeferred = regArgTemp =
7459 regArgConst = regArgLclVar =
7461 argDWordNum = argLngNum = argFltNum = argDblNum = 0;
7465 if (call->AsCall()->gtCallThisArg == nullptr)
7467 if (call->AsCall()->gtCallType == CT_HELPER)
7478 /* We have a 'this' pointer */
7486 if (call->IsVirtual())
7488 /* virtual function */
7493 argNonVirtualCalls++;
// Record this method's temp-arg count and track the per-method maximum.
7500 argTempsCntTable.record(argTempsThisMethod);
7502 if (argMaxTempsPerMethod < argTempsThisMethod)
7504 argMaxTempsPerMethod = argTempsThisMethod;
// compDispCallArgStats: prints the accumulated call/argument statistics to
// 'fout' — call-kind percentages, argument-type percentages, register-arg
// breakdowns, and several frequency tables. Early-outs (elided here) when
// argTotalCalls or argTotalRegArgs is zero to avoid division by zero.
7509 void Compiler::compDispCallArgStats(FILE* fout)
7511 if (argTotalCalls == 0)
7514 fprintf(fout, "\n");
7515 fprintf(fout, "--------------------------------------------------\n");
7516 fprintf(fout, "Call stats\n");
7517 fprintf(fout, "--------------------------------------------------\n");
7518 fprintf(fout, "Total # of calls = %d, calls / method = %.3f\n\n", argTotalCalls,
7519 (float)argTotalCalls / genMethodCnt);
7521 fprintf(fout, "Percentage of helper calls = %4.2f %%\n", (float)(100 * argHelperCalls) / argTotalCalls);
7522 fprintf(fout, "Percentage of static calls = %4.2f %%\n", (float)(100 * argStaticCalls) / argTotalCalls);
7523 fprintf(fout, "Percentage of virtual calls = %4.2f %%\n", (float)(100 * argVirtualCalls) / argTotalCalls);
7524 fprintf(fout, "Percentage of non-virtual calls = %4.2f %%\n\n", (float)(100 * argNonVirtualCalls) / argTotalCalls);
// NOTE(review): the "%%" suffix below prints a percent sign after what is an
// average argument count, not a percentage — looks like a copy/paste slip.
7526 fprintf(fout, "Average # of arguments per call = %.2f%%\n\n", (float)argTotalArgs / argTotalCalls);
7528 fprintf(fout, "Percentage of DWORD arguments = %.2f %%\n", (float)(100 * argTotalDWordArgs) / argTotalArgs);
7529 fprintf(fout, "Percentage of LONG arguments = %.2f %%\n", (float)(100 * argTotalLongArgs) / argTotalArgs);
7530 fprintf(fout, "Percentage of FLOAT arguments = %.2f %%\n", (float)(100 * argTotalFloatArgs) / argTotalArgs);
7531 fprintf(fout, "Percentage of DOUBLE arguments = %.2f %%\n\n", (float)(100 * argTotalDoubleArgs) / argTotalArgs);
7533 if (argTotalRegArgs == 0)
7537 fprintf(fout, "Total deferred arguments = %d \n", argTotalDeferred);
7539 fprintf(fout, "Total temp arguments = %d \n\n", argTotalTemps);
7541 fprintf(fout, "Total 'this' arguments = %d \n", argTotalObjPtr);
7542 fprintf(fout, "Total local var arguments = %d \n", argTotalLclVar);
7543 fprintf(fout, "Total constant arguments = %d \n\n", argTotalConst);
7546 fprintf(fout, "\nRegister Arguments:\n\n");
7548 fprintf(fout, "Percentage of deferred arguments = %.2f %%\n", (float)(100 * argTotalDeferred) / argTotalRegArgs);
7549 fprintf(fout, "Percentage of temp arguments = %.2f %%\n\n", (float)(100 * argTotalTemps) / argTotalRegArgs);
7551 fprintf(fout, "Maximum # of temps per method = %d\n\n", argMaxTempsPerMethod);
7553 fprintf(fout, "Percentage of ObjPtr arguments = %.2f %%\n", (float)(100 * argTotalObjPtr) / argTotalRegArgs);
7554 // fprintf(fout, "Percentage of global arguments = %.2f %%\n", (float)(100 * argTotalDWordGlobEf) /
7555 // argTotalRegArgs);
7556 fprintf(fout, "Percentage of constant arguments = %.2f %%\n", (float)(100 * argTotalConst) / argTotalRegArgs);
7557 fprintf(fout, "Percentage of lcl var arguments = %.2f %%\n\n", (float)(100 * argTotalLclVar) / argTotalRegArgs);
// Frequency tables: overall arg count, DWORD-only, temps per method, DWORD+LONG.
7559 fprintf(fout, "--------------------------------------------------\n");
7560 fprintf(fout, "Argument count frequency table (includes ObjPtr):\n");
7561 fprintf(fout, "--------------------------------------------------\n");
7562 argCntTable.dump(fout);
7563 fprintf(fout, "--------------------------------------------------\n");
7565 fprintf(fout, "--------------------------------------------------\n");
7566 fprintf(fout, "DWORD argument count frequency table (w/o LONG):\n");
7567 fprintf(fout, "--------------------------------------------------\n");
7568 argDWordCntTable.dump(fout);
7569 fprintf(fout, "--------------------------------------------------\n");
7571 fprintf(fout, "--------------------------------------------------\n");
7572 fprintf(fout, "Temps count frequency table (per method):\n");
7573 fprintf(fout, "--------------------------------------------------\n");
7574 argTempsCntTable.dump(fout);
7575 fprintf(fout, "--------------------------------------------------\n");
7578 fprintf(fout, "--------------------------------------------------\n");
7579 fprintf(fout, "DWORD argument count frequency table (w/ LONG):\n");
7580 fprintf(fout, "--------------------------------------------------\n");
7581 argDWordLngCntTable.dump(fout);
7582 fprintf(fout, "--------------------------------------------------\n");
7586 #endif // CALL_ARG_STATS
7588 // JIT time end to end, and by phases.
7590 #ifdef FEATURE_JIT_METHOD_PERF
// Static storage for the JIT-wide timing summary (guarded by the crit sec),
// followed by the per-phase metadata tables. Each table is generated by
// re-including compphases.h with CompPhaseNameMacro redefined to emit one
// field of the phase descriptor (the X-macro pattern), so all tables are
// index-parallel with the Phases enum.
7592 CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
7593 CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
7594 #if MEASURE_CLRAPI_CALLS
7595 double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond();
7597 #endif // FEATURE_JIT_METHOD_PERF
7599 #if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
7600 const char* PhaseNames[] = {
7601 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) string_nm,
7602 #include "compphases.h"
7605 const char* PhaseEnums[] = {
7606 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) #enum_nm,
7607 #include "compphases.h"
7610 const LPCWSTR PhaseShortNames[] = {
7611 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) W(short_nm),
7612 #include "compphases.h"
7614 #endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
7616 #ifdef FEATURE_JIT_METHOD_PERF
// Phase hierarchy metadata: whether a phase aggregates children, its parent
// index (-1 for roots), and whether it reports IR size.
7617 bool PhaseHasChildren[] = {
7618 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) hasChildren,
7619 #include "compphases.h"
7622 int PhaseParent[] = {
7623 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) parent,
7624 #include "compphases.h"
7627 bool PhaseReportsIRSize[] = {
7628 #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) measureIR,
7629 #include "compphases.h"
// CompTimeInfo constructor: records the method's bytecode size and zeroes
// all per-phase (and, when enabled, per-CLR-API) counters.
7632 CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
7633 : m_byteCodeBytes(byteCodeBytes)
7635 , m_parentPhaseEndSlop(0)
7636 , m_timerFailure(false)
7637 #if MEASURE_CLRAPI_CALLS
7638 , m_allClrAPIcalls(0)
7639 , m_allClrAPIcycles(0)
7642 for (int i = 0; i < PHASE_NUMBER_OF; i++)
7644 m_invokesByPhase[i] = 0;
7645 m_cyclesByPhase[i] = 0;
7646 #if MEASURE_CLRAPI_CALLS
7647 m_CLRinvokesByPhase[i] = 0;
7648 m_CLRcyclesByPhase[i] = 0;
// The per-API arrays must be sized exactly to the generated API name count.
7652 #if MEASURE_CLRAPI_CALLS
7653 assert(ARRAYSIZE(m_perClrAPIcalls) == API_ICorJitInfo_Names::API_COUNT);
7654 assert(ARRAYSIZE(m_perClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
7655 assert(ARRAYSIZE(m_maxClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
7656 for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
7658 m_perClrAPIcalls[i] = 0;
7659 m_perClrAPIcycles[i] = 0;
7660 m_maxClrAPIcycles[i] = 0;
// Filter predicate for the "filtered" summary; currently disabled (always
// false) — the commented expression shows the intended size-based filter.
7665 bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
7667 return false; // info.m_byteCodeBytes < 10;
7670 //------------------------------------------------------------------------
7671 // CompTimeSummaryInfo::AddInfo: Record timing info from one compile.
7674 // info - The timing information to record.
7675 // includePhases - If "true", the per-phase info in "info" is valid,
7676 // which means that a "normal" compile has ended; if
7677 // the value is "false" we are recording the results
7678 // of a partial compile (typically an import-only run
7679 // on behalf of the inliner) in which case the phase
7680 // info is not valid and so we only record EE call
// AddInfo: folds one method's timing info into the process-wide summary
// under s_compTimeSummaryLock. Totals and maxima are always updated; the
// per-phase counters only when includePhases is true (full compile), and the
// filtered aggregates only when IncludedInFilteredData says so.
7682 void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info, bool includePhases)
7684 if (info.m_timerFailure)
7686 return; // Don't update if there was a failure.
// Hold the summary lock for the rest of the function (RAII holder).
7689 CritSecHolder timeLock(s_compTimeSummaryLock);
7693 bool includeInFiltered = IncludedInFilteredData(info);
7697 // Update the totals and maxima.
7698 m_total.m_byteCodeBytes += info.m_byteCodeBytes;
7699 m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
7700 m_total.m_totalCycles += info.m_totalCycles;
7701 m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
7703 #if MEASURE_CLRAPI_CALLS
7704 // Update the CLR-API values.
7705 m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
7706 m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
7707 m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
7708 m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
7711 if (includeInFiltered)
7713 m_numFilteredMethods++;
7714 m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
7715 m_filtered.m_totalCycles += info.m_totalCycles;
7716 m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
// Per-phase accumulation (totals, optional filtered totals, maxima).
7719 for (int i = 0; i < PHASE_NUMBER_OF; i++)
7721 m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
7722 m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
7724 #if MEASURE_CLRAPI_CALLS
7725 m_total.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
7726 m_total.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
7729 if (includeInFiltered)
7731 m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
7732 m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
7733 #if MEASURE_CLRAPI_CALLS
7734 m_filtered.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
7735 m_filtered.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
7738 m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
7740 #if MEASURE_CLRAPI_CALLS
7741 m_maximum.m_CLRcyclesByPhase[i] = max(m_maximum.m_CLRcyclesByPhase[i], info.m_CLRcyclesByPhase[i]);
7744 m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
7745 m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
7747 #if MEASURE_CLRAPI_CALLS
7752 // Update the "global" CLR-API values.
7753 m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
7754 m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
7755 m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
7756 m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
7758 // Update the per-phase CLR-API values.
7759 m_total.m_invokesByPhase[PHASE_CLR_API] += info.m_allClrAPIcalls;
// NOTE(review): the max() below reads m_perClrAPIcalls[PHASE_CLR_API] but
// assigns into m_invokesByPhase[PHASE_CLR_API] — indexing a per-API array
// with a phase enum looks like a copy/paste inconsistency; confirm whether
// it should read m_maximum.m_invokesByPhase[PHASE_CLR_API] instead.
7760 m_maximum.m_invokesByPhase[PHASE_CLR_API] =
7761 max(m_maximum.m_perClrAPIcalls[PHASE_CLR_API], info.m_allClrAPIcalls);
7762 m_total.m_cyclesByPhase[PHASE_CLR_API] += info.m_allClrAPIcycles;
7763 m_maximum.m_cyclesByPhase[PHASE_CLR_API] =
7764 max(m_maximum.m_cyclesByPhase[PHASE_CLR_API], info.m_allClrAPIcycles);
7767 for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
7769 m_total.m_perClrAPIcalls[i] += info.m_perClrAPIcalls[i];
7770 m_maximum.m_perClrAPIcalls[i] = max(m_maximum.m_perClrAPIcalls[i], info.m_perClrAPIcalls[i]);
7772 m_total.m_perClrAPIcycles[i] += info.m_perClrAPIcycles[i];
7773 m_maximum.m_perClrAPIcycles[i] = max(m_maximum.m_perClrAPIcycles[i], info.m_perClrAPIcycles[i]);
7775 m_maximum.m_maxClrAPIcycles[i] = max(m_maximum.m_maxClrAPIcycles[i], info.m_maxClrAPIcycles[i]);
// Path of the JIT time-log file (set from config elsewhere; null = disabled).
7781 LPCWSTR Compiler::compJitTimeLogFilename = nullptr;
// Print: writes the process-wide JIT timing report to 'f' — overall totals/
// maxima/averages, a per-phase breakdown (indented by phase nesting via
// PhaseParent), the "end phase slop" diagnostic, the filtered-methods
// section, and (when CLR-API timing was collected) a two-pass per-API table:
// pass 0 computes shown totals, pass 1 prints rows and cross-checks them.
7783 void CompTimeSummaryInfo::Print(FILE* f)
7790 double countsPerSec = CycleTimer::CyclesPerSecond();
7791 if (countsPerSec == 0.0)
7793 fprintf(f, "Processor does not have a high-frequency timer.\n");
7797 double totTime_ms = 0.0;
7799 fprintf(f, "JIT Compilation time report:\n");
7800 fprintf(f, " Compiled %d methods.\n", m_numMethods);
7801 if (m_numMethods != 0)
7803 fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
7804 m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
7805 totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
7806 fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
7808 fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
7809 ((double)m_maximum.m_totalCycles / countsPerSec) * 1000.0);
7810 fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
7811 ((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
// Extra columns are appended only when EE-call timing was enabled.
7813 const char* extraHdr1 = "";
7814 const char* extraHdr2 = "";
7815 #if MEASURE_CLRAPI_CALLS
7816 bool extraInfo = (JitConfig.JitEECallTimingInfo() != 0);
7819 extraHdr1 = " CLRs/meth % in CLR";
7820 extraHdr2 = "-----------------------";
7824 fprintf(f, "\n Total time by phases:\n");
7825 fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)%s\n",
7827 fprintf(f, " ---------------------------------------------------------------------------------------%s\n",
7830 // Ensure that at least the names array and the Phases enum have the same number of entries:
7831 assert(_countof(PhaseNames) == PHASE_NUMBER_OF);
7832 for (int i = 0; i < PHASE_NUMBER_OF; i++)
7834 double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
7835 double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
7837 #if MEASURE_CLRAPI_CALLS
7838 // Skip showing CLR API call info if we didn't collect any
7839 if (i == PHASE_CLR_API && !extraInfo)
7843 // Indent nested phases, according to depth.
7844 int ancPhase = PhaseParent[i];
7845 while (ancPhase != -1)
7848 ancPhase = PhaseParent[ancPhase];
7850 fprintf(f, " %-30s %6.2f %10.2f %9.3f %8.2f%% %8.3f", PhaseNames[i],
7851 ((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
7852 ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
7855 #if MEASURE_CLRAPI_CALLS
7856 if (extraInfo && i != PHASE_CLR_API)
7858 double nest_tot_ms = (((double)m_total.m_CLRcyclesByPhase[i]) / countsPerSec) * 1000.0;
7859 double nest_percent = nest_tot_ms * 100.0 / totTime_ms;
7860 double calls_per_fn = ((double)m_total.m_CLRinvokesByPhase[i]) / ((double)m_numMethods);
7862 if (nest_percent > 0.1 || calls_per_fn > 10)
7863 fprintf(f, " %5.1f %8.2f%%", calls_per_fn, nest_percent);
7869 // Show slop if it's over a certain percentage of the total
7870 double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms;
7871 if (pslop_pct >= 1.0)
7873 fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
7874 "%3.1f%% of total.\n\n",
7875 m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct);
// Same report, restricted to the methods that passed the filter.
7878 if (m_numFilteredMethods > 0)
7880 fprintf(f, " Compiled %d methods that meet the filter requirement.\n", m_numFilteredMethods);
7881 fprintf(f, " Compiled %d bytecodes total (%8.2f avg).\n", m_filtered.m_byteCodeBytes,
7882 (double)m_filtered.m_byteCodeBytes / (double)m_numFilteredMethods);
7883 double totTime_ms = ((double)m_filtered.m_totalCycles / countsPerSec) * 1000.0;
7884 fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_filtered.m_totalCycles / 1000000.0),
7886 fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
7887 ((double)m_filtered.m_totalCycles) / 1000000.0 / (double)m_numFilteredMethods,
7888 totTime_ms / (double)m_numFilteredMethods);
7890 fprintf(f, " Total time by phases:\n");
7891 fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total\n");
7892 fprintf(f, " --------------------------------------------------------------------------------------\n");
7893 // Ensure that at least the names array and the Phases enum have the same number of entries:
7894 assert(_countof(PhaseNames) == PHASE_NUMBER_OF);
7895 for (int i = 0; i < PHASE_NUMBER_OF; i++)
7897 double phase_tot_ms = (((double)m_filtered.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
7898 // Indent nested phases, according to depth.
7899 int ancPhase = PhaseParent[i];
7900 while (ancPhase != -1)
7903 ancPhase = PhaseParent[ancPhase];
7905 fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%%\n", PhaseNames[i],
7906 ((double)m_filtered.m_invokesByPhase[i]) / ((double)m_numFilteredMethods),
7907 ((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
7908 (phase_tot_ms * 100.0 / totTime_ms));
7911 double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec;
7914 fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
7915 "%3.1f%% of total.\n\n",
7916 m_filtered.m_parentPhaseEndSlop / 1000000.0, fslop_ms);
7920 #if MEASURE_CLRAPI_CALLS
7921 if (m_total.m_allClrAPIcalls > 0 && m_total.m_allClrAPIcycles > 0)
7924 if (m_totMethods > 0)
7925 fprintf(f, " Imported %u methods.\n\n", m_numMethods + m_totMethods);
7927 fprintf(f, " CLR API # calls total time max time avg time %% "
7929 fprintf(f, " -------------------------------------------------------------------------------");
7930 fprintf(f, "---------------------\n");
// API display names generated from ICorJitInfo_API_names.h (X-macro).
7932 static const char* APInames[] = {
7933 #define DEF_CLR_API(name) #name,
7934 #include "ICorJitInfo_API_names.h"
7937 unsigned shownCalls = 0;
7938 double shownMillis = 0.0;
7940 unsigned checkedCalls = 0;
7941 double checkedMillis = 0.0;
// Pass 0 accumulates shown totals; pass 1 prints rows (percent column needs
// shownMillis from pass 0) and re-accumulates into checked* for a sanity check.
7944 for (unsigned pass = 0; pass < 2; pass++)
7946 for (unsigned i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
7948 unsigned calls = m_total.m_perClrAPIcalls[i];
7952 unsigned __int64 cycles = m_total.m_perClrAPIcycles[i];
7953 double millis = 1000.0 * cycles / countsPerSec;
7955 // Don't show the small fry to keep the results manageable
7958 // We always show the following API because it is always called
7959 // exactly once for each method and its body is the simplest one
7960 // possible (it just returns an integer constant), and therefore
7961 // it can be used to measure the overhead of adding the CLR API
7962 // timing code. Roughly speaking, on a 3GHz x64 box the overhead
7963 // per call should be around 40 ns when using RDTSC, compared to
7964 // about 140 ns when using GetThreadCycles() under Windows.
7965 if (i != API_ICorJitInfo_Names::API_getExpectedTargetArchitecture)
7969 // In the first pass we just compute the totals.
7972 shownCalls += m_total.m_perClrAPIcalls[i];
7973 shownMillis += millis;
7977 unsigned __int32 maxcyc = m_maximum.m_maxClrAPIcycles[i];
7978 double max_ms = 1000.0 * maxcyc / countsPerSec;
7980 fprintf(f, " %-40s", APInames[i]); // API name
7981 fprintf(f, " %8u %9.1f ms", calls, millis); // #calls, total time
7982 fprintf(f, " %8.1f ms %8.1f ns", max_ms, 1000000.0 * millis / calls); // max, avg time
7983 fprintf(f, " %5.1f%%\n", 100.0 * millis / shownMillis); // % of total
7986 checkedCalls += m_total.m_perClrAPIcalls[i];
7987 checkedMillis += millis;
// Pass-1 totals must match pass-0 totals exactly.
7993 assert(checkedCalls == shownCalls);
7994 assert(checkedMillis == shownMillis);
7997 if (shownCalls > 0 || shownMillis > 0)
7999 fprintf(f, " -------------------------");
8000 fprintf(f, "---------------------------------------------------------------------------\n");
8001 fprintf(f, " Total for calls shown above %8u %10.1f ms", shownCalls, shownMillis);
8002 if (totTime_ms > 0.0)
8003 fprintf(f, " (%4.1lf%% of overall JIT time)", shownMillis * 100.0 / totTime_ms);
// JitTimer constructor: begin timing the compilation of one method.
// Captures the starting thread-cycle count and resets the per-method
// accumulators; byteCodeSize is forwarded to m_info, the per-method record.
JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
#if MEASURE_CLRAPI_CALLS
// No EE (CLR API) calls observed yet for this method.
m_CLRcallInvokes = 0;
m_CLRcallCycles = 0;
// (Phases)-1 means "no phase has ended yet"; EndPhase() records here.
m_lastPhase = (Phases)-1;
#if MEASURE_CLRAPI_CALLS
// -1 means "not currently inside a measured CLR API call".
m_CLRcallAPInum = -1;
unsigned __int64 threadCurCycles;
if (_our_GetThreadCycles(&threadCurCycles))
// Both the whole-method clock and the first phase start "now".
m_start = threadCurCycles;
m_curPhaseStart = threadCurCycles;
// JitTimer::EndPhase: record the end of compilation phase "phase".
//
// Charges the cycles elapsed since m_curPhaseStart either to the phase itself
// (leaf phases) or to "slop" (phases with children, whose time has already
// been credited to their sub-phases), then credits the same cycles to every
// ancestor phase. Also snapshots IR size when configured to do so.
void JitTimer::EndPhase(Compiler* compiler, Phases phase)
// We re-run some phases currently, so this following assert doesn't work.
// assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
unsigned __int64 threadCurCycles;
if (_our_GetThreadCycles(&threadCurCycles))
// Cycles spent since the previous phase boundary.
unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
// If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
// Credit the duration to "slop", the total of which should be very small.
if (PhaseHasChildren[phase])
m_info.m_parentPhaseEndSlop += phaseCycles;
// It is a leaf phase. Credit duration to it.
m_info.m_invokesByPhase[phase]++;
m_info.m_cyclesByPhase[phase] += phaseCycles;
#if MEASURE_CLRAPI_CALLS
// Record the CLR API timing info as well.
m_info.m_CLRinvokesByPhase[phase] += m_CLRcallInvokes;
m_info.m_CLRcyclesByPhase[phase] += m_CLRcallCycles;
// Credit the phase's ancestors, if any.
int ancPhase = PhaseParent[phase];
while (ancPhase != -1)
m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
ancPhase = PhaseParent[ancPhase];
// The final phase differs by configuration: when CLR API calls are measured,
// PHASE_CLR_API is the last pseudo-phase.
#if MEASURE_CLRAPI_CALLS
const Phases lastPhase = PHASE_CLR_API;
const Phases lastPhase = PHASE_NUMBER_OF;
if (phase + 1 == lastPhase)
// Ending the final phase: the whole-method cycle total is now known.
m_info.m_totalCycles = (threadCurCycles - m_start);
// The next phase starts where this one ended.
m_curPhaseStart = threadCurCycles;
// Optionally record the IR node count after phases that report IR size.
if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[phase])
m_info.m_nodeCountAfterPhase[phase] = compiler->fgMeasureIR();
m_info.m_nodeCountAfterPhase[phase] = 0;
m_lastPhase = phase;
// Reset the per-phase CLR API accumulators for the next phase.
#if MEASURE_CLRAPI_CALLS
m_CLRcallInvokes = 0;
m_CLRcallCycles = 0;
8106 #if MEASURE_CLRAPI_CALLS
//------------------------------------------------------------------------
// JitTimer::CLRApiCallEnter: Start the stopwatch for an EE call.
//
// Arguments:
//    apix - The API index - an "enum API_ICorJitInfo_Names" value.
//
void JitTimer::CLRApiCallEnter(unsigned apix)
assert(m_CLRcallAPInum == -1); // Nested calls not allowed
m_CLRcallAPInum = apix;
// If we can't get the cycles, we'll just ignore this call
// NOTE(review): the failure branch body is not visible here; CLRApiCallLeave
// treats m_CLRcallStart == 0 as "no valid starting counter".
if (!_our_GetThreadCycles(&m_CLRcallStart))
//------------------------------------------------------------------------
// JitTimer::CLRApiCallLeave: compute / record time spent in an EE call.
//
// Arguments:
//    apix - The API's "enum API_ICorJitInfo_Names" value; this value
//           should match the value passed to the most recent call to
//           "CLRApiCallEnter" (i.e. these must come as matched pairs),
//           and they also may not nest.
//
void JitTimer::CLRApiCallLeave(unsigned apix)
// Make sure we're actually inside a measured CLR call.
assert(m_CLRcallAPInum != -1);
m_CLRcallAPInum = -1;
// Ignore this one if we don't have a valid starting counter.
if (m_CLRcallStart != 0)
if (JitConfig.JitEECallTimingInfo() != 0)
unsigned __int64 threadCurCycles;
if (_our_GetThreadCycles(&threadCurCycles))
// Compute the cycles spent in the call.
threadCurCycles -= m_CLRcallStart;
// Add the cycles to the 'phase' and bump its use count.
m_info.m_cyclesByPhase[PHASE_CLR_API] += threadCurCycles;
m_info.m_invokesByPhase[PHASE_CLR_API] += 1;
// Add the values to the "per API" info.
m_info.m_allClrAPIcycles += threadCurCycles;
m_info.m_allClrAPIcalls += 1;
m_info.m_perClrAPIcalls[apix] += 1;
m_info.m_perClrAPIcycles[apix] += threadCurCycles;
m_info.m_maxClrAPIcycles[apix] = max(m_info.m_maxClrAPIcycles[apix], (unsigned __int32)threadCurCycles);
// Subtract the cycles from the enclosing phase by bumping its start time
// (EE time is accounted under PHASE_CLR_API instead of the current phase).
m_curPhaseStart += threadCurCycles;
// Update the running totals.
m_CLRcallInvokes += 1;
m_CLRcallCycles += threadCurCycles;
assert(m_CLRcallAPInum != -1); // No longer in this API call.
m_CLRcallAPInum = -1;
8180 #endif // MEASURE_CLRAPI_CALLS
8182 CritSecObject JitTimer::s_csvLock;
8184 LPCWSTR Compiler::JitTimeLogCsv()
8186 LPCWSTR jitTimeLogCsv = JitConfig.JitTimeLogCsv();
8187 return jitTimeLogCsv;
// JitTimer::PrintCsvHeader: write the CSV column-header row to the
// JitTimeLogCsv file (no-op when logging is disabled). The file is shared
// across threads, so writing is serialized with s_csvLock; the header is
// only emitted when the file is empty (checked after seeking to EOF).
void JitTimer::PrintCsvHeader()
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
if (jitTimeLogCsv == nullptr)
CritSecHolder csvLock(s_csvLock);
// Open for append so concurrent processes don't truncate each other.
FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
// Seek to the end of the file s.t. `ftell` doesn't lie to us on Windows
fseek(fp, 0, SEEK_END);
// Write the header if the file is empty
fprintf(fp, "\"Method Name\",");
fprintf(fp, "\"Assembly or SPMI Index\",");
fprintf(fp, "\"IL Bytes\",");
fprintf(fp, "\"Basic Blocks\",");
fprintf(fp, "\"Min Opts\",");
fprintf(fp, "\"Loops Cloned\",");
// One column per phase; an extra IR-size column when JitMeasureIR is on.
for (int i = 0; i < PHASE_NUMBER_OF; i++)
fprintf(fp, "\"%s\",", PhaseNames[i]);
if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
fprintf(fp, "\"Node Count After %s\",", PhaseNames[i]);
// Inlining statistics contribute their own columns.
InlineStrategy::DumpCsvHeader(fp);
fprintf(fp, "\"Executable Code Bytes\",");
fprintf(fp, "\"GC Info Bytes\",");
fprintf(fp, "\"Total Bytes Allocated\",");
fprintf(fp, "\"Total Cycles\",");
fprintf(fp, "\"CPS\"\n");
8237 extern ICorJitHost* g_jitHost;
// JitTimer::PrintCsvMethodStats: append one CSV row for the just-compiled
// method to the JitTimeLogCsv file (columns must match PrintCsvHeader).
// No-op when logging is disabled.
void JitTimer::PrintCsvMethodStats(Compiler* comp)
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
if (jitTimeLogCsv == nullptr)
// eeGetMethodFullName uses locks, so don't enter crit sec before this call.
const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
// Try and access the SPMI index to report in the data set.
//
// If the jit is not hosted under SPMI this will return the
// default value of zero.
//
// Query the jit host directly here instead of going via the
// config cache, since value will change for each method.
int index = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), 0);
CritSecHolder csvLock(s_csvLock);
FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
fprintf(fp, "\"%s\",", methName);
fprintf(fp, "%d,", index);
const char* methodAssemblyName = comp->info.compCompHnd->getAssemblyName(
    comp->info.compCompHnd->getModuleAssembly(comp->info.compCompHnd->getClassModule(comp->info.compClassHnd)));
fprintf(fp, "\"%s\",", methodAssemblyName);
fprintf(fp, "%u,", comp->info.compILCodeSize);
fprintf(fp, "%u,", comp->fgBBcount);
fprintf(fp, "%u,", comp->opts.MinOpts());
fprintf(fp, "%u,", comp->optLoopsCloned);
// Sum only leaf phases: parent-phase cycles already include their children.
unsigned __int64 totCycles = 0;
for (int i = 0; i < PHASE_NUMBER_OF; i++)
if (!PhaseHasChildren[i])
totCycles += m_info.m_cyclesByPhase[i];
// %I64u / %Iu below are MSVC-CRT size-prefixed format specifiers.
fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
fprintf(fp, "%u,", m_info.m_nodeCountAfterPhase[i]);
comp->m_inlineStrategy->DumpCsvData(fp);
fprintf(fp, "%u,", comp->info.compNativeCodeSize);
fprintf(fp, "%Iu,", comp->compInfoBlkSize);
fprintf(fp, "%Iu,", comp->compGetArenaAllocator()->getTotalBytesAllocated());
fprintf(fp, "%I64u,", m_info.m_totalCycles);
fprintf(fp, "%f\n", CycleTimer::CyclesPerSecond());
// Completes the timing of the current method, and adds it to "sum".
void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases)
// Emit this method's CSV row (no-op when CSV logging is disabled).
PrintCsvMethodStats(comp);
// Fold this method's per-phase data into the process-wide summary.
sum.AddInfo(m_info, includePhases);
8312 #endif // FEATURE_JIT_METHOD_PERF
8314 #if LOOP_HOIST_STATS
8316 CritSecObject Compiler::s_loopHoistStatsLock; // Default constructor.
8317 unsigned Compiler::s_loopsConsidered = 0;
8318 unsigned Compiler::s_loopsWithHoistedExpressions = 0;
8319 unsigned Compiler::s_totalHoistedExpressions = 0;
8322 void Compiler::PrintAggregateLoopHoistStats(FILE* f)
8325 fprintf(f, "---------------------------------------------------\n");
8326 fprintf(f, "Loop hoisting stats\n");
8327 fprintf(f, "---------------------------------------------------\n");
8329 double pctWithHoisted = 0.0;
8330 if (s_loopsConsidered > 0)
8332 pctWithHoisted = 100.0 * (double(s_loopsWithHoistedExpressions) / double(s_loopsConsidered));
8334 double exprsPerLoopWithExpr = 0.0;
8335 if (s_loopsWithHoistedExpressions > 0)
8337 exprsPerLoopWithExpr = double(s_totalHoistedExpressions) / double(s_loopsWithHoistedExpressions);
8339 fprintf(f, "Considered %d loops. Of these, we hoisted expressions out of %d (%6.2f%%).\n", s_loopsConsidered,
8340 s_loopsWithHoistedExpressions, pctWithHoisted);
8341 fprintf(f, " A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
8342 s_totalHoistedExpressions, exprsPerLoopWithExpr);
8345 void Compiler::AddLoopHoistStats()
8347 CritSecHolder statsLock(s_loopHoistStatsLock);
8349 s_loopsConsidered += m_loopsConsidered;
8350 s_loopsWithHoistedExpressions += m_loopsWithHoistedExpressions;
8351 s_totalHoistedExpressions += m_totalHoistedExpressions;
8354 void Compiler::PrintPerMethodLoopHoistStats()
8356 double pctWithHoisted = 0.0;
8357 if (m_loopsConsidered > 0)
8359 pctWithHoisted = 100.0 * (double(m_loopsWithHoistedExpressions) / double(m_loopsConsidered));
8361 double exprsPerLoopWithExpr = 0.0;
8362 if (m_loopsWithHoistedExpressions > 0)
8364 exprsPerLoopWithExpr = double(m_totalHoistedExpressions) / double(m_loopsWithHoistedExpressions);
8366 printf("Considered %d loops. Of these, we hoisted expressions out of %d (%5.2f%%).\n", m_loopsConsidered,
8367 m_loopsWithHoistedExpressions, pctWithHoisted);
8368 printf(" A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
8369 m_totalHoistedExpressions, exprsPerLoopWithExpr);
8371 #endif // LOOP_HOIST_STATS
//------------------------------------------------------------------------
// RecordStateAtEndOfInlining: capture timing data (if enabled) after
// inlining as completed.
//
// Notes:
// Records data needed for SQM and inlining data dumps. Should be
// called after inlining is complete. (We do this after inlining
// because this marks the last point at which the JIT is likely to
// cause type-loading and class initialization).
//
void Compiler::RecordStateAtEndOfInlining()
#if defined(DEBUG) || defined(INLINE_DATA)
m_compCyclesAtEndOfInlining = 0;
m_compTickCountAtEndOfInlining = 0;
bool b = CycleTimer::GetThreadCyclesS(&m_compCyclesAtEndOfInlining);
// NOTE(review): this early-out is taken when 'b' is false (cycle query failed);
// the guarding conditional is not visible in this view.
return; // We don't have a thread cycle counter.
m_compTickCountAtEndOfInlining = GetTickCount();
#endif // defined(DEBUG) || defined(INLINE_DATA)
//------------------------------------------------------------------------
// RecordStateAtEndOfCompilation: capture timing data (if enabled) after
// compilation is completed.
//
// Computes m_compCycles as the cycles spent between the end of inlining
// (RecordStateAtEndOfInlining) and the end of compilation.
void Compiler::RecordStateAtEndOfCompilation()
#if defined(DEBUG) || defined(INLINE_DATA)
unsigned __int64 compCyclesAtEnd;
bool b = CycleTimer::GetThreadCyclesS(&compCyclesAtEnd);
// NOTE(review): this early-out is taken when 'b' is false (cycle query failed);
// the guarding conditional is not visible in this view.
return; // We don't have a thread cycle counter.
assert(compCyclesAtEnd >= m_compCyclesAtEndOfInlining);
m_compCycles = compCyclesAtEnd - m_compCyclesAtEndOfInlining;
#endif // defined(DEBUG) || defined(INLINE_DATA)
8422 #if FUNC_INFO_LOGGING
8424 LPCWSTR Compiler::compJitFuncInfoFilename = nullptr;
8427 FILE* Compiler::compJitFuncInfoFile = nullptr;
8428 #endif // FUNC_INFO_LOGGING
// dumpConvertedVarSet() dumps the varset bits that are tracked
// variable indices, and we convert them to variable numbers, sort the variable numbers, and
// print them as variable numbers. To do this, we use a temporary set indexed by
// variable number. We can't use the "all varset" type because it is still size-limited, and might
// not be big enough to handle all possible variable numbers.
void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars)
BYTE* pVarNumSet; // trivial set: one byte per varNum, 0 means not in set, 1 means in set.
// Stack-allocate the byte-per-local scratch set (lives only for this call).
size_t varNumSetBytes = comp->lvaCount * sizeof(BYTE);
pVarNumSet = (BYTE*)_alloca(varNumSetBytes);
memset(pVarNumSet, 0, varNumSetBytes); // empty the set
// Pass 1: translate each tracked index in 'vars' into its local number.
VarSetOps::Iter iter(comp, vars);
unsigned varIndex = 0;
while (iter.NextElem(&varIndex))
unsigned varNum = comp->lvaTrackedIndexToLclNum(varIndex);
pVarNumSet[varNum] = 1; // This varNum is in the set
// Pass 2: walk local numbers in ascending order, printing set members —
// this yields the sorted output the header comment promises.
for (size_t varNum = 0; varNum < comp->lvaCount; varNum++)
if (pVarNumSet[varNum] == 1)
printf("V%02u", varNum);
8470 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8471 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8473 XX Debugging helpers XX
8475 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8476 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8479 /*****************************************************************************/
8480 /* The following functions are intended to be called from the debugger, to dump
8481 * various data structures.
8483 * The versions that start with 'c' take a Compiler* as the first argument.
8484 * The versions that start with 'd' use the tlsCompiler, so don't require a Compiler*.
8487 * cBlock, dBlock : Display a basic block (call fgTableDispBasicBlock()).
8488 * cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
8489 * cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
8490 * "V" means "verbose", and will dump all the trees.
8491 * cTree, dTree : Display a tree (call gtDispTree()).
8492 * cTreeLIR, dTreeLIR : Display a tree in LIR form (call gtDispLIRNode()).
8493 * cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
8494 * cEH, dEH : Display the EH handler table (call fgDispHandlerTab()).
8495 * cVar, dVar : Display a local variable given its number (call lvaDumpEntry()).
8496 * cVarDsc, dVarDsc : Display a local variable given a LclVarDsc* (call lvaDumpEntry()).
8497 * cVars, dVars : Display the local variable table (call lvaTableDump()).
8498 * cVarsFinal, dVarsFinal : Display the local variable table (call lvaTableDump(FINAL_FRAME_LAYOUT)).
8499 * cBlockCheapPreds, dBlockCheapPreds : Display a block's cheap predecessors (call block->dspCheapPreds()).
8500 * cBlockPreds, dBlockPreds : Display a block's predecessors (call block->dspPreds()).
8501 * cBlockSuccs, dBlockSuccs : Display a block's successors (call block->dspSuccs(compiler)).
8502 * cReach, dReach : Display all block reachability (call fgDispReach()).
8503 * cDoms, dDoms : Display all block dominators (call fgDispDoms()).
8504 * cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
8505 * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
8506 * indices. These are converted to variable numbers and sorted. (Calls
8507 * dumpConvertedVarSet()).
8508 * cLoop, dLoop : Display the blocks of a loop, including the trees.
8509 * cTreeFlags, dTreeFlags : Display tree flags
8511 * The following don't require a Compiler* to work:
8512 * dRegMask : Display a regMaskTP (call dspRegMask(mask)).
8515 void cBlock(Compiler* comp, BasicBlock* block)
8517 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8518 printf("===================================================================== *Block %u\n", sequenceNumber++);
8519 comp->fgTableDispBasicBlock(block);
8522 void cBlocks(Compiler* comp)
8524 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8525 printf("===================================================================== *Blocks %u\n", sequenceNumber++);
8526 comp->fgDispBasicBlocks();
8529 void cBlocksV(Compiler* comp)
8531 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8532 printf("===================================================================== *BlocksV %u\n", sequenceNumber++);
8533 comp->fgDispBasicBlocks(true);
8536 void cStmt(Compiler* comp, Statement* statement)
8538 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8539 printf("===================================================================== *Stmt %u\n", sequenceNumber++);
8540 comp->gtDispStmt(statement, ">>>");
8543 void cTree(Compiler* comp, GenTree* tree)
8545 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8546 printf("===================================================================== *Tree %u\n", sequenceNumber++);
8547 comp->gtDispTree(tree, nullptr, ">>>");
8550 void cTreeLIR(Compiler* comp, GenTree* tree)
8552 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8553 printf("===================================================================== *TreeLIR %u\n", sequenceNumber++);
8554 comp->gtDispLIRNode(tree);
8557 void cTrees(Compiler* comp)
8559 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8560 printf("===================================================================== *Trees %u\n", sequenceNumber++);
8561 comp->fgDumpTrees(comp->fgFirstBB, nullptr);
8564 void cEH(Compiler* comp)
8566 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8567 printf("===================================================================== *EH %u\n", sequenceNumber++);
8568 comp->fgDispHandlerTab();
8571 void cVar(Compiler* comp, unsigned lclNum)
8573 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8574 printf("===================================================================== *Var %u\n", sequenceNumber++);
8575 comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
8578 void cVarDsc(Compiler* comp, LclVarDsc* varDsc)
8580 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8581 printf("===================================================================== *VarDsc %u\n", sequenceNumber++);
8582 unsigned lclNum = (unsigned)(varDsc - comp->lvaTable);
8583 comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
8586 void cVars(Compiler* comp)
8588 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8589 printf("===================================================================== *Vars %u\n", sequenceNumber++);
8590 comp->lvaTableDump();
8593 void cVarsFinal(Compiler* comp)
8595 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8596 printf("===================================================================== *Vars %u\n", sequenceNumber++);
8597 comp->lvaTableDump(Compiler::FINAL_FRAME_LAYOUT);
// cBlockCheapPreds: debugger helper — print a block's cheap predecessors
// via block->dspCheapPreds(), under a numbered banner.
void cBlockCheapPreds(Compiler* comp, BasicBlock* block)
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== *BlockCheapPreds %u\n",
block->dspCheapPreds();
// cBlockPreds: debugger helper — display a block's predecessors (per the
// header comment above the helper list, this calls block->dspPreds()).
// NOTE(review): the dspPreds() call line is not visible in this view.
void cBlockPreds(Compiler* comp, BasicBlock* block)
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== *BlockPreds %u\n", sequenceNumber++);
8615 void cBlockSuccs(Compiler* comp, BasicBlock* block)
8617 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8618 printf("===================================================================== *BlockSuccs %u\n", sequenceNumber++);
8619 block->dspSuccs(comp);
8622 void cReach(Compiler* comp)
8624 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8625 printf("===================================================================== *Reach %u\n", sequenceNumber++);
8626 comp->fgDispReach();
// cDoms: debugger helper — display block dominators (per the header comment
// above the helper list, this calls fgDispDoms()).
// NOTE(review): the fgDispDoms() call line is not visible in this view.
void cDoms(Compiler* comp)
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== *Doms %u\n", sequenceNumber++);
8636 void cLiveness(Compiler* comp)
8638 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8639 printf("===================================================================== *Liveness %u\n", sequenceNumber++);
8640 comp->fgDispBBLiveness();
8643 void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
8645 static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
8646 printf("===================================================================== dCVarSet %u\n", sequenceNumber++);
8647 dumpConvertedVarSet(comp, vars);
8648 printf("\n"); // dumpConvertedVarSet() doesn't emit a trailing newline
// cLoop: debugger helper — dump the shape of one loop (its descriptor's
// key blocks) and then the blocks themselves, including trees.
void cLoop(Compiler* comp, Compiler::LoopDsc* loop)
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== Loop %u\n", sequenceNumber++);
printf("HEAD " FMT_BB "\n", loop->lpHead->bbNum);
printf("FIRST " FMT_BB "\n", loop->lpFirst->bbNum);
printf("TOP " FMT_BB "\n", loop->lpTop->bbNum);
printf("ENTRY " FMT_BB "\n", loop->lpEntry->bbNum);
// A unique exit block can be named; otherwise just report the exit count.
if (loop->lpExitCnt == 1)
printf("EXIT " FMT_BB "\n", loop->lpExit->bbNum);
printf("EXITS %u\n", loop->lpExitCnt);
printf("BOTTOM " FMT_BB "\n", loop->lpBottom->bbNum);
// Dump all blocks from head to bottom, verbose (trees included).
comp->fgDispBasicBlocks(loop->lpHead, loop->lpBottom, true);
8672 void dBlock(BasicBlock* block)
8674 cBlock(JitTls::GetCompiler(), block);
// dBlocks body: forward to cBlocks() using the thread-local compiler.
cBlocks(JitTls::GetCompiler());
// dBlocksV body: forward to cBlocksV() using the thread-local compiler.
cBlocksV(JitTls::GetCompiler());
8687 void dTree(GenTree* tree)
8689 cTree(JitTls::GetCompiler(), tree);
8692 void dTreeLIR(GenTree* tree)
8694 cTreeLIR(JitTls::GetCompiler(), tree);
// dTrees body: forward to cTrees() using the thread-local compiler.
cTrees(JitTls::GetCompiler());
// dEH body: forward to cEH() using the thread-local compiler.
cEH(JitTls::GetCompiler());
8707 void dVar(unsigned lclNum)
8709 cVar(JitTls::GetCompiler(), lclNum);
8712 void dVarDsc(LclVarDsc* varDsc)
8714 cVarDsc(JitTls::GetCompiler(), varDsc);
// dVars body: forward to cVars() using the thread-local compiler.
cVars(JitTls::GetCompiler());
// dVarsFinal body: forward to cVarsFinal() using the thread-local compiler.
cVarsFinal(JitTls::GetCompiler());
8727 void dBlockPreds(BasicBlock* block)
8729 cBlockPreds(JitTls::GetCompiler(), block);
8732 void dBlockCheapPreds(BasicBlock* block)
8734 cBlockCheapPreds(JitTls::GetCompiler(), block);
8737 void dBlockSuccs(BasicBlock* block)
8739 cBlockSuccs(JitTls::GetCompiler(), block);
// dReach body: forward to cReach() using the thread-local compiler.
cReach(JitTls::GetCompiler());
// dDoms body: forward to cDoms() using the thread-local compiler.
cDoms(JitTls::GetCompiler());
// dLiveness body: forward to cLiveness() using the thread-local compiler.
cLiveness(JitTls::GetCompiler());
8757 void dCVarSet(VARSET_VALARG_TP vars)
8759 cCVarSet(JitTls::GetCompiler(), vars);
8762 void dLoop(Compiler::LoopDsc* loop)
8764 cLoop(JitTls::GetCompiler(), loop);
// dRegMask: debugger helper — display a register mask (per the header comment
// above the helper list, this calls dspRegMask(mask)).
// NOTE(review): the dspRegMask(mask) call line is not visible in this view.
void dRegMask(regMaskTP mask)
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== dRegMask %u\n", sequenceNumber++);
printf("\n"); // dspRegMask() doesn't emit a trailing newline
// dBlockList: debugger helper — print the block numbers in a BasicBlockList
// on a single "WorkList:" line.
// NOTE(review): the loop's list-advance statement (list = list->next) and the
// trailing newline are not visible in this view of the file.
void dBlockList(BasicBlockList* list)
printf("WorkList: ");
while (list != nullptr)
printf(FMT_BB " ", list->block->bbNum);
8786 // Global variables available in debug mode. That are set by debug APIs for finding
8787 // Trees, Stmts, and/or Blocks using id or bbNum.
8788 // That can be used in watch window or as a way to get address of fields for data break points.
8792 BasicBlock* dbTreeBlock;
8793 BasicBlock* dbBlock;
8795 // Debug APIs for finding Trees, Stmts, and/or Blocks.
8796 // As a side effect, they set the debug variables above.
// dFindTree (recursive worker): search 'tree' and its children for the node
// whose gtTreeID equals 'id'; returns the node, or nullptr if not found.
// NOTE(review): the return statements and the assignment to the dbTree debug
// global are not visible in this view of the file.
GenTree* dFindTree(GenTree* tree, unsigned id)
if (tree == nullptr)
if (tree->gtTreeID == id)
// Depth-first search of the children.
unsigned childCount = tree->NumChildren();
for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
child = tree->GetChild(childIndex);
child = dFindTree(child, id);
if (child != nullptr)
// dFindTree: search every statement of every block of the current (TLS)
// compiler's method for the tree with gtTreeID == 'id'. On success the
// dbTreeBlock debug global records the containing block.
GenTree* dFindTree(unsigned id)
Compiler* comp = JitTls::GetCompiler();
dbTreeBlock = nullptr;
for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
for (Statement* stmt : block->Statements())
tree = dFindTree(stmt->GetRootNode(), id);
if (tree != nullptr)
// Remember which block the matching tree lives in.
dbTreeBlock = block;
// dFindStmt: walk every statement of every block of the current (TLS)
// compiler's method, counting with 'stmtId', to locate the statement
// identified by 'id'.
// NOTE(review): the comparison against 'id' and the return of the match are
// not visible in this view of the file.
Statement* dFindStmt(unsigned id)
Compiler* comp = JitTls::GetCompiler();
unsigned stmtId = 0;
for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
for (Statement* stmt : block->Statements())
// dFindBlock: linear search of the current (TLS) compiler's block list for
// the block whose bbNum equals 'bbNum'; returns it (nullptr if absent).
BasicBlock* dFindBlock(unsigned bbNum)
Compiler* comp = JitTls::GetCompiler();
BasicBlock* block = nullptr;
for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
if (block->bbNum == bbNum)
8894 Compiler::LoopDsc* dFindLoop(unsigned loopNum)
8896 Compiler* comp = JitTls::GetCompiler();
8898 if (loopNum >= comp->optLoopCount)
8900 printf("loopNum %u out of range\n");
8904 return &comp->optLoopTable[loopNum];
8907 void cTreeFlags(Compiler* comp, GenTree* tree)
8911 if (tree->gtFlags != 0)
8913 chars += printf("flags=");
8916 CLANG_FORMAT_COMMENT_ANCHOR;
8919 if (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE)
8921 chars += printf("[NODE_LARGE]");
8923 if (tree->gtDebugFlags & GTF_DEBUG_NODE_SMALL)
8925 chars += printf("[NODE_SMALL]");
8927 if (tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)
8929 chars += printf("[MORPHED]");
8931 #endif // defined(DEBUG)
8933 if (tree->gtFlags & GTF_COLON_COND)
8935 chars += printf("[COLON_COND]");
8940 genTreeOps op = tree->OperGet();
8945 case GT_LCL_VAR_ADDR:
8947 case GT_LCL_FLD_ADDR:
8948 case GT_STORE_LCL_FLD:
8949 case GT_STORE_LCL_VAR:
8950 if (tree->gtFlags & GTF_VAR_DEF)
8952 chars += printf("[VAR_DEF]");
8954 if (tree->gtFlags & GTF_VAR_USEASG)
8956 chars += printf("[VAR_USEASG]");
8958 if (tree->gtFlags & GTF_VAR_CAST)
8960 chars += printf("[VAR_CAST]");
8962 if (tree->gtFlags & GTF_VAR_ITERATOR)
8964 chars += printf("[VAR_ITERATOR]");
8966 if (tree->gtFlags & GTF_VAR_CLONED)
8968 chars += printf("[VAR_CLONED]");
8970 if (tree->gtFlags & GTF_VAR_DEATH)
8972 chars += printf("[VAR_DEATH]");
8974 if (tree->gtFlags & GTF_VAR_ARR_INDEX)
8976 chars += printf("[VAR_ARR_INDEX]");
8979 if (tree->gtDebugFlags & GTF_DEBUG_VAR_CSE_REF)
8981 chars += printf("[VAR_CSE_REF]");
8988 if (tree->gtFlags & GTF_NOP_DEATH)
8990 chars += printf("[NOP_DEATH]");
8998 if (tree->gtFlags & GTF_FLD_VOLATILE)
9000 chars += printf("[FLD_VOLATILE]");
9006 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
9008 chars += printf("[INX_STRING_LAYOUT]");
9012 if (tree->gtFlags & GTF_INX_RNGCHK)
9014 chars += printf("[INX_RNGCHK]");
9021 if (tree->gtFlags & GTF_IND_VOLATILE)
9023 chars += printf("[IND_VOLATILE]");
9025 if (tree->gtFlags & GTF_IND_TGTANYWHERE)
9027 chars += printf("[IND_TGTANYWHERE]");
9029 if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP)
9031 chars += printf("[IND_TGT_NOT_HEAP]");
9033 if (tree->gtFlags & GTF_IND_TLS_REF)
9035 chars += printf("[IND_TLS_REF]");
9037 if (tree->gtFlags & GTF_IND_ASG_LHS)
9039 chars += printf("[IND_ASG_LHS]");
9041 if (tree->gtFlags & GTF_IND_UNALIGNED)
9043 chars += printf("[IND_UNALIGNED]");
9045 if (tree->gtFlags & GTF_IND_INVARIANT)
9047 chars += printf("[IND_INVARIANT]");
9053 if (tree->gtFlags & GTF_CLS_VAR_ASG_LHS)
9055 chars += printf("[CLS_VAR_ASG_LHS]");
9060 #if !defined(TARGET_64BIT)
9064 if (tree->gtFlags & GTF_MUL_64RSLT)
9066 chars += printf("[64RSLT]");
9068 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9070 chars += printf("[ADDRMODE_NO_CSE]");
9076 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9078 chars += printf("[ADDRMODE_NO_CSE]");
9084 if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
9086 chars += printf("[ADDRMODE_NO_CSE]");
9101 if (tree->gtFlags & GTF_RELOP_NAN_UN)
9103 chars += printf("[RELOP_NAN_UN]");
9105 if (tree->gtFlags & GTF_RELOP_JMP_USED)
9107 chars += printf("[RELOP_JMP_USED]");
9109 if (tree->gtFlags & GTF_RELOP_QMARK)
9111 chars += printf("[RELOP_QMARK]");
9117 if (tree->gtFlags & GTF_QMARK_CAST_INSTOF)
9119 chars += printf("[QMARK_CAST_INSTOF]");
9125 if (tree->gtFlags & GTF_BOX_VALUE)
9127 chars += printf("[BOX_VALUE]");
9134 unsigned handleKind = (tree->gtFlags & GTF_ICON_HDL_MASK);
9139 case GTF_ICON_SCOPE_HDL:
9141 chars += printf("[ICON_SCOPE_HDL]");
9144 case GTF_ICON_CLASS_HDL:
9146 chars += printf("[ICON_CLASS_HDL]");
9149 case GTF_ICON_METHOD_HDL:
9151 chars += printf("[ICON_METHOD_HDL]");
9154 case GTF_ICON_FIELD_HDL:
9156 chars += printf("[ICON_FIELD_HDL]");
9159 case GTF_ICON_STATIC_HDL:
9161 chars += printf("[ICON_STATIC_HDL]");
9164 case GTF_ICON_STR_HDL:
9166 chars += printf("[ICON_STR_HDL]");
9169 case GTF_ICON_CONST_PTR:
9171 chars += printf("[ICON_CONST_PTR]");
9174 case GTF_ICON_GLOBAL_PTR:
9176 chars += printf("[ICON_GLOBAL_PTR]");
9179 case GTF_ICON_VARG_HDL:
9181 chars += printf("[ICON_VARG_HDL]");
9184 case GTF_ICON_PINVKI_HDL:
9186 chars += printf("[ICON_PINVKI_HDL]");
9189 case GTF_ICON_TOKEN_HDL:
9191 chars += printf("[ICON_TOKEN_HDL]");
9194 case GTF_ICON_TLS_HDL:
9196 chars += printf("[ICON_TLD_HDL]");
9199 case GTF_ICON_FTN_ADDR:
9201 chars += printf("[ICON_FTN_ADDR]");
9204 case GTF_ICON_CIDMID_HDL:
9206 chars += printf("[ICON_CIDMID_HDL]");
9209 case GTF_ICON_BBC_PTR:
9211 chars += printf("[ICON_BBC_PTR]");
9214 case GTF_ICON_FIELD_OFF:
9216 chars += printf("[ICON_FIELD_OFF]");
9224 if (tree->AsObj()->GetLayout()->HasGCPtr())
9226 chars += printf("[BLK_HASGCPTR]");
9233 case GT_STORE_DYN_BLK:
9235 if (tree->gtFlags & GTF_BLK_VOLATILE)
9237 chars += printf("[BLK_VOLATILE]");
9239 if (tree->AsBlk()->IsUnaligned())
9241 chars += printf("[BLK_UNALIGNED]");
9247 if (tree->gtFlags & GTF_CALL_UNMANAGED)
9249 chars += printf("[CALL_UNMANAGED]");
9251 if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
9253 chars += printf("[CALL_INLINE_CANDIDATE]");
9255 if (!tree->AsCall()->IsVirtual())
9257 chars += printf("[CALL_NONVIRT]");
9259 if (tree->AsCall()->IsVirtualVtable())
9261 chars += printf("[CALL_VIRT_VTABLE]");
9263 if (tree->AsCall()->IsVirtualStub())
9265 chars += printf("[CALL_VIRT_STUB]");
9267 if (tree->gtFlags & GTF_CALL_NULLCHECK)
9269 chars += printf("[CALL_NULLCHECK]");
9271 if (tree->gtFlags & GTF_CALL_POP_ARGS)
9273 chars += printf("[CALL_POP_ARGS]");
9275 if (tree->gtFlags & GTF_CALL_HOISTABLE)
9277 chars += printf("[CALL_HOISTABLE]");
9280 // More flags associated with calls.
9283 GenTreeCall* call = tree->AsCall();
9285 if (call->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL)
9287 chars += printf("[CALL_M_EXPLICIT_TAILCALL]");
9289 if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL)
9291 chars += printf("[CALL_M_TAILCALL]");
9293 if (call->gtCallMoreFlags & GTF_CALL_M_VARARGS)
9295 chars += printf("[CALL_M_VARARGS]");
9297 if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
9299 chars += printf("[CALL_M_RETBUFFARG]");
9301 if (call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV)
9303 chars += printf("[CALL_M_DELEGATE_INV]");
9305 if (call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK)
9307 chars += printf("[CALL_M_NOGCCHECK]");
9309 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
9311 chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
9314 if (call->IsUnmanaged())
9316 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
9318 chars += printf("[CALL_M_UNMGD_THISCALL]");
9321 else if (call->IsVirtualStub())
9323 if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
9325 chars += printf("[CALL_M_VIRTSTUB_REL_INDIRECT]");
9328 else if (!call->IsVirtual())
9330 if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
9332 chars += printf("[CALL_M_NONVIRT_SAME_THIS]");
9336 if (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)
9338 chars += printf("[CALL_M_FRAME_VAR_DEATH]");
9340 if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_JIT_HELPER)
9342 chars += printf("[CALL_M_TAILCALL_VIA_JIT_HELPER]");
9344 #if FEATURE_TAILCALL_OPT
9345 if (call->gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL)
9347 chars += printf("[CALL_M_IMPLICIT_TAILCALL]");
9350 if (call->gtCallMoreFlags & GTF_CALL_M_PINVOKE)
9352 chars += printf("[CALL_M_PINVOKE]");
9355 if (call->IsFatPointerCandidate())
9357 chars += printf("[CALL_FAT_POINTER_CANDIDATE]");
9360 if (call->IsGuarded())
9362 chars += printf("[CALL_GUARDED]");
9365 if (call->IsExpRuntimeLookup())
9367 chars += printf("[CALL_EXP_RUNTIME_LOOKUP]");
9374 unsigned flags = (tree->gtFlags & (~(unsigned)(GTF_COMMON_MASK | GTF_OVERFLOW)));
9377 chars += printf("[%08X]", flags);
9385 if (tree->gtFlags & GTF_ASG)
9387 chars += printf("[ASG]");
9389 if (tree->gtFlags & GTF_CALL)
9391 chars += printf("[CALL]");
9399 if (tree->gtFlags & GTF_OVERFLOW)
9401 chars += printf("[OVERFLOW]");
9407 if (tree->gtFlags & GTF_EXCEPT)
9409 chars += printf("[EXCEPT]");
9411 if (tree->gtFlags & GTF_GLOB_REF)
9413 chars += printf("[GLOB_REF]");
9415 if (tree->gtFlags & GTF_ORDER_SIDEEFF)
9417 chars += printf("[ORDER_SIDEEFF]");
9419 if (tree->gtFlags & GTF_REVERSE_OPS)
9421 if (op != GT_LCL_VAR)
9423 chars += printf("[REVERSE_OPS]");
9426 if (tree->gtFlags & GTF_SPILLED)
9428 chars += printf("[SPILLED_OPER]");
9430 #if FEATURE_SET_FLAGS
9431 if (tree->gtFlags & GTF_SET_FLAGS)
9433 if ((op != GT_IND) && (op != GT_STOREIND))
9435 chars += printf("[ZSF_SET_FLAGS]");
9439 if (tree->gtFlags & GTF_IND_NONFAULTING)
9441 if (tree->OperIsIndirOrArrLength())
9443 chars += printf("[IND_NONFAULTING]");
9446 if (tree->gtFlags & GTF_MAKE_CSE)
9448 chars += printf("[MAKE_CSE]");
9450 if (tree->gtFlags & GTF_DONT_CSE)
9452 chars += printf("[DONT_CSE]");
9454 if (tree->gtFlags & GTF_BOOLEAN)
9456 chars += printf("[BOOLEAN]");
9458 if (tree->gtFlags & GTF_UNSIGNED)
9460 chars += printf("[SMALL_UNSIGNED]");
9462 if (tree->gtFlags & GTF_LATE_ARG)
9464 chars += printf("[SMALL_LATE_ARG]");
9466 if (tree->gtFlags & GTF_SPILL)
9468 chars += printf("[SPILL]");
9470 if (tree->gtFlags & GTF_REUSE_REG_VAL)
9472 if (op == GT_CNS_INT)
9474 chars += printf("[REUSE_REG_VAL]");
// Debugger-callable wrapper around cTreeFlags: dumps the flags of "tree"
// using the Compiler instance stashed in thread-local storage, so it can be
// invoked from a debugger without a compiler pointer at hand.
void dTreeFlags(GenTree* tree)
{
    cTreeFlags(JitTls::GetCompiler(), tree);
}
9489 BitSetSupport::BitSetOpCounter Compiler::m_varsetOpCounter("VarSetOpCounts.log");
9491 #if ALLVARSET_COUNTOPS
9493 BitSetSupport::BitSetOpCounter Compiler::m_allvarsetOpCounter("AllVarSetOpCounts.log");
9497 HelperCallProperties Compiler::s_helperCallProperties;
9499 /*****************************************************************************/
9500 /*****************************************************************************/
9502 //------------------------------------------------------------------------
9504 // Given some tree node return does it need all GC refs to be spilled from
9505 // callee save registers.
9508 // tree - the tree for which we ask about gc refs.
9511 // true - tree kills GC refs on callee save registers
9512 // false - tree doesn't affect GC refs on callee save registers
9513 bool Compiler::killGCRefs(GenTree* tree)
9517 GenTreeCall* call = tree->AsCall();
9518 if (call->IsUnmanaged())
9523 if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_JIT_PINVOKE_BEGIN))
9525 assert(opts.ShouldUsePInvokeHelpers());
9529 else if (tree->OperIs(GT_START_PREEMPTGC))
9537 //------------------------------------------------------------------------
9538 // lvaIsOSRLocal: check if this local var is one that requires special
9539 // treatment for OSR compilations.
9542 // varNum - variable of interest
9545 // true - this is an OSR compile and this local requires special treatment
9546 // false - not an OSR compile, or not an interesting local for OSR
9548 bool Compiler::lvaIsOSRLocal(unsigned varNum)
9555 if (varNum < info.compLocalsCount)
9560 LclVarDsc* varDsc = lvaGetDesc(varNum);
9562 if (varDsc->lvIsStructField)
9564 return (varDsc->lvParentLcl < info.compLocalsCount);
//------------------------------------------------------------------------------
// gtChangeOperToNullCheck: helper to change tree oper to a NULLCHECK.
//
// Arguments:
//    tree - the node to change; must be an indirection-like node
//           (GT_FIELD, GT_IND, GT_OBJ, GT_BLK, or GT_DYN_BLK);
//    basicBlock - basic block of the node.
//
// Notes:
//    the function should not be called after lowering for platforms that do not support
//    emitting NULLCHECK nodes, like arm32. Use `Lowering::TransformUnusedIndirection`
//    that handles it and calls this function when appropriate.
//
void Compiler::gtChangeOperToNullCheck(GenTree* tree, BasicBlock* block)
{
    assert(tree->OperIs(GT_FIELD, GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK));
    tree->ChangeOper(GT_NULLCHECK);
    // A null check produces no value; retype the node to a plain INT.
    tree->ChangeType(TYP_INT);
    // Record the presence of a null check on both the block and the method,
    // so later phases know to look for NULLCHECK nodes here.
    block->bbFlags |= BBF_HAS_NULLCHECK;
    optMethodFlags |= OMF_HAS_NULLCHECK;