1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
4 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9 XX The variables to be used by the code generator. XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
21 #include "registerargconvention.h"
22 #include "jitstd/algorithm.h"
23 #include "patchpointinfo.h"
25 /*****************************************************************************/
30 unsigned Compiler::s_lvaDoubleAlignedProcsCount = 0;
34 /*****************************************************************************/
36 void Compiler::lvaInit()
38 /* We haven't allocated stack variables yet */
39 lvaRefCountState = RCS_INVALID;
41 lvaGenericsContextInUse = false;
43 lvaTrackedToVarNumSize = 0;
44 lvaTrackedToVarNum = nullptr;
46 lvaTrackedFixed = false; // false: We can still add new tracked variables
48 lvaDoneFrameLayout = NO_FRAME_LAYOUT;
49 #if !defined(FEATURE_EH_FUNCLETS)
50 lvaShadowSPslotsVar = BAD_VAR_NUM;
51 #endif // !FEATURE_EH_FUNCLETS
52 lvaInlinedPInvokeFrameVar = BAD_VAR_NUM;
53 lvaReversePInvokeFrameVar = BAD_VAR_NUM;
54 #if FEATURE_FIXED_OUT_ARGS
55 lvaOutgoingArgSpaceVar = BAD_VAR_NUM;
56 lvaOutgoingArgSpaceSize = PhasedVar<unsigned>();
57 #endif // FEATURE_FIXED_OUT_ARGS
58 #ifdef JIT32_GCENCODER
59 lvaLocAllocSPvar = BAD_VAR_NUM;
60 #endif // JIT32_GCENCODER
61 lvaNewObjArrayArgs = BAD_VAR_NUM;
62 lvaGSSecurityCookie = BAD_VAR_NUM;
64 lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
66 lvaVarargsHandleArg = BAD_VAR_NUM;
67 lvaStubArgumentVar = BAD_VAR_NUM;
68 lvaArg0Var = BAD_VAR_NUM;
69 lvaMonAcquired = BAD_VAR_NUM;
70 lvaRetAddrVar = BAD_VAR_NUM;
72 lvaInlineeReturnSpillTemp = BAD_VAR_NUM;
74 gsShadowVarInfo = nullptr;
75 #if defined(FEATURE_EH_FUNCLETS)
76 lvaPSPSym = BAD_VAR_NUM;
79 lvaSIMDInitTempVarNum = BAD_VAR_NUM;
80 #endif // FEATURE_SIMD
83 #if defined(DEBUG) && defined(TARGET_XARCH)
84 lvaReturnSpCheck = BAD_VAR_NUM;
87 #if defined(DEBUG) && defined(TARGET_X86)
88 lvaCallSpCheck = BAD_VAR_NUM;
91 structPromotionHelper = new (this, CMK_Generic) StructPromotionHelper(this);
94 /*****************************************************************************/
96 void Compiler::lvaInitTypeRef()
99 /* x86 args look something like this:
100 [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie]
102 x64 is closer to the native ABI:
103 [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
104 (Note: prior to .NET Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"),
105 the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This
106 is different from the C++ order, where the "hidden return buffer" always comes first.)
108 ARM and ARM64 are the same as the current x64 convention:
109 [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
112 The var arg cookie and generic context are swapped with respect to the user arguments
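       Illustrative example (not part of the original comment): for an instance method
       `int Foo(int a, double b)`, both x86 and x64 lay out V00 'this', V01 'a', V02 'b',
       followed by the IL locals; a generic context or varargs cookie, when present, precedes
       the declared arguments on x64/ARM/ARM64 but follows them on x86.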
115 /* Set compArgsCount and compLocalsCount */
117 info.compArgsCount = info.compMethodInfo->args.numArgs;
119 // Is there a 'this' pointer
121 if (!info.compIsStatic)
123 info.compArgsCount++;
127 info.compThisArg = BAD_VAR_NUM;
130 info.compILargsCount = info.compArgsCount;
132 // Initialize "compRetNativeType" (along with "compRetTypeDesc"):
134 // 1. For structs returned via a return buffer, or in multiple registers, make it TYP_STRUCT.
135 // 2. For structs returned in a single register, make it the corresponding primitive type.
136 // 3. For primitives, leave it as-is. Note this makes it "incorrect" for soft-FP conventions.
138 ReturnTypeDesc retTypeDesc;
139 retTypeDesc.InitializeReturnType(this, info.compRetType, info.compMethodInfo->args.retTypeClass, info.compCallConv);
141 compRetTypeDesc = retTypeDesc;
142 unsigned returnRegCount = retTypeDesc.GetReturnRegCount();
143 bool hasRetBuffArg = false;
144 if (returnRegCount > 1)
146 info.compRetNativeType = varTypeIsMultiReg(info.compRetType) ? info.compRetType : TYP_STRUCT;
148 else if (returnRegCount == 1)
150 info.compRetNativeType = retTypeDesc.GetReturnRegType(0);
154 hasRetBuffArg = info.compRetType != TYP_VOID;
155 info.compRetNativeType = hasRetBuffArg ? TYP_STRUCT : TYP_VOID;
158 // Do we have a RetBuffArg?
161 info.compArgsCount++;
165 info.compRetBuffArg = BAD_VAR_NUM;
168 /* There is a 'hidden' cookie pushed last when the
169 calling convention is varargs */
171 if (info.compIsVarArgs)
173 info.compArgsCount++;
176 // Is there an extra parameter used to pass instantiation info to
177 // shared generic methods and shared generic struct instance methods?
178 if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
180 info.compArgsCount++;
184 info.compTypeCtxtArg = BAD_VAR_NUM;
187 lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs;
189 info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs;
191 /* Now allocate the variable descriptor table */
193 if (compIsForInlining())
195 lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
196 lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
197 lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
199 // No more stuff needs to be done.
203 lvaTableCnt = lvaCount * 2;
205 if (lvaTableCnt < 16)
210 lvaTable = getAllocator(CMK_LvaTable).allocate<LclVarDsc>(lvaTableCnt);
211 size_t tableSize = lvaTableCnt * sizeof(*lvaTable);
212 memset((void*)lvaTable, 0, tableSize);
213 for (unsigned i = 0; i < lvaTableCnt; i++)
215 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor.
218 //-------------------------------------------------------------------------
219 // Count the arguments and initialize the respective lvaTable[] entries
221 // First the implicit arguments
222 //-------------------------------------------------------------------------
224 InitVarDscInfo varDscInfo;
226 // x86 unmanaged calling conventions limit the number of registers supported
227 // for accepting arguments. As a result, we need to modify the number of registers
228 // when we emit a method with an unmanaged calling convention.
229 switch (info.compCallConv)
231 case CorInfoCallConvExtension::Thiscall:
232 // In thiscall the this parameter goes into a register.
233 varDscInfo.Init(lvaTable, hasRetBuffArg, 1, 0);
235 case CorInfoCallConvExtension::C:
236 case CorInfoCallConvExtension::Stdcall:
237 case CorInfoCallConvExtension::CMemberFunction:
238 case CorInfoCallConvExtension::StdcallMemberFunction:
239 varDscInfo.Init(lvaTable, hasRetBuffArg, 0, 0);
241 case CorInfoCallConvExtension::Managed:
242 case CorInfoCallConvExtension::Fastcall:
243 case CorInfoCallConvExtension::FastcallMemberFunction:
245 varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG);
249 varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG);
252 lvaInitArgs(&varDscInfo);
254 //-------------------------------------------------------------------------
255 // Finally the local variables
256 //-------------------------------------------------------------------------
258 unsigned varNum = varDscInfo.varNum;
259 LclVarDsc* varDsc = varDscInfo.varDsc;
260 CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args;
262 #if defined(TARGET_ARM) || defined(TARGET_RISCV64)
263 compHasSplitParam = varDscInfo.hasSplitParam;
264 #endif // TARGET_ARM || TARGET_RISCV64
266 for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs;
267 i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig))
269 CORINFO_CLASS_HANDLE typeHnd;
270 CorInfoTypeWithMod corInfoTypeWithMod =
271 info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd);
272 CorInfoType corInfoType = strip(corInfoTypeWithMod);
274 lvaInitVarDsc(varDsc, varNum, corInfoType, typeHnd, localsSig, &info.compMethodInfo->locals);
276 if ((corInfoTypeWithMod & CORINFO_TYPE_MOD_PINNED) != 0)
278 if ((corInfoType == CORINFO_TYPE_CLASS) || (corInfoType == CORINFO_TYPE_BYREF))
280 JITDUMP("Setting lvPinned for V%02u\n", varNum);
281 varDsc->lvPinned = 1;
285 // An OSR method may not see any references to the pinned local,
286 // but it must still report the local in GC info.
288 varDsc->lvImplicitlyReferenced = 1;
293 JITDUMP("Ignoring pin for non-GC type V%02u\n", varNum);
297 varDsc->lvOnFrame = true; // The final home for this local variable might be our local stack frame
299 if (corInfoType == CORINFO_TYPE_CLASS)
301 CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->locals, localsSig);
302 lvaSetClass(varNum, clsHnd);
306 if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
307 // as that will cause them to be placed along with the real unsafe buffers,
308 // unnecessarily exposing them to overruns. This can affect GS tests which
309 // intentionally do buffer-overruns.
310 !getNeedsGSSecurityCookie() &&
311 // GS checks require the stack to be re-ordered, which can't be done with EnC
312 !opts.compDbgEnC && compStressCompile(STRESS_UNSAFE_BUFFER_CHECKS, 25))
314 setNeedsGSSecurityCookie();
315 compGSReorderStackLayout = true;
317 for (unsigned i = 0; i < lvaCount; i++)
319 if ((lvaTable[i].lvType == TYP_STRUCT) && compStressCompile(STRESS_GENERIC_VARN, 60))
321 lvaTable[i].lvIsUnsafeBuffer = true;
326 // If this is an OSR method, mark all the OSR locals.
328 // Do this before we add the GS Cookie Dummy or Outgoing args to the locals
329 // so we don't have to do special checks to exclude them.
333 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
335 LclVarDsc* const varDsc = lvaGetDesc(lclNum);
336 varDsc->lvIsOSRLocal = true;
338 if (info.compPatchpointInfo->IsExposed(lclNum))
340 JITDUMP("-- V%02u is OSR exposed\n", lclNum);
341 varDsc->lvIsOSRExposedLocal = true;
343 // Ensure that ref counts for exposed OSR locals take into account
344 // that some of the refs might be in the Tier0 parts of the method
345 // that get trimmed away.
347 varDsc->lvImplicitlyReferenced = 1;
352 if (getNeedsGSSecurityCookie())
354 // Ensure that there will be at least one stack variable since
355 // we require that the GSCookie does not have a 0 stack offset.
356 unsigned dummy = lvaGrabTempWithImplicitUse(false DEBUGARG("GSCookie dummy"));
357 LclVarDsc* gsCookieDummy = lvaGetDesc(dummy);
358 gsCookieDummy->lvType = TYP_INT;
359 gsCookieDummy->lvIsTemp = true; // It is not alive at all, set the flag to prevent zero-init.
360 lvaSetVarDoNotEnregister(dummy DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr));
363 // Allocate the lvaOutgoingArgSpaceVar now because we can run into problems in the
364 // emitter when the varNum is greater than 32767 (see emitLclVarAddr::initLclVarAddr)
365 lvaAllocOutgoingArgSpaceVar();
370 lvaTableDump(INITIAL_FRAME_LAYOUT);
375 /*****************************************************************************/
376 void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
380 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
381 // Prespill all argument regs onto the stack on Arm when running under the profiler.
382 if (compIsProfilerHookNeeded())
384 codeGen->regSet.rsMaskPreSpillRegArg |= RBM_ARG_REGS;
388 //----------------------------------------------------------------------
390 /* Is there a "this" pointer ? */
391 lvaInitThisPtr(varDscInfo);
393 unsigned numUserArgsToSkip = 0;
394 unsigned numUserArgs = info.compMethodInfo->args.numArgs;
395 #if !defined(TARGET_ARM)
396 if (TargetOS::IsWindows && callConvIsInstanceMethodCallConv(info.compCallConv))
398 // If we are a native instance method, handle the first user arg
399 // (the unmanaged this parameter) and then handle the hidden
400 // return buffer parameter.
401 assert(numUserArgs >= 1);
402 lvaInitUserArgs(varDscInfo, 0, 1);
406 lvaInitRetBuffArg(varDscInfo, false);
411 /* If we have a hidden return-buffer parameter, that comes here */
412 lvaInitRetBuffArg(varDscInfo, true);
415 //======================================================================
417 #if USER_ARGS_COME_LAST
418 //@GENERICS: final instantiation-info argument for shared generic methods
419 // and shared generic struct instance methods
420 lvaInitGenericsCtxt(varDscInfo);
422 /* If the method is varargs, process the varargs cookie */
423 lvaInitVarArgsHandle(varDscInfo);
426 //-------------------------------------------------------------------------
427 // Now walk the function signature for the explicit user arguments
428 //-------------------------------------------------------------------------
429 lvaInitUserArgs(varDscInfo, numUserArgsToSkip, numUserArgs);
430 #if !USER_ARGS_COME_LAST
431 //@GENERICS: final instantiation-info argument for shared generic methods
432 // and shared generic struct instance methods
433 lvaInitGenericsCtxt(varDscInfo);
435 /* If the method is varargs, process the varargs cookie */
436 lvaInitVarArgsHandle(varDscInfo);
439 //----------------------------------------------------------------------
441 // We have set info.compArgsCount in compCompile()
442 noway_assert(varDscInfo->varNum == info.compArgsCount);
443 assert(varDscInfo->intRegArgNum <= MAX_REG_ARG);
445 codeGen->intRegState.rsCalleeRegArgCount = varDscInfo->intRegArgNum;
446 codeGen->floatRegState.rsCalleeRegArgCount = varDscInfo->floatRegArgNum;
448 #if FEATURE_FASTTAILCALL
449 // Save the stack usage information
450 // We can get register usage information using codeGen->intRegState and
451 // codeGen->floatRegState
452 info.compArgStackSize = varDscInfo->stackArgSize;
453 #endif // FEATURE_FASTTAILCALL
455 // The total argument size must be aligned.
456 noway_assert((compArgSize % TARGET_POINTER_SIZE) == 0);
459 /* We cannot pass more than 2^16 bytes of arguments, as the "ret"
460    instruction can only pop a 16-bit byte count. This could be handled
461    correctly, but it would be very difficult for fully interruptible code */
463 if (compArgSize != (size_t)(unsigned short)compArgSize)
464 IMPL_LIMITATION("Too many arguments for the \"ret\" instruction to pop");
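// Illustrative note (not from the original source): the check above relies on truncation to
// 'unsigned short'; e.g. compArgSize == 0x10004 truncates to 4, so the two values differ and the
// method hits the IMPL_LIMITATION above.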
468 /*****************************************************************************/
469 void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
471 LclVarDsc* varDsc = varDscInfo->varDsc;
472 if (!info.compIsStatic)
474 varDsc->lvIsParam = 1;
477 lvaArg0Var = info.compThisArg = varDscInfo->varNum;
478 noway_assert(info.compThisArg == 0);
480 if (eeIsValueClass(info.compClassHnd))
482 varDsc->lvType = TYP_BYREF;
486 varDsc->lvType = TYP_REF;
487 lvaSetClass(varDscInfo->varNum, info.compClassHnd);
490 varDsc->lvIsRegArg = 1;
491 noway_assert(varDscInfo->intRegArgNum == 0);
493 varDsc->SetArgReg(genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet()));
494 #if FEATURE_MULTIREG_ARGS
495 varDsc->SetOtherArgReg(REG_NA);
497 varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
502 printf("'this' passed in register %s\n", getRegName(varDsc->GetArgReg()));
505 compArgSize += TARGET_POINTER_SIZE;
507 varDscInfo->varNum++;
508 varDscInfo->varDsc++;
512 /*****************************************************************************/
513 void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBufReg)
515 if (varDscInfo->hasRetBufArg)
517 info.compRetBuffArg = varDscInfo->varNum;
519 LclVarDsc* varDsc = varDscInfo->varDsc;
520 varDsc->lvType = TYP_BYREF;
521 varDsc->lvIsParam = 1;
522 varDsc->lvIsRegArg = 0;
524 if (useFixedRetBufReg && hasFixedRetBuffReg())
526 varDsc->lvIsRegArg = 1;
527 varDsc->SetArgReg(theFixedRetBuffReg());
529 else if (varDscInfo->canEnreg(TYP_INT))
531 varDsc->lvIsRegArg = 1;
532 unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT);
533 varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum));
536 #if FEATURE_MULTIREG_ARGS
537 varDsc->SetOtherArgReg(REG_NA);
539 varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
541 assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg()));
544 if (varDsc->lvIsRegArg && verbose)
546 printf("'__retBuf' passed in register %s\n", getRegName(varDsc->GetArgReg()));
550 /* Update the total argument size, count and varDsc */
552 compArgSize += TARGET_POINTER_SIZE;
553 varDscInfo->varNum++;
554 varDscInfo->varDsc++;
558 //-----------------------------------------------------------------------------
560 // Initialize local var descriptions for incoming user arguments
563 // varDscInfo - the local var descriptions
564 // skipArgs - the number of user args to skip processing.
565 // takeArgs - the number of user args to process (after skipping skipArgs number of args)
567 void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, unsigned takeArgs)
569 //-------------------------------------------------------------------------
570 // Walk the function signature for the explicit arguments
571 //-------------------------------------------------------------------------
573 #if defined(TARGET_X86)
574 // Only (some of) the implicit args are enregistered for varargs
575 if (info.compIsVarArgs)
577 varDscInfo->maxIntRegArgNum = varDscInfo->intRegArgNum;
579 #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
580 // On Windows AMD64 the int and float argument registers share a single index (unlike a System V environment, where the float registers are not indexed together with the int ones), so keep the two counts in sync.
581 varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum;
584 CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
586 const unsigned argSigLen = info.compMethodInfo->args.numArgs;
588 // We will process at most takeArgs arguments from the signature after skipping skipArgs arguments
589 const int64_t numUserArgs = min(takeArgs, (argSigLen - (int64_t)skipArgs));
592 // If there are no user args, or fewer than skipArgs args, return here since there's no work to do.
592 if (numUserArgs <= 0)
598 regMaskTP doubleAlignMask = RBM_NONE;
601 // Skip skipArgs arguments from the signature.
602 for (unsigned i = 0; i < skipArgs; i++, argLst = info.compCompHnd->getArgNext(argLst))
607 // Process each user arg.
608 for (unsigned i = 0; i < numUserArgs;
609 i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst))
611 LclVarDsc* varDsc = varDscInfo->varDsc;
612 CORINFO_CLASS_HANDLE typeHnd = nullptr;
614 CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd);
615 varDsc->lvIsParam = 1;
617 lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args);
619 if (strip(corInfoType) == CORINFO_TYPE_CLASS)
621 CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argLst);
622 lvaSetClass(varDscInfo->varNum, clsHnd);
625 // For ARM, ARM64, LOONGARCH64, RISCV64 and AMD64 varargs, all arguments go in integer registers
626 var_types argType = mangleVarArgsType(varDsc->TypeGet());
628 var_types origArgType = argType;
630 // The ARM softfp calling convention should affect only the floating-point arguments.
631 // Otherwise there appear too many surplus pre-spills and other memory operations
632 // with the associated locations.
633 bool isSoftFPPreSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc->TypeGet());
634 unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
636 unsigned cSlots = (argSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; // the total number of slots of this argument
637 bool isHfaArg = false;
638 var_types hfaType = TYP_UNDEF;
640 // Methods that use VarArg or SoftFP cannot have HFA arguments, except that
641 // native varargs on arm64 unix use the regular calling convention.
642 if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP)
644 // If the argType is a struct, then check if it is an HFA
645 if (varTypeIsStruct(argType))
647 // hfaType is set to float, double, or SIMD type if it is an HFA, otherwise TYP_UNDEF
648 hfaType = GetHfaType(typeHnd);
649 isHfaArg = varTypeIsValidHfaType(hfaType);
652 else if (info.compIsVarArgs)
654 // Currently native varargs is not implemented on non-Windows targets.
656 // Note that some targets like Arm64 Unix should not need much work, as
657 // the ABI is the same, while other targets may only need small changes,
658 // such as amd64 Unix, which just expects RAX to pass numFPArguments.
659 if (TargetOS::IsUnix)
661 NYI("InitUserArgs for Vararg callee is not yet implemented on non Windows targets.");
667 // We have an HFA argument, so from here on out treat the type as a float, double, or vector.
668 // The original struct type is available by using origArgType.
669 // We also update the cSlots to be the number of float/double/vector fields in the HFA.
670 argType = hfaType; // TODO-Cleanup: remove this assignment and mark `argType` as const.
671 varDsc->SetHfaType(hfaType);
672 cSlots = varDsc->lvHfaSlots();
674 // The number of slots that must be enregistered if we are to consider this argument enregistered.
675 // This is normally the same as cSlots, since we normally either enregister the entire object,
676 // or none of it. For structs on ARM, however, we only need to enregister a single slot to consider
677 // it enregistered, as long as we can split the rest onto the stack.
678 unsigned cSlotsToEnregister = cSlots;
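// Illustrative sizing (not from the original source): a 16-byte non-HFA struct has argSize == 16
// and therefore cSlots == 4 on a 32-bit target (2 on a 64-bit target); an HFA of four floats has
// cSlots == 4 via lvHfaSlots(). cSlotsToEnregister may be reduced below in the cases where an
// argument can be split between registers and the stack.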
680 #if defined(TARGET_ARM64)
682 if (compFeatureArgSplit())
684 // On arm64 Windows we will need to properly handle the case where a >8byte <=16byte
685 // struct is split between register r7 and virtual stack slot s[0]
686 // We will only do this for calls to vararg methods on Windows Arm64
688 // !!This does not affect the normal arm64 calling convention or Unix Arm64!!
689 if (this->info.compIsVarArgs && argType == TYP_STRUCT)
691 if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register
692 !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register
694 cSlotsToEnregister = 1; // Force the split
699 #endif // defined(TARGET_ARM64)
702 // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers.
703 // But we pre-spill user arguments in varargs methods and structs.
706 bool preSpill = info.compIsVarArgs || isSoftFPPreSpill;
711 assert(varDsc->lvSize() == argSize);
712 cAlign = varDsc->lvStructDoubleAlign ? 2 : 1;
714 // HFA arguments go on the stack frame. They don't get spilled in the prolog like struct
715 // arguments passed in the integer registers but get homed immediately after the prolog.
718 // TODO-Arm32-Windows: vararg struct should be forced to split like
720 cSlotsToEnregister = 1; // HFAs must be totally enregistered or not, but other structs can be split.
735 if (isRegParamType(argType))
737 compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES;
740 if (argType == TYP_STRUCT)
742 // Are we going to split the struct between registers and stack? We can do that as long as
743 // no floating-point arguments have been put on the stack.
745 // From the ARM Procedure Call Standard:
746 // Rule C.5: "If the NCRN is less than r4 **and** the NSAA is equal to the SP,"
747 // then split the argument between registers and stack. Implication: if something
748 // has already been spilled to the stack, then anything that would normally be
749 // split between the core registers and the stack will be put on the stack.
750 // Anything that follows will also be on the stack. However, if something from
751 // floating point regs has been spilled to the stack, we can still use r0-r3 until they are full.
753 if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register
754 !varDscInfo->canEnreg(TYP_INT, cSlots) && // The end of the struct can't fit in a register
755 varDscInfo->existAnyFloatStackArgs()) // There's at least one stack-based FP arg already
757 varDscInfo->setAllRegArgUsed(TYP_INT); // Prevent all future use of integer registers
758 preSpill = false; // This struct won't be prespilled, since it will go on the stack
764 for (unsigned ix = 0; ix < cSlots; ix++)
766 if (!varDscInfo->canEnreg(TYP_INT, ix + 1))
770 regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT);
773 doubleAlignMask |= regMask;
775 codeGen->regSet.rsMaskPreSpillRegArg |= regMask;
780 #if defined(UNIX_AMD64_ABI)
781 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
782 if (varTypeIsStruct(argType))
784 assert(typeHnd != nullptr);
785 eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
786 if (structDesc.passedInRegisters)
788 unsigned intRegCount = 0;
789 unsigned floatRegCount = 0;
791 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
793 if (structDesc.IsIntegralSlot(i))
797 else if (structDesc.IsSseSlot(i))
803 assert(false && "Invalid eightbyte classification type.");
808 if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount))
810 structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
813 if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount))
815 structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
819 #endif // UNIX_AMD64_ABI
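// Illustrative example for the SysV classification above (not from the original source): a struct
// { double d; long l; } has structDesc.eightByteCount == 2 with one SSE and one integer eightbyte;
// if either register file has no free register for its eightbyte, passedInRegisters is cleared and
// the whole struct is passed on the stack.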
820 #endif // !TARGET_ARM
822 // The final home for this incoming register might be our local stack frame.
823 // For System V platforms the final home will always be on the local stack frame.
824 varDsc->lvOnFrame = true;
826 bool canPassArgInRegisters = false;
828 #if defined(UNIX_AMD64_ABI)
829 if (varTypeIsStruct(argType))
831 canPassArgInRegisters = structDesc.passedInRegisters;
834 #elif defined(TARGET_X86)
835 if (varTypeIsStruct(argType) && isTrivialPointerSizedStruct(typeHnd))
837 canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister);
840 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
841 uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD;
842 var_types argRegTypeInStruct1 = TYP_UNKNOWN;
843 var_types argRegTypeInStruct2 = TYP_UNKNOWN;
845 if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES))
847 #if defined(TARGET_LOONGARCH64)
848 floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd);
850 floatFlags = info.compCompHnd->getRISCV64PassStructInRegisterFlags(typeHnd);
854 if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0)
856 assert(varTypeIsStruct(argType));
858 if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
860 assert(argSize <= 8);
861 assert(varDsc->lvExactSize() <= argSize);
864 canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1);
866 argRegTypeInStruct1 = (varDsc->lvExactSize() == 8) ? TYP_DOUBLE : TYP_FLOAT;
868 else if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0)
871 canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2);
873 argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT;
874 argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT;
876 else if ((floatFlags & STRUCT_FLOAT_FIELD_FIRST) != 0)
879 canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1);
880 canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1);
882 argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT;
883 argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT;
885 else if ((floatFlags & STRUCT_FLOAT_FIELD_SECOND) != 0)
888 canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1);
889 canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1);
891 argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT;
892 argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT;
895 assert((floatNum == 1) || (floatNum == 2));
897 if (!canPassArgInRegisters)
899 // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument,
900 // integer registers (if any) are used instead.
901 canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
903 argRegTypeInStruct1 = TYP_UNKNOWN;
904 argRegTypeInStruct2 = TYP_UNKNOWN;
906 if (cSlotsToEnregister == 2)
908 if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1))
910 // Here a struct arg needs two registers, but only one integer register is available,
911 // so it has to be split.
912 argRegTypeInStruct1 = TYP_I_IMPL;
913 canPassArgInRegisters = true;
919 #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
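// Illustrative example for the float-field flags above (not from the original source): on
// LoongArch64 or RISCV64 a struct { float f; int i; } reports STRUCT_FLOAT_FIELD_FIRST, so it wants
// one floating-point register for 'f' and one integer register for 'i'; if either is unavailable,
// the code above falls back to integer registers, or to splitting/stack passing.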
921 canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
922 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
923 // On LoongArch64 and RISCV64, if there aren't any remaining floating-point registers to pass the
925 // argument, integer registers (if any) are used instead.
926 if (!canPassArgInRegisters && varTypeIsFloating(argType))
928 canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister);
929 argType = canPassArgInRegisters ? TYP_I_IMPL : argType;
931 if (!canPassArgInRegisters && (cSlots > 1))
933 // If a struct arg needs two registers but only one integer register is available,
934 // it has to be split.
935 canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1);
936 argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN;
941 if (canPassArgInRegisters)
943 /* Another register argument */
945 // Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
946 // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
947 // to the stack happens.
948 unsigned firstAllocatedRegArgNum = 0;
950 #if FEATURE_MULTIREG_ARGS
951 varDsc->SetOtherArgReg(REG_NA);
952 #endif // FEATURE_MULTIREG_ARGS
954 #if defined(UNIX_AMD64_ABI)
955 unsigned secondAllocatedRegArgNum = 0;
956 var_types firstEightByteType = TYP_UNDEF;
957 var_types secondEightByteType = TYP_UNDEF;
959 if (varTypeIsStruct(argType))
961 if (structDesc.eightByteCount >= 1)
963 firstEightByteType = GetEightByteType(structDesc, 0);
964 firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
968 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
969 unsigned secondAllocatedRegArgNum = 0;
970 if (argRegTypeInStruct1 != TYP_UNKNOWN)
972 firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1);
975 #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
977 firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
982 // We need to save the fact that this HFA is enregistered
983 // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics.
984 // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector
985 // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference.
986 if (varDsc->lvHfaSlots() != 1)
988 varDsc->lvIsMultiRegArg = true;
992 varDsc->lvIsRegArg = 1;
994 #if FEATURE_MULTIREG_ARGS
996 if (argType == TYP_STRUCT)
998 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
1001 varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL));
1002 varDsc->lvIsMultiRegArg = true;
1005 #elif defined(UNIX_AMD64_ABI)
1006 if (varTypeIsStruct(argType))
1008 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType));
1010 // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
1011 if (structDesc.eightByteCount >= 2)
1013 secondEightByteType = GetEightByteType(structDesc, 1);
1014 secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
1015 varDsc->lvIsMultiRegArg = true;
1018 if (secondEightByteType != TYP_UNDEF)
1020 varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType));
1023 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1024 if (argType == TYP_STRUCT)
1026 if (argRegTypeInStruct1 != TYP_UNKNOWN)
1028 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1));
1029 varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0;
1030 if (argRegTypeInStruct2 != TYP_UNKNOWN)
1032 secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1);
1033 varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2));
1034 varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0;
1036 else if (cSlots > 1)
1038 // Here a struct arg needs two registers, but only one integer register is available,
1039 // so it has to be split; we reserved an extra 8 bytes for the whole struct.
1040 varDsc->lvIsSplit = 1;
1041 varDsc->SetOtherArgReg(REG_STK);
1042 varDscInfo->setAllRegArgUsed(argRegTypeInStruct1);
1043 #if FEATURE_FASTTAILCALL
1044 varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
1046 #ifdef TARGET_RISCV64
1047 varDscInfo->hasSplitParam = true;
1053 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
1056 varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL));
1061 if (varTypeIsStruct(argType))
1063 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
1067 #endif // FEATURE_MULTIREG_ARGS
1069 varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType));
1073 if (varDsc->TypeGet() == TYP_LONG)
1075 varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT));
1078 #if FEATURE_FASTTAILCALL
1079 // Check if arg was split between registers and stack.
1080 if (varTypeUsesIntReg(argType))
1082 unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg());
1083 unsigned lastRegArgNum = firstRegArgNum + cSlots - 1;
1084 if (lastRegArgNum >= varDscInfo->maxIntRegArgNum)
1086 assert(varDscInfo->stackArgSize == 0);
1087 unsigned numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum;
1088 varDsc->SetStackOffset(-(int)numEnregistered * REGSIZE_BYTES);
1089 varDscInfo->stackArgSize += (cSlots - numEnregistered) * REGSIZE_BYTES;
1090 varDscInfo->hasSplitParam = true;
1091 JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset());
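// Illustrative arithmetic for the split case above (not from the original source): on arm32, a
// 16-byte struct (cSlots == 4) whose first slot lands in r2 has firstRegArgNum == 2 and
// lastRegArgNum == 5, which is >= maxIntRegArgNum (4); so numEnregistered == 2, the stack offset is
// set to -8, and the remaining two slots (8 bytes) are added to stackArgSize.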
1095 #endif // TARGET_ARM
1100 printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
1102 #if defined(UNIX_AMD64_ABI)
1103 if (varTypeIsStruct(argType))
1105 // Print both registers, just to be clear
1106 if (firstEightByteType == TYP_UNDEF)
1108 printf("firstEightByte: <not used>");
1112 printf("firstEightByte: %s",
1113 getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType)));
1116 if (secondEightByteType == TYP_UNDEF)
1118 printf(", secondEightByte: <not used>");
1122 printf(", secondEightByte: %s",
1123 getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)));
1127 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1128 if (varTypeIsStruct(argType))
1130 if (argRegTypeInStruct1 == TYP_UNKNOWN)
1132 printf("first: <not used>");
1137 getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)));
1139 if (argRegTypeInStruct2 == TYP_UNKNOWN)
1141 printf(", second: <not used>");
1145 printf(", second: %s",
1146 getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)));
1150 #endif // UNIX_AMD64_ABI, TARGET_LOONGARCH64, TARGET_RISCV64
1152 assert(varTypeUsesFloatReg(argType) || varTypeUsesIntReg(argType));
1154 bool isFloat = varTypeUsesFloatReg(argType);
1155 unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), argType);
1157 for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
1164 if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between
1165 // registers and stack
1167 printf(" stack slots:%d", cSlots - ix);
1174 // Print register size prefix
1175 if (argType == TYP_DOUBLE)
1177 // Print both registers, just to be clear
1178 printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)),
1179 getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType)));
1181 // doubles take 2 slots
1182 assert(ix + 1 < cSlots);
1188 printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)));
1192 #endif // TARGET_ARM
1194 printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)));
1201 } // end if (canPassArgInRegisters)
1204 #if defined(TARGET_ARM)
1205 varDscInfo->setAllRegArgUsed(argType);
1207 if (varTypeUsesFloatReg(argType))
1209 varDscInfo->setAnyFloatStackArgs();
1213 assert(varTypeUsesIntReg(argType));
1216 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1218 // If we needed to use the stack in order to pass this argument then
1219 // record the fact that we have used up any remaining registers of this 'type'
1220 // This prevents any 'backfilling' from occurring on ARM64/LoongArch64.
1222 varDscInfo->setAllRegArgUsed(argType);
1224 #endif // TARGET_XXX
1226 #if FEATURE_FASTTAILCALL
1228 unsigned argAlignment = cAlign * TARGET_POINTER_SIZE;
1230 unsigned argAlignment = eeGetArgSizeAlignment(origArgType, (hfaType == TYP_FLOAT));
1231 // We expect the following rounding operation to be a noop on all
1232 // ABIs except ARM (where we have 8-byte aligned args) and macOS
1233 // ARM64 (which allows packing multiple smaller parameters into a
1234 // single stack slot).
1235 assert(compMacOsArm64Abi() || ((varDscInfo->stackArgSize % argAlignment) == 0));
1237 varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment);
1239 JITDUMP("set user arg V%02u offset to %u\n", varDscInfo->varNum, varDscInfo->stackArgSize);
1240 varDsc->SetStackOffset(varDscInfo->stackArgSize);
1241 varDscInfo->stackArgSize += argSize;
1242 #endif // FEATURE_FASTTAILCALL
1245 #ifdef UNIX_AMD64_ABI
1246 // The arg size gives the number of bytes of the argument. For a struct it could be a size that is not a
1247 // multiple of TARGET_POINTER_SIZE. The stack-allocated space should always be a multiple of TARGET_POINTER_SIZE, so round it up.
1249 compArgSize += roundUp(argSize, TARGET_POINTER_SIZE);
1250 #else // !UNIX_AMD64_ABI
1251 compArgSize += argSize;
1252 #endif // !UNIX_AMD64_ABI
1253 if (info.compIsVarArgs || isSoftFPPreSpill)
1255 #if defined(TARGET_X86)
1256 varDsc->SetStackOffset(compArgSize);
1257 #else // !TARGET_X86
1258 // TODO-CQ: We shouldn't have to go as far as to declare these
1259 // address-exposed -- DoNotEnregister should suffice.
1261 lvaSetVarAddrExposed(varDscInfo->varNum DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE));
1262 #endif // !TARGET_X86
1266 compArgSize = GetOutgoingArgByteSize(compArgSize);
1269 if (doubleAlignMask != RBM_NONE)
1271 assert(RBM_ARG_REGS == 0xF);
1272 assert((doubleAlignMask & RBM_ARG_REGS) == doubleAlignMask);
1273 if (doubleAlignMask != RBM_NONE && doubleAlignMask != RBM_ARG_REGS)
1275 // 'double aligned types' can begin only at r0 or r2 and we always expect at least two registers to be used
1276 // Note that in rare cases, we can have double-aligned structs of 12 bytes (if specified explicitly with
1278 assert((doubleAlignMask == 0b0011) || (doubleAlignMask == 0b1100) ||
1279 (doubleAlignMask == 0b0111) /* || 0b1111 is if'ed out */);
1281 // Now if doubleAlignMask is xyz1 i.e., the struct starts in r0, and we prespill r2 or r3
1282 // but not both, then the stack would be misaligned for r0. So spill both r2 and r3.
1285 // ; +0 --- caller SP double aligned ----
1288 // ; -c r0 r0 <-- misaligned.
1289 // ; callee saved regs
1290 bool startsAtR0 = (doubleAlignMask & 1) == 1;
1291 bool r2XorR3 = ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R2) == 0) !=
1292 ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0);
1293 if (startsAtR0 && r2XorR3)
1295 codeGen->regSet.rsMaskPreSpillAlign =
1296 (~codeGen->regSet.rsMaskPreSpillRegArg & ~doubleAlignMask) & RBM_ARG_REGS;
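// Worked example (not from the original source): if the double-aligned struct occupies r0/r1
// (doubleAlignMask == 0b0011) and r2 but not r3 is already pre-spilled
// (rsMaskPreSpillRegArg == 0b0111), the expression above yields 0b1000, so r3 is additionally
// pre-spilled and the pre-spill area stays a multiple of 8 bytes, keeping r0 double-aligned.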
1300 #endif // TARGET_ARM
1303 /*****************************************************************************/
1304 void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
1306 //@GENERICS: final instantiation-info argument for shared generic methods
1307 // and shared generic struct instance methods
1308 if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
1310 info.compTypeCtxtArg = varDscInfo->varNum;
1312 LclVarDsc* varDsc = varDscInfo->varDsc;
1313 varDsc->lvIsParam = 1;
1314 varDsc->lvType = TYP_I_IMPL;
1316 if (varDscInfo->canEnreg(TYP_I_IMPL))
1318 /* Another register argument */
1320 varDsc->lvIsRegArg = 1;
1321 varDsc->SetArgReg(genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet()));
1322 #if FEATURE_MULTIREG_ARGS
1323 varDsc->SetOtherArgReg(REG_NA);
1325 varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
1327 varDscInfo->intRegArgNum++;
1332 printf("'GenCtxt' passed in register %s\n", getRegName(varDsc->GetArgReg()));
1338 // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg
1339 // returns false.
1340 varDsc->lvOnFrame = true;
1341 #if FEATURE_FASTTAILCALL
1342 varDsc->SetStackOffset(varDscInfo->stackArgSize);
1343 varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
1344 #endif // FEATURE_FASTTAILCALL
1347 compArgSize += TARGET_POINTER_SIZE;
1349 #if defined(TARGET_X86)
1350 if (info.compIsVarArgs)
1351 varDsc->SetStackOffset(compArgSize);
1352 #endif // TARGET_X86
1354 varDscInfo->varNum++;
1355 varDscInfo->varDsc++;
1359 /*****************************************************************************/
1360 void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
1362 if (info.compIsVarArgs)
1364 lvaVarargsHandleArg = varDscInfo->varNum;
1366 LclVarDsc* varDsc = varDscInfo->varDsc;
1367 varDsc->lvType = TYP_I_IMPL;
1368 varDsc->lvIsParam = 1;
1369 #if defined(TARGET_X86)
1370 // Codegen will need it for x86 scope info.
1371 varDsc->lvImplicitlyReferenced = 1;
1372 #endif // TARGET_X86
1373 varDsc->lvHasLdAddrOp = 1;
1375 lvaSetVarDoNotEnregister(lvaVarargsHandleArg DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr));
1377 assert(mostRecentlyActivePhase == PHASE_PRE_IMPORT);
1379 // TODO-Cleanup: this is the pre-importation phase; why do we try to work with regs here?
1380 // Should this just be deleted?
1381 if (varDscInfo->canEnreg(TYP_I_IMPL))
1383 /* Another register argument */
1385 unsigned varArgHndArgNum = varDscInfo->allocRegArg(TYP_I_IMPL);
1387 varDsc->lvIsRegArg = 1;
1388 varDsc->SetArgReg(genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL));
1389 #if FEATURE_MULTIREG_ARGS
1390 varDsc->SetOtherArgReg(REG_NA);
1392 varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
1394 // This has to be spilled right in front of the real arguments and we have
1395 // to pre-spill all the argument registers explicitly because we only
1396 // have symbols for the declared ones, not any potential variadic ones.
1397 for (unsigned ix = varArgHndArgNum; ix < ArrLen(intArgMasks); ix++)
1399 codeGen->regSet.rsMaskPreSpillRegArg |= intArgMasks[ix];
1401 #endif // TARGET_ARM
1406 printf("'VarArgHnd' passed in register %s\n", getRegName(varDsc->GetArgReg()));
1412 // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg
1413 // returns false.
1414 varDsc->lvOnFrame = true;
1415 #if FEATURE_FASTTAILCALL
1416 varDsc->SetStackOffset(varDscInfo->stackArgSize);
1417 varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
1418 #endif // FEATURE_FASTTAILCALL
1421 /* Update the total argument size, count and varDsc */
1423 compArgSize += TARGET_POINTER_SIZE;
1425 varDscInfo->varNum++;
1426 varDscInfo->varDsc++;
1428 #if defined(TARGET_X86)
1429 varDsc->SetStackOffset(compArgSize);
1431 // Allocate a temp to point at the beginning of the args
1433 lvaVarargsBaseOfStkArgs = lvaGrabTemp(false DEBUGARG("Varargs BaseOfStkArgs"));
1434 lvaTable[lvaVarargsBaseOfStkArgs].lvType = TYP_I_IMPL;
1436 #endif // TARGET_X86
1440 /*****************************************************************************/
1441 void Compiler::lvaInitVarDsc(LclVarDsc* varDsc,
1443 CorInfoType corInfoType,
1444 CORINFO_CLASS_HANDLE typeHnd,
1445 CORINFO_ARG_LIST_HANDLE varList,
1446 CORINFO_SIG_INFO* varSig)
1448 noway_assert(varDsc == lvaGetDesc(varNum));
1450 switch (corInfoType)
1452 // Mark types that look like a pointer for doing shadow-copying of
1453 // parameters if we have an unsafe buffer.
1454 // Note that this does not handle structs with pointer fields. Instead,
1455 // we rely on using the assign-groups/equivalence-groups in
1456 // gsFindVulnerableParams() to determine if a buffer-struct contains a
1457 // pointer. We could do better by having the EE determine this for us.
1458 // Note that we want to keep buffers without pointers at lower memory
1459 // addresses than buffers with pointers.
1460 case CORINFO_TYPE_PTR:
1461 case CORINFO_TYPE_BYREF:
1462 case CORINFO_TYPE_CLASS:
1463 case CORINFO_TYPE_STRING:
1464 case CORINFO_TYPE_VAR:
1465 case CORINFO_TYPE_REFANY:
1466 varDsc->lvIsPtr = 1;
1472 var_types type = JITtype2varType(corInfoType);
1473 if (varTypeIsFloating(type))
1475 compFloatingPointUsed = true;
1478 #if FEATURE_IMPLICIT_BYREFS
1479 varDsc->lvIsImplicitByRef = 0;
1480 #endif // FEATURE_IMPLICIT_BYREFS
1481 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1482 varDsc->lvIs4Field1 = 0;
1483 varDsc->lvIs4Field2 = 0;
1484 varDsc->lvIsSplit = 0;
1485 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
1487 // Set the lvType (before this point it is TYP_UNDEF).
1489 if (GlobalJitOptions::compFeatureHfa)
1491 varDsc->SetHfaType(TYP_UNDEF);
1493 if ((varTypeIsStruct(type)))
1495 lvaSetStruct(varNum, typeHnd, typeHnd != NO_CLASS_HANDLE);
1496 if (info.compIsVarArgs)
1498 lvaSetStructUsedAsVarArg(varNum);
1503 varDsc->lvType = type;
1506 if (type == TYP_BOOL)
1508 varDsc->lvIsBoolean = true;
1512 varDsc->SetStackOffset(BAD_STK_OFFS);
1515 #if FEATURE_MULTIREG_ARGS
1516 varDsc->SetOtherArgReg(REG_NA);
1517 #endif // FEATURE_MULTIREG_ARGS
1520 /*****************************************************************************
1521 * Returns our internal varNum for a given IL variable.
1522 * Asserts assume it is called after lvaTable[] has been set up.
1525 unsigned Compiler::compMapILvarNum(unsigned ILvarNum)
1527 noway_assert(ILvarNum < info.compILlocalsCount || ILvarNum > unsigned(ICorDebugInfo::UNKNOWN_ILNUM));
1531 if (ILvarNum == (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM)
1533 // The varargs cookie is the last argument in lvaTable[]
1534 noway_assert(info.compIsVarArgs);
1536 varNum = lvaVarargsHandleArg;
1537 noway_assert(lvaTable[varNum].lvIsParam);
1539 else if (ILvarNum == (unsigned)ICorDebugInfo::RETBUF_ILNUM)
1541 noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
1542 varNum = info.compRetBuffArg;
1544 else if (ILvarNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
1546 noway_assert(info.compTypeCtxtArg >= 0);
1547 varNum = unsigned(info.compTypeCtxtArg);
1549 else if (ILvarNum < info.compILargsCount)
1552 varNum = compMapILargNum(ILvarNum);
1553 noway_assert(lvaTable[varNum].lvIsParam);
1555 else if (ILvarNum < info.compILlocalsCount)
1558 unsigned lclNum = ILvarNum - info.compILargsCount;
1559 varNum = info.compArgsCount + lclNum;
1560 noway_assert(!lvaTable[varNum].lvIsParam);
1567 noway_assert(varNum < info.compLocalsCount);
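// Illustrative mapping (not from the original source): for a static method with 2 IL arguments,
// 3 IL locals, and no hidden arguments, IL local #1 has ILvarNum == 3, so
// varNum == info.compArgsCount (2) + (3 - info.compILargsCount (2)) == 3.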
1571 /*****************************************************************************
1572 * Returns the IL variable number given our internal varNum.
1573 * Special return values are VARARGS_HND_ILNUM, RETBUF_ILNUM, TYPECTXT_ILNUM.
1575 * Returns UNKNOWN_ILNUM if it can't be mapped.
1578 unsigned Compiler::compMap2ILvarNum(unsigned varNum) const
1580 if (compIsForInlining())
1582 return impInlineInfo->InlinerCompiler->compMap2ILvarNum(varNum);
1585 noway_assert(varNum < lvaCount);
1587 if (varNum == info.compRetBuffArg)
1589 return (unsigned)ICorDebugInfo::RETBUF_ILNUM;
1592 // Is this a varargs function?
1593 if (info.compIsVarArgs && varNum == lvaVarargsHandleArg)
1595 return (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM;
1598 // We create an extra argument for the type context parameter
1599 // needed for shared generic code.
1600 if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum == (unsigned)info.compTypeCtxtArg)
1602 return (unsigned)ICorDebugInfo::TYPECTXT_ILNUM;
1605 #if FEATURE_FIXED_OUT_ARGS
1606 if (varNum == lvaOutgoingArgSpaceVar)
1608 return (unsigned)ICorDebugInfo::UNKNOWN_ILNUM; // Cannot be mapped
1610 #endif // FEATURE_FIXED_OUT_ARGS
1612 // Now mutate varNum to remove extra parameters from the count.
1613 if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum > (unsigned)info.compTypeCtxtArg)
1618 if (info.compIsVarArgs && varNum > lvaVarargsHandleArg)
1623 /* Is there a hidden argument for the return buffer.
1624 Note that this code works because if the RetBuffArg is not present,
1625 compRetBuffArg will be BAD_VAR_NUM */
1626 if (info.compRetBuffArg != BAD_VAR_NUM && varNum > info.compRetBuffArg)
1631 if (varNum >= info.compLocalsCount)
1633 return (unsigned)ICorDebugInfo::UNKNOWN_ILNUM; // Cannot be mapped
1639 /*****************************************************************************
1640 * Returns true if variable "varNum" may be address-exposed.
1643 bool Compiler::lvaVarAddrExposed(unsigned varNum) const
1645 const LclVarDsc* varDsc = lvaGetDesc(varNum);
1646 return varDsc->IsAddressExposed();
1649 /*****************************************************************************
1650 * Returns true iff variable "varNum" should not be enregistered (for one of several reasons).
1653 bool Compiler::lvaVarDoNotEnregister(unsigned varNum)
1655 LclVarDsc* varDsc = lvaGetDesc(varNum);
1656 return varDsc->lvDoNotEnregister;
1659 //------------------------------------------------------------------------
1660 // lvSetMinOptsDoNotEnreg: a helper to initialize the `lvDoNotEnregister` flag
1661 // for locals that were created before the compiler decided its optimization level.
1664 // compEnregLocals() value is finalized and is set to false.
1666 void Compiler::lvSetMinOptsDoNotEnreg()
1668 JITDUMP("compEnregLocals() is false, setting doNotEnreg flag for all locals.");
1669 assert(!compEnregLocals());
1670 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
1672 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NoRegVars));
1676 //------------------------------------------------------------------------
1677 // StructPromotionHelper constructor.
1680 // compiler - pointer to a compiler to get access to an allocator, compHandle etc.
1682 Compiler::StructPromotionHelper::StructPromotionHelper(Compiler* compiler) : compiler(compiler), structPromotionInfo()
1686 //--------------------------------------------------------------------------------------------
1687 // TryPromoteStructVar - promote struct var if it is possible and profitable.
1690 // lclNum - struct number to try.
1693 // true if the struct var was promoted.
1695 bool Compiler::StructPromotionHelper::TryPromoteStructVar(unsigned lclNum)
1697 if (CanPromoteStructVar(lclNum))
1700 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
1701 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
1702 static int structPromoVarNum = 0;
1703 structPromoVarNum++;
1704 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
1706 if (ShouldPromoteStructVar(lclNum))
1708 PromoteStructVar(lclNum);
1715 //--------------------------------------------------------------------------------------------
1716 // CanPromoteStructType - checks if the struct type can be promoted.
1719 // typeHnd - struct handle to check.
1722 // true if the struct type can be promoted.
1725 // The last analyzed type is memorized to skip the check if we ask about the same type again next time.
1726 // However, it was not found profitable to memorize all analyzed types in a map.
1728 // The check initializes only necessary fields in lvaStructPromotionInfo,
1729 // so if the promotion is rejected early, then most fields will be uninitialized.
1731 bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd)
1733 assert(typeHnd != nullptr);
1734 if (!compiler->eeIsValueClass(typeHnd))
1736 // TODO-ObjectStackAllocation: Enable promotion of fields of stack-allocated objects.
1740 if (structPromotionInfo.typeHnd == typeHnd)
1742 // Asking for the same type of struct as the last time.
1743 // Nothing needs to be done.
1745 return structPromotionInfo.canPromote;
1748 // Analyze this type from scratch.
1749 structPromotionInfo = lvaStructPromotionInfo(typeHnd);
1751 #if defined(FEATURE_SIMD)
1752 // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote.
1753 const unsigned maxSize =
1754 MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double));
1755 #else // !FEATURE_SIMD
1756 // sizeof(double) represents the size of the largest primitive type that we can struct promote.
1757 const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
1758 #endif // !FEATURE_SIMD
1760 // lvaStructFieldInfo.fldOffset is byte-sized and offsets start from 0, so the max size can be 256
1761 assert(static_cast<unsigned char>(maxSize - 1) == (maxSize - 1));
1763 // lvaStructFieldInfo.fieldCnt is byte-sized
1764 assert(static_cast<unsigned char>(MAX_NumOfFieldsInPromotableStruct) == MAX_NumOfFieldsInPromotableStruct);
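// Illustrative sizing (assuming MAX_NumOfFieldsInPromotableStruct == 4, an assumption not shown
// here): with 32-byte vectors maxSize is 4 * 32 = 128; with 64-byte vectors it is 256, the largest
// size that the byte-sized fldOffset can still describe (offsets 0..255), which is what the asserts
// above verify.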
1766 COMP_HANDLE compHandle = compiler->info.compCompHnd;
1768 unsigned structSize = compHandle->getClassSize(typeHnd);
1769 if (structSize > maxSize)
1771 return false; // struct is too large
1774 DWORD typeFlags = compHandle->getClassAttribs(typeHnd);
1776 if (StructHasOverlappingFields(typeFlags))
1781 if (StructHasIndexableFields(typeFlags))
1787 // On ARM, we have a requirement on the struct alignment; see below.
1788 unsigned structAlignment = roundUp(compHandle->getClassAlignmentRequirement(typeHnd), TARGET_POINTER_SIZE);
1789 #endif // TARGET_ARM
1791 // At most 1 (root node) + (4 promoted fields) + (each could be a wrapped primitive)
1792 CORINFO_TYPE_LAYOUT_NODE treeNodes[1 + MAX_NumOfFieldsInPromotableStruct * 2];
1793 size_t numTreeNodes = ArrLen(treeNodes);
1794 GetTypeLayoutResult result = compHandle->getTypeLayout(typeHnd, treeNodes, &numTreeNodes);
1796 if ((result != GetTypeLayoutResult::Success) || (numTreeNodes <= 1))
1801 assert(treeNodes[0].size == structSize);
1803 structPromotionInfo.fieldCnt = 0;
1805 unsigned fieldsSize = 0;
1807 // Some notes on the following:
1808 // 1. At most MAX_NumOfFieldsInPromotableStruct fields can be promoted
1809 // 2. Recursive promotion is not enabled as the rest of the JIT cannot
1810 // handle some of the patterns produced efficiently
1811 // 3. The exception to the above is structs wrapping primitive types; we do
1812 // support promoting those, but only through one layer of nesting (as a
1813 // quirk -- this can probably be relaxed).
1815 for (size_t i = 1; i < numTreeNodes;)
1817 if (structPromotionInfo.fieldCnt >= MAX_NumOfFieldsInPromotableStruct)
1822 const CORINFO_TYPE_LAYOUT_NODE& node = treeNodes[i];
1823 assert(node.parent == 0);
1824 lvaStructFieldInfo& promField = structPromotionInfo.fields[structPromotionInfo.fieldCnt];
1825 INDEBUG(promField.diagFldHnd = node.diagFieldHnd);
1827 // Ensured by assertion on size above.
1828 assert(FitsIn<decltype(promField.fldOffset)>(node.offset));
1829 promField.fldOffset = (uint8_t)node.offset;
1831 promField.fldOrdinal = structPromotionInfo.fieldCnt;
1832 promField.fldSize = node.size;
1834 structPromotionInfo.fieldCnt++;
1836 if (node.type == CORINFO_TYPE_VALUECLASS)
1838 var_types fldType = TryPromoteValueClassAsPrimitive(treeNodes, numTreeNodes, i);
1839 if (fldType == TYP_UNDEF)
1844 promField.fldType = fldType;
1845 promField.fldSIMDTypeHnd = node.simdTypeHnd;
1846 AdvanceSubTree(treeNodes, numTreeNodes, &i);
1850 promField.fldType = JITtype2varType(node.type);
1854 fieldsSize += promField.fldSize;
1856 if ((promField.fldOffset % promField.fldSize) != 0)
1858 // The code in Compiler::genPushArgList that reconstitutes
1859 // struct values on the stack from promoted fields expects
1860 // those fields to be at their natural alignment.
1864 noway_assert(promField.fldOffset + promField.fldSize <= structSize);
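// Illustrative rejection (not from the original source): an explicit-layout struct that places an
// int field at offset 2 fails the (fldOffset % fldSize) check above and is therefore not promoted.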
1867 // On ARM, for struct types that don't use explicit layout, the alignment of the struct is
1868 // at least the max alignment of its fields. We take advantage of this invariant in struct promotion,
1869 // so verify it here.
1870 if (promField.fldSize > structAlignment)
1872 // Don't promote vars whose struct types violate the invariant. (Alignment == size for primitives.)
1875 #endif // TARGET_ARM
1878 if (fieldsSize != treeNodes[0].size)
1880 structPromotionInfo.containsHoles = true;
1883 structPromotionInfo.anySignificantPadding = treeNodes[0].hasSignificantPadding && structPromotionInfo.containsHoles;
1885 // Cool, this struct is promotable.
1887 structPromotionInfo.canPromote = true;
1891 //--------------------------------------------------------------------------------------------
1892 // TryPromoteValueClassAsPrimitive - Attempt to promote a value type as a primitive type.
1895 // treeNodes - Layout tree
1896 // maxTreeNodes - Size of 'treeNodes'
1897 // index - Index of layout tree node corresponding to the value class
1900 // Primitive type to promote the field as.
1902 var_types Compiler::StructPromotionHelper::TryPromoteValueClassAsPrimitive(CORINFO_TYPE_LAYOUT_NODE* treeNodes,
1903 size_t maxTreeNodes,
1906 assert(index < maxTreeNodes);
1907 CORINFO_TYPE_LAYOUT_NODE& node = treeNodes[index];
1908 assert(node.type == CORINFO_TYPE_VALUECLASS);
1910 if (node.simdTypeHnd != NO_CLASS_HANDLE)
1912 const char* namespaceName = nullptr;
1913 const char* className = compiler->info.compCompHnd->getClassNameFromMetadata(node.simdTypeHnd, &namespaceName);
1916 if (compiler->isRuntimeIntrinsicsNamespace(namespaceName) || compiler->isNumericsNamespace(namespaceName))
1919 CorInfoType simdBaseJitType = compiler->getBaseJitTypeAndSizeOfSIMDType(node.simdTypeHnd, &simdSize);
1920 // We will only promote fields of SIMD types that fit into a SIMD register.
1921 if (simdBaseJitType != CORINFO_TYPE_UNDEF)
1923 if (compiler->structSizeMightRepresentSIMDType(simdSize))
1925 return compiler->getSIMDTypeForSize(simdSize);
1932 // TODO-Quirk: Vector64 is a SIMD type with one 64-bit field, so when
1933 // compiler->usesSIMDTypes() == false, it used to be promoted as a long
1935 if (compiler->isRuntimeIntrinsicsNamespace(namespaceName) && (strcmp(className, "Vector64`1") == 0))
1942 // Check for a single primitive wrapper.
1943 if (node.numFields != 1)
1948 if (index + 1 >= maxTreeNodes)
1953 CORINFO_TYPE_LAYOUT_NODE& primNode = treeNodes[index + 1];
1955 // Do not promote if the field is not a primitive.
1956 // TODO-CQ: We could likely permit recursive primitive wrappers here quite easily.
1957 if (primNode.type == CORINFO_TYPE_VALUECLASS)
1962 // Do not promote if the single field is not aligned at its natural boundary within
1963 // the struct field.
1964 if (primNode.offset != node.offset)
1969 // Insist this wrapped field occupies all of its parent storage.
1970 if (primNode.size != node.size)
1972 JITDUMP("Promotion blocked: struct contains struct field with one field,"
1973 " but that field is not the same size as its parent.\n");
1977 // Only promote up to pointer sized fields.
1978 // TODO-CQ: Right now we only promote an actual SIMD typed field, which would cause
1979 // a nested SIMD type to fail promotion.
1980 if (primNode.size > TARGET_POINTER_SIZE)
1982 JITDUMP("Promotion blocked: struct contains struct field with one field,"
1983 " but that field has invalid size.\n");
1987 if ((primNode.size != TARGET_POINTER_SIZE) && ((node.offset % primNode.size) != 0))
1989 JITDUMP("Promotion blocked: struct contains struct field with one field,"
1990 " but the outer struct offset %u is not a multiple of the inner field size %u.\n",
1991 node.offset, primNode.size);
1995 return JITtype2varType(primNode.type);
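// Illustrative example (hypothetical layout, not from a real compilation): the accepted
// "primitive wrapper" shape is a struct whose single field is a primitive covering the whole
// struct, e.g.
//
//   struct IntWrapper { int32_t Value; };   // one field, offset 0, size 4 == parent size: promoted as TYP_INT
//
// A wrapper declared with extra explicit size/padding, or one whose single field is itself a
// struct, fails one of the checks above and is not promoted as a primitive.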
1998 //--------------------------------------------------------------------------------------------
1999 // AdvanceSubTree - Skip over a tree node and all its children.
2002 // treeNodes - array of type layout nodes, stored in preorder.
2003 // maxTreeNodes - size of 'treeNodes'
2004 // index - [in, out] Index pointing to root of subtree to skip.
2007 // Requires the tree nodes to be stored in preorder (as guaranteed by getTypeLayout).
2009 void Compiler::StructPromotionHelper::AdvanceSubTree(CORINFO_TYPE_LAYOUT_NODE* treeNodes,
2010 size_t maxTreeNodes,
2013 size_t parIndex = *index;
2015 while ((*index < maxTreeNodes) && (treeNodes[*index].parent >= parIndex))
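// Worked example (hypothetical node array, not from a real compilation): getTypeLayout stores
// nodes in preorder, so every descendant of the node at 'parIndex' follows it and has
// 'parent >= parIndex'. For parents
//
//   index:  0 1 2 3 4
//   parent: 0 0 1 1 0
//
// a call with *index == 1 skips the subtree rooted at node 1 (nodes 1..3) and leaves *index
// at 4, the next sibling, whose parent (0) is below parIndex.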
2021 //--------------------------------------------------------------------------------------------
2022 // CanPromoteStructVar - checks if the struct can be promoted.
2025 // lclNum - struct number to check.
2028 // true if the struct var can be promoted.
2030 bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
2032 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
2034 assert(varTypeIsStruct(varDsc));
2035 assert(!varDsc->lvPromoted); // Don't ask again :)
2037 // If this lclVar is used in a SIMD intrinsic, then we don't want to struct promote it.
2038 // Note, however, that SIMD lclVars that are NOT used in a SIMD intrinsic may be
2039 // profitably promoted.
2040 if (varDsc->lvIsUsedInSIMDIntrinsic())
2042 JITDUMP(" struct promotion of V%02u is disabled because lvIsUsedInSIMDIntrinsic()\n", lclNum);
2046 // Reject struct promotion of parameters when -GS stack reordering is enabled
2047 // as we could introduce shadow copies of them.
2048 if (varDsc->lvIsParam && compiler->compGSReorderStackLayout)
2050 JITDUMP(" struct promotion of V%02u is disabled because lvIsParam and compGSReorderStackLayout\n", lclNum);
2054 if (varDsc->lvIsParam && compiler->fgNoStructParamPromotion)
2056 JITDUMP(" struct promotion of V%02u is disabled by fgNoStructParamPromotion\n", lclNum);
2060 if (!compiler->lvaEnregMultiRegVars && varDsc->lvIsMultiRegArgOrRet())
2062 JITDUMP(" struct promotion of V%02u is disabled because lvIsMultiRegArgOrRet()\n", lclNum);
2066 // If the local was exposed at Tier0, we currently have to assume it's aliased for OSR.
2068 if (compiler->lvaIsOSRLocal(lclNum) && compiler->info.compPatchpointInfo->IsExposed(lclNum))
2070 JITDUMP(" struct promotion of V%02u is disabled because it is an exposed OSR local\n", lclNum);
2074 if (varDsc->IsAddressExposed())
2076 JITDUMP(" struct promotion of V%02u is disabled because it has already been marked address exposed\n", lclNum);
2080 if (varDsc->GetLayout()->IsBlockLayout())
2085 CORINFO_CLASS_HANDLE typeHnd = varDsc->GetLayout()->GetClassHandle();
2086 assert(typeHnd != NO_CLASS_HANDLE);
2088 bool canPromote = CanPromoteStructType(typeHnd);
2089 if (canPromote && varDsc->lvIsMultiRegArgOrRet())
2091 unsigned fieldCnt = structPromotionInfo.fieldCnt;
2092 if (fieldCnt > MAX_MULTIREG_COUNT)
2096 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
2099 for (unsigned i = 0; canPromote && (i < fieldCnt); i++)
2101 var_types fieldType = structPromotionInfo.fields[i].fldType;
2102 // Non-HFA structs are always passed in general purpose registers.
2103 // If there are any floating point fields, don't promote for now.
2104 // Likewise, since HVA structs are passed in SIMD registers
2105 // promotion of non FP or SIMD type fields is disallowed.
2106 // TODO-1stClassStructs: add support in Lowering and prolog generation
2107 // to enable promoting these types.
2108 if (varDsc->lvIsParam && (varDsc->lvIsHfa() != varTypeUsesFloatReg(fieldType)))
2112 #if defined(FEATURE_SIMD)
2113 // If we have a register-passed struct with mixed non-opaque SIMD types (i.e. with defined fields)
2114 // and non-SIMD types, we don't currently handle that case in the prolog, so we can't promote.
2115 else if ((fieldCnt > 1) && varTypeIsStruct(fieldType) &&
2116 (structPromotionInfo.fields[i].fldSIMDTypeHnd != NO_CLASS_HANDLE) &&
2117 !compiler->isOpaqueSIMDType(structPromotionInfo.fields[i].fldSIMDTypeHnd))
2121 #endif // FEATURE_SIMD
2124 #elif defined(UNIX_AMD64_ABI)
2128 // Only promote if the field types match the registers, unless we have a single SIMD field.
2129 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
2130 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
2131 unsigned regCount = structDesc.eightByteCount;
2132 if ((structPromotionInfo.fieldCnt == 1) && varTypeIsSIMD(structPromotionInfo.fields[0].fldType))
2134 // Allow the case of promoting a single SIMD field, even if there are multiple registers.
2135 // We will fix this up in the prolog.
2137 else if (structPromotionInfo.fieldCnt != regCount)
2143 for (unsigned i = 0; canPromote && (i < regCount); i++)
2145 lvaStructFieldInfo* fieldInfo = &(structPromotionInfo.fields[i]);
2146 var_types fieldType = fieldInfo->fldType;
2147 // We don't currently support passing SIMD types in registers.
2148 if (varTypeIsSIMD(fieldType))
2152 else if (varTypeUsesFloatReg(fieldType) !=
2153 (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE))
2160 #endif // UNIX_AMD64_ABI
2165 //--------------------------------------------------------------------------------------------
2166 // ShouldPromoteStructVar - Should a struct var be promoted if it can be promoted?
2167 // This routine mainly performs profitability checks. Right now it also has
2168 // some correctness checks due to limitations of down-stream phases.
2171 // lclNum - struct local number;
2174 // true if the struct should be promoted.
2176 bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum)
2178 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
2179 assert(varTypeIsStruct(varDsc));
2180 assert(varDsc->GetLayout()->GetClassHandle() == structPromotionInfo.typeHnd);
2181 assert(structPromotionInfo.canPromote);
2183 bool shouldPromote = true;
2185 // We *can* promote; *should* we promote?
2186 // We should only do so if promotion has potential savings. One source of savings
2187 // is if a field of the struct is accessed, since this access will be turned into
2188 // an access of the corresponding promoted field variable. Even if there are no
2189 // field accesses, but only block-level operations on the whole struct, if the struct
2190 // has only one or two fields, then doing those block operations field-wise is probably faster
2191 // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
2192 // Struct promotion also provides the following benefits: reduce stack frame size,
2193 // reduce the need for zero init of stack frame and fine grained constant/copy prop.
2194 // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
2195 // So if no fields are accessed independently, and there are four or more fields,
2196 // then do not promote.
2198 // TODO: Ideally we would want to consider the impact of whether the struct is
2199 // passed as a parameter or assigned the return value of a call. Because once promoted,
2200 // struct copying is done by field-by-field assignment instead of a more efficient
2201 // rep.stos or xmm reg based copy.
2202 if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
2204 JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
2205 structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
2206 shouldPromote = false;
2208 else if (varDsc->lvIsMultiRegRet && structPromotionInfo.anySignificantPadding)
2210 JITDUMP("Not promoting multi-reg returned struct local V%02u with significant padding.\n", lclNum);
2211 shouldPromote = false;
2213 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
2214 else if ((structPromotionInfo.fieldCnt == 2) && (varTypeIsFloating(structPromotionInfo.fields[0].fldType) ||
2215 varTypeIsFloating(structPromotionInfo.fields[1].fldType)))
2217 // TODO-LoongArch64 - struct passed by float registers.
2218 JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
2219 "float field(s).\n",
2220 lclNum, structPromotionInfo.fieldCnt);
2221 shouldPromote = false;
2223 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
2224 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa())
2226 #if FEATURE_MULTIREG_STRUCT_PROMOTE
2227 // Is this a variable holding a value with exactly two fields passed in
2228 // multiple registers?
2229 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
2231 if (structPromotionInfo.anySignificantPadding)
2233 JITDUMP("Not promoting multi-reg struct local V%02u with significant padding.\n", lclNum);
2234 shouldPromote = false;
2236 else if ((structPromotionInfo.fieldCnt != 2) &&
2237 !((structPromotionInfo.fieldCnt == 1) && varTypeIsSIMD(structPromotionInfo.fields[0].fldType)))
2239 JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true, #fields != 2 and it's "
2240 "not a single SIMD.\n",
2242 shouldPromote = false;
2244 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
2245 else if (varDsc->lvIsSplit)
2247 JITDUMP("Not promoting multireg struct local V%02u, because it is splitted.\n", lclNum);
2248 shouldPromote = false;
2250 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
2253 #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
2255 // TODO-PERF - Implement struct promotion for incoming single-register structs.
2256 // Also the implementation of jmp uses the 4 byte move to store
2257 // byte parameters to the stack, so that if we have a byte field
2258 // with something else occupying the same 4-byte slot, it will
2259 // overwrite other fields.
2260 if (structPromotionInfo.fieldCnt != 1)
2262 JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
2264 lclNum, structPromotionInfo.fieldCnt);
2265 shouldPromote = false;
2268 else if ((lclNum == compiler->genReturnLocal) && (structPromotionInfo.fieldCnt > 1))
2270 // TODO-1stClassStructs: a temporary solution to keep diffs small, it will be fixed later.
2271 shouldPromote = false;
2274 else if (compiler->compPromoteFewerStructs(lclNum))
2276 // Do not promote some structs that could be promoted, to stress promoted/unpromoted moves.
2277 JITDUMP("Not promoting promotable struct local V%02u, because of STRESS_PROMOTE_FEWER_STRUCTS\n", lclNum);
2278 shouldPromote = false;
2283 // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
2284 // the incoming register into the stack frame slot.
2285 // In that case, we would like to avoid promotion.
2286 // However we haven't yet computed the lvRefCnt values so we can't do that.
2288 CLANG_FORMAT_COMMENT_ANCHOR;
2290 return shouldPromote;
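// Worked example (hypothetical local): a 4-field struct local that is only ever copied or
// stored as a whole (lvFieldAccessed == false) fails the "fieldCnt > 3" profitability check
// above and stays unpromoted; the same local with even one independent field access, or with
// three or fewer fields, passes that check, though it may still be rejected by the
// parameter/return-value checks that follow it.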
2293 //--------------------------------------------------------------------------------------------
2294 // SortStructFields - sort the fields in increasing order of field offset.
2297 // This is needed because the fields need to be pushed on stack (when referenced as a struct) in offset order.
2299 void Compiler::StructPromotionHelper::SortStructFields()
2301 if (!structPromotionInfo.fieldsSorted)
2303 jitstd::sort(structPromotionInfo.fields, structPromotionInfo.fields + structPromotionInfo.fieldCnt,
2304 [](const lvaStructFieldInfo& lhs, const lvaStructFieldInfo& rhs) {
2305 return lhs.fldOffset < rhs.fldOffset;
2307 structPromotionInfo.fieldsSorted = true;
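// Worked example (hypothetical offsets): fields recorded during CanPromoteStructType with
// offsets { 8, 0, 4 } come out of the sort above as { 0, 4, 8 }, so code that reconstitutes
// the struct on the stack can walk structPromotionInfo.fields front to back and emit the
// pushes in offset order.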
2311 //--------------------------------------------------------------------------------------------
2312 // PromoteStructVar - promote struct variable.
2315 // lclNum - struct local number;
2317 void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
2319 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
2321 // We should never see a reg-sized non-field-addressed struct here.
2322 assert(!varDsc->lvRegStruct);
2324 assert(varDsc->GetLayout()->GetClassHandle() == structPromotionInfo.typeHnd);
2325 assert(structPromotionInfo.canPromote);
2327 varDsc->lvFieldCnt = structPromotionInfo.fieldCnt;
2328 varDsc->lvFieldLclStart = compiler->lvaCount;
2329 varDsc->lvPromoted = true;
2330 varDsc->lvContainsHoles = structPromotionInfo.containsHoles;
2331 varDsc->lvAnySignificantPadding = structPromotionInfo.anySignificantPadding;
2334 // Don't stress this in LCL_FLD stress.
2335 varDsc->lvKeepType = 1;
2339 if (compiler->verbose)
2341 printf("\nPromoting struct local V%02u (%s):", lclNum,
2342 compiler->eeGetClassName(varDsc->GetLayout()->GetClassHandle()));
2348 for (unsigned index = 0; index < structPromotionInfo.fieldCnt; ++index)
2350 const lvaStructFieldInfo* pFieldInfo = &structPromotionInfo.fields[index];
2352 if (!varTypeUsesIntReg(pFieldInfo->fldType))
2354 // Whenever we promote a struct that contains a floating point field
2355 // it's possible we transition from a method that originally only had integer
2356 // local vars to start having FP. We have to communicate this through this flag
2357 // since LSRA later on will use this flag to determine whether or not to track FP register sets.
2358 compiler->compFloatingPointUsed = true;
2361 // Now grab the temp for the field local.
2365 char fieldNameBuffer[128];
2366 const char* fieldName =
2367 compiler->eeGetFieldName(pFieldInfo->diagFldHnd, false, fieldNameBuffer, sizeof(fieldNameBuffer));
2368 sprintf_s(buf, sizeof(buf), "field V%02u.%s (fldOffset=0x%x)", lclNum, fieldName, pFieldInfo->fldOffset);
2370 // We need to copy 'buf' as lvaGrabTemp() below caches a copy to its argument.
2371 size_t len = strlen(buf) + 1;
2372 char* bufp = compiler->getAllocator(CMK_DebugOnly).allocate<char>(len);
2373 strcpy_s(bufp, len, buf);
2377 noway_assert(pFieldInfo->fldOffset > (pFieldInfo - 1)->fldOffset);
2381 // Lifetime of field locals might span multiple BBs, so they must be long lifetime temps.
2382 const unsigned varNum = compiler->lvaGrabTemp(false DEBUGARG(bufp));
2384 // lvaGrabTemp can reallocate the lvaTable, so
2385 // refresh the cached varDsc for lclNum.
2386 varDsc = compiler->lvaGetDesc(lclNum);
2388 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varNum);
2389 fieldVarDsc->lvType = pFieldInfo->fldType;
2390 fieldVarDsc->lvIsStructField = true;
2391 fieldVarDsc->lvFldOffset = pFieldInfo->fldOffset;
2392 fieldVarDsc->lvFldOrdinal = pFieldInfo->fldOrdinal;
2393 fieldVarDsc->lvParentLcl = lclNum;
2394 fieldVarDsc->lvIsParam = varDsc->lvIsParam;
2395 fieldVarDsc->lvIsOSRLocal = varDsc->lvIsOSRLocal;
2396 fieldVarDsc->lvIsOSRExposedLocal = varDsc->lvIsOSRExposedLocal;
2398 // This new local may be the first time we've seen a long typed local.
2399 if (fieldVarDsc->lvType == TYP_LONG)
2401 compiler->compLongUsed = true;
2404 #if FEATURE_IMPLICIT_BYREFS
2405 // Reset the implicitByRef flag.
2406 fieldVarDsc->lvIsImplicitByRef = 0;
2407 #endif // FEATURE_IMPLICIT_BYREFS
2409 // Do we have a parameter that can be enregistered?
2411 if (varDsc->lvIsRegArg)
2413 fieldVarDsc->lvIsRegArg = true;
2414 regNumber parentArgReg = varDsc->GetArgReg();
2415 #if FEATURE_MULTIREG_ARGS
2416 if (!compiler->lvaIsImplicitByRefLocal(lclNum))
2418 #ifdef UNIX_AMD64_ABI
2419 if (varTypeIsSIMD(fieldVarDsc) && (varDsc->lvFieldCnt == 1))
2421 // This SIMD typed field may be passed in multiple registers.
2422 fieldVarDsc->SetArgReg(parentArgReg);
2423 fieldVarDsc->SetOtherArgReg(varDsc->GetOtherArgReg());
2426 #endif // UNIX_AMD64_ABI
2428 regNumber fieldRegNum;
2431 fieldRegNum = parentArgReg;
2433 else if (varDsc->lvIsHfa())
2435 unsigned regIncrement = fieldVarDsc->lvFldOrdinal;
2437 // TODO: Need to determine if/how to handle split args.
2438 if (varDsc->GetHfaType() == TYP_DOUBLE)
2442 #endif // TARGET_ARM
2443 fieldRegNum = (regNumber)(parentArgReg + regIncrement);
2448 fieldRegNum = varDsc->GetOtherArgReg();
2450 fieldVarDsc->SetArgReg(fieldRegNum);
2454 #endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
2456 fieldVarDsc->SetArgReg(parentArgReg);
2461 if (varTypeIsSIMD(pFieldInfo->fldType))
2463 // We will not recursively promote this, so mark it as 'lvRegStruct' (note that we wouldn't
2464 // be promoting this if we didn't think it could be enregistered).
2465 fieldVarDsc->lvRegStruct = true;
2467 // SIMD types may be HFAs so we need to set the correct state on
2468 // the promoted fields to get the right ABI treatment in the
2470 if (GlobalJitOptions::compFeatureHfa && (pFieldInfo->fldSize <= MAX_PASS_MULTIREG_BYTES))
2472 // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF
2473 var_types hfaType = compiler->GetHfaType(pFieldInfo->fldSIMDTypeHnd);
2474 if (varTypeIsValidHfaType(hfaType))
2476 fieldVarDsc->SetHfaType(hfaType);
2477 fieldVarDsc->lvIsMultiRegArg = (varDsc->lvIsMultiRegArg != 0) && (fieldVarDsc->lvHfaSlots() > 1);
2481 #endif // FEATURE_SIMD
2484 // This temporary should not be converted to a double in stress mode,
2485 // because we introduce assigns to it after the stress conversion
2486 fieldVarDsc->lvKeepType = 1;
2491 //--------------------------------------------------------------------------------------------
2492 // lvaGetFieldLocal - returns the local var index for a promoted field in a promoted struct var.
2495 // varDsc - the promoted struct var descriptor;
2496 // fldOffset - field offset in the struct.
2499 // the index of the local that represents this field.
2501 unsigned Compiler::lvaGetFieldLocal(const LclVarDsc* varDsc, unsigned int fldOffset)
2503 noway_assert(varTypeIsStruct(varDsc));
2504 noway_assert(varDsc->lvPromoted);
2506 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
2508 noway_assert(lvaTable[i].lvIsStructField);
2509 noway_assert(lvaTable[i].lvParentLcl == (unsigned)(varDsc - lvaTable));
2510 if (lvaTable[i].lvFldOffset == fldOffset)
2516 // This is the not-found error return path, the caller should check for BAD_VAR_NUM
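// Typical use (illustrative only; 'parentLclNum' and 'offset' are hypothetical values):
//
//   LclVarDsc* parentDsc = lvaGetDesc(parentLclNum);
//   unsigned   fieldLcl  = lvaGetFieldLocal(parentDsc, offset);
//   if (fieldLcl != BAD_VAR_NUM)
//   {
//       // 'fieldLcl' is the promoted local that holds the field at 'offset'.
//   }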
2520 /*****************************************************************************
2522 * Set the local var "varNum" as address-exposed.
2523 * If this is a promoted struct, label its fields the same way.
2526 void Compiler::lvaSetVarAddrExposed(unsigned varNum DEBUGARG(AddressExposedReason reason))
2528 LclVarDsc* varDsc = lvaGetDesc(varNum);
2529 assert(!varDsc->lvIsStructField);
2531 varDsc->SetAddressExposed(true DEBUGARG(reason));
2533 if (varDsc->lvPromoted)
2535 noway_assert(varTypeIsStruct(varDsc));
2537 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
2539 noway_assert(lvaTable[i].lvIsStructField);
2540 lvaTable[i].SetAddressExposed(true DEBUGARG(AddressExposedReason::PARENT_EXPOSED));
2541 lvaSetVarDoNotEnregister(i DEBUGARG(DoNotEnregisterReason::AddrExposed));
2545 lvaSetVarDoNotEnregister(varNum DEBUGARG(DoNotEnregisterReason::AddrExposed));
2548 //------------------------------------------------------------------------
2549 // lvaSetHiddenBufferStructArg: Set the local var "varNum" as hidden buffer struct arg.
2552 // varNum - the varNum of the local
2555 // Most ABIs "return" large structures via return buffers, where the callee takes an address as the
2556 // argument, and writes the result to it. This presents a problem: ordinarily, addresses of locals
2557 // that escape to calls leave the local in question address-exposed. For this very special case of
2558 // a return buffer, however, it is known that the callee will not do anything with it except write
2559 // to it, once. As such, we handle addresses of locals that represent return buffers specially: we
2560 // *do not* mark the local address-exposed and treat the call much like a local store node throughout
2563 // TODO-ADDR-Bug: currently, we rely on these locals not being present in call argument lists,
2564 // outside of the buffer address argument itself, as liveness - currently - treats the location node
2565 // associated with the address itself as the definition point, and call arguments can be reordered
2566 // rather arbitrarily. We should fix liveness to treat the call as the definition point instead and
2567 // enable this optimization for "!lvIsTemp" locals.
2569 void Compiler::lvaSetHiddenBufferStructArg(unsigned varNum)
2571 LclVarDsc* varDsc = lvaGetDesc(varNum);
2574 varDsc->SetHiddenBufferStructArg(true);
2577 if (varDsc->lvPromoted)
2579 noway_assert(varTypeIsStruct(varDsc));
2581 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
2583 noway_assert(lvaTable[i].lvIsStructField);
2585 lvaTable[i].SetHiddenBufferStructArg(true);
2588 lvaSetVarDoNotEnregister(i DEBUGARG(DoNotEnregisterReason::HiddenBufferStructArg));
2592 lvaSetVarDoNotEnregister(varNum DEBUGARG(DoNotEnregisterReason::HiddenBufferStructArg));
2595 //------------------------------------------------------------------------
2596 // lvaSetVarLiveInOutOfHandler: Set the local varNum as being live in and/or out of a handler
2599 // varNum - the varNum of the local
2601 void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum)
2603 LclVarDsc* varDsc = lvaGetDesc(varNum);
2605 varDsc->lvLiveInOutOfHndlr = 1;
2607 if (varDsc->lvPromoted)
2609 noway_assert(varTypeIsStruct(varDsc));
2611 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
2613 noway_assert(lvaTable[i].lvIsStructField);
2614 lvaTable[i].lvLiveInOutOfHndlr = 1;
2615 // For now, only enregister an EH Var if it is single-def and its refCnt > 1.
2616 if (!lvaEnregEHVars || !lvaTable[i].lvSingleDefRegCandidate || lvaTable[i].lvRefCnt() <= 1)
2618 lvaSetVarDoNotEnregister(i DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler));
2623 // For now, only enregister an EH Var if it is single-def and its refCnt > 1.
2624 if (!lvaEnregEHVars || !varDsc->lvSingleDefRegCandidate || varDsc->lvRefCnt() <= 1)
2626 lvaSetVarDoNotEnregister(varNum DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler));
2628 #ifdef JIT32_GCENCODER
2629 else if (lvaKeepAliveAndReportThis() && (varNum == info.compThisArg))
2631 // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is true, we must either keep the "this" pointer
2632 // in the same register for the entire method, or keep it on the stack. If it is EH-exposed, we can't ever
2633 // keep it in a register, since it must also be live on the stack. Therefore, we won't attempt to allocate it.
2634 lvaSetVarDoNotEnregister(varNum DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler));
2636 #endif // JIT32_GCENCODER
2639 /*****************************************************************************
2641 * Record that the local var "varNum" should not be enregistered (for one of several reasons.)
2644 void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason))
2646 LclVarDsc* varDsc = lvaGetDesc(varNum);
2648 const bool wasAlreadyMarkedDoNotEnreg = (varDsc->lvDoNotEnregister == 1);
2649 varDsc->lvDoNotEnregister = 1;
2652 if (!wasAlreadyMarkedDoNotEnreg)
2654 varDsc->SetDoNotEnregReason(reason);
2659 printf("\nLocal V%02u should not be enregistered because: ", varNum);
2664 case DoNotEnregisterReason::AddrExposed:
2665 JITDUMP("it is address exposed\n");
2666 assert(varDsc->IsAddressExposed());
2668 case DoNotEnregisterReason::HiddenBufferStructArg:
2669 JITDUMP("it is hidden buffer struct arg\n");
2670 assert(varDsc->IsHiddenBufferStructArg());
2672 case DoNotEnregisterReason::DontEnregStructs:
2673 JITDUMP("struct enregistration is disabled\n");
2674 assert(varTypeIsStruct(varDsc));
2676 case DoNotEnregisterReason::NotRegSizeStruct:
2677 JITDUMP("struct size does not match reg size\n");
2678 assert(varTypeIsStruct(varDsc));
2680 case DoNotEnregisterReason::LocalField:
2681 JITDUMP("was accessed as a local field\n");
2683 case DoNotEnregisterReason::VMNeedsStackAddr:
2684 JITDUMP("VM needs stack addr\n");
2686 case DoNotEnregisterReason::LiveInOutOfHandler:
2687 JITDUMP("live in/out of a handler\n");
2688 varDsc->lvLiveInOutOfHndlr = 1;
2690 case DoNotEnregisterReason::BlockOp:
2691 JITDUMP("written/read in a block op\n");
2693 case DoNotEnregisterReason::IsStructArg:
2694 if (varTypeIsStruct(varDsc))
2696 JITDUMP("it is a struct arg\n");
2700 JITDUMP("it is reinterpreted as a struct arg\n");
2703 case DoNotEnregisterReason::DepField:
2704 JITDUMP("field of a dependently promoted struct\n");
2705 assert(varDsc->lvIsStructField && (lvaGetParentPromotionType(varNum) != PROMOTION_TYPE_INDEPENDENT));
2707 case DoNotEnregisterReason::NoRegVars:
2708 JITDUMP("opts.compFlags & CLFLG_REGVAR is not set\n");
2709 assert(!compEnregLocals());
2711 case DoNotEnregisterReason::MinOptsGC:
2712 JITDUMP("it is a GC Ref and we are compiling MinOpts\n");
2713 assert(!JitConfig.JitMinOptsTrackGCrefs() && varTypeIsGC(varDsc->TypeGet()));
2715 #if !defined(TARGET_64BIT)
2716 case DoNotEnregisterReason::LongParamField:
2717 JITDUMP("it is a decomposed field of a long parameter\n");
2720 #ifdef JIT32_GCENCODER
2721 case DoNotEnregisterReason::PinningRef:
2722 JITDUMP("pinning ref\n");
2723 assert(varDsc->lvPinned);
2726 case DoNotEnregisterReason::LclAddrNode:
2727 JITDUMP("LclAddrVar/Fld takes the address of this node\n");
2730 case DoNotEnregisterReason::CastTakesAddr:
2731 JITDUMP("cast takes addr\n");
2734 case DoNotEnregisterReason::StoreBlkSrc:
2735 JITDUMP("the local is used as store block src\n");
2738 case DoNotEnregisterReason::SwizzleArg:
2739 JITDUMP("SwizzleArg\n");
2742 case DoNotEnregisterReason::BlockOpRet:
2743 JITDUMP("return uses a block op\n");
2746 case DoNotEnregisterReason::ReturnSpCheck:
2747 JITDUMP("Used for SP check on return\n");
2750 case DoNotEnregisterReason::CallSpCheck:
2751 JITDUMP("Used for SP check on call\n");
2754 case DoNotEnregisterReason::SimdUserForcesDep:
2755 JITDUMP("Promoted struct used by a SIMD/HWI node\n");
2765 //------------------------------------------------------------------------
2766 // lvaIsImplicitByRefLocal: Is the local an "implicit byref" parameter?
2768 // We term structs passed via pointers to shadow copies "implicit byrefs".
2769 // They are used on Windows x64 for structs 3, 5, 6, 7, > 8 bytes in size,
2770 // and on ARM64/LoongArch64 for structs larger than 16 bytes.
2772 // They are "byrefs" because the VM sometimes uses memory allocated on the
2773 // GC heap for the shadow copies.
2776 // lclNum - The local in question
2779 // Whether "lclNum" refers to an implicit byref.
2781 bool Compiler::lvaIsImplicitByRefLocal(unsigned lclNum) const
2783 #if FEATURE_IMPLICIT_BYREFS
2784 LclVarDsc* varDsc = lvaGetDesc(lclNum);
2785 if (varDsc->lvIsImplicitByRef)
2787 assert(varDsc->lvIsParam);
2789 assert(varTypeIsStruct(varDsc) || (varDsc->TypeGet() == TYP_BYREF));
2792 #endif // FEATURE_IMPLICIT_BYREFS
2796 //------------------------------------------------------------------------
2797 // lvaIsLocalImplicitlyAccessedByRef: Will this local be accessed indirectly?
2800 // lclNum - The number of local in question
2803 // If "lclNum" is an implicit byref parameter, or its dependently promoted
2804 // field, "true", otherwise, "false".
2807 // This method is only meaningful before the locals have been morphed into
2808 // explicit indirections.
2810 bool Compiler::lvaIsLocalImplicitlyAccessedByRef(unsigned lclNum) const
2812 if (lvaGetDesc(lclNum)->lvIsStructField)
2814 return lvaIsImplicitByRefLocal(lvaGetDesc(lclNum)->lvParentLcl);
2817 return lvaIsImplicitByRefLocal(lclNum);
2820 // Returns true if this local var is a multireg struct.
2821 // TODO-Throughput: This does a lookup on the class handle, and in the outgoing arg context
2822 // this information is already available on the CallArgABIInformation, and shouldn't need to be recomputed.
2825 bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg)
2827 if (varTypeIsStruct(varDsc->TypeGet()))
2829 CORINFO_CLASS_HANDLE clsHnd = varDsc->GetLayout()->GetClassHandle();
2830 structPassingKind howToPassStruct;
2832 var_types type = getArgTypeForStruct(clsHnd, &howToPassStruct, isVarArg, varDsc->lvExactSize());
2834 if (howToPassStruct == SPK_ByValueAsHfa)
2836 assert(type == TYP_STRUCT);
2840 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
2841 if (howToPassStruct == SPK_ByValue)
2843 assert(type == TYP_STRUCT);
2851 //------------------------------------------------------------------------
2852 // lvaSetStruct: Set the type of a local to a struct, given a layout.
2855 // varNum - The local
2856 // layout - The layout
2857 // unsafeValueClsCheck - Whether to check if we should potentially emit a GS cookie due to this local.
2859 void Compiler::lvaSetStruct(unsigned varNum, ClassLayout* layout, bool unsafeValueClsCheck)
2861 LclVarDsc* varDsc = lvaGetDesc(varNum);
2863 // Set the type and associated info if we haven't already set it.
2864 if (varDsc->lvType == TYP_UNDEF)
2866 varDsc->lvType = TYP_STRUCT;
2868 if (varDsc->GetLayout() == nullptr)
2870 varDsc->SetLayout(layout);
2872 if (layout->IsValueClass())
2874 varDsc->lvType = layout->GetType();
2876 #if FEATURE_IMPLICIT_BYREFS
2877 // Mark implicit byref struct parameters
2878 if (varDsc->lvIsParam && !varDsc->lvIsStructField)
2880 structPassingKind howToReturnStruct;
2881 getArgTypeForStruct(layout->GetClassHandle(), &howToReturnStruct, this->info.compIsVarArgs,
2882 varDsc->lvExactSize());
2884 if (howToReturnStruct == SPK_ByReference)
2886 JITDUMP("Marking V%02i as a byref parameter\n", varNum);
2887 varDsc->lvIsImplicitByRef = 1;
2890 #endif // FEATURE_IMPLICIT_BYREFS
2892 // For structs that are small enough, we check and set HFA element type
2893 if (GlobalJitOptions::compFeatureHfa && (layout->GetSize() <= MAX_PASS_MULTIREG_BYTES))
2895 // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF
2896 var_types hfaType = GetHfaType(layout->GetClassHandle());
2897 if (varTypeIsValidHfaType(hfaType))
2899 varDsc->SetHfaType(hfaType);
2901 // hfa variables can never contain GC pointers
2902 assert(!layout->HasGCPtr());
2903 // The size of this struct should be evenly divisible by 4 or 8
2904 assert((varDsc->lvExactSize() % genTypeSize(hfaType)) == 0);
2905 // The number of elements in the HFA should fit into our MAX_ARG_REG_COUNT limit
2906 assert((varDsc->lvExactSize() / genTypeSize(hfaType)) <= MAX_ARG_REG_COUNT);
2913 assert(ClassLayout::AreCompatible(varDsc->GetLayout(), layout));
2914 // Inlining could replace a canon struct type with an exact one.
2915 varDsc->SetLayout(layout);
2916 assert(layout->IsBlockLayout() || (layout->GetSize() != 0));
2919 if (!layout->IsBlockLayout())
2921 #ifndef TARGET_64BIT
2922 bool fDoubleAlignHint = false;
2924 fDoubleAlignHint = true;
2927 if (info.compCompHnd->getClassAlignmentRequirement(layout->GetClassHandle(), fDoubleAlignHint) == 8)
2932 printf("Marking struct in V%02i with double align flag\n", varNum);
2935 varDsc->lvStructDoubleAlign = 1;
2937 #endif // not TARGET_64BIT
2939 // Check whether this local is an unsafe value type and requires GS cookie protection.
2940 // GS checks require the stack to be re-ordered, which can't be done with EnC.
2941 if (unsafeValueClsCheck)
2943 unsigned classAttribs = info.compCompHnd->getClassAttribs(layout->GetClassHandle());
2945 if ((classAttribs & CORINFO_FLG_UNSAFE_VALUECLASS) && !opts.compDbgEnC)
2947 setNeedsGSSecurityCookie();
2948 compGSReorderStackLayout = true;
2949 varDsc->lvIsUnsafeBuffer = true;
2954 if (JitConfig.EnableExtraSuperPmiQueries())
2956 makeExtraStructQueries(layout->GetClassHandle(), 2);
2962 //------------------------------------------------------------------------
2963 // lvaSetStruct: Set the type of a local to a struct, given its type handle.
2966 // varNum - The local
2967 // typeHnd - The type handle
2968 // unsafeValueClsCheck - Whether to check if we should potentially emit a GS cookie due to this local.
2970 void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck)
2972 lvaSetStruct(varNum, typGetObjLayout(typeHnd), unsafeValueClsCheck);
2976 //------------------------------------------------------------------------
2977 // makeExtraStructQueries: Query the information for the given struct handle.
2980 // structHandle -- The handle for the struct type we're querying.
2981 // level -- How many more levels to recurse.
2983 void Compiler::makeExtraStructQueries(CORINFO_CLASS_HANDLE structHandle, int level)
2989 assert(structHandle != NO_CLASS_HANDLE);
2990 (void)typGetObjLayout(structHandle);
2991 DWORD typeFlags = info.compCompHnd->getClassAttribs(structHandle);
2993 unsigned const fieldCnt = info.compCompHnd->getClassNumInstanceFields(structHandle);
2994 impNormStructType(structHandle);
2995 #ifdef TARGET_ARMARCH
2996 GetHfaType(structHandle);
2999 // In a lambda since this requires a lot of stack and this function is recursive.
3000 auto queryLayout = [this, structHandle]() {
3001 CORINFO_TYPE_LAYOUT_NODE nodes[256];
3002 size_t numNodes = ArrLen(nodes);
3003 info.compCompHnd->getTypeLayout(structHandle, nodes, &numNodes);
3007 // Bypass fetching instance fields of ref classes for now,
3008 // as it requires traversing the class hierarchy.
3010 if ((typeFlags & CORINFO_FLG_VALUECLASS) == 0)
3015 // In R2R we cannot query arbitrary information about struct fields, so
3016 // skip it there. Note that the getTypeLayout call above is enough to cover
3017 // us for promotion at least.
3018 if (!opts.IsReadyToRun())
3020 for (unsigned int i = 0; i < fieldCnt; i++)
3022 CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(structHandle, i);
3023 unsigned fldOffset = info.compCompHnd->getFieldOffset(fieldHandle);
3024 CORINFO_CLASS_HANDLE fieldClassHandle = NO_CLASS_HANDLE;
3025 CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHandle, &fieldClassHandle);
3026 var_types fieldVarType = JITtype2varType(fieldCorType);
3027 if (fieldClassHandle != NO_CLASS_HANDLE)
3029 if (varTypeIsStruct(fieldVarType))
3031 makeExtraStructQueries(fieldClassHandle, level - 1);
3039 //------------------------------------------------------------------------
3040 // lvaSetStructUsedAsVarArg: update hfa information for vararg struct args
3043 // varNum -- number of the variable
3046 // This only affects arm64 varargs on windows where we need to pass
3047 // hfa arguments as if they are not HFAs.
3049 // This function should only be called if the struct is used in a varargs
3052 void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum)
3054 if (GlobalJitOptions::compFeatureHfa && TargetOS::IsWindows)
3056 #if defined(TARGET_ARM64)
3057 LclVarDsc* varDsc = lvaGetDesc(varNum);
3058 // For varargs methods, incoming and outgoing arguments should not be treated as HFAs.
3060 varDsc->SetHfaType(TYP_UNDEF);
3061 #endif // defined(TARGET_ARM64)
3065 //------------------------------------------------------------------------
3066 // lvaSetClass: set class information for a local var.
3069 // varNum -- number of the variable
3070 // clsHnd -- class handle to use in set or update
3071 // isExact -- true if class is known exactly
3074 // varNum must not already have a ref class handle.
3076 void Compiler::lvaSetClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact)
3078 noway_assert(varNum < lvaCount);
3080 if (clsHnd != NO_CLASS_HANDLE && !isExact && JitConfig.JitEnableExactDevirtualization())
3082 CORINFO_CLASS_HANDLE exactClass;
3083 if (info.compCompHnd->getExactClasses(clsHnd, 1, &exactClass) == 1)
3086 clsHnd = exactClass;
3090 // Else we should have a type handle.
3091 assert(clsHnd != nullptr);
3093 LclVarDsc* varDsc = lvaGetDesc(varNum);
3094 assert(varDsc->lvType == TYP_REF);
3096 // We should not have any ref type information for this var.
3097 assert(varDsc->lvClassHnd == NO_CLASS_HANDLE);
3098 assert(!varDsc->lvClassIsExact);
3100 JITDUMP("\nlvaSetClass: setting class for V%02i to (%p) %s %s\n", varNum, dspPtr(clsHnd), eeGetClassName(clsHnd),
3101 isExact ? " [exact]" : "");
3103 varDsc->lvClassHnd = clsHnd;
3104 varDsc->lvClassIsExact = isExact;
3107 //------------------------------------------------------------------------
3108 // lvaSetClass: set class information for a local var from a tree or stack type
3111 // varNum -- number of the variable. Must be a single def local
3112 // tree -- tree establishing the variable's value
3113 // stackHnd -- handle for the type from the evaluation stack
3116 // Preferentially uses the tree's type, when available. Since not all
3117 // tree kinds can track ref types, the stack type is used as a
3118 // fallback. If there is no stack type, then the class is set to object.
3120 void Compiler::lvaSetClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HANDLE stackHnd)
3122 bool isExact = false;
3123 bool isNonNull = false;
3124 CORINFO_CLASS_HANDLE clsHnd = gtGetClassHandle(tree, &isExact, &isNonNull);
3126 if (clsHnd != nullptr)
3128 lvaSetClass(varNum, clsHnd, isExact);
3130 else if (stackHnd != nullptr)
3132 lvaSetClass(varNum, stackHnd);
3136 lvaSetClass(varNum, impGetObjectClass());
3140 //------------------------------------------------------------------------
3141 // lvaUpdateClass: update class information for a local var.
3144 // varNum -- number of the variable
3145 // clsHnd -- class handle to use in set or update
3146 // isExact -- true if class is known exactly
3150 // This method models the type update rule for an assignment.
3152 // Updates currently should only happen for single-def user args or
3153 // locals, when we are processing the expression actually being
3154 // used to initialize the local (or inlined arg). The update will
3155 // change the local from the declared type to the type of the
3158 // These updates should always *improve* what we know about the
3159 // type, that is making an inexact type exact, or changing a type
3160 // to some subtype. However the jit lacks precise type information
3161 // for shared code, so ensuring this is so is currently not possible.
3164 void Compiler::lvaUpdateClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact)
3166 assert(varNum < lvaCount);
3168 // Else we should have a class handle to consider
3169 assert(clsHnd != nullptr);
3171 LclVarDsc* varDsc = lvaGetDesc(varNum);
3172 assert(varDsc->lvType == TYP_REF);
3174 // We should already have a class
3175 assert(varDsc->lvClassHnd != NO_CLASS_HANDLE);
3177 // We should only be updating classes for single-def locals.
3178 assert(varDsc->lvSingleDef);
3180 // Now see if we should update.
3182 // New information may not always be "better" so do some
3183 // simple analysis to decide if the update is worthwhile.
3184 const bool isNewClass = (clsHnd != varDsc->lvClassHnd);
3185 bool shouldUpdate = false;
3187 // Are we attempting to update the class? Only check this when we have
3188 // a new type and the existing class is inexact... we should not be
3189 // updating exact classes.
3190 if (!varDsc->lvClassIsExact && isNewClass)
3192 shouldUpdate = !!info.compCompHnd->isMoreSpecificType(varDsc->lvClassHnd, clsHnd);
3194 // Else are we attempting to update exactness?
3195 else if (isExact && !varDsc->lvClassIsExact && !isNewClass)
3197 shouldUpdate = true;
3201 if (isNewClass || (isExact != varDsc->lvClassIsExact))
3203 JITDUMP("\nlvaUpdateClass:%s Updating class for V%02u", shouldUpdate ? "" : " NOT", varNum);
3204 JITDUMP(" from (%p) %s%s", dspPtr(varDsc->lvClassHnd), eeGetClassName(varDsc->lvClassHnd),
3205 varDsc->lvClassIsExact ? " [exact]" : "");
3206 JITDUMP(" to (%p) %s%s\n", dspPtr(clsHnd), eeGetClassName(clsHnd), isExact ? " [exact]" : "");
3212 varDsc->lvClassHnd = clsHnd;
3213 varDsc->lvClassIsExact = isExact;
3216 // Note we've modified the type...
3217 varDsc->lvClassInfoUpdated = true;
3224 //------------------------------------------------------------------------
3225 // lvaUpdateClass: Update class information for a local var from a tree
3229 // varNum -- number of the variable. Must be a single def local
3230 // tree -- tree establishing the variable's value
3231 // stackHnd -- handle for the type from the evaluation stack
3234 // Preferentially uses the tree's type, when available. Since not all
3235 // tree kinds can track ref types, the stack type is used as a
3238 void Compiler::lvaUpdateClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HANDLE stackHnd)
3240 bool isExact = false;
3241 bool isNonNull = false;
3242 CORINFO_CLASS_HANDLE clsHnd = gtGetClassHandle(tree, &isExact, &isNonNull);
3244 if (clsHnd != nullptr)
3246 lvaUpdateClass(varNum, clsHnd, isExact);
3248 else if (stackHnd != nullptr)
3250 lvaUpdateClass(varNum, stackHnd);
3254 //------------------------------------------------------------------------
3255 // lvaLclSize: returns size of a local variable, in bytes
3258 // varNum -- variable to query
3261 // Number of bytes needed on the frame for such a local.
3263 unsigned Compiler::lvaLclSize(unsigned varNum)
3265 assert(varNum < lvaCount);
3267 var_types varType = lvaTable[varNum].TypeGet();
3269 if (varType == TYP_STRUCT)
3271 return lvaTable[varNum].lvSize();
3275 // We only need this Quirk for TARGET_64BIT
3276 if (lvaTable[varNum].lvQuirkToLong)
3278 noway_assert(lvaTable[varNum].IsAddressExposed());
3279 return genTypeStSz(TYP_LONG) * sizeof(int); // return 8 (2 * 4)
3282 return genTypeStSz(varType) * sizeof(int);
3286 // Return the exact width of local variable "varNum" -- the number of bytes
3287 // you'd need to copy in order to overwrite the value.
3289 unsigned Compiler::lvaLclExactSize(unsigned varNum)
3291 assert(varNum < lvaCount);
3292 return lvaGetDesc(varNum)->lvExactSize();
3295 // LclVarDsc "less" comparer used to compare the weight of two locals, when optimizing for small code.
3296 class LclVarDsc_SmallCode_Less
3298 const LclVarDsc* m_lvaTable;
3299 RefCountState m_rcs;
3300 INDEBUG(unsigned m_lvaCount;)
3303 LclVarDsc_SmallCode_Less(const LclVarDsc* lvaTable, RefCountState rcs DEBUGARG(unsigned lvaCount))
3304 : m_lvaTable(lvaTable)
3307 , m_lvaCount(lvaCount)
3312 bool operator()(unsigned n1, unsigned n2)
3314 assert(n1 < m_lvaCount);
3315 assert(n2 < m_lvaCount);
3317 const LclVarDsc* dsc1 = &m_lvaTable[n1];
3318 const LclVarDsc* dsc2 = &m_lvaTable[n2];
3320 // We should not be sorting untracked variables
3321 assert(dsc1->lvTracked);
3322 assert(dsc2->lvTracked);
3323 // We should not be sorting after registers have been allocated
3324 assert(!dsc1->lvRegister);
3325 assert(!dsc2->lvRegister);
3327 unsigned weight1 = dsc1->lvRefCnt(m_rcs);
3328 unsigned weight2 = dsc2->lvRefCnt(m_rcs);
3331 // ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
3332 // legacy backend. It should be enabled and verified.
3334 // Force integer candidates to sort above float candidates.
3335 const bool isFloat1 = isFloatRegType(dsc1->lvType);
3336 const bool isFloat2 = isFloatRegType(dsc2->lvType);
3338 if (isFloat1 != isFloat2)
3340 if ((weight2 != 0) && isFloat1)
3345 if ((weight1 != 0) && isFloat2)
3352 if (weight1 != weight2)
3354 return weight1 > weight2;
3357 // If the weighted ref counts are different then use their difference.
3358 if (dsc1->lvRefCntWtd() != dsc2->lvRefCntWtd())
3360 return dsc1->lvRefCntWtd() > dsc2->lvRefCntWtd();
3363 // We have equal ref counts and weighted ref counts.
3364 // Break the tie by:
3365 // - Increasing the weight by 2 if we are a register arg.
3366 // - Increasing the weight by 0.5 if we are a GC type.
3368 // Review: seems odd that this is mixing counts and weights.
3372 if (dsc1->lvIsRegArg)
3374 weight1 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
3377 if (varTypeIsGC(dsc1->TypeGet()))
3379 weight1 += BB_UNITY_WEIGHT_UNSIGNED / 2;
3385 if (dsc2->lvIsRegArg)
3387 weight2 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
3390 if (varTypeIsGC(dsc2->TypeGet()))
3392 weight2 += BB_UNITY_WEIGHT_UNSIGNED / 2;
3396 if (weight1 != weight2)
3398 return weight1 > weight2;
3401 // To achieve a stable sort we use the LclNum (by way of the pointer address).
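// Worked example for the tie-break above (hypothetical locals): given equal ref counts and
// equal weighted ref counts, a register-arg GC-typed local has its count bumped by
// 2 * BB_UNITY_WEIGHT_UNSIGNED + BB_UNITY_WEIGHT_UNSIGNED / 2 and therefore sorts ahead of a
// plain int local with the same counts; only if the adjusted counts still tie does the
// comparison fall back to the local numbers.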
3406 // LclVarDsc "less" comparer used to compare the weight of two locals, when optimizing for blended code.
3407 class LclVarDsc_BlendedCode_Less
3409 const LclVarDsc* m_lvaTable;
3410 RefCountState m_rcs;
3411 INDEBUG(unsigned m_lvaCount;)
3414 LclVarDsc_BlendedCode_Less(const LclVarDsc* lvaTable, RefCountState rcs DEBUGARG(unsigned lvaCount))
3415 : m_lvaTable(lvaTable)
3418 , m_lvaCount(lvaCount)
3423 bool operator()(unsigned n1, unsigned n2)
3425 assert(n1 < m_lvaCount);
3426 assert(n2 < m_lvaCount);
3428 const LclVarDsc* dsc1 = &m_lvaTable[n1];
3429 const LclVarDsc* dsc2 = &m_lvaTable[n2];
3431 // We should not be sorting untracked variables
3432 assert(dsc1->lvTracked);
3433 assert(dsc2->lvTracked);
3434 // We should not be sorting after registers have been allocated
3435 assert(!dsc1->lvRegister);
3436 assert(!dsc2->lvRegister);
3438 weight_t weight1 = dsc1->lvRefCntWtd(m_rcs);
3439 weight_t weight2 = dsc2->lvRefCntWtd(m_rcs);
3442 // ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
3443 // legacy backend. It should be enabled and verified.
3445 // Force integer candidates to sort above float candidates.
3446 const bool isFloat1 = isFloatRegType(dsc1->lvType);
3447 const bool isFloat2 = isFloatRegType(dsc2->lvType);
3449 if (isFloat1 != isFloat2)
3451 if (!Compiler::fgProfileWeightsEqual(weight2, 0) && isFloat1)
3456 if (!Compiler::fgProfileWeightsEqual(weight1, 0) && isFloat2)
3463 if (!Compiler::fgProfileWeightsEqual(weight1, 0) && dsc1->lvIsRegArg)
3465 weight1 += 2 * BB_UNITY_WEIGHT;
3468 if (!Compiler::fgProfileWeightsEqual(weight2, 0) && dsc2->lvIsRegArg)
3470 weight2 += 2 * BB_UNITY_WEIGHT;
3473 if (!Compiler::fgProfileWeightsEqual(weight1, weight2))
3475 return weight1 > weight2;
3478 // If the weighted ref counts are different then try the unweighted ref counts.
3479 if (dsc1->lvRefCnt(m_rcs) != dsc2->lvRefCnt(m_rcs))
3481 return dsc1->lvRefCnt(m_rcs) > dsc2->lvRefCnt(m_rcs);
3484 // If one is a GC type and the other is not the GC type wins.
3485 if (varTypeIsGC(dsc1->TypeGet()) != varTypeIsGC(dsc2->TypeGet()))
3487 return varTypeIsGC(dsc1->TypeGet());
3490 // To achieve a stable sort we use the LclNum (by way of the pointer address).
3495 /*****************************************************************************
3497 * Sort the local variable table by refcount and assign tracking indices.
3500 void Compiler::lvaSortByRefCount()
3502 lvaTrackedCount = 0;
3503 lvaTrackedCountInSizeTUnits = 0;
3506 VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeEmpty(this));
3514 /* We'll sort the variables by ref count - allocate the sorted table */
3516 if (lvaTrackedToVarNumSize < lvaCount)
3518 lvaTrackedToVarNumSize = lvaCount;
3519 lvaTrackedToVarNum = new (getAllocator(CMK_LvaTable)) unsigned[lvaTrackedToVarNumSize];
3522 unsigned trackedCandidateCount = 0;
3523 unsigned* trackedCandidates = lvaTrackedToVarNum;
3525 // Fill in the table used for sorting
3527 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
3529 LclVarDsc* varDsc = lvaGetDesc(lclNum);
3531 // Start by assuming that the variable will be tracked.
3532 varDsc->lvTracked = 1;
3533 INDEBUG(varDsc->lvTrackedWithoutIndex = 0);
3535 if (varDsc->lvRefCnt(lvaRefCountState) == 0)
3537 // Zero ref count, make this untracked.
3538 varDsc->lvTracked = 0;
3539 varDsc->setLvRefCntWtd(0, lvaRefCountState);
3542 #if !defined(TARGET_64BIT)
3543 if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
3545 varDsc->lvTracked = 0;
3547 #endif // !defined(TARGET_64BIT)
3549 // Variables that are address-exposed, and all struct locals, are never enregistered, or tracked.
3550 // (The struct may be promoted, and its field variables enregistered/tracked, or the VM may "normalize"
3551 // its type so that it's not seen by the JIT as a struct.)
3552 // Pinned variables may not be tracked (a condition of the GCInfo representation)
3553 // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
3554 // references when using the general GC encoding.
3555 if (varDsc->IsAddressExposed())
3557 varDsc->lvTracked = 0;
3558 assert(varDsc->lvType != TYP_STRUCT ||
3559 varDsc->lvDoNotEnregister); // For structs, should have set this when we set m_addrExposed.
3561 if (varTypeIsStruct(varDsc))
3563 // Promoted structs will never be considered for enregistration anyway,
3564 // and the DoNotEnregister flag was used to indicate whether promotion was
3565 // independent or dependent.
3566 if (varDsc->lvPromoted)
3568 varDsc->lvTracked = 0;
3570 else if (!varDsc->IsEnregisterableType())
3572 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NotRegSizeStruct));
3574 else if (varDsc->lvType == TYP_STRUCT)
3576 if (!varDsc->lvRegStruct && !compEnregStructLocals())
3578 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::DontEnregStructs));
3580 else if (varDsc->lvIsMultiRegArgOrRet())
3582 // Prolog and return generators do not support SIMD<->general register moves.
3583 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg));
3585 #if defined(TARGET_ARM)
3586 else if (varDsc->lvIsParam)
3588 // On arm we prespill all struct args,
3589 // TODO-Arm-CQ: keep them in registers, it will need a fix
3590 // to "On the ARM we will spill any incoming struct args" logic in codegencommon.
3591 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg));
3593 #endif // TARGET_ARM
3596 if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
3598 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::DepField));
3600 if (varDsc->lvPinned)
3602 varDsc->lvTracked = 0;
3603 #ifdef JIT32_GCENCODER
3604 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::PinningRef));
3607 if (opts.MinOpts() && !JitConfig.JitMinOptsTrackGCrefs() && varTypeIsGC(varDsc->TypeGet()))
3609 varDsc->lvTracked = 0;
3610 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::MinOptsGC));
3612 if (!compEnregLocals())
3614 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NoRegVars));
3616 #if defined(JIT32_GCENCODER) && defined(FEATURE_EH_FUNCLETS)
3617 if (lvaIsOriginalThisArg(lclNum) && (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)
3619 // For x86/Linux, we need to track "this".
3620 // However, we cannot have it among the tracked variables, so we always leave the "this" pointer untracked
3621 varDsc->lvTracked = 0;
3625 // Are we not optimizing and we have exception handlers?
3626 // if so mark all args and locals "do not enregister".
3628 if (opts.MinOpts() && compHndBBtabCount > 0)
3630 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler));
3634 var_types type = genActualType(varDsc->TypeGet());
3648 #if defined(TARGET_XARCH)
3651 #endif // TARGET_XARCH
3652 #endif // FEATURE_SIMD
3658 noway_assert(!"lvType not set correctly");
3659 varDsc->lvType = TYP_INT;
3664 varDsc->lvTracked = 0;
3668 if (varDsc->lvTracked)
3670 trackedCandidates[trackedCandidateCount++] = lclNum;
3674 lvaTrackedCount = min(trackedCandidateCount, (unsigned)JitConfig.JitMaxLocalsToTrack());
3676 // Sort the candidates. In the late liveness passes we want lower tracked
3677 // indices to be more important variables, so we always do this. In early
3678 // liveness it does not matter, so we can skip it when we are going to
3679 // track everything.
3680 // TODO-TP: For early liveness we could do a partial sort for the large
3682 if (!fgIsDoingEarlyLiveness || (lvaTrackedCount < trackedCandidateCount))
3684 // Now sort the tracked variable table by ref-count
3685 if (compCodeOpt() == SMALL_CODE)
3687 jitstd::sort(trackedCandidates, trackedCandidates + trackedCandidateCount,
3688 LclVarDsc_SmallCode_Less(lvaTable, lvaRefCountState DEBUGARG(lvaCount)));
3692 jitstd::sort(trackedCandidates, trackedCandidates + trackedCandidateCount,
3693 LclVarDsc_BlendedCode_Less(lvaTable, lvaRefCountState DEBUGARG(lvaCount)));
3697 JITDUMP("Tracked variable (%u out of %u) table:\n", lvaTrackedCount, lvaCount);
3699 // Assign indices to all the variables we've decided to track
3700 for (unsigned varIndex = 0; varIndex < lvaTrackedCount; varIndex++)
3702 LclVarDsc* varDsc = lvaGetDesc(trackedCandidates[varIndex]);
3703 assert(varDsc->lvTracked);
3704 varDsc->lvVarIndex = static_cast<unsigned short>(varIndex);
3706 INDEBUG(if (verbose) { gtDispLclVar(trackedCandidates[varIndex]); })
3707 JITDUMP(" [%6s]: refCnt = %4u, refCntWtd = %6s\n", varTypeName(varDsc->TypeGet()),
3708 varDsc->lvRefCnt(lvaRefCountState),
3709 refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
3714 // Mark all variables past the first 'lclMAX_TRACKED' as untracked
3715 for (unsigned varIndex = lvaTrackedCount; varIndex < trackedCandidateCount; varIndex++)
3717 LclVarDsc* varDsc = lvaGetDesc(trackedCandidates[varIndex]);
3718 assert(varDsc->lvTracked);
3719 varDsc->lvTracked = 0;
3722 // We have a new epoch, and also cache the tracked var count in terms of size_t's sufficient to hold that many bits.
3724 lvaTrackedCountInSizeTUnits =
3725 roundUp((unsigned)lvaTrackedCount, (unsigned)(sizeof(size_t) * 8)) / unsigned(sizeof(size_t) * 8);
3728 VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeFull(this));
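// Worked example for the bookkeeping above (hypothetical count): on a 64-bit host
// sizeof(size_t) * 8 == 64, so lvaTrackedCount == 70 yields
// roundUp(70, 64) / 64 == 128 / 64 == 2 size_t units per tracked-variable bit set.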
3732 //------------------------------------------------------------------------
3733 // lvExactSize: Get the exact size of the type of this local.
3736 // Size in bytes. Always non-zero, but not necessarily a multiple of the stack slot size.
3739 unsigned LclVarDsc::lvExactSize() const
3741 return (lvType == TYP_STRUCT) ? GetLayout()->GetSize() : genTypeSize(lvType);
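// For example: a TYP_STRUCT local whose ClassLayout reports 6 bytes returns 6 here, and a
// TYP_DOUBLE local returns genTypeSize(TYP_DOUBLE) == 8. Callers that need stack-slot-sized
// storage round this value up themselves (see lvSize below).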
3744 //------------------------------------------------------------------------
3745 // lvSize: Get the size of a struct local on the stack frame.
3750 unsigned LclVarDsc::lvSize() const // Size needed for storage representation. Only used for structs.
3752 // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
3753 // where the struct itself is no longer used because all access is via its member fields.
3754 // When that happens, the struct is marked as unused and its type has been changed to
3755 // TYP_INT (to keep the GC tracking code from looking at it).
3756 // See Compiler::raAssignVars() for details. For example:
3757 // N002 ( 4, 3) [00EA067C] ------------- return struct $346
3758 // N001 ( 3, 2) [00EA0628] ------------- lclVar struct(U) V03 loc2
3759 // float V03.f1 (offs=0x00) -> V12 tmp7
3760 // f8 (last use) (last use) $345
3761 // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03
3762 // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
3764 assert(varTypeIsStruct(lvType) || (lvPromoted && lvUnusedStruct));
3768 assert(varTypeIsStruct(lvType));
3769 const bool isFloatHfa = (lvIsHfa() && (GetHfaType() == TYP_FLOAT));
3770 const unsigned argSizeAlignment = Compiler::eeGetArgSizeAlignment(lvType, isFloatHfa);
3771 return roundUp(lvExactSize(), argSizeAlignment);
3774 #if defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
3775 // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
    // this for arguments, which must be passed according to the defined ABI. We don't want to do this for
    // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16().
3778 // (Note that for 64-bits, we are already rounding up to 16.)
3779 if (lvType == TYP_SIMD12)
3784 #endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
3786 return roundUp(lvExactSize(), TARGET_POINTER_SIZE);
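    // Illustrative example: on a 64-bit target, a struct local with an exact size of 12 bytes
    // (and not hitting the SIMD12 special case above) gets roundUp(12, TARGET_POINTER_SIZE) == 16
    // bytes of frame storage here.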
3789 /**********************************************************************************
3790 * Get stack size of the varDsc.
3792 size_t LclVarDsc::lvArgStackSize() const
3794 // Make sure this will have a stack size
3795 assert(!this->lvIsRegArg);
3797 size_t stackSize = 0;
3798 if (varTypeIsStruct(this))
3800 #if defined(WINDOWS_AMD64_ABI)
3801 // Structs are either passed by reference or can be passed by value using one pointer
3802 stackSize = TARGET_POINTER_SIZE;
3803 #elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
3804 // lvSize performs a roundup.
3805 stackSize = this->lvSize();
3807 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
3808 if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa()))
3810 // If the size is greater than 16 bytes then it will
3811 // be passed by reference.
3812 stackSize = TARGET_POINTER_SIZE;
3814 #endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
3816 #else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 !TARGET_RISCV64
3818 NYI("Unsupported target.");
3821 #endif // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI
3825 stackSize = TARGET_POINTER_SIZE;
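        // Illustrative summary (hypothetical sizes): a 24-byte non-HFA struct argument on ARM64 is
        // passed by reference, so its stack size here is just TARGET_POINTER_SIZE; the same struct on
        // a SysV AMD64 target reports its full rounded-up lvSize() of 24 bytes; and any non-struct
        // argument, e.g. a TYP_INT, occupies a single pointer-sized slot.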
3831 //------------------------------------------------------------------------
3832 // GetRegisterType: Determine register type for this local var.
3835 // tree - node that uses the local, its type is checked first.
3838 // TYP_UNDEF if the layout is not enregistrable, the register type otherwise.
3840 var_types LclVarDsc::GetRegisterType(const GenTreeLclVarCommon* tree) const
3842 var_types targetType = tree->TypeGet();
3844 if (targetType == TYP_STRUCT)
3846 ClassLayout* layout;
3847 if (tree->OperIs(GT_LCL_FLD, GT_STORE_LCL_FLD))
3849 layout = tree->AsLclFld()->GetLayout();
3853 assert((TypeGet() == TYP_STRUCT) && tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR));
3854 layout = GetLayout();
3857 targetType = layout->GetRegisterType();
3861 if ((targetType != TYP_UNDEF) && tree->OperIs(GT_STORE_LCL_VAR) && lvNormalizeOnStore())
3863 const bool phiStore = (tree->gtGetOp1()->OperIsNonPhiLocal() == false);
        // Ensure that the lclVar node is typed correctly; this check does not apply to phi stores
        // because they do not produce code in the merge block.
3866 assert(phiStore || targetType == genActualType(TypeGet()));
3872 //------------------------------------------------------------------------
3873 // GetRegisterType: Determine register type for this local var.
3876 // TYP_UNDEF if the layout is not enregistrable, the register type otherwise.
3878 var_types LclVarDsc::GetRegisterType() const
3880 if (TypeGet() != TYP_STRUCT)
3882 #if !defined(TARGET_64BIT)
3883 if (TypeGet() == TYP_LONG)
3890 assert(m_layout != nullptr);
3891 return m_layout->GetRegisterType();
3894 //------------------------------------------------------------------------
3895 // GetStackSlotHomeType:
3896 // Get the canonical type of the stack slot that this enregistrable local is
3897 // using when stored on the stack.
3900 // TYP_UNDEF if the layout is not enregistrable. Otherwise returns the type
3901 // of the stack slot home for the local.
3904 // This function always returns a canonical type: for all 4-byte types
3905 // (structs, floats, ints) it will return TYP_INT. It is meant to be used
3906 // when moving locals between register and stack. Because of this the
3907 // returned type is usually at least one 4-byte stack slot. However, there
3908 // are certain exceptions for promoted fields in OSR methods (that may refer
3909 // back to the original frame) and due to macOS arm64 where subsequent small
3910 // parameters can be packed into the same stack slot.
3912 var_types LclVarDsc::GetStackSlotHomeType() const
3914 if (varTypeIsSmall(TypeGet()))
3916 if (compMacOsArm64Abi() && lvIsParam && !lvIsRegArg)
3918 // Allocated by caller and potentially only takes up a small slot
3919 return GetRegisterType();
3922 if (lvIsOSRLocal && lvIsStructField)
3924 #if defined(TARGET_X86)
3925 // Revisit when we support OSR on x86
3928 return GetRegisterType();
3933 return genActualType(GetRegisterType());
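// For example: a TYP_USHORT local normally homes in a 4-byte stack slot, so this returns
// TYP_INT; but the same TYP_USHORT as a macOS arm64 stack-passed parameter keeps its small
// register type above, because the caller may have packed it into a 2-byte slot.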
3936 //----------------------------------------------------------------------------------------------
3937 // CanBeReplacedWithItsField: check if a whole struct reference could be replaced by a field.
3940 // comp - the compiler instance;
3943 // true if that can be replaced, false otherwise.
3946 // The replacement can be made only for independently promoted structs
3947 // with 1 field without holes.
3949 bool LclVarDsc::CanBeReplacedWithItsField(Compiler* comp) const
3956 if (comp->lvaGetPromotionType(this) != Compiler::PROMOTION_TYPE_INDEPENDENT)
3960 if (lvFieldCnt != 1)
3964 if (lvContainsHoles)
3969 #if defined(FEATURE_SIMD)
3970 // If we return `struct A { SIMD16 a; }` we split the struct into several fields.
3971 // In order to do that we have to have its field `a` in memory. Right now lowering cannot
3972 // handle RETURN struct(multiple registers)->SIMD16(one register), but it can be improved.
3973 LclVarDsc* fieldDsc = comp->lvaGetDesc(lvFieldLclStart);
3974 if (varTypeIsSIMD(fieldDsc))
3978 #endif // FEATURE_SIMD
3983 //------------------------------------------------------------------------
3984 // lvaMarkLclRefs: increment local var references counts and more
3987 // tree - some node in a tree
3988 // block - block that the tree node belongs to
3989 // stmt - stmt that the tree node belongs to
3990 // isRecompute - true if we should just recompute counts
3993 // Invoked via the MarkLocalVarsVisitor
3995 // Primarily increments the regular and weighted local var ref
3996 // counts for any local referred to directly by tree.
4000 // Accounts for implicit references to frame list root for
4001 // pinvokes that will be expanded later.
4003 // Determines if locals of TYP_BOOL can safely be considered
4004 // to hold only 0 or 1 or may have a broader range of true values.
4006 // Does some setup work for assertion prop, noting locals that are
4007 // eligible for assertion prop, single defs, and tracking which blocks
4010 // Looks for uses of generic context and sets lvaGenericsContextInUse.
4012 // In checked builds:
4014 // Verifies that local accesses are consistently typed.
4015 // Verifies that casts remain in bounds.
4017 void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, bool isRecompute)
4019 const weight_t weight = block->getBBWeight(this);
4021 /* Is this a call to unmanaged code ? */
4022 if (tree->IsCall() && compMethodRequiresPInvokeFrame())
4024 assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
4025 if (!opts.ShouldUsePInvokeHelpers())
4027 /* Get the special variable descriptor */
4028 LclVarDsc* varDsc = lvaGetDesc(info.compLvFrameListRoot);
4030 /* Increment the ref counts twice */
4031 varDsc->incRefCnts(weight, this);
4032 varDsc->incRefCnts(weight, this);
4036 if (tree->OperIs(GT_LCL_ADDR))
4038 LclVarDsc* varDsc = lvaGetDesc(tree->AsLclVarCommon());
4039 assert(varDsc->IsAddressExposed() || varDsc->IsHiddenBufferStructArg());
4040 varDsc->incRefCnts(weight, this);
4044 if (!tree->OperIsLocal())
4049 /* This must be a local variable reference */
4051 // See if this is a generics context use.
4052 if ((tree->gtFlags & GTF_VAR_CONTEXT) != 0)
4054 assert(tree->OperIs(GT_LCL_VAR));
4055 if (!lvaGenericsContextInUse)
4057 JITDUMP("-- generic context in use at [%06u]\n", dspTreeID(tree));
4058 lvaGenericsContextInUse = true;
4062 unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
4063 LclVarDsc* varDsc = lvaGetDesc(lclNum);
4065 /* Increment the reference counts */
4067 varDsc->incRefCnts(weight, this);
4070 if (varDsc->lvIsStructField)
4072 // If ref count was increased for struct field, ensure that the
4073 // parent struct is still promoted.
4074 LclVarDsc* parentStruct = lvaGetDesc(varDsc->lvParentLcl);
4075 assert(!parentStruct->lvUndoneStructPromotion);
4081 if (varDsc->IsAddressExposed())
4083 varDsc->lvIsBoolean = false;
4084 varDsc->lvAllDefsAreNoGc = false;
4087 if (!tree->OperIsScalarLocal())
4092 if (fgDomsComputed && IsDominatedByExceptionalEntry(block))
4094 SetVolatileHint(varDsc);
4097 if (tree->OperIs(GT_STORE_LCL_VAR))
4099 GenTree* value = tree->AsLclVar()->Data();
4101 if (varDsc->lvPinned && varDsc->lvAllDefsAreNoGc && !value->IsNotGcDef())
4103 varDsc->lvAllDefsAreNoGc = false;
4106 if (value->gtType != TYP_BOOL)
4108 // Is the value clearly a boolean one?
4109 switch (value->gtOper)
4112 if (value->AsIntCon()->gtIconVal == 0)
4116 if (value->AsIntCon()->gtIconVal == 1)
4121 // Not 0 or 1, fall through ....
4124 if (value->OperIsCompare())
4129 varDsc->lvIsBoolean = false;
4134 if (!varDsc->lvDisqualifySingleDefRegCandidate) // If this var is already disqualified, we can skip this
4136 bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0;
4137 bool bbIsReturn = block->bbJumpKind == BBJ_RETURN;
            // TODO: Zero-inits in LSRA are created with the condition below. But if we filter based on that condition,
            // we filter out a lot of interesting variables that would otherwise benefit from EH var enregistration.
4140 // bool needsExplicitZeroInit = !varDsc->lvIsParam && (info.compInitMem ||
4141 // varTypeIsGC(varDsc->TypeGet()));
4142 bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn);
4144 if (varDsc->lvSingleDefRegCandidate || needsExplicitZeroInit)
4147 if (needsExplicitZeroInit)
4149 varDsc->lvSingleDefDisqualifyReason = 'Z';
4150 JITDUMP("V%02u needs explicit zero init. Disqualified as a single-def register candidate.\n",
4155 varDsc->lvSingleDefDisqualifyReason = 'M';
4156 JITDUMP("V%02u has multiple definitions. Disqualified as a single-def register candidate.\n",
4161 varDsc->lvSingleDefRegCandidate = false;
4162 varDsc->lvDisqualifySingleDefRegCandidate = true;
4164 else if (!varDsc->lvDoNotEnregister)
            // Variables can be marked as DoNotEnregister in earlier stages like LocalAddressVisitor.
            // No need to track them for single-def.
4168 CLANG_FORMAT_COMMENT_ANCHOR;
4170 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
            // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister
            // such variables. In the future, we should enable enregistration for them.
4173 if (!varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType()))
4176 varDsc->lvSingleDefRegCandidate = true;
4177 JITDUMP("Marking EH Var V%02u as a register candidate.\n", lclNum);
4183 // Check that the LCL_VAR node has the same type as the underlying variable, save a few mismatches we allow.
4184 assert(tree->TypeIs(varDsc->TypeGet(), genActualType(varDsc)) ||
4185 (tree->TypeIs(TYP_I_IMPL) && (varDsc->TypeGet() == TYP_BYREF)) || // Created for spill clique import.
4186 (tree->TypeIs(TYP_BYREF) && (varDsc->TypeGet() == TYP_I_IMPL)) || // Created by inliner substitution.
4187 (tree->TypeIs(TYP_INT) && (varDsc->TypeGet() == TYP_LONG))); // Created by "optNarrowTree".
4191 //------------------------------------------------------------------------
// IsDominatedByExceptionalEntry: Check whether the block is dominated by an exceptional entry block.
//    block - the block to check.
4197 bool Compiler::IsDominatedByExceptionalEntry(BasicBlock* block)
4199 assert(fgDomsComputed);
4200 return block->IsDominatedByExceptionalEntryFlag();
4203 //------------------------------------------------------------------------
4204 // SetVolatileHint: Set a local var's volatile hint.
4207 // varDsc - the local variable that needs the hint.
4209 void Compiler::SetVolatileHint(LclVarDsc* varDsc)
4211 varDsc->lvVolatileHint = true;
4214 //------------------------------------------------------------------------
4215 // lvaMarkLocalVars: update local var ref counts for IR in a basic block
4218 // block - the block in question
4219 // isRecompute - true if counts are being recomputed
4222 // Invokes lvaMarkLclRefs on each tree node for each
4223 // statement in the block.
4225 void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute)
4227 class MarkLocalVarsVisitor final : public GenTreeVisitor<MarkLocalVarsVisitor>
4230 BasicBlock* m_block;
4240 MarkLocalVarsVisitor(Compiler* compiler, BasicBlock* block, Statement* stmt, bool isRecompute)
4241 : GenTreeVisitor<MarkLocalVarsVisitor>(compiler), m_block(block), m_stmt(stmt), m_isRecompute(isRecompute)
4245 Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
4247 // TODO: Stop passing isRecompute once we are sure that this assert is never hit.
4248 assert(!m_isRecompute);
4249 m_compiler->lvaMarkLclRefs(*use, m_block, m_stmt, m_isRecompute);
4250 return WALK_CONTINUE;
4254 JITDUMP("\n*** %s local variables in block " FMT_BB " (weight=%s)\n", isRecompute ? "recomputing" : "marking",
4255 block->bbNum, refCntWtd2str(block->getBBWeight(this)));
4257 for (Statement* const stmt : block->NonPhiStatements())
4259 MarkLocalVarsVisitor visitor(this, block, stmt, isRecompute);
4261 visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
4265 //------------------------------------------------------------------------
4266 // lvaMarkLocalVars: enable normal ref counting, compute initial counts, sort locals table
4269 // suitable phase status
4272 // Now behaves differently in minopts / debug. Instead of actually inspecting
4273 // the IR and counting references, the jit assumes all locals are referenced
4274 // and does not sort the locals table.
4276 // Also, when optimizing, lays the groundwork for assertion prop and more.
4277 // See details in lvaMarkLclRefs.
4279 PhaseStatus Compiler::lvaMarkLocalVars()
4281 JITDUMP("\n*************** In lvaMarkLocalVars()");
4283 // If we have direct pinvokes, verify the frame list root local was set up properly
4284 if (compMethodRequiresPInvokeFrame())
4286 assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
4287 if (!opts.ShouldUsePInvokeHelpers())
4289 noway_assert(info.compLvFrameListRoot >= info.compLocalsCount && info.compLvFrameListRoot < lvaCount);
4293 unsigned const lvaCountOrig = lvaCount;
4295 #if !defined(FEATURE_EH_FUNCLETS)
4297 // Grab space for exception handling
4299 if (ehNeedsShadowSPslots())
4301 // The first slot is reserved for ICodeManager::FixContext(ppEndRegion)
4302 // ie. the offset of the end-of-last-executed-filter
4303 unsigned slotsNeeded = 1;
4305 unsigned handlerNestingLevel = ehMaxHndNestingCount;
4307 if (opts.compDbgEnC && (handlerNestingLevel < (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL))
4308 handlerNestingLevel = (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL;
4310 slotsNeeded += handlerNestingLevel;
4312 // For a filter (which can be active at the same time as a catch/finally handler)
4314 // For zero-termination of the shadow-Stack-pointer chain
4317 lvaShadowSPslotsVar = lvaGrabTempWithImplicitUse(false DEBUGARG("lvaShadowSPslotsVar"));
4318 lvaSetStruct(lvaShadowSPslotsVar, typGetBlkLayout(slotsNeeded * TARGET_POINTER_SIZE), false);
4319 lvaSetVarAddrExposed(lvaShadowSPslotsVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY));
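        // Illustrative count (hypothetical nesting): with a maximum handler nesting level of 2 and
        // no EnC, the block above is sized for 1 (FixContext) + 2 (nesting) + 1 (filter)
        // + 1 (zero termination) = 5 pointer-sized shadow-SP slots.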
4322 #endif // !FEATURE_EH_FUNCLETS
4324 // PSPSym and LocAllocSPvar are not used by the NativeAOT ABI
4325 if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI))
4327 #if defined(FEATURE_EH_FUNCLETS)
4328 if (ehNeedsPSPSym())
4330 lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
4331 LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym);
4332 lclPSPSym->lvType = TYP_I_IMPL;
4333 lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr));
4335 #endif // FEATURE_EH_FUNCLETS
4337 #ifdef JIT32_GCENCODER
4338 // LocAllocSPvar is only required by the implicit frame layout expected by the VM on x86. Whether
4339 // a function contains a Localloc is conveyed in the GC information, in the InfoHdrSmall.localloc
4340 // field. The function must have an EBP frame. Then, the VM finds the LocAllocSP slot by assuming
4341 // the following stack layout:
4343 // -- higher addresses --
4344 // saved EBP <-- EBP points here
4345 // other callee-saved registers // InfoHdrSmall.savedRegsCountExclFP specifies this size
4346 // optional GS cookie // InfoHdrSmall.security is 1 if this exists
4348 // -- lower addresses --
4350 // See also eetwain.cpp::GetLocallocSPOffset() and its callers.
4351 if (compLocallocUsed)
4353 lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
4354 LclVarDsc* locAllocSPvar = lvaGetDesc(lvaLocAllocSPvar);
4355 locAllocSPvar->lvType = TYP_I_IMPL;
4357 #endif // JIT32_GCENCODER
4360 // Ref counting is now enabled normally.
4361 lvaRefCountState = RCS_NORMAL;
4364 const bool setSlotNumbers = true;
4366 const bool setSlotNumbers = opts.compScopeInfo && (info.compVarScopesCount > 0);
4367 #endif // defined(DEBUG)
4369 const bool isRecompute = false;
4370 lvaComputeRefCounts(isRecompute, setSlotNumbers);
4372 // If we don't need precise reference counts, e.g. we're not optimizing, we're done.
4373 if (!PreciseRefCountsRequired())
4375 // This phase may add new locals
4377 return (lvaCount != lvaCountOrig) ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
4380 const bool reportParamTypeArg = lvaReportParamTypeArg();
4382 // Update bookkeeping on the generic context.
4383 if (lvaKeepAliveAndReportThis())
4385 lvaGetDesc(0u)->lvImplicitlyReferenced = reportParamTypeArg;
4387 else if (lvaReportParamTypeArg())
4389 // We should have a context arg.
4390 assert(info.compTypeCtxtArg != (int)BAD_VAR_NUM);
4391 lvaGetDesc(info.compTypeCtxtArg)->lvImplicitlyReferenced = reportParamTypeArg;
4394 assert(PreciseRefCountsRequired());
4396 // This phase may add new locals.
4398 return (lvaCount != lvaCountOrig) ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
4401 //------------------------------------------------------------------------
4402 // lvaComputeRefCounts: compute ref counts for locals
4405 // isRecompute -- true if we just want ref counts and no other side effects;
4406 // false means to also look for true boolean locals, lay
4407 // groundwork for assertion prop, check type consistency, etc.
4408 // See lvaMarkLclRefs for details on what else goes on.
4409 // setSlotNumbers -- true if local slot numbers should be assigned.
4412 // Some implicit references are given actual counts or weight bumps here
4413 // to match pre-existing behavior.
4415 // In fast-jitting modes where we don't ref count locals, this bypasses
4416 // actual counting, and makes all locals implicitly referenced on first
4417 // compute. It asserts all locals are implicitly referenced on recompute.
4419 // When optimizing we also recompute lvaGenericsContextInUse based
4420 // on specially flagged LCL_VAR appearances.
4422 void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
4424 JITDUMP("\n*** lvaComputeRefCounts ***\n");
4425 unsigned lclNum = 0;
4426 LclVarDsc* varDsc = nullptr;
4428 // Fast path for minopts and debug codegen.
4430 // On first compute: mark all locals as implicitly referenced and untracked.
4431 // On recompute: do nothing.
4432 if (!PreciseRefCountsRequired())
4438 // All local vars should be marked as implicitly referenced
4440 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4442 const bool isSpecialVarargsParam = varDsc->lvIsParam && raIsVarargsStackArg(lclNum);
4444 if (isSpecialVarargsParam)
4446 assert(varDsc->lvRefCnt() == 0);
4450 assert(varDsc->lvImplicitlyReferenced);
4453 assert(!varDsc->lvTracked);
4455 #endif // defined (DEBUG)
4461 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4463 // Using lvImplicitlyReferenced here ensures that we can't
4464 // accidentally make locals be unreferenced later by decrementing
4465 // the ref count to zero.
4467 // If, in minopts/debug, we really want to allow locals to become
4468 // unreferenced later, we'll have to explicitly clear this bit.
4469 varDsc->setLvRefCnt(0);
4470 varDsc->setLvRefCntWtd(BB_ZERO_WEIGHT);
4472 // Special case for some varargs params ... these must
4473 // remain unreferenced.
4474 const bool isSpecialVarargsParam = varDsc->lvIsParam && raIsVarargsStackArg(lclNum);
4476 if (!isSpecialVarargsParam)
4478 varDsc->lvImplicitlyReferenced = 1;
4481 varDsc->lvTracked = 0;
4485 varDsc->lvSlotNum = lclNum;
4488 // Assert that it's ok to bypass the type repair logic in lvaMarkLclRefs
4489 assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
4493 lvaTrackedCount = 0;
4494 lvaTrackedCountInSizeTUnits = 0;
4498 // Slower path we take when optimizing, to get accurate counts.
4500 // First, reset all explicit ref counts and weights.
4501 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4503 varDsc->setLvRefCnt(0);
4504 varDsc->setLvRefCntWtd(BB_ZERO_WEIGHT);
4508 varDsc->lvSlotNum = lclNum;
4511 // Set initial value for lvSingleDef for explicit and implicit
4512 // argument locals as they are "defined" on entry.
4513 // However, if we are just recomputing the ref counts, retain the value
4514 // that was set by past phases.
4517 varDsc->lvSingleDef = varDsc->lvIsParam;
4518 varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam;
4520 varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false);
4524 // Remember current state of generic context use, and prepare
4525 // to compute new state.
4526 const bool oldLvaGenericsContextInUse = lvaGenericsContextInUse;
4527 lvaGenericsContextInUse = false;
4529 JITDUMP("\n*** lvaComputeRefCounts -- explicit counts ***\n");
4531 // Second, account for all explicit local variable references
4532 for (BasicBlock* const block : Blocks())
4536 assert(isRecompute);
4538 const weight_t weight = block->getBBWeight(this);
4539 for (GenTree* node : LIR::AsRange(block))
4541 if (node->OperIsAnyLocal())
4543 LclVarDsc* varDsc = lvaGetDesc(node->AsLclVarCommon());
                // If this is an EH var, use a zero weight for defs, so that we don't count those in our
                // register allocation heuristic: EH vars must always be stored to the stack, so there is
                // no value in enregistering them for their defs; enregistration only pays off if there
                // are enough uses to justify it.
4548 if (varDsc->lvLiveInOutOfHndlr && !varDsc->lvDoNotEnregister &&
4549 ((node->gtFlags & GTF_VAR_DEF) != 0))
4551 varDsc->incRefCnts(0, this);
4555 varDsc->incRefCnts(weight, this);
4558 if ((node->gtFlags & GTF_VAR_CONTEXT) != 0)
4560 assert(node->OperIs(GT_LCL_VAR));
4561 lvaGenericsContextInUse = true;
4568 lvaMarkLocalVars(block, isRecompute);
4572 if (oldLvaGenericsContextInUse && !lvaGenericsContextInUse)
4574 // Context was in use but no longer is. This can happen
4575 // if we're able to optimize, so just leave a note.
4576 JITDUMP("\n** Generics context no longer in use\n");
4578 else if (lvaGenericsContextInUse && !oldLvaGenericsContextInUse)
4580 // Context was not in use but now is.
4582 // Changing from unused->used should never happen; creation of any new IR
4583 // for context use should also be setting lvaGenericsContextInUse.
4584 assert(!"unexpected new use of generics context");
4587 JITDUMP("\n*** lvaComputeRefCounts -- implicit counts ***\n");
4589 // Third, bump ref counts for some implicit prolog references
4590 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4592 // Todo: review justification for these count bumps.
4593 if (varDsc->lvIsRegArg)
4595 if ((lclNum < info.compArgsCount) && (varDsc->lvRefCnt() > 0))
4597 // Fix 388376 ARM JitStress WP7
4598 varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
4599 varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
4602 // Ref count bump that was in lvaPromoteStructVar
4604 // This was formerly done during RCS_EARLY counting,
        // and we did not reset counts back then like we do now.
4606 if (varDsc->lvIsStructField && varTypeIsStruct(lvaGetDesc(varDsc->lvParentLcl)))
4608 varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
4612 // If we have JMP, all arguments must have a location
4613 // even if we don't use them inside the method
4614 if (compJmpOpUsed && varDsc->lvIsParam && (varDsc->lvRefCnt() == 0))
4616 // except when we have varargs and the argument is
4617 // passed on the stack. In that case, it's important
4618 // for the ref count to be zero, so that we don't attempt
4619 // to track them for GC info (which is not possible since we
4620 // don't know their offset in the stack). See the assert at the
4621 // end of raMarkStkVars and bug #28949 for more info.
4622 if (!raIsVarargsStackArg(lclNum))
4624 varDsc->lvImplicitlyReferenced = 1;
4628 if (varDsc->lvPinned && varDsc->lvAllDefsAreNoGc)
4630 varDsc->lvPinned = 0;
4632 JITDUMP("V%02u was unpinned as all def candidates were local.\n", lclNum);
4637 void Compiler::lvaAllocOutgoingArgSpaceVar()
4639 #if FEATURE_FIXED_OUT_ARGS
4641 // Setup the outgoing argument region, in case we end up using it later
4643 if (lvaOutgoingArgSpaceVar == BAD_VAR_NUM)
4645 lvaOutgoingArgSpaceVar = lvaGrabTempWithImplicitUse(false DEBUGARG("OutgoingArgSpace"));
4646 lvaSetStruct(lvaOutgoingArgSpaceVar, typGetBlkLayout(0), false);
4647 lvaSetVarAddrExposed(lvaOutgoingArgSpaceVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY));
4650 noway_assert(lvaOutgoingArgSpaceVar >= info.compLocalsCount && lvaOutgoingArgSpaceVar < lvaCount);
4652 #endif // FEATURE_FIXED_OUT_ARGS
4655 inline void Compiler::lvaIncrementFrameSize(unsigned size)
4657 if (size > MAX_FrameSize || compLclFrameSize + size > MAX_FrameSize)
4659 BADCODE("Frame size overflow");
4662 compLclFrameSize += size;
4665 /****************************************************************************
 *  Return true if the absolute offsets of temps are larger than those of vars; in other
 *  words, whether we allocated temps before or after vars. The /GS buffer overrun
 *  checks want temps to be at lower stack addresses than buffers.
4671 bool Compiler::lvaTempsHaveLargerOffsetThanVars()
4674 // We never want to place the temps with larger offsets for ARM
4677 if (compGSReorderStackLayout)
4679 return codeGen->isFramePointerUsed();
4688 /****************************************************************************
4690 * Return an upper bound estimate for the size of the compiler spill temps
4693 unsigned Compiler::lvaGetMaxSpillTempSize()
4695 unsigned result = 0;
4697 if (codeGen->regSet.hasComputedTmpSize())
4699 result = codeGen->regSet.tmpGetTotalSize();
4703 result = MAX_SPILL_TEMP_SIZE;
4709 /*****************************************************************************
4711 * Compute stack frame offsets for arguments, locals and optionally temps.
4713 * The frame is laid out as follows for x86:
4718 * |-----------------------|
4721 * |-----------------------| <---- Virtual '0'
4722 * | return address |
4723 * +=======================+
4724 * |Callee saved registers |
4725 * |-----------------------|
4727 * |-----------------------|
4729 * |-----------------------| <---- Ambient ESP
4730 * | Arguments for the |
4742 * |-----------------------|
4745 * |-----------------------| <---- Virtual '0'
4746 * | return address |
4747 * +=======================+
4749 * |-----------------------| <---- EBP
4750 * |Callee saved registers |
4751 * |-----------------------|
4752 * | security object |
4753 * |-----------------------|
4755 * |-----------------------|
4756 * | Last-executed-filter |
4757 * |-----------------------|
4761 * |-----------------------|
4765 * ~-----------------------|
4767 * |-----------------------|
4769 * |-----------------------| <---- Ambient ESP
4770 * | Arguments for the |
4779 * The frame is laid out as follows for x64:
4783 * |-----------------------|
4786 * |-----------------------|
4787 * | 4 fixed incoming |
4788 * | argument slots |
4789 * |-----------------------| <---- Caller's SP & Virtual '0'
4790 * | return address |
4791 * +=======================+
4792 * | Callee saved Int regs |
4793 * -------------------------
4794 * | Padding | <---- this padding (0 or 8 bytes) is to ensure flt registers are saved at a mem location aligned at 16-bytes
4795 * | | so that we can save 128-bit callee saved xmm regs using performant "movaps" instruction instead of "movups"
4796 * -------------------------
4797 * | Callee saved Flt regs | <----- entire 128-bits of callee saved xmm registers are stored here
4798 * |-----------------------|
4800 * |-----------------------|
4802 * |-----------------------|
4803 * | Arguments for the |
4806 * |-----------------------|
4807 * | 4 fixed outgoing |
4808 * | argument slots |
4809 * |-----------------------| <---- Ambient RSP
4818 * |-----------------------|
4821 * |-----------------------|
4822 * | 4 fixed incoming |
4823 * | argument slots |
4824 * |-----------------------| <---- Caller's SP & Virtual '0'
4825 * | return address |
4826 * +=======================+
4827 * | Callee saved Int regs |
4828 * -------------------------
4830 * -------------------------
4831 * | Callee saved Flt regs |
4832 * |-----------------------|
4833 * | security object |
4834 * |-----------------------|
4836 * |-----------------------|
4842 * |-----------------------|
4844 * |-----------------------|
4846 * ~ localloc ~ // not in frames with EH
4848 * |-----------------------|
4849 * | PSPSym | // only in frames with EH (thus no localloc)
4851 * |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP)
4852 * | Arguments for the |
4855 * |-----------------------|
4856 * | 4 fixed outgoing |
4857 * | argument slots |
4858 * |-----------------------| <---- Ambient RSP (before localloc, this is Initial-SP)
4865 * The frame is laid out as follows for ARM (this is a general picture; details may differ for different conditions):
4869 * |-----------------------|
4872 * +=======================+ <---- Caller's SP
4873 * | Pre-spill registers |
4874 * |-----------------------| <---- Virtual '0'
4875 * |Callee saved registers |
4876 * |-----------------------|
4877 * ~ possible double align ~
4878 * |-----------------------|
4879 * | security object |
4880 * |-----------------------|
4882 * |-----------------------|
4883 * | possible GS cookie |
4884 * |-----------------------|
4886 * |-----------------------|
4887 * | possible GS cookie |
4888 * |-----------------------|
4890 * |-----------------------|
4891 * | Stub Argument Var |
4892 * |-----------------------|
4893 * |Inlined PInvoke Frame V|
4894 * |-----------------------|
4895 * ~ possible double align ~
4896 * |-----------------------|
4897 * | Arguments for the |
4900 * |-----------------------| <---- Ambient SP
4909 * |-----------------------|
4912 * +=======================+ <---- Caller's SP
4913 * | Pre-spill registers |
4914 * |-----------------------| <---- Virtual '0'
4915 * |Callee saved registers |
4916 * |-----------------------|
4917 * | PSPSym | // Only for frames with EH, which means FP-based frames
4918 * |-----------------------|
4919 * ~ possible double align ~
4920 * |-----------------------|
4921 * | security object |
4922 * |-----------------------|
4924 * |-----------------------|
4925 * | possible GS cookie |
4926 * |-----------------------|
4928 * |-----------------------|
4929 * | possible GS cookie |
4930 * |-----------------------|
4932 * |-----------------------|
4933 * | Stub Argument Var |
4934 * |-----------------------|
4935 * |Inlined PInvoke Frame V|
4936 * |-----------------------|
4937 * ~ possible double align ~
4938 * |-----------------------|
4940 * |-----------------------|
4941 * | Arguments for the |
4944 * |-----------------------| <---- Ambient SP
4951 * The frame is laid out as follows for ARM64 (this is a general picture; details may differ for different conditions):
4952 * NOTE: SP must be 16-byte aligned, so there may be alignment slots in the frame.
4953 * We will often save and establish a frame pointer to create better ETW stack walks.
4957 * |-----------------------|
4960 * +=======================+ <---- Caller's SP
4961 * | homed | // this is only needed if reg argument need to be homed, e.g., for varargs
4962 * | register arguments |
4963 * |-----------------------| <---- Virtual '0'
4964 * |Callee saved registers |
4966 * |-----------------------|
4967 * | security object |
4968 * |-----------------------|
4970 * |-----------------------|
4971 * | possible GS cookie |
4972 * |-----------------------|
4974 * |-----------------------|
4975 * | possible GS cookie |
4976 * |-----------------------|
4978 * |-----------------------|
4979 * | Stub Argument Var |
4980 * |-----------------------|
4981 * |Inlined PInvoke Frame V|
4982 * |-----------------------|
4984 * |-----------------------|
4985 * | Saved FP | <---- Frame pointer
4986 * |-----------------------|
4987 * | Stack arguments for |
4988 * | the next function |
4989 * |-----------------------| <---- SP
4996 * FP (R29 / x29) frames
4998 * |-----------------------|
5001 * +=======================+ <---- Caller's SP
5002 * | optional homed | // this is only needed if reg argument need to be homed, e.g., for varargs
5003 * | register arguments |
5004 * |-----------------------| <---- Virtual '0'
5005 * |Callee saved registers |
5007 * |-----------------------|
5008 * | PSPSym | // Only for frames with EH, which requires FP-based frames
5009 * |-----------------------|
5010 * | security object |
5011 * |-----------------------|
5013 * |-----------------------|
5014 * | possible GS cookie |
5015 * |-----------------------|
5017 * |-----------------------|
5018 * | possible GS cookie |
5019 * |-----------------------|
5021 * |-----------------------|
5022 * | Stub Argument Var |
5023 * |-----------------------|
5024 * |Inlined PInvoke Frame V|
5025 * |-----------------------|
5027 * |-----------------------|
5028 * | Saved FP | <---- Frame pointer
5029 * |-----------------------|
5031 * |-----------------------|
5032 * | Stack arguments for |
5033 * | the next function |
5034 * |-----------------------| <---- Ambient SP
5041 * FP (R29 / x29) frames where FP/LR are stored at the top of the frame (frames requiring GS that have localloc)
5043 * |-----------------------|
5046 * +=======================+ <---- Caller's SP
5047 * | optional homed | // this is only needed if reg argument need to be homed, e.g., for varargs
5048 * | register arguments |
5049 * |-----------------------| <---- Virtual '0'
5051 * |-----------------------|
5052 * | Saved FP | <---- Frame pointer
5053 * |-----------------------|
5054 * |Callee saved registers |
5055 * |-----------------------|
5056 * | PSPSym | // Only for frames with EH, which requires FP-based frames
5057 * |-----------------------|
5058 * | security object |
5059 * |-----------------------|
5061 * |-----------------------|
5062 * | possible GS cookie |
5063 * |-----------------------|
5065 * |-----------------------|
5066 * | possible GS cookie |
5067 * |-----------------------|
5069 * |-----------------------|
5070 * | Stub Argument Var |
5071 * |-----------------------|
5072 * |Inlined PInvoke Frame V|
5073 * |-----------------------|
5075 * |-----------------------|
5076 * | Stack arguments for |
5077 * | the next function |
5078 * |-----------------------| <---- Ambient SP
5085 * Doing this all in one pass is 'hard'. So instead we do it in 2 basic passes:
5086 * 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the
5087 * incoming arguments) are positive. Offsets below (everything else) are
 *  negative. This pass also calculates the total frame size (between Caller's
5089 * SP/return address and the Ambient SP).
5090 * 2. Figure out where to place the frame pointer, and then adjust the offsets
5091 * as needed for the final stack size and whether the offset is frame pointer
5092 * relative or stack pointer relative.
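 *  Illustrative sketch (hypothetical numbers): suppose pass 1 gives a local the virtual
 *  offset -0x18 relative to Virtual '0', and pass 2 later computes a delta of +0x40 between
 *  Virtual '0' and the chosen base register (SP or FP). The local's final offset is then
 *  -0x18 + 0x40 = +0x28 from that base register; incoming stack arguments, which have
 *  positive virtual offsets, are adjusted by the same delta.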
5097 void Compiler::lvaAssignFrameOffsets(FrameLayoutState curState)
5099 noway_assert((lvaDoneFrameLayout < curState) || (curState == REGALLOC_FRAME_LAYOUT));
5101 lvaDoneFrameLayout = curState;
5107 printf("*************** In lvaAssignFrameOffsets");
5108 if (curState == INITIAL_FRAME_LAYOUT)
5110 printf("(INITIAL_FRAME_LAYOUT)");
5112 else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
5114 printf("(PRE_REGALLOC_FRAME_LAYOUT)");
5116 else if (curState == REGALLOC_FRAME_LAYOUT)
5118 printf("(REGALLOC_FRAME_LAYOUT)");
5120 else if (curState == TENTATIVE_FRAME_LAYOUT)
5122 printf("(TENTATIVE_FRAME_LAYOUT)");
5124 else if (curState == FINAL_FRAME_LAYOUT)
5126 printf("(FINAL_FRAME_LAYOUT)");
5130 printf("(UNKNOWN)");
5137 #if FEATURE_FIXED_OUT_ARGS
5138 assert(lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
5139 #endif // FEATURE_FIXED_OUT_ARGS
5141 /*-------------------------------------------------------------------------
5143 * First process the arguments.
5145 *-------------------------------------------------------------------------
5148 lvaAssignVirtualFrameOffsetsToArgs();
5150 /*-------------------------------------------------------------------------
5152 * Now compute stack offsets for any variables that don't live in registers
5154 *-------------------------------------------------------------------------
5157 lvaAssignVirtualFrameOffsetsToLocals();
5161 /*-------------------------------------------------------------------------
5163 * Now patch the offsets
5165 *-------------------------------------------------------------------------
5168 lvaFixVirtualFrameOffsets();
5170 // Modify the stack offset for fields of promoted structs.
5171 lvaAssignFrameOffsetsToPromotedStructs();
5173 /*-------------------------------------------------------------------------
5177 *-------------------------------------------------------------------------
5180 // If it's not the final frame layout, then it's just an estimate. This means
5181 // we're allowed to once again write to these variables, even if we've read
5182 // from them to make tentative code generation or frame layout decisions.
5183 if (curState < FINAL_FRAME_LAYOUT)
5185 codeGen->resetFramePointerUsedWritePhase();
5189 /*****************************************************************************
5190 * lvaFixVirtualFrameOffsets() : Now that everything has a virtual offset,
5191 * determine the final value for the frame pointer (if needed) and then
5192 * adjust all the offsets appropriately.
5194 * This routine fixes virtual offset to be relative to frame pointer or SP
5195 * based on whether varDsc->lvFramePointerBased is true or false respectively.
5197 void Compiler::lvaFixVirtualFrameOffsets()
5201 #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_AMD64)
5202 if (lvaPSPSym != BAD_VAR_NUM)
5204 // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space.
5205 // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between
5206 // the PSPSym and the outgoing argument space.
5207 varDsc = lvaGetDesc(lvaPSPSym);
5208 assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
5209 assert(!varDsc->lvMustInit); // It is never "must init".
5210 varDsc->SetStackOffset(codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar));
5214 // With OSR RBP points at the base of the OSR frame, but the virtual offsets
5215 // are from the base of the Tier0 frame. Adjust.
5217 varDsc->SetStackOffset(varDsc->GetStackOffset() - info.compPatchpointInfo->TotalFrameSize());
5222 // The delta to be added to virtual offset to adjust it relative to frame pointer or SP
5226 delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
5227 JITDUMP("--- delta bump %d for RA\n", REGSIZE_BYTES);
5229 if (codeGen->doubleAlignOrFramePointerUsed())
5231 JITDUMP("--- delta bump %d for FP\n", REGSIZE_BYTES);
5232 delta += REGSIZE_BYTES; // pushed EBP (frame pointer)
5236 if (!codeGen->isFramePointerUsed())
5238 // pushed registers, return address, and padding
5239 JITDUMP("--- delta bump %d for RSP frame\n", codeGen->genTotalFrameSize());
5240 delta += codeGen->genTotalFrameSize();
5242 #if defined(TARGET_ARM)
5245 // We set FP to be after LR, FP
5246 delta += 2 * REGSIZE_BYTES;
5248 #elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5252 JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta());
5253 delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
5255 #endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
5259 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5260 // Stack offset includes Tier0 frame.
5262 JITDUMP("--- delta bump %d for OSR + Tier0 frame\n", info.compPatchpointInfo->TotalFrameSize());
5263 delta += info.compPatchpointInfo->TotalFrameSize();
5267 JITDUMP("--- virtual stack offset to actual stack offset delta is %d\n", delta);
5270 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5272 bool doAssignStkOffs = true;
5274 // Can't be relative to EBP unless we have an EBP
5275 noway_assert(!varDsc->lvFramePointerBased || codeGen->doubleAlignOrFramePointerUsed());
5277 // Is this a non-param promoted struct field?
5278 // if so then set doAssignStkOffs to false.
5280 if (varDsc->lvIsStructField)
5282 LclVarDsc* parentvarDsc = lvaGetDesc(varDsc->lvParentLcl);
5283 lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
5285 #if defined(TARGET_X86)
5286 // On x86, we set the stack offset for a promoted field
5287 // to match a struct parameter in lvAssignFrameOffsetsToPromotedStructs.
5288 if ((!varDsc->lvIsParam || parentvarDsc->lvIsParam) && promotionType == PROMOTION_TYPE_DEPENDENT)
5290 if (!varDsc->lvIsParam && promotionType == PROMOTION_TYPE_DEPENDENT)
5293 doAssignStkOffs = false; // Assigned later in lvaAssignFrameOffsetsToPromotedStructs()
5297 if (!varDsc->lvOnFrame)
5299 if (!varDsc->lvIsParam
5300 #if !defined(TARGET_AMD64)
5301 || (varDsc->lvIsRegArg
5302 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
5303 && compIsProfilerHookNeeded() &&
5304 !lvaIsPreSpilled(lclNum, codeGen->regSet.rsMaskPreSpillRegs(false)) // We need assign stack offsets
5305 // for prespilled arguments
5308 #endif // !defined(TARGET_AMD64)
5311 doAssignStkOffs = false; // Not on frame or an incoming stack arg
5315 if (doAssignStkOffs)
5317 JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta);
5318 varDsc->SetStackOffset(varDsc->GetStackOffset() + delta);
5321 if (genDoubleAlign() && !codeGen->isFramePointerUsed())
5323 if (varDsc->lvFramePointerBased)
5325 varDsc->SetStackOffset(varDsc->GetStackOffset() - delta);
5327 // We need to re-adjust the offsets of the parameters so they are EBP
5328 // relative rather than stack/frame pointer relative
5330 varDsc->SetStackOffset(varDsc->GetStackOffset() +
5331 (2 * TARGET_POINTER_SIZE)); // return address and pushed EBP
5333 noway_assert(varDsc->GetStackOffset() >= FIRST_ARG_STACK_OFFS);
5337 // On System V environments the stkOffs could be 0 for params passed in registers.
5339 // For normal methods only EBP relative references can have negative offsets.
5340 assert(codeGen->isFramePointerUsed() || varDsc->GetStackOffset() >= 0);
5344 assert(codeGen->regSet.tmpAllFree());
5345 for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
5347 temp->tdAdjustTempOffs(delta);
5350 lvaCachedGenericContextArgOffs += delta;
5352 #if FEATURE_FIXED_OUT_ARGS
5354 if (lvaOutgoingArgSpaceVar != BAD_VAR_NUM)
5356 varDsc = lvaGetDesc(lvaOutgoingArgSpaceVar);
5357 varDsc->SetStackOffset(0);
5358 varDsc->lvFramePointerBased = false;
5359 varDsc->lvMustInit = false;
5362 #endif // FEATURE_FIXED_OUT_ARGS
5364 #if defined(TARGET_ARM64)
5365 // We normally add alignment below the locals between them and the outgoing
5366 // arg space area. When we store fp/lr(ra) at the bottom, however, this will
5367 // be below the alignment. So we should not apply the alignment adjustment to
5368 // them. It turns out we always store these at +0 and +8 of the FP,
5369 // so instead of dealing with skipping adjustment just for them we just set
5370 // them here always.
5371 assert(codeGen->isFramePointerUsed());
5372 if (lvaRetAddrVar != BAD_VAR_NUM)
5374 lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES);
5376 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5377 assert(codeGen->isFramePointerUsed());
5378 if (lvaRetAddrVar != BAD_VAR_NUM)
5380 // For LoongArch64 and RISCV64, the RA is below the fp. see the `genPushCalleeSavedRegisters`
5381 lvaTable[lvaRetAddrVar].SetStackOffset(-REGSIZE_BYTES);
5383 #endif // !TARGET_LOONGARCH64
5387 bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask)
5389 const LclVarDsc& desc = lvaTable[lclNum];
5390 return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.GetArgReg()));
5392 #endif // TARGET_ARM
5394 //------------------------------------------------------------------------
5395 // lvaUpdateArgWithInitialReg: Set the initial register of a local variable
5396 // to the one assigned by the register allocator.
5399 // varDsc - the local variable descriptor
5401 void Compiler::lvaUpdateArgWithInitialReg(LclVarDsc* varDsc)
5403 noway_assert(varDsc->lvIsParam);
5405 if (varDsc->lvIsRegCandidate())
5407 varDsc->SetRegNum(varDsc->GetArgInitReg());
5411 //------------------------------------------------------------------------
5412 // lvaUpdateArgsWithInitialReg() : For each argument variable descriptor, update
5413 // its current register with the initial register as assigned by LSRA.
5415 void Compiler::lvaUpdateArgsWithInitialReg()
5422 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
5424 LclVarDsc* varDsc = lvaGetDesc(lclNum);
5426 if (varDsc->lvPromoted)
5428 for (unsigned fieldVarNum = varDsc->lvFieldLclStart;
5429 fieldVarNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldVarNum)
5431 LclVarDsc* fieldVarDsc = lvaGetDesc(fieldVarNum);
5432 lvaUpdateArgWithInitialReg(fieldVarDsc);
5437 lvaUpdateArgWithInitialReg(varDsc);
5442 /*****************************************************************************
5443 * lvaAssignVirtualFrameOffsetsToArgs() : Assign virtual stack offsets to the
5444 * arguments, and implicit arguments (this ptr, return buffer, generics,
5447 void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
5449 unsigned lclNum = 0;
5451 #ifdef UNIX_AMD64_ABI
5452 int callerArgOffset = 0;
5453 #endif // UNIX_AMD64_ABI
5456 Assign stack offsets to arguments (in reverse order of passing).
5458 This means that if we pass arguments left->right, we start at
        the end of the list and work backwards; for right->left we start
5460 with the first argument and move forward.
5462 This is all relative to our Virtual '0'
5465 if (info.compArgOrder == Target::ARG_ORDER_L2R)
5467 argOffs = compArgSize;
5470 /* Update the argOffs to reflect arguments that are passed in registers */
5472 noway_assert(codeGen->intRegState.rsCalleeRegArgCount <= MAX_REG_ARG);
5473 noway_assert(compMacOsArm64Abi() || compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
5475 if (info.compArgOrder == Target::ARG_ORDER_L2R)
5477 argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
5480 // Update the arg initial register locations.
5481 lvaUpdateArgsWithInitialReg();
5483 /* Is there a "this" argument? */
5485 if (!info.compIsStatic)
5487 noway_assert(lclNum == info.compThisArg);
5490 lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5491 #endif // TARGET_X86
5495 unsigned userArgsToSkip = 0;
5496 #if !defined(TARGET_ARM)
5497 // In the native instance method calling convention on Windows,
5498 // the this parameter comes before the hidden return buffer parameter.
5499 // So, we want to process the native "this" parameter before we process
5500 // the native return buffer parameter.
5501 if (TargetOS::IsWindows && callConvIsInstanceMethodCallConv(info.compCallConv))
5504 if (!lvaTable[lclNum].lvIsRegArg)
5506 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
5508 #elif !defined(UNIX_AMD64_ABI)
5509 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
5510 #endif // TARGET_X86
5516 /* if we have a hidden buffer parameter, that comes here */
5518 if (info.compRetBuffArg != BAD_VAR_NUM)
5520 noway_assert(lclNum == info.compRetBuffArg);
5522 lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5526 #if USER_ARGS_COME_LAST
5528 //@GENERICS: extra argument for instantiation info
5529 if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5531 noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
5532 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
5533 argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5536 if (info.compIsVarArgs)
5538 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
5539 argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5542 #endif // USER_ARGS_COME_LAST
5544 CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
5545 unsigned argSigLen = info.compMethodInfo->args.numArgs;
5546 // Skip any user args that we've already processed.
5547 assert(userArgsToSkip <= argSigLen);
5548 argSigLen -= userArgsToSkip;
5549 for (unsigned i = 0; i < userArgsToSkip; i++, argLst = info.compCompHnd->getArgNext(argLst))
5556 // struct_n { int; int; ... n times };
5558 // Consider signature:
5560 // Foo (float a,double b,float c,double d,float e,double f,float g,double h,
5561 // float i,double j,float k,double l,struct_3 m) { }
5563 // Basically the signature is: (all float regs full, 1 double, struct_3);
5565 // The double argument occurs before pre spill in the argument iteration and
5566 // computes an argOffset of 0. struct_3 offset becomes 8. This is wrong.
5567 // Because struct_3 is prespilled and double occurs after prespill.
5568 // The correct offsets are double = 16 (aligned stk), struct_3 = 0..12,
5569 // Offset 12 will be skipped for double alignment of double.
5571 // Another example is (struct_2, all float regs full, double, struct_2);
5572 // Here, notice the order is similarly messed up because of 2 pre-spilled
5576 // ARG_INDEX(i) > ARG_INDEX(j) DOES NOT IMPLY |ARG_OFFSET(i)| > |ARG_OFFSET(j)|
5578 // Therefore, we'll do a two pass offset calculation, one that considers pre-spill
5579 // and the next, stack args.
5582 unsigned argLcls = 0;
5584 // Take care of pre spill registers first.
5585 regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false);
5586 regMaskTP tempMask = RBM_NONE;
5587 for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum)
5589 if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
5591 unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
5592 argOffs = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs);
            // Early out if we can. E.g., if the arg size is 8 (two register slots) and the base reg is r2,
            // the bits added to the mask are 0b1100 (r2 and r3).
5596 tempMask |= ((((1 << (roundUp(argSize, TARGET_POINTER_SIZE) / REGSIZE_BYTES))) - 1)
5597 << lvaTable[preSpillLclNum].GetArgReg());
5598 if (tempMask == preSpillMask)
5600 // We won't encounter more pre-spilled registers,
5601 // so don't bother iterating further.
5605 argLst = info.compCompHnd->getArgNext(argLst);
5608 // Take care of non pre-spilled stack arguments.
5609 argLst = info.compMethodInfo->args.args;
5610 for (unsigned i = 0, stkLclNum = lclNum; i < argSigLen; ++i, ++stkLclNum)
5612 if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
5614 const unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
5615 argOffs = lvaAssignVirtualFrameOffsetToArg(stkLclNum, argSize, argOffs);
5618 argLst = info.compCompHnd->getArgNext(argLst);
5622 #else // !TARGET_ARM
5623 for (unsigned i = 0; i < argSigLen; i++)
5625 unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
5627 assert(compMacOsArm64Abi() || argumentSize % TARGET_POINTER_SIZE == 0);
5630 lvaAssignVirtualFrameOffsetToArg(lclNum++, argumentSize, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5631 argLst = info.compCompHnd->getArgNext(argLst);
5633 #endif // !TARGET_ARM
5635 #if !USER_ARGS_COME_LAST
5637 //@GENERICS: extra argument for instantiation info
5638 if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5640 noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
5641 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
5642 argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5645 if (info.compIsVarArgs)
5647 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
5648 argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5651 #endif // USER_ARGS_COME_LAST
5654 #ifdef UNIX_AMD64_ABI
5656 // lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
5657 // individual argument, and return the offset for the next argument.
5658 // Note: This method only calculates the initial offset of the stack passed/spilled arguments
//     (if any; the RA might decide to spill, i.e. home on the stack, register-passed arguments if they are rarely used.)
5660 // The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence,
5661 // ret address slot, stack frame padding, alloca instructions, etc.
5662 // Note: This is the implementation for UNIX_AMD64 System V platforms.
5664 int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
5666 int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
5668 noway_assert(lclNum < info.compArgsCount);
5669 noway_assert(argSize);
5671 if (info.compArgOrder == Target::ARG_ORDER_L2R)
5676 unsigned fieldVarNum = BAD_VAR_NUM;
5678 LclVarDsc* varDsc = lvaGetDesc(lclNum);
5680 noway_assert(varDsc->lvIsParam);
5682 if (varDsc->lvIsRegArg)
5684 // Argument is passed in a register, don't count it
5685 // when updating the current offset on the stack.
5687 if (varDsc->lvOnFrame)
5689 // The offset for args needs to be set only for the stack homed arguments for System V.
5690 varDsc->SetStackOffset(argOffs);
5694 varDsc->SetStackOffset(0);
5699 // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack.
5700 // This is where they are always homed. So, they can be accessed with positive offset.
5701 // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack
5702 // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed
5703 // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive
5704 // offset (from the caller's stack.)
5705 // For System V platforms if there is no frame pointer the caller stack parameter offset should include the
5706 // callee allocated space. If frame register is used, the callee allocated space should not be included for
5707 // accessing the caller stack parameters. The last two requirements are met in lvaFixVirtualFrameOffsets
5708 // method, which fixes the offsets, based on frame pointer existence, existence of alloca instructions, ret
5709 // address pushed, etc.
5711 varDsc->SetStackOffset(*callerArgOffset);
5712 // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
5713 // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
5714 if (argSize > TARGET_POINTER_SIZE)
5716 *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
5720 *callerArgOffset += TARGET_POINTER_SIZE;
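// Illustrative example (hypothetical values, not part of the algorithm): on a 64-bit
// System V target with TARGET_POINTER_SIZE == 8, a 12-byte struct passed on the stack
// advances *callerArgOffset by roundUp(12, 8) == 16, while a 4-byte int argument still
// advances it by a full 8-byte slot, so every stack argument stays pointer-size aligned.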
5724 // For struct promoted parameters we need to set the offsets for the field lclVars.
5726 // For a promoted struct we also assign the struct fields stack offset
5727 if (varDsc->lvPromoted)
5729 unsigned firstFieldNum = varDsc->lvFieldLclStart;
5730 int offset = varDsc->GetStackOffset();
5731 for (unsigned i = 0; i < varDsc->lvFieldCnt; i++)
5733 LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i);
5734 fieldVarDsc->SetStackOffset(offset + fieldVarDsc->lvFldOffset);
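// For example (illustrative values): if the promoted struct parameter was just homed at
// stack offset -24 and has two int fields with lvFldOffset 0 and 4, the field locals get
// offsets -24 and -20 respectively, i.e. the parent's offset plus each field's offset.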
5738 if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
5746 #else // !UNIX_AMD64_ABI
5749 // lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
5750 // individual argument, and return the offset for the next argument.
5751 // Note: This method only calculates the initial offset of the stack passed/spilled arguments
5752 // (if any - the RA might decide to spill (home on the stack) register-passed arguments if they are rarely used.)
5753 // The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence,
5754 // ret address slot, stack frame padding, alloca instructions, etc.
5755 // Note: This is the implementation for all platforms except the UNIX_AMD64 OSs (System V 64 bit).
5756 int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
5758 int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
5760 noway_assert(lclNum < info.compArgsCount);
5761 noway_assert(argSize);
5763 if (info.compArgOrder == Target::ARG_ORDER_L2R)
5768 unsigned fieldVarNum = BAD_VAR_NUM;
5770 LclVarDsc* varDsc = lvaGetDesc(lclNum);
5772 noway_assert(varDsc->lvIsParam);
5774 if (varDsc->lvIsRegArg)
5776 /* Argument is passed in a register, don't count it
5777 * when updating the current offset on the stack */
5778 CLANG_FORMAT_COMMENT_ANCHOR;
5780 #if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
5782 // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize
5783 // Also investigate why we are incrementing argOffs for X86 as this seems incorrect
5785 noway_assert(argSize == TARGET_POINTER_SIZE);
5789 #if defined(TARGET_X86)
5790 argOffs += TARGET_POINTER_SIZE;
5791 #elif defined(TARGET_AMD64)
5792 // Register arguments on AMD64 also take stack space (in the backing store).
5793 varDsc->SetStackOffset(argOffs);
5794 argOffs += TARGET_POINTER_SIZE;
5795 #elif defined(TARGET_ARM64)
5796 // Register arguments on ARM64 only take stack space when they have a frame home,
5797 // except on Windows in a vararg method.
5798 if (compFeatureArgSplit() && this->info.compIsVarArgs)
5800 if (varDsc->lvType == TYP_STRUCT && varDsc->GetOtherArgReg() >= MAX_REG_ARG &&
5801 varDsc->GetOtherArgReg() != REG_NA)
5803 // This is a split struct. It will account for an extra (8 bytes) for the whole struct.
5805 varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE);
5806 argOffs += TARGET_POINTER_SIZE;
5810 #elif defined(TARGET_ARM)
5811 // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
5812 // in the prolog, so we have to do SetStackOffset() here
5814 regMaskTP regMask = genRegMask(varDsc->GetArgReg());
5815 if (codeGen->regSet.rsMaskPreSpillRegArg & regMask)
5817 // Signature: void foo(struct_8, int, struct_4)
5818 // ------- CALLER SP -------
5820 // r2 int - not prespilled, but added for alignment. argOffs should skip this.
5823 // -------------------------
5824 // If we added alignment we need to fix argOffs for all registers above alignment.
5825 if (codeGen->regSet.rsMaskPreSpillAlign != RBM_NONE)
5827 assert(genCountBits(codeGen->regSet.rsMaskPreSpillAlign) == 1);
5828 // Is register beyond the alignment pos?
5829 if (regMask > codeGen->regSet.rsMaskPreSpillAlign)
5831 // Increment argOffs just once for the _first_ register after alignment pos
5832 // in the prespill mask.
5833 if (!BitsBetween(codeGen->regSet.rsMaskPreSpillRegArg, regMask,
5834 codeGen->regSet.rsMaskPreSpillAlign))
5836 argOffs += TARGET_POINTER_SIZE;
5841 switch (varDsc->lvType)
5844 if (!varDsc->lvStructDoubleAlign)
5854 // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
5856 // ------- CALLER SP -------
5858 // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
5859 // r1 VACookie -- argOffs = 0
5860 // -------------------------
5862 // Consider argOffs as if it accounts for number of prespilled registers before the current
5863 // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is
5864 // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't
5865 // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting
5866 // for r1, equivalently r1 is skipped.
5868 // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
5870 genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
5871 if (argOffs < prevRegsSize)
5873 // We must align up the argOffset to a multiple of 8 to account for skipped registers.
5874 argOffs = roundUp((unsigned)argOffs, 2 * TARGET_POINTER_SIZE);
5876 // We should've skipped only a single register.
5877 assert(argOffs == prevRegsSize);
5882 // No alignment of argOffs required
5885 varDsc->SetStackOffset(argOffs);
5889 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5891 if (varDsc->lvIsSplit)
5893 assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK));
5894 // This is a split struct. It will account for an extra (8 bytes) for the whole struct.
5895 varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE);
5896 argOffs += TARGET_POINTER_SIZE;
5900 #error Unsupported or unset target architecture
5905 #if defined(TARGET_ARM)
5906 // Dev11 Bug 42817: incorrect codegen for DrawFlatCheckBox causes A/V in WinForms
5908 // Here we have method with a signature (int a1, struct a2, struct a3, int a4, int a5).
5909 // Struct parameter 'a2' is 16-bytes with no alignment requirements;
5910 // it uses r1,r2,r3 and [OutArg+0] when passed.
5911 // Struct parameter 'a3' is 16-bytes that is required to be double aligned;
5912 // the caller skips [OutArg+4] and starts the argument at [OutArg+8].
5913 // Thus the caller generates the correct code to pass the arguments.
5914 // When generating code to receive the arguments we set codeGen->regSet.rsMaskPreSpillRegArg to [r1,r2,r3]
5915 // and spill these three registers as the first instruction in the prolog.
5916 // Then when we layout the arguments' stack offsets we have an argOffs 0 which
5917 // points at the location that we spilled r1 into the stack. For this first
5918 // struct we take the lvIsRegArg path above with "codeGen->regSet.rsMaskPreSpillRegArg &" matching.
5919 // Next when we calculate the argOffs for the second 16-byte struct we have an argOffs
5920 // of 16, which appears to be aligned properly so we don't skip a stack slot.
5922 // To fix this we must recover the actual OutArg offset by subtracting off the
5923 // sizeof of the PreSpill register args.
5924 // Then we align this offset to a multiple of 8 and add back the sizeof
5925 // of the PreSpill register args.
5927 // Dev11 Bug 71767: failure of assert(sizeofPreSpillRegArgs <= argOffs)
5929 // We have a method with 'this' passed in r0, RetBuf arg in r1, VarArgs cookie
5930 // in r2. The first user arg is a 144 byte struct with double alignment required,
5931 // r3 is skipped, and the struct is passed on the stack. However, 'r3' is added
5932 // to the codeGen->regSet.rsMaskPreSpillRegArg mask by the VarArgs cookie code, since we need to
5933 // home all the potential varargs arguments in registers, even if we don't have
5934 // signature type information for the variadic arguments. However, due to alignment,
5935 // we have skipped a register that doesn't have a corresponding symbol. Make up
5936 // for that by increasing argOffs here.
5939 int sizeofPreSpillRegArgs = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
5941 if (argOffs < sizeofPreSpillRegArgs)
5943 // This can only happen if we skipped the last register spot because current stk arg
5944 // is a struct requiring alignment or a pre-spill alignment was required because the
5945 // first reg arg needed alignment.
5947 // Example 1: First Stk Argument requiring alignment in vararg case (same as above comment.)
5948 // Signature (int a0, int a1, int a2, struct {long} a3, ...)
5950 // stk arg a3 --> argOffs here will be 12 (r0-r2) but pre-spill will be 16.
5951 // ---- Caller SP ----
5952 // r3 --> Stack slot is skipped in this case.
5957 // Example 2: First Reg Argument requiring alignment in no-vararg case.
5958 // Signature (struct {long} a0, struct {int} a1, int a2, int a3)
5960 // stk arg --> argOffs here will be 12 {r0-r2} but pre-spill will be 16.
5961 // ---- Caller SP ----
5962 // r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
5963 // r2 struct { int } a1
5964 // r0-r1 struct { long } a0
5965 CLANG_FORMAT_COMMENT_ANCHOR;
5967 #ifdef PROFILING_SUPPORTED
5968 // On ARM under the profiler, r0-r3 are always prespilled on the stack.
5969 // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct
5970 // hfa2), in which case hfa1 and hfa2 will be enregistered in co-processor registers and will have an
5971 // argument offset less than the size of the preSpill area.
5973 // For this reason the following conditions are asserted when not under profiler.
5974 if (!compIsProfilerHookNeeded())
5977 bool cond = ((info.compIsVarArgs || opts.compUseSoftFP) &&
5978 // Does cur stk arg require double alignment?
5979 ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
5980 (varDsc->lvType == TYP_DOUBLE) || (varDsc->lvType == TYP_LONG))) ||
5981 // Did first reg arg require alignment?
5982 (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
5985 noway_assert(sizeofPreSpillRegArgs <=
5986 argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
5988 argOffs = sizeofPreSpillRegArgs;
5991 noway_assert(argOffs >= sizeofPreSpillRegArgs);
5992 int argOffsWithoutPreSpillRegArgs = argOffs - sizeofPreSpillRegArgs;
5994 switch (varDsc->lvType)
5997 if (!varDsc->lvStructDoubleAlign)
6004 // We must align up the argOffset to a multiple of 8
6006 roundUp((unsigned)argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
6010 // No alignment of argOffs required
6013 #endif // TARGET_ARM
6014 const bool isFloatHfa = (varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT));
6015 const unsigned argAlignment = eeGetArgSizeAlignment(varDsc->lvType, isFloatHfa);
6016 if (compMacOsArm64Abi())
6018 argOffs = roundUp(argOffs, argAlignment);
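// Illustrative example (hypothetical values): on the Apple arm64 ABI small arguments are
// packed, so argOffs can be, say, 6 after three 2-byte arguments; a following 4-byte int
// with argAlignment == 4 rounds argOffs up to roundUp(6, 4) == 8 before its offset is recorded.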
6021 assert((argSize % argAlignment) == 0);
6022 assert((argOffs % argAlignment) == 0);
6023 varDsc->SetStackOffset(argOffs);
6026 // For struct promoted parameters we need to set the offsets for both LclVars.
6028 // For a dependent promoted struct we also assign the struct fields stack offset
6029 CLANG_FORMAT_COMMENT_ANCHOR;
6031 if (varDsc->lvPromoted)
6033 unsigned firstFieldNum = varDsc->lvFieldLclStart;
6034 for (unsigned i = 0; i < varDsc->lvFieldCnt; i++)
6036 LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i);
6038 JITDUMP("Adjusting offset of dependent V%02u of arg V%02u: parent %u field %u net %u\n", lclNum,
6039 firstFieldNum + i, varDsc->GetStackOffset(), fieldVarDsc->lvFldOffset,
6040 varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset);
6042 fieldVarDsc->SetStackOffset(varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset);
6046 if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
6053 #endif // !UNIX_AMD64_ABI
6055 //-----------------------------------------------------------------------------
6056 // lvaAssignVirtualFrameOffsetsToLocals: compute the virtual stack offsets for
6057 // all elements on the stackframe.
6060 // Can be called multiple times. Early calls can be used to estimate various
6061 // frame offsets, but details may change.
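//
// Rough sketch of the scheme (illustrative, not exhaustive): stkOffs starts at 0, the
// virtual frame origin, and is decremented as space is reserved, so locals end up with
// non-positive virtual offsets, roughly:
//
//    stkOffs = 0;
//    stkOffs -= TARGET_POINTER_SIZE;                    // return address (x86/amd64)
//    stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;   // callee-saved registers
//    stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, size, stkOffs); // each local
//
// lvaFixVirtualFrameOffsets later converts these virtual offsets into final FP/SP-relative ones.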
6063 void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
6065 // (1) Account for things that are set up by the prolog and undone by the epilog.
6068 int originalFrameStkOffs = 0;
6069 int originalFrameSize = 0;
6070 // codeGen->isFramePointerUsed is set in regalloc phase. Initialize it to a guess for pre-regalloc layout.
6071 if (lvaDoneFrameLayout <= PRE_REGALLOC_FRAME_LAYOUT)
6073 codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
6077 // Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
6078 // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
6079 // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
6080 // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
6081 // frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
6082 // (It should be legal to use these frame types for every frame).
6084 if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
6086 // Default configuration
6087 codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
6088 opts.compDbgEnC || compStressCompile(STRESS_GENERIC_VARN, 20));
6090 else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
6092 codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
6094 else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
6096 codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
6098 #endif // TARGET_ARM64
6101 // On x86/amd64, the return address has already been pushed by the call instruction in the caller.
6102 stkOffs -= TARGET_POINTER_SIZE; // return address;
6103 if (lvaRetAddrVar != BAD_VAR_NUM)
6105 lvaTable[lvaRetAddrVar].SetStackOffset(stkOffs);
6109 // If we are an OSR method, we "inherit" the frame of the original method
6113 originalFrameSize = info.compPatchpointInfo->TotalFrameSize();
6114 originalFrameStkOffs = stkOffs;
6115 stkOffs -= originalFrameSize;
6119 // TODO-AMD64-CQ: for X64 eventually this should be pushed with all the other
6120 // calleeregs. When you fix this, you'll also need to fix
6121 // the assert at the bottom of this method
6122 if (codeGen->doubleAlignOrFramePointerUsed())
6124 stkOffs -= REGSIZE_BYTES;
6128 int preSpillSize = 0;
6129 bool mustDoubleAlign = false;
6132 mustDoubleAlign = true;
6133 preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
6134 #else // !TARGET_ARM
6136 if (genDoubleAlign())
6138 mustDoubleAlign = true; // X86 only
6141 #endif // !TARGET_ARM
6144 // If the frame pointer is used, then we'll save FP/LR at the bottom of the stack.
6145 // Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save
6146 // registers (if any).
6148 int initialStkOffs = 0;
6149 if (info.compIsVarArgs)
6151 // For varargs we always save all of the integer register arguments
6152 // so that they are contiguous with the incoming stack arguments.
6153 initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES;
6154 stkOffs -= initialStkOffs;
6157 if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() ||
6158 !isFramePointerUsed()) // Note that currently we always have a frame pointer
6160 stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
6164 // Subtract off FP and LR.
6165 assert(compCalleeRegsPushed >= 2);
6166 stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
6169 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6171 assert(compCalleeRegsPushed >= 2);
6173 #else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
6175 // On ARM32 LR is part of the pushed registers and is always stored at the top.
6177 if (lvaRetAddrVar != BAD_VAR_NUM)
6179 lvaTable[lvaRetAddrVar].SetStackOffset(stkOffs - REGSIZE_BYTES);
6183 stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
6184 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
6186 // (2) Account for the remainder of the frame
6188 // From this point on the code must generally adjust both
6189 // stkOffs and the local frame size. The latter is done via:
6191 // lvaIncrementFrameSize -- for space not associated with a local var
6192 // lvaAllocLocalAndSetVirtualOffset -- for space associated with a local var
6194 // One exception to the above: OSR locals that have offsets within the Tier0
6195 // portion of the frame.
6197 compLclFrameSize = 0;
6200 // For methods with patchpoints, the Tier0 method must reserve
6201 // space for all the callee saves, as this area is shared with the
6202 // OSR method, and we have to anticipate that collectively the
6203 // Tier0 and OSR methods end up saving all callee saves.
6205 // Currently this is x64 only.
6207 if (doesMethodHavePatchpoints() || doesMethodHavePartialCompilationPatchpoints())
6209 const unsigned regsPushed = compCalleeRegsPushed + (codeGen->isFramePointerUsed() ? 1 : 0);
6210 const unsigned extraSlots = genCountBits(RBM_OSR_INT_CALLEE_SAVED) - regsPushed;
6211 const unsigned extraSlotSize = extraSlots * REGSIZE_BYTES;
6213 JITDUMP("\nMethod has patchpoints and has %u callee saves.\n"
6214 "Reserving %u extra slots (%u bytes) for potential OSR method callee saves\n",
6215 regsPushed, extraSlots, extraSlotSize);
6217 stkOffs -= extraSlotSize;
6218 lvaIncrementFrameSize(extraSlotSize);
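// Worked example (illustrative; assumes RBM_OSR_INT_CALLEE_SAVED covers the 8 x64 integer
// callee-saved registers): a Tier0 method that pushes RBP plus two callee-saved registers has
// regsPushed == 3, so extraSlots == 8 - 3 == 5 and 5 * 8 == 40 extra bytes are reserved here.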
6221 // On AMD64, compCalleeRegsPushed does not include the float regs (xmm6-xmm31) that
6222 // need to be saved, because AMD64 doesn't support push/pop of xmm registers.
6223 // Instead we need to allocate space for them on the stack and save them in the prolog.
6224 // Therefore, we consider xmm registers being saved while computing stack offsets
6225 // but space for xmm registers is considered part of compLclFrameSize.
6227 // 1) We need to save the entire 128-bits of xmm register to stack, since amd64
6228 // prolog unwind codes allow encoding of an instruction that stores the entire xmm reg
6229 // at an offset relative to SP
6230 // 2) We adjust frame size so that SP is aligned at 16-bytes after pushing integer registers.
6231 // This means while saving the first xmm register to its allocated stack location we might
6232 // have to skip 8-bytes. The reason for padding is to use efficient "movaps" to save/restore
6233 // xmm registers to/from stack to match Jit64 codegen. Without the aligning on 16-byte
6234 // boundary we would have to use movups when offset turns out unaligned. Movaps is more
6235 // performant than movups.
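// Worked example (illustrative values): if three xmm registers must be saved,
// calleeFPRegsSavedSize below is 3 * 16 == 48 bytes; if -(stkOffs + originalFrameSize) is
// currently 40, then 40 % 16 != 0 and AlignmentPad(40, 16) == 8, so an 8-byte pad is added
// first and the subsequent movaps stores land on 16-byte boundaries.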
6236 const unsigned calleeFPRegsSavedSize = genCountBits(compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
6238 // For OSR the alignment pad computation should not take the original frame into account.
6239 // Original frame size includes the pseudo-saved RA and so is always = 8 mod 16.
6240 const int offsetForAlign = -(stkOffs + originalFrameSize);
6242 if ((calleeFPRegsSavedSize > 0) && ((offsetForAlign % XMM_REGSIZE_BYTES) != 0))
6244 // Take care of alignment
6245 int alignPad = (int)AlignmentPad((unsigned)offsetForAlign, XMM_REGSIZE_BYTES);
6246 assert(alignPad != 0);
6247 stkOffs -= alignPad;
6248 lvaIncrementFrameSize(alignPad);
6251 stkOffs -= calleeFPRegsSavedSize;
6252 lvaIncrementFrameSize(calleeFPRegsSavedSize);
6254 // Quirk for VS debug-launch scenario to work
6255 if (compVSQuirkStackPaddingNeeded > 0)
6260 printf("\nAdding VS quirk stack padding of %d bytes between save-reg area and locals\n",
6261 compVSQuirkStackPaddingNeeded);
6265 stkOffs -= compVSQuirkStackPaddingNeeded;
6266 lvaIncrementFrameSize(compVSQuirkStackPaddingNeeded);
6268 #endif // TARGET_AMD64
6270 if (lvaMonAcquired != BAD_VAR_NUM)
6272 // For OSR we use the flag set up by the original method.
6276 assert(info.compPatchpointInfo->HasMonitorAcquired());
6277 int originalOffset = info.compPatchpointInfo->MonitorAcquiredOffset();
6278 int offset = originalFrameStkOffs + originalOffset;
6281 "---OSR--- V%02u (on tier0 frame, monitor acquired) tier0 FP-rel offset %d tier0 frame offset %d new "
6283 lvaMonAcquired, originalOffset, originalFrameStkOffs, offset);
6285 lvaTable[lvaMonAcquired].SetStackOffset(offset);
6289 // This var must go first, in what is called the 'frame header' for EnC so that it is
6290 // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame
6291 // layout requirements for EnC to work.
6292 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs);
6296 #if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64))
6297 if (lvaPSPSym != BAD_VAR_NUM)
6299 // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets
6300 // will need to have it at the same caller-SP relative offset so anything
6301 // allocated before this will also leak into the funclet's frame.
6302 noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
6303 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
6305 #endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64)
6307 if (mustDoubleAlign)
6309 if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
6311 // Allocate a pointer sized stack slot, since we may need to double align here
6312 // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
6314 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6315 stkOffs -= TARGET_POINTER_SIZE;
6317 // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
6318 // then we need to allocate a second pointer sized stack slot,
6319 // since we may need to double align that LclVar when we see it
6320 // in the loop below. We will just always do this so that the
6321 // offsets that we calculate for the stack frame will always
6322 // be greater (or equal) to what they can be in the final layout.
6324 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6325 stkOffs -= TARGET_POINTER_SIZE;
6327 else // FINAL_FRAME_LAYOUT
6329 if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
6331 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6332 stkOffs -= TARGET_POINTER_SIZE;
6334 // We should now have a double-aligned (stkOffs+preSpillSize)
6335 noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
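// Example (illustrative, ARM32): with preSpillSize == 4 and stkOffs == -16, (stkOffs +
// preSpillSize) == -12 is not a multiple of 8, so one 4-byte slot is reserved above and
// stkOffs becomes -20, restoring (stkOffs + preSpillSize) % 8 == 0; had stkOffs been -12,
// the sum would already be -8 and no extra slot would be taken.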
6339 #ifdef JIT32_GCENCODER
6340 if (lvaLocAllocSPvar != BAD_VAR_NUM)
6342 noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
6343 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaLocAllocSPvar, TARGET_POINTER_SIZE, stkOffs);
6345 #endif // JIT32_GCENCODER
6347 // For OSR methods, param type args are always reportable via the root method frame slot.
6348 // (see gcInfoBlockHdrSave) and so do not need a new slot on the frame.
6350 // OSR methods may also be able to use the root frame kept alive this, if the root
6351 // method needed to report this.
6353 // Inlining done under OSR may introduce new reporting, in which case the OSR frame
6354 // must allocate a slot.
6355 if (lvaReportParamTypeArg())
6357 #ifdef JIT32_GCENCODER
6358 noway_assert(codeGen->isFramePointerUsed());
6362 PatchpointInfo* ppInfo = info.compPatchpointInfo;
6363 assert(ppInfo->HasGenericContextArgOffset());
6364 const int originalOffset = ppInfo->GenericContextArgOffset();
6365 lvaCachedGenericContextArgOffs = originalFrameStkOffs + originalOffset;
6369 // For CORINFO_CALLCONV_PARAMTYPE (if needed)
6370 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6371 stkOffs -= TARGET_POINTER_SIZE;
6372 lvaCachedGenericContextArgOffs = stkOffs;
6375 #ifndef JIT32_GCENCODER
6376 else if (lvaKeepAliveAndReportThis())
6378 bool canUseExistingSlot = false;
6381 PatchpointInfo* ppInfo = info.compPatchpointInfo;
6382 if (ppInfo->HasKeptAliveThis())
6384 const int originalOffset = ppInfo->KeptAliveThisOffset();
6385 lvaCachedGenericContextArgOffs = originalFrameStkOffs + originalOffset;
6386 canUseExistingSlot = true;
6390 if (!canUseExistingSlot)
6392 // When "this" is also used as generic context arg.
6393 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6394 stkOffs -= TARGET_POINTER_SIZE;
6395 lvaCachedGenericContextArgOffs = stkOffs;
6400 #if !defined(FEATURE_EH_FUNCLETS)
6401 /* If we need space for slots for shadow SP, reserve it now */
6402 if (ehNeedsShadowSPslots())
6404 noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
6405 if (!lvaReportParamTypeArg())
6407 #ifndef JIT32_GCENCODER
6408 if (!lvaKeepAliveAndReportThis())
6411 // In order to keep the gc info encoding smaller, the VM assumes that all methods with EH
6412 // have also saved space for a ParamTypeArg, so we need to do that here
6413 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6414 stkOffs -= TARGET_POINTER_SIZE;
6417 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs);
6419 #endif // !FEATURE_EH_FUNCLETS
6421 if (compGSReorderStackLayout)
6423 assert(getNeedsGSSecurityCookie());
6425 if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
6427 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
6432 If we're supposed to track lifetimes of pointer temps, we'll
6433 assign frame offsets in the following order:
6435 non-pointer local variables (also untracked pointer variables)
6436 pointer local variables
6443 ALLOC_NON_PTRS = 0x1, // assign offsets to non-ptr
6444 ALLOC_PTRS = 0x2, // Second pass, assign offsets to tracked ptrs
6445 ALLOC_UNSAFE_BUFFERS = 0x4,
6446 ALLOC_UNSAFE_BUFFERS_WITH_PTRS = 0x8
6448 UINT alloc_order[5];
6450 unsigned int cur = 0;
6452 if (compGSReorderStackLayout)
6454 noway_assert(getNeedsGSSecurityCookie());
6456 if (codeGen->isFramePointerUsed())
6458 alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
6459 alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
6463 bool tempsAllocated = false;
6465 if (lvaTempsHaveLargerOffsetThanVars() && !codeGen->isFramePointerUsed())
6467 // Because we want the temps to have a larger offset than locals
6468 // and we're not using a frame pointer, we have to place the temps
6469 // above the vars. Otherwise we place them after the vars (at the
6470 // bottom of the frame).
6471 noway_assert(!tempsAllocated);
6472 stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
6473 tempsAllocated = true;
6476 alloc_order[cur++] = ALLOC_NON_PTRS;
6478 if (opts.compDbgEnC)
6480 /* We will use just one pass, and assign offsets to all variables */
6481 alloc_order[cur - 1] |= ALLOC_PTRS;
6482 noway_assert(compGSReorderStackLayout == false);
6486 alloc_order[cur++] = ALLOC_PTRS;
6489 if (!codeGen->isFramePointerUsed() && compGSReorderStackLayout)
6491 alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
6492 alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
6495 alloc_order[cur] = 0;
6497 noway_assert(cur < ArrLen(alloc_order));
6499 // Force first pass to happen
6500 UINT assignMore = 0xFFFFFFFF;
6501 bool have_LclVarDoubleAlign = false;
6503 for (cur = 0; alloc_order[cur]; cur++)
6505 if ((assignMore & alloc_order[cur]) == 0)
6515 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6517 /* Ignore field locals of the promotion type PROMOTION_TYPE_FIELD_DEPENDENT.
6518 In other words, we will not calculate the "base" address of the struct local if
6519 the promotion type is PROMOTION_TYPE_FIELD_DEPENDENT.
6521 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6526 #if FEATURE_FIXED_OUT_ARGS
6527 // The scratch mem is used for the outgoing arguments, and it must be absolutely last
6528 if (lclNum == lvaOutgoingArgSpaceVar)
6534 bool allocateOnFrame = varDsc->lvOnFrame;
6536 if (varDsc->lvRegister && (lvaDoneFrameLayout == REGALLOC_FRAME_LAYOUT) &&
6537 ((varDsc->TypeGet() != TYP_LONG) || (varDsc->GetOtherReg() != REG_STK)))
6539 allocateOnFrame = false;
6542 // For OSR args and locals, we use the slots on the original frame.
6544 // Note we must do this even for "non frame" locals, as we sometimes
6545 // will refer to their memory homes.
6546 if (lvaIsOSRLocal(lclNum))
6548 if (varDsc->lvIsStructField)
6550 const unsigned parentLclNum = varDsc->lvParentLcl;
6551 const int parentOriginalOffset = info.compPatchpointInfo->Offset(parentLclNum);
6552 const int offset = originalFrameStkOffs + parentOriginalOffset + varDsc->lvFldOffset;
6554 JITDUMP("---OSR--- V%02u (promoted field of V%02u; on tier0 frame) tier0 FP-rel offset %d tier0 "
6555 "frame offset %d field offset %d new virt offset "
6557 lclNum, parentLclNum, parentOriginalOffset, originalFrameStkOffs, varDsc->lvFldOffset,
6560 lvaTable[lclNum].SetStackOffset(offset);
6564 // Add the frame-pointer-relative offset of this OSR live local in the original frame
6565 // to the offset of original frame in our new frame.
6566 const int originalOffset = info.compPatchpointInfo->Offset(lclNum);
6567 const int offset = originalFrameStkOffs + originalOffset;
6570 "---OSR--- V%02u (on tier0 frame) tier0 FP-rel offset %d tier0 frame offset %d new virt offset "
6572 lclNum, originalOffset, originalFrameStkOffs, offset);
6574 lvaTable[lclNum].SetStackOffset(offset);
6579 /* Ignore variables that are not on the stack frame */
6581 if (!allocateOnFrame)
6583 /* For EnC, all variables have to be allocated space on the
6584 stack, even though they may actually be enregistered. This
6585 way, the frame layout can be directly inferred from the
6589 if (!opts.compDbgEnC)
6593 else if (lclNum >= info.compLocalsCount)
6594 { // ignore temps for EnC
6598 else if (lvaGSSecurityCookie == lclNum && getNeedsGSSecurityCookie())
6600 // Special case for OSR. If the original method had a cookie,
6601 // we use its slot on the original frame.
6602 if (opts.IsOSR() && info.compPatchpointInfo->HasSecurityCookie())
6604 int originalOffset = info.compPatchpointInfo->SecurityCookieOffset();
6605 int offset = originalFrameStkOffs + originalOffset;
6607 JITDUMP("---OSR--- V%02u (on tier0 frame, security cookie) tier0 FP-rel offset %d tier0 frame "
6610 lclNum, originalOffset, originalFrameStkOffs, offset);
6612 lvaTable[lclNum].SetStackOffset(offset);
6618 // These need to be located as the very first variables (highest memory address)
6619 // and so they have already been assigned an offset
6621 #if defined(FEATURE_EH_FUNCLETS)
6622 lclNum == lvaPSPSym ||
6624 lclNum == lvaShadowSPslotsVar ||
6625 #endif // FEATURE_EH_FUNCLETS
6626 #ifdef JIT32_GCENCODER
6627 lclNum == lvaLocAllocSPvar ||
6628 #endif // JIT32_GCENCODER
6629 lclNum == lvaRetAddrVar)
6631 assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
6635 if (lclNum == lvaMonAcquired)
6640 // This should be low on the stack. Hence, it will be assigned later.
6641 if (lclNum == lvaStubArgumentVar)
6643 #ifdef JIT32_GCENCODER
6644 noway_assert(codeGen->isFramePointerUsed());
6649 // This should be low on the stack. Hence, it will be assigned later.
6650 if (lclNum == lvaInlinedPInvokeFrameVar)
6652 noway_assert(codeGen->isFramePointerUsed());
6656 if (varDsc->lvIsParam)
6658 #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
6660 // On Windows AMD64 we can use the caller-reserved stack area that is already setup
6661 assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
6664 #else // !TARGET_AMD64
6666 // A register argument that is not enregistered ends up as
6667 // a local variable which will need stack frame space.
6669 if (!varDsc->lvIsRegArg)
6675 if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum())
6677 // The stack offset for varargs (parameters) should point to the home area, which will be preallocated.
6678 const unsigned regArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg());
6679 varDsc->SetStackOffset(-initialStkOffs + regArgNum * REGSIZE_BYTES);
6686 // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
6687 // in the prolog, thus they don't need stack frame space.
6689 if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->GetArgReg())) != 0)
6691 assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
6696 #endif // !TARGET_AMD64
6699 /* Make sure the type is appropriate */
6701 if (varDsc->lvIsUnsafeBuffer && compGSReorderStackLayout)
6703 if (varDsc->lvIsPtr)
6705 if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS_WITH_PTRS) == 0)
6707 assignMore |= ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
6713 if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS) == 0)
6715 assignMore |= ALLOC_UNSAFE_BUFFERS;
6720 else if (varTypeIsGC(varDsc->TypeGet()) && varDsc->lvTracked)
6722 if ((alloc_order[cur] & ALLOC_PTRS) == 0)
6724 assignMore |= ALLOC_PTRS;
6730 if ((alloc_order[cur] & ALLOC_NON_PTRS) == 0)
6732 assignMore |= ALLOC_NON_PTRS;
6737 /* Need to align the offset? */
6739 if (mustDoubleAlign && (varDsc->lvType == TYP_DOUBLE // Align doubles for ARM and x86
6741 || varDsc->lvType == TYP_LONG // Align longs for ARM
6743 #ifndef TARGET_64BIT
6744 || varDsc->lvStructDoubleAlign // Align when lvStructDoubleAlign is true
6745 #endif // !TARGET_64BIT
6748 noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
6750 if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) && !have_LclVarDoubleAlign)
6752 // If this is the first TYP_LONG, TYP_DOUBLE or double aligned struct
6753 // then we have seen in this loop then we allocate a pointer sized
6754 // stack slot since we may need to double align this LclVar
6755 // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
6757 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6758 stkOffs -= TARGET_POINTER_SIZE;
6762 if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
6764 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6765 stkOffs -= TARGET_POINTER_SIZE;
6768 // We should now have a double-aligned (stkOffs+preSpillSize)
6769 noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
6772 // Remember that we had to double align a LclVar
6773 have_LclVarDoubleAlign = true;
6776 // Reserve the stack space for this variable
6777 stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs);
6778 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6779 // If we have an incoming register argument that has a promoted field then we
6780 // need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar
6782 if (varDsc->lvIsRegArg && varDsc->lvPromoted)
6784 unsigned firstFieldNum = varDsc->lvFieldLclStart;
6785 for (unsigned i = 0; i < varDsc->lvFieldCnt; i++)
6787 LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i);
6788 fieldVarDsc->SetStackOffset(varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset);
6791 #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6795 if (getNeedsGSSecurityCookie() && !compGSReorderStackLayout)
6797 if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
6799 // LOCALLOC used, but we have no unsafe buffer. Allocate the cookie last, close to the localloc buffer.
6800 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
6804 if (tempsAllocated == false)
6806 /*-------------------------------------------------------------------------
6810 *-------------------------------------------------------------------------
6812 stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
6815 /*-------------------------------------------------------------------------
6817 * Now do some final stuff
6819 *-------------------------------------------------------------------------
6822 // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar need to be assigned last
6823 // Important: The stack walker depends on lvaStubArgumentVar immediately
6824 // following lvaInlinedPInvokeFrameVar in the frame.
6826 if (lvaStubArgumentVar != BAD_VAR_NUM)
6828 #ifdef JIT32_GCENCODER
6829 noway_assert(codeGen->isFramePointerUsed());
6831 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaStubArgumentVar, lvaLclSize(lvaStubArgumentVar), stkOffs);
6834 if (lvaInlinedPInvokeFrameVar != BAD_VAR_NUM)
6836 noway_assert(codeGen->isFramePointerUsed());
6838 lvaAllocLocalAndSetVirtualOffset(lvaInlinedPInvokeFrameVar, lvaLclSize(lvaInlinedPInvokeFrameVar), stkOffs);
6841 #ifdef JIT32_GCENCODER
6842 // JIT32 encoder cannot handle GS cookie at fp+0 since NO_GS_COOKIE == 0.
6843 // Add some padding if it is the last allocated local.
6844 if ((lvaGSSecurityCookie != BAD_VAR_NUM) && (lvaGetDesc(lvaGSSecurityCookie)->GetStackOffset() == stkOffs))
6846 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6847 stkOffs -= TARGET_POINTER_SIZE;
6851 if (mustDoubleAlign)
6853 if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
6855 // Allocate a pointer sized stack slot, since we may need to double align here
6856 // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
6858 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6859 stkOffs -= TARGET_POINTER_SIZE;
6861 if (have_LclVarDoubleAlign)
6863 // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
6864 // then we need to allocate a second pointer sized stack slot,
6865 // since we may need to double align the last LclVar that we saw
6866 // in the loop above. We do this so that the offsets that we
6867 // calculate for the stack frame are always greater than they will
6868 // be in the final layout.
6870 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6871 stkOffs -= TARGET_POINTER_SIZE;
6874 else // FINAL_FRAME_LAYOUT
6876 if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
6878 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
6879 stkOffs -= TARGET_POINTER_SIZE;
6881 // We should now have a double-aligned (stkOffs+preSpillSize)
6882 noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
6886 #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_AMD64)
6887 if (lvaPSPSym != BAD_VAR_NUM)
6889 // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument
6890 // space. Any padding will be higher on the stack than this
6891 // (including the padding added by lvaAlignFrame()).
6892 noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
6893 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
6895 #endif // FEATURE_EH_FUNCLETS && defined(TARGET_AMD64)
6898 if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() &&
6899 isFramePointerUsed()) // Note that currently we always have a frame pointer
6901 // Create space for saving FP and LR.
6902 stkOffs -= 2 * REGSIZE_BYTES;
6904 #endif // TARGET_ARM64
6906 #if FEATURE_FIXED_OUT_ARGS
6907 if (lvaOutgoingArgSpaceSize > 0)
6909 #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // No 4 slots for outgoing params on System V.
6910 noway_assert(lvaOutgoingArgSpaceSize >= (4 * TARGET_POINTER_SIZE));
6912 noway_assert((lvaOutgoingArgSpaceSize % TARGET_POINTER_SIZE) == 0);
6914 // Give it a value so we can avoid asserts in CHK builds.
6915 // Since this will always use an SP relative offset of zero
6916 // at the end of lvaFixVirtualFrameOffsets, it will be set to absolute '0'
6918 stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclSize(lvaOutgoingArgSpaceVar), stkOffs);
6920 #endif // FEATURE_FIXED_OUT_ARGS
6922 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6923 // For LoongArch64 and RISCV64, CalleeSavedRegs are at bottom.
6924 int pushedCount = 0;
6926 // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address
6927 // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'.
6928 int pushedCount = compCalleeRegsPushed;
6932 if (info.compIsVarArgs)
6934 pushedCount += MAX_REG_ARG;
6939 if (codeGen->doubleAlignOrFramePointerUsed())
6941 pushedCount += 1; // pushed EBP (frame pointer)
6943 pushedCount += 1; // pushed PC (return address)
6946 noway_assert(compLclFrameSize + originalFrameSize ==
6947 (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));
6950 int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs)
6952 noway_assert(lclNum != BAD_VAR_NUM);
6954 LclVarDsc* lcl = lvaGetDesc(lclNum);
6956 // Before final frame layout, assume the worst case, that every >=8 byte local will need
6957 // maximum padding to be aligned. This is because we generate code based on the stack offset
6958 // computed during tentative frame layout. These offsets cannot get bigger during final
6959 // frame layout, as that would possibly require different code generation (for example,
6960 // using a 4-byte offset instead of a 1-byte offset in an instruction). The offsets can get
6961 // smaller. It is possible there is different alignment at the point locals are allocated
6962 // between tentative and final frame layout which would introduce padding between locals
6963 // and thus increase the offset (from the stack pointer) of one of the locals. Hence the
6964 // need to assume the worst alignment before final frame layout.
6965 // We could probably improve this by sorting all the objects by alignment,
6966 // such that all 8 byte objects are together, 4 byte objects are together, etc., which
6967 // would require at most one alignment padding per group.
6969 // TYP_SIMD structs locals have alignment preference given by getSIMDTypeAlignment() for
6970 // better performance.
6971 if ((size >= 8) && ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || ((stkOffs % 8) != 0)
6972 #if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
6973 || varTypeIsSIMD(lcl)
6977 // Note that stack offsets are negative or equal to zero
6978 assert(stkOffs <= 0);
6980 // alignment padding
6982 #if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
6983 if (varTypeIsSIMD(lcl))
6985 int alignment = getSIMDTypeAlignment(lcl->TypeGet());
6987 if (stkOffs % alignment != 0)
6989 if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
6991 pad = alignment - 1;
6992 // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
6996 pad = alignment + (stkOffs % alignment); // +1 to +(alignment-1) bytes
7001 #endif // FEATURE_SIMD && ALIGN_SIMD_TYPES
7003 if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
7006 // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
7010 pad = 8 + (stkOffs % 8); // +1 to +7 bytes
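// For instance (illustrative): stkOffs == -12 gives pad = 8 + (-12 % 8) = 8 + (-4) = 4,
// bringing stkOffs to -16 (an 8-byte boundary) before the local's own size is subtracted.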
7013 // Will the pad ever be anything except 4? Do we put smaller-than-4-sized objects on the stack?
7014 lvaIncrementFrameSize(pad);
7021 gtDispLclVar(lclNum, /*pad*/ false);
7022 printf(", size=%d, stkOffs=%c0x%x, pad=%d\n", size, stkOffs < 0 ? '-' : '+',
7023 stkOffs < 0 ? -stkOffs : stkOffs, pad);
7027 #endif // TARGET_64BIT
7029 /* Reserve space on the stack by bumping the frame size */
7031 lvaIncrementFrameSize(size);
7033 lcl->SetStackOffset(stkOffs);
7039 gtDispLclVar(lclNum, /*pad*/ false);
7040 printf(", size=%d, stkOffs=%c0x%x\n", size, stkOffs < 0 ? '-' : '+', stkOffs < 0 ? -stkOffs : stkOffs);
7048 /*****************************************************************************
7049 * lvaIsCalleeSavedIntRegCountEven() : returns true if the number of integer registers
7050 * pushed onto the stack is even, including RBP if it is used as the frame pointer.
7052 * Note that this excludes the return address (PC) pushed by the caller. To know whether
7053 * the SP offset after pushing the integer registers is aligned, we need to take
7054 * the negation of this routine's result.
7056 bool Compiler::lvaIsCalleeSavedIntRegCountEven()
7058 unsigned regsPushed = compCalleeRegsPushed + (codeGen->isFramePointerUsed() ? 1 : 0);
7059 return (regsPushed % (16 / REGSIZE_BYTES)) == 0;
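// Example (illustrative): on AMD64, REGSIZE_BYTES == 8 so (16 / REGSIZE_BYTES) == 2 and the
// check reduces to "is regsPushed even?"; pushing RBP, RBX, RSI and RDI gives regsPushed == 4
// and the routine returns true.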
7061 #endif // TARGET_AMD64
7063 /*****************************************************************************
7064 * lvaAlignFrame() : After allocating everything on the frame, reserve any
7065 * extra space needed to keep the frame aligned
7067 void Compiler::lvaAlignFrame()
7069 #if defined(TARGET_AMD64)
7071 // Leaf frames do not need full alignment, but the unwind info is smaller if we
7072 // are at least 8 byte aligned (and we assert as much)
7073 if ((compLclFrameSize % 8) != 0)
7075 lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
7077 else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
7079 // If we are not doing final layout, we don't know the exact value of compLclFrameSize
7080 // and thus do not know how much we will need to add in order to be aligned.
7081 // We add 8 so compLclFrameSize is still a multiple of 8.
7082 lvaIncrementFrameSize(8);
7084 assert((compLclFrameSize % 8) == 0);
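// Worked example (illustrative): a compLclFrameSize of 44 gets 8 - (44 % 8) == 4 added in
// either layout pass, while an already 8-byte-multiple size such as 40 only gets an extra 8
// before final layout, keeping the tentative offsets upper bounds on the final ones.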
7086 // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
7087 // if needed, but off by 8 because of the call-pushed return address.
7088 // And don't forget that compCalleeRegsPushed does *not* include RBP if we are
7089 // using it as the frame pointer.
7091 bool regPushedCountAligned = lvaIsCalleeSavedIntRegCountEven();
7092 bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
7094 // If this isn't the final frame layout, assume we have to push an extra QWORD
7095 // Just so the offsets are true upper limits.
7096 CLANG_FORMAT_COMMENT_ANCHOR;
7098 #ifdef UNIX_AMD64_ABI
7099 // The compNeedToAlignFrame flag indicates whether the frame needs to be aligned.
7100 // On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
7101 // FastTailCall. These slots make the frame size non-zero, so the alignment logic is invoked.
7102 // On AMD64-Unix, there are no such slots, so a method with calls can still have a frame size of 0
7103 // and the frame alignment logic would not kick in. This flag takes care of the AMD64-Unix case by
7104 // remembering that there are calls and making sure the frame alignment logic is executed.
7105 bool stackNeedsAlignment = (compLclFrameSize != 0 || opts.compNeedToAlignFrame);
7106 #else // !UNIX_AMD64_ABI
7107 bool stackNeedsAlignment = compLclFrameSize != 0;
7108 #endif // !UNIX_AMD64_ABI
7109 if ((!codeGen->isFramePointerUsed() && (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)) ||
7110 (stackNeedsAlignment && (regPushedCountAligned == lclFrameSizeAligned)))
7112 lvaIncrementFrameSize(REGSIZE_BYTES);
7115 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7117 // The stack on ARM64/LoongArch64/RISC-V64 must be 16 byte aligned.
7119 // First, align up to 8.
7120 if ((compLclFrameSize % 8) != 0)
7122 lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
7124 else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
7126 // If we are not doing final layout, we don't know the exact value of compLclFrameSize
7127 // and thus do not know how much we will need to add in order to be aligned.
7128 // We add 8 so compLclFrameSize is still a multiple of 8.
7129 lvaIncrementFrameSize(8);
7131 assert((compLclFrameSize % 8) == 0);
7133 // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
7135 bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) == 0;
7136 bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
7138 // If this isn't the final frame layout, assume we have to push an extra QWORD
7139 // Just so the offsets are true upper limits.
7140 if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned))
7142 lvaIncrementFrameSize(REGSIZE_BYTES);
7145 #elif defined(TARGET_ARM)
7147 // Ensure that stack offsets will be double-aligned by grabbing an unused DWORD if needed.
7149 bool lclFrameSizeAligned = (compLclFrameSize % sizeof(double)) == 0;
7150 bool regPushedCountAligned = ((compCalleeRegsPushed + genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true))) %
7151 (sizeof(double) / TARGET_POINTER_SIZE)) == 0;
7153 if (regPushedCountAligned != lclFrameSizeAligned)
7155 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
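// Example (illustrative, ARM32): with 5 callee-saved pushes plus 3 pre-spilled argument
// registers, (5 + 3) % 2 == 0 so the pushed area is double aligned; if compLclFrameSize is
// then 20 (not a multiple of 8), the two parities differ and the 4 extra bytes above are reserved.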
7158 #elif defined(TARGET_X86)
7161 if (genDoubleAlign())
7163 // Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
7165 if (compLclFrameSize == 0)
7167 // This can only happen with JitStress=1 or JitDoubleAlign=2
7168 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
7173 if (STACK_ALIGN > REGSIZE_BYTES)
7175 if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
7177 // If we are not doing final layout, we don't know the exact value of compLclFrameSize
7178 // and thus do not know how much we will need to add in order to be aligned.
7179 // We add the maximum pad that we could ever have (which is 12)
7180 lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
7183 // Align the stack with STACK_ALIGN value.
7184 int adjustFrameSize = compLclFrameSize;
7185 #if defined(UNIX_X86_ABI)
7186 bool isEbpPushed = codeGen->isFramePointerUsed();
7188 isEbpPushed |= genDoubleAlign();
7190 // we need to consider spilled register(s) plus return address and/or EBP
7191 int adjustCount = compCalleeRegsPushed + 1 + (isEbpPushed ? 1 : 0);
7192 adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
7194 if ((adjustFrameSize % STACK_ALIGN) != 0)
7196 lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
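// Example (illustrative, UNIX_X86 with STACK_ALIGN == 16): two callee-saved pushes plus the
// return address and EBP give adjustCount == 4, contributing (4 * 4) % 16 == 0; a
// compLclFrameSize of 24 then needs 16 - (24 % 16) == 8 extra bytes to restore 16-byte alignment.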
7201 NYI("TARGET specific lvaAlignFrame");
7202 #endif // !TARGET_AMD64
7205 /*****************************************************************************
7206 * lvaAssignFrameOffsetsToPromotedStructs() : Assign offsets to fields
7207 * within a promoted struct (worker for lvaAssignFrameOffsets).
7209 void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
7211 LclVarDsc* varDsc = lvaTable;
7212 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++, varDsc++)
7214 // For promoted struct fields that are params, we will
7215 // assign their offsets in lvaAssignVirtualFrameOffsetToArg().
7216 // This is not true for the System V systems since there is no
7217 // outgoing args space. Assign the dependently promoted fields properly.
7219 CLANG_FORMAT_COMMENT_ANCHOR;
7221 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM) || defined(TARGET_X86)
7222 // ARM: lo/hi parts of a promoted long arg need to be updated.
7224 // For System V platforms there is no outgoing args space.
7226 // For System V and x86, a register passed struct arg is homed on the stack in a separate local var.
7227 // The offset of these structs is already calculated in lvaAssignVirtualFrameOffsetToArg method.
7228 // Make sure the code below is not executed for these structs and the offset is not changed.
7230 const bool mustProcessParams = true;
7232 // OSR must also assign offsets here.
7234 const bool mustProcessParams = opts.IsOSR();
7235 #endif // defined(UNIX_AMD64_ABI) || defined(TARGET_ARM) || defined(TARGET_X86)
7237 if (varDsc->lvIsStructField && (!varDsc->lvIsParam || mustProcessParams))
7239 LclVarDsc* parentvarDsc = lvaGetDesc(varDsc->lvParentLcl);
7240 lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
7242 if (promotionType == PROMOTION_TYPE_INDEPENDENT)
7244 // The stack offset for these field locals must have been calculated
7245 // by the normal frame offset assignment.
7250 noway_assert(promotionType == PROMOTION_TYPE_DEPENDENT);
7251 noway_assert(varDsc->lvOnFrame);
7252 if (parentvarDsc->lvOnFrame)
7254 JITDUMP("Adjusting offset of dependent V%02u of V%02u: parent %u field %u net %u\n", lclNum,
7255 varDsc->lvParentLcl, parentvarDsc->GetStackOffset(), varDsc->lvFldOffset,
7256 parentvarDsc->GetStackOffset() + varDsc->lvFldOffset);
7257 varDsc->SetStackOffset(parentvarDsc->GetStackOffset() + varDsc->lvFldOffset);
7261 varDsc->lvOnFrame = false;
7262 noway_assert(varDsc->lvRefCnt() == 0);
7269 /*****************************************************************************
7270 * lvaAllocateTemps() : Assign virtual offsets to temps (always negative).
7272 int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign)
7274 unsigned spillTempSize = 0;
7276 if (lvaDoneFrameLayout == FINAL_FRAME_LAYOUT)
7278 int preSpillSize = 0;
7280 preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * TARGET_POINTER_SIZE;
7283 /* Allocate temps */
7285 assert(codeGen->regSet.tmpAllFree());
7287 for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
7289 var_types tempType = temp->tdTempType();
7290 unsigned size = temp->tdTempSize();
7292 /* Figure out and record the stack offset of the temp */
7294 /* Need to align the offset? */
7295 CLANG_FORMAT_COMMENT_ANCHOR;
7298 if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
7300 // Calculate 'pad' as the number of bytes to align up 'stkOffs' to be a multiple of TARGET_POINTER_SIZE
7301 // In practice this is really just a fancy way of writing 4. (as all stack locations are at least 4-byte
7302 // aligned). Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative value.
7305 int alignPad = (int)AlignmentPad((unsigned)-stkOffs, TARGET_POINTER_SIZE);
7307 spillTempSize += alignPad;
7308 lvaIncrementFrameSize(alignPad);
7309 stkOffs -= alignPad;
7311 noway_assert((stkOffs % TARGET_POINTER_SIZE) == 0);
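// Example (illustrative, 64-bit): with stkOffs == -12, AlignmentPad(12, 8) == 4, so the frame
// grows by 4 bytes and stkOffs moves to -16 before the GC-typed temp is placed.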
7315 if (mustDoubleAlign && (tempType == TYP_DOUBLE)) // Align doubles for x86 and ARM
7317 noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
7319 if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
7321 spillTempSize += TARGET_POINTER_SIZE;
7322 lvaIncrementFrameSize(TARGET_POINTER_SIZE);
7323 stkOffs -= TARGET_POINTER_SIZE;
7325 // We should now have a double-aligned (stkOffs+preSpillSize)
7326 noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
7329 spillTempSize += size;
7330 lvaIncrementFrameSize(size);
7332 temp->tdSetTempOffs(stkOffs);
7336 // It is only required for the ARM platform that we have an accurate estimate for spillTempSize.
7336 noway_assert(spillTempSize <= lvaGetMaxSpillTempSize());
7339 else // We haven't run codegen, so there are no Spill temps yet!
7341 unsigned size = lvaGetMaxSpillTempSize();
7343 lvaIncrementFrameSize(size);
7352 /*****************************************************************************
7354 * Dump the register a local is in right now. It is only the current location, since the location changes and it
7355 * is updated throughout code generation based on LSRA register assignments.
7358 void Compiler::lvaDumpRegLocation(unsigned lclNum)
7360 const LclVarDsc* varDsc = lvaGetDesc(lclNum);
7363 if (varDsc->TypeGet() == TYP_DOUBLE)
7365 // The assigned registers are `lvRegNum:RegNext(lvRegNum)`
7366 printf("%3s:%-3s ", getRegName(varDsc->GetRegNum()), getRegName(REG_NEXT(varDsc->GetRegNum())));
7369 #endif // TARGET_ARM
7371 printf("%3s ", getRegName(varDsc->GetRegNum()));
7375 /*****************************************************************************
7377 * Dump the frame location assigned to a local.
7378 * It's the home location, even though the variable doesn't always live
7379 * in its home location.
7382 void Compiler::lvaDumpFrameLocation(unsigned lclNum)
7388 offset = lvaFrameAddress(lclNum, compLocallocUsed, &baseReg, 0, /* isFloatUsage */ false);
7391 offset = lvaFrameAddress(lclNum, &EBPbased);
7392 baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
7395 printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
7398 /*****************************************************************************
7400 * dump a single lvaTable entry
7403 void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth)
7405 LclVarDsc* varDsc = lvaGetDesc(lclNum);
7406 var_types type = varDsc->TypeGet();
7408 if (curState == INITIAL_FRAME_LAYOUT)
7411 gtDispLclVar(lclNum);
7413 printf(" %7s ", varTypeName(type));
7414 gtDispLclVarStructType(lclNum);
7418 if (varDsc->lvRefCnt() == 0)
7420 // Print this with a special indicator that the variable is unused. Even though the
7421 // variable itself is unused, it might be a struct that is promoted, so seeing it
7422 // can be useful when looking at the promoted struct fields. It's also weird to see
7423 // missing var numbers if these aren't printed.
7426 #if FEATURE_FIXED_OUT_ARGS
7427 // Since lvaOutgoingArgSpaceSize is a PhasedVar we can't read it for Dumping until
7428 // after we set it to something.
7429 else if ((lclNum == lvaOutgoingArgSpaceVar) && lvaOutgoingArgSpaceSize.HasFinalValue() &&
7430 (lvaOutgoingArgSpaceSize == 0))
7432 // Similar to above; print this anyway.
7435 #endif // FEATURE_FIXED_OUT_ARGS
7441 gtDispLclVar(lclNum);
7443 printf("[V%02u", lclNum);
7444 if (varDsc->lvTracked)
7446 printf(",T%02u]", varDsc->lvVarIndex);
7453 printf(" (%3u,%*s)", varDsc->lvRefCnt(lvaRefCountState), (int)refCntWtdWidth,
7454 refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
7456 printf(" %7s ", varTypeName(type));
7457 if (genTypeSize(type) == 0)
7459 printf("(%2d) ", lvaLclSize(lclNum));
7466 // The register or stack location field is 11 characters wide.
7467 if ((varDsc->lvRefCnt(lvaRefCountState) == 0) && !varDsc->lvImplicitlyReferenced)
7469 printf("zero-ref ");
7471 else if (varDsc->lvRegister != 0)
7473 // It's always a register, and always in the same register.
7474 lvaDumpRegLocation(lclNum);
7476 else if (varDsc->lvOnFrame == 0)
7478 printf("registers ");
7482 // For the RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack
7483 // home location; otherwise, it's always on the stack.
7484 if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
7486 lvaDumpFrameLocation(lclNum);
7491 if (varDsc->lvIsHfa())
7493 printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType()));
7496 if (varDsc->lvDoNotEnregister)
7498 printf(" do-not-enreg[");
7499 if (varDsc->IsAddressExposed())
7503 if (varDsc->IsHiddenBufferStructArg())
7507 if (varTypeIsStruct(varDsc))
7511 if (varDsc->GetDoNotEnregReason() == DoNotEnregisterReason::VMNeedsStackAddr)
7515 if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr)
7517 printf("%c", varDsc->lvSingleDefDisqualifyReason);
7519 if (varDsc->GetDoNotEnregReason() == DoNotEnregisterReason::LocalField)
7523 if (varDsc->GetDoNotEnregReason() == DoNotEnregisterReason::BlockOp)
7527 if (varDsc->lvIsMultiRegArg)
7531 if (varDsc->lvIsMultiRegRet)
7535 #ifdef JIT32_GCENCODER
7536 if (varDsc->lvPinned)
7538 #endif // JIT32_GCENCODER
7542 if (varDsc->lvIsMultiRegArg)
7544 printf(" multireg-arg");
7546 if (varDsc->lvIsMultiRegRet)
7548 printf(" multireg-ret");
7550 if (varDsc->lvMustInit)
7552 printf(" must-init");
7554 if (varDsc->IsAddressExposed())
7556 printf(" addr-exposed");
7558 if (varDsc->IsHiddenBufferStructArg())
7560 printf(" hidden-struct-arg");
7562 if (varDsc->lvHasLdAddrOp)
7564 printf(" ld-addr-op");
7566 if (lvaIsOriginalThisArg(lclNum))
7570 if (varDsc->lvPinned)
7574 if (varDsc->lvClassHnd != NO_CLASS_HANDLE)
7576 printf(" class-hnd");
7578 if (varDsc->lvClassIsExact)
7582 if (varDsc->lvLiveInOutOfHndlr)
7586 if (varDsc->lvSpillAtSingleDef)
7588 printf(" spill-single-def");
7590 else if (varDsc->lvSingleDefRegCandidate)
7592 printf(" single-def");
7594 if (lvaIsOSRLocal(lclNum) && varDsc->lvOnFrame)
7596 printf(" tier0-frame");
7599 #ifndef TARGET_64BIT
7600 if (varDsc->lvStructDoubleAlign)
7601 printf(" double-align");
7602 #endif // !TARGET_64BIT
7604 if (compGSReorderStackLayout && !varDsc->lvRegister)
7606 if (varDsc->lvIsPtr)
7610 if (varDsc->lvIsUnsafeBuffer)
7612 printf(" unsafe-buffer");
7616 if (varDsc->lvReason != nullptr)
7618 printf(" \"%s\"", varDsc->lvReason);
7621 if (varDsc->lvIsStructField)
7623 LclVarDsc* parentVarDsc = lvaGetDesc(varDsc->lvParentLcl);
7624 lvaPromotionType promotionType = lvaGetPromotionType(parentVarDsc);
7625 switch (promotionType)
7627 case PROMOTION_TYPE_NONE:
7630 case PROMOTION_TYPE_DEPENDENT:
7633 case PROMOTION_TYPE_INDEPENDENT:
7639 if (varDsc->lvClassHnd != NO_CLASS_HANDLE)
7641 printf(" <%s>", eeGetClassName(varDsc->lvClassHnd));
7643 else if (varTypeIsStruct(varDsc->TypeGet()))
7645 ClassLayout* layout = varDsc->GetLayout();
7646 if (layout != nullptr && !layout->IsBlockLayout())
7648 printf(" <%s>", layout->GetClassName());
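// Editor's illustrative example (not from the original source) of a complete entry as
// printed by this function after frame layout, with made-up values:
//
//   ;  V03 loc1         [V03,T01] (  4,  8   )     int  ->  [rbp-0x14]   do-not-enreg[X] addr-exposed
//
// i.e. the IL name/number, tracked index, ref count and weighted ref count, type,
// register or frame location, and finally the attribute flags.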
7655 /*****************************************************************************
7660 void Compiler::lvaTableDump(FrameLayoutState curState)
7662 if (curState == NO_FRAME_LAYOUT)
7664 curState = lvaDoneFrameLayout;
7665 if (curState == NO_FRAME_LAYOUT)
7667 // Still no layout? Could be a bug, but just display the initial layout
7668 curState = INITIAL_FRAME_LAYOUT;
7672 if (curState == INITIAL_FRAME_LAYOUT)
7674 printf("; Initial");
7676 else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
7678 printf("; Pre-RegAlloc");
7680 else if (curState == REGALLOC_FRAME_LAYOUT)
7682 printf("; RegAlloc");
7684 else if (curState == TENTATIVE_FRAME_LAYOUT)
7686 printf("; Tentative");
7688 else if (curState == FINAL_FRAME_LAYOUT)
7694 printf("UNKNOWN FrameLayoutState!");
7698 printf(" local variable assignments\n");
7704 // Figure out some sizes, to help line things up
7706 size_t refCntWtdWidth = 6; // Use 6 as the minimum width
7708 if (curState != INITIAL_FRAME_LAYOUT) // don't need this info for INITIAL_FRAME_LAYOUT
7710 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
7712 size_t width = strlen(refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
7713 if (width > refCntWtdWidth)
7715 refCntWtdWidth = width;
7720 // Do the actual output
7722 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
7724 lvaDumpEntry(lclNum, curState, refCntWtdWidth);
7727 //-------------------------------------------------------------------------
7728 // Display the code-gen temps
7730 assert(codeGen->regSet.tmpAllFree());
7731 for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
7733 printf("; TEMP_%02u %26s%*s%7s -> ", -temp->tdTempNum(), " ", (int)refCntWtdWidth, " ",
7734 varTypeName(temp->tdTempType()));
7735 int offset = temp->tdTempOffs();
7736 printf(" [%2s%1s0x%02X]\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE, (offset < 0 ? "-" : "+"),
7737 (offset < 0 ? -offset : offset));
7740 if (curState >= TENTATIVE_FRAME_LAYOUT)
7743 printf("; Lcl frame size = %d\n", compLclFrameSize);
7748 /*****************************************************************************
7750 * Conservatively estimate the layout of the stack frame.
7752 * This function is only used before final frame layout. It conservatively estimates the
7753 * number of callee-saved registers that must be saved, then calls lvaAssignFrameOffsets().
7754 * To do final frame layout, the callee-saved registers are known precisely, so
7755 * lvaAssignFrameOffsets() is called directly.
7757 * Returns the (conservative, that is, overly large) estimated size of the frame,
7758 * including the callee-saved registers. This is only used by the emitter during code
7759 * generation when estimating the size of the offset of instructions accessing temps,
7760 * and only if temps have a larger offset than variables.
7763 unsigned Compiler::lvaFrameSize(FrameLayoutState curState)
7765 assert(curState < FINAL_FRAME_LAYOUT);
7769 /* Layout the stack frame conservatively.
7770 Assume all callee-saved registers are spilled to stack */
7772 compCalleeRegsPushed = CNT_CALLEE_SAVED;
7774 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7775 if (compFloatingPointUsed)
7776 compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT;
7778 compCalleeRegsPushed++; // we always push LR or RA. See genPushCalleeSavedRegisters
7779 #elif defined(TARGET_AMD64)
7780 if (compFloatingPointUsed)
7782 compCalleeFPRegsSavedMask = RBM_FLT_CALLEE_SAVED;
7786 compCalleeFPRegsSavedMask = RBM_NONE;
7791 if (genDoubleAlign())
7793 // X86 only - account for extra 4-byte pad that may be created by "and esp, -8" instruction
7794 compCalleeRegsPushed++;
7799 // Since FP/EBP is included in the SAVED_REG_MAXSZ we need to
7800 // subtract 1 register if codeGen->isFramePointerUsed() is true.
7801 if (codeGen->isFramePointerUsed())
7803 compCalleeRegsPushed--;
7807 lvaAssignFrameOffsets(curState);
7809 unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
7810 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7811 if (compFloatingPointUsed)
7813 calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
7815 calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR or RA. See genPushCalleeSavedRegisters
7818 result = compLclFrameSize + calleeSavedRegMaxSz;
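// Summary of the estimate (editor's note, not part of the original source):
//
//   result = compLclFrameSize             // locals, spill temps, outgoing args
//          + CALLEE_SAVED_REG_MAXSZ       // assume every integer callee-save is pushed
//          (+ CALLEE_SAVED_FLOAT_MAXSZ)   // ARM/ARM64/LA64/RISC-V, if FP is used
//          (+ REGSIZE_BYTES)              // ARM-family LR/RA push
//
// so the value can only over-estimate the final frame size, never under-estimate it.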
7822 //------------------------------------------------------------------------
7823 // lvaGetSPRelativeOffset: Given a variable, return the offset of that
7824 // variable in the frame from the stack pointer. This number will be positive,
7825 // since the stack pointer must be at a lower address than everything on the
7828 // This can't be called for localloc functions, since the stack pointer
7829 // varies, and thus there is no fixed offset to a variable from the stack pointer.
7832 // varNum - the variable number
7837 int Compiler::lvaGetSPRelativeOffset(unsigned varNum)
7839 assert(!compLocallocUsed);
7840 assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
7841 const LclVarDsc* varDsc = lvaGetDesc(varNum);
7842 assert(varDsc->lvOnFrame);
7843 int spRelativeOffset;
7845 if (varDsc->lvFramePointerBased)
7847 // The stack offset is relative to the frame pointer, so convert it to be
7848 // relative to the stack pointer (which makes no sense for localloc functions).
7849 spRelativeOffset = varDsc->GetStackOffset() + codeGen->genSPtoFPdelta();
7853 spRelativeOffset = varDsc->GetStackOffset();
7856 assert(spRelativeOffset >= 0);
7857 return spRelativeOffset;
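// Illustrative worked example (editor's note, not from the original source), with
// made-up numbers: for an FP-based local with GetStackOffset() == -0x18 and
// genSPtoFPdelta() == 0x40 (the frame pointer sits 0x40 bytes above SP), the
// SP-relative offset is -0x18 + 0x40 == 0x28, i.e. 0x28 bytes above the stack pointer.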
7860 /*****************************************************************************
7862 * Return the caller-SP-relative stack offset of a local/parameter.
7863 * Requires the local to be on the stack and frame layout to be complete.
7866 int Compiler::lvaGetCallerSPRelativeOffset(unsigned varNum)
7868 assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
7869 const LclVarDsc* varDsc = lvaGetDesc(varNum);
7870 assert(varDsc->lvOnFrame);
7872 return lvaToCallerSPRelativeOffset(varDsc->GetStackOffset(), varDsc->lvFramePointerBased);
7875 //-----------------------------------------------------------------------------
7876 // lvaToCallerSPRelativeOffset: translate a frame offset into an offset from
7877 // the caller's stack pointer.
7880 // offset - frame offset
7881 // isFpBase - if true, offset is from FP, otherwise offset is from SP
7882 // forRootFrame - if the current method is an OSR method, adjust the offset
7883 // to be relative to the SP for the root method, instead of being relative
7884 // to the SP for the OSR method.
7889 int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased, bool forRootFrame) const
7891 assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
7895 offset += codeGen->genCallerSPtoFPdelta();
7899 offset += codeGen->genCallerSPtoInitialSPdelta();
7902 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7903 if (forRootFrame && opts.IsOSR())
7905 const PatchpointInfo* const ppInfo = info.compPatchpointInfo;
7907 #if defined(TARGET_AMD64)
7908 // The offset computed above already includes the OSR frame adjustment, plus the
7909 // pop of the "pseudo return address" from the OSR frame.
7911 // To get to root method caller-SP, we need to subtract off the tier0 frame
7912 // size and the pushed return address and RBP for the tier0 frame (which we know is an
7915 // ppInfo's TotalFrameSize also accounts for the popped pseudo return address
7916 // between the tier0 method frame and the OSR frame. So the net adjustment
7917 // is simply TotalFrameSize plus one register.
7919 const int adjustment = ppInfo->TotalFrameSize() + REGSIZE_BYTES;
7921 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7923 const int adjustment = ppInfo->TotalFrameSize();
7926 offset -= adjustment;
7929 // OSR NYI for other targets.
7930 assert(!opts.IsOSR());
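// Illustrative worked example (editor's note, not from the original source), with
// made-up numbers for x64: if the patchpoint info reports TotalFrameSize() == 0x70,
// the adjustment is 0x70 + REGSIZE_BYTES == 0x78, so an offset of -0x20 relative to
// the OSR method's caller-SP becomes -0x20 - 0x78 == -0x98 relative to the root
// (Tier0) method's caller-SP.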
7936 /*****************************************************************************
7938 * Return the Initial-SP-relative stack offset of a local/parameter.
7939 * Requires the local to be on the stack and frame layout to be complete.
7942 int Compiler::lvaGetInitialSPRelativeOffset(unsigned varNum)
7944 assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
7945 const LclVarDsc* varDsc = lvaGetDesc(varNum);
7946 assert(varDsc->lvOnFrame);
7948 return lvaToInitialSPRelativeOffset(varDsc->GetStackOffset(), varDsc->lvFramePointerBased);
7951 // Given a local variable offset, and whether that offset is frame-pointer based, return its offset from Initial-SP.
7952 // This is used, for example, to figure out the offset of the frame pointer from Initial-SP.
7953 int Compiler::lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased)
7955 assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
7959 // Currently, the frame starts by pushing ebp; ebp then points to the saved ebp
7960 // (so we have ebp pointer chaining). Add the fixed-size frame size plus the
7961 // size of the callee-saved regs (not including ebp itself) to find Initial-SP.
7963 assert(codeGen->isFramePointerUsed());
7964 offset += codeGen->genSPtoFPdelta();
7968 // The offset is correct already!
7970 #else // !TARGET_AMD64
7971 NYI("lvaToInitialSPRelativeOffset");
7972 #endif // !TARGET_AMD64
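// Illustrative worked example (editor's note, not from the original source): on x64
// with an RBP-based frame, a local at FP-relative offset -0x10 with
// genSPtoFPdelta() == 0x50 is reported at Initial-SP-relative offset
// -0x10 + 0x50 == 0x40; an SP-based offset is already Initial-SP relative and is
// returned unchanged.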
7977 /*****************************************************************************/
7980 //-----------------------------------------------------------------------------
7981 // lvaStressLclFldPadding: Pick a padding size at "random".
7984 // Padding amount in bytes
7986 unsigned Compiler::lvaStressLclFldPadding(unsigned lclNum)
7988 // TODO: make this a bit more random, eg:
7989 // return (lclNum ^ info.compMethodHash() ^ getJitStressLevel()) % 8;
7991 // Convert every 2nd variable
7997 // Pick a padding size at "random"
7998 unsigned size = lclNum % 7;
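// Illustrative arithmetic (editor's note, not from the original source): a local that
// passes the "every 2nd variable" filter above draws lclNum % 7 bytes of padding,
// e.g. V04 -> 4 and V10 -> 3; on ARM/ARM64/LA64/RISC-V the callback below then rounds
// the padding up to genTypeSize(TYP_DOUBLE), so 4 becomes 8 (see lvaStressLclFldCB).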
8003 //-----------------------------------------------------------------------------
8004 // lvaStressLclFldCB: Convert GT_LCL_VAR's to GT_LCL_FLD's
8007 // pTree -- pointer to tree to possibly convert
8008 // data -- walker data
8011 // The stress mode does 2 passes.
8013 // In the first pass we will mark the locals where we CANNOT apply the stress mode.
8014 // In the second pass we will do the appropriate morphing wherever we've not determined we can't do it.
8016 Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* data)
8018 GenTree* const tree = *pTree;
8019 GenTreeLclVarCommon* lcl = tree->OperIsAnyLocal() ? tree->AsLclVarCommon() : nullptr;
8023 return WALK_CONTINUE;
8026 Compiler* const pComp = ((lvaStressLclFldArgs*)data->pCallbackData)->m_pCompiler;
8027 bool const bFirstPass = ((lvaStressLclFldArgs*)data->pCallbackData)->m_bFirstPass;
8028 unsigned const lclNum = lcl->GetLclNum();
8029 LclVarDsc* const varDsc = pComp->lvaGetDesc(lclNum);
8030 var_types const lclType = lcl->TypeGet();
8031 var_types const varType = varDsc->TypeGet();
8033 if (varDsc->lvNoLclFldStress)
8035 // Already determined we can't do anything for this var
8036 return WALK_CONTINUE;
8041 // Ignore locals that already have field appearances
8042 if (lcl->OperIs(GT_LCL_FLD, GT_STORE_LCL_FLD) ||
8043 (lcl->OperIs(GT_LCL_ADDR) && (lcl->AsLclFld()->GetLclOffs() != 0)))
8045 varDsc->lvNoLclFldStress = true;
8046 return WALK_CONTINUE;
8049 // Ignore arguments and temps
8050 if (varDsc->lvIsParam || lclNum >= pComp->info.compLocalsCount)
8052 varDsc->lvNoLclFldStress = true;
8053 return WALK_CONTINUE;
8056 // Ignore OSR locals; if in memory, they will live on the
8057 // Tier0 frame and so can't have their storage adjusted.
8059 if (pComp->lvaIsOSRLocal(lclNum))
8061 varDsc->lvNoLclFldStress = true;
8062 return WALK_CONTINUE;
8065 // Likewise for Tier0 methods with patchpoints --
8066 // if we modify them we'll misreport their locations in the patchpoint info.
8068 if (pComp->doesMethodHavePatchpoints() || pComp->doesMethodHavePartialCompilationPatchpoints())
8070 varDsc->lvNoLclFldStress = true;
8071 return WALK_CONTINUE;
8074 // Converting tail calls to loops may require insertion of explicit
8075 // zero initialization for IL locals. The JIT does not support this for
8077 // TODO-Cleanup: Can probably be removed now since TYP_BLK does not
8079 if (pComp->compMayConvertTailCallToLoop)
8081 varDsc->lvNoLclFldStress = true;
8082 return WALK_CONTINUE;
8085 // Fix for lcl_fld stress mode
8086 if (varDsc->lvKeepType)
8088 varDsc->lvNoLclFldStress = true;
8089 return WALK_CONTINUE;
8092 // Can't have GC ptrs in block layouts.
8093 if (!varTypeIsArithmetic(lclType))
8095 varDsc->lvNoLclFldStress = true;
8096 return WALK_CONTINUE;
8099 // The noway_assert in the second pass below requires that these types match
8101 if (varType != lclType)
8103 varDsc->lvNoLclFldStress = true;
8104 return WALK_CONTINUE;
8107 // Weed out "small" types like TYP_BYTE as we don't mark the GT_LCL_VAR
8108 // node with the accurate small type. If we bash lvaTable[].lvType,
8109 // then there will be no indication that it was ever a small type.
8111 if (genTypeSize(varType) != genTypeSize(genActualType(varType)))
8113 varDsc->lvNoLclFldStress = true;
8114 return WALK_CONTINUE;
8117 // Offset some of the local variables by a "random" non-zero amount
8119 unsigned padding = pComp->lvaStressLclFldPadding(lclNum);
8122 varDsc->lvNoLclFldStress = true;
8123 return WALK_CONTINUE;
8129 noway_assert((varType == lclType) || ((varType == TYP_STRUCT) && varDsc->GetLayout()->IsBlockLayout()));
8131 // Calculate padding
8132 unsigned padding = pComp->lvaStressLclFldPadding(lclNum);
8134 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
8135 // We need to support alignment requirements to access memory.
8136 // Be conservative and use the maximally aligned type here.
8137 padding = roundUp(padding, genTypeSize(TYP_DOUBLE));
8138 #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
8140 if (varType != TYP_STRUCT)
8142 // Change the variable to a block struct
8143 ClassLayout* layout =
8144 pComp->typGetBlkLayout(roundUp(padding + pComp->lvaLclSize(lclNum), TARGET_POINTER_SIZE));
8145 varDsc->lvType = TYP_STRUCT;
8146 varDsc->SetLayout(layout);
8147 pComp->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::STRESS_LCL_FLD));
8149 JITDUMP("Converting V%02u to %u sized block with LCL_FLD at offset (padding %u)\n", lclNum,
8150 layout->GetSize(), padding);
8153 tree->gtFlags |= GTF_GLOB_REF;
8156 if (tree->OperIs(GT_LCL_VAR))
8158 tree->SetOper(GT_LCL_FLD);
8160 else if (tree->OperIs(GT_STORE_LCL_VAR))
8162 tree->SetOper(GT_STORE_LCL_FLD);
8165 tree->AsLclFld()->SetLclOffs(padding);
8167 if (tree->OperIs(GT_STORE_LCL_FLD) && tree->IsPartialLclFld(pComp))
8169 tree->gtFlags |= GTF_VAR_USEASG;
8173 return WALK_CONTINUE;
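// Editor's before/after sketch (not part of the original source) for a TYP_INT local
// V04 whose padding works out to 8 bytes (e.g. after the round-up above on a 64-bit
// ARM-family target):
//
//   before:  GT_LCL_VAR       int V04
//   after:   GT_LCL_FLD       int V04 [+8]
//
// V04 itself is retyped to a TYP_STRUCT with a block layout of
// roundUp(8 + 4, TARGET_POINTER_SIZE) == 16 bytes, marked address-exposed, and every
// appearance now reads or writes the int at offset 8 within that block.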
8176 /*****************************************************************************/
8178 void Compiler::lvaStressLclFld()
8180 if (!compStressCompile(STRESS_LCL_FLDS, 5))
8185 lvaStressLclFldArgs Args;
8186 Args.m_pCompiler = this;
8187 Args.m_bFirstPass = true;
8190 fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
8193 Args.m_bFirstPass = false;
8194 fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
8199 /*****************************************************************************
8201 * A little routine that displays a local variable bitset.
8202  *  'set' is the mask of variables that have to be displayed
8203  *  'allVars' is the complete set of interesting variables (a blank space is
8204  *  printed for any variable whose bit is in 'allVars' but not in 'set').
8208 void Compiler::lvaDispVarSet(VARSET_VALARG_TP set)
8210 VARSET_TP allVars(VarSetOps::MakeEmpty(this));
8211 lvaDispVarSet(set, allVars);
8214 void Compiler::lvaDispVarSet(VARSET_VALARG_TP set, VARSET_VALARG_TP allVars)
8218 bool needSpace = false;
8220 for (unsigned index = 0; index < lvaTrackedCount; index++)
8222 if (VarSetOps::IsMember(this, set, index))
8227 /* Look for the matching variable */
8229 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
8231 if ((varDsc->lvVarIndex == index) && varDsc->lvTracked)
8246 printf("V%02u", lclNum);
8248 else if (VarSetOps::IsMember(this, allVars, index))