// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                        Code Generator Common:                             XX
XX   Methods common to all architectures and register allocation strategies XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
// identical, and which should probably be moved here.

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "codegen.h"

#ifndef JIT32_GCENCODER
#include "gcinfoencoder.h"
#endif
#include "patchpointinfo.h"

/*****************************************************************************/
void CodeGenInterface::setFramePointerRequiredEH(bool value)
{
    m_cgFramePointerRequired = value;

#ifndef JIT32_GCENCODER
    if (value)
    {
        // EnumGcRefs will only enumerate slots in aborted frames
        // if they are fully-interruptible. So if we have a catch
        // or finally that will keep frame-vars alive, we need to
        // force fully-interruptible.
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
        if (verbose)
        {
            printf("Method has EH, marking method as fully interruptible\n");
        }
#endif

        m_cgInterruptible = true;
    }
#endif // JIT32_GCENCODER
}
/*****************************************************************************/
CodeGenInterface* getCodeGenerator(Compiler* comp)
{
    return new (comp, CMK_Codegen) CodeGen(comp);
}
// CodeGen constructor
CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
    : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
{
}
#if defined(TARGET_XARCH)
void CodeGenInterface::CopyRegisterInfo()
{
#if defined(TARGET_AMD64)
    rbmAllFloat       = compiler->rbmAllFloat;
    rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
#endif // TARGET_AMD64

    rbmAllMask        = compiler->rbmAllMask;
    rbmMskCalleeTrash = compiler->rbmMskCalleeTrash;
}
#endif // TARGET_XARCH
/*****************************************************************************/

CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
{
#if defined(TARGET_XARCH)
    negBitmaskFlt  = nullptr;
    negBitmaskDbl  = nullptr;
    absBitmaskFlt  = nullptr;
    absBitmaskDbl  = nullptr;
    zroSimd12Elm3  = nullptr;
    u8ToDblBitmask = nullptr;
#endif // defined(TARGET_XARCH)

#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86)
    m_stkArgVarNum = BAD_VAR_NUM;
#endif

#if defined(UNIX_X86_ABI)
    curNestedAlignment = 0;
    maxNestedAlignment = 0;
#endif

    gcInfo.regSet        = &regSet;
    m_cgEmitter          = new (compiler->getAllocator()) emitter();
    m_cgEmitter->codeGen = this;
    m_cgEmitter->gcInfo  = &gcInfo;

#ifdef DEBUG
    setVerbose(compiler->verbose);
#endif // DEBUG

#ifdef LATE_DISASM
    getDisAssembler().disInit(compiler);
#endif

#ifdef DEBUG
    genTempLiveChg        = true;
    genTrnslLocalVarCount = 0;

    // Shouldn't be used before it is set in genFnProlog()
    compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);

#if defined(TARGET_XARCH)
    // Shouldn't be used before it is set in genFnProlog()
    compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
#endif // defined(TARGET_XARCH)
#endif // DEBUG

#ifdef TARGET_AMD64
    // This will be set before final frame layout.
    compiler->compVSQuirkStackPaddingNeeded = 0;
#endif // TARGET_AMD64

    compiler->genCallSite2DebugInfoMap = nullptr;

    /* Assume that we are not fully interruptible */

    SetInterruptible(false);
#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
    SetHasTailCalls(false);
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
#ifdef DEBUG
    genInterruptibleUsed = false;
    genCurDispOffset     = (unsigned)-1;
#endif

#ifdef TARGET_ARM64
    genSaveFpLrWithAllCalleeSavedRegisters = false;
    genForceFuncletFrameType5              = false;
#endif // TARGET_ARM64
}
#if defined(TARGET_X86) || defined(TARGET_ARM)

//---------------------------------------------------------------------
// genTotalFrameSize - return the "total" size of the stack frame, including local size
// and callee-saved register size. There are a few things "missing" depending on the
// platform. The function genCallerSPtoInitialSPdelta() includes those things.
//
// For ARM, this doesn't include the prespilled registers.
//
// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
// It also doesn't include the pushed return address.
//
// Return value:
//    Frame size

int CodeGenInterface::genTotalFrameSize() const
{
    assert(!IsUninitialized(compiler->compCalleeRegsPushed));

    int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;

    assert(totalFrameSize >= 0);
    return totalFrameSize;
}
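
// For a concrete (illustrative, not from the original source) x86 example: with
// compCalleeRegsPushed == 3 and compLclFrameSize == 0x20, genTotalFrameSize()
// returns 3 * REGSIZE_BYTES + 0x20 == 3 * 4 + 32 == 44 bytes; the pushed return
// address and EBP (if used) are intentionally not included.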
//---------------------------------------------------------------------
// genSPtoFPdelta - return the offset from SP to the frame pointer.
// This number is going to be positive, since SP must be at the lowest
// address.
//
// There must be a frame pointer to call this function!

int CodeGenInterface::genSPtoFPdelta() const
{
    assert(isFramePointerUsed());

    int delta;

    delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();

    assert(delta >= 0);
    return delta;
}
//---------------------------------------------------------------------
// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
// This number is going to be negative, since the Caller-SP is at a higher
// address than the frame pointer.
//
// There must be a frame pointer to call this function!

int CodeGenInterface::genCallerSPtoFPdelta() const
{
    assert(isFramePointerUsed());
    int callerSPtoFPdelta = 0;

#if defined(TARGET_ARM)
    // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
    callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
    callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#elif defined(TARGET_X86)
    // Thanks to ebp chaining, the difference between ebp-based addresses
    // and caller-SP-relative addresses is just the 2 pointers:
    //     return address
    //     pushed ebp
    callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#else
#error "Unknown TARGET"
#endif // TARGET*

    assert(callerSPtoFPdelta <= 0);
    return callerSPtoFPdelta;
}
//---------------------------------------------------------------------
// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
//
// This number will be negative.

int CodeGenInterface::genCallerSPtoInitialSPdelta() const
{
    int callerSPtoSPdelta = 0;

#if defined(TARGET_ARM)
    callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
    callerSPtoSPdelta -= genTotalFrameSize();
#elif defined(TARGET_X86)
    callerSPtoSPdelta -= genTotalFrameSize();
    callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address

    // compCalleeRegsPushed does not account for the frame pointer
    // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
    if (isFramePointerUsed())
    {
        callerSPtoSPdelta -= REGSIZE_BYTES;
    }
#else
#error "Unknown TARGET"
#endif // TARGET*

    assert(callerSPtoSPdelta <= 0);
    return callerSPtoSPdelta;
}

#endif // defined(TARGET_X86) || defined(TARGET_ARM)
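
// Putting the three deltas together on x86 (hypothetical numbers, for illustration
// only): with an EBP frame, 2 callee-saved registers, and 0x10 bytes of locals,
// genTotalFrameSize() == 2 * 4 + 0x10 == 0x18, genCallerSPtoInitialSPdelta() ==
// -(0x18 + 4 (return address) + 4 (pushed EBP)) == -0x20, and
// genCallerSPtoFPdelta() == -8, so genSPtoFPdelta() == 0x20 - 8 == 0x18.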
/*****************************************************************************
 *  Should we round simple operations (assignments, arithmetic operations, etc.)
 */

// inline
// static
bool CodeGen::genShouldRoundFP()
{
    RoundLevel roundLevel = getRoundFloatLevel();

    switch (roundLevel)
    {
        case ROUND_NEVER:
        case ROUND_CMP_CONST:
        case ROUND_CMP:
            return false;

        default:
            assert(roundLevel == ROUND_ALWAYS);
            return true;
    }
}
/*****************************************************************************
 *
 *  Initialize some global variables.
 */

void CodeGen::genPrepForCompiler()
{
    treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);

    /* Figure out which non-register variables hold pointers */

    VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));

    // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
    // in a register (i.e. they live on the stack for all or part of their lifetime).
    // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.

    unsigned   varNum;
    LclVarDsc* varDsc;

    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
    {
        if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
        {
            if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
            {
                VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
            }
        }
    }

    VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
    genLastLiveMask = RBM_NONE;
#ifdef DEBUG
    compiler->fgBBcountAtCodegen = compiler->fgBBcount;
#endif
}
//------------------------------------------------------------------------
// genMarkLabelsForCodegen: Mark labels required for codegen.
//
// Mark all blocks that require a label with BBF_HAS_LABEL. These are blocks that are:
// 1. the target of jumps (fall-through flow doesn't require a label),
// 2. referenced labels such as for "switch" codegen,
// 3. needed to denote the range of EH regions to the VM, or
// 4. needed to denote the range of code for alignment processing.
//
// No labels will be in the IR before now, but future codegen might annotate additional blocks
// with this flag, such as "switch" codegen, or codegen-created blocks from genCreateTempLabel().
// Also, the alignment processing code marks BBJ_COND fall-through labels elsewhere.
//
// To report exception handling information to the VM, we need the size of the exception
// handling regions. To compute that, we need to emit labels for the beginning block of
// an EH region, and the block that immediately follows a region. Go through the EH
// table and mark all these blocks with BBF_HAS_LABEL to make this happen.
//
// This code is closely coupled with genReportEH() in the sense that any block
// that this procedure has determined needs a label must be selected
// using the same logic both here and in genReportEH(); so, basically, any time there is
// a change in the way we handle EH reporting, we have to keep the logic of these two
// methods 'in sync'.
//
// No blocks should be added or removed after this.
//
void CodeGen::genMarkLabelsForCodegen()
{
    assert(!compiler->fgSafeBasicBlockCreation);

    JITDUMP("Mark labels for codegen\n");

#ifdef DEBUG
    // No label flags should be set before this.
    for (BasicBlock* const block : compiler->Blocks())
    {
        assert((block->bbFlags & BBF_HAS_LABEL) == 0);
    }
#endif // DEBUG

    // The first block is special; it always needs a label. This is to properly set up GC info.
    JITDUMP("  " FMT_BB " : first block\n", compiler->fgFirstBB->bbNum);
    compiler->fgFirstBB->bbFlags |= BBF_HAS_LABEL;

    // The current implementation of switch tables requires the first block to have a label so it
    // can generate offsets to the switch label targets.
    // (This is duplicative with the fact we always set the first block with a label above.)
    // TODO-CQ: remove this when switches have been re-implemented to not use this.
    if (compiler->fgHasSwitch)
    {
        JITDUMP("  " FMT_BB " : function has switch; mark first block\n", compiler->fgFirstBB->bbNum);
        compiler->fgFirstBB->bbFlags |= BBF_HAS_LABEL;
    }

    for (BasicBlock* const block : compiler->Blocks())
    {
        switch (block->bbJumpKind)
        {
            case BBJ_ALWAYS: // This will also handle the BBJ_ALWAYS of a BBJ_CALLFINALLY/BBJ_ALWAYS pair.
            case BBJ_COND:
            case BBJ_EHCATCHRET:
                JITDUMP("  " FMT_BB " : branch target\n", block->bbJumpDest->bbNum);
                block->bbJumpDest->bbFlags |= BBF_HAS_LABEL;
                break;

            case BBJ_SWITCH:
                for (BasicBlock* const bTarget : block->SwitchTargets())
                {
                    JITDUMP("  " FMT_BB " : branch target\n", bTarget->bbNum);
                    bTarget->bbFlags |= BBF_HAS_LABEL;
                }
                break;

            case BBJ_CALLFINALLY:
                // The finally target itself will get marked by walking the EH table, below, and marking
                // all handler begins.
                CLANG_FORMAT_COMMENT_ANCHOR;

#if FEATURE_EH_CALLFINALLY_THUNKS
                {
                    // For callfinally thunks, we need to mark the block following the callfinally/always pair,
                    // as that's needed for identifying the range of the "duplicate finally" region in EH data.
                    BasicBlock* bbToLabel = block->bbNext;
                    if (block->isBBCallAlwaysPair())
                    {
                        bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
                    }
                    if (bbToLabel != nullptr)
                    {
                        JITDUMP("  " FMT_BB " : callfinally thunk region end\n", bbToLabel->bbNum);
                        bbToLabel->bbFlags |= BBF_HAS_LABEL;
                    }
                }
#endif // FEATURE_EH_CALLFINALLY_THUNKS

                break;

            case BBJ_EHFINALLYRET:
            case BBJ_EHFILTERRET:
            case BBJ_RETURN:
            case BBJ_THROW:
            case BBJ_NONE:
                break;

            default:
                noway_assert(!"Unexpected bbJumpKind");
                break;
        }
    }

    // Walk all the exceptional code blocks and mark them, since they don't appear in the normal flow graph.
    for (Compiler::AddCodeDsc* add = compiler->fgAddCodeList; add; add = add->acdNext)
    {
        JITDUMP("  " FMT_BB " : throw helper block\n", add->acdDstBlk->bbNum);
        add->acdDstBlk->bbFlags |= BBF_HAS_LABEL;
    }

    for (EHblkDsc* const HBtab : EHClauses(compiler))
    {
        HBtab->ebdTryBeg->bbFlags |= BBF_HAS_LABEL;
        HBtab->ebdHndBeg->bbFlags |= BBF_HAS_LABEL;

        JITDUMP("  " FMT_BB " : try begin\n", HBtab->ebdTryBeg->bbNum);
        JITDUMP("  " FMT_BB " : hnd begin\n", HBtab->ebdHndBeg->bbNum);

        if (HBtab->ebdTryLast->bbNext != nullptr)
        {
            HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
            JITDUMP("  " FMT_BB " : try end\n", HBtab->ebdTryLast->bbNext->bbNum);
        }

        if (HBtab->ebdHndLast->bbNext != nullptr)
        {
            HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
            JITDUMP("  " FMT_BB " : hnd end\n", HBtab->ebdHndLast->bbNext->bbNum);
        }

        if (HBtab->HasFilter())
        {
            HBtab->ebdFilter->bbFlags |= BBF_HAS_LABEL;
            JITDUMP("  " FMT_BB " : filter begin\n", HBtab->ebdFilter->bbNum);
        }
    }

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("*************** After genMarkLabelsForCodegen()\n");
        compiler->fgDispBasicBlocks();
    }
#endif // DEBUG
}
void CodeGenInterface::genUpdateLife(GenTree* tree)
{
    treeLifeUpdater->UpdateLife(tree);
}

void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
{
    compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
}
// Return the register mask for the given register variable
regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
{
    regMaskTP regMask = RBM_NONE;

    assert(varDsc->lvIsInReg());

    regNumber reg = varDsc->GetRegNum();
    if (genIsValidFloatReg(reg))
    {
        regMask = genRegMaskFloat(reg ARM_ARG(varDsc->GetRegisterType()));
    }
    else
    {
        regMask = genRegMask(reg);
    }
    return regMask;
}
// Return the register mask for the given lclVar or regVar tree node
regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
{
    assert(tree->gtOper == GT_LCL_VAR);

    regMaskTP        regMask = RBM_NONE;
    const LclVarDsc* varDsc  = compiler->lvaGetDesc(tree->AsLclVarCommon());
    if (varDsc->lvPromoted)
    {
        for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
        {
            const LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(i);
            noway_assert(fieldVarDsc->lvIsStructField);
            if (fieldVarDsc->lvIsInReg())
            {
                regMask |= genGetRegMask(fieldVarDsc);
            }
        }
    }
    else if (varDsc->lvIsInReg())
    {
        regMask = genGetRegMask(varDsc);
    }
    return regMask;
}
// The given lclVar is either going live (being born) or dying.
// It might be both going live and dying (that is, it is a dead store) under MinOpts.
// Update regSet.GetMaskVars() accordingly.
// inline
void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
{
    regMaskTP regMask = genGetRegMask(varDsc);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\t\t\t\t\t\t\tV%02u in reg ", compiler->lvaGetLclNum(varDsc));

        varDsc->PrintVarReg();
        printf(" is becoming %s ", (isDying) ? "dead" : "live");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    if (isDying)
    {
        // We'd like to be able to assert the following, however if we are walking
        // through a qmark/colon tree, we may encounter multiple last-use nodes.
        // assert((regSet.GetMaskVars() & regMask) == regMask);
        regSet.RemoveMaskVars(regMask);
    }
    else
    {
        // If this is going live, the register must not have a variable in it, except
        // in the case of an exception or "spill at single-def" variable, which may be already treated
        // as live in the register.
        assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars() & regMask) == 0));
        regSet.AddMaskVars(regMask);
    }
}
//----------------------------------------------------------------------
// compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
// Not all JIT helper calls follow the standard ABI on the target architecture.
//
// TODO-CQ: Currently this list is incomplete (not all helper calls are
//          enumerated) and not 100% accurate (some killsets are bigger than
//          what they really are).
//          There's some work to be done in several places in the JIT to
//          accurately track the registers that are getting killed by
//          helper calls:
//          a) LSRA needs several changes to accommodate more precise killsets
//             for every helper call it sees (both explicitly [easy] and
//             implicitly [hard])
//          b) Currently for AMD64, when we generate code for a helper call
//             we're independently over-pessimizing the killsets of the call
//             (independently from LSRA) and this needs changes
//             both in CodeGenAmd64.cpp and emitx86.cpp.
//
//          The best solution for this problem would be to try to centralize
//          the killset information in a single place, but then make the
//          corresponding changes so every code generation phase is in sync
//          about this.
//
//          The interim solution is to only add known helper calls that don't
//          follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
//
// Arguments:
//   helper - The helper being inquired about
//
// Return Value:
//   Mask of register kills -- registers whose values are no longer guaranteed to be the same.
//
regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
{
    switch (helper)
    {
        case CORINFO_HELP_ASSIGN_REF:
        case CORINFO_HELP_CHECKED_ASSIGN_REF:
            return RBM_CALLEE_TRASH_WRITEBARRIER;

        case CORINFO_HELP_ASSIGN_BYREF:
            return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;

        case CORINFO_HELP_PROF_FCN_ENTER:
            return RBM_PROFILER_ENTER_TRASH;

        case CORINFO_HELP_PROF_FCN_LEAVE:
            return RBM_PROFILER_LEAVE_TRASH;

        case CORINFO_HELP_PROF_FCN_TAILCALL:
            return RBM_PROFILER_TAILCALL_TRASH;

#ifdef TARGET_X86
        case CORINFO_HELP_ASSIGN_REF_EAX:
        case CORINFO_HELP_ASSIGN_REF_ECX:
        case CORINFO_HELP_ASSIGN_REF_EBX:
        case CORINFO_HELP_ASSIGN_REF_EBP:
        case CORINFO_HELP_ASSIGN_REF_ESI:
        case CORINFO_HELP_ASSIGN_REF_EDI:

        case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
        case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
        case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
        case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
        case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
        case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
            return RBM_EDX;
#endif

        case CORINFO_HELP_STOP_FOR_GC:
            return RBM_STOP_FOR_GC_TRASH;

        case CORINFO_HELP_INIT_PINVOKE_FRAME:
            return RBM_INIT_PINVOKE_FRAME_TRASH;

        case CORINFO_HELP_VALIDATE_INDIRECT_CALL:
            return RBM_VALIDATE_INDIRECT_CALL_TRASH;

        default:
            return RBM_CALLEE_TRASH;
    }
}
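
// Illustrative consequence of the kill sets above (an assumption about usage, not
// code from the original source): because CORINFO_HELP_ASSIGN_REF reports only
// RBM_CALLEE_TRASH_WRITEBARRIER, the register allocator may keep values live
// across a write barrier in caller-saved registers that an ordinary call
// (RBM_CALLEE_TRASH) would be assumed to kill.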
//------------------------------------------------------------------------
// compChangeLife: Compare the given "newLife" with the last set of live variables and update
//  codeGen "gcInfo", siScopes, and "regSet" with the new variables' homes/liveness.
//
// Arguments:
//    newLife - the new set of variables that are alive.
//
// Assumptions:
//    The set of live variables reflects only the result of code that has already been emitted;
//    it should not consider the births/deaths caused by instructions that have not been emitted
//    yet. This is used to ensure half-open [) "VariableLiveRange" intervals when calling
//    "siStartVariableLiveRange" and "siEndVariableLiveRange".
//
// Notes:
//    If "ForCodeGen" is false, only the "compCurLife" set (and no mask) will be set.
//
template <bool ForCodeGen>
void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
{
#ifdef DEBUG
    if (verbose)
    {
        printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
        dumpConvertedVarSet(this, compCurLife);
        printf(" -> %s ", VarSetOps::ToString(this, newLife));
        dumpConvertedVarSet(this, newLife);
        printf("\n");
    }
#endif // DEBUG

    /* We should only be called when the live set has actually changed */

    noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));

    if (!ForCodeGen)
    {
        VarSetOps::Assign(this, compCurLife, newLife);
        return;
    }

    /* Figure out which variables are becoming live/dead at this point */

    // deadSet = compCurLife - newLife
    VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));

    // bornSet = newLife - compCurLife
    VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));

    /* Can't simultaneously become live and dead at the same time */

    // (deadSet UNION bornSet) != EMPTY
    noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
    // (deadSet INTERSECTION bornSet) == EMPTY
    noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));

    VarSetOps::Assign(this, compCurLife, newLife);
    // Handle the dying vars first, then the newly live vars.
    // This is because, in the RyuJIT backend case, they may occupy registers that
    // will be occupied by another var that is newly live.
    VarSetOps::Iter deadIter(this, deadSet);
    unsigned        deadVarIndex = 0;
    while (deadIter.NextElem(&deadVarIndex))
    {
        unsigned   varNum     = lvaTrackedIndexToLclNum(deadVarIndex);
        LclVarDsc* varDsc     = lvaGetDesc(varNum);
        bool       isGCRef    = (varDsc->TypeGet() == TYP_REF);
        bool       isByRef    = (varDsc->TypeGet() == TYP_BYREF);
        bool       isInReg    = varDsc->lvIsInReg();
        bool       isInMemory = !isInReg || varDsc->IsAlwaysAliveInMemory();

        if (isInReg)
        {
            // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
            // gc sets
            regMaskTP regMask = varDsc->lvRegMask();
            if (isGCRef)
            {
                codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
            }
            else if (isByRef)
            {
                codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
            }
            codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
        }
        // Update the gcVarPtrSetCur if it is in memory.
        if (isInMemory && (isGCRef || isByRef))
        {
            VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
            JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
        }

        codeGen->getVariableLiveKeeper()->siEndVariableLiveRange(varNum);
    }
    VarSetOps::Iter bornIter(this, bornSet);
    unsigned        bornVarIndex = 0;
    while (bornIter.NextElem(&bornVarIndex))
    {
        unsigned   varNum  = lvaTrackedIndexToLclNum(bornVarIndex);
        LclVarDsc* varDsc  = lvaGetDesc(varNum);
        bool       isGCRef = (varDsc->TypeGet() == TYP_REF);
        bool       isByRef = (varDsc->TypeGet() == TYP_BYREF);

        if (varDsc->lvIsInReg())
        {
            // If this variable is going live in a register, it is no longer live on the stack,
            // unless it is an EH/"spill at single-def" var, which always remains live on the stack.
            if (!varDsc->IsAlwaysAliveInMemory())
            {
#ifdef DEBUG
                if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
                {
                    JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
                }
#endif // DEBUG
                VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
            }
            codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
            regMaskTP regMask = varDsc->lvRegMask();
            if (isGCRef)
            {
                codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
            }
            else if (isByRef)
            {
                codeGen->gcInfo.gcRegByrefSetCur |= regMask;
            }
        }
        else if (lvaIsGCTracked(varDsc))
        {
            // This isn't in a register, so update the gcVarPtrSetCur to show that it's live on the stack.
            VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
            JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
        }

        codeGen->getVariableLiveKeeper()->siStartVariableLiveRange(varDsc, varNum);
    }
}
// Need an explicit instantiation.
template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
/*****************************************************************************
 *
 *  Generate a spill.
 */
void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
{
    GetEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
}

/*****************************************************************************
 *
 *  Generate a reload.
 */
void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
{
    GetEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
}
// inline
regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
{
    return REG_ARG_0;
}
//----------------------------------------------------------------------
// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
//
// Arguments:
//   tree - spilled GenTree node
//
// Return Value:
//   TempDsc corresponding to tree
TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
{
    // tree must be in spilled state.
    assert((tree->gtFlags & GTF_SPILLED) != 0);

    // Get the tree's SpillDsc.
    RegSet::SpillDsc* prevDsc;
    RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->GetRegNum(), &prevDsc);
    assert(spillDsc != nullptr);

    // Get the temp desc.
    TempDsc* temp = regSet.rsGetSpillTempWord(tree->GetRegNum(), spillDsc, prevDsc);
    return temp;
}
/*****************************************************************************
 *
 *  The following can be used to create basic blocks that serve as labels for
 *  the emitter. Use with caution - these are not real basic blocks!
 */

// inline
BasicBlock* CodeGen::genCreateTempLabel()
{
#ifdef DEBUG
    // These blocks don't affect FP
    compiler->fgSafeBasicBlockCreation = true;
#endif

    BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);

#ifdef DEBUG
    compiler->fgSafeBasicBlockCreation = false;
#endif

    JITDUMP("Mark " FMT_BB " as label: codegen temp block\n", block->bbNum);
    block->bbFlags |= BBF_HAS_LABEL;

    // Use coldness of current block, as this label will
    // be contained in it.
    block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);

#ifdef DEBUG
#ifdef UNIX_X86_ABI
    block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
#else
    block->bbTgtStkDepth = genStackLevel / sizeof(int);
#endif
#endif
    return block;
}
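
// A typical (illustrative) pairing of these temp-label helpers, mirroring the
// pattern used by genJumpToThrowHlpBlk later in this file:
//
//     BasicBlock* skipLabel = genCreateTempLabel();
//     inst_JMP(reverseJumpKind, skipLabel);   // jump around the out-of-line code
//     // ... emit out-of-line code here ...
//     genDefineTempLabel(skipLabel);          // join point for the normal path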
void CodeGen::genLogLabel(BasicBlock* bb)
{
#ifdef DEBUG
    if (compiler->opts.dspCode)
    {
        printf("\n      L_M%03u_" FMT_BB ":\n", compiler->compMethodID, bb->bbNum);
    }
#endif
}
// genDefineTempLabel: Define a label based on the current GC info tracked by
// the code generator.
//
// Arguments:
//     label - A label represented as a basic block. These are created with
//     genCreateTempLabel and are not normal basic blocks.
//
// Notes:
//     The label will be defined with the current GC info tracked by the code
//     generator. When the emitter sees this label it will thus remove any temporary
//     GC refs it is tracking in registers. For example, a call might produce a ref
//     in RAX which the emitter would track but which would not be tracked in
//     codegen's GC info since codegen would immediately copy it from RAX into its
//     home.
//
void CodeGen::genDefineTempLabel(BasicBlock* label)
{
    genLogLabel(label);
    label->bbEmitCookie = GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                                                     gcInfo.gcRegByrefSetCur, false DEBUG_ARG(label));
}
// genDefineInlineTempLabel: Define an inline label that does not affect the GC
// info.
//
// Arguments:
//     label - A label represented as a basic block. These are created with
//     genCreateTempLabel and are not normal basic blocks.
//
// Notes:
//     The emitter will continue to track GC info as if there was no label.
//
void CodeGen::genDefineInlineTempLabel(BasicBlock* label)
{
    genLogLabel(label);
    label->bbEmitCookie = GetEmitter()->emitAddInlineLabel();
}
//------------------------------------------------------------------------
// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
//
// Arguments:
//    block - The BasicBlock for which we are about to generate code.
//
// Assumptions:
//    Must be called just prior to generating code for 'block'.
//
// Notes:
//    This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
//    and if 'block' is a throw helper block with a non-zero stack level.

void CodeGen::genAdjustStackLevel(BasicBlock* block)
{
#if !FEATURE_FIXED_OUT_ARGS
    // Check for inserted throw blocks and adjust genStackLevel.
    CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(UNIX_X86_ABI)
    if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
    {
        // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
        // at this point if a jump to this block is made in the middle of pushing arguments.
        //
        // Here we restore SP to prevent potential stack alignment issues.
        GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
    }
#endif

    if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
    {
        noway_assert(block->bbFlags & BBF_HAS_LABEL);

        SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));

        if (genStackLevel != 0)
        {
#ifdef TARGET_X86
            GetEmitter()->emitMarkStackLvl(genStackLevel);
            inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
            SetStackLevel(0);
#else  // TARGET_X86
            NYI("Need emitMarkStackLvl()");
#endif // TARGET_X86
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS
}
/*****************************************************************************
 *
 *  Take an address expression and try to find the best set of components to
 *  form an address mode; returns true if this is successful.
 *
 *  TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
 *  Refactor this code so that the underlying analysis can be used in
 *  the RyuJIT Backend to do lowering, instead of having to call this method with the
 *  option to not generate the code.
 *
 *  'fold' specifies if it is OK to fold the array index which hangs off
 *  a GT_NOP node.
 *
 *  If successful, the parameters will be set to the following values:
 *
 *      *rv1Ptr     ...     base operand
 *      *rv2Ptr     ...     optional operand
 *      *revPtr     ...     true if rv2 is before rv1 in the evaluation order
 *      *mulPtr     ...     optional multiplier (2/4/8) for rv2
 *                          Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
 *      *cnsPtr     ...     integer constant [optional]
 *
 *  IMPORTANT NOTE: This routine doesn't generate any code, it merely
 *                  identifies the components that might be used to
 *                  form an address mode later on.
 */
bool CodeGen::genCreateAddrMode(
    GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr)
{
    /*
        The following indirections are valid address modes on x86/x64:

            [                   icon]    * not handled here
            [reg                    ]
            [reg              + icon]
            [reg1 +      reg2       ]
            [reg1 +      reg2 + icon]
            [reg1 +  2 * reg2       ]
            [reg1 +  4 * reg2       ]
            [reg1 +  8 * reg2       ]
            [reg1 +  2 * reg2 + icon]
            [reg1 +  4 * reg2 + icon]
            [reg1 +  8 * reg2 + icon]

        The following indirections are valid address modes on arm64:

            [reg]
            [reg  + icon]
            [reg1 + reg2]
            [reg1 + reg2 * natural-scale]
     */
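
    // Illustrative decomposition (not from the original source): for an int array
    // element address computed as "a + 16 + 4*i" on x64, this routine would return
    // rv1 = a (base), rv2 = i (index), mul = 4, cns = 16 -- the
    // [reg1 + 4 * reg2 + icon] form listed above.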
    /* All indirect address modes require the address to be an addition */

    if (!addr->OperIs(GT_ADD))
    {
        return false;
    }

    GenTree* rv1 = nullptr;
    GenTree* rv2 = nullptr;

    GenTree* op1;
    GenTree* op2;

    ssize_t  cns;
    unsigned mul;

    GenTree* tmp;

    /* What order are the sub-operands to be evaluated */

    if (addr->gtFlags & GTF_REVERSE_OPS)
    {
        op1 = addr->AsOp()->gtOp2;
        op2 = addr->AsOp()->gtOp1;
    }
    else
    {
        op1 = addr->AsOp()->gtOp1;
        op2 = addr->AsOp()->gtOp2;
    }

    // Can't use indirect addressing mode as we need to check for overflow.
    // Also, can't use 'lea' as it doesn't set the flags.

    if (addr->gtOverflow())
    {
        return false;
    }
    bool rev = false; // Is op2 first in the evaluation order?

    /*
        A complex address mode can combine the following operands:

            op1     ...     base address
            op2     ...     optional scaled index
            mul     ...     optional multiplier (2/4/8) for op2
            cns     ...     optional displacement

        Here we try to find such a set of operands and arrange for these
        to sit in registers.
     */

    cns = 0;
    mul = 0;

AGAIN:
    /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
       constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
       here if we find a scaled index.
    */
    CLANG_FORMAT_COMMENT_ANCHOR;

    assert(mul == 0);
    /* Special case: keep constants as 'op2', but don't do this for constant handles
       because they don't fit I32 that we're going to check for below anyway. */

    if (op1->IsCnsIntOrI() && !op1->IsIconHandle())
    {
        // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?

        tmp = op1;
        op1 = op2;
        op2 = tmp;
    }

    /* Check for an addition of a constant */

    if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->AsIntConCommon()->IconValue()))
    {
        // We should not be building address modes out of non-foldable constants
        if (!op2->AsIntConCommon()->ImmedValCanBeFolded(compiler, addr->OperGet()))
        {
            assert(compiler->opts.compReloc);
            return false;
        }

        /* We're adding a constant */

        cns += op2->AsIntConCommon()->IconValue();

#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
        if (cns == 0)
#endif
        {
            /* Inspect the operand the constant is being added to */

            switch (op1->gtOper)
            {
                case GT_ADD:

                    if (op1->gtOverflow())
                    {
                        break;
                    }

                    op2 = op1->AsOp()->gtOp2;
                    op1 = op1->AsOp()->gtOp1;

                    goto AGAIN;

#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
                // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
                case GT_MUL:
                    if (op1->gtOverflow())
                    {
                        return false; // Need overflow check
                    }

                    FALLTHROUGH;

                case GT_LSH:

                    mul = op1->GetScaledIndex();
                    if (mul)
                    {
                        /* We can use "[mul*rv2 + icon]" */

                        rv1 = nullptr;
                        rv2 = op1->AsOp()->gtOp1;

                        goto FOUND_AM;
                    }
                    break;
#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)

                default:
                    break;
            }
        }

        /* The best we can do is "[rv1 + icon]" */

        rv1 = op1;
        rv2 = nullptr;

        goto FOUND_AM;
    }
    // op2 is not a constant. So keep on trying.

    /* Neither op1 nor op2 are sitting in a register right now */

    switch (op1->gtOper)
    {
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
        // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
        case GT_ADD:

            if (op1->gtOverflow())
            {
                break;
            }

            if (op1->AsOp()->gtOp2->IsIntCnsFitsInI32())
            {
                GenTreeIntCon* addConst = op1->AsOp()->gtOp2->AsIntCon();

                if (addConst->ImmedValCanBeFolded(compiler, GT_ADD) && FitsIn<INT32>(cns + addConst->IconValue()))
                {
                    cns += addConst->IconValue();
                    op1 = op1->AsOp()->gtOp1;

                    goto AGAIN;
                }
            }
            break;

        case GT_MUL:

            if (op1->gtOverflow())
            {
                break;
            }

            FALLTHROUGH;

        case GT_LSH:

            mul = op1->GetScaledIndex();
            if (mul)
            {
                /* 'op1' is a scaled value */

                rv1 = op2;
                rv2 = op1->AsOp()->gtOp1;

                int argScale;
                while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
                {
                    if (jitIsScaleIndexMul(argScale * mul))
                    {
                        mul = mul * argScale;
                        rv2 = rv2->AsOp()->gtOp1;
                    }
                    else
                    {
                        break;
                    }
                }

                noway_assert(rev == false);
                rev = true;

                goto FOUND_AM;
            }
            break;
#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64

        case GT_NOP:

            op1 = op1->AsOp()->gtOp1;
            goto AGAIN;

        case GT_COMMA:

            op1 = op1->AsOp()->gtOp2;
            goto AGAIN;

        default:
            break;
    }

    noway_assert(op2);
    switch (op2->gtOper)
    {
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
        // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because
        // arm doesn't support both scale and offset at the same time. Offset is handled
        // at the emitter as a peephole optimization.
        case GT_ADD:

            if (op2->gtOverflow())
            {
                break;
            }

            if (op2->AsOp()->gtOp2->IsIntCnsFitsInI32())
            {
                GenTreeIntCon* addConst = op2->AsOp()->gtOp2->AsIntCon();

                if (addConst->ImmedValCanBeFolded(compiler, GT_ADD) && FitsIn<INT32>(cns + addConst->IconValue()))
                {
                    cns += addConst->IconValue();
                    op2 = op2->AsOp()->gtOp1;
                }
            }
            break;

        case GT_MUL:

            if (op2->gtOverflow())
            {
                break;
            }

            FALLTHROUGH;

        case GT_LSH:

            mul = op2->GetScaledIndex();
            if (mul)
            {
                // 'op2' is a scaled value...is its operand also scaled?

                int argScale;
                rv2 = op2->AsOp()->gtOp1;
                while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
                {
                    if (jitIsScaleIndexMul(argScale * mul))
                    {
                        mul = mul * argScale;
                        rv2 = rv2->AsOp()->gtOp1;
                    }
                    else
                    {
                        break;
                    }
                }

                rv1 = op1;

                goto FOUND_AM;
            }
            break;
#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64

        case GT_NOP:

            op2 = op2->AsOp()->gtOp1;
            goto AGAIN;

        case GT_COMMA:

            op2 = op2->AsOp()->gtOp2;
            goto AGAIN;

        default:
            break;
    }
    /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */

    rv1 = op1;
    rv2 = op2;

FOUND_AM:

    if (rv2 != nullptr)
    {
        // Make sure a GC address doesn't end up in 'rv2'
        if (varTypeIsGC(rv2->TypeGet()))
        {
            std::swap(rv1, rv2);
            rev = !rev;
        }

        // Special case: constant array index (that is range-checked)
        if (fold)
        {
            // By default, assume index is rv2 and indexScale is mul (or 1 if mul is zero)
            GenTree* index      = rv2;
            ssize_t  indexScale = mul == 0 ? 1 : mul;

            if (rv2->OperIs(GT_MUL, GT_LSH) && (rv2->gtGetOp2()->IsCnsIntOrI()))
            {
                indexScale *= compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
            }

            // "index * 0" means index is zero
            if (indexScale == 0)
            {
                mul = 0;
                rv2 = nullptr;
            }
            else if (index->IsIntCnsFitsInI32())
            {
                ssize_t constantIndex = index->AsIntConCommon()->IconValue() * indexScale;
                if (constantIndex == 0)
                {
                    // while scale is a non-zero constant, the actual index is zero so drop it
                    mul = 0;
                    rv2 = nullptr;
                }
                else if (FitsIn<INT32>(cns + constantIndex))
                {
                    // Add the constant index to the accumulated offset value
                    cns += constantIndex;

                    // and get rid of index
                    mul = 0;
                    rv2 = nullptr;
                }
            }
        }
    }

    // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
    noway_assert(rv1 || mul != 1);

    noway_assert(FitsIn<INT32>(cns));
    if (rv1 == nullptr && rv2 == nullptr)
    {
        return false;
    }

    /* Success - return the various components to the caller */

    *revPtr = rev;
    *rv1Ptr = rv1;
    *rv2Ptr = rv2;
    *mulPtr = mul;
    *cnsPtr = cns;

    return true;
}
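
// A minimal caller sketch (hypothetical names; the real callers live in the
// lowering and emit paths):
//
//     bool     rev;
//     GenTree* base;
//     GenTree* index;
//     unsigned scale;
//     ssize_t  offset;
//     if (genCreateAddrMode(addr, /* fold */ true, &rev, &base, &index, &scale, &offset))
//     {
//         // 'addr' can be formed as [base + scale * index + offset]
//     }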
/*****************************************************************************
 *
 *  Generate an exit sequence for a return from a method (note: when compiling
 *  for speed there might be multiple exit points).
 */

void CodeGen::genExitCode(BasicBlock* block)
{
    /* Just wrote the first instruction of the epilog - inform debugger
       Note that this may result in a duplicate IPmapping entry, and
       that this is ok */

    // For non-optimized debuggable code, there is only one epilog.
    genIPmappingAdd(IPmappingDscKind::Epilog, DebugInfo(), true);

    bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
    if (compiler->getNeedsGSSecurityCookie())
    {
        genEmitGSCookieCheck(jmpEpilog);

        if (jmpEpilog)
        {
            // The GS cookie check created a temp label that has no live
            // incoming GC registers, we need to fix that

            unsigned   varNum;
            LclVarDsc* varDsc;

            /* Figure out which register parameters hold pointers */

            for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
                 varNum++, varDsc++)
            {
                noway_assert(varDsc->lvIsParam);

                gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), varDsc->TypeGet());
            }

            GetEmitter()->emitThisGCrefRegs = GetEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
            GetEmitter()->emitThisByrefRegs = GetEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
        }
    }

    genReserveEpilog(block);
}
//------------------------------------------------------------------------
// genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
//
// Notes:
//   For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
//   Otherwise, we generate the 'throw' inline.
//
// Arguments:
//   jumpKind - jump kind to generate;
//   codeKind - the special throw-helper kind;
//   failBlk  - optional fail target block, if it is already known;
//
void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk)
{
    bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
#if defined(UNIX_X86_ABI) && defined(FEATURE_EH_FUNCLETS)
    // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
    useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
#endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS

    if (useThrowHlpBlk)
    {
        // For code with throw helper blocks, find and use the helper block for
        // raising the exception. The block may be shared by other trees too.

        BasicBlock* excpRaisingBlock;

        if (failBlk != nullptr)
        {
            // We already know which block to jump to. Use that.
            excpRaisingBlock = failBlk;

#ifdef DEBUG
            Compiler::AddCodeDsc* add =
                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
            assert(excpRaisingBlock == add->acdDstBlk);
#if !FEATURE_FIXED_OUT_ARGS
            assert(add->acdStkLvlInit || isFramePointerUsed());
#endif // !FEATURE_FIXED_OUT_ARGS
#endif // DEBUG
        }
        else
        {
            // Find the helper-block which raises the exception.
            Compiler::AddCodeDsc* add =
                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
            PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
            excpRaisingBlock = add->acdDstBlk;
#if !FEATURE_FIXED_OUT_ARGS
            assert(add->acdStkLvlInit || isFramePointerUsed());
#endif // !FEATURE_FIXED_OUT_ARGS
        }

        noway_assert(excpRaisingBlock != nullptr);

        // Jump to the exception-throwing block on error.
        inst_JMP(jumpKind, excpRaisingBlock);
    }
    else
    {
        // The code to throw the exception will be generated inline, and
        // we will jump around it in the normal non-exception case.

        BasicBlock*  tgtBlk          = nullptr;
        emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
        if (reverseJumpKind != jumpKind)
        {
            tgtBlk = genCreateTempLabel();
            inst_JMP(reverseJumpKind, tgtBlk);
        }

        genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);

        // Define the spot for the normal non-exception case to jump to.
        if (tgtBlk != nullptr)
        {
            assert(reverseJumpKind != jumpKind);
            genDefineTempLabel(tgtBlk);
        }
    }
}
/*****************************************************************************
 *
 * The last operation done was generating code for "tree" and that would
 * have set the flags. Check if the operation caused an overflow.
 */

#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// inline
void CodeGen::genCheckOverflow(GenTree* tree)
{
    // Overflow-check should be asked for this tree
    noway_assert(tree->gtOverflow());

    const var_types type = tree->TypeGet();

    // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
    noway_assert(!varTypeIsSmall(type));

    emitJumpKind jumpKind;

#ifdef TARGET_ARM64
    if (tree->OperGet() == GT_MUL)
    {
        jumpKind = EJ_ne;
    }
    else
#endif
    {
        bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);

#if defined(TARGET_XARCH)

        jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;

#elif defined(TARGET_ARMARCH)

        jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;

        if (jumpKind == EJ_lo)
        {
            if (tree->OperGet() != GT_SUB)
            {
                jumpKind = EJ_hs;
            }
        }

#endif // defined(TARGET_ARMARCH)
    }

    // Jump to the block which will throw the exception

    genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
}
#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
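
// For example (x64 flag semantics, illustrative): a signed "INT32_MAX + 1" sets OF
// and the EJ_jo branch above is taken to the SCK_OVERFLOW helper, while an unsigned
// add that wraps sets CF and takes the EJ_jb branch instead.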
#if defined(FEATURE_EH_FUNCLETS)

/*****************************************************************************
 *
 *  Update the current funclet as needed by calling genUpdateCurrentFunclet().
 *  For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
 *  is up-to-date.
 *
 */

void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
{
    if (block->bbFlags & BBF_FUNCLET_BEG)
    {
        compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
        if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
        {
            assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
        }
        else
        {
            // We shouldn't see FUNC_ROOT
            assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
            assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
        }
    }
    else
    {
        assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
        if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
        {
            assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
        }
        else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
        {
            assert(!block->hasHndIndex());
        }
        else
        {
            assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
            assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
        }
    }
}

#endif // FEATURE_EH_FUNCLETS
//----------------------------------------------------------------------
// genGenerateCode: Generate code for the function.
//
// Arguments:
//     codePtr [OUT] - address of generated code
//     nativeSizeOfCode [OUT] - length of generated code in bytes
//
void CodeGen::genGenerateCode(void** codePtr, uint32_t* nativeSizeOfCode)
{
#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In genGenerateCode()\n");
        compiler->fgDispBasicBlocks(compiler->verboseTrees);
    }
#endif

    this->codePtr          = codePtr;
    this->nativeSizeOfCode = nativeSizeOfCode;

    DoPhase(this, PHASE_GENERATE_CODE, &CodeGen::genGenerateMachineCode);
    DoPhase(this, PHASE_EMIT_CODE, &CodeGen::genEmitMachineCode);
    DoPhase(this, PHASE_EMIT_GCEH, &CodeGen::genEmitUnwindDebugGCandEH);
}
//----------------------------------------------------------------------
// genGenerateMachineCode -- determine which machine instructions to emit
//
void CodeGen::genGenerateMachineCode()
{
#ifdef DEBUG
    genInterruptibleUsed = true;

    compiler->fgDebugCheckBBlist();
#endif // DEBUG

    /* This is the real thing */

    genPrepForCompiler();

    /* Prepare the emitter */
    GetEmitter()->Init();
#ifdef DEBUG
    VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
#endif

#ifdef DEBUG
    if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
    {
        compiler->opts.disAsm = true;
    }
#endif

    compiler->compCurBB = compiler->fgFirstBB;
    if (compiler->opts.disAsm)
    {
#ifdef DEBUG
        const char* fullName = compiler->info.compFullName;
#else
        const char* fullName = compiler->eeGetMethodFullName(compiler->info.compMethodHnd);
#endif

        printf("; Assembly listing for method %s (%s)\n", fullName, compiler->compGetTieringName(true));

        printf("; Emitting ");

        if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
        {
            printf("SMALL_CODE");
        }
        else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
        {
            printf("FAST_CODE");
        }
        else
        {
            printf("BLENDED_CODE");
        }

        printf(" for ");

#if defined(TARGET_X86)
        if (compiler->canUseEvexEncoding())
        {
            printf("X86 with AVX512");
        }
        else if (compiler->canUseVexEncoding())
        {
            printf("X86 with AVX");
        }
        else
        {
            printf("generic X86");
        }
#elif defined(TARGET_AMD64)
        if (compiler->canUseEvexEncoding())
        {
            printf("X64 with AVX512");
        }
        else if (compiler->canUseVexEncoding())
        {
            printf("X64 with AVX");
        }
        else
        {
            printf("generic X64");
        }
#elif defined(TARGET_ARM)
        printf("generic ARM");
#elif defined(TARGET_ARM64)
        printf("generic ARM64");
#elif defined(TARGET_LOONGARCH64)
        printf("generic LOONGARCH64");
#elif defined(TARGET_RISCV64)
        printf("generic RISCV64");
#else
        printf("unknown architecture");
#endif

        if (TargetOS::IsWindows)
        {
            printf(" - Windows");
        }
        else if (TargetOS::IsMacOS)
        {
            printf(" - MacOS");
        }
        else if (TargetOS::IsUnix)
        {
            printf(" - Unix");
        }

        printf("\n");

        printf("; %s code\n", compiler->compGetTieringName(false));
        if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
        {
            printf("; NativeAOT compilation\n");
        }
        else if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN))
        {
            printf("; ReadyToRun compilation\n");
        }

        if (compiler->opts.IsOSR())
        {
            printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry);
        }

        if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
        {
            printf("; optimized code\n");
        }
        else if (compiler->opts.compDbgEnC)
        {
            printf("; EnC code\n");
        }
        else if (compiler->opts.compDbgCode)
        {
            printf("; debuggable code\n");
        }

        if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && compiler->fgHaveProfileWeights())
        {
            printf("; optimized using %s\n", compiler->compGetPgoSourceName());
        }

#if DOUBLE_ALIGN
        if (compiler->genDoubleAlign())
            printf("; double-aligned frame\n");
        else
#endif
            printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);

        if (GetInterruptible())
        {
            printf("; fully interruptible\n");
        }
        else
        {
            printf("; partially interruptible\n");
        }

        if (compiler->fgHaveProfileWeights())
        {
            printf("; with %s: edge weights are %s, and fgCalledCount is " FMT_WT "\n",
                   compiler->compGetPgoSourceName(), compiler->fgHaveValidEdgeWeights ? "valid" : "invalid",
                   compiler->fgCalledCount);
        }

        if (compiler->fgPgoFailReason != nullptr)
        {
            printf("; %s\n", compiler->fgPgoFailReason);
        }

        if ((compiler->fgPgoInlineePgo + compiler->fgPgoInlineeNoPgo + compiler->fgPgoInlineeNoPgoSingleBlock) > 0)
        {
            printf("; %u inlinees with PGO data; %u single block inlinees; %u inlinees without PGO data\n",
                   compiler->fgPgoInlineePgo, compiler->fgPgoInlineeNoPgoSingleBlock, compiler->fgPgoInlineeNoPgo);
        }

        if (compiler->opts.IsCFGEnabled())
        {
            printf("; control-flow guard enabled\n");
        }

        if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
        {
            printf("; invoked as altjit\n");
        }
    }
    // We compute the final frame layout before code generation. This is because LSRA
    // has already computed exactly the maximum concurrent number of spill temps of each type that are
    // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
    // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
    // memory from the VM.

    genFinalizeFrame();

    GetEmitter()->emitBegFN(isFramePointerUsed()
#if defined(DEBUG)
                                ,
                            (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
                                !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
#endif
                            );

    /* Now generate code for the function */
    genCodeForBBlist();

#ifdef DEBUG
    // After code generation, dump the frame layout again. It should be the same as before code generation, if code
    // generation hasn't touched it (it shouldn't!).
    if (verbose)
    {
        compiler->lvaTableDump();
    }
#endif // DEBUG

    /* We can now generate the function prolog and epilog */
    genGeneratePrologsAndEpilogs();

    // check to see if any jumps can be removed
    GetEmitter()->emitRemoveJumpToNextInst();

    /* Bind jump distances */
    GetEmitter()->emitJumpDistBind();

#if FEATURE_LOOP_ALIGN
    /* Perform alignment adjustments */

    GetEmitter()->emitLoopAlignAdjustments();
#endif

    /* The code is now complete and final; it should not change after this. */
}
//----------------------------------------------------------------------
// genEmitMachineCode -- emit the actual machine instruction code
//
void CodeGen::genEmitMachineCode()
{
    /* Compute the size of the code sections that we are going to ask the VM
       to allocate. Note that this might not be precisely the size of the
       code we emit, though it's fatal if we emit more code than the size we
       compute here.
       (Note: an example of a case where we emit less code would be useful.)
    */

    GetEmitter()->emitComputeCodeSizes();

#ifdef DEBUG
    unsigned instrCount;

    // Code to test or stress our ability to run a fallback compile.
    // We trigger the fallback here, before asking the VM for any memory,
    // because if not, we will leak mem, as the current codebase can't free
    // the mem after the emitter asks the VM for it. As this is only a stress
    // mode, we only want the functionality, and don't care about the relative
    // ugliness of having the failure here.
    if (!compiler->jitFallbackCompile)
    {
        // Use DOTNET_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
        // especially that caused by enabling JIT stress.
        if (!JitConfig.JitNoForceFallback())
        {
            if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
            {
                JITDUMP("\n\n*** forcing no-way fallback -- current jit request will be abandoned ***\n\n");
                NO_WAY_NOASSERT("Stress failure");
            }
        }
    }
#endif // DEBUG
    /* We've finished collecting all the unwind information for the function. Now reserve
       space for it from the VM.
    */

    compiler->unwindReserve();

    bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?

#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
    trackedStackPtrsContig = false;
#elif defined(TARGET_ARM)
    // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
    trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
#else
    trackedStackPtrsContig = !compiler->opts.compDbgEnC;
#endif

    if (compiler->opts.disAsm && compiler->opts.disTesting)
    {
        printf("; BEGIN METHOD %s\n", compiler->eeGetMethodFullName(compiler->info.compMethodHnd));
    }

    codeSize = GetEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, GetInterruptible(),
                                            IsFullPtrRegMapRequired(), compiler->compHndBBtabCount, &prologSize,
                                            &epilogSize, codePtr, &coldCodePtr, &consPtr DEBUGARG(&instrCount));
#ifdef DEBUG
    assert(compiler->compCodeGenDone == false);

    /* We're done generating code for this function */
    compiler->compCodeGenDone = true;
#endif

#if defined(DEBUG) || defined(LATE_DISASM)
    // Add code size information into the Perf Score
    // All compPerfScore calculations must be performed using doubles
    compiler->info.compPerfScore += ((double)compiler->info.compTotalHotCodeSize * (double)PERFSCORE_CODESIZE_COST_HOT);
    compiler->info.compPerfScore +=
        ((double)compiler->info.compTotalColdCodeSize * (double)PERFSCORE_CODESIZE_COST_COLD);
#endif // DEBUG || LATE_DISASM

    if (compiler->opts.disAsm && compiler->opts.disTesting)
    {
        printf("; END METHOD %s\n", compiler->eeGetMethodFullName(compiler->info.compMethodHnd));
    }

#ifdef DEBUG
    if (compiler->opts.disAsm || verbose)
    {
        printf("\n; Total bytes of code %d, prolog size %d, PerfScore %.2f, instruction count %d, allocated bytes for "
               "code %d",
               codeSize, prologSize, compiler->info.compPerfScore, instrCount,
               GetEmitter()->emitTotalHotCodeSize + GetEmitter()->emitTotalColdCodeSize);

#if TRACK_LSRA_STATS
        if (JitConfig.DisplayLsraStats() == 3)
        {
            compiler->m_pLinearScan->dumpLsraStatsSummary(jitstdout());
        }
#endif // TRACK_LSRA_STATS

        printf(" (MethodHash=%08x) for method %s (%s)\n", compiler->info.compMethodHash(), compiler->info.compFullName,
               compiler->compGetTieringName(true));

        printf("; ============================================================\n\n");
        printf(""); // in our logic this causes a flush
    }

    if (verbose)
    {
        printf("*************** After end code gen, before unwindEmit()\n");
        GetEmitter()->emitDispIGlist(/* displayInstructions */ true);
    }
#else
    if (compiler->opts.disAsm)
    {
        printf("\n; Total bytes of code %d\n\n", codeSize);
    }
#endif

    *nativeSizeOfCode                 = codeSize;
    compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;

    // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);

    // Make sure that the x86 alignment and cache prefetch optimization rules
    // were obeyed.

    // Don't start a method in the last 7 bytes of a 16-byte alignment area
    //   unless we are generating SMALL_CODE
    // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
}
//----------------------------------------------------------------------
// genEmitUnwindDebugGCandEH: emit unwind, debug, gc, and EH info
//
void CodeGen::genEmitUnwindDebugGCandEH()
{
    /* Now that the code is issued, we can finalize and emit the unwind data */

    compiler->unwindEmit(*codePtr, coldCodePtr);

    /* Finalize the line # tracking logic after we know the exact block sizes/offsets */

    genIPmappingGen();

    genReportRichDebugInfo();

    /* Finalize the Local Var info in terms of generated code */

    genSetScopeInfo();

#ifdef LATE_DISASM
    unsigned finalHotCodeSize;
    unsigned finalColdCodeSize;
    if (compiler->fgFirstColdBlock != nullptr)
    {
        // We did some hot/cold splitting. The hot section is always padded out to the
        // size we thought it would be, but the cold section is not.
        assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
        assert(compiler->info.compTotalHotCodeSize > 0);
        assert(compiler->info.compTotalColdCodeSize > 0);
        finalHotCodeSize  = compiler->info.compTotalHotCodeSize;
        finalColdCodeSize = codeSize - finalHotCodeSize;
    }
    else
    {
        // No hot/cold splitting
        assert(codeSize <= compiler->info.compTotalHotCodeSize);
        assert(compiler->info.compTotalHotCodeSize > 0);
        assert(compiler->info.compTotalColdCodeSize == 0);
        finalHotCodeSize  = codeSize;
        finalColdCodeSize = 0;
    }
    getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
#endif // LATE_DISASM
    /* Report any exception handlers to the VM */

    genReportEH();

#ifdef JIT32_GCENCODER
#ifdef DEBUG
    void* infoPtr =
#endif // DEBUG
#endif
        // Create and store the GC info for this method.
        genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));

#ifdef DEBUG
    FILE* dmpf = jitstdout();

    compiler->opts.dmpHex = false;
    if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
    {
        FILE*   codf;
        errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
        if (ec == 0)
        {
            assert(codf);
            dmpf                  = codf;
            compiler->opts.dmpHex = true;
        }
    }
    if (compiler->opts.dmpHex)
    {
        size_t consSize = GetEmitter()->emitDataSize();

        fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
        fprintf(dmpf, "\n");

        if (codeSize)
        {
            fprintf(dmpf, "    Code  at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
        }
        if (consSize)
        {
            fprintf(dmpf, "    Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
        }
#ifdef JIT32_GCENCODER
        size_t infoSize = compiler->compInfoBlkSize;
        if (infoSize)
        {
            fprintf(dmpf, "    Info  at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
        }
#endif // JIT32_GCENCODER

        fprintf(dmpf, "\n");

        if (codeSize)
        {
            hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
        }
        if (consSize)
        {
            hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
        }
#ifdef JIT32_GCENCODER
        if (infoSize)
        {
            hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
        }
#endif // JIT32_GCENCODER

        fflush(dmpf);
    }

    if (dmpf != jitstdout())
    {
        fclose(dmpf);
    }
#endif // DEBUG

    /* Tell the emitter that we're done with this function */

    GetEmitter()->emitEndFN();

    /* Shut down the spill logic */

    regSet.rsSpillDone();

    /* Shut down the temp logic */

    regSet.tmpDone();

#if DISPLAY_SIZES

    size_t dataSize = GetEmitter()->emitDataSize();
    grossVMsize += compiler->info.compILCodeSize;
    totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
    grossNCsize += codeSize + dataSize;

#endif // DISPLAY_SIZES
}
2189 /*****************************************************************************
2191 * Report EH clauses to the VM
2194 void CodeGen::genReportEH()
2196 if (compiler->compHndBBtabCount == 0)
2202 if (compiler->opts.dspEHTable)
2204 printf("*************** EH table for %s\n", compiler->info.compFullName);
2210 bool isNativeAOT = compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI);
2212 unsigned EHCount = compiler->compHndBBtabCount;
2214 #if defined(FEATURE_EH_FUNCLETS)
2215 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the VM.
2217 unsigned duplicateClauseCount = 0;
2218 unsigned enclosingTryIndex;
2220 // Duplicate clauses are not used by NativeAOT ABI
2223 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2225 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2226 // ignoring 'mutual protect' trys
2227 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2228 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2230 ++duplicateClauseCount;
2233 EHCount += duplicateClauseCount;
2236 #if FEATURE_EH_CALLFINALLY_THUNKS
2237 unsigned clonedFinallyCount = 0;
2239 // Cloned finally clauses are not used by NativeAOT ABI
2242 // We don't keep track of how many cloned finallys there are. So, go through and count.
2243 // We do a quick pass first through the EH table to see if there are any try/finally
2244 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2246 bool anyFinallys = false;
2247 for (EHblkDsc* const HBtab : EHClauses(compiler))
2249 if (HBtab->HasFinallyHandler())
2257 for (BasicBlock* const block : compiler->Blocks())
2259 if (block->bbJumpKind == BBJ_CALLFINALLY)
2261 ++clonedFinallyCount;
2265 EHCount += clonedFinallyCount;
2268 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2270 #endif // FEATURE_EH_FUNCLETS
2273 if (compiler->opts.dspEHTable)
2275 #if defined(FEATURE_EH_FUNCLETS)
2276 #if FEATURE_EH_CALLFINALLY_THUNKS
2277 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2278 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2279 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2280 #else // !FEATURE_EH_CALLFINALLY_THUNKS
2281 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2282 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2283 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2284 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
2285 #else // !FEATURE_EH_FUNCLETS
2286 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2287 assert(compiler->compHndBBtabCount == EHCount);
2288 #endif // !FEATURE_EH_FUNCLETS
2292 // Tell the VM how many EH clauses to expect.
2293 compiler->eeSetEHcount(EHCount);
2295 XTnum = 0; // This is the index we pass to the VM
2297 for (EHblkDsc* const HBtab : EHClauses(compiler))
2299 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2301 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2302 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2304 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2305 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2306 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2307 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2309 if (HBtab->HasFilter())
2311 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2315 hndTyp = HBtab->ebdTyp;
2318 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2322 // The CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers the same
2323 // try block as the previous one. The runtime cannot reliably infer this information from
2324 // native code offsets, because different try blocks can have the same offsets. An alternative
2325 // solution to this problem would be inserting extra nops to ensure that different try
2326 // blocks have different offsets.
2327 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2329 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2330 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2331 // IL as "try { try {} catch {} catch {} } finally {}".
2332 assert(HBtab->HasCatchHandler());
2333 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2337 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2338 // the fields aren't accurate.
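// For instance (illustrative sketch with hypothetical offsets): a 'try' body at native
// offsets [0x10, 0x30) with a catch handler at [0x40, 0x58) would be reported as
//
//     clause.TryOffset     = 0x10; // tryBeg
//     clause.TryLength     = 0x30; // really tryEnd, one past the end of the 'try' body
//     clause.HandlerOffset = 0x40; // hndBeg
//     clause.HandlerLength = 0x58; // really hndEnd, one past the end of the handler
//
// i.e. the "Length" fields carry end offsets, not lengths.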
2340 CORINFO_EH_CLAUSE clause;
2341 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2342 clause.Flags = flags;
2343 clause.TryOffset = tryBeg;
2344 clause.TryLength = tryEnd;
2345 clause.HandlerOffset = hndBeg;
2346 clause.HandlerLength = hndEnd;
2348 assert(XTnum < EHCount);
2350 // Tell the VM about this EH clause.
2351 compiler->eeSetEHinfo(XTnum, &clause);
2356 #if defined(FEATURE_EH_FUNCLETS)
2357 // Now output duplicated clauses.
2359 // If a funclet has been created by moving a handler out of a try region that it was originally nested
2360 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2361 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2362 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2363 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2364 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2365 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2368 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2369 // try or handler region):
2387 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2388 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2389 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2390 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2391 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2392 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2393 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2394 // representing try (1) protecting the new funclets catch (3) and (4).
2395 // The code will be generated as follows:
2397 // ABCFH // "main" code
2402 // The EH regions are:
2407 // D -> G // "duplicate" clause
2408 // E -> G // "duplicate" clause
2410 // Note that we actually need to generate one of these additional "duplicate" clauses for every
2411 // region the funclet is nested in. Take this example:
2443 // When we pull out funclets, we get the following generated code:
2445 // ABCDEHJMO // "main" function
2453 // And the EH regions we report to the VM are (in order; main clauses
2454 // first in most-to-least nested order, funclets ("duplicated clauses")
2455 // last, in most-to-least nested order):
2463 // F -> I // funclet clause #1 for F
2464 // F -> K // funclet clause #2 for F
2465 // F -> L // funclet clause #3 for F
2466 // F -> N // funclet clause #4 for F
2467 // G -> I // funclet clause #1 for G
2468 // G -> K // funclet clause #2 for G
2469 // G -> L // funclet clause #3 for G
2470 // G -> N // funclet clause #4 for G
2471 // I -> K // funclet clause #1 for I
2472 // I -> L // funclet clause #2 for I
2473 // I -> N // funclet clause #3 for I
2474 // K -> N // funclet clause #1 for K
2475 // L -> N // funclet clause #1 for L
2477 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2478 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2479 // to add a clause "F -> G" because F is NOT protected by G, but we still have
2480 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2482 // The overall ordering of the clauses is still the same most-to-least nesting
2483 // after front-to-back start offset. Because we place the funclets at the end
2484 // these new clauses should also go at the end by this ordering.
2487 if (duplicateClauseCount > 0)
2489 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2492 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2494 unsigned enclosingTryIndex;
2496 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2498 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2499 // ignoring 'mutual protect' trys
2500 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2501 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2503 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2504 // that will have the enclosing try protecting the funclet.
2506 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2507 // greater EH table index
2509 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2511 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2512 // filter region, only the filter handler region. This is because exceptions in filters never
2513 // escape; the VM swallows them.
2515 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2516 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2518 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2519 BasicBlock* bbHndLast = encTab->ebdHndLast;
2521 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2523 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2524 hndBeg = compiler->ehCodeOffset(bbHndBeg);
2526 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2527 : compiler->ehCodeOffset(bbTryLast->bbNext);
2528 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2529 : compiler->ehCodeOffset(bbHndLast->bbNext);
2531 if (encTab->HasFilter())
2533 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2537 hndTyp = encTab->ebdTyp;
2540 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2542 // Tell the VM this is an extra clause caused by moving funclets out of line.
2543 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2545 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2546 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2547 // instruction immediately after the 'try' body. So, it really could be more accurately named "TryEndOffset".
2550 CORINFO_EH_CLAUSE clause;
2551 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2552 clause.Flags = flags;
2553 clause.TryOffset = tryBeg;
2554 clause.TryLength = tryEnd;
2555 clause.HandlerOffset = hndBeg;
2556 clause.HandlerLength = hndEnd;
2558 assert(XTnum < EHCount);
2560 // Tell the VM about this EH clause (a duplicated clause).
2561 compiler->eeSetEHinfo(XTnum, &clause);
2564 ++reportedDuplicateClauseCount;
2567 if (duplicateClauseCount == reportedDuplicateClauseCount)
2569 break; // we've reported all of them; no need to continue looking
2573 } // for each 'true' enclosing 'try'
2574 } // for each EH table entry
2576 assert(duplicateClauseCount == reportedDuplicateClauseCount);
2577 } // if (duplicateClauseCount > 0)
2579 #if FEATURE_EH_CALLFINALLY_THUNKS
2580 if (clonedFinallyCount > 0)
2582 unsigned reportedClonedFinallyCount = 0;
2583 for (BasicBlock* const block : compiler->Blocks())
2585 if (block->bbJumpKind == BBJ_CALLFINALLY)
2587 UNATIVE_OFFSET hndBeg, hndEnd;
2589 hndBeg = compiler->ehCodeOffset(block);
2591 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
2592 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
2593 BasicBlock* bbLabel = block->bbNext;
2594 if (block->isBBCallAlwaysPair())
2596 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
2598 if (bbLabel == nullptr)
2600 hndEnd = compiler->info.compNativeCodeSize;
2604 assert(bbLabel->bbEmitCookie != nullptr);
2605 hndEnd = compiler->ehCodeOffset(bbLabel);
2608 CORINFO_EH_CLAUSE clause;
2609 clause.ClassToken = 0; // unused
2610 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
2611 clause.TryOffset = hndBeg;
2612 clause.TryLength = hndBeg;
2613 clause.HandlerOffset = hndBeg;
2614 clause.HandlerLength = hndEnd;
2616 assert(XTnum < EHCount);
2618 // Tell the VM about this EH clause (a cloned finally clause).
2619 compiler->eeSetEHinfo(XTnum, &clause);
2622 ++reportedClonedFinallyCount;
2625 if (clonedFinallyCount == reportedClonedFinallyCount)
2627 break; // we're done; no need to keep looking
2630 } // block is BBJ_CALLFINALLY
2633 assert(clonedFinallyCount == reportedClonedFinallyCount);
2634 } // if (clonedFinallyCount > 0)
2635 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2637 #endif // FEATURE_EH_FUNCLETS
2639 assert(XTnum == EHCount);
2642 //----------------------------------------------------------------------
2643 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2644 // helper should be used.
2647 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
2650 // true if an optimized write barrier helper should be used, false otherwise.
2651 // Note: only x86 implements register-specific source optimized write
2652 // barriers currently.
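// Illustrative usage (editor's sketch, not a call site from this file): on x86 with
// NOGC_WRITE_BARRIERS the optimized helpers are register-specific entry points such as
// CORINFO_HELP_ASSIGN_REF_EAX, which take the target address in a fixed register rather
// than via the normal calling convention. A caller is expected to branch on this predicate
// before falling back to genWriteBarrierHelperForWriteBarrierForm():
//
//     if (genUseOptimizedWriteBarriers(wbf))
//     {
//         // emit a call to the register-specific helper chosen from the source register
//     }
//     else
//     {
//         CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(wbf);
//         // emit an ordinary helper call
//     }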
2654 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
2656 #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS
2658 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2667 //----------------------------------------------------------------------
2668 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2669 // helper should be used.
2671 // This has the same functionality as the version of
2672 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
2673 // determining what the required write barrier form is, if possible.
2676 // store - the GT_STOREIND node
2679 // true if an optimized write barrier helper should be used, false otherwise.
2680 // Note: only x86 implements register-specific source optimized write
2681 // barriers currently.
2683 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTreeStoreInd* store)
2685 #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS
2687 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(store);
2688 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2697 //----------------------------------------------------------------------
2698 // genWriteBarrierHelperForWriteBarrierForm: Given a write barrier form
2699 // return the corresponding helper.
2702 // wbf - the write barrier form
2705 // Write barrier helper to use.
2707 // Note: do not call this function to get an optimized write barrier helper (e.g., CORINFO_HELP_ASSIGN_REF_EAX).
2710 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GCInfo::WriteBarrierForm wbf)
2714 case GCInfo::WBF_BarrierChecked:
2715 return CORINFO_HELP_CHECKED_ASSIGN_REF;
2717 case GCInfo::WBF_BarrierUnchecked:
2718 return CORINFO_HELP_ASSIGN_REF;
2721 case GCInfo::WBF_NoBarrier_CheckNotHeapInDebug:
2722 return CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
2730 //----------------------------------------------------------------------
2731 // genGCWriteBarrier: Generate a write barrier for a node.
2734 // store - the GT_STOREIND node
2735 // wbf - already computed write barrier form to use
2737 void CodeGen::genGCWriteBarrier(GenTreeStoreInd* store, GCInfo::WriteBarrierForm wbf)
2739 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(wbf);
2741 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
2742 // Under FEATURE_COUNT_GC_WRITE_BARRIERS, we will add an extra argument to the
2743 // checked write barrier call denoting the kind of address being written to.
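// Illustrative classification (editor's sketch; the trees and local numbers are hypothetical):
//
//     STOREIND(LCL_VAR V00 /* the retBuf arg */, value)         -> CWBKind_RetBuf
//     STOREIND(ADD(LCL_VAR V01 /* byref param */, 8), value)    -> CWBKind_ByRefArg
//     STOREIND(LCL_ADDR V02, value)                             -> CWBKind_AddrOfLocal
//
// The loop below peels address modes (GT_LEA bases and GT_ADD of a constant) off the
// destination address before classifying it.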
2745 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
2747 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
2748 GenTree* tgtAddr = store->Addr();
2750 while (tgtAddr->OperIs(GT_ADD, GT_LEA))
2752 if (tgtAddr->OperIs(GT_LEA) && tgtAddr->AsAddrMode()->HasBase())
2754 tgtAddr = tgtAddr->AsAddrMode()->Base();
2756 else if (tgtAddr->OperIs(GT_ADD) && tgtAddr->AsOp()->gtGetOp2()->IsCnsIntOrI())
2758 tgtAddr = tgtAddr->AsOp()->gtGetOp1();
2766 if (tgtAddr->OperIs(GT_LCL_VAR))
2768 unsigned lclNum = tgtAddr->AsLclVar()->GetLclNum();
2769 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
2770 if (lclNum == compiler->info.compRetBuffArg)
2772 wbKind = CWBKind_RetBuf;
2774 else if (varDsc->TypeGet() == TYP_BYREF)
2776 wbKind = varDsc->lvIsParam ? CWBKind_ByRefArg : CWBKind_OtherByRefLocal;
2779 else if (tgtAddr->OperIs(GT_LCL_ADDR))
2781 // Ideally, we should have eliminated the barrier for this case.
2782 wbKind = CWBKind_AddrOfLocal;
2787 // Enable this to sample the unclassified trees.
2788 static int unclassifiedBarrierSite = 0;
2789 if (wbKind == CWBKind_Unclassified)
2791 unclassifiedBarrierSite++;
2792 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite);
2793 compiler->gtDispTree(store);
2794 printf(""); // Flush.
2801 inst_IV(INS_push, wbKind);
2802 genEmitHelperCall(helper,
2803 0, // argSize
2804 EA_PTRSIZE); // retSize
2805 SubtractStackLevel(4);
2808 #endif // FEATURE_COUNT_GC_WRITE_BARRIERS
2810 genEmitHelperCall(helper,
2811 0, // argSize
2812 EA_PTRSIZE); // retSize
2816 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2817 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2819 XX Prolog / Epilog XX
2821 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2822 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2825 /*****************************************************************************
2827 * Generates code for moving incoming register arguments to their
2828 * assigned location, in the function prolog.
2832 #pragma warning(push)
2833 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2836 #if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
2837 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
2842 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
2846 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
2847 unsigned argNum; // current argNum, always in [0..argMax-1]
2848 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
2849 unsigned regArgNum; // index into the regArgTab[] table
2850 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
2851 bool doingFloat = regState->rsIsFloat;
2853 // We should be generating the prolog block when we are called
2854 assert(compiler->compGeneratingProlog);
2856 // We expect to have some registers of the type we are doing (float or integer) that are LiveIn; otherwise we don't need to be called.
2857 noway_assert(regArgMaskLive != 0);
2859 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
2860 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
2862 // The regArgTab can always have unused entries,
2863 // for example if an architecture always increments the arg register number but uses either
2864 // an integer register or a floating point register to hold the next argument
2865 // then with a mix of float and integer args you could have:
2867 // sampleMethod(int i, float x, int j, float y, int k, float z);
2868 // r0, r2 and r4 as valid integer arguments with argMax as 5
2869 // and f1, f3 and f5 as valid floating point arguments with argMax as 6
2870 // The first one is doingFloat==false and the second one is doingFloat==true
2872 // If a fixed return buffer (in r8) was also present then the first one would become:
2873 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
2876 argMax = regState->rsCalleeRegArgCount;
2877 fixedRetBufIndex = (unsigned)-1; // Invalid value
2879 // If necessary we will select a correct xtraReg for circular floating point args later.
2883 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
2885 else // we are doing the integer registers
2887 noway_assert(argMax <= MAX_REG_ARG);
2888 if (hasFixedRetBuffReg())
2890 fixedRetBufIndex = theFixedRetBuffArgNum();
2891 // We have an additional integer register argument when hasFixedRetBuffReg() is true
2892 argMax = fixedRetBufIndex + 1;
2893 assert(argMax == (MAX_REG_ARG + 1));
2898 // Construct a table with the register arguments, for detecting circular and
2899 // non-circular dependencies between the register arguments. A dependency is when
2900 // an argument register Rn needs to be moved to register Rm that is also an argument
2901 // register. The table is constructed in the order the arguments are passed in
2902 // registers: the first register argument is in regArgTab[0], the second in
2903 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
2904 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
2905 // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
2906 // we have increased the allocated size of the regArgTab[] by one.
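// Illustrative example (editor's sketch, hypothetical register allocation): suppose
// sampleMethod(int a, int b) receives 'a' in r0 and 'b' in r1, but the allocator assigned
// 'a' to r1 and 'b' to r0. Then regArgTab[0].trashBy == 1 and regArgTab[1].trashBy == 0,
// both entries get circular == true, and the pair is resolved with an 'xchg' on xarch or
// via xtraReg on targets that lack a register-exchange instruction.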
2910 unsigned varNum; // index into compiler->lvaTable[] for this register argument
2911 var_types type; // the Jit type of this regArgTab entry
2912 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
2913 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
2914 // argument register number 'x'. Only used when circular = true.
2915 char slot; // 0 means the register is not used for a register argument
2916 // 1 means the first part of a register argument
2917 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
2918 bool stackArg; // true if the argument gets homed to the stack
2919 bool writeThru; // true if the argument gets homed to both stack and register
2920 bool processed; // true after we've processed the argument (and it is in its final location)
2921 bool circular; // true if this register participates in a circular dependency loop.
2922 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
2927 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
2929 varDsc = compiler->lvaGetDesc(varNum);
2931 // Is this variable a register arg?
2932 if (!varDsc->lvIsParam)
2937 if (!varDsc->lvIsRegArg)
2942 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
2943 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
2944 // We will use the lvStructField if we have a PROMOTION_TYPE_INDEPENDENT promoted struct field; otherwise
2945 // use the original TYP_STRUCT argument.
2947 if (varDsc->lvPromoted || varDsc->lvIsStructField)
2949 LclVarDsc* parentVarDsc = varDsc;
2950 if (varDsc->lvIsStructField)
2952 assert(!varDsc->lvPromoted);
2953 parentVarDsc = compiler->lvaGetDesc(varDsc->lvParentLcl);
2956 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
2958 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
2960 // For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[]
2962 if (varDsc->lvPromoted)
2969 // For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[]
2971 if (varDsc->lvIsStructField)
2978 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
2979 // Change regType to the HFA type when we have an HFA argument
2980 if (varDsc->lvIsHfaRegArg())
2982 #if defined(TARGET_ARM64)
2983 if (TargetOS::IsWindows && compiler->info.compIsVarArgs)
2985 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
2987 #endif // defined(TARGET_ARM64)
2988 regType = varDsc->GetHfaType();
2991 #if defined(UNIX_AMD64_ABI)
2992 if (!varTypeIsStruct(regType))
2993 #endif // defined(UNIX_AMD64_ABI)
2995 bool isFloatReg = emitter::isFloatReg(varDsc->GetArgReg());
2997 if (isFloatReg != doingFloat)
2999 // A struct might be passed partially in an XMM register for System V calls.
3000 // So a single arg might use both register files.
3003 else if (isFloatReg != varTypeUsesFloatArgReg(regType))
3005 if (regType == TYP_FLOAT)
3011 assert(regType == TYP_DOUBLE);
3019 #if defined(UNIX_AMD64_ABI)
3020 if (varTypeIsStruct(varDsc))
3022 CORINFO_CLASS_HANDLE typeHnd;
3023 if (varDsc->lvIsStructField)
3025 // The only case we currently permit is a wrapped SIMD field,
3026 // where we won't have the class handle available, so get it
3027 // from the parent struct -- they will agree on ABI details.
3028 LclVarDsc* parentDsc = compiler->lvaGetDesc(varDsc->lvParentLcl);
3029 assert(varTypeIsSIMD(varDsc) && (parentDsc->lvFieldCnt == 1));
3030 typeHnd = parentDsc->GetLayout()->GetClassHandle();
3034 typeHnd = varDsc->GetLayout()->GetClassHandle();
3036 assert(typeHnd != nullptr);
3037 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3038 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3039 if (!structDesc.passedInRegisters)
3041 // The var is not passed in registers.
3045 unsigned firstRegSlot = 0;
3046 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3048 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3053 // The RyuJIT backend depends on the assumption that on 64-bit targets the Vector3 size is rounded up
3054 // to TARGET_POINTER_SIZE and hence Vector3 locals on the stack can be treated as TYP_SIMD16 for
3055 // reading and writing purposes. Hence while homing a Vector3 type arg on the stack we should
3056 // home the entire 16 bytes so that the upper-most 4 bytes will be zeroed when written to the stack.
3059 // The RyuJIT backend makes another implicit assumption: that for Vector3 type args, whether passed in
3060 // registers or on the stack, the upper-most 4 bytes will be zero.
3062 // For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
3063 // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence the second assumption above is invalid.
3066 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are read/written.
3067 // In case of Vector3 returns, the caller allocates a zero-initialized Vector3 local and
3068 // passes it as the retBuf arg, and the callee method writes only 12 bytes to retBuf. For this reason,
3069 // there is no need to clear the upper 4 bytes of Vector3 type args.
3071 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3072 // Vector3 return values are returned in two return registers and the caller assembles them into a
3073 // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
3074 // type args in the prolog and of the Vector3 type return value of a call.
3076 if (varDsc->lvType == TYP_SIMD12)
3078 regType = TYP_DOUBLE;
3083 regType = compiler->GetEightByteType(structDesc, slotCounter);
3086 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3088 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3089 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3091 // Store the reg for the first slot.
3094 firstRegSlot = regArgNum;
3097 // Bingo - add it to our table
3098 noway_assert(regArgNum < argMax);
3099 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3100 // not be multiple vars representing this argument)
3102 regArgTab[regArgNum].varNum = varNum;
3103 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3104 regArgTab[regArgNum].type = regType;
3111 continue; // Nothing to do for this regState set.
3114 regArgNum = firstRegSlot;
3117 #endif // defined(UNIX_AMD64_ABI)
3119 // Bingo - add it to our table
3120 regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType);
3123 if (TargetArchitecture::IsArm32)
3125 int lclSize = compiler->lvaLclSize(varNum);
3126 if (lclSize > REGSIZE_BYTES)
3128 slots = lclSize / REGSIZE_BYTES;
3131 #if FEATURE_MULTIREG_ARGS
3132 else if (varDsc->lvIsMultiRegArg)
3134 if (varDsc->lvIsHfaRegArg())
3136 // We have an HFA argument, set slots to the number of registers used
3137 slots = varDsc->lvHfaSlots();
3141 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3142 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3143 // We have a non-HFA multireg argument, set slots to two
3147 #endif // FEATURE_MULTIREG_ARGS
3149 // Handle args split between registers and stack. The arm64 fixed ret buf arg is never split.
3150 if (compFeatureArgSplit() && (fixedRetBufIndex != regArgNum))
3152 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3153 if (regArgNum + slots > maxRegArgNum)
3155 JITDUMP("Splitting V%02u: %u registers, %u stack slots\n", varNum, maxRegArgNum - regArgNum,
3156 regArgNum + slots - maxRegArgNum);
3157 slots = maxRegArgNum - regArgNum;
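// Worked example (illustrative only): on arm32, MAX_REG_ARG is 4 (r0-r3). A 3-slot struct
// whose first slot maps to regArgNum == 3 would need slots 3, 4 and 5, so it is split:
// slots becomes 4 - 3 == 1 register slot and the remaining 2 slots are passed on the stack.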
3161 // Note that regArgNum + i represents an argument index, not an actual argument register;
3162 // see genMapRegArgNumToRegNum().
3164 for (int i = 0; i < slots; i++)
3166 noway_assert((regArgNum + i) < argMax);
3168 // We better not have added it already (there better not be multiple vars representing this argument)
3170 noway_assert(regArgTab[regArgNum + i].slot == 0);
3172 regArgTab[regArgNum + i].varNum = varNum;
3173 regArgTab[regArgNum + i].slot = static_cast<char>(i + 1);
3175 regArgTab[regArgNum + i].type = regType; // Set the register type.
3179 for (int i = 0; i < slots; i++)
3181 regType = regArgTab[regArgNum + i].type;
3182 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3184 #if !defined(UNIX_AMD64_ABI)
3185 assert((i > 0) || (regNum == varDsc->GetArgReg()));
3186 #endif // !defined(UNIX_AMD64_ABI)
3188 // Is the arg dead on entry to the method ?
3190 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3192 if (varDsc->lvTrackedNonStruct())
3194 // We may now see some tracked locals with zero refs.
3195 // See Lowering::DoPhase. Tolerate these.
3196 if (varDsc->lvRefCnt() > 0)
3198 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3204 noway_assert(varDsc->lvType == TYP_STRUCT);
3205 #else // !TARGET_X86
3206 // For LSRA, it may not be in regArgMaskLive if it has a zero
3207 // refcnt. This is in contrast with the non-LSRA case in which all
3208 // non-tracked args are assumed live on entry.
3209 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3210 (varDsc->IsAddressExposed() && compiler->info.compIsVarArgs) ||
3211 (varDsc->IsAddressExposed() && compiler->opts.compUseSoftFP));
3212 #endif // !TARGET_X86
3214 // Mark it as processed and be done with it
3215 regArgTab[regArgNum + i].processed = true;
3220 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3221 // could be equal to GetArgReg(). The pre-spilled registers are not considered live either, since
3222 // they've already been spilled.
3224 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3225 #endif // TARGET_ARM
3227 #if !defined(UNIX_AMD64_ABI)
3228 noway_assert(xtraReg != (varDsc->GetArgReg() + i));
3230 noway_assert(regArgMaskLive & genRegMask(regNum));
3233 regArgTab[regArgNum + i].processed = false;
3234 regArgTab[regArgNum + i].writeThru = (varDsc->lvIsInReg() && varDsc->lvLiveInOutOfHndlr);
3236 /* mark stack arguments since we will take care of those first */
3237 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3239 /* If it goes on the stack or in a register that doesn't hold
3240 * an argument anymore -> CANNOT form a circular dependency */
3242 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3244 /* will trash another argument -> possible dependency
3245 * We may need several passes after the table is constructed
3246 * to decide on that */
3248 /* Maybe the argument stays in the register (IDEAL) */
3250 if ((i == 0) && (varDsc->GetRegNum() == regNum))
3255 #if !defined(TARGET_64BIT)
3256 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->GetOtherReg() == regNum))
3260 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->GetOtherReg() == regNum))
3265 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3266 (REG_NEXT(varDsc->GetRegNum()) == regNum))
3270 #endif // !defined(TARGET_64BIT)
3271 regArgTab[regArgNum + i].circular = true;
3276 regArgTab[regArgNum + i].circular = false;
3278 /* mark the argument register as free */
3279 regArgMaskLive &= ~genRegMask(regNum);
3284 /* Find the circular dependencies for the argument registers, if any.
3285 * A circular dependency is a set of registers R1, R2, ..., Rn
3286 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
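/* Illustrative run of the sieve (editor's sketch, hypothetical assignments): with
 * A: r0 -> r1, B: r1 -> r2 and C: r2 -> stack, C's destination is not a live argument
 * register, so C is marked non-circular and r2 is removed from the live mask; the next
 * pass then clears B (its destination r2 is now free), and finally A. Only chains that
 * close on themselves, e.g. r0 -> r1 together with r1 -> r0, keep 'circular' set after
 * all passes. */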
3291 /* Possible circular dependencies still exist; the previous pass was not enough
3292 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3298 for (argNum = 0; argNum < argMax; argNum++)
3300 // If we already marked the argument as non-circular then continue
3302 if (!regArgTab[argNum].circular)
3307 if (regArgTab[argNum].slot == 0) // Not a register argument
3312 varNum = regArgTab[argNum].varNum;
3313 varDsc = compiler->lvaGetDesc(varNum);
3314 const var_types varRegType = varDsc->GetRegisterType();
3315 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3317 /* cannot possibly have stack arguments */
3318 noway_assert(varDsc->lvIsInReg());
3319 noway_assert(!regArgTab[argNum].stackArg);
3321 var_types regType = regArgTab[argNum].type;
3322 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3324 regNumber destRegNum = REG_NA;
3325 if (varTypeIsPromotable(varDsc) &&
3326 (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT))
3328 assert(regArgTab[argNum].slot <= varDsc->lvFieldCnt);
3329 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + regArgTab[argNum].slot - 1);
3330 destRegNum = fieldVarDsc->GetRegNum();
3332 else if (regArgTab[argNum].slot == 1)
3334 destRegNum = varDsc->GetRegNum();
3336 #if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
3337 else if (varDsc->lvIsHfa())
3339 // This must be a SIMD type that's fully enregistered, but is passed as an HFA.
3340 // Each field will be inserted into the same destination register.
3341 assert(varTypeIsSIMD(varDsc));
3342 assert(regArgTab[argNum].slot <= (int)varDsc->lvHfaSlots());
3344 assert(regArgTab[argNum - 1].varNum == varNum);
3345 regArgMaskLive &= ~genRegMask(regNum);
3346 regArgTab[argNum].circular = false;
3350 #elif defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
3353 assert(regArgTab[argNum].slot == 2);
3355 assert(regArgTab[argNum - 1].slot == 1);
3356 assert(regArgTab[argNum - 1].varNum == varNum);
3357 assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
3358 regArgMaskLive &= ~genRegMask(regNum);
3359 regArgTab[argNum].circular = false;
3363 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
3364 #if !defined(TARGET_64BIT)
3365 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3367 destRegNum = varDsc->GetOtherReg();
3371 assert(regArgTab[argNum].slot == 2);
3372 assert(varDsc->TypeGet() == TYP_DOUBLE);
3373 destRegNum = REG_NEXT(varDsc->GetRegNum());
3375 #endif // !defined(TARGET_64BIT)
3376 noway_assert(destRegNum != REG_NA);
3377 if (genRegMask(destRegNum) & regArgMaskLive)
3379 /* we are trashing a live argument register - record it */
3380 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3381 noway_assert(destRegArgNum < argMax);
3382 regArgTab[destRegArgNum].trashBy = argNum;
3386 /* argument goes to a free register */
3387 regArgTab[argNum].circular = false;
3390 /* mark the argument register as free */
3391 regArgMaskLive &= ~genRegMask(regNum);
3397 /* At this point, everything that has the "circular" flag
3398 * set to "true" forms a circular dependency */
3399 CLANG_FORMAT_COMMENT_ANCHOR;
3406 printf("Circular dependencies found while home-ing the incoming arguments.\n");
3411 // LSRA allocates registers to incoming parameters in order and will not overwrite
3412 // a register still holding a live parameter.
3414 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3415 "Homing of float argument registers with circular dependencies not implemented.");
3417 // Now move the arguments to their locations.
3418 // First consider ones that go on the stack since they may free some registers.
3419 // Also home writeThru args, since they're also homed to the stack.
3421 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3422 for (argNum = 0; argNum < argMax; argNum++)
3426 #if defined(UNIX_AMD64_ABI)
3427 // If this is the wrong register file, just continue.
3428 if (regArgTab[argNum].type == TYP_UNDEF)
3430 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3431 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3432 // The next register file processing will process it.
3435 #endif // defined(UNIX_AMD64_ABI)
3437 // If the arg is dead on entry to the method, skip it
3439 if (regArgTab[argNum].processed)
3444 if (regArgTab[argNum].slot == 0) // Not a register argument
3449 varNum = regArgTab[argNum].varNum;
3450 varDsc = compiler->lvaGetDesc(varNum);
3452 #ifndef TARGET_64BIT
3453 // If this arg is never on the stack, go to the next one.
3454 if (varDsc->lvType == TYP_LONG)
3456 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru)
3460 else if (varDsc->GetOtherReg() != REG_STK)
3466 #endif // !TARGET_64BIT
3468 // If this arg is never on the stack, go to the next one.
3469 if (!regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru)
3475 #if defined(TARGET_ARM)
3476 if (varDsc->lvType == TYP_DOUBLE)
3478 if (regArgTab[argNum].slot == 2)
3480 // We handled the entire double when processing the first half (slot == 1)
3486 noway_assert(regArgTab[argNum].circular == false);
3488 noway_assert(varDsc->lvIsParam);
3489 noway_assert(varDsc->lvIsRegArg);
3490 noway_assert(varDsc->lvIsInReg() == false || varDsc->lvLiveInOutOfHndlr ||
3491 (varDsc->lvType == TYP_LONG && varDsc->GetOtherReg() == REG_STK && regArgTab[argNum].slot == 2));
3493 var_types storeType = TYP_UNDEF;
3494 unsigned slotSize = TARGET_POINTER_SIZE;
3496 if (varTypeIsStruct(varDsc))
3498 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3499 #if FEATURE_MULTIREG_ARGS
3500 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3501 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3502 #endif // FEATURE_MULTIREG_ARGS
3503 #ifdef UNIX_AMD64_ABI
3504 storeType = regArgTab[argNum].type;
3505 #endif // UNIX_AMD64_ABI
3506 if (varDsc->lvIsHfaRegArg())
3509 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3510 storeType = TYP_FLOAT;
3511 slotSize = (unsigned)emitActualTypeSize(storeType);
3512 #else // TARGET_ARM64
3513 storeType = genActualType(varDsc->GetHfaType());
3514 slotSize = (unsigned)emitActualTypeSize(storeType);
3515 #endif // TARGET_ARM64
3518 else // Not a struct type
3520 storeType = genActualType(regArgTab[argNum].type);
3522 size = emitActualTypeSize(storeType);
3524 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
3525 #endif // TARGET_X86
3527 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
3529 // Stack argument - if the ref count is 0, we don't care about it
3531 if (!varDsc->lvOnFrame)
3533 noway_assert(varDsc->lvRefCnt() == 0);
3537 // Since slot is typically 1, baseOffset is typically 0
3538 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
3540 GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
3542 #ifndef UNIX_AMD64_ABI
3543 // Check if we are writing past the end of the struct
3544 if (varTypeIsStruct(varDsc))
3546 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
3548 #endif // !UNIX_AMD64_ABI
3551 // Mark the argument as processed, and set it as no longer live in srcRegNum,
3552 // unless it is a writeThru var, in which case we home it to the stack, but
3553 // don't mark it as processed until below.
3554 if (!regArgTab[argNum].writeThru)
3556 regArgTab[argNum].processed = true;
3557 regArgMaskLive &= ~genRegMask(srcRegNum);
3560 #if defined(TARGET_ARM)
3561 if ((storeType == TYP_DOUBLE) && !regArgTab[argNum].writeThru)
3563 regArgTab[argNum + 1].processed = true;
3564 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
3569 /* Process any circular dependencies */
3572 unsigned begReg, destReg, srcReg;
3573 unsigned varNumDest, varNumSrc;
3574 LclVarDsc* varDscDest;
3575 LclVarDsc* varDscSrc;
3576 instruction insCopy = INS_mov;
3580 #ifndef UNIX_AMD64_ABI
3581 if (GlobalJitOptions::compFeatureHfa)
3582 #endif // !UNIX_AMD64_ABI
3584 insCopy = ins_Copy(TYP_DOUBLE);
3585 // Compute xtraReg here when we have a float argument
3586 assert(xtraReg == REG_NA);
3588 regMaskTP fpAvailMask;
3590 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
3591 if (GlobalJitOptions::compFeatureHfa)
3593 fpAvailMask &= RBM_ALLDOUBLE;
3596 if (fpAvailMask == RBM_NONE)
3598 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
3599 if (GlobalJitOptions::compFeatureHfa)
3601 fpAvailMask &= RBM_ALLDOUBLE;
3605 assert(fpAvailMask != RBM_NONE);
3607 // We pick the lowest avail register number
3608 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
3609 xtraReg = genRegNumFromMask(tempMask);
3611 #if defined(TARGET_X86)
3612 // This case shouldn't occur on x86 since NYI gets converted to an assert
3613 NYI("Homing circular FP registers via xtraReg");
3617 for (argNum = 0; argNum < argMax; argNum++)
3619 // If not a circular dependency then continue
3620 if (!regArgTab[argNum].circular)
3625 // If already processed the dependency then continue
3627 if (regArgTab[argNum].processed)
3632 if (regArgTab[argNum].slot == 0) // Not a register argument
3637 destReg = begReg = argNum;
3638 srcReg = regArgTab[argNum].trashBy;
3640 varNumDest = regArgTab[destReg].varNum;
3641 varDscDest = compiler->lvaGetDesc(varNumDest);
3642 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
3644 noway_assert(srcReg < argMax);
3645 varNumSrc = regArgTab[srcReg].varNum;
3646 varDscSrc = compiler->lvaGetDesc(varNumSrc);
3647 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
3649 emitAttr size = EA_PTRSIZE;
3653 // The following code relies upon the target architecture having an
3654 // 'xchg' instruction which directly swaps the values held in two registers.
3655 // On the ARM architecture we do not have such an instruction.
3657 if (destReg == regArgTab[srcReg].trashBy)
3659 /* only 2 registers form the circular dependency - use "xchg" */
3661 varNum = regArgTab[argNum].varNum;
3662 varDsc = compiler->lvaGetDesc(varNum);
3663 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3665 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
3667 /* Set "size" to indicate GC if one and only one of
3668 * the operands is a pointer
3669 * RATIONALE: If both are pointers, nothing changes in
3670 * the GC pointer tracking. If only one is a pointer we
3671 * have to "swap" the registers in the GC reg pointer mask
3674 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
3679 noway_assert(varDscDest->GetArgReg() == varDscSrc->GetRegNum());
3681 GetEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->GetRegNum(), varDscSrc->GetArgReg());
3682 regSet.verifyRegUsed(varDscSrc->GetRegNum());
3683 regSet.verifyRegUsed(varDscSrc->GetArgReg());
3685 /* mark both arguments as processed */
3686 regArgTab[destReg].processed = true;
3687 regArgTab[srcReg].processed = true;
3689 regArgMaskLive &= ~genRegMask(varDscSrc->GetArgReg());
3690 regArgMaskLive &= ~genRegMask(varDscDest->GetArgReg());
3693 #endif // TARGET_XARCH
3695 var_types destMemType = varDscDest->TypeGet();
3698 bool cycleAllDouble = true; // assume the best
3700 unsigned iter = begReg;
3703 if (compiler->lvaGetDesc(regArgTab[iter].varNum)->TypeGet() != TYP_DOUBLE)
3705 cycleAllDouble = false;
3708 iter = regArgTab[iter].trashBy;
3709 } while (iter != begReg);
3711 // We may treat doubles as floats for ARM because we could have partial circular
3712 // dependencies of a float with a lo/hi part of the double. We mark the
3713 // trashBy values for each slot of the double, so let the circular dependency
3714 // logic work its way out for floats rather than doubles. If a cycle has all
3715 // doubles, then optimize so that instead of two vmov.f32's to move a double,
3716 // we can use one vmov.f64.
3718 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
3720 destMemType = TYP_FLOAT;
3722 #endif // TARGET_ARM
3724 if (destMemType == TYP_REF)
3728 else if (destMemType == TYP_BYREF)
3732 else if (destMemType == TYP_DOUBLE)
3736 else if (destMemType == TYP_FLOAT)
3741 /* move the dest reg (begReg) into the extra reg */
3743 assert(xtraReg != REG_NA);
3745 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
3747 GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false);
3749 regSet.verifyRegUsed(xtraReg);
3751 *pXtraRegClobbered = true;
3752 /* start moving everything to its right place */
3754 while (srcReg != begReg)
3758 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
3759 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
3761 GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, srcRegNum, /* canSkip */ false);
3763 regSet.verifyRegUsed(destRegNum);
3765 /* mark 'src' as processed */
3766 noway_assert(srcReg < argMax);
3767 regArgTab[srcReg].processed = true;
3769 if (size == EA_8BYTE)
3770 regArgTab[srcReg + 1].processed = true;
3772 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
3774 /* move to the next pair */
3776 srcReg = regArgTab[srcReg].trashBy;
3778 varDscDest = varDscSrc;
3779 destMemType = varDscDest->TypeGet();
3781 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
3783 destMemType = TYP_FLOAT;
3786 varNumSrc = regArgTab[srcReg].varNum;
3787 varDscSrc = compiler->lvaGetDesc(varNumSrc);
3788 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
3790 if (destMemType == TYP_REF)
3794 else if (destMemType == TYP_DOUBLE)
3804 /* take care of the beginning register */
3806 noway_assert(srcReg == begReg);
3808 /* move the extra reg (holding begReg's original value) into the dest reg */
3810 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
3812 GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, xtraReg, /* canSkip */ false);
3814 regSet.verifyRegUsed(destRegNum);
3815 /* mark the beginning register as processed */
3817 regArgTab[srcReg].processed = true;
3819 if (size == EA_8BYTE)
3820 regArgTab[srcReg + 1].processed = true;
3822 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
3827 /* Finally take care of the remaining arguments that must be enregistered */
3828 while (regArgMaskLive)
3830 regMaskTP regArgMaskLiveSave = regArgMaskLive;
3832 for (argNum = 0; argNum < argMax; argNum++)
3834 /* If already processed go to the next one */
3835 if (regArgTab[argNum].processed)
3840 if (regArgTab[argNum].slot == 0)
3841 { // Not a register argument
3845 varNum = regArgTab[argNum].varNum;
3846 varDsc = compiler->lvaGetDesc(varNum);
3847 const var_types regType = regArgTab[argNum].type;
3848 const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3849 const var_types varRegType = varDsc->GetRegisterType();
3851 #if defined(UNIX_AMD64_ABI)
3852 if (regType == TYP_UNDEF)
3854 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3855 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3856 // The next register file processing will process it.
3857 regArgMaskLive &= ~genRegMask(regNum);
3860 #endif // defined(UNIX_AMD64_ABI)
3862 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3864 // On x86 we don't enregister args that are not pointer sized.
3865 noway_assert(genTypeSize(varDsc->GetStackSlotHomeType()) == TARGET_POINTER_SIZE);
3866 #endif // TARGET_X86
3868 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
3870 /* Register argument - hopefully it stays in the same register */
3871 regNumber destRegNum = REG_NA;
3872 var_types destMemType = varDsc->GetRegisterType();
3874 if (regArgTab[argNum].slot == 1)
3876 destRegNum = varDsc->GetRegNum();
3879 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
3881 // The second half of the double has already been processed! Treat this as a single.
3882 destMemType = TYP_FLOAT;
3884 #endif // TARGET_ARM
3886 #ifndef TARGET_64BIT
3887 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
3889 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
3890 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
3892 destRegNum = regNum;
3896 destRegNum = varDsc->GetOtherReg();
3899 assert(destRegNum != REG_STK);
3903 assert(regArgTab[argNum].slot == 2);
3904 assert(destMemType == TYP_DOUBLE);
3906 // For doubles, we move the entire double using the argNum representing
3907 // the first half of the double. There are two things we won't do:
3908 // (1) move the double when the 1st half of the destination is free but the
3909 // 2nd half is occupied, and (2) move the double when the 2nd half of the
3910 // destination is free but the 1st half is occupied. Here we consider the
3911 // case where the first half can't be moved initially because its target is
3912 // still busy, but the second half can be moved. We wait until the entire
3913 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
3914 // and F2 single moving to F16. When we process F0, its target F2 is busy,
3915 // so we skip it on the first pass. When we process F1, its target F3 is
3916 // available. However, we want to move F0/F1 all at once, so we skip it here.
3917 // We process F2, which frees up F2. The next pass through, we process F0 and
3918 // F2/F3 are empty, so we move it. Note that if half of a double is involved
3919 // in a circularity with a single, then we will have already moved that half
3920 // above, so we go ahead and move the remaining half as a single.
3921 // Because there are no circularities left, we are guaranteed to terminate.
3924 assert(regArgTab[argNum - 1].slot == 1);
3926 if (!regArgTab[argNum - 1].processed)
3928 // The first half of the double hasn't been processed yet; wait so that both halves can be processed at the same time
3932 // The first half of the double has been processed but the second half hasn't!
3933 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
3934 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
3935 // dependency logic above will move them as singles, leaving just F3 to move. Treat
3936 // it as a single to finish the shuffling.
3938 destMemType = TYP_FLOAT;
3939 destRegNum = REG_NEXT(varDsc->GetRegNum());
3941 #endif // !TARGET_64BIT
3942 #if (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
3945 assert(regArgTab[argNum].slot == 2);
3947 assert(regArgTab[argNum - 1].slot == 1);
3948 assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
3949 destRegNum = varDsc->GetRegNum();
3950 noway_assert(regNum != destRegNum);
3953 #endif // (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
3954 noway_assert(destRegNum != REG_NA);
3955 if (destRegNum != regNum)
3957 /* Cannot trash a currently live register argument.
3958 * Skip this one until its target becomes free,
3959 * which is guaranteed to happen since we have no circular dependencies. */
3961 regMaskTP destMask = genRegMask(destRegNum);
3963 // Don't process the double until both halves of the destination are clear.
3964 if (genActualType(destMemType) == TYP_DOUBLE)
3966 assert((destMask & RBM_DBL_REGS) != 0);
3967 destMask |= genRegMask(REG_NEXT(destRegNum));
3971 if (destMask & regArgMaskLive)
3976 /* Move it to the new register */
3978 emitAttr size = emitActualTypeSize(destMemType);
3980 #if defined(TARGET_ARM64)
3981 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
3983 // For a SIMD type that is passed in two integer registers,
3984 // limit the copy below to the first 8 bytes from the first integer register.
3985 // The remaining 8 bytes from the second slot are handled in the code further below.
3986 assert(EA_SIZE(size) >= 8);
3990 inst_Mov(destMemType, destRegNum, regNum, /* canSkip */ false, size);
3993 /* mark the argument as processed */
3995 assert(!regArgTab[argNum].processed);
3996 regArgTab[argNum].processed = true;
3997 regArgMaskLive &= ~genRegMask(regNum);
3998 #if FEATURE_MULTIREG_ARGS
3999 int argRegCount = 1;
4001 if (genActualType(destMemType) == TYP_DOUBLE)
4006 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4007 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4010 int nextArgNum = argNum + 1;
4011 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4012 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4013 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4014 // and moves the 0th element of the src reg into the 1st element of the dest reg.
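// (Viewing each xmm register as double[2], this computes: destRegNum = { destRegNum[0], nextRegNum[0] }.)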
4015 GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0);
4016 // Set destRegNum to regNum so that we skip the setting of the register below,
4017 // but mark argNum as processed and clear regNum from the live mask.
4018 destRegNum = regNum;
4020 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4021 #ifdef TARGET_ARMARCH
4022 if (varDsc->lvIsHfa())
4024 // This includes both fixed-size SIMD types that are independently promoted
4025 // and other HFA structs.
4026 argRegCount = varDsc->lvHfaSlots();
4027 if (argNum < (argMax - argRegCount + 1))
4029 if (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT)
4031 // For an HFA type that is passed in multiple registers and promoted, we copy each field to its
4032 // destination register.
4033 for (int i = 0; i < argRegCount; i++)
4035 int nextArgNum = argNum + i;
4036 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i);
4037 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4038 destRegNum = fieldVarDsc->GetRegNum();
4039 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4040 noway_assert(genIsValidFloatReg(nextRegNum));
4041 noway_assert(genIsValidFloatReg(destRegNum));
4042 GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, destRegNum, nextRegNum, /* canSkip */ false);
4045 #if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
4048 // For a SIMD type that is passed in multiple registers but enregistered as a vector,
4049 // the code above copies the first argument register into the lower 4 or 8 bytes
4050 // of the target register. Here we must handle the subsequent fields by
4051 // inserting them into the upper bytes of the target SIMD floating point register.
4052 argRegCount = varDsc->lvHfaSlots();
4053 for (int i = 1; i < argRegCount; i++)
4055 int nextArgNum = argNum + i;
4056 regArgElem* nextArgElem = ®ArgTab[nextArgNum];
4057 var_types nextArgType = nextArgElem->type;
4058 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, nextArgType);
4059 noway_assert(nextArgElem->varNum == varNum);
4060 noway_assert(genIsValidFloatReg(nextRegNum));
4061 noway_assert(genIsValidFloatReg(destRegNum));
4062 GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, nextRegNum, i, 0);
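// The two trailing immediates are (destination lane, source lane): iteration i
// effectively emits "mov vDest.s[i], vNext.s[0]", inserting the low 4-byte
// element of each subsequent argument register into lane i of the vector.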
4065 #endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD)
4068 #endif // TARGET_ARMARCH
4070 // Mark the rest of the argument registers corresponding to this multi-reg type as
4071 // being processed and no longer live.
4072 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4074 int nextArgNum = argNum + regSlot;
4075 assert(!regArgTab[nextArgNum].processed);
4076 regArgTab[nextArgNum].processed = true;
4077 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4078 regArgMaskLive &= ~genRegMask(nextRegNum);
4080 #endif // FEATURE_MULTIREG_ARGS
4083 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4086 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
4089 #pragma warning(pop)
4092 /*****************************************************************************
4093 * If any incoming stack arguments live in registers, load them.
4095 void CodeGen::genEnregisterIncomingStackArgs()
4100 printf("*************** In genEnregisterIncomingStackArgs()\n");
4104 // OSR handles this specially -- see genEnregisterOSRArgsAndLocals
4106 assert(!compiler->opts.IsOSR());
4108 assert(compiler->compGeneratingProlog);
4110 unsigned varNum = 0;
4112 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
4114 regNumber tmp_reg = REG_NA;
4115 int tmp_offset = 0;
4117 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4119 /* Is this variable a parameter? */
4121 if (!varDsc->lvIsParam)
4126 /* If it's a register argument then it's already been taken care of.
4127 But, on Arm when under a profiler, we would have prespilled a register argument
4128 and hence here we need to load it from its prespilled location.
4130 bool isPrespilledForProfiling = false;
4131 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
4132 isPrespilledForProfiling =
4133 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4136 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4141 /* Has the parameter been assigned to a register? */
4143 if (!varDsc->lvIsInReg())
4148 /* Is the variable dead on entry */
4150 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4155 /* Load the incoming parameter into the register */
4157 /* Figure out the home offset of the incoming argument */
4159 regNumber regNum = varDsc->GetArgInitReg();
4160 assert(regNum != REG_STK);
4162 var_types regType = varDsc->GetStackSlotHomeType();
4163 #ifdef TARGET_LOONGARCH64
4165 bool FPbased;
4166 int base = compiler->lvaFrameAddress(varNum, &FPbased);
4168 if (emitter::isValidSimm12(base))
4170 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0);
4174 if (tmp_reg == REG_NA)
4176 regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
4177 tmp_offset = base;
4178 tmp_reg = REG_R21;
4180 GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base);
4181 GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2);
4182 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8);
4186 int baseOffset = -(base - tmp_offset) - 8;
4187 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset);
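// Subsequent out-of-range parameters reuse the base address already
// materialized in R21 above: baseOffset is just the delta from the first
// base (tmp_offset), so the la/add pair is emitted only once per prolog.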
4191 #else // !TARGET_LOONGARCH64
4192 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0);
4193 #endif // !TARGET_LOONGARCH64
4195 regSet.verifyRegUsed(regNum);
4199 /*-------------------------------------------------------------------------
4201 * We have to decide whether we're going to use block initialization
4202 * in the prolog before we assign final stack offsets. This is because
4203 * when using block initialization we may need additional callee-saved
4204 * registers which need to be saved on the frame, thus increasing the frame size.
4207 * We'll count the number of locals we have to initialize,
4208 * and if there are lots of them we'll use block initialization.
4209 * Thus, the local variable table must have accurate register location
4210 * information for enregistered locals for their register state on entry to the function.
4213 * At the same time we set lvMustInit for locals (enregistered or on stack)
4214 * that must be initialized (e.g. initialize memory (compInitMem),
4215 * untracked pointers, or when DFA is disabled).
4217 void CodeGen::genCheckUseBlockInit()
4219 assert(!compiler->compGeneratingProlog);
4221 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
4222 // larger than int count for more than 1).
4224 unsigned varNum;
4225 LclVarDsc* varDsc;
4227 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4229 // The logic below is complex. Make sure we are not
4230 // double-counting the initialization impact of any locals.
4231 bool counted = false;
4233 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4235 noway_assert(varDsc->lvRefCnt() == 0);
4236 varDsc->lvMustInit = 0;
4240 if (compiler->fgVarIsNeverZeroInitializedInProlog(varNum))
4242 varDsc->lvMustInit = 0;
4246 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4248 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4249 // initialized by the parent struct. No need to set the lvMustInit bit in the
4251 varDsc->lvMustInit = 0;
4255 if (varDsc->lvHasExplicitInit)
4257 varDsc->lvMustInit = 0;
4261 const bool isTemp = varDsc->lvIsTemp;
4262 const bool hasGCPtr = varDsc->HasGCPtr();
4263 const bool isTracked = varDsc->lvTracked;
4264 const bool isStruct = varTypeIsStruct(varDsc);
4265 const bool compInitMem = compiler->info.compInitMem;
4267 if (isTemp && !hasGCPtr)
4269 varDsc->lvMustInit = 0;
4273 if (compInitMem || hasGCPtr || varDsc->lvMustInit)
4277 /* For uninitialized use of tracked variables, the liveness
4278 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
4280 if (varDsc->lvMustInit ||
4281 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4283 /* This var must be initialized */
4285 varDsc->lvMustInit = 1;
4287 /* See if the variable that is on the stack will be initialized
4288 * using rep stos - compute the total size to be zeroed */
4290 if (varDsc->lvOnFrame)
4292 if (!varDsc->lvRegister)
4294 if (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)
4296 // Var is on the stack at entry.
4298 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4304 // Var is partially enregistered
4305 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) &&
4306 varDsc->GetOtherReg() == REG_STK);
4307 initStkLclCnt += genTypeStSz(TYP_INT);
4314 if (varDsc->lvOnFrame)
4316 bool mustInitThisVar = false;
4317 if (hasGCPtr && !isTracked)
4319 JITDUMP("must init V%02u because it has a GC ref\n", varNum);
4320 mustInitThisVar = true;
4322 else if (hasGCPtr && isStruct)
4324 // TODO-1stClassStructs: support precise liveness reporting for such structs.
4325 JITDUMP("must init a tracked V%02u because it a struct with a GC ref\n", varNum);
4326 mustInitThisVar = true;
4330 // We are done with tracked or GC vars, now look at untracked vars without GC refs.
4333 assert(!hasGCPtr && !isTemp);
4336 JITDUMP("must init V%02u because compInitMem is set and it is not a temp\n", varNum);
4337 mustInitThisVar = true;
4341 if (mustInitThisVar)
4343 varDsc->lvMustInit = true;
4347 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4355 /* Don't forget about spill temps that hold pointers */
4356 assert(regSet.tmpAllFree());
4357 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4359 if (varTypeIsGC(tempThis->tdTempType()))
4365 // Record number of 4 byte slots that need zeroing.
4366 genInitStkLclCnt = initStkLclCnt;
4368 // Decide if we will do block initialization in the prolog, or use
4369 // a series of individual stores.
4371 // Primary factor is the number of slots that need zeroing. We've
4372 // been counting by sizeof(int) above. We assume for now we can
4373 // only zero register width bytes per store.
4375 // Current heuristic is to use block init when more than 4 stores are required.
4378 // TODO: Consider taking into account the presence of large structs that
4379 // potentially only need some fields set to zero.
4381 // Compiler::fgVarNeedsExplicitZeroInit relies on this logic to
4382 // find structs that are guaranteed to be block initialized.
4383 // If this logic changes, Compiler::fgVarNeedsExplicitZeroInit needs to be updated as well.
4385 CLANG_FORMAT_COMMENT_ANCHOR;
4388 #if defined(TARGET_AMD64)
4390 // We can clear using aligned SIMD so the threshold is lower,
4391 // and it clears in order, which is better for auto-prefetching
4392 genUseBlockInit = (genInitStkLclCnt > 4);
4394 #else // !defined(TARGET_AMD64)
4395 #ifdef TARGET_64BIT
4396 genUseBlockInit = (genInitStkLclCnt > 8);
4398 #else // !TARGET_64BIT
4400 genUseBlockInit = (genInitStkLclCnt > 4);
4402 #endif // TARGET_64BIT
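// A hypothetical sizing example: three TYP_INT locals plus one 16-byte struct
// that must be zeroed count as 3 + 4 = 7 int-sized slots, so block init is
// chosen on AMD64 (threshold 4) but not on the other 64-bit targets
// (threshold 8).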
4404 if (genUseBlockInit)
4406 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4408 // If there is a secret stub param, don't count it, as it will no longer
4409 // be live when we do block init.
4410 if (compiler->info.compPublishStubParam)
4412 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4417 // On Arm, if we are using block init to initialize, then we
4418 // must force-spill R4/R5/R6 so that we can use them during
4419 // the zero-initialization process.
4421 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
4422 if (forceSpillRegCount > 0)
4423 regSet.rsSetRegsModified(RBM_R4);
4424 if (forceSpillRegCount > 1)
4425 regSet.rsSetRegsModified(RBM_R5);
4426 if (forceSpillRegCount > 2)
4427 regSet.rsSetRegsModified(RBM_R6);
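// Example: with r0-r3 all live incoming argument registers and nothing
// pre-spilled, forceSpillRegCount is 3 and r4, r5 and r6 all get marked
// as modified for the zeroing loop.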
4428 #endif // TARGET_ARM
4432 /*****************************************************************************
4434 * initFltRegs -- The mask of float regs to be zeroed.
4435 * initDblRegs -- The mask of double regs to be zeroed.
4436 * initReg -- A zero initialized integer reg to copy from.
4438 * Does best effort to move between VFP/xmm regs if one is already
4439 initialized to 0. (Arm Only) Else copies from the integer register, which is slower.
4442 void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
4444 assert(compiler->compGeneratingProlog);
4446 // The first float/double reg that is initialized to 0, so it can be used to
4447 // initialize the remaining registers.
4448 regNumber fltInitReg = REG_NA;
4449 regNumber dblInitReg = REG_NA;
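// Strategy sketch: on Arm the loop below zeroes the first float (or double)
// register from initReg, then seeds every later register from that first
// zeroed FP register (including via f<->d converts of zero), since FP-to-FP
// copies are cheaper than repeated core-to-VFP transfers. Other targets can
// zero each register directly with a single instruction.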
4451 // Iterate through float/double registers and initialize them to 0 or
4452 // copy from already initialized register of the same type.
4453 regMaskTP regMask = genRegMask(REG_FP_FIRST);
4454 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
4456 if (regMask & initFltRegs)
4458 // Do we have a float register already set to 0?
4459 if (fltInitReg != REG_NA)
4462 inst_Mov(TYP_FLOAT, reg, fltInitReg, /* canSkip */ false);
4467 // Do we have a double register initialized to 0?
4468 if (dblInitReg != REG_NA)
4470 // Copy from double.
4471 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
4476 inst_Mov(TYP_FLOAT, reg, initReg, /* canSkip */ false);
4478 #elif defined(TARGET_XARCH)
4479 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
4480 GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
4482 #elif defined(TARGET_ARM64)
4483 // We will just zero out the entire vector register. This sets it to a double/float zero value
4484 GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
4485 #elif defined(TARGET_LOONGARCH64)
4486 // We will just zero out the entire vector register. This sets it to a double/float zero value
4487 GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0);
4488 #elif defined(TARGET_RISCV64)
4489 GetEmitter()->emitIns_R_R(INS_fmv_w_x, EA_4BYTE, reg, REG_R0);
4491 #error Unsupported or unset target architecture
4496 else if (regMask & initDblRegs)
4498 // Do we have a double register already set to 0?
4499 if (dblInitReg != REG_NA)
4501 // Copy from double.
4502 inst_Mov(TYP_DOUBLE, reg, dblInitReg, /* canSkip */ false);
4507 // Do we have a float register initialized to 0?
4508 if (fltInitReg != REG_NA)
4511 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
4516 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
4518 #elif defined(TARGET_XARCH)
4519 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
4520 GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
4522 #elif defined(TARGET_ARM64)
4523 // We will just zero out the entire vector register. This sets it to a double/float zero value
4524 GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
4525 #elif defined(TARGET_LOONGARCH64)
4526 GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0);
4527 #elif defined(TARGET_RISCV64)
4528 GetEmitter()->emitIns_R_R(INS_fmv_d_x, EA_8BYTE, reg, REG_R0);
4530 #error Unsupported or unset target architecture
4538 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
4539 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
4540 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
4542 #ifdef TARGET_ARM64
4543 return REG_ZR;
4544 #elif defined(TARGET_LOONGARCH64)
4545 return REG_R0;
4546 #elif defined(TARGET_RISCV64)
4547 return REG_ZERO;
4548 #else // !TARGET_ARM64
4549 if (*pInitRegZeroed == false)
4551 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
4552 *pInitRegZeroed = true;
4554 return initReg;
4555 #endif // !TARGET_ARM64
4558 //-----------------------------------------------------------------------------
4559 // genZeroInitFrame: Zero any untracked pointer locals and/or initialize memory for locspace
4562 // untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init
4563 // code will end initializing memory (not inclusive).
4564 // untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will
4565 // start zero initializing memory.
4566 // initReg - A scratch register (that gets set to zero on some platforms).
4567 // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero,
4568 // 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched.
4569 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
4571 assert(compiler->compGeneratingProlog);
4573 if (genUseBlockInit)
4575 genZeroInitFrameUsingBlockInit(untrLclHi, untrLclLo, initReg, pInitRegZeroed);
4577 else if (genInitStkLclCnt > 0)
4579 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // initReg is not a live incoming
4582 /* Initialize any lvMustInit vars on the stack */
4584 unsigned varNum;
4585 LclVarDsc* varDsc;
4587 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4589 if (!varDsc->lvMustInit)
4594 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
4595 // partially-enregistered vars in the case where we don't use a block init.
4596 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
4598 // lvMustInit can only be set for GC types or TYP_STRUCT types
4599 // or when compInitMem is true
4600 // or when in debug code
4602 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
4603 compiler->info.compInitMem || compiler->opts.compDbgCode);
4605 if (!varDsc->lvOnFrame)
4610 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
4611 (varDsc->lvExactSize() >= TARGET_POINTER_SIZE))
4613 // We only initialize the GC variables in the TYP_STRUCT
4614 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
4615 ClassLayout* layout = varDsc->GetLayout();
4617 for (unsigned i = 0; i < slots; i++)
4619 if (layout->IsGCPtr(i))
4621 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
4622 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
4628 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
4630 // zero out the whole thing rounded up to a single stack slot size
4631 unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
4632 unsigned i;
4633 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
4635 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
4638 #ifdef TARGET_64BIT
4639 assert(i == lclSize || (i + sizeof(int) == lclSize));
4640 if (i != lclSize)
4642 GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
4643 i += sizeof(int);
4645 #endif // TARGET_64BIT
4646 assert(i == lclSize);
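// Worked example (hypothetical size): a 12-byte local on a 64-bit target
// rounds to lclSize = 12; the loop stores 8 bytes at offset 0, the 4-byte
// tail store covers offset 8, and the final assert(i == lclSize) holds.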
4650 assert(regSet.tmpAllFree());
4651 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4653 if (!varTypeIsGC(tempThis->tdTempType()))
4658 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
4660 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
4665 //-----------------------------------------------------------------------------
4666 // genEnregisterOSRArgsAndLocals: Initialize any enregistered args or locals
4667 // that get values from the tier0 frame.
4670 // initReg -- scratch register to use if needed
4671 // pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit)
4673 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
4674 void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed)
4676 void CodeGen::genEnregisterOSRArgsAndLocals()
4679 assert(compiler->opts.IsOSR());
4680 PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
4682 // basic sanity checks (make sure we're OSRing the right method)
4683 assert(patchpointInfo->NumberOfLocals() == compiler->info.compLocalsCount);
4685 const int originalFrameSize = patchpointInfo->TotalFrameSize();
4686 const unsigned patchpointInfoLen = patchpointInfo->NumberOfLocals();
4688 for (unsigned varNum = 0; varNum < compiler->lvaCount; varNum++)
4690 if (!compiler->lvaIsOSRLocal(varNum))
4692 // This local was not part of the tier0 method's state.
4693 // No work required.
4698 LclVarDsc* const varDsc = compiler->lvaGetDesc(varNum);
4700 if (!varDsc->lvIsInReg())
4702 // For args/locals in memory, the OSR frame will continue to access
4703 // that memory location. No work required.
4705 JITDUMP("---OSR--- V%02u in memory\n", varNum);
4709 // This local was part of the live tier0 state and is enregistered in the
4710 // OSR method. Initialize the register from the right frame slot.
4712 // If we ever enable promotion we'll need to generalize what follows to copy each
4713 // field from the tier0 frame to its OSR home.
4715 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4717 // This arg or local is not live at entry to the OSR method.
4718 // No work required.
4720 JITDUMP("---OSR--- V%02u (reg) not live at entry\n", varNum);
4724 int fieldOffset = 0;
4725 unsigned lclNum = varNum;
4727 if (varDsc->lvIsStructField)
4729 lclNum = varDsc->lvParentLcl;
4730 assert(lclNum < patchpointInfoLen);
4732 fieldOffset = varDsc->lvFldOffset;
4733 JITDUMP("---OSR--- V%02u is promoted field of V%02u at offset %d\n", varNum, lclNum, fieldOffset);
4736 // Note we are always reading from the tier0 frame here
4738 const var_types lclTyp = varDsc->GetStackSlotHomeType();
4739 const emitAttr size = emitActualTypeSize(lclTyp);
4740 const int stkOffs = patchpointInfo->Offset(lclNum) + fieldOffset;
4742 #if defined(TARGET_AMD64)
4744 // Original frames always use frame pointers, so
4745 // stkOffs is the tier0 frame's frame-relative offset
4748 // We need to determine the stack or frame-pointer relative
4749 // offset for this variable in the current frame.
4751 // If current frame does not use a frame pointer, we need to
4752 // add the SP-to-FP delta of this frame and the SP-to-FP delta
4753 // of the original frame; that translates from this frame's
4754 // stack pointer to the old frame's frame pointer.
4756 // We then add the original frame's frame-pointer relative
4757 // offset (note this offset is usually negative -- the stack
4758 // grows down, so locals are below the frame pointer).
4760 // /-----original frame-----/
4761 // / return address /
4762 // / saved RBP --+ / <--- Original frame ptr --+
4764 // / ... (stkOffs) / |
4766 // / variable --+ / |
4767 // / ... / (original frame sp-fp delta)
4769 // /-----OSR frame ---------/ |
4770 // / pseudo return address / --+
4772 // / ... / (this frame sp-fp delta)
4774 // /------------------------/ <--- Stack ptr --+
4776 // If the current frame is using a frame pointer, we need to
4777 // add the SP-to-FP delta of the original frame and then add
4778 // the original frame's frame-pointer relative offset.
4780 // /-----original frame-----/
4781 // / return address /
4782 // / saved RBP --+ / <--- Original frame ptr --+
4784 // / ... (stkOffs) / |
4786 // / variable --+ / |
4787 // / ... / (original frame sp-fp delta)
4789 // /-----OSR frame ---------/ |
4790 // / pseudo return address / --+
4791 // / saved RBP / <--- Frame ptr --+
4795 // /------------------------/
4797 int offset = originalFrameSize + stkOffs;
4799 if (isFramePointerUsed())
4801 // also adjust for the saved RBP on this frame
4802 offset += TARGET_POINTER_SIZE;
4806 offset += genSPtoFPdelta();
4809 JITDUMP("---OSR--- V%02u (reg) old rbp offset %d old frame %d this frame sp-fp %d new offset %d (0x%02x)\n",
4810 varNum, stkOffs, originalFrameSize, genSPtoFPdelta(), offset, offset);
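// Hypothetical numbers: originalFrameSize = 88 and stkOffs = -24 give
// offset = 64; with a frame pointer in this frame we add TARGET_POINTER_SIZE
// for the RBP pushed above and load from [rbp + 72].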
4812 GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset);
4814 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
4816 // Patchpoint offset is from top of Tier0 frame
4818 // We need to determine the frame-pointer relative
4819 // offset for this variable in the osr frame.
4821 // First add the Tier0 frame size
4823 const int tier0FrameSize = compiler->info.compPatchpointInfo->TotalFrameSize();
4825 // then add the OSR frame size
4827 const int osrFrameSize = genTotalFrameSize();
4829 // then subtract OSR SP-FP delta
4831 const int osrSpToFpDelta = genSPtoFPdelta();
4833 // | => tier0 top of frame relative
4834 // | + => tier0 bottom of frame relative
4835 // | | + => osr bottom of frame (sp) relative
4836 // | | | - => osr fp relative
4838 const int offset = stkOffs + tier0FrameSize + osrFrameSize - osrSpToFpDelta;
4840 JITDUMP("---OSR--- V%02u (reg) Tier0 virtual offset %d OSR frame size %d OSR sp-fp "
4841 "delta %d total offset %d (0x%x)\n",
4842 varNum, stkOffs, osrFrameSize, osrSpToFpDelta, offset, offset);
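// Hypothetical numbers: stkOffs = -16, tier0FrameSize = 96, osrFrameSize = 64
// and osrSpToFpDelta = 16 give offset = 128 from the established frame pointer.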
4844 genInstrWithConstant(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset, initReg);
4845 *pInitRegZeroed = false;
4850 /*-----------------------------------------------------------------------------
4852 * Save the generic context argument.
4854 * We need to do this within the "prolog" in case anyone tries to inspect
4855 * the param-type-arg/this (which can be done after the prolog) using
4856 * ICodeManager::GetParamTypeArg().
4859 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
4861 assert(compiler->compGeneratingProlog);
4863 const bool reportArg = compiler->lvaReportParamTypeArg();
4865 if (compiler->opts.IsOSR())
4867 PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo;
4868 if (reportArg)
4870 // OSR method will use Tier0 slot to report context arg.
4872 assert(ppInfo->HasGenericContextArgOffset());
4873 JITDUMP("OSR method will use Tier0 frame slot for generics context arg.\n");
4875 else if (compiler->lvaKeepAliveAndReportThis())
4877 // OSR method will use Tier0 slot to report `this` as context.
4879 assert(ppInfo->HasKeptAliveThis());
4880 JITDUMP("OSR method will use Tier0 frame slot for generics context `this`.\n");
4886 // We should report either the generic context arg or "this", whichever is used.
4889 #ifndef JIT32_GCENCODER
4890 if (!compiler->lvaKeepAliveAndReportThis())
4897 // For JIT32_GCENCODER, we won't be here if reportArg is false.
4898 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
4900 noway_assert(contextArg != BAD_VAR_NUM);
4901 LclVarDsc* varDsc = compiler->lvaGetDesc(contextArg);
4903 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
4904 // moved to its final home location. So we need to use it from the
4905 // incoming location.
4907 regNumber reg;
4909 bool isPrespilledForProfiling = false;
4910 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
4911 isPrespilledForProfiling =
4912 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
4915 // Load from the argument register only if it is not prespilled.
4916 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
4918 reg = varDsc->GetArgReg();
4922 if (isFramePointerUsed())
4924 #if defined(TARGET_ARM)
4925 // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
4926 // will become enregistered.
4927 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
4928 noway_assert((2 * REGSIZE_BYTES <= varDsc->GetStackOffset()) &&
4929 (size_t(varDsc->GetStackOffset()) < compiler->compArgSize + 2 * REGSIZE_BYTES));
4931 // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
4932 // will become enregistered.
4933 noway_assert((0 < varDsc->GetStackOffset()) && (size_t(varDsc->GetStackOffset()) < compiler->compArgSize));
4937 // We will just use the initReg since it is an available register
4938 // and we are probably done using it anyway...
4939 reg = initReg;
4940 *pInitRegZeroed = false;
4942 // mov reg, [compiler->info.compTypeCtxtArg]
4943 GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4944 varDsc->GetStackOffset());
4945 regSet.verifyRegUsed(reg);
4948 #if defined(TARGET_ARM64)
4949 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4950 compiler->lvaCachedGenericContextArgOffset(), rsGetRsvdReg());
4951 #elif defined(TARGET_ARM)
4952 // ARM's emitIns_R_R_I automatically uses the reserved register if necessary.
4953 GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4954 compiler->lvaCachedGenericContextArgOffset());
4955 #elif defined(TARGET_LOONGARCH64)
4956 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4957 compiler->lvaCachedGenericContextArgOffset(), REG_R21);
4958 #elif defined(TARGET_RISCV64)
4959 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4960 compiler->lvaCachedGenericContextArgOffset(), rsGetRsvdReg());
4961 #else // !ARM64 !ARM !LOONGARCH64 !RISCV64
4962 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
4963 GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4964 compiler->lvaCachedGenericContextArgOffset());
4965 #endif // !ARM64 !ARM !LOONGARCH64 !RISCV64
4968 /*****************************************************************************
4970 Esp frames :
4971 ----------
4973 These instructions are just a reordering of the instructions used today.
4979 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
4981 add esp, LOCALS_SIZE / pop dummyReg
4988 Ebp frames :
4989 ----------
4991 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
4992 Everything else is similar, though in a different order.
4994 The security object will no longer be at a fixed offset. However, the
4995 offset can still be determined by looking up the GC-info and determining
4996 how many callee-saved registers are pushed.
5003 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
5005 add esp, LOCALS_SIZE / pop dummyReg
5009 (mov esp, ebp if there are no callee-saved registers)
5013 Double-aligned frame :
5014 --------------------
5016 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
5017 of callee-saved registers are pushed on the stack so that the locals
5018 themselves are qword-aligned. The instructions are the same as today,
5019 just in a different order.
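For example (hypothetical counts): pushing three callee-saved registers adds
12 bytes, so LOCALS_SIZE_ADJUSTED must include one unused DWORD to bring the
locals back to a qword boundary; with four pushes no padding is needed.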
5027 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
5029 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
5038 localloc (with ebp) frames :
5039 --------------------------
5041 The instructions are the same as today, just in a different order.
5042 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
5043 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
5050 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
5052 lea esp, [ebp-calleeSavedRegsPushedSize]
5056 (mov esp, ebp if there are no callee-saved registers)
5060 *****************************************************************************/
5062 /*****************************************************************************
5064 * Reserve space for a function prolog.
5067 void CodeGen::genReserveProlog(BasicBlock* block)
5069 assert(block != nullptr);
5071 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
5073 /* Nothing is live on entry to the prolog */
5075 GetEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
5078 /*****************************************************************************
5080 * Reserve space for a function epilog.
5083 void CodeGen::genReserveEpilog(BasicBlock* block)
5085 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
5086 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
5088 /* The return value is special-cased: make sure it goes live for the epilog */
5090 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
5092 if (IsFullPtrRegMapRequired() && !jmpEpilog)
5094 if (varTypeIsGC(compiler->info.compRetNativeType))
5096 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
5098 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
5100 switch (compiler->info.compRetNativeType)
5102 case TYP_REF:
5103 gcrefRegsArg |= RBM_INTRET;
5104 break;
5105 case TYP_BYREF:
5106 byrefRegsArg |= RBM_INTRET;
5107 break;
5108 default:
5109 break;
5112 JITDUMP("Extending return value GC liveness to epilog\n");
5116 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
5118 assert(block != nullptr);
5119 const VARSET_TP& gcrefVarsArg(GetEmitter()->emitThisGCrefVars);
5120 bool last = (block->bbNext == nullptr);
5121 GetEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
5124 #if defined(FEATURE_EH_FUNCLETS)
5126 /*****************************************************************************
5128 * Reserve space for a funclet prolog.
5131 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
5133 assert(block != nullptr);
5135 /* Currently, no registers are live on entry to the prolog, except maybe
5136 the exception object. There might be some live stack vars, but they
5137 cannot be accessed until after the frame pointer is re-established.
5138 In order to potentially prevent emitting a death before the prolog
5139 and a birth right after it, we just report it as live during the
5140 prolog, and rely on the prolog being non-interruptible. Trust
5141 genCodeForBBlist to correctly initialize all the sets.
5143 We might need to relax these asserts if the VM ever starts
5144 restoring any registers, then we could have live-in reg vars...
5147 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
5148 noway_assert(gcInfo.gcRegByrefSetCur == 0);
5150 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
5152 GetEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
5153 gcInfo.gcRegByrefSetCur, false);
5156 /*****************************************************************************
5158 * Reserve space for a funclet epilog.
5161 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
5163 assert(block != nullptr);
5165 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
5167 bool last = (block->bbNext == nullptr);
5168 GetEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
5169 gcInfo.gcRegByrefSetCur, last);
5172 #endif // FEATURE_EH_FUNCLETS
5174 /*****************************************************************************
5175 * Finalize the frame size and offset assignments.
5177 * No changes can be made to the modified register set after this, since that can affect how many
5178 * callee-saved registers get saved.
5180 void CodeGen::genFinalizeFrame()
5182 JITDUMP("Finalizing stack frame\n");
5184 // Initializations need to happen based on the var locations at the start
5185 // of the first basic block, so load those up. In particular, the determination
5186 // of whether or not to use block init in the prolog is dependent on the variable
5187 // locations on entry to the function.
5188 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
5190 genCheckUseBlockInit();
5192 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
5193 CLANG_FORMAT_COMMENT_ANCHOR;
5195 #if defined(TARGET_X86)
5197 if (compiler->compTailCallUsed)
5199 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
5200 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
5201 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
5202 // actually get saved.
5204 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
5206 #endif // TARGET_X86
5208 #ifdef TARGET_ARM
5209 // Make sure that callee-saved registers used by the call to a generated stack-probing helper are pushed on the stack.
5210 if (compiler->compLclFrameSize >= compiler->eeGetPageSize())
5212 regSet.rsSetRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
5213 RBM_STACK_PROBE_HELPER_TRASH);
5216 // If there are any reserved registers, add them to the modified set.
5217 if (regSet.rsMaskResvd != RBM_NONE)
5219 regSet.rsSetRegsModified(regSet.rsMaskResvd);
5221 #endif // TARGET_ARM
5226 printf("Modified regs: ");
5227 dspRegMask(regSet.rsGetModifiedRegsMask());
5232 // For Edit & Continue, restrict and record the modified register set.
5233 if (compiler->opts.compDbgEnC)
5235 // We always save FP.
5236 noway_assert(isFramePointerUsed());
5237 #if defined(TARGET_AMD64) || defined(TARGET_ARM64)
5238 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED);
5239 if (RBM_ENC_CALLEE_SAVED != 0)
5241 regSet.rsSetRegsModified(RBM_ENC_CALLEE_SAVED);
5243 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
5244 #else // !TARGET_AMD64 && !TARGET_ARM64
5245 // On x86 we save all callee saved regs so the saved reg area size is consistent
5246 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
5247 #endif // !TARGET_AMD64 && !TARGET_ARM64
5250 /* If we have any pinvoke calls, we might potentially trash everything */
5251 if (compiler->compMethodRequiresPInvokeFrame())
5253 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
5254 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
5257 #ifdef UNIX_AMD64_ABI
5258 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
5259 if (compiler->compIsProfilerHookNeeded())
5261 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
5265 /* Count how many callee-saved registers will actually be saved (pushed) */
5267 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
5268 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
5271 // EBP cannot be (directly) modified
5272 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
5275 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5277 #ifdef TARGET_ARMARCH
5278 if (isFramePointerUsed())
5280 // For a FP based frame we have to push/pop the FP register
5282 maskCalleeRegsPushed |= RBM_FPBASE;
5284 // This assert checks that we are not using REG_FP
5285 // as both the frame pointer and as a codegen register.
5287 assert(!regSet.rsRegsModified(RBM_FPBASE));
5290 // We always push LR. See genPushCalleeSavedRegisters.
5292 maskCalleeRegsPushed |= RBM_LR;
5294 #if defined(TARGET_ARM)
5295 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
5296 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
5297 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
5299 if ((maskPushRegsFloat != RBM_NONE) ||
5300 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
5302 // Here we try to keep the stack double-aligned before the vpush
5303 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
5305 regNumber extraPushedReg = REG_R4;
5306 while (maskPushRegsInt & genRegMask(extraPushedReg))
5308 extraPushedReg = REG_NEXT(extraPushedReg);
5310 if (extraPushedReg < REG_R11)
5312 maskPushRegsInt |= genRegMask(extraPushedReg);
5313 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
5316 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
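// Example: with r0-r3 pre-spilled (4 slots, even) and only r4 among the pushed
// integer registers, the combined count of 5 is odd, so the loop above adds r5
// and the stack stays 8-byte aligned for the vpush.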
5319 // We currently only expect to push/pop consecutive FP registers
5320 // and these have to be double-sized registers as well.
5321 // Here we will ensure that maskPushRegsFloat obeys these requirements.
5323 if (maskPushRegsFloat != RBM_NONE)
5325 regMaskTP contiguousMask = genRegMaskFloat(REG_F16);
5326 while (maskPushRegsFloat > contiguousMask)
5328 contiguousMask <<= 2;
5329 contiguousMask |= genRegMaskFloat(REG_F16);
5331 if (maskPushRegsFloat != contiguousMask)
5333 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
5334 maskPushRegsFloat |= maskExtraRegs;
5335 regSet.rsSetRegsModified(maskExtraRegs);
5336 maskCalleeRegsPushed |= maskExtraRegs;
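// Example: if only d9 was modified, contiguousMask grows from d8 to cover
// d8-d9, so d8 is added as an extra register and a single contiguous
// vpush/vpop range can be used.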
5339 #endif // TARGET_ARM
5340 #endif // TARGET_ARMARCH
5342 #if defined(TARGET_XARCH)
5343 // Compute the count of callee saved float regs saved on stack.
5344 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm31)
5345 // regs are stack allocated and preserved in their stack locations.
5346 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
5347 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
5348 #endif // defined(TARGET_XARCH)
5350 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5351 if (isFramePointerUsed())
5353 // For a FP based frame we have to push/pop the FP register
5355 maskCalleeRegsPushed |= RBM_FPBASE;
5357 // This assert checks that we are not using REG_FP
5358 // as both the frame pointer and as a codegen register.
5360 assert(!regSet.rsRegsModified(RBM_FPBASE));
5363 // We always push RA. See genPushCalleeSavedRegisters.
5364 maskCalleeRegsPushed |= RBM_RA;
5365 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
5367 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
5372 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
5373 dspRegMask(maskCalleeRegsPushed);
5378 /* Assign the final offsets to things living on the stack frame */
5380 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
5383 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
5385 compiler->lvaTableDump();
5390 /*****************************************************************************
5392 * Generates code for a function prolog.
5394 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
5396 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
5397 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
5398 * only instructions which result in control not going to the next instruction. Basically, any time execution would
5399 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
5400 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
5401 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
5403 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
5404 * debugger team to ensure that stepping still works.
5406 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
5410 #pragma warning(push)
5411 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5413 void CodeGen::genFnProlog()
5415 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
5417 compiler->funSetCurrentFunc(0);
5422 printf("*************** In genFnProlog()\n");
5427 genInterruptibleUsed = true;
5430 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
5432 /* Ready to start on the prolog proper */
5434 GetEmitter()->emitBegProlog();
5435 compiler->unwindBegProlog();
5437 // Do this so we can put the prolog instruction group ahead of
5438 // other instruction groups
5439 genIPmappingAddToFront(IPmappingDscKind::Prolog, DebugInfo(), true);
5442 if (compiler->opts.dspCode)
5444 printf("\n__prolog:\n");
5448 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
5450 // Create new scopes for the method-parameters for the prolog-block.
5454 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
5455 // For arm64 OSR, emit a "phantom prolog" to account for the actions taken
5456 // in the tier0 frame that impact FP and SP on entry to the OSR method.
5458 // x64 handles this differently; the phantom prolog unwind is emitted in
5459 // genOSRRecordTier0CalleeSavedRegistersAndFrame.
5461 if (compiler->opts.IsOSR())
5463 PatchpointInfo* patchpointInfo = compiler->info.compPatchpointInfo;
5464 const int tier0FrameSize = patchpointInfo->TotalFrameSize();
5466 // SP is tier0 method's SP.
5467 compiler->unwindAllocStack(tier0FrameSize);
5469 #endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
5473 if (compiler->compJitHaltMethod())
5475 /* put a nop first because the debugger and other tools are likely to
5476 put an int3 at the beginning and we don't want to confuse them */
5478 instGen(INS_nop);
5479 instGen(INS_BREAKPOINT);
5481 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5482 // Avoid asserts in the unwind info because these instructions aren't accounted for.
5483 compiler->unwindPadding();
5484 #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
5488 #if defined(FEATURE_EH_FUNCLETS) && defined(DEBUG)
5490 // We cannot force 0-initialization of the PSPSym
5491 // as it will overwrite the real value
5492 if (compiler->lvaPSPSym != BAD_VAR_NUM)
5494 const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->lvaPSPSym);
5495 assert(!varDsc->lvMustInit);
5498 #endif // FEATURE_EH_FUNCLETS && DEBUG
5500 /*-------------------------------------------------------------------------
5502 * Record the stack frame ranges that will cover all of the tracked
5503 * and untracked pointer variables.
5504 * Also find which registers will need to be zero-initialized.
5506 * 'initRegs': - Generally, enregistered variables should not need to be
5507 * zero-inited. They only need to be zero-inited when they
5508 * have a possibly uninitialized read on some control
5509 * flow path. Apparently some of the IL_STUBs that we
5510 * generate have this property.
5513 int untrLclLo = +INT_MAX;
5514 int untrLclHi = -INT_MAX;
5515 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
5516 // Note that they may be tracked, but simply not allocated to a register.
5517 bool hasUntrLcl = false;
5519 int GCrefLo = +INT_MAX;
5520 int GCrefHi = -INT_MAX;
5521 bool hasGCRef = false;
5523 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
5524 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
5525 regMaskTP initDblRegs = RBM_NONE;
5527 unsigned varNum;
5528 LclVarDsc* varDsc;
5530 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
5532 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
5537 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
5539 noway_assert(varDsc->lvRefCnt() == 0);
5543 signed int loOffs = varDsc->GetStackOffset();
5544 signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclSize(varNum);
5546 /* We need to know the offset range of tracked stack GC refs */
5547 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
5549 if (varDsc->HasGCPtr() && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
5551 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
5552 // taken care of by the parent struct.
5553 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5557 if (loOffs < GCrefLo)
5561 if (hiOffs > GCrefHi)
5568 /* For lvMustInit vars, gather pertinent info */
5570 if (!varDsc->lvMustInit)
5575 bool isInReg = varDsc->lvIsInReg();
5576 bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr;
5578 // Note that 'lvIsInReg()' will only be accurate for variables that are actually live-in to
5579 // the first block. This will include all possibly-uninitialized locals, whose liveness
5580 // will naturally propagate up to the entry block. However, we also set 'lvMustInit' for
5581 // locals that are live-in to a finally block, and those may not be live-in to the first
5582 // block. For those, we don't want to initialize the register, as it will not actually be
5583 // occupying it on entry.
5586 if (compiler->lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr)
5588 isInReg = VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex);
5592 assert(VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
5598 regNumber regForVar = varDsc->GetRegNum();
5599 regMaskTP regMask = genRegMask(regForVar);
5600 if (!genIsValidFloatReg(regForVar))
5602 initRegs |= regMask;
5604 if (varTypeIsMultiReg(varDsc))
5606 if (varDsc->GetOtherReg() != REG_STK)
5608 initRegs |= genRegMask(varDsc->GetOtherReg());
5612 /* Upper DWORD is on the stack, and needs to be inited */
5614 loOffs += sizeof(int);
5619 else if (varDsc->TypeGet() == TYP_DOUBLE)
5621 initDblRegs |= regMask;
5625 initFltRegs |= regMask;
5634 if (loOffs < untrLclLo)
5638 if (hiOffs > untrLclHi)
5645 /* Don't forget about spill temps that hold pointers */
5647 assert(regSet.tmpAllFree());
5648 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
5650 if (!varTypeIsGC(tempThis->tdTempType()))
5655 signed int loOffs = tempThis->tdTempOffs();
5656 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
5658 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
5659 // previous frame pointer. Thus, stkOffs can't be zero.
5660 CLANG_FORMAT_COMMENT_ANCHOR;
5662 #if !defined(TARGET_AMD64)
5663 // However, on amd64 there is no requirement to chain frame pointers.
5665 noway_assert(!isFramePointerUsed() || loOffs != 0);
5666 #endif // !defined(TARGET_AMD64)
5668 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
5672 if (loOffs < untrLclLo)
5676 if (hiOffs > untrLclHi)
5682 // TODO-Cleanup: Add suitable assert for the OSR case.
5683 assert(compiler->opts.IsOSR() || ((genInitStkLclCnt > 0) == hasUntrLcl));
5688 if (genInitStkLclCnt > 0)
5690 printf("Found %u lvMustInit int-sized stack slots, frame offsets %d through %d\n", genInitStkLclCnt,
5691 -untrLclLo, -untrLclHi);
5697 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
5698 // Ditto for all enregistered user arguments in a varargs method.
5699 // These registers will be available to use for the initReg. We just remove
5700 // all of these registers from the rsCalleeRegArgMaskLiveIn.
5702 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
5705 /* Choose the register to use for zero initialization */
5707 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
5709 // Track if initReg holds non-zero value. Start conservative and assume it has non-zero value.
5710 // If initReg is ever set to zero, this variable is set to true and zero-initializing initReg again is skipped.
5712 bool initRegZeroed = false;
5713 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
5716 // We should not use the special PINVOKE registers as the initReg
5717 // since they are trashed by the jithelper call to setup the PINVOKE frame
5718 if (compiler->compMethodRequiresPInvokeFrame())
5720 excludeMask |= RBM_PINVOKE_FRAME;
5722 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
5723 if (!compiler->opts.ShouldUsePInvokeHelpers())
5725 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
5727 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
5729 const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->info.compLvFrameListRoot);
5730 if (varDsc->lvRegister)
5732 excludeMask |= genRegMask(varDsc->GetRegNum());
5738 // If we have a variable sized frame (compLocallocUsed is true)
5739 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
5740 if (compiler->compLocallocUsed)
5742 excludeMask |= RBM_SAVED_LOCALLOC_SP;
5744 #endif // TARGET_ARM
5746 const bool isRoot = (compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT);
5749 const bool isOSRx64Root = isRoot && compiler->opts.IsOSR();
5751 const bool isOSRx64Root = false;
5752 #endif // TARGET_AMD64
5754 regMaskTP tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
5756 if (tempMask != RBM_NONE)
5758 // We will use one of the registers that we were planning to zero init anyway.
5759 // We pick the lowest register number.
5760 tempMask = genFindLowestBit(tempMask);
5761 initReg = genRegNumFromMask(tempMask);
5763 // Next we prefer to use one of the unused argument registers.
5764 // If they aren't available we use one of the caller-saved integer registers.
5767 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
5768 if (tempMask != RBM_NONE)
5770 // We pick the lowest register number
5771 tempMask = genFindLowestBit(tempMask);
5772 initReg = genRegNumFromMask(tempMask);
5776 #if defined(TARGET_AMD64)
5777 // For x64 OSR root frames, we can't use any as of yet unsaved
5778 // callee save as initReg, as we defer saving these until later in
5779 // the prolog, and we don't have normal arg regs.
5780 if (isOSRx64Root)
5782 initReg = REG_SCRATCH; // REG_EAX
5784 #elif defined(TARGET_ARM64)
5785 // For arm64 OSR root frames, we may need a scratch register for large
5786 // offset addresses. Use a register that won't be allocated.
5788 if (isRoot && compiler->opts.IsOSR())
5794 #ifndef TARGET_LOONGARCH64
5795 // For LoongArch64's OSR root frames, we may need a scratch register for large
5796 // offset addresses. But this does not conflict with the REG_PINVOKE_FRAME.
5797 noway_assert(!compiler->compMethodRequiresPInvokeFrame() || (initReg != REG_PINVOKE_FRAME));
5800 #if defined(TARGET_AMD64)
5801 // If we are a varargs call, in order to set up the arguments correctly this
5802 // must be done in a 2 step process. As per the x64 ABI:
5803 // a) The caller sets up the argument shadow space (just before the return
5804 // address, 4 pointer sized slots).
5805 // b) The callee is responsible to home the arguments on the shadow space
5806 // provided by the caller.
5807 // This way, the varargs iterator will be able to retrieve the
5808 // call arguments properly since both the arg regs and the stack allocated
5809 // args will be contiguous.
5811 // OSR methods can skip this, as the setup is done by the original method.
5812 if (compiler->info.compIsVarArgs && !compiler->opts.IsOSR())
5814 GetEmitter()->spillIntArgRegsToShadowSlots();
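// That is, RCX, RDX, R8 and R9 are stored to the four pointer-sized shadow
// slots the caller allocated just above the return address, so the vararg
// cursor can walk register and stack arguments as one contiguous block.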
5817 #endif // TARGET_AMD64
5820 /*-------------------------------------------------------------------------
5822 * Now start emitting the part of the prolog which sets up the frame
5824 #ifdef TARGET_ARM
5825 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
5827 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
5828 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
5830 #endif // TARGET_ARM
5832 unsigned extraFrameSize = 0;
5834 #ifdef TARGET_AMD64
5836 if (isOSRx64Root)
5839 // Account for the Tier0 callee saves
5841 genOSRRecordTier0CalleeSavedRegistersAndFrame();
5843 // We don't actually push any callee saves on the OSR frame,
5844 // but we still reserve space, so account for this when
5845 // allocating the local frame.
5847 extraFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
5849 #endif // TARGET_AMD64
5851 if (doubleAlignOrFramePointerUsed())
5853 // OSR methods handle "saving" FP specially.
5855 // For epilog and unwind, we restore the RBP saved by the
5856 // Tier0 method. The save we do here is just to set up a
5857 // proper RBP-based frame chain link.
5859 if (isOSRx64Root && isFramePointerUsed())
5861 GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0);
5862 inst_RV(INS_push, initReg, TYP_REF);
5863 initRegZeroed = false;
5865 // We account for the SP movement in unwind, but not for
5866 // the "save" of RBP.
5868 compiler->unwindAllocStack(REGSIZE_BYTES);
5872 inst_RV(INS_push, REG_FPBASE, TYP_REF);
5873 compiler->unwindPush(REG_FPBASE);
5875 #ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp"
5876 genEstablishFramePointer(0, /*reportUnwindData*/ true);
5877 #endif // !TARGET_AMD64
5879 #if DOUBLE_ALIGN
5880 if (compiler->genDoubleAlign())
5882 noway_assert(isFramePointerUsed() == false);
5883 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
5885 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
5887 #endif // DOUBLE_ALIGN
5889 #endif // TARGET_XARCH
5891 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5892 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
5894 #else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5896 if (!isOSRx64Root)
5898 genPushCalleeSavedRegisters();
5900 #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5902 #ifdef TARGET_ARM
5903 bool needToEstablishFP = false;
5904 int afterLclFrameSPtoFPdelta = 0;
5905 if (doubleAlignOrFramePointerUsed())
5907 needToEstablishFP = true;
5909 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
5910 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
5911 // too big, we go ahead and do it here.
5913 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
5914 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
5915 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
5917 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
5918 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
5919 needToEstablishFP = false;
5922 #endif // TARGET_ARM
5924 //-------------------------------------------------------------------------
5926 // Subtract the local frame size from SP.
5928 //-------------------------------------------------------------------------
5929 CLANG_FORMAT_COMMENT_ANCHOR;
5931 #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
5932 regMaskTP maskStackAlloc = RBM_NONE;
5934 #ifdef TARGET_ARM
5935 maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize + extraFrameSize,
5936 regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
5937 #endif // TARGET_ARM
5939 if (maskStackAlloc == RBM_NONE)
5941 genAllocLclFrame(compiler->compLclFrameSize + extraFrameSize, initReg, &initRegZeroed,
5942 intRegState.rsCalleeRegArgMaskLiveIn);
5944 #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5946 #ifdef TARGET_AMD64
5947 // For x64 OSR we have to finish saving int callee saves.
5949 if (isOSRx64Root)
5951 genOSRSaveRemainingCalleeSavedRegisters();
5953 #endif // TARGET_AMD64
5955 //-------------------------------------------------------------------------
5958 if (compiler->compLocallocUsed)
5960 GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE, /* canSkip */ false);
5961 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
5962 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
5964 #endif // TARGET_ARMARCH
5966 #if defined(TARGET_XARCH)
5967 // Preserve callee saved float regs to stack.
5968 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
5969 #endif // defined(TARGET_XARCH)
5972 // Establish the AMD64 frame pointer after the OS-reported prolog.
5973 if (doubleAlignOrFramePointerUsed())
5975 const bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
5976 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
5978 #endif // TARGET_AMD64
5979 compiler->unwindEndProlog();
5981 //-------------------------------------------------------------------------
5983 // This is the end of the OS-reported prolog for purposes of unwinding
5985 //-------------------------------------------------------------------------
5988 if (needToEstablishFP)
5990 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
5991 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
5993 #endif // TARGET_ARM
5995 if (compiler->info.compPublishStubParam)
5997 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM,
5998 compiler->lvaStubArgumentVar, 0);
5999 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
6001 // It's no longer live; clear it out so it can be used after this in the prolog
6002 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
6006 // Zero out the frame as needed
6009 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
6011 #if defined(FEATURE_EH_FUNCLETS)
6013 genSetPSPSym(initReg, &initRegZeroed);
6015 #else // !FEATURE_EH_FUNCLETS
// When compInitMem is true, genZeroInitFrame will zero out the shadow SP slots.
6018 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
6020 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
6021 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
6023 // Zero out the slot for nesting level 0
6024 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
6028 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6029 initRegZeroed = true;
GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, firstSlotOffs);
6036 #endif // !FEATURE_EH_FUNCLETS
6038 genReportGenericContextArg(initReg, &initRegZeroed);
6040 #ifdef JIT32_GCENCODER
6041 // Initialize the LocalAllocSP slot if there is localloc in the function.
6042 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
6044 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
6046 #endif // JIT32_GCENCODER
6048 // Set up the GS security cookie
6050 genSetGSSecurityCookie(initReg, &initRegZeroed);
6052 #ifdef PROFILING_SUPPORTED
6054 // Insert a function entry callback for profiling, if requested.
6055 // OSR methods aren't called, so don't have enter hooks.
6056 if (!compiler->opts.IsOSR())
6058 genProfilingEnterCallback(initReg, &initRegZeroed);
6061 #endif // PROFILING_SUPPORTED
// For OSR we may have a zero-length prolog. That's not supported
// when the method must report a generics context, so add a nop if so.
6066 if (compiler->opts.IsOSR() && (GetEmitter()->emitGetPrologOffsetEstimate() == 0) &&
6067 (compiler->lvaReportParamTypeArg() || compiler->lvaKeepAliveAndReportThis()))
6069 JITDUMP("OSR: prolog was zero length and has generic context to report: adding nop to pad prolog.\n");
6073 if (!GetInterruptible())
6075 // The 'real' prolog ends here for non-interruptible methods.
6076 // For fully-interruptible methods, we extend the prolog so that
// we do not need to track GC information while shuffling the arguments.
6079 GetEmitter()->emitMarkPrologEnd();
6082 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
// The unused bits of Vector3 arguments must be cleared
// since the native compiler doesn't initialize the upper bits to zeros.
6086 // TODO-Cleanup: This logic can be implemented in
6087 // genFnPrologCalleeRegArgs() for argument registers and
6088 // genEnregisterIncomingStackArgs() for stack arguments.
6089 genClearStackVec3ArgUpperBits();
6090 #endif // UNIX_AMD64_ABI && FEATURE_SIMD
6092 /*-----------------------------------------------------------------------------
* Take care of register arguments first
*/
6096 // Home incoming arguments and generate any required inits.
6097 // OSR handles this by moving the values from the original frame.
6099 // Update the arg initial register locations.
6101 if (compiler->opts.IsOSR())
6103 // For OSR we defer updating "initial reg" for args until
6104 // we've set the live-in regs with values from the Tier0 frame.
6106 // Otherwise we'll do some of these fetches twice.
6108 CLANG_FORMAT_COMMENT_ANCHOR;
6109 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed);
#else
genEnregisterOSRArgsAndLocals();
#endif
compiler->lvaUpdateArgsWithInitialReg();
6118 compiler->lvaUpdateArgsWithInitialReg();
6120 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6121 if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn)
6123 initRegZeroed = false;
6124 genFnPrologCalleeRegArgs();
6127 auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) {
6128 if (regState->rsCalleeRegArgMaskLiveIn)
// If we need an extra register to shuffle around the incoming registers,
// we will use xtraReg (initReg) and set the xtraRegClobbered flag;
// if we don't need to use xtraReg then this flag will stay false.
6135 bool xtraRegClobbered = false;
6137 if (genRegMask(initReg) & RBM_ARG_REGS)
6143 xtraReg = REG_SCRATCH;
6144 initRegZeroed = false;
6147 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
6149 if (xtraRegClobbered)
6151 initRegZeroed = false;
6156 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM)
6157 assignIncomingRegisterArgs(&intRegState);
6158 assignIncomingRegisterArgs(&floatRegState);
#else
assignIncomingRegisterArgs(&intRegState);
#endif
6163 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
6165 // Home the incoming arguments.
6166 genEnregisterIncomingStackArgs();
6169 /* Initialize any must-init registers variables now */
6173 regMaskTP regMask = 0x1;
6175 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
6177 if (regMask & initRegs)
6179 // Check if we have already zeroed this register
6180 if ((reg == initReg) && initRegZeroed)
6186 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
6189 initRegZeroed = true;
6196 if (initFltRegs | initDblRegs)
6198 // If initReg is not in initRegs then we will use REG_SCRATCH
6199 if ((genRegMask(initReg) & initRegs) == 0)
6201 initReg = REG_SCRATCH;
6202 initRegZeroed = false;
6206 // This is needed only for Arm since it can use a zero initialized int register
6207 // to initialize vfp registers.
6210 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6211 initRegZeroed = true;
6213 #endif // TARGET_ARM
6215 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
6218 //-----------------------------------------------------------------------------
6221 // Increase the prolog size here only if fully interruptible.
6224 if (GetInterruptible())
6226 GetEmitter()->emitMarkPrologEnd();
6228 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
6235 GetEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
6239 noway_assert(GCrefLo == +INT_MAX);
6240 noway_assert(GCrefHi == -INT_MAX);
6244 if (compiler->opts.dspCode)
6251 // On non-x86 the VARARG cookie does not need any special treatment.
6253 // Load up the VARARG argument pointer register so it doesn't get clobbered.
// Only do this if we actually access any statically declared args
6255 // (our argument pointer register has a refcount > 0).
6256 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
6258 if (compiler->info.compIsVarArgs && compiler->lvaGetDesc(argsStartVar)->lvRefCnt() > 0)
6260 varDsc = compiler->lvaGetDesc(argsStartVar);
6262 noway_assert(compiler->info.compArgsCount > 0);
6264 // MOV EAX, <VARARGS HANDLE>
6265 GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
6266 regSet.verifyRegUsed(REG_EAX);
6269 GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
// EDX might actually be holding something here. So make sure to only use EAX for this code sequence.
6274 const LclVarDsc* lastArg = compiler->lvaGetDesc(compiler->info.compArgsCount - 1);
6275 noway_assert(!lastArg->lvRegister);
6276 signed offset = lastArg->GetStackOffset();
6277 assert(offset != BAD_STK_OFFS);
6278 noway_assert(lastArg->lvFramePointerBased);
6280 // LEA EAX, &<VARARGS HANDLE> + EAX
6281 GetEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
6283 if (varDsc->lvIsInReg())
6285 GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_EAX, /* canSkip */ true);
6286 regSet.verifyRegUsed(varDsc->GetRegNum());
6290 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
6294 #endif // TARGET_X86
6296 #if defined(DEBUG) && defined(TARGET_XARCH)
6297 if (compiler->opts.compStackCheckOnRet)
6299 assert(compiler->lvaReturnSpCheck != BAD_VAR_NUM);
6300 assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvDoNotEnregister);
6301 assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvOnFrame);
6302 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
6304 #endif // defined(DEBUG) && defined(TARGET_XARCH)
6306 GetEmitter()->emitEndProlog();
6309 #pragma warning(pop)
6312 //------------------------------------------------------------------------
6313 // getCallTarget - Get the node that evaluates to the call target
6316 // call - the GT_CALL node
6319 // The node. Note that for direct calls this may still return non-null if the direct call
6320 // requires a 'complex' tree to load the target (e.g. in R2R or because we go through a stub).
6322 GenTree* CodeGen::getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd)
// All virtuals should have been expanded into a control expression by this point.
6325 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
6327 if (call->gtCallType == CT_INDIRECT)
6329 assert(call->gtControlExpr == nullptr);
6331 if (methHnd != nullptr)
6336 return call->gtCallAddr;
6339 if (methHnd != nullptr)
6341 *methHnd = call->gtCallMethHnd;
6344 return call->gtControlExpr;
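// Usage sketch (illustrative only; 'call' stands for any GenTreeCall*):
//
//   CORINFO_METHOD_HANDLE methHnd;
//   GenTree*              target = getCallTarget(call, &methHnd);
//   if (target == nullptr)
//   {
//       // Direct call: emit the call using methHnd.
//   }
//   else
//   {
//       // Evaluate 'target' to produce the call address (indirect, R2R, or stub).
//   }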
6347 //------------------------------------------------------------------------
6348 // getCallIndirectionCellReg - Get the register containing the indirection cell for a call
6354 // The register containing the indirection cell, or REG_NA if this call does not use an indirection cell argument.
6357 // We currently use indirection cells for VSD on all platforms and for R2R calls on ARM architectures.
6359 regNumber CodeGen::getCallIndirectionCellReg(GenTreeCall* call)
6361 regNumber result = REG_NA;
6362 switch (call->GetIndirectionCellArgKind())
6364 case WellKnownArg::None:
6366 case WellKnownArg::R2RIndirectionCell:
6367 result = REG_R2R_INDIRECT_PARAM;
6369 case WellKnownArg::VirtualStubCell:
6370 result = compiler->virtualStubParamInfo->GetReg();
6377 if (call->GetIndirectionCellArgKind() != WellKnownArg::None)
6379 CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(call->GetIndirectionCellArgKind());
6380 assert((indirCellArg != nullptr) && (indirCellArg->AbiInfo.GetRegNum() == result));
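// For example (a sketch; the exact registers vary by target): a virtual stub
// dispatch call yields virtualStubParamInfo->GetReg(), an R2R call on ARM
// architectures yields REG_R2R_INDIRECT_PARAM, and an ordinary direct call
// yields REG_NA.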
6387 //------------------------------------------------------------------------
6388 // genDefinePendingLabel - If necessary, define the pending call label after a
6389 // call instruction was emitted.
6392 // call - the call node
6394 void CodeGen::genDefinePendingCallLabel(GenTreeCall* call)
// For pinvoke/intrinsic/tailcalls we may have needed to get the address of a label.
6398 if (!genPendingCallLabel)
// For certain indirect calls we may introduce helper calls beforehand that we need to skip:
6404 // - CFG may introduce a call to the validator first
6405 // - Generic virtual methods may compute the target dynamically through a separate helper call
6406 if (call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL) ||
6407 call->IsHelperCall(compiler, CORINFO_HELP_VIRTUAL_FUNC_PTR))
6412 genDefineInlineTempLabel(genPendingCallLabel);
6413 genPendingCallLabel = nullptr;
6416 /*****************************************************************************
6418 * Generates code for all the function and funclet prologs and epilogs.
6421 void CodeGen::genGeneratePrologsAndEpilogs()
6426 printf("*************** Before prolog / epilog generation\n");
6427 GetEmitter()->emitDispIGlist(/* displayInstructions */ false);
6431 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
6432 // This affects our code that determines which untracked locals need to be zero initialized.
6433 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
6435 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
6437 GetEmitter()->emitStartPrologEpilogGeneration();
6439 gcInfo.gcResetForBB();
6442 // Generate all the prologs and epilogs.
6443 CLANG_FORMAT_COMMENT_ANCHOR;
6445 #if defined(FEATURE_EH_FUNCLETS)
6447 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
6448 // information computed during codegen, or during function prolog generation, like
6449 // frame offsets. It must run after main function prolog generation.
6451 genCaptureFuncletPrologEpilogInfo();
6453 #endif // FEATURE_EH_FUNCLETS
6455 // Walk the list of prologs and epilogs and generate them.
6456 // We maintain a list of prolog and epilog basic blocks in
6457 // the insGroup structure in the emitter. This list was created
6458 // during code generation by the genReserve*() functions.
6460 // TODO: it seems like better design would be to create a list of prologs/epilogs
6461 // in the code generator (not the emitter), and then walk that list. But we already
6462 // have the insGroup list, which serves well, so we don't need the extra allocations
6463 // for a prolog/epilog list in the code generator.
6465 GetEmitter()->emitGeneratePrologEpilog();
6467 // Tell the emitter we're done with all prolog and epilog generation.
6469 GetEmitter()->emitFinishPrologEpilogGeneration();
6474 printf("*************** After prolog / epilog generation\n");
6475 GetEmitter()->emitDispIGlist(/* displayInstructions */ false);
6481 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6482 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6484 XX End Prolog / Epilog XX
6486 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6487 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6490 //-----------------------------------------------------------------------------------
6491 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
6494 // hClass - type handle
6497 // true if type is returned in multiple registers, false otherwise.
6499 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallConvExtension callConv)
6501 if (hClass == NO_CLASS_HANDLE)
6506 structPassingKind howToReturnStruct;
6507 var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct);
6509 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6510 return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType));
6512 return (varTypeIsStruct(returnType));
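// For example (assuming the usual ABI classification): on ARM64 a struct of two
// doubles is an HFA returned in two floating-point registers, so this returns
// true; a struct that getReturnTypeForStruct classifies as SPK_PrimitiveType
// (returned in a single register) returns false.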
6516 //----------------------------------------------
6517 // Methods that support HFA's for ARM32/ARM64
6518 //----------------------------------------------
6520 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
6522 return varTypeIsValidHfaType(GetHfaType(hClass));
6525 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
6527 if (GlobalJitOptions::compFeatureHfa)
6529 if (hClass != NO_CLASS_HANDLE)
6531 CorInfoHFAElemType elemKind = info.compCompHnd->getHFAType(hClass);
6532 if (elemKind != CORINFO_HFA_ELEM_NONE)
6534 // This type may not appear elsewhere, but it will occupy a floating point register.
6535 compFloatingPointUsed = true;
6537 return HfaTypeFromElemKind(elemKind);
6543 //------------------------------------------------------------------------
6544 // GetHfaCount: Given a class handle for an HFA struct
6545 // return the number of registers needed to hold the HFA
6547 // Note that on ARM32 the single precision registers overlap with
6548 // the double precision registers and for that reason each
6549 // double register is considered to be two single registers.
// Thus for ARM32, given an HFA of 4 doubles, this function will return 8.
// On ARM64, given an HFA of 4 singles or 4 doubles, this function will
// return 4 for both.
6554 // hClass: the class handle of a HFA struct
6556 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
6558 assert(IsHfa(hClass));
// An HFA of doubles is twice as large as an HFA of singles for ARM32
6561 // (i.e. uses twice the number of single precision registers)
6562 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
6563 #else // TARGET_ARM64
6564 var_types hfaType = GetHfaType(hClass);
6565 unsigned classSize = info.compCompHnd->getClassSize(hClass);
6566 // Note that the retail build issues a warning about a potential division by zero without the Max function
6567 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
6568 return classSize / elemSize;
6569 #endif // TARGET_ARM64
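// Worked example (sizes assumed): an HFA of 4 doubles is 32 bytes. On ARM32 this
// returns 32 / REGSIZE_BYTES (4) = 8 single-precision registers; on ARM64 the
// element size is 8, so it returns 32 / 8 = 4 registers, matching the note above.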
//------------------------------------------------------------------------
6573 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
6576 // The number of the first argument with stack slot on the caller's frame.
6579 // On x64 Windows the caller always creates slots (homing space) in its frame for the
6580 // first 4 arguments of a callee (register passed args). So, the variable number
6581 // (lclNum) for the first argument with a stack slot is always 0.
6582 // For System V systems or armarch, there is no such calling convention requirement, and the code
6583 // needs to find the first stack passed argument from the caller. This is done by iterating over
// all the lvParam variables and finding the first with GetArgReg() equal to REG_STK.
6586 unsigned CodeGen::getFirstArgWithStackSlot()
6588 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6589 unsigned baseVarNum = 0;
6590 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
6591 // that's passed on the stack.
6592 LclVarDsc* varDsc = nullptr;
6593 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
6595 varDsc = compiler->lvaGetDesc(i);
6597 // We should have found a stack parameter (and broken out of this loop) before
6598 // we find any non-parameters.
6599 assert(varDsc->lvIsParam);
if (varDsc->GetArgReg() == REG_STK)
{
baseVarNum = i;
break;
}
}
assert(varDsc != nullptr);
return baseVarNum;
#elif defined(TARGET_AMD64)
return 0;
#else // TARGET_X86
// Not implemented for x86.
NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
return BAD_VAR_NUM;
#endif // TARGET_X86
6619 //------------------------------------------------------------------------
6620 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
6622 void CodeGen::genSinglePush()
6624 AddStackLevel(REGSIZE_BYTES);
6627 //------------------------------------------------------------------------
6628 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
6630 void CodeGen::genSinglePop()
6632 SubtractStackLevel(REGSIZE_BYTES);
6635 //------------------------------------------------------------------------
6636 // genPushRegs: Push the given registers.
// regs - mask of registers to push
6640 // byrefRegs - OUT arg. Set to byref registers that were pushed.
6641 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
6644 // Mask of registers pushed.
6647 // This function does not check if the register is marked as used, etc.
6649 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
6651 *byrefRegs = RBM_NONE;
6652 *noRefRegs = RBM_NONE;
6654 if (regs == RBM_NONE)
6659 #if FEATURE_FIXED_OUT_ARGS
6661 NYI("Don't call genPushRegs with real regs!");
6664 #else // FEATURE_FIXED_OUT_ARGS
6666 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
6667 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
6669 regMaskTP pushedRegs = regs;
6671 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
6673 regMaskTP regBit = regMaskTP(1) << reg;
6675 if ((regBit & regs) == RBM_NONE)
6679 if (regBit & gcInfo.gcRegGCrefSetCur)
6683 else if (regBit & gcInfo.gcRegByrefSetCur)
6685 *byrefRegs |= regBit;
6688 else if (noRefRegs != NULL)
6690 *noRefRegs |= regBit;
6698 inst_RV(INS_push, reg, type);
6701 gcInfo.gcMarkRegSetNpt(regBit);
6708 #endif // FEATURE_FIXED_OUT_ARGS
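// Usage sketch (illustrative; 'regsToSave' is a hypothetical mask on the
// non-FEATURE_FIXED_OUT_ARGS path):
//
//   regMaskTP byrefRegs;
//   regMaskTP noRefRegs;
//   regMaskTP pushed = genPushRegs(regsToSave, &byrefRegs, &noRefRegs);
//   ... emit code that may trash the saved registers ...
//   genPopRegs(pushed, byrefRegs, noRefRegs);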
6711 //------------------------------------------------------------------------
6712 // genPopRegs: Pop the registers that were pushed by genPushRegs().
6715 // regs - mask of registers to pop
6716 // byrefRegs - The byref registers that were pushed by genPushRegs().
6717 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
6722 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
6724 if (regs == RBM_NONE)
6729 #if FEATURE_FIXED_OUT_ARGS
6731 NYI("Don't call genPopRegs with real regs!");
6733 #else // FEATURE_FIXED_OUT_ARGS
6735 noway_assert((regs & byrefRegs) == byrefRegs);
6736 noway_assert((regs & noRefRegs) == noRefRegs);
6737 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
6739 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
6740 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
// Walk the registers in the reverse order of genPushRegs()
6743 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
6745 regMaskTP regBit = regMaskTP(1) << reg;
6747 if ((regBit & regs) == RBM_NONE)
6751 if (regBit & byrefRegs)
6755 else if (regBit & noRefRegs)
6764 inst_RV(INS_pop, reg, type);
6767 if (type != TYP_INT)
6768 gcInfo.gcMarkRegPtrVal(reg, type);
6773 #endif // FEATURE_FIXED_OUT_ARGS
6776 /*****************************************************************************
6779 * This function should be called only after the sizes of the emitter blocks
6780 * have been finalized.
6783 void CodeGen::genSetScopeInfo()
6785 if (!compiler->opts.compScopeInfo)
6793 printf("*************** In genSetScopeInfo()\n");
6797 unsigned varsLocationsCount = 0;
6799 varsLocationsCount = (unsigned int)varLiveKeeper->getLiveRangesCount();
6801 if (varsLocationsCount == 0)
// No variable homes to report
6804 compiler->eeSetLVcount(0);
6805 compiler->eeSetLVdone();
6809 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
// Initialize the table where the reported variables' homes will be placed.
6812 compiler->eeSetLVcount(varsLocationsCount);
6815 genTrnslLocalVarCount = varsLocationsCount;
6816 if (varsLocationsCount)
6818 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[varsLocationsCount];
// We can have one of the two flags defined, both, or none, especially if we need to compare
// the two resulting reports. But we cannot report both to the debugger, since there would be
// overlapping intervals, and they may not indicate the same variable location.
6826 genSetScopeInfoUsingVariableRanges();
6828 compiler->eeSetLVdone();
6831 //------------------------------------------------------------------------
6832 // genSetScopeInfoUsingVariableRanges: Call "genSetScopeInfo" with the
6833 // "VariableLiveRanges" created for the arguments, special arguments and
6834 // IL local variables.
6837 // This function is called from "genSetScopeInfo" once the code is generated
6838 // and we want to send debug info to the debugger.
6840 void CodeGen::genSetScopeInfoUsingVariableRanges()
6842 unsigned int liveRangeIndex = 0;
6844 for (unsigned int varNum = 0; varNum < compiler->info.compLocalsCount; varNum++)
6846 LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
6848 if (compiler->compMap2ILvarNum(varNum) == (unsigned int)ICorDebugInfo::UNKNOWN_ILNUM)
6853 auto reportRange = [this, varDsc, varNum, &liveRangeIndex](siVarLoc* loc, UNATIVE_OFFSET start,
6854 UNATIVE_OFFSET end) {
6855 if (varDsc->lvIsParam && (start == end))
6857 // If the length is zero, it means that the prolog is empty. In that case,
6858 // CodeGen::genSetScopeInfo will report the liveness of all arguments
6859 // as spanning the first instruction in the method, so that they can
6860 // at least be inspected on entry to the method.
6866 genSetScopeInfo(liveRangeIndex, start, end - start, varNum, varNum, true, loc);
6871 siVarLoc* curLoc = nullptr;
6872 UNATIVE_OFFSET curStart = 0;
6873 UNATIVE_OFFSET curEnd = 0;
6875 for (int rangeIndex = 0; rangeIndex < 2; rangeIndex++)
6877 VariableLiveKeeper::LiveRangeList* liveRanges;
6878 if (rangeIndex == 0)
6880 liveRanges = varLiveKeeper->getLiveRangesForVarForProlog(varNum);
6884 liveRanges = varLiveKeeper->getLiveRangesForVarForBody(varNum);
6887 for (VariableLiveKeeper::VariableLiveRange& liveRange : *liveRanges)
6889 UNATIVE_OFFSET startOffs = liveRange.m_StartEmitLocation.CodeOffset(GetEmitter());
6890 UNATIVE_OFFSET endOffs = liveRange.m_EndEmitLocation.CodeOffset(GetEmitter());
6892 assert(startOffs <= endOffs);
6893 assert(startOffs >= curEnd);
6894 if ((curLoc != nullptr) && (startOffs == curEnd) && siVarLoc::Equals(curLoc, &liveRange.m_VarLocation))
6896 // Extend current range.
6901 // Report old range if any.
6902 if (curLoc != nullptr)
6904 reportRange(curLoc, curStart, curEnd);
6907 // Start a new range.
6908 curLoc = &liveRange.m_VarLocation;
6909 curStart = startOffs;
6914 // Report last range
6915 if (curLoc != nullptr)
6917 reportRange(curLoc, curStart, curEnd);
6921 compiler->eeVarsCount = liveRangeIndex;
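// Merging example (offsets assumed): if a variable lives in the same register
// over [0x10, 0x20) and [0x20, 0x30), the two ranges coalesce and a single
// [0x10, 0x30) entry is reported; a following stack-home range starting at 0x30
// begins a new entry.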
6924 //------------------------------------------------------------------------
6925 // genSetScopeInfo: Record scope information for debug info
6929 // startOffs - the starting offset for this scope
6930 // length - the length of this scope
6931 // varNum - the lclVar for this scope info
6933 // avail - a bool indicating if it has a home
6934 // varLoc - the position (reg or stack) of the variable
6937 // Called for every scope info piece to record by the main genSetScopeInfo()
6939 void CodeGen::genSetScopeInfo(unsigned which,
6940 UNATIVE_OFFSET startOffs,
6941 UNATIVE_OFFSET length,
6947 // We need to do some mapping while reporting back these variables.
6949 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
6950 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
6953 // Non-x86 platforms are allowed to access all arguments directly
6954 // so we don't need this code.
6956 // Is this a varargs function?
6957 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
6958 varNum < compiler->info.compArgsCount && !compiler->lvaGetDesc(varNum)->lvIsRegArg)
6960 noway_assert(varLoc->vlType == VLT_STK || varLoc->vlType == VLT_STK2);
6962 // All stack arguments (except the varargs handle) have to be
6963 // accessed via the varargs cookie. Discard generated info,
6964 // and just find its position relative to the varargs handle
6966 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
6967 if (!compiler->lvaGetDesc(compiler->lvaVarargsHandleArg)->lvOnFrame)
6969 noway_assert(!compiler->opts.compDbgCode);
6973 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
6974 // arguments of vararg functions to avoid reporting them to GC.
6975 noway_assert(!compiler->lvaGetDesc(varNum)->lvRegister);
6976 unsigned cookieOffset = compiler->lvaGetDesc(compiler->lvaVarargsHandleArg)->GetStackOffset();
6977 unsigned varOffset = compiler->lvaGetDesc(varNum)->GetStackOffset();
6979 noway_assert(cookieOffset < varOffset);
6980 unsigned offset = varOffset - cookieOffset;
6981 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
6982 noway_assert(offset < stkArgSize);
6983 offset = stkArgSize - offset;
6985 varLoc->vlType = VLT_FIXED_VA;
6986 varLoc->vlFixedVarArg.vlfvOffset = offset;
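// Worked example (offsets assumed): with cookieOffset == 8, varOffset == 12, and
// stkArgSize == 16, the delta is 12 - 8 = 4, and the reported VLT_FIXED_VA offset
// is 16 - 4 = 12: the argument's distance back from the end of the stack-passed
// arguments, relative to the varargs cookie.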
6989 #endif // TARGET_X86
6991 VarName name = nullptr;
6995 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
6997 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
6999 name = compiler->info.compVarScopes[scopeNum].vsdName;
// Hang on to this info.
7005 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
7007 tlvi.tlviVarNum = ilVarNum;
7008 tlvi.tlviLVnum = LVnum;
7009 tlvi.tlviName = name;
7010 tlvi.tlviStartPC = startOffs;
7011 tlvi.tlviLength = length;
7012 tlvi.tlviAvailable = avail;
7013 tlvi.tlviVarLoc = *varLoc;
7017 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, *varLoc);
7020 /*****************************************************************************/
7023 /*****************************************************************************
7026 * Can be called only after lviSetLocalVarInfo() has been called
7030 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
7032 if (!compiler->opts.compScopeInfo)
7035 if (compiler->info.compVarScopesCount == 0)
7038 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
7040 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
7042 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
7043 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
7044 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
7046 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
7053 /*****************************************************************************
7056 * Can be called only after lviSetLocalVarInfo() has been called
7060 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
7062 if (!compiler->opts.compScopeInfo)
7065 if (compiler->info.compVarScopesCount == 0)
7068 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
7070 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
7072 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStack((regNumber)reg, stkOffs)) &&
7073 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
7074 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
7076 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
7083 /*****************************************************************************/
7084 #endif // defined(DEBUG)
7085 #endif // LATE_DISASM
7089 /*****************************************************************************
* Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
7093 void CodeGen::genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping)
7095 if (mappingNum != unsigned(-1))
7097 printf("%d: ", mappingNum);
7100 switch (ipMapping->ipmdKind)
7102 case IPmappingDscKind::Prolog:
7105 case IPmappingDscKind::Epilog:
7108 case IPmappingDscKind::NoMapping:
7111 case IPmappingDscKind::Normal:
7112 const ILLocation& loc = ipMapping->ipmdLoc;
7113 Compiler::eeDispILOffs(loc.GetOffset());
7114 if (loc.IsStackEmpty())
7116 printf(" STACK_EMPTY");
7121 printf(" CALL_INSTRUCTION");
7128 ipMapping->ipmdNativeLoc.Print(compiler->compMethodID);
7129 // We can only call this after code generation. Is there any way to tell when it's legal to call?
7130 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(GetEmitter()));
7132 if (ipMapping->ipmdIsLabel)
7140 void CodeGen::genIPmappingListDisp()
7142 unsigned mappingNum = 0;
7144 for (IPmappingDsc& dsc : compiler->genIPmappings)
7146 genIPmappingDisp(mappingNum, &dsc);
7153 /*****************************************************************************
7155 * Append an IPmappingDsc struct to the list that we're maintaining
7157 * Record the instr offset as being at the current code gen position.
7160 void CodeGen::genIPmappingAdd(IPmappingDscKind kind, const DebugInfo& di, bool isLabel)
7162 if (!compiler->opts.compDbgInfo)
7167 assert((kind == IPmappingDscKind::Normal) == di.IsValid());
7171 case IPmappingDscKind::Prolog:
7172 case IPmappingDscKind::Epilog:
7177 if (kind == IPmappingDscKind::Normal)
7179 noway_assert(di.GetLocation().GetOffset() <= compiler->info.compILCodeSize);
7182 // Ignore this one if it's the same IL location as the last one we saw.
7183 // Note that we'll let through two identical IL offsets if the flag bits
7184 // differ, or two identical "special" mappings (e.g., PROLOG).
7185 if ((compiler->genIPmappings.size() > 0) && (kind == compiler->genIPmappings.back().ipmdKind) &&
7186 (di.GetLocation() == compiler->genIPmappings.back().ipmdLoc))
7188 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", di.GetLocation().GetOffset());
7194 IPmappingDsc addMapping;
7195 addMapping.ipmdNativeLoc.CaptureLocation(GetEmitter());
7196 addMapping.ipmdKind = kind;
7197 addMapping.ipmdLoc = di.GetLocation();
7198 addMapping.ipmdIsLabel = isLabel;
7200 assert((kind == IPmappingDscKind::Normal) == addMapping.ipmdLoc.IsValid());
7201 compiler->genIPmappings.push_back(addMapping);
7206 printf("Added IP mapping: ");
7207 genIPmappingDisp(unsigned(-1), &addMapping);
7212 /*****************************************************************************
7214 * Prepend an IPmappingDsc struct to the list that we're maintaining
7217 void CodeGen::genIPmappingAddToFront(IPmappingDscKind kind, const DebugInfo& di, bool isLabel)
7219 if (!compiler->opts.compDbgInfo)
7224 noway_assert((kind != IPmappingDscKind::Normal) ||
7225 (di.IsValid() && (di.GetLocation().GetOffset() <= compiler->info.compILCodeSize)));
7227 /* Create a mapping entry and prepend it to the list */
7229 IPmappingDsc addMapping;
7230 addMapping.ipmdNativeLoc.CaptureLocation(GetEmitter());
7231 addMapping.ipmdKind = kind;
7232 addMapping.ipmdLoc = di.GetLocation();
7233 addMapping.ipmdIsLabel = isLabel;
7234 compiler->genIPmappings.push_front(addMapping);
7239 printf("Added IP mapping to front: ");
7240 genIPmappingDisp(unsigned(-1), &addMapping);
7245 /*****************************************************************************/
7247 void CodeGen::genEnsureCodeEmitted(const DebugInfo& di)
7249 if (!compiler->opts.compDbgCode)
// If other IL offsets were reported since 'di', skip
7261 if (compiler->genIPmappings.size() <= 0)
7266 const IPmappingDsc& prev = compiler->genIPmappings.back();
7267 if (prev.ipmdLoc != di.GetLocation())
// di represents the last reported offset. Make sure that we generated native code for it.
7274 if (prev.ipmdNativeLoc.IsCurrentLocation(GetEmitter()))
7280 //------------------------------------------------------------------------
7281 // genIPmappingGen: Shut down the IP-mapping logic, report the info to the EE.
7283 void CodeGen::genIPmappingGen()
7285 if (!compiler->opts.compDbgInfo)
7290 JITDUMP("*************** In genIPmappingGen()\n");
7292 if (compiler->genIPmappings.size() <= 0)
7294 compiler->eeSetLIcount(0);
7295 compiler->eeSetLIdone();
7299 UNATIVE_OFFSET prevNativeOfs = UNATIVE_OFFSET(~0);
7300 for (jitstd::list<IPmappingDsc>::iterator it = compiler->genIPmappings.begin();
7301 it != compiler->genIPmappings.end();)
7303 UNATIVE_OFFSET dscNativeOfs = it->ipmdNativeLoc.CodeOffset(GetEmitter());
7304 if (dscNativeOfs != prevNativeOfs)
7306 prevNativeOfs = dscNativeOfs;
7311 // If we have a previous offset we should have a previous mapping.
7312 assert(it != compiler->genIPmappings.begin());
7313 jitstd::list<IPmappingDsc>::iterator prev = it;
7316 // Prev and current mappings have same native offset.
7317 // If one does not map to IL then remove that one.
7318 if (prev->ipmdKind == IPmappingDscKind::NoMapping)
7320 compiler->genIPmappings.erase(prev);
7325 if (it->ipmdKind == IPmappingDscKind::NoMapping)
7327 it = compiler->genIPmappings.erase(it);
7331 // Both have mappings.
7332 // If previous is the prolog, keep both if this one is at IL offset 0.
// (TODO: Why? The debugger has no problem breaking on the prolog mapping.)
7335 if ((prev->ipmdKind == IPmappingDscKind::Prolog) && (it->ipmdKind == IPmappingDscKind::Normal) &&
7336 (it->ipmdLoc.GetOffset() == 0))
7342 // For the special case of an IL instruction with no body followed by
7343 // the epilog (say ret void immediately preceding the method end), we
7344 // leave both entries in, so that we'll stop at the (empty) ret
7345 // statement if the user tries to put a breakpoint there, and then have
// the option of seeing the epilog or not, based on SetUnmappedStopMask.
7348 if (it->ipmdKind == IPmappingDscKind::Epilog)
7354 // For managed return values we store all calls. Keep both in this case
7356 if (((prev->ipmdKind == IPmappingDscKind::Normal) && (prev->ipmdLoc.IsCall())) ||
7357 ((it->ipmdKind == IPmappingDscKind::Normal) && (it->ipmdLoc.IsCall())))
// Otherwise report the higher offset unless the previous mapping is a label.
7365 if (prev->ipmdIsLabel)
7367 it = compiler->genIPmappings.erase(it);
7371 compiler->genIPmappings.erase(prev);
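// To illustrate the policy above (one shared native offset assumed): a
// (PROLOG, IL 0x00) pair and call-site mappings both survive; a NoMapping entry
// paired with anything is removed; and of two ordinary mappings the later one
// wins, unless the earlier one is a label, in which case the label is kept.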
7376 // Tell them how many mapping records we've got
7378 compiler->eeSetLIcount(static_cast<unsigned int>(compiler->genIPmappings.size()));
7380 // Now tell them about the mappings
7381 unsigned int mappingIdx = 0;
7382 for (const IPmappingDsc& dsc : compiler->genIPmappings)
7384 compiler->eeSetLIinfo(mappingIdx++, dsc.ipmdNativeLoc.CodeOffset(GetEmitter()), dsc.ipmdKind, dsc.ipmdLoc);
// This check is disabled. It is always true that any time this check asserts, the debugger would have a
// problem with IL source level debugging. However, for a C# file, it only matters if things are on
// different source lines. As a result, we have all sorts of latent problems with how we emit debug
// info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
// check back on.
if (compiler->opts.compDbgCode)
// Assert that the first instruction of every basic block with more than one incoming edge has a
// different sequence point from each incoming block.
// It turns out that the only thing we really have to assert is that the first statement in each basic
// block has an IL offset and appears in eeBoundaries.
7401 for (BasicBlock* const block : compiler->Blocks())
7403 Statement* stmt = block->firstStmt();
7404 if ((block->bbRefs > 1) && (stmt != nullptr))
7407 DebugInfo rootInfo = stmt->GetDebugInfo().GetRoot();
7408 if (rootInfo.IsValid())
7410 for (unsigned i = 0; i < compiler->eeBoundariesCount; ++i)
7412 if (compiler->eeBoundaries[i].ilOffset == rootInfo.GetLocation().GetOffset())
7419 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
7425 compiler->eeSetLIdone();
7429 //------------------------------------------------------------------------
7430 // genReportRichDebugInfoInlineTreeToFile:
7431 // Recursively process a context in the inline tree and write information about it to a file.
7435 // context - the context
7436 // first - whether this is the first of the siblings being written out
7438 void CodeGen::genReportRichDebugInfoInlineTreeToFile(FILE* file, InlineContext* context, bool* first)
7440 if (context->GetSibling() != nullptr)
7442 genReportRichDebugInfoInlineTreeToFile(file, context->GetSibling(), first);
7445 if (context->IsSuccess())
7454 fprintf(file, "{\"Ordinal\":%u,", context->GetOrdinal());
7455 fprintf(file, "\"MethodID\":%lld,", (int64_t)context->GetCallee());
7456 fprintf(file, "\"ILOffset\":%u,", context->GetLocation().GetOffset());
7457 fprintf(file, "\"LocationFlags\":%u,", (uint32_t)context->GetLocation().EncodeSourceTypes());
7458 fprintf(file, "\"ExactILOffset\":%u,", context->GetActualCallOffset());
7459 auto append = [&]() {
7461 const char* methodName = compiler->eeGetMethodName(context->GetCallee(), buffer, sizeof(buffer));
7462 fprintf(file, "\"MethodName\":\"%s\",", methodName);
7465 fprintf(file, "\"Inlinees\":[");
7466 if (context->GetChild() != nullptr)
7468 bool childFirst = true;
7469 genReportRichDebugInfoInlineTreeToFile(file, context->GetChild(), &childFirst);
7471 fprintf(file, "]}");
7475 //------------------------------------------------------------------------
7476 // genReportRichDebugInfoToFile:
7477 // Write rich debug info in JSON format to file specified by environment variable.
7479 void CodeGen::genReportRichDebugInfoToFile()
7481 if (JitConfig.WriteRichDebugInfoFile() == nullptr)
7486 static CritSecObject s_critSect;
7487 CritSecHolder holder(s_critSect);
7489 FILE* file = _wfopen(JitConfig.WriteRichDebugInfoFile(), W("a"));
7490 if (file == nullptr)
7495 // MethodID in ETW events are the method handles.
7496 fprintf(file, "{\"MethodID\":%lld,", (INT64)compiler->info.compMethodHnd);
7497 // Print inline tree.
7498 fprintf(file, "\"InlineTree\":");
7501 genReportRichDebugInfoInlineTreeToFile(file, compiler->compInlineContext, &first);
7502 fprintf(file, ",\"Mappings\":[");
7504 for (RichIPMapping& mapping : compiler->genRichIPmappings)
7513 fprintf(file, "{\"NativeOffset\":%u,\"InlineContext\":%u,\"ILOffset\":%u}",
7514 mapping.nativeLoc.CodeOffset(GetEmitter()), mapping.debugInfo.GetInlineContext()->GetOrdinal(),
7515 mapping.debugInfo.GetLocation().GetOffset());
7518 fprintf(file, "]}\n");
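// Shape of one emitted record (a sketch; all values assumed):
//
//   {"MethodID":4660,"InlineTree":{"Ordinal":0,"MethodID":4660,"ILOffset":0,
//    "LocationFlags":0,"ExactILOffset":0,"MethodName":"Program:Main","Inlinees":[]},
//    "Mappings":[{"NativeOffset":0,"InlineContext":0,"ILOffset":0}]}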
7525 //------------------------------------------------------------------------
7526 // genRecordRichDebugInfoInlineTree:
7527 // Recursively process a context in the inline tree and record information
7531 // context - the inline context
7532 // nodes - the array to record into
7534 void CodeGen::genRecordRichDebugInfoInlineTree(InlineContext* context, ICorDebugInfo::InlineTreeNode* nodes)
7536 if (context->IsSuccess())
7538 // We expect 1 + NumInlines unique ordinals
7539 assert(context->GetOrdinal() <= compiler->m_inlineStrategy->GetInlineCount());
7541 ICorDebugInfo::InlineTreeNode* node = &nodes[context->GetOrdinal()];
7542 node->Method = context->GetCallee();
7543 node->ILOffset = context->GetActualCallOffset();
7544 node->Child = context->GetChild() == nullptr ? 0 : context->GetChild()->GetOrdinal();
7545 node->Sibling = context->GetSibling() == nullptr ? 0 : context->GetSibling()->GetOrdinal();
7548 if (context->GetSibling() != nullptr)
7550 genRecordRichDebugInfoInlineTree(context->GetSibling(), nodes);
7553 if (context->GetChild() != nullptr)
7555 genRecordRichDebugInfoInlineTree(context->GetChild(), nodes);
7559 //------------------------------------------------------------------------
7560 // genReportRichDebugInfo:
7561 // If enabled, report rich debugging information to file and/or EE.
7563 void CodeGen::genReportRichDebugInfo()
7565 INDEBUG(genReportRichDebugInfoToFile());
7567 if (JitConfig.RichDebugInfo() == 0)
7572 unsigned numContexts = 1 + compiler->m_inlineStrategy->GetInlineCount();
7573 unsigned numRichMappings = static_cast<unsigned>(compiler->genRichIPmappings.size());
7575 ICorDebugInfo::InlineTreeNode* inlineTree = static_cast<ICorDebugInfo::InlineTreeNode*>(
7576 compiler->info.compCompHnd->allocateArray(numContexts * sizeof(ICorDebugInfo::InlineTreeNode)));
7577 ICorDebugInfo::RichOffsetMapping* mappings = static_cast<ICorDebugInfo::RichOffsetMapping*>(
7578 compiler->info.compCompHnd->allocateArray(numRichMappings * sizeof(ICorDebugInfo::RichOffsetMapping)));
7580 memset(inlineTree, 0, numContexts * sizeof(ICorDebugInfo::InlineTreeNode));
7581 memset(mappings, 0, numRichMappings * sizeof(ICorDebugInfo::RichOffsetMapping));
7583 genRecordRichDebugInfoInlineTree(compiler->compInlineContext, inlineTree);
7586 for (unsigned i = 0; i < numContexts; i++)
7588 assert(inlineTree[i].Method != NO_METHOD_HANDLE);
7592 size_t mappingIndex = 0;
7593 for (const RichIPMapping& richMapping : compiler->genRichIPmappings)
7595 ICorDebugInfo::RichOffsetMapping* mapping = &mappings[mappingIndex];
7596 assert(richMapping.debugInfo.IsValid());
7597 mapping->NativeOffset = richMapping.nativeLoc.CodeOffset(GetEmitter());
7598 mapping->Inlinee = richMapping.debugInfo.GetInlineContext()->GetOrdinal();
7599 mapping->ILOffset = richMapping.debugInfo.GetLocation().GetOffset();
7600 mapping->Source = richMapping.debugInfo.GetLocation().EncodeSourceTypes();
7605 compiler->info.compCompHnd->reportRichMappings(inlineTree, numContexts, mappings, numRichMappings);
7608 //------------------------------------------------------------------------
7609 // genAddRichIPMappingHere:
7610 // Create a rich IP mapping at the current emit location using the specified
7611 // debug information.
7614 // di - the debug information
7616 void CodeGen::genAddRichIPMappingHere(const DebugInfo& di)
7618 RichIPMapping mapping;
7619 mapping.nativeLoc.CaptureLocation(GetEmitter());
7620 mapping.debugInfo = di;
7621 compiler->genRichIPmappings.push_back(mapping);
7624 /*============================================================================
* These are empty stubs to help the late disassembler to compile
* if the late disassembler is being built into a non-DEBUG build.
7629 *============================================================================
7632 #if defined(LATE_DISASM)
7636 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
7642 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
7647 /*****************************************************************************/
7648 #endif // !defined(DEBUG)
7649 #endif // defined(LATE_DISASM)
7651 //------------------------------------------------------------------------
7652 // indirForm: Make a temporary indir we can feed to pattern matching routines
7653 // in cases where we don't want to instantiate all the indirs that happen.
7655 /* static */ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
7657 GenTreeIndir i(GT_IND, type, base, nullptr);
7658 i.SetRegNum(REG_NA);
7663 //------------------------------------------------------------------------
7664 // indirForm: Make a temporary indir we can feed to pattern matching routines
7665 // in cases where we don't want to instantiate all the indirs that happen.
7667 /* static */ GenTreeStoreInd CodeGen::storeIndirForm(var_types type, GenTree* base, GenTree* data)
7669 GenTreeStoreInd i(type, base, data);
7670 i.SetRegNum(REG_NA);
7674 //------------------------------------------------------------------------
7675 // intForm: Make a temporary int we can feed to pattern matching routines
7676 // in cases where we don't want to instantiate.
7678 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
7680 GenTreeIntCon i(type, value);
7681 i.SetRegNum(REG_NA);
7685 #if defined(TARGET_X86) || defined(TARGET_ARM)
7686 //------------------------------------------------------------------------
7687 // genLongReturn: Generates code for long return statement for x86 and arm.
7689 // Note: treeNode's and op1's registers are already consumed.
7692 // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
7697 void CodeGen::genLongReturn(GenTree* treeNode)
7699 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
7700 assert(treeNode->TypeGet() == TYP_LONG);
7701 GenTree* op1 = treeNode->gtGetOp1();
7702 var_types targetType = treeNode->TypeGet();
7704 assert(op1 != nullptr);
7705 assert(op1->OperGet() == GT_LONG);
7706 GenTree* loRetVal = op1->gtGetOp1();
7707 GenTree* hiRetVal = op1->gtGetOp2();
7708 assert((loRetVal->GetRegNum() != REG_NA) && (hiRetVal->GetRegNum() != REG_NA));
7710 genConsumeReg(loRetVal);
7711 genConsumeReg(hiRetVal);
7713 inst_Mov(targetType, REG_LNGRET_LO, loRetVal->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT));
7714 inst_Mov(targetType, REG_LNGRET_HI, hiRetVal->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT));
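// For example, on x86 a TYP_LONG return value GT_LONG(lo, hi) lands in EDX:EAX
// (REG_LNGRET_HI:REG_LNGRET_LO); the moves are skipped (canSkip) when the halves
// are already in the return registers.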
7716 #endif // TARGET_X86 || TARGET_ARM
7718 //------------------------------------------------------------------------
7719 // genReturn: Generates code for return statement.
7720 // In case of struct return, delegates to the genStructReturn method.
7723 // treeNode - The GT_RETURN or GT_RETFILT tree node.
7728 void CodeGen::genReturn(GenTree* treeNode)
7730 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
7731 GenTree* op1 = treeNode->gtGetOp1();
7732 var_types targetType = treeNode->TypeGet();
7734 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
7735 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
7736 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
7737 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
7740 if (targetType == TYP_VOID)
7742 assert(op1 == nullptr);
7746 #if defined(TARGET_X86) || defined(TARGET_ARM)
7747 if (targetType == TYP_LONG)
7749 genLongReturn(treeNode);
7752 #endif // TARGET_X86 || TARGET_ARM
7754 if (isStructReturn(treeNode))
7756 genStructReturn(treeNode);
7758 else if (targetType != TYP_VOID)
7760 assert(op1 != nullptr);
7761 noway_assert(op1->GetRegNum() != REG_NA);
7763 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
7764 // consumed a reg for the operand. This is because the variable
7765 // is dead after return. But we are issuing more instructions
7766 // like "profiler leave callback" after this consumption. So
7767 // if you are issuing more instructions after this point,
7768 // remember to keep the variable live up until the new method
7769 // exit point where it is actually dead.
7772 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7773 genSimpleReturn(treeNode);
#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
7775 #if defined(TARGET_X86)
7776 if (varTypeUsesFloatReg(treeNode))
7778 genFloatReturn(treeNode);
7781 #elif defined(TARGET_ARM)
7782 if (varTypeUsesFloatReg(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
7784 if (targetType == TYP_FLOAT)
7786 GetEmitter()->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->GetRegNum(),
7787 /* canSkip */ false);
7791 assert(targetType == TYP_DOUBLE);
7792 GetEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
7797 #endif // TARGET_ARM
7801 if (varTypeUsesIntReg(treeNode))
7803 retReg = REG_INTRET;
7807 assert(varTypeUsesFloatReg(treeNode));
7808 retReg = REG_FLOATRET;
7811 inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true);
#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
7817 #ifdef PROFILING_SUPPORTED
7819 // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
7820 // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
7821 // in the handling of the GT_RETURN statement.
7822 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
7823 // for the return registers containing GC refs.
7825 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
// The flowgraph and other places assert that the last node of a block marked as
// BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
// maintain such an invariant irrespective of whether a profiler hook is needed or not.
7829 // Also, there is not much to be gained by materializing it as an explicit node.
7831 // There should be a single return block while generating profiler ELT callbacks,
7832 // so we just look for that block to trigger insertion of the profile hook.
7833 if ((compiler->compCurBB == compiler->genReturnBB) && compiler->compIsProfilerHookNeeded())
7836 // Since we are invalidating the assumption that we would slip into the epilog
7837 // right after the "return", we need to preserve the return reg's GC state
7838 // across the call until actual method return.
7840 ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
7841 unsigned retRegCount = retTypeDesc.GetReturnRegCount();
7843 if (compiler->compMethodReturnsRetBufAddr())
7845 gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
7849 for (unsigned i = 0; i < retRegCount; ++i)
7851 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
7853 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
7858 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_LEAVE);
7860 if (compiler->compMethodReturnsRetBufAddr())
7862 gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
7866 for (unsigned i = 0; i < retRegCount; ++i)
7868 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
7870 gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
7875 #endif // PROFILING_SUPPORTED
7877 #if defined(DEBUG) && defined(TARGET_XARCH)
7878 bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
7880 #if defined(FEATURE_EH_FUNCLETS)
7881 // Don't do stack pointer check at the return from a funclet; only for the main function.
7882 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7884 doStackPointerCheck = false;
7886 #else // !FEATURE_EH_FUNCLETS
7887 // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
7888 // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
7889 if (compiler->compCurBB->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET))
7891 doStackPointerCheck = false;
7893 #endif // !FEATURE_EH_FUNCLETS
7895 genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
7896 #endif // defined(DEBUG) && defined(TARGET_XARCH)
7899 //------------------------------------------------------------------------
7900 // isStructReturn: Returns whether the 'treeNode' is returning a struct.
7903 // treeNode - The tree node to evaluate whether is a struct return.
// Returns true if the 'treeNode' is a GT_RETURN node of type struct.
7907 // Otherwise returns false.
7909 bool CodeGen::isStructReturn(GenTree* treeNode)
// This method could be called for 'treeNode' of GT_RETFILT or GT_RETURN.
// For GT_RETFILT, the return is always a bool or a void, for the end of a finally block.
7913 noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
7914 if (treeNode->OperGet() != GT_RETURN)
7919 #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
7920 assert(!varTypeIsStruct(treeNode));
7923 return varTypeIsStruct(treeNode) && (compiler->info.compRetNativeType == TYP_STRUCT);
7927 //------------------------------------------------------------------------
7928 // genStructReturn: Generates code for returning a struct.
7931 // treeNode - The GT_RETURN tree node.
7937 // op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
7939 void CodeGen::genStructReturn(GenTree* treeNode)
7941 assert(treeNode->OperGet() == GT_RETURN);
7943 genConsumeRegs(treeNode->gtGetOp1());
7945 GenTree* op1 = treeNode->gtGetOp1();
7946 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
7948 ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
7949 const unsigned regCount = retTypeDesc.GetReturnRegCount();
7950 assert(regCount <= MAX_RET_REG_COUNT);
7952 #if FEATURE_MULTIREG_RET
7953 // Right now the only enregisterable structs supported are SIMD vector types.
7954 if (genIsRegCandidateLocal(actualOp1))
7957 const GenTreeLclVar* lclVar = actualOp1->AsLclVar();
7958 const LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar);
7959 assert(varTypeIsSIMD(varDsc->GetRegisterType()));
7960 assert(!lclVar->IsMultiReg());
7964 genSIMDSplitReturn(op1, &retTypeDesc);
7965 #endif // FEATURE_SIMD
7967 else if (actualOp1->OperIs(GT_LCL_VAR) && !actualOp1->AsLclVar()->IsMultiReg())
7969 GenTreeLclVar* lclNode = actualOp1->AsLclVar();
7970 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
7971 assert(varDsc->lvIsMultiRegRet);
7973 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }",
// i.e., it does not include the padding for the first field, so the general loop below won't work.
7976 var_types type = retTypeDesc.GetReturnRegType(0);
7977 regNumber toReg = retTypeDesc.GetABIReturnReg(0);
7978 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0);
7981 assert(regCount == 2);
7982 int offset = genTypeSize(type);
7983 type = retTypeDesc.GetReturnRegType(1);
7984 offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset);
7985 toReg = retTypeDesc.GetABIReturnReg(1);
7986 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
7988 #else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
7990 for (unsigned i = 0; i < regCount; ++i)
7992 var_types type = retTypeDesc.GetReturnRegType(i);
7993 regNumber toReg = retTypeDesc.GetABIReturnReg(i);
7994 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
7995 offset += genTypeSize(type);
7997 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
8001 for (unsigned i = 0; i < regCount; ++i)
8003 var_types type = retTypeDesc.GetReturnRegType(i);
8004 regNumber toReg = retTypeDesc.GetABIReturnReg(i);
8005 regNumber fromReg = op1->GetRegByIndex(i);
8006 if ((fromReg == REG_NA) && op1->OperIs(GT_COPY))
8008 // A copy that doesn't copy this field will have REG_NA.
8009 // TODO-Cleanup: It would probably be better to always have a valid reg
8010 // on a GT_COPY, unless the operand is actually spilled. Then we wouldn't have
8011 // to check for this case (though we'd have to check in the genRegCopy that the
8013 fromReg = actualOp1->GetRegByIndex(i);
8015 if (fromReg == REG_NA)
8017 // This is a spilled field of a multi-reg lclVar.
8018 // We currently only mark a lclVar operand as RegOptional, since we don't have a way
8019 // to mark a multi-reg tree node as used from spill (GTF_NOREG_AT_USE) on a per-reg basis.
8020 LclVarDsc* varDsc = compiler->lvaGetDesc(actualOp1->AsLclVar());
8021 assert(varDsc->lvPromoted);
8022 unsigned fieldVarNum = varDsc->lvFieldLclStart + i;
8023 assert(compiler->lvaGetDesc(fieldVarNum)->lvOnFrame);
8025 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, fieldVarNum, 0);
8029 // Note that ins_Copy(fromReg, type) will return the appropriate register to copy
8030 // between register files if needed.
8031 inst_Mov(type, toReg, fromReg, /* canSkip */ true);
8035 #else // !FEATURE_MULTIREG_RET
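// Worked example for the LoongArch64/RISC-V path above (illustrative only):
// for "struct { int, double }", retTypeDesc is { TYP_INT, TYP_DOUBLE }. Register 0
// loads the TYP_INT field from offset 0. For register 1, offset starts as
// genTypeSize(TYP_INT) == 4 and is then raised to genTypeSize(TYP_DOUBLE) == 8
// because 4 < 8, matching the in-memory padding before the double at offset 8.
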
//----------------------------------------------------------------------------------
// genMultiRegStoreToLocal: store multi-reg value to a local
//
// Arguments:
//    lclNode  -  GenTree of GT_STORE_LCL_VAR
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg node.
//
void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
{
    assert(lclNode->OperIs(GT_STORE_LCL_VAR));
    assert(varTypeIsStruct(lclNode) || varTypeIsMultiReg(lclNode));

    GenTree* op1 = lclNode->gtGetOp1();
    assert(op1->IsMultiRegNode());
    GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
    unsigned regCount  = actualOp1->GetMultiRegCount(compiler);
    assert(regCount > 1);

    // Assumption: current implementation requires that a multi-reg
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being promoted, unless compiler->lvaEnregMultiRegVars is true.

    unsigned   lclNum = lclNode->GetLclNum();
    LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
    if (op1->OperIs(GT_CALL))
    {
        assert(regCount <= MAX_RET_REG_COUNT);
        noway_assert(varDsc->lvIsMultiRegRet);
    }

#ifdef FEATURE_SIMD
    // Check for the case of an enregistered SIMD type that's returned in multiple registers.
    if (varDsc->lvIsRegCandidate() && (lclNode->GetRegNum() != REG_NA))
    {
        assert(varTypeIsSIMD(lclNode));
        genMultiRegStoreToSIMDLocal(lclNode);
        return;
    }
#endif // FEATURE_SIMD

    // We have either a multi-reg local or a local with multiple fields in memory.
    //
    // The liveness model is as follows:
    //    use reg #0 from src, including any reload or copy
    //    define reg #0
    //    use reg #1 from src, including any reload or copy
    //    define reg #1
    //    etc.
    // Imagine the following scenario:
    //    There are 3 registers used. Prior to this node, they occupy registers r3, r2 and r1.
    //    There are 3 registers defined by this node. They need to be placed in r1, r2 and r3,
    //    in that order.
    //
    // If we defined this as using all the source registers at once, we'd have to adopt one
    // of the following models:
    //  - All (or all but one) of the incoming sources are marked "delayFree" so that they won't
    //    get the same register as any of the registers being defined. This would result in copies for
    //    the common case where the source and destination registers are the same (e.g. when a CALL
    //    result is assigned to a lclVar, which is then returned).
    //    - For our example (and for many/most cases) we would have to copy or spill all sources.
    //  - We allow circular dependencies between source and destination registers. This would require
    //    the code generator to determine the order in which the copies must be generated, and would
    //    require a temp register in case a swap is required. This complexity would have to be handled
    //    in both the normal code generation case, as well as for copies & reloads, as they are currently
    //    modeled by the register allocator to happen just prior to the use.
    //    - For our example, a temp would be required to swap r1 and r3, unless a swap instruction is
    //      available on the target.
    //
    // By having a multi-reg local use and define each field in order, we avoid these issues, and the
    // register allocator will ensure that any conflicts are resolved via spill or inserted COPYs.
    // For our example, the register allocator would simply spill r1 because the first def requires it.
    // The code generator would move r3 to r1, leave r2 alone, and then load the spilled value into r3.

    unsigned offset        = 0;
    bool     isMultiRegVar = lclNode->IsMultiRegLclVar();
    bool     hasRegs       = false;

    if (isMultiRegVar)
    {
        assert(compiler->lvaEnregMultiRegVars);
        assert(regCount == varDsc->lvFieldCnt);
    }

    for (unsigned i = 0; i < regCount; ++i)
    {
        regNumber reg     = genConsumeReg(op1, i);
        var_types srcType = actualOp1->GetRegTypeByIndex(i);
        // genConsumeReg will return the valid register, either from the COPY
        // or from the original source.
        assert(reg != REG_NA);

        if (isMultiRegVar)
        {
            // Each field is passed in its own register, use the field types.
            regNumber  varReg      = lclNode->GetRegByIndex(i);
            unsigned   fieldLclNum = varDsc->lvFieldLclStart + i;
            LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum);
            var_types  destType    = fieldVarDsc->TypeGet();
            if (varReg != REG_NA)
            {
                hasRegs = true;

                // We may need a cross register-file copy here.
                inst_Mov(destType, varReg, reg, /* canSkip */ true);
            }
            else
            {
                varReg = REG_STK;
            }
            if ((varReg == REG_STK) || fieldVarDsc->IsAlwaysAliveInMemory())
            {
                if (!lclNode->IsLastUse(i))
                {
                    // A byte field passed in a long register should be written on the stack as a byte.
                    instruction storeIns = ins_StoreFromSrc(reg, destType);
                    GetEmitter()->emitIns_S_R(storeIns, emitTypeSize(destType), reg, fieldLclNum, 0);
                }
            }
            fieldVarDsc->SetRegNum(varReg);
        }
        else
        {
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
            // should consider the padding field within a struct.
            offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset;
#endif
            // Several fields could be passed in one register, copy using the register type.
            // It could rewrite memory outside of the fields, but locals on the stack are rounded up to
            // POINTER_SIZE, so it is safe to store a long register into a byte field as it is known that
            // we have enough padding after.
            GetEmitter()->emitIns_S_R(ins_Store(srcType), emitTypeSize(srcType), reg, lclNum, offset);
            offset += genTypeSize(srcType);

#ifdef DEBUG
#ifdef TARGET_64BIT
            assert(offset <= varDsc->lvSize());
#else  // !TARGET_64BIT
            if (varTypeIsStruct(varDsc))
            {
                assert(offset <= varDsc->lvSize());
            }
            else
            {
                assert(varDsc->TypeGet() == TYP_LONG);
                assert(offset <= genTypeSize(TYP_LONG));
            }
#endif // !TARGET_64BIT
#endif // DEBUG
        }
    }

    // Update variable liveness.
    if (isMultiRegVar)
    {
        if (hasRegs)
        {
            genProduceReg(lclNode);
        }
        else
        {
            genUpdateLife(lclNode);
        }
    }
    else
    {
        genUpdateLife(lclNode);
        varDsc->SetRegNum(REG_STK);
    }
}

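// To make the liveness model above concrete (an illustrative walk-through, not
// extra codegen logic): suppose the three source fields live in r3, r2 and r1,
// and must be defined into r1, r2 and r3, in that order. Processing
// field-by-field, the first def needs r1, which still holds a live source, so
// the register allocator spills r1. Codegen then moves r3 into r1, leaves r2
// in place, and reloads the spilled value into r3 -- no cycle detection or
// swap temp is ever needed here.
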
//------------------------------------------------------------------------
// genRegCopy: Produce code for a GT_COPY node.
//
// Arguments:
//    tree - the GT_COPY node
//
// Notes:
//    This will copy the register produced by this node's source, to
//    the register allocated to this GT_COPY node.
//    It has some special handling for these cases:
//    - when the source and target registers are in different register files
//      (note that this is *not* a conversion).
//    - when the source is a lclVar whose home location is being moved to a new
//      register (rather than just being copied for temporary use).
//
void CodeGen::genRegCopy(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_COPY);
    GenTree* op1 = treeNode->AsOp()->gtOp1;

    if (op1->IsMultiRegNode())
    {
        // Register allocation assumes that any reload and copy are done in operand order.
        // That is, we can have:
        //    (reg0, reg1) = COPY(V0, V1) where V0 is in reg1 and V1 is in memory
        // The register allocation model assumes:
        //    First, V0 is moved to reg0 (V1 can't be in reg0 because it is still live, which would be a conflict).
        //    Then, V1 is moved to reg1.
        // However, if we call genConsumeRegs on op1, it will do the reload of V1 before we do the copy of V0.
        // So we need to handle that case first.
        //
        // There should never be any circular dependencies, and we will check that here.
        //
        // GenTreeCopyOrReload only reports the highest index that has a valid register.
        // However, we need to ensure that we consume all the registers of the child node,
        // so we use its regCount.
        unsigned regCount = op1->GetMultiRegCount(compiler);
        assert(regCount <= MAX_MULTIREG_COUNT);

        // First set the source registers as busy if they haven't been spilled.
        // (Note that this is just for verification that we don't have circular dependencies.)
        regMaskTP busyRegs = RBM_NONE;
        for (unsigned i = 0; i < regCount; ++i)
        {
            if ((op1->GetRegSpillFlagByIdx(i) & GTF_SPILLED) == 0)
            {
                busyRegs |= genRegMask(op1->GetRegByIndex(i));
            }
        }
        for (unsigned i = 0; i < regCount; ++i)
        {
            regNumber sourceReg = op1->GetRegByIndex(i);
            // genRegCopy will consume the source register, perform any required reloads,
            // and will return either the register copied to, or the original register if there's no copy.
            regNumber targetReg = genRegCopy(treeNode, i);
            if (targetReg != sourceReg)
            {
                regMaskTP targetRegMask = genRegMask(targetReg);
                assert((busyRegs & targetRegMask) == 0);
                // Clear sourceReg from the busyRegs, and add targetReg.
                busyRegs &= ~genRegMask(sourceReg);
            }
            busyRegs |= genRegMask(targetReg);
        }
        return;
    }

    regNumber srcReg     = genConsumeReg(op1);
    var_types targetType = treeNode->TypeGet();
    regNumber targetReg  = treeNode->GetRegNum();
    assert(srcReg != REG_NA);
    assert(targetReg != REG_NA);
    assert(targetType != TYP_STRUCT);

    inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false);

    if (op1->IsLocal())
    {
        // The lclVar will never be a def.
        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
        // appropriately set the gcInfo for the copied value.
        // If not, there are two cases we need to handle:
        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
        //   will remain live in its original register.
        //   genProduceReg() will appropriately set the gcInfo for the copied value,
        //   and genConsumeReg will reset it.
        // - Otherwise, we need to update register info for the lclVar.

        GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
        assert((lcl->gtFlags & GTF_VAR_DEF) == 0);

        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
        {
            LclVarDsc* varDsc = compiler->lvaGetDesc(lcl);

            // If we didn't just spill it (in genConsumeReg, above), then update the register info
            if (varDsc->GetRegNum() != REG_STK)
            {
                // The old location is dying
                genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));

                gcInfo.gcMarkRegSetNpt(genRegMask(op1->GetRegNum()));

                genUpdateVarReg(varDsc, treeNode);

                // Report the home change for this variable
                varLiveKeeper->siUpdateVariableLiveRange(varDsc, lcl->GetLclNum());

                // The new location is going live
                genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
            }
        }
    }

    genProduceReg(treeNode);
}

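// Illustrative (simplified) trace of the multi-reg verification in genRegCopy
// above: for (reg0, reg1) = COPY(V0, V1) with V0 in reg1 and V1 spilled,
// busyRegs starts as {reg1} since the spilled V1 contributes nothing. Index 0
// copies reg1 -> reg0: the assert sees that reg0 is not busy, then reg1 leaves
// the busy set and reg0 enters it. Index 1 can then reload V1 into reg1
// without tripping the circular-dependency check.
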
//------------------------------------------------------------------------
// genRegCopy: Produce code for a single register of a multireg copy node.
//
// Arguments:
//    tree          - The GT_COPY node
//    multiRegIndex - The index of the register to be copied
//
// Notes:
//    This will copy the corresponding register produced by this node's source, to
//    the register allocated to the register specified by this GT_COPY node.
//    A multireg copy doesn't support moving between register files, as the GT_COPY
//    node does not retain separate types for each index.
//    It has special handling for the case when the source is a lclVar whose home
//    location is being moved to a new register (rather than just being copied for
//    temporary use).
//
// Return Value:
//    Either the register copied to, or the original register if there's no copy.
//
regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex)
{
    assert(treeNode->OperGet() == GT_COPY);
    GenTree* op1 = treeNode->gtGetOp1();
    assert(op1->IsMultiRegNode());

    GenTreeCopyOrReload* copyNode = treeNode->AsCopyOrReload();
    assert(copyNode->GetRegCount() <= MAX_MULTIREG_COUNT);

    // Consume op1's register, which will perform any necessary reloads.
    genConsumeReg(op1, multiRegIndex);

    regNumber sourceReg = op1->GetRegByIndex(multiRegIndex);
    regNumber targetReg = copyNode->GetRegNumByIdx(multiRegIndex);
    // GenTreeCopyOrReload only reports the highest index that has a valid register.
    // However there may be lower indices that have no valid register (i.e. the register
    // on the source is still valid at the consumer).
    if (targetReg != REG_NA)
    {
        // We shouldn't specify a no-op move.
        assert(sourceReg != targetReg);
        var_types type;
        if (op1->IsMultiRegLclVar())
        {
            LclVarDsc* parentVarDsc = compiler->lvaGetDesc(op1->AsLclVar());
            unsigned   fieldVarNum  = parentVarDsc->lvFieldLclStart + multiRegIndex;
            LclVarDsc* fieldVarDsc  = compiler->lvaGetDesc(fieldVarNum);
            type                    = fieldVarDsc->TypeGet();
            inst_Mov(type, targetReg, sourceReg, /* canSkip */ false);
            if (!op1->AsLclVar()->IsLastUse(multiRegIndex) && fieldVarDsc->GetRegNum() != REG_STK)
            {
                // The old location is dying
                genUpdateRegLife(fieldVarDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
                gcInfo.gcMarkRegSetNpt(genRegMask(sourceReg));
                genUpdateVarReg(fieldVarDsc, treeNode);

                // Report the home change for this variable
                varLiveKeeper->siUpdateVariableLiveRange(fieldVarDsc, fieldVarNum);

                // The new location is going live
                genUpdateRegLife(fieldVarDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
            }
        }
        else
        {
            type = op1->GetRegTypeByIndex(multiRegIndex);
            inst_Mov(type, targetReg, sourceReg, /* canSkip */ false);
            // We never spill after a copy, so to produce the single register, we simply need to
            // update the GC info for the defined register.
            gcInfo.gcMarkRegPtrVal(targetReg, type);
        }
        return targetReg;
    }
    else
    {
        return sourceReg;
    }
}

#if defined(DEBUG) && defined(TARGET_XARCH)

//------------------------------------------------------------------------
// genStackPointerCheck: Generate code to check the stack pointer against a saved value.
// This is a debug check.
//
// Arguments:
//    doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
//    lvaStackPointerVar  - The local variable number that holds the value of the stack pointer
//                          we are comparing against.
//    offset              - the offset from the stack pointer to expect
//    regTmp              - register we can use for computation if `offset` != 0
//
// Return Value:
//    None
//
void CodeGen::genStackPointerCheck(bool      doStackPointerCheck,
                                   unsigned  lvaStackPointerVar,
                                   ssize_t   offset,
                                   regNumber regTmp)
{
    if (doStackPointerCheck)
    {
        assert(lvaStackPointerVar != BAD_VAR_NUM);
        assert(compiler->lvaGetDesc(lvaStackPointerVar)->lvDoNotEnregister);
        assert(compiler->lvaGetDesc(lvaStackPointerVar)->lvOnFrame);

        if (offset != 0)
        {
            assert(regTmp != REG_NA);
            GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, regTmp, REG_SPBASE, /* canSkip */ false);
            GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, regTmp, offset);
            GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regTmp, lvaStackPointerVar, 0);
        }
        else
        {
            GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);
        }

        BasicBlock* sp_check = genCreateTempLabel();
        GetEmitter()->emitIns_J(INS_je, sp_check);
        instGen(INS_BREAKPOINT);
        genDefineTempLabel(sp_check);
    }
}

#endif // defined(DEBUG) && defined(TARGET_XARCH)

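// For illustration (the exact encoding is chosen by the emitter), the check
// generated above has this shape in the offset == 0 case:
//
//      cmp  [lvaStackPointerVar], rsp
//      je   SP_OK
//      int3             ; INS_BREAKPOINT
//   SP_OK:
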
unsigned CodeGenInterface::getCurrentStackLevel() const
{
    return genStackLevel;
}

#ifdef DEBUG
//------------------------------------------------------------------------
//                      VariableLiveRanges dumpers
//------------------------------------------------------------------------

// Dump "VariableLiveRange" when code has not been generated: we don't have the
// assembly native offsets yet, but we do have the "emitLocation"s and the "siVarLoc".
void CodeGenInterface::VariableLiveKeeper::VariableLiveRange::dumpVariableLiveRange(
    const CodeGenInterface* codeGen) const
{
    codeGen->dumpSiVarLoc(&m_VarLocation);

    printf(" [");
    m_StartEmitLocation.Print(codeGen->GetCompiler()->compMethodID);
    printf(", ");
    if (m_EndEmitLocation.Valid())
    {
        m_EndEmitLocation.Print(codeGen->GetCompiler()->compMethodID);
    }
    else
    {
        printf("...");
    }
    printf("]");
}

// Dump "VariableLiveRange" when code has been generated and we have the assembly native offset of each "emitLocation"
void CodeGenInterface::VariableLiveKeeper::VariableLiveRange::dumpVariableLiveRange(
    emitter* emit, const CodeGenInterface* codeGen) const
{
    assert(emit != nullptr);

    // "VariableLiveRange"s are created setting their location ("m_VarLocation") and the initial native offset
    // ("m_StartEmitLocation")
    codeGen->dumpSiVarLoc(&m_VarLocation);

    // If this is an open "VariableLiveRange", "m_EndEmitLocation" is invalid and we print -1
    UNATIVE_OFFSET endAssemblyOffset = m_EndEmitLocation.Valid() ? m_EndEmitLocation.CodeOffset(emit) : -1;

    printf(" [%X, %X)", m_StartEmitLocation.CodeOffset(emit), endAssemblyOffset);
}

//------------------------------------------------------------------------
//                      LiveRangeDumper
//------------------------------------------------------------------------
//------------------------------------------------------------------------
// resetDumper: If the "liveRange" has its last "VariableLiveRange" closed, make
//  the "LiveRangeDumper" point to the end of "liveRange" (nullptr). Otherwise,
//  make the "LiveRangeDumper" point to the last "VariableLiveRange" of
//  "liveRange", which is still open.
//
// Arguments:
//  liveRanges - the "LiveRangeList" of the "VariableLiveDescriptor" whose
//      "LiveRangeDumper" we want to update.
//
// Notes:
//  This method is expected to be called once the code for a BasicBlock has been
//  generated and all the new "VariableLiveRange"s of the variable during this block
//  have been dumped.
//
void CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::resetDumper(const LiveRangeList* liveRanges)
{
    // Something must have been reported in order to reset
    assert(m_hasLiveRangestoDump);

    if (liveRanges->back().m_EndEmitLocation.Valid())
    {
        // the last "VariableLiveRange" is closed and the variable
        // is no longer alive
        m_hasLiveRangestoDump = false;
    }
    else
    {
        // the last "VariableLiveRange" remains open because it is
        // live at the "BasicBlock"s "bbLiveOut".
        m_StartingLiveRange = liveRanges->backPosition();
    }
}

//------------------------------------------------------------------------
// setDumperStartAt: Make the "LiveRangeDumper" instance point to the last "VariableLiveRange"
// added, so we can start dumping from there once the current "BasicBlock"s code is generated.
//
// Arguments:
//  liveRangeIt - an iterator to a position in "VariableLiveDescriptor::m_VariableLiveRanges"
//
void CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::setDumperStartAt(const LiveRangeListIterator liveRangeIt)
{
    m_hasLiveRangestoDump = true;
    m_StartingLiveRange   = liveRangeIt;
}

//------------------------------------------------------------------------
// getStartForDump: Return an iterator to the first "VariableLiveRange" edited/added
// during the current "BasicBlock"
//
// Return Value:
//  A LiveRangeListIterator to the first "VariableLiveRange" in "LiveRangeList" which
//  was used during the last "BasicBlock".
//
CodeGenInterface::VariableLiveKeeper::LiveRangeListIterator CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::
    getStartForDump() const
{
    return m_StartingLiveRange;
}

//------------------------------------------------------------------------
// hasLiveRangesToDump: Return whether at least one "VariableLiveRange" was alive during
// the current "BasicBlock"'s code generation
//
// Return Value:
//  A boolean indicating whether there is at least one "VariableLiveRange"
//  that has been used for the variable during the last "BasicBlock".
//
bool CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::hasLiveRangesToDump() const
{
    return m_hasLiveRangestoDump;
}

#endif // DEBUG

//------------------------------------------------------------------------
//                      VariableLiveDescriptor
//------------------------------------------------------------------------

CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::VariableLiveDescriptor(CompAllocator allocator)
{
    // Initialize an empty list
    m_VariableLiveRanges = new (allocator) LiveRangeList(allocator);

    INDEBUG(m_VariableLifeBarrier = new (allocator) LiveRangeDumper(m_VariableLiveRanges));
}

//------------------------------------------------------------------------
// hasVariableLiveRangeOpen: Return true if the variable is still alive,
//  false otherwise.
//
bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVariableLiveRangeOpen() const
{
    return !m_VariableLiveRanges->empty() && !m_VariableLiveRanges->back().m_EndEmitLocation.Valid();
}

//------------------------------------------------------------------------
// getLiveRanges: Return the list of variable locations for this variable.
//
// Return Value:
//  A const LiveRangeList* pointing to the first variable location if it has
//  any, or the end of the list otherwise.
//
CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::
    getLiveRanges() const
{
    return m_VariableLiveRanges;
}

//------------------------------------------------------------------------
// startLiveRangeFromEmitter: Report this variable as being born in "varLocation"
//  since the instruction where "emit" is located.
//
// Arguments:
//  varLocation - the home of the variable.
//  emit - an emitter* instance located at the first instruction from
//      where "varLocation" becomes valid.
//
// Assumptions:
//  This variable is being born, so it should currently be dead.
//
// Notes:
//  The position of "emit" matters to ensure intervals are inclusive of the
//  beginning and exclusive of the end.
//
void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::startLiveRangeFromEmitter(
    CodeGenInterface::siVarLoc varLocation, emitter* emit) const
{
    noway_assert(emit != nullptr);

    // Either this is the first "VariableLiveRange" or the previous one has been closed so its "m_EndEmitLocation"
    // is valid
    noway_assert(m_VariableLiveRanges->empty() || m_VariableLiveRanges->back().m_EndEmitLocation.Valid());

    if (!m_VariableLiveRanges->empty() &&
        siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation)) &&
        m_VariableLiveRanges->back().m_EndEmitLocation.IsPreviousInsNum(emit))
    {
        JITDUMP("Extending debug range...\n");

        // The variable is being born just after the instruction at which it died.
        // In this case, i.e. an update of the variable's value, we coalesce the live ranges.
        m_VariableLiveRanges->back().m_EndEmitLocation.Init();
    }
    else
    {
        JITDUMP("New debug range: %s\n",
                m_VariableLiveRanges->empty()
                    ? "first"
                    : siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation))
                          ? "new var or location"
                          : "not adjacent");
        // Creates a new live range with an invalid end
        m_VariableLiveRanges->emplace_back(varLocation, emitLocation(), emitLocation());
        m_VariableLiveRanges->back().m_StartEmitLocation.CaptureLocation(emit);
    }

#ifdef DEBUG
    if (!m_VariableLifeBarrier->hasLiveRangesToDump())
    {
        m_VariableLifeBarrier->setDumperStartAt(m_VariableLiveRanges->backPosition());
    }
#endif // DEBUG

    // m_StartEmitLocation must now be valid, and m_EndEmitLocation must not be
    noway_assert(m_VariableLiveRanges->back().m_StartEmitLocation.Valid());
    noway_assert(!m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
}

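// Coalescing example (illustrative): if V01 dies at instruction N and is
// reported born again at N+1 with an identical siVarLoc, IsPreviousInsNum
// detects the adjacency and the closed range's end location is simply
// invalidated again via Init(), yielding one contiguous range instead of two
// back-to-back entries for what is effectively the same home.
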
//------------------------------------------------------------------------
// endLiveRangeAtEmitter: Report this variable as becoming dead since the
//  instruction where "emit" is located.
//
// Arguments:
//  emit - an emitter* instance located at the first instruction from
//      which this variable becomes dead.
//
// Assumptions:
//  This variable is becoming dead, so it should currently be alive.
//
// Notes:
//  The position of "emit" matters to ensure intervals are inclusive of the
//  beginning and exclusive of the end.
//
void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::endLiveRangeAtEmitter(emitter* emit) const
{
    noway_assert(emit != nullptr);
    noway_assert(hasVariableLiveRangeOpen());

    // Using [closed, open) ranges so as to not compute the size of the last instruction
    m_VariableLiveRanges->back().m_EndEmitLocation.CaptureLocation(emit);

    JITDUMP("Closing debug range.\n");
    // Now m_EndEmitLocation has to be valid
    noway_assert(m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
}

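// Interval convention, for illustration: ranges are half-open, [start, end).
// If a variable becomes live at native offset 0x10 and this method is called
// at the instruction starting at offset 0x24, the reported range is
// [0x10, 0x24): offset 0x24 itself is excluded, which is exactly why the size
// of the last covered instruction never needs to be computed.
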
//------------------------------------------------------------------------
// updateLiveRangeAtEmitter: Report this variable as changing its variable
//  home to "varLocation" since the instruction where "emit" is located.
//
// Arguments:
//  varLocation - the new variable location.
//  emit - an emitter* instance located at the first instruction from
//      where "varLocation" becomes valid.
//
// Assumptions:
//  This variable is being moved to a new home, so it should currently be alive.
//
// Notes:
//  The position of "emit" matters to ensure intervals are inclusive of the
//  beginning and exclusive of the end.
//
void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::updateLiveRangeAtEmitter(
    CodeGenInterface::siVarLoc varLocation, emitter* emit) const
{
    // This variable is changing home so it must have been started before during this block
    noway_assert(m_VariableLiveRanges != nullptr && !m_VariableLiveRanges->empty());

    // And its last m_EndEmitLocation has to be invalid
    noway_assert(!m_VariableLiveRanges->back().m_EndEmitLocation.Valid());

    // If we are reporting the same home again, we may be doing something twice?
    // noway_assert(! CodeGenInterface::siVarLoc::Equals(&m_VariableLiveRanges->back().m_VarLocation, varLocation));

    // Close the previous live range
    endLiveRangeAtEmitter(emit);

    startLiveRangeFromEmitter(varLocation, emit);
}

#ifdef DEBUG
void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::dumpAllRegisterLiveRangesForBlock(
    emitter* emit, const CodeGenInterface* codeGen) const
{
    bool first = true;
    for (LiveRangeListIterator it = m_VariableLiveRanges->begin(); it != m_VariableLiveRanges->end(); it++)
    {
        if (!first)
        {
            printf("; ");
        }
        it->dumpVariableLiveRange(emit, codeGen);
        first = false;
    }
}

void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::dumpRegisterLiveRangesForBlockBeforeCodeGenerated(
    const CodeGenInterface* codeGen) const
{
    bool first = true;
    for (LiveRangeListIterator it = m_VariableLifeBarrier->getStartForDump(); it != m_VariableLiveRanges->end(); it++)
    {
        if (!first)
        {
            printf("; ");
        }
        it->dumpVariableLiveRange(codeGen);
        first = false;
    }
}

// Returns true if a live range for this variable has been recorded
bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVarLiveRangesToDump() const
{
    return !m_VariableLiveRanges->empty();
}

// Returns true if a live range for this variable has been recorded since the last call to EndBlock
bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVarLiveRangesFromLastBlockToDump() const
{
    return m_VariableLifeBarrier->hasLiveRangesToDump();
}

// Reset the barrier so that only the next block's changes are dumped for that block
void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::endBlockLiveRanges()
{
    // make "m_VariableLifeBarrier->m_StartingLiveRange" now point to nullptr for printing purposes
    m_VariableLifeBarrier->resetDumper(m_VariableLiveRanges);
}
#endif // DEBUG

//------------------------------------------------------------------------
//                      VariableLiveKeeper
//------------------------------------------------------------------------
// Initialize structures for VariableLiveRanges
void CodeGenInterface::initializeVariableLiveKeeper()
{
    CompAllocator allocator = compiler->getAllocator(CMK_VariableLiveRanges);

    int amountTrackedVariables = compiler->opts.compDbgInfo ? compiler->info.compLocalsCount : 0;
    int amountTrackedArgs      = compiler->opts.compDbgInfo ? compiler->info.compArgsCount : 0;

    varLiveKeeper = new (allocator) VariableLiveKeeper(amountTrackedVariables, amountTrackedArgs, compiler, allocator);
}

CodeGenInterface::VariableLiveKeeper* CodeGenInterface::getVariableLiveKeeper() const
{
    return varLiveKeeper;
}

//------------------------------------------------------------------------
// VariableLiveKeeper: Create an instance of the object in charge of managing
//  VariableLiveRanges and initialize the array "m_vlrLiveDsc".
//
// Arguments:
//    totalLocalCount - the count of args, special args and IL Local
//                      variables in the method.
//    argsCount       - the count of args and special args in the method.
//    compiler        - a compiler instance
//
CodeGenInterface::VariableLiveKeeper::VariableLiveKeeper(unsigned int  totalLocalCount,
                                                         unsigned int  argsCount,
                                                         Compiler*     comp,
                                                         CompAllocator allocator)
    : m_LiveDscCount(totalLocalCount)
    , m_LiveArgsCount(argsCount)
    , m_Compiler(comp)
    , m_LastBasicBlockHasBeenEmitted(false)
{
    if (m_LiveDscCount > 0)
    {
        // Allocate memory for "m_vlrLiveDsc" and initialize each "VariableLiveDescriptor"
        m_vlrLiveDsc          = allocator.allocate<VariableLiveDescriptor>(m_LiveDscCount);
        m_vlrLiveDscForProlog = allocator.allocate<VariableLiveDescriptor>(m_LiveDscCount);

        for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
        {
            new (m_vlrLiveDsc + varNum, jitstd::placement_t()) VariableLiveDescriptor(allocator);
            new (m_vlrLiveDscForProlog + varNum, jitstd::placement_t()) VariableLiveDescriptor(allocator);
        }
    }
}

//------------------------------------------------------------------------
// siStartOrCloseVariableLiveRange: Reports the given variable as being born
//  or becoming dead.
//
// Arguments:
//    varDsc    - the variable for which a location change will be reported
//    varNum    - the index of the variable in the "compiler->lvaTable"
//    isBorn    - whether the variable is being born from where the emitter is located.
//    isDying   - whether the variable is dying from where the emitter is located.
//
// Assumptions:
//    The emitter should be located at the first instruction from which it is true that
//    the variable is becoming valid (when isBorn is true) or invalid (when isDying is true).
//
// Notes:
//    This method is being called from treeLifeUpdater when the variable is being born,
//    becoming dead, or both.
//
void CodeGenInterface::VariableLiveKeeper::siStartOrCloseVariableLiveRange(const LclVarDsc* varDsc,
                                                                           unsigned int     varNum,
                                                                           bool             isBorn,
                                                                           bool             isDying)
{
    noway_assert(varDsc != nullptr);

    // Only the variables that exist in the IL, "this", and special arguments
    // are reported.
    if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount)
    {
        if (isBorn && !isDying)
        {
            // "varDsc" is valid from this point
            siStartVariableLiveRange(varDsc, varNum);
        }
        if (isDying && !isBorn)
        {
            // this variable live range is no longer valid from this point
            siEndVariableLiveRange(varNum);
        }
    }
}

//------------------------------------------------------------------------
// siStartOrCloseVariableLiveRanges: Iterates the given set of variables
//  calling "siStartOrCloseVariableLiveRange" with each one.
//
// Arguments:
//    varsIndexSet - the set of variables to report start/end "VariableLiveRange"s for
//    isBorn       - whether the set is being born from where the emitter is located.
//    isDying      - whether the set is dying from where the emitter is located.
//
// Assumptions:
//    The emitter should be located at the first instruction from which it is true that
//    the variables are becoming valid (when isBorn is true) or invalid (when isDying is true).
//
// Notes:
//    This method is being called from treeLifeUpdater when a set of variables
//    is being born, becoming dead, or both.
//
void CodeGenInterface::VariableLiveKeeper::siStartOrCloseVariableLiveRanges(VARSET_VALARG_TP varsIndexSet,
                                                                            bool             isBorn,
                                                                            bool             isDying)
{
    if (m_Compiler->opts.compDbgInfo)
    {
        VarSetOps::Iter iter(m_Compiler, varsIndexSet);
        unsigned        varIndex = 0;
        while (iter.NextElem(&varIndex))
        {
            unsigned int     varNum = m_Compiler->lvaTrackedIndexToLclNum(varIndex);
            const LclVarDsc* varDsc = m_Compiler->lvaGetDesc(varNum);
            siStartOrCloseVariableLiveRange(varDsc, varNum, isBorn, isDying);
        }
    }
}

//------------------------------------------------------------------------
// siStartVariableLiveRange: Reports the given variable as being born.
//
// Arguments:
//    varDsc    - the variable for which a location change will be reported
//    varNum    - the index of the variable whose home is reported in lvLiveDsc
//
// Assumptions:
//    The emitter should be pointing to the first instruction from where the VariableLiveRange is
//    becoming valid.
//    The given "varDsc" should have its VariableRangeLists initialized.
//
// Notes:
//    This method should be called at every place a variable is becoming alive.
//
void CodeGenInterface::VariableLiveKeeper::siStartVariableLiveRange(const LclVarDsc* varDsc, unsigned int varNum)
{
    noway_assert(varDsc != nullptr);

    // Only the variables that exist in the IL, "this", and special arguments are reported, as long as they were
    // allocated.
    if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && (varDsc->lvIsInReg() || varDsc->lvOnFrame))
    {
        // Build siVarLoc for this born "varDsc"
        CodeGenInterface::siVarLoc varLocation =
            m_Compiler->codeGen->getSiVarLoc(varDsc, m_Compiler->codeGen->getCurrentStackLevel());

        VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDsc[varNum];
        // this variable live range is valid from this point
        varLiveDsc->startLiveRangeFromEmitter(varLocation, m_Compiler->GetEmitter());
    }
}

//------------------------------------------------------------------------
// siEndVariableLiveRange: Reports the variable as becoming dead.
//
// Arguments:
//    varNum    - the index (in m_vlrLiveDsc and lvaTable) of the variable
//                that is becoming dead.
//
// Assumptions:
//    The given variable should be alive.
//    The emitter should be pointing to the first instruction from where the VariableLiveRange is
//    becoming invalid.
//
// Notes:
//    This method should be called at every place a variable is becoming dead.
//
void CodeGenInterface::VariableLiveKeeper::siEndVariableLiveRange(unsigned int varNum)
{
    // Only the variables that exist in the IL, "this", and special arguments
    // will be reported.

    // This method is being called from genUpdateLife, which is called after
    // code for a BasicBlock has been generated; at that point the emitter no
    // longer has a valid IG, so we don't report the close of a
    // "VariableLiveRange" after code is emitted.

    if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && !m_LastBasicBlockHasBeenEmitted &&
        m_vlrLiveDsc[varNum].hasVariableLiveRangeOpen())
    {
        // this variable live range is no longer valid from this point
        m_vlrLiveDsc[varNum].endLiveRangeAtEmitter(m_Compiler->GetEmitter());
    }
}

//------------------------------------------------------------------------
// siUpdateVariableLiveRange: Reports the change of variable location for the
//  given variable.
//
// Arguments:
//    varDsc    - the variable whose home has changed.
//    varNum    - the index of the variable whose home is reported in lvLiveDsc
//
// Assumptions:
//    The given variable should be alive.
//    The emitter should be pointing to the first instruction from where
//    the new variable location is becoming valid.
//
void CodeGenInterface::VariableLiveKeeper::siUpdateVariableLiveRange(const LclVarDsc* varDsc, unsigned int varNum)
{
    noway_assert(varDsc != nullptr);

    // Only the variables that exist in the IL, "this", and special arguments
    // will be reported. These are locals and arguments, and are counted in
    // "info.compLocalsCount".

    // This method is also called while the prolog is being generated; once the
    // last block has been emitted the emitter no longer has a valid IG, so we
    // don't report the change of a "VariableLiveRange" after code is emitted.
    if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && !m_LastBasicBlockHasBeenEmitted)
    {
        // Build the location of the variable
        CodeGenInterface::siVarLoc siVarLoc =
            m_Compiler->codeGen->getSiVarLoc(varDsc, m_Compiler->codeGen->getCurrentStackLevel());

        // Report the home change for this variable
        VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDsc[varNum];
        varLiveDsc->updateLiveRangeAtEmitter(siVarLoc, m_Compiler->GetEmitter());
    }
}

//------------------------------------------------------------------------
// siEndAllVariableLiveRange: Reports the set of variables as becoming dead.
//
// Arguments:
//    varsToClose - the set of variables that are becoming dead.
//
// Assumptions:
//    All the variables in the set are alive.
//
// Notes:
//    This method is called when the last block is being generated, to kill all
//    the remaining live variables and set a flag so that variable locations are
//    not reported on subsequent calls to the methods that update variable liveness.
//
void CodeGenInterface::VariableLiveKeeper::siEndAllVariableLiveRange(VARSET_VALARG_TP varsToClose)
{
    if (m_Compiler->opts.compDbgInfo)
    {
        if (m_Compiler->lvaTrackedCount > 0 || !m_Compiler->opts.OptimizationDisabled())
        {
            VarSetOps::Iter iter(m_Compiler, varsToClose);
            unsigned        varIndex = 0;
            while (iter.NextElem(&varIndex))
            {
                unsigned int varNum = m_Compiler->lvaTrackedIndexToLclNum(varIndex);
                siEndVariableLiveRange(varNum);
            }
        }
        else
        {
            // It seems we are jitting debug code, so we don't have variable
            // liveness info
            siEndAllVariableLiveRange();
        }
    }

    m_LastBasicBlockHasBeenEmitted = true;
}

//------------------------------------------------------------------------
// siEndAllVariableLiveRange: Reports all live variables as dead.
//
// Notes:
//    This overload exists for the case where we are jitting code compiled in
//    debug mode. When that happens we don't have variable liveness info, such as
//    "BasicBlock::bbLiveIn" or "BasicBlock::bbLiveOut", and there are no
//    tracked variables.
//
void CodeGenInterface::VariableLiveKeeper::siEndAllVariableLiveRange()
{
    // TODO: we can improve this by keeping a set of the variables with
    // open VariableLiveRanges
    for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
    {
        const VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;
        if (varLiveDsc->hasVariableLiveRangeOpen())
        {
            siEndVariableLiveRange(varNum);
        }
    }
}

//------------------------------------------------------------------------
// getLiveRangesForVarForBody: Return the "VariableLiveRange"s that correspond to
//  the given "varNum".
//
// Arguments:
//    varNum  - the index of the variable in m_vlrLiveDsc, which is the same as
//              its index in lvaTable.
//
// Return Value:
//    A const pointer to the list of variable locations reported for the variable.
//
// Assumptions:
//    This variable should be an argument, a special argument or an IL local
//    variable.
//
CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::getLiveRangesForVarForBody(
    unsigned int varNum) const
{
    // There should be at least one variable for which liveness is tracked
    noway_assert(varNum < m_LiveDscCount);

    return m_vlrLiveDsc[varNum].getLiveRanges();
}

//------------------------------------------------------------------------
// getLiveRangesForVarForProlog: Return the prolog "VariableLiveRange"s that correspond to
//  the given "varNum".
//
// Arguments:
//    varNum  - the index of the variable in m_vlrLiveDsc, which is the same as
//              its index in lvaTable.
//
// Return Value:
//    A const pointer to the list of variable locations reported for the variable.
//
// Assumptions:
//    This variable should be an argument, a special argument or an IL local
//    variable.
//
CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::getLiveRangesForVarForProlog(
    unsigned int varNum) const
{
    // There should be at least one variable for which liveness is tracked
    noway_assert(varNum < m_LiveDscCount);

    return m_vlrLiveDscForProlog[varNum].getLiveRanges();
}

//------------------------------------------------------------------------
// getLiveRangesCount: Returns the count of variable locations reported for the tracked
//  variables, which are arguments, special arguments, and local IL variables.
//
// Return Value:
//    size_t - the count of variable locations
//
// Notes:
//    This method is called from "genSetScopeInfo" to get the count of
//    "varResultInfo" entries that should be created in eeSetLVcount.
//
size_t CodeGenInterface::VariableLiveKeeper::getLiveRangesCount() const
{
    size_t liveRangesCount = 0;

    if (m_Compiler->opts.compDbgInfo)
    {
        for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
        {
            for (int i = 0; i < 2; i++)
            {
                VariableLiveDescriptor* varLiveDsc = (i == 0 ? m_vlrLiveDscForProlog : m_vlrLiveDsc) + varNum;

                if (m_Compiler->compMap2ILvarNum(varNum) != (unsigned int)ICorDebugInfo::UNKNOWN_ILNUM)
                {
                    liveRangesCount += varLiveDsc->getLiveRanges()->size();
                }
            }
        }
    }

    return liveRangesCount;
}

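// For illustration: with two body ranges recorded for an IL local plus one
// prolog range recorded for an argument, getLiveRangesCount() returns 3;
// variables whose compMap2ILvarNum is ICorDebugInfo::UNKNOWN_ILNUM are
// excluded from the count.
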
//------------------------------------------------------------------------
// psiStartVariableLiveRange: Reports the given variable as being born.
//
// Arguments:
//    varLocation  - the variable location
//    varNum       - the index of the variable in "compiler->lvaTable" or
//                   "VariableLivekeeper->m_vlrLiveDsc"
//
// Notes:
//    This function is expected to be called from "psiBegProlog" during
//    prolog code generation.
//
void CodeGenInterface::VariableLiveKeeper::psiStartVariableLiveRange(CodeGenInterface::siVarLoc varLocation,
                                                                     unsigned int               varNum)
{
    // This descriptor has to correspond to a parameter. The first slots in lvaTable
    // are arguments and special arguments.
    noway_assert(varNum < m_LiveArgsCount);

    VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDscForProlog[varNum];
    varLiveDsc->startLiveRangeFromEmitter(varLocation, m_Compiler->GetEmitter());
}

//------------------------------------------------------------------------
// psiClosePrologVariableRanges: Report all the parameters as becoming dead.
//
// Notes:
//    This function is expected to be called from "psiEndProlog" after
//    the code for the prolog has been generated.
//
void CodeGenInterface::VariableLiveKeeper::psiClosePrologVariableRanges()
{
    noway_assert(m_LiveArgsCount <= m_LiveDscCount);

    for (unsigned int varNum = 0; varNum < m_LiveArgsCount; varNum++)
    {
        VariableLiveDescriptor* varLiveDsc = m_vlrLiveDscForProlog + varNum;

        if (varLiveDsc->hasVariableLiveRangeOpen())
        {
            varLiveDsc->endLiveRangeAtEmitter(m_Compiler->GetEmitter());
        }
    }
}

#ifdef DEBUG
void CodeGenInterface::VariableLiveKeeper::dumpBlockVariableLiveRanges(const BasicBlock* block)
{
    assert(block != nullptr);

    bool hasDumpedHistory = false;

    printf("\nVariable Live Range History Dump for " FMT_BB "\n", block->bbNum);

    if (m_Compiler->opts.compDbgInfo)
    {
        for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
        {
            VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;

            if (varLiveDsc->hasVarLiveRangesFromLastBlockToDump())
            {
                hasDumpedHistory = true;
                m_Compiler->gtDispLclVar(varNum, false);
                printf(": ");
                varLiveDsc->dumpRegisterLiveRangesForBlockBeforeCodeGenerated(m_Compiler->codeGen);
                varLiveDsc->endBlockLiveRanges();
                printf("\n");
            }
        }
    }

    if (!hasDumpedHistory)
    {
        printf("..None..\n");
    }
}

void CodeGenInterface::VariableLiveKeeper::dumpLvaVariableLiveRanges() const
{
    bool hasDumpedHistory = false;

    printf("VARIABLE LIVE RANGES:\n");

    if (m_Compiler->opts.compDbgInfo)
    {
        for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
        {
            VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;

            if (varLiveDsc->hasVarLiveRangesToDump())
            {
                hasDumpedHistory = true;
                m_Compiler->gtDispLclVar(varNum, false);
                printf(": ");
                varLiveDsc->dumpAllRegisterLiveRangesForBlock(m_Compiler->GetEmitter(), m_Compiler->codeGen);
                printf("\n");
            }
        }
    }

    if (!hasDumpedHistory)
    {
        printf("..None..\n");
    }
}
#endif // DEBUG

//-----------------------------------------------------------------------------
// genPoisonFrame: Generate code that places a recognizable value into address exposed variables.
//
// Remarks:
//   This function emits code to poison address exposed non-zero-inited local variables. We expect this function
//   to be called when emitting code for the scratch BB that comes right after the prolog.
//   The variables are poisoned using 0xcdcdcdcd.
void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
{
    assert(compiler->compShouldPoisonFrame());
#if defined(TARGET_XARCH)
    regNumber poisonValReg = REG_EAX;
    assert((regLiveIn & (RBM_EDI | RBM_ECX | RBM_EAX)) == 0);
#else
    regNumber poisonValReg = REG_SCRATCH;
    assert((regLiveIn & (genRegMask(REG_SCRATCH) | RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2)) == 0);
#endif

#ifdef TARGET_64BIT
    const ssize_t poisonVal = (ssize_t)0xcdcdcdcdcdcdcdcd;
#else
    const ssize_t poisonVal = (ssize_t)0xcdcdcdcd;
#endif

    // The first time we need to poison something we will initialize a register to the largest immediate cccccccc that
    // we can get.
    bool hasPoisonImm = false;
    for (unsigned varNum = 0; varNum < compiler->info.compLocalsCount; varNum++)
    {
        LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
        if (varDsc->lvIsParam || varDsc->lvMustInit || !varDsc->IsAddressExposed())
        {
            continue;
        }

        assert(varDsc->lvOnFrame);

        unsigned int size = compiler->lvaLclSize(varNum);
        if ((size / TARGET_POINTER_SIZE) > 16)
        {
            // This will require more than 16 instructions, switch to rep stosd/memset call.
            CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_XARCH)
            GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0);
            assert(size % 4 == 0);
            instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ECX, size / 4);
            // On xarch we can leave the value in eax and only set eax once
            // since rep stosd does not kill eax.
            if (!hasPoisonImm)
            {
                instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_EAX, poisonVal);
                hasPoisonImm = true;
            }
            instGen(INS_r_stosd);
#else
            GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
            instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_2, size);
            genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
            // May kill REG_SCRATCH, so we need to reload it.
            hasPoisonImm = false;
#endif
        }
        else
        {
            if (!hasPoisonImm)
            {
                instGen_Set_Reg_To_Imm(EA_PTRSIZE, poisonValReg, poisonVal);
                hasPoisonImm = true;
            }

// For 64-bit we check if the local is 8-byte aligned. For 32-bit, we assume everything is always 4-byte aligned.
#ifdef TARGET_64BIT
            bool fpBased;
            int  addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
#else
            int addr = 0;
#endif
            int end = addr + (int)size;
            for (int offs = addr; offs < end;)
            {
#ifdef TARGET_64BIT
                if ((offs % 8) == 0 && end - offs >= 8)
                {
                    GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
                    offs += 8;
                    continue;
                }
#endif

                assert((offs % 4) == 0 && end - offs >= 4);
                GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
                offs += 4;
            }
        }
    }
}

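// Sizing example for the threshold above (illustrative): on a 64-bit target a
// 128-byte local needs 128 / 8 == 16 stores, so it still uses the unrolled
// store loop; a 136-byte local makes "(size / TARGET_POINTER_SIZE) > 16" true
// and switches to the rep stosd / CORINFO_HELP_MEMSET path instead.
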
//----------------------------------------------------------------------
// genBitCast - Generate the instruction to move a value between register files
//
// Arguments:
//    targetType - the destination type
//    targetReg  - the destination register
//    srcType    - the source type
//    srcReg     - the source register
//
void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg)
{
    const bool srcFltReg = varTypeUsesFloatReg(srcType);
    assert(srcFltReg == genIsValidFloatReg(srcReg));

    const bool dstFltReg = varTypeUsesFloatReg(targetType);
    assert(dstFltReg == genIsValidFloatReg(targetReg));

    inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true);
}

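// Illustrative use (hypothetical registers): for a BITCAST of a TYP_FLOAT
// value in xmm0 to a TYP_INT in eax, this reduces to
// inst_Mov(TYP_INT, REG_EAX, REG_XMM0, /* canSkip */ true), which the emitter
// turns into a cross-register-file move (e.g. movd on xarch). canSkip only
// elides the move when targetReg == srcReg, which cannot happen across files.
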
//----------------------------------------------------------------------
// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained
//
// Arguments:
//    treeNode - the GT_BITCAST for which we're generating code
//
void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
{
    assert(treeNode->TypeGet() == genActualType(treeNode));
    regNumber targetReg  = treeNode->GetRegNum();
    var_types targetType = treeNode->TypeGet();
    GenTree*  op1        = treeNode->gtGetOp1();
    genConsumeRegs(op1);

    if (op1->isContained())
    {
        assert(op1->OperIs(GT_LCL_VAR));
        unsigned    lclNum  = op1->AsLclVarCommon()->GetLclNum();
        instruction loadIns = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lclNum));
        GetEmitter()->emitIns_R_S(loadIns, emitTypeSize(targetType), targetReg, lclNum, 0);
    }
    else
    {
#ifdef TARGET_ARM
        if (compiler->opts.compUseSoftFP && (targetType == TYP_LONG))
        {
            // This is a special arm-softFP case: a TYP_LONG node was introduced during lowering
            // for a call argument, so it was not handled by the decomposelongs phase as all other
            // TYP_LONG nodes are. Example: for foo(double LclVar V01), LclVar V01 has to be passed
            // in general registers r0, r1, so lowering will add `BITCAST long(LclVar double V01)`
            // and codegen has to support it here.
            const regNumber srcReg   = op1->GetRegNum();
            const regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
            assert(otherReg != REG_NA);
            inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, srcReg, EA_8BYTE);
        }
        else
#endif // TARGET_ARM
        {
            genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
        }
    }
    genProduceReg(treeNode);
}

//----------------------------------------------------------------------
// genCanOmitNormalizationForBswap16:
//   Small peephole to check if a bswap16 node can omit normalization.
//
// Arguments:
//   tree - The BSWAP16 node
//
// Remarks:
//   BSWAP16 nodes are required to zero extend the upper 16 bits, but since the
//   importer always inserts a normalizing cast (either sign or zero extending)
//   we almost never need to actually do this.
//
bool CodeGen::genCanOmitNormalizationForBswap16(GenTree* tree)
{
    if (compiler->opts.OptimizationDisabled())
    {
        return false;
    }

    assert(tree->OperIs(GT_BSWAP16));
    if ((tree->gtNext == nullptr) || !tree->gtNext->OperIs(GT_CAST))
    {
        return false;
    }

    GenTreeCast* cast = tree->gtNext->AsCast();
    if (cast->gtOverflow() || (cast->CastOp() != tree))
    {
        return false;
    }

    return (cast->gtCastType == TYP_USHORT) || (cast->gtCastType == TYP_SHORT);
}

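// Illustrative IR shape (hypothetical locals): for
//     t1 = BSWAP16(V00)
//     t2 = CAST<ushort>(t1)
// the trailing non-overflow cast re-normalizes the upper 16 bits anyway, so
// the BSWAP16 may omit its own zero-extension and this method returns true.
// With no trailing cast, an overflow-checking cast, or a cast of some other
// node, it returns false and the normalization is kept.
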
//----------------------------------------------------------------------
// genCodeForReuseVal: Generate code for a node marked with re-using a register.
//
// Arguments:
//   tree - The node marked with re-using a register
//
// Remarks:
//   Generates nothing, except for when the node is a CNS_INT(0) where
//   we will define a new label to propagate GC info. We want to do this
//   because if the node is a CNS_INT(0) and is re-using a register,
//   that register could have been used for a CNS_INT(ref null) that is GC
//   tracked.
//
void CodeGen::genCodeForReuseVal(GenTree* treeNode)
{
    assert(treeNode->IsReuseRegVal());

    // For now, this is only used for constant nodes.
    assert(treeNode->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC));
    JITDUMP("  TreeNode is marked ReuseReg\n");

    if (treeNode->IsIntegralConst(0) && GetEmitter()->emitCurIGnonEmpty())
    {
        genDefineTempLabel(genCreateTempLabel());