1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
4 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX Code Generator Common: XX
8 XX Methods common to all architectures and register allocation strategies XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
15 // identical, and which should probably be moved here.
26 #ifndef JIT32_GCENCODER
27 #include "gcinfoencoder.h"
30 #include "patchpointinfo.h"
32 /*****************************************************************************/
// setFramePointerRequiredEH: record that EH in this method requires a frame
// pointer, and (when not using the JIT32 GC encoder) mark the method as fully
// interruptible so the GC can enumerate frame variables in aborted frames.
// NOTE(review): this listing is a partial dump — the function braces and the
// guard around the fully-interruptible path (and around the printf) are not
// visible; confirm control flow against the full source.
34 void CodeGenInterface::setFramePointerRequiredEH(bool value)
36 m_cgFramePointerRequired = value;
38 #ifndef JIT32_GCENCODER
41 // EnumGcRefs will only enumerate slots in aborted frames
42 // if they are fully-interruptible. So if we have a catch
43 // or finally that will keep frame-vars alive, we need to
44 // force fully-interruptible.
45 CLANG_FORMAT_COMMENT_ANCHOR;
// Presumably guarded by a verbose/dump check in the full source — verify.
50 printf("Method has EH, marking method as fully interruptible\n");
54 m_cgInterruptible = true;
56 #endif // JIT32_GCENCODER
59 /*****************************************************************************/
60 CodeGenInterface* getCodeGenerator(Compiler* comp)
62 return new (comp, CMK_Codegen) CodeGen(comp);
65 // CodeGen constructor
// CodeGenInterface constructor: wires up the GC info, the register set (which
// itself references gcInfo), and the compiler back-pointer. treeLifeUpdater is
// left null here; it is created later in genPrepForCompiler().
66 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
67 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
// CopyRegisterInfo: snapshot the compiler's computed register masks into the
// code generator's own fields (XARCH only; the float masks are AMD64-only).
71 #if defined(TARGET_XARCH)
72 void CodeGenInterface::CopyRegisterInfo()
74 #if defined(TARGET_AMD64)
75 rbmAllFloat = compiler->rbmAllFloat;
76 rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
77 #endif // TARGET_AMD64
79 rbmAllMask = compiler->rbmAllMask;
80 rbmMskCalleeTrash = compiler->rbmMskCalleeTrash;
82 #endif // TARGET_XARCH
84 /*****************************************************************************/
// CodeGen constructor: initialize per-compilation codegen state to safe
// defaults. NOTE(review): this listing is a partial dump — several #ifdef
// openers (e.g. the TARGET_AMD64 opener matching the #endif below, and the
// TARGET_ARM64 opener for the funclet fields at the end) and the function
// braces are not visible; confirm against the full source.
86 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
// XARCH: lazily-created data-section constants used by FP/SIMD codegen start
// out null and are allocated on first use.
88 #if defined(TARGET_XARCH)
89 negBitmaskFlt = nullptr;
90 negBitmaskDbl = nullptr;
91 absBitmaskFlt = nullptr;
92 absBitmaskDbl = nullptr;
93 zroSimd12Elm3 = nullptr;
94 u8ToDblBitmask = nullptr;
95 #endif // defined(TARGET_XARCH)
97 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86)
98 m_stkArgVarNum = BAD_VAR_NUM;
101 #if defined(UNIX_X86_ABI)
102 curNestedAlignment = 0;
103 maxNestedAlignment = 0;
// NOTE(review): "®Set" below is mojibake for "&regSet" (an "&reg" sequence
// mis-decoded as the registered-sign entity) — restore when fixing the file.
106 gcInfo.regSet = ®Set;
// Create the emitter and link it back to this code generator and its GC info.
107 m_cgEmitter = new (compiler->getAllocator()) emitter();
108 m_cgEmitter->codeGen = this;
109 m_cgEmitter->gcInfo = &gcInfo;
// Presumably inside #ifdef DEBUG / LATE_DISASM guards in the full source.
112 setVerbose(compiler->verbose);
118 getDisAssembler().disInit(compiler);
122 genTempLiveChg = true;
123 genTrnslLocalVarCount = 0;
125 // Shouldn't be used before it is set in genFnProlog()
126 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
128 #if defined(TARGET_XARCH)
129 // Shouldn't be used before it is set in genFnProlog()
130 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
131 #endif // defined(TARGET_XARCH)
135 // This will be set before final frame layout.
136 compiler->compVSQuirkStackPaddingNeeded = 0;
137 #endif // TARGET_AMD64
139 compiler->genCallSite2DebugInfoMap = nullptr;
141 /* Assume that we not fully interruptible */
143 SetInterruptible(false);
144 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
145 SetHasTailCalls(false);
146 #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
148 genInterruptibleUsed = false;
149 genCurDispOffset = (unsigned)-1;
// ARM64 funclet-frame options (opener #ifdef not visible in this listing).
153 genSaveFpLrWithAllCalleeSavedRegisters = false;
154 genForceFuncletFrameType5 = false;
155 #endif // TARGET_ARM64
158 #if defined(TARGET_X86) || defined(TARGET_ARM)
160 //---------------------------------------------------------------------
161 // genTotalFrameSize - return the "total" size of the stack frame, including local size
162 // and callee-saved register size. There are a few things "missing" depending on the
163 // platform. The function genCallerSPtoInitialSPdelta() includes those things.
165 // For ARM, this doesn't include the prespilled registers.
167 // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
168 // It also doesn't include the pushed return address.
173 int CodeGenInterface::genTotalFrameSize() const
175 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
177 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
179 assert(totalFrameSize >= 0);
180 return totalFrameSize;
183 //---------------------------------------------------------------------
184 // genSPtoFPdelta - return the offset from SP to the frame pointer.
185 // This number is going to be positive, since SP must be at the lowest
188 // There must be a frame pointer to call this function!
// genSPtoFPdelta: offset from SP to the frame pointer (non-negative, since SP
// is at the lowest address in the frame). Requires a frame pointer.
// NOTE(review): the declaration of 'delta' and the trailing
// assert/return are not visible in this partial listing.
190 int CodeGenInterface::genSPtoFPdelta() const
192 assert(isFramePointerUsed());
196 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
202 //---------------------------------------------------------------------
203 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
204 // This number is going to be negative, since the Caller-SP is at a higher
205 // address than the frame pointer.
207 // There must be a frame pointer to call this function!
// genCallerSPtoFPdelta: offset from Caller-SP to the frame pointer; always
// <= 0 since Caller-SP is at a higher address. Requires a frame pointer.
// NOTE(review): the #else preceding the #error and the matching #endif are
// not visible in this partial listing.
209 int CodeGenInterface::genCallerSPtoFPdelta() const
211 assert(isFramePointerUsed());
212 int callerSPtoFPdelta = 0;
214 #if defined(TARGET_ARM)
215 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
216 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
217 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
218 #elif defined(TARGET_X86)
219 // Thanks to ebp chaining, the difference between ebp-based addresses
220 // and caller-SP-relative addresses is just the 2 pointers:
// (the two pointers being the return address and the saved ebp)
223 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
225 #error "Unknown TARGET"
228 assert(callerSPtoFPdelta <= 0);
229 return callerSPtoFPdelta;
232 //---------------------------------------------------------------------
233 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
235 // This number will be negative.
// genCallerSPtoInitialSPdelta: offset from Caller-SP to Initial SP; always
// <= 0. NOTE(review): the #else preceding the #error and the matching #endif,
// plus the closing brace of the x86 'if', are not visible in this listing.
237 int CodeGenInterface::genCallerSPtoInitialSPdelta() const
239 int callerSPtoSPdelta = 0;
241 #if defined(TARGET_ARM)
242 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
243 callerSPtoSPdelta -= genTotalFrameSize();
244 #elif defined(TARGET_X86)
245 callerSPtoSPdelta -= genTotalFrameSize();
246 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
248 // compCalleeRegsPushed does not account for the frame pointer
249 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
250 if (isFramePointerUsed())
252 callerSPtoSPdelta -= REGSIZE_BYTES;
255 #error "Unknown TARGET"
258 assert(callerSPtoSPdelta <= 0);
259 return callerSPtoSPdelta;
262 #endif // defined(TARGET_X86) || defined(TARGET_ARM)
264 /*****************************************************************************
265 * Should we round simple operations (assignments, arithmetic operations, etc.)
// genShouldRoundFP: decide whether simple FP operations should be rounded
// based on the current float rounding level.
// NOTE(review): the switch skeleton (switch statement, other case labels,
// and the per-case returns) is not visible in this partial listing.
270 bool CodeGen::genShouldRoundFP()
272 RoundLevel roundLevel = getRoundFloatLevel();
277 case ROUND_CMP_CONST:
282 assert(roundLevel == ROUND_ALWAYS);
287 /*****************************************************************************
289 * Initialize some global variables.
// genPrepForCompiler: one-time codegen initialization — create the tree life
// updater and build gcTrkStkPtrLcls (tracked GC locals that live on the stack
// for some or all of their lifetime).
// NOTE(review): the declarations of 'varNum'/'varDsc' and the loop/if braces
// are not visible in this partial listing.
292 void CodeGen::genPrepForCompiler()
294 treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
296 /* Figure out which non-register variables hold pointers */
298 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
300 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
301 // in a register (i.e. they live on the stack for all or part of their lifetime).
302 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
306 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
308 if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
310 if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
312 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
// Reset the last-live tracking state and record the block count at codegen
// start (used to assert no blocks are added/removed during codegen).
316 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
317 genLastLiveMask = RBM_NONE;
319 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
323 //------------------------------------------------------------------------
324 // genMarkLabelsForCodegen: Mark labels required for codegen.
326 // Mark all blocks that require a label with BBF_HAS_LABEL. These are either blocks that are:
327 // 1. the target of jumps (fall-through flow doesn't require a label),
328 // 2. referenced labels such as for "switch" codegen,
329 // 3. needed to denote the range of EH regions to the VM.
330 // 4. needed to denote the range of code for alignment processing.
332 // No labels will be in the IR before now, but future codegen might annotate additional blocks
333 // with this flag, such as "switch" codegen, or codegen-created blocks from genCreateTempLabel().
334 // Also, the alignment processing code marks BBJ_COND fall-through labels elsewhere.
336 // To report exception handling information to the VM, we need the size of the exception
337 // handling regions. To compute that, we need to emit labels for the beginning block of
338 // an EH region, and the block that immediately follows a region. Go through the EH
339 // table and mark all these blocks with BBF_HAS_LABEL to make this happen.
341 // This code is closely couple with genReportEH() in the sense that any block
342 // that this procedure has determined it needs to have a label has to be selected
343 // using the same logic both here and in genReportEH(), so basically any time there is
344 // a change in the way we handle EH reporting, we have to keep the logic of these two
345 // methods 'in sync'.
347 // No blocks should be added or removed after this.
// genMarkLabelsForCodegen: set BBF_HAS_LABEL on every block that needs an
// emitter label — jump/switch targets, the first block, callfinally-thunk
// region ends, throw helper blocks, and EH region begin/end blocks (kept in
// sync with genReportEH, per the header comment above).
// NOTE(review): this listing is a partial dump — braces, some case labels,
// the switch default skeleton, and #ifdef DEBUG guards are not visible;
// confirm structure against the full source.
349 void CodeGen::genMarkLabelsForCodegen()
351 assert(!compiler->fgSafeBasicBlockCreation);
353 JITDUMP("Mark labels for codegen\n");
356 // No label flags should be set before this.
357 for (BasicBlock* const block : compiler->Blocks())
359 assert((block->bbFlags & BBF_HAS_LABEL) == 0);
363 // The first block is special; it always needs a label. This is to properly set up GC info.
364 JITDUMP("  " FMT_BB " : first block\n", compiler->fgFirstBB->bbNum);
365 compiler->fgFirstBB->bbFlags |= BBF_HAS_LABEL;
367 // The current implementation of switch tables requires the first block to have a label so it
368 // can generate offsets to the switch label targets.
369 // (This is duplicative with the fact we always set the first block with a label above.)
370 // TODO-CQ: remove this when switches have been re-implemented to not use this.
371 if (compiler->fgHasSwitch)
373 JITDUMP("  " FMT_BB " : function has switch; mark first block\n", compiler->fgFirstBB->bbNum);
374 compiler->fgFirstBB->bbFlags |= BBF_HAS_LABEL;
// Mark the targets of branches and switches.
377 for (BasicBlock* const block : compiler->Blocks())
379 switch (block->bbJumpKind)
381 case BBJ_ALWAYS: // This will also handle the BBJ_ALWAYS of a BBJ_CALLFINALLY/BBJ_ALWAYS pair.
384 JITDUMP("  " FMT_BB " : branch target\n", block->bbJumpDest->bbNum);
385 block->bbJumpDest->bbFlags |= BBF_HAS_LABEL;
389 for (BasicBlock* const bTarget : block->SwitchTargets())
391 JITDUMP("  " FMT_BB " : branch target\n", bTarget->bbNum);
392 bTarget->bbFlags |= BBF_HAS_LABEL;
396 case BBJ_CALLFINALLY:
397 // The finally target itself will get marked by walking the EH table, below, and marking
398 // all handler begins.
399 CLANG_FORMAT_COMMENT_ANCHOR;
401 #if FEATURE_EH_CALLFINALLY_THUNKS
403 // For callfinally thunks, we need to mark the block following the callfinally/always pair,
404 // as that's needed for identifying the range of the "duplicate finally" region in EH data.
405 BasicBlock* bbToLabel = block->bbNext;
406 if (block->isBBCallAlwaysPair())
408 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
410 if (bbToLabel != nullptr)
412 JITDUMP("  " FMT_BB " : callfinally thunk region end\n", bbToLabel->bbNum);
413 bbToLabel->bbFlags |= BBF_HAS_LABEL;
416 #endif // FEATURE_EH_CALLFINALLY_THUNKS
// Jump kinds that require no target label (fall-through / EH returns).
420 case BBJ_EHFINALLYRET:
422 case BBJ_EHFILTERRET:
429 noway_assert(!"Unexpected bbJumpKind");
434 // Walk all the exceptional code blocks and mark them, since they don't appear in the normal flow graph.
435 for (Compiler::AddCodeDsc* add = compiler->fgAddCodeList; add; add = add->acdNext)
437 JITDUMP("  " FMT_BB " : throw helper block\n", add->acdDstBlk->bbNum);
438 add->acdDstBlk->bbFlags |= BBF_HAS_LABEL;
// Mark EH region boundaries: try/handler begins, the blocks following the
// last try/handler blocks, and filter begins.
441 for (EHblkDsc* const HBtab : EHClauses(compiler))
443 HBtab->ebdTryBeg->bbFlags |= BBF_HAS_LABEL;
444 HBtab->ebdHndBeg->bbFlags |= BBF_HAS_LABEL;
446 JITDUMP("  " FMT_BB " : try begin\n", HBtab->ebdTryBeg->bbNum);
447 JITDUMP("  " FMT_BB " : hnd begin\n", HBtab->ebdHndBeg->bbNum);
449 if (HBtab->ebdTryLast->bbNext != nullptr)
451 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
452 JITDUMP("  " FMT_BB " : try end\n", HBtab->ebdTryLast->bbNext->bbNum);
455 if (HBtab->ebdHndLast->bbNext != nullptr)
457 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
458 JITDUMP("  " FMT_BB " : hnd end\n", HBtab->ebdHndLast->bbNext->bbNum);
461 if (HBtab->HasFilter())
463 HBtab->ebdFilter->bbFlags |= BBF_HAS_LABEL;
464 JITDUMP("  " FMT_BB " : filter begin\n", HBtab->ebdFilter->bbNum);
// Presumably inside an #ifdef DEBUG guard in the full source — verify.
469 if (compiler->verbose)
471 printf("*************** After genMarkLabelsForCodegen()\n");
472 compiler->fgDispBasicBlocks();
477 void CodeGenInterface::genUpdateLife(GenTree* tree)
479 treeLifeUpdater->UpdateLife(tree);
482 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
484 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
487 // Return the register mask for the given register variable
489 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
491 regMaskTP regMask = RBM_NONE;
493 assert(varDsc->lvIsInReg());
495 regNumber reg = varDsc->GetRegNum();
496 if (genIsValidFloatReg(reg))
498 regMask = genRegMaskFloat(reg ARM_ARG(varDsc->GetRegisterType()));
502 regMask = genRegMask(reg);
507 // Return the register mask for the given lclVar or regVar tree node
509 regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
511 assert(tree->gtOper == GT_LCL_VAR);
513 regMaskTP regMask = RBM_NONE;
514 const LclVarDsc* varDsc = compiler->lvaGetDesc(tree->AsLclVarCommon());
515 if (varDsc->lvPromoted)
517 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
519 const LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(i);
520 noway_assert(fieldVarDsc->lvIsStructField);
521 if (fieldVarDsc->lvIsInReg())
523 regMask |= genGetRegMask(fieldVarDsc);
527 else if (varDsc->lvIsInReg())
529 regMask = genGetRegMask(varDsc);
534 // The given lclVar is either going live (being born) or dying.
535 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
536 // Update regSet.GetMaskVars() accordingly.
// genUpdateRegLife: the given lclVar is going live (born) and/or dying;
// update regSet.GetMaskVars() accordingly (see the header comment above).
// NOTE(review): partial listing — the #ifdef DEBUG guard around the dump and
// the if(isDying)/else skeleton separating Remove/AddMaskVars are not
// visible; confirm against the full source.
538 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
540 regMaskTP regMask = genGetRegMask(varDsc);
543 if (compiler->verbose)
545 printf("\t\t\t\t\t\t\tV%02u in reg ", compiler->lvaGetLclNum(varDsc));
547 varDsc->PrintVarReg();
548 printf(" is becoming %s ", (isDying) ? "dead" : "live");
549 Compiler::printTreeID(tree);
// Dying path: remove this variable's register(s) from the live mask.
556 // We'd like to be able to assert the following, however if we are walking
557 // through a qmark/colon tree, we may encounter multiple last-use nodes.
558 // assert((regSet.GetMaskVars() & regMask) == regMask);
559 regSet.RemoveMaskVars(regMask);
// Born path: the register must be free unless the variable is always alive
// in memory (EH / spill-at-single-def vars).
563 // If this is going live, the register must not have a variable in it, except
564 // in the case of an exception or "spill at single-def" variable, which may be already treated
565 // as live in the register.
566 assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars() & regMask) == 0));
567 regSet.AddMaskVars(regMask);
571 //----------------------------------------------------------------------
572 // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
573 // Not all JIT Helper calls follow the standard ABI on the target architecture.
575 // TODO-CQ: Currently this list is incomplete (not all helpers calls are
576 // enumerated) and not 100% accurate (some killsets are bigger than
577 // what they really are).
578 // There's some work to be done in several places in the JIT to
579 // accurately track the registers that are getting killed by
581 // a) LSRA needs several changes to accommodate more precise killsets
582 // for every helper call it sees (both explicitly [easy] and
583 // implicitly [hard])
584 // b) Currently for AMD64, when we generate code for a helper call
585 // we're independently over-pessimizing the killsets of the call
586 // (independently from LSRA) and this needs changes
587 // both in CodeGenAmd64.cpp and emitx86.cpp.
589 // The best solution for this problem would be to try to centralize
590 // the killset information in a single place but then make the
591 // corresponding changes so every code generation phase is in sync
594 // The interim solution is to only add known helper calls that don't
595 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
598 // helper - The helper being inquired about
601 // Mask of register kills -- registers whose values are no longer guaranteed to be the same.
// compHelperCallKillSet: register kill set for a JIT helper call (see the
// caveats in the header comment above — the list is incomplete and some
// killsets are over-approximations).
// NOTE(review): partial listing — the switch statement itself, the x86-only
// #ifdef around the per-register write-barrier cases and their return, and
// the default label are not visible; confirm against the full source.
603 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
607 case CORINFO_HELP_ASSIGN_REF:
608 case CORINFO_HELP_CHECKED_ASSIGN_REF:
609 return RBM_CALLEE_TRASH_WRITEBARRIER;
611 case CORINFO_HELP_ASSIGN_BYREF:
612 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
614 case CORINFO_HELP_PROF_FCN_ENTER:
615 return RBM_PROFILER_ENTER_TRASH;
617 case CORINFO_HELP_PROF_FCN_LEAVE:
618 return RBM_PROFILER_LEAVE_TRASH;
620 case CORINFO_HELP_PROF_FCN_TAILCALL:
621 return RBM_PROFILER_TAILCALL_TRASH;
// Per-register write-barrier helpers (x86 JIT32 GC encoder style); their
// return statement is not visible in this listing.
624 case CORINFO_HELP_ASSIGN_REF_EAX:
625 case CORINFO_HELP_ASSIGN_REF_ECX:
626 case CORINFO_HELP_ASSIGN_REF_EBX:
627 case CORINFO_HELP_ASSIGN_REF_EBP:
628 case CORINFO_HELP_ASSIGN_REF_ESI:
629 case CORINFO_HELP_ASSIGN_REF_EDI:
631 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
632 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
633 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
634 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
635 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
636 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
640 case CORINFO_HELP_STOP_FOR_GC:
641 return RBM_STOP_FOR_GC_TRASH;
643 case CORINFO_HELP_INIT_PINVOKE_FRAME:
644 return RBM_INIT_PINVOKE_FRAME_TRASH;
646 case CORINFO_HELP_VALIDATE_INDIRECT_CALL:
647 return RBM_VALIDATE_INDIRECT_CALL_TRASH;
// Default: conservatively assume the full callee-trash set.
650 return RBM_CALLEE_TRASH;
654 //------------------------------------------------------------------------
655 // compChangeLife: Compare the given "newLife" with last set of live variables and update
656 // codeGen "gcInfo", siScopes, "regSet" with the new variable's homes/liveness.
659 // newLife - the new set of variables that are alive.
662 // The set of live variables reflects the result of only emitted code, it should not be considering the becoming
663 // live/dead of instructions that has not been emitted yet. This is used to ensure [) "VariableLiveRange"
664 // intervals when calling "siStartVariableLiveRange" and "siEndVariableLiveRange".
667 // If "ForCodeGen" is false, only "compCurLife" set (and no mask) will be setted.
// compChangeLife: diff the current live set against 'newLife' and update
// compCurLife plus (when ForCodeGen) the codegen GC info, register masks and
// variable live ranges. Dying vars are processed before born vars since a
// newly-live var may reuse a dying var's register (see comment below).
// NOTE(review): partial listing — #ifdef DEBUG guards, the early-out path
// when !ForCodeGen (note compCurLife is assigned twice below, consistent with
// an early return between them in the full source), and several braces are
// not visible; confirm against the full source.
669 template <bool ForCodeGen>
670 void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
675 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
676 dumpConvertedVarSet(this, compCurLife);
677 printf(" -> %s ", VarSetOps::ToString(this, newLife));
678 dumpConvertedVarSet(this, newLife);
683 /* We should only be called when the live set has actually changed */
685 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
689 VarSetOps::Assign(this, compCurLife, newLife);
693 /* Figure out which variables are becoming live/dead at this point */
695 // deadSet = compCurLife - newLife
696 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
698 // bornSet = newLife - compCurLife
699 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
701 /* Can't simultaneously become live and dead at the same time */
703 // (deadSet UNION bornSet) != EMPTY
704 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
705 // (deadSet INTERSECTION bornSet) == EMPTY
706 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
708 VarSetOps::Assign(this, compCurLife, newLife);
710 // Handle the dying vars first, then the newly live vars.
711 // This is because, in the RyuJIT backend case, they may occupy registers that
712 // will be occupied by another var that is newly live.
713 VarSetOps::Iter deadIter(this, deadSet);
714 unsigned deadVarIndex = 0;
715 while (deadIter.NextElem(&deadVarIndex))
717 unsigned varNum = lvaTrackedIndexToLclNum(deadVarIndex);
718 LclVarDsc* varDsc = lvaGetDesc(varNum);
719 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
720 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
721 bool isInReg = varDsc->lvIsInReg();
722 bool isInMemory = !isInReg || varDsc->IsAlwaysAliveInMemory();
// For a dying enregistered var, clear its register(s) out of the GC ref /
// byref register sets before updating the variable mask.
726 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
728 regMaskTP regMask = varDsc->lvRegMask();
731 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
735 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
737 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
739 // Update the gcVarPtrSetCur if it is in memory.
740 if (isInMemory && (isGCRef || isByRef))
742 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
743 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
// Close the debug-info live range for the dying variable.
746 codeGen->getVariableLiveKeeper()->siEndVariableLiveRange(varNum);
749 VarSetOps::Iter bornIter(this, bornSet);
750 unsigned bornVarIndex = 0;
751 while (bornIter.NextElem(&bornVarIndex))
753 unsigned varNum = lvaTrackedIndexToLclNum(bornVarIndex);
754 LclVarDsc* varDsc = lvaGetDesc(varNum);
755 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
756 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
758 if (varDsc->lvIsInReg())
760 // If this variable is going live in a register, it is no longer live on the stack,
761 // unless it is an EH/"spill at single-def" var, which always remains live on the stack.
762 if (!varDsc->IsAlwaysAliveInMemory())
765 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
767 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
770 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
772 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
773 regMaskTP regMask = varDsc->lvRegMask();
776 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
780 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
783 else if (lvaIsGCTracked(varDsc))
785 // This isn't in a register, so update the gcVarPtrSetCur to show that it's live on the stack.
786 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
787 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
// Open the debug-info live range for the newly live variable.
790 codeGen->getVariableLiveKeeper()->siStartVariableLiveRange(varDsc, varNum);
794 // Need an explicit instantiation.
795 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
797 /*****************************************************************************
801 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
803 GetEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
806 /*****************************************************************************
810 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
812 GetEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
816 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
821 //----------------------------------------------------------------------
822 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
825 // tree - spilled GenTree node
828 // TempDsc corresponding to tree
829 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
831 // tree must be in spilled state.
832 assert((tree->gtFlags & GTF_SPILLED) != 0);
834 // Get the tree's SpillDsc.
835 RegSet::SpillDsc* prevDsc;
836 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->GetRegNum(), &prevDsc);
837 assert(spillDsc != nullptr);
839 // Get the temp desc.
840 TempDsc* temp = regSet.rsGetSpillTempWord(tree->GetRegNum(), spillDsc, prevDsc);
844 /*****************************************************************************
846 * The following can be used to create basic blocks that serve as labels for
847 * the emitter. Use with caution - these are not real basic blocks!
// genCreateTempLabel: create a pseudo basic block to serve as an emitter
// label (not a real flow-graph block — see the warning in the header above).
// NOTE(review): partial listing — the #ifdef DEBUG guards, the
// #if defined(UNIX_X86_ABI)/#else split between the two bbTgtStkDepth
// assignments, and the trailing 'return block;' are not visible.
852 BasicBlock* CodeGen::genCreateTempLabel()
855 // These blocks don't affect FP
856 compiler->fgSafeBasicBlockCreation = true;
859 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
862 compiler->fgSafeBasicBlockCreation = false;
865 JITDUMP("Mark " FMT_BB " as label: codegen temp block\n", block->bbNum);
866 block->bbFlags |= BBF_HAS_LABEL;
868 // Use coldness of current block, as this label will
869 // be contained in it.
870 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
// Record the stack depth at the label (x86-style stack-level tracking);
// the UNIX_X86_ABI variant subtracts the current nested alignment.
874 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
876 block->bbTgtStkDepth = genStackLevel / sizeof(int);
// genLogLabel: when disassembly dumping is enabled, print a label line for
// the given block. NOTE(review): the enclosing #ifdef DEBUG and braces are
// not visible in this partial listing.
882 void CodeGen::genLogLabel(BasicBlock* bb)
885 if (compiler->opts.dspCode)
887 printf("\n L_M%03u_" FMT_BB ":\n", compiler->compMethodID, bb->bbNum);
892 // genDefineTempLabel: Define a label based on the current GC info tracked by
893 // the code generator.
896 // label - A label represented as a basic block. These are created with
897 // genCreateTempLabel and are not normal basic blocks.
900 // The label will be defined with the current GC info tracked by the code
901 // generator. When the emitter sees this label it will thus remove any temporary
902 // GC refs it is tracking in registers. For example, a call might produce a ref
903 // in RAX which the emitter would track but which would not be tracked in
904 // codegen's GC info since codegen would immediately copy it from RAX into its
// genDefineTempLabel: define the label for a genCreateTempLabel() block using
// the code generator's current GC tracking state (see the header comment
// above for why this flushes the emitter's temporary register GC refs).
907 void CodeGen::genDefineTempLabel(BasicBlock* label)
// genLogLabel(label) presumably precedes this in the full source — verify.
910 label->bbEmitCookie = GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
911 gcInfo.gcRegByrefSetCur, false DEBUG_ARG(label));
914 // genDefineInlineTempLabel: Define an inline label that does not affect the GC
918 // label - A label represented as a basic block. These are created with
919 // genCreateTempLabel and are not normal basic blocks.
922 // The emitter will continue to track GC info as if there was no label.
// genDefineInlineTempLabel: define an inline label that does not disturb the
// emitter's GC tracking (see the header comment above).
924 void CodeGen::genDefineInlineTempLabel(BasicBlock* label)
927 label->bbEmitCookie = GetEmitter()->emitAddInlineLabel();
930 //------------------------------------------------------------------------
931 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
934 // block - The BasicBlock for which we are about to generate code.
937 // Must be called just prior to generating code for 'block'.
940 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
941 // and if 'block' is a throw helper block with a non-zero stack level.
943 void CodeGen::genAdjustStackLevel(BasicBlock* block)
// genAdjustStackLevel: adjust genStackLevel (and SP) when about to generate
// code for a throw helper block, on targets without fixed out-arg areas (see
// the header comment above).
// NOTE(review): partial listing — the #endif closing UNIX_X86_ABI, the braces,
// and the #else branch pairing with the NYI below are not visible; confirm
// structure against the full source.
945 #if !FEATURE_FIXED_OUT_ARGS
946 // Check for inserted throw blocks and adjust genStackLevel.
947 CLANG_FORMAT_COMMENT_ANCHOR;
949 #if defined(UNIX_X86_ABI)
950 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
952 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
953 // at this point if a jump to this block is made in the middle of pushing arguments.
955 // Here we restore SP to prevent potential stack alignment issues.
956 GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
960 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
962 noway_assert(block->bbFlags & BBF_HAS_LABEL);
// Restore the tracked stack level recorded for this throw helper block.
964 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
966 if (genStackLevel != 0)
// Tell the emitter the stack level, then pop the pushed arguments off SP.
969 GetEmitter()->emitMarkStackLvl(genStackLevel);
970 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
973 NYI("Need emitMarkStackLvl()");
977 #endif // !FEATURE_FIXED_OUT_ARGS
980 /*****************************************************************************
982 * Take an address expression and try to find the best set of components to
983 * form an address mode; returns non-zero if this is successful.
985 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
986 * Refactor this code so that the underlying analysis can be used in
987 * the RyuJIT Backend to do lowering, instead of having to call this method with the
988 * option to not generate the code.
990 * 'fold' specifies if it is OK to fold the array index which hangs off
993 * If successful, the parameters will be set to the following values:
995 * *rv1Ptr ... base operand
996 * *rv2Ptr ... optional operand
997 * *revPtr ... true if rv2 is before rv1 in the evaluation order
998 * *mulPtr ... optional multiplier (2/4/8) for rv2
999 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1000 * *cnsPtr ... integer constant [optional]
1002 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1003 * identifies the components that might be used to
1004 * form an address mode later on.
1007 bool CodeGen::genCreateAddrMode(
1008 GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr)
1011 The following indirections are valid address modes on x86/x64:
1013 [ icon] * not handled here
1017 [reg1 + reg2 + icon]
1024 [reg1 + 2 * reg2 + icon]
1025 [reg1 + 4 * reg2 + icon]
1026 [reg1 + 8 * reg2 + icon]
1028 The following indirections are valid address modes on arm64:
1033 [reg1 + reg2 * natural-scale]
1037 /* All indirect address modes require the address to be an addition */
1039 if (!addr->OperIs(GT_ADD))
1044 GenTree* rv1 = nullptr;
1045 GenTree* rv2 = nullptr;
1055 /* What order are the sub-operands to be evaluated */
1057 if (addr->gtFlags & GTF_REVERSE_OPS)
1059 op1 = addr->AsOp()->gtOp2;
1060 op2 = addr->AsOp()->gtOp1;
1064 op1 = addr->AsOp()->gtOp1;
1065 op2 = addr->AsOp()->gtOp2;
1068 // Can't use indirect addressing mode as we need to check for overflow.
1069 // Also, can't use 'lea' as it doesn't set the flags.
1071 if (addr->gtOverflow())
1076 bool rev = false; // Is op2 first in the evaluation order?
1079 A complex address mode can combine the following operands:
1081 op1 ... base address
1082 op2 ... optional scaled index
1083 mul ... optional multiplier (2/4/8) for op2
1084 cns ... optional displacement
1086 Here we try to find such a set of operands and arrange for these
1087 to sit in registers.
1094 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1095 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1096 here if we find a scaled index.
1098 CLANG_FORMAT_COMMENT_ANCHOR;
1102 /* Special case: keep constants as 'op2', but don't do this for constant handles
1103 because they don't fit I32 that we're going to check for below anyway. */
1105 if (op1->IsCnsIntOrI() && !op1->IsIconHandle())
1107 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1113 /* Check for an addition of a constant */
1115 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->AsIntConCommon()->IconValue()))
1117 // We should not be building address modes out of non-foldable constants
1118 if (!op2->AsIntConCommon()->ImmedValCanBeFolded(compiler, addr->OperGet()))
1120 assert(compiler->opts.compReloc);
1124 /* We're adding a constant */
1126 cns += op2->AsIntConCommon()->IconValue();
1128 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1132 /* Inspect the operand the constant is being added to */
1134 switch (op1->gtOper)
1138 if (op1->gtOverflow())
1143 op2 = op1->AsOp()->gtOp2;
1144 op1 = op1->AsOp()->gtOp1;
1148 #if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1149 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1151 if (op1->gtOverflow())
1153 return false; // Need overflow check
1160 mul = op1->GetScaledIndex();
1163 /* We can use "[mul*rv2 + icon]" */
1166 rv2 = op1->AsOp()->gtOp1;
1171 #endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1178 /* The best we can do is "[rv1 + icon]" */
1186 // op2 is not a constant. So keep on trying.
1188 /* Neither op1 nor op2 are sitting in a register right now */
1190 switch (op1->gtOper)
1192 #if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1193 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1196 if (op1->gtOverflow())
1201 if (op1->AsOp()->gtOp2->IsIntCnsFitsInI32())
1203 GenTreeIntCon* addConst = op1->AsOp()->gtOp2->AsIntCon();
1205 if (addConst->ImmedValCanBeFolded(compiler, GT_ADD) && FitsIn<INT32>(cns + addConst->IconValue()))
1207 cns += addConst->IconValue();
1208 op1 = op1->AsOp()->gtOp1;
1217 if (op1->gtOverflow())
1226 mul = op1->GetScaledIndex();
1229 /* 'op1' is a scaled value */
1232 rv2 = op1->AsOp()->gtOp1;
1235 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1237 if (jitIsScaleIndexMul(argScale * mul))
1239 mul = mul * argScale;
1240 rv2 = rv2->AsOp()->gtOp1;
1248 noway_assert(rev == false);
1254 #endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64
1258 op1 = op1->AsOp()->gtOp1;
1263 op1 = op1->AsOp()->gtOp2;
1271 switch (op2->gtOper)
1273 #if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1274 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because
1275 // arm doesn't support both scale and offset at the same. Offset is handled
1276 // at the emitter as a peephole optimization.
1279 if (op2->gtOverflow())
1284 if (op2->AsOp()->gtOp2->IsIntCnsFitsInI32())
1286 GenTreeIntCon* addConst = op2->AsOp()->gtOp2->AsIntCon();
1288 if (addConst->ImmedValCanBeFolded(compiler, GT_ADD) && FitsIn<INT32>(cns + addConst->IconValue()))
1290 cns += addConst->IconValue();
1291 op2 = op2->AsOp()->gtOp1;
1300 if (op2->gtOverflow())
1309 mul = op2->GetScaledIndex();
1312 // 'op2' is a scaled value...is it's argument also scaled?
1314 rv2 = op2->AsOp()->gtOp1;
1315 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1317 if (jitIsScaleIndexMul(argScale * mul))
1319 mul = mul * argScale;
1320 rv2 = rv2->AsOp()->gtOp1;
1333 #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
1337 op2 = op2->AsOp()->gtOp1;
1342 op2 = op2->AsOp()->gtOp2;
1349 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
1361 // Make sure a GC address doesn't end up in 'rv2'
1362 if (varTypeIsGC(rv2->TypeGet()))
1364 std::swap(rv1, rv2);
1368 // Special case: constant array index (that is range-checked)
1371 // By default, assume index is rv2 and indexScale is mul (or 1 if mul is zero)
1372 GenTree* index = rv2;
1373 ssize_t indexScale = mul == 0 ? 1 : mul;
1375 if (rv2->OperIs(GT_MUL, GT_LSH) && (rv2->gtGetOp2()->IsCnsIntOrI()))
1377 indexScale *= compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1380 // "index * 0" means index is zero
1381 if (indexScale == 0)
1386 else if (index->IsIntCnsFitsInI32())
1388 ssize_t constantIndex = index->AsIntConCommon()->IconValue() * indexScale;
1389 if (constantIndex == 0)
1391 // while scale is a non-zero constant, the actual index is zero so drop it
1395 else if (FitsIn<INT32>(cns + constantIndex))
1397 // Add the constant index to the accumulated offset value
1398 cns += constantIndex;
1399 // and get rid of index
1407 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
1408 noway_assert(rv1 || mul != 1);
1410 noway_assert(FitsIn<INT32>(cns));
1412 if (rv1 == nullptr && rv2 == nullptr)
1417 /* Success - return the various components to the caller */
1428 /*****************************************************************************
1430 * Generate an exit sequence for a return from a method (note: when compiling
1431 * for speed there might be multiple exit points).
// genExitCode: emit the method-exit sequence for 'block'.
//
// Adds an Epilog IP-mapping entry so the debugger knows where the epilog
// starts, emits the GS security-cookie check when one is required, and
// finally reserves the epilog instruction group via genReserveEpilog().
//
// NOTE(review): this listing is elided (embedded line numbers jump), so some
// statements between the visible lines are not shown here.
1434 void CodeGen::genExitCode(BasicBlock* block)
1436 /* Just wrote the first instruction of the epilog - inform debugger
1437 Note that this may result in a duplicate IPmapping entry, and
1440 // For non-optimized debuggable code, there is only one epilog.
1441 genIPmappingAdd(IPmappingDscKind::Epilog, DebugInfo(), true);
// BBF_HAS_JMP marks a block that exits via a tail "jmp" epilog rather than
// an ordinary return; the GS cookie check needs to know which form this is.
1443 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
1444 if (compiler->getNeedsGSSecurityCookie())
1446 genEmitGSCookieCheck(jmpEpilog);
1451 // The GS cookie check created a temp label that has no live
1452 // incoming GC registers, we need to fix that
1457 /* Figure out which register parameters hold pointers */
// Walk the leading register-argument locals (lvaTable is ordered so register
// args come first here) and re-mark each arg register that holds a GC
// ref/byref as live for GC reporting purposes.
1459 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1462 noway_assert(varDsc->lvIsParam);
1464 gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), varDsc->TypeGet());
// Propagate the recomputed GC register sets into the emitter's
// current/initial sets so the label's GC state is consistent.
1467 GetEmitter()->emitThisGCrefRegs = GetEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1468 GetEmitter()->emitThisByrefRegs = GetEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1472 genReserveEpilog(block);
1475 //------------------------------------------------------------------------
1476 // genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1479 // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1480 // Otherwise, we generate the 'throw' inline.
1483 // jumpKind - jump kind to generate;
1484 // codeKind - the special throw-helper kind;
1485 // failBlk - optional fail target block, if it is already known;
1487 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk)
1489 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1490 #if defined(UNIX_X86_ABI) && defined(FEATURE_EH_FUNCLETS)
1491 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1492 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
1493 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1497 // For code with throw helper blocks, find and use the helper block for
1498 // raising the exception. The block may be shared by other trees too.
1500 BasicBlock* excpRaisingBlock;
1502 if (failBlk != nullptr)
1504 // We already know which block to jump to. Use that.
1505 excpRaisingBlock = failBlk;
// In a DEBUG-style cross-check (listing elided here), re-derive the target
// from the throw-helper table and verify it matches the caller's failBlk.
1508 Compiler::AddCodeDsc* add =
1509 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1510 assert(excpRaisingBlock == add->acdDstBlk);
1511 #if !FEATURE_FIXED_OUT_ARGS
1512 assert(add->acdStkLvlInit || isFramePointerUsed());
1513 #endif // !FEATURE_FIXED_OUT_ARGS
1518 // Find the helper-block which raises the exception.
1519 Compiler::AddCodeDsc* add =
1520 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1521 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
1522 excpRaisingBlock = add->acdDstBlk;
1523 #if !FEATURE_FIXED_OUT_ARGS
1524 assert(add->acdStkLvlInit || isFramePointerUsed());
1525 #endif // !FEATURE_FIXED_OUT_ARGS
1528 noway_assert(excpRaisingBlock != nullptr);
1530 // Jump to the exception-throwing block on error.
1531 inst_JMP(jumpKind, excpRaisingBlock);
1535 // The code to throw the exception will be generated inline, and
1536 // we will jump around it in the normal non-exception case.
// If the jump condition is reversible, branch around the inline throw on the
// non-exception path; an unconditional jumpKind (reverse == itself) means we
// fall straight into the helper call with no skip label.
1538 BasicBlock* tgtBlk = nullptr;
1539 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
1540 if (reverseJumpKind != jumpKind)
1542 tgtBlk = genCreateTempLabel();
1543 inst_JMP(reverseJumpKind, tgtBlk);
// Emit the call to the throw helper that corresponds to 'codeKind'.
1546 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
1548 // Define the spot for the normal non-exception case to jump to.
1549 if (tgtBlk != nullptr)
1551 assert(reverseJumpKind != jumpKind);
1552 genDefineTempLabel(tgtBlk);
1557 /*****************************************************************************
1559 * The last operation done was generating code for "tree" and that would
1560 * have set the flags. Check if the operation caused an overflow.
// genCheckOverflow: emit a conditional jump to the overflow throw-helper,
// keyed off the CPU flags set by the instruction just generated for 'tree'.
// Not used on LoongArch64/RISC-V64, which have no flags register.
1563 #if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1565 void CodeGen::genCheckOverflow(GenTree* tree)
1567 // Overflow-check should be asked for this tree
1568 noway_assert(tree->gtOverflow());
1570 const var_types type = tree->TypeGet();
1572 // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
1573 noway_assert(!varTypeIsSmall(type));
1575 emitJumpKind jumpKind;
// NOTE(review): the GT_MUL special case's body is elided in this listing;
// presumably it selects a different jump kind for multiplies — confirm in
// the full source.
1578 if (tree->OperGet() == GT_MUL)
1585 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
// x86/x64: unsigned overflow sets the carry flag (jb); signed overflow sets
// the overflow flag (jo).
1587 #if defined(TARGET_XARCH)
1589 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
// arm/arm64: EJ_lo tests carry-clear (unsigned), EJ_vs tests the V flag
// (signed overflow).
1591 #elif defined(TARGET_ARMARCH)
1593 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
1595 if (jumpKind == EJ_lo)
1597 if (tree->OperGet() != GT_SUB)
1602 #endif // defined(TARGET_ARMARCH)
1605 // Jump to the block which will throw the exception
1607 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
1611 #if defined(FEATURE_EH_FUNCLETS)
1613 /*****************************************************************************
1615 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
1616 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
// genUpdateCurrentFunclet: keep the compiler's notion of "current funclet"
// in sync while walking blocks. On a BBF_FUNCLET_BEG block we switch to that
// block's funclet; otherwise we only assert that 'block' really belongs to
// the funclet (filter/handler/root) we already think is current.
1621 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
1623 if (block->bbFlags & BBF_FUNCLET_BEG)
1625 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
// A funclet can only begin at the first block of a filter or a handler.
1626 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
1628 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
1632 // We shouldn't see FUNC_ROOT
1633 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
1634 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
// Non-funclet-begin block: validate membership in the current region only.
1639 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
1640 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
1642 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
1644 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
1646 assert(!block->hasHndIndex());
1650 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
1651 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
1656 #endif // FEATURE_EH_FUNCLETS
1658 //----------------------------------------------------------------------
1659 // genGenerateCode: Generate code for the function.
1662 // codePtr [OUT] - address of generated code
1663 // nativeSizeOfCode [OUT] - length of generated code in bytes
// Top-level code-generation driver: stashes the output parameters on 'this',
// then runs the three codegen phases in order — instruction selection,
// machine-code emission, and unwind/debug/GC/EH info emission.
1665 void CodeGen::genGenerateCode(void** codePtr, uint32_t* nativeSizeOfCode)
1671 printf("*************** In genGenerateCode()\n");
1672 compiler->fgDispBasicBlocks(compiler->verboseTrees);
// The phase methods below take no arguments, so the out-params are stored
// as members for them to fill in.
1676 this->codePtr = codePtr;
1677 this->nativeSizeOfCode = nativeSizeOfCode;
1679 DoPhase(this, PHASE_GENERATE_CODE, &CodeGen::genGenerateMachineCode);
1680 DoPhase(this, PHASE_EMIT_CODE, &CodeGen::genEmitMachineCode);
1681 DoPhase(this, PHASE_EMIT_GCEH, &CodeGen::genEmitUnwindDebugGCandEH);
1684 //----------------------------------------------------------------------
1685 // genGenerateMachineCode -- determine which machine instructions to emit
// Phase 1 of codegen: prepares the emitter, optionally prints the disasm
// listing header (method name, opt level, target ISA/OS, frame kind, PGO
// info, ...), finalizes the frame layout, generates code for all blocks,
// then prologs/epilogs, binds jump distances, and applies loop alignment.
1687 void CodeGen::genGenerateMachineCode()
1690 genInterruptibleUsed = true;
1692 compiler->fgDebugCheckBBlist();
1695 /* This is the real thing */
1697 genPrepForCompiler();
1699 /* Prepare the emitter */
1700 GetEmitter()->Init();
1702 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
// If the user asked to disassemble only methods that needed spills, and this
// method did spill, force the disasm flag on.
1706 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
1708 compiler->opts.disAsm = true;
1711 compiler->compCurBB = compiler->fgFirstBB;
// ---- Disassembly listing header (output only; no codegen effect) ----
1713 if (compiler->opts.disAsm)
1716 const char* fullName = compiler->info.compFullName;
1718 const char* fullName = compiler->eeGetMethodFullName(compiler->info.compMethodHnd);
1721 printf("; Assembly listing for method %s (%s)\n", fullName, compiler->compGetTieringName(true));
1723 printf("; Emitting ");
1725 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
1727 printf("SMALL_CODE");
1729 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
1731 printf("FAST_CODE");
1735 printf("BLENDED_CODE");
// Report the target ISA (and SIMD encoding capability on x86/x64).
1740 #if defined(TARGET_X86)
1741 if (compiler->canUseEvexEncoding())
1743 printf("X86 with AVX512");
1745 else if (compiler->canUseVexEncoding())
1747 printf("X86 with AVX");
1751 printf("generic X86");
1753 #elif defined(TARGET_AMD64)
1754 if (compiler->canUseEvexEncoding())
1756 printf("X64 with AVX512");
1758 else if (compiler->canUseVexEncoding())
1760 printf("X64 with AVX");
1764 printf("generic X64");
1766 #elif defined(TARGET_ARM)
1767 printf("generic ARM");
1768 #elif defined(TARGET_ARM64)
1769 printf("generic ARM64");
1770 #elif defined(TARGET_LOONGARCH64)
1771 printf("generic LOONGARCH64");
1772 #elif defined(TARGET_RISCV64)
1773 printf("generic RISCV64");
1775 printf("unknown architecture");
1778 if (TargetOS::IsWindows)
1780 printf(" - Windows");
1782 else if (TargetOS::IsMacOS)
1786 else if (TargetOS::IsUnix)
1793 printf("; %s code\n", compiler->compGetTieringName(false));
1795 if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
1797 printf("; NativeAOT compilation\n");
1799 else if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN))
1801 printf("; ReadyToRun compilation\n");
1804 if (compiler->opts.IsOSR())
1806 printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry);
1809 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
1811 printf("; optimized code\n");
1813 else if (compiler->opts.compDbgEnC)
1815 printf("; EnC code\n");
1817 else if (compiler->opts.compDbgCode)
1819 printf("; debuggable code\n");
1822 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && compiler->fgHaveProfileWeights())
1824 printf("; optimized using %s\n", compiler->compGetPgoSourceName());
1828 if (compiler->genDoubleAlign())
1829 printf("; double-aligned frame\n");
1832 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
1834 if (GetInterruptible())
1836 printf("; fully interruptible\n");
1840 printf("; partially interruptible\n");
1843 if (compiler->fgHaveProfileWeights())
1845 printf("; with %s: edge weights are %s, and fgCalledCount is " FMT_WT "\n",
1846 compiler->compGetPgoSourceName(), compiler->fgHaveValidEdgeWeights ? "valid" : "invalid",
1847 compiler->fgCalledCount);
1850 if (compiler->fgPgoFailReason != nullptr)
1852 printf("; %s\n", compiler->fgPgoFailReason);
1855 if ((compiler->fgPgoInlineePgo + compiler->fgPgoInlineeNoPgo + compiler->fgPgoInlineeNoPgoSingleBlock) > 0)
1857 printf("; %u inlinees with PGO data; %u single block inlinees; %u inlinees without PGO data\n",
1858 compiler->fgPgoInlineePgo, compiler->fgPgoInlineeNoPgoSingleBlock, compiler->fgPgoInlineeNoPgo);
1861 if (compiler->opts.IsCFGEnabled())
1863 printf("; control-flow guard enabled\n");
1866 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
1868 printf("; invoked as altjit\n");
// ---- End of listing header; actual machine-code generation follows ----
1872 // We compute the final frame layout before code generation. This is because LSRA
1873 // has already computed exactly the maximum concurrent number of spill temps of each type that are
1874 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
1875 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
1876 // memory from the VM.
// Begin the function in the emitter (args elided in this listing: frame
// pointer usage and whether hot/cold splitting-style options apply).
1880 GetEmitter()->emitBegFN(isFramePointerUsed()
1883 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
1884 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
1888 /* Now generate code for the function */
1892 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
1893 // generation hasn't touched it (it shouldn't!).
1896 compiler->lvaTableDump();
1900 /* We can now generate the function prolog and epilog */
1901 genGeneratePrologsAndEpilogs();
1903 // check to see if any jumps can be removed
1904 GetEmitter()->emitRemoveJumpToNextInst();
1906 /* Bind jump distances */
1907 GetEmitter()->emitJumpDistBind();
1909 #if FEATURE_LOOP_ALIGN
1910 /* Perform alignment adjustments */
1912 GetEmitter()->emitLoopAlignAdjustments();
1915 /* The code is now complete and final; it should not change after this. */
1918 //----------------------------------------------------------------------
1919 // genEmitMachineCode -- emit the actual machine instruction code
// Phase 2 of codegen: computes code-section sizes, optionally forces a
// stress-mode fallback compile, reserves unwind space, then calls
// emitEndCodeGen() to allocate memory from the VM and write out the final
// instruction bytes. Also records perf-score/size statistics and optional
// disassembly banners.
1921 void CodeGen::genEmitMachineCode()
1923 /* Compute the size of the code sections that we are going to ask the VM
1924 to allocate. Note that this might not be precisely the size of the
1925 code we emit, though it's fatal if we emit more code than the size we
1927 (Note: an example of a case where we emit less code would be useful.)
1930 GetEmitter()->emitComputeCodeSizes();
1933 unsigned instrCount;
1935 // Code to test or stress our ability to run a fallback compile.
1936 // We trigger the fallback here, before asking the VM for any memory,
1937 // because if not, we will leak mem, as the current codebase can't free
1938 // the mem after the emitter asks the VM for it. As this is only a stress
1939 // mode, we only want the functionality, and don't care about the relative
1940 // ugliness of having the failure here.
1941 if (!compiler->jitFallbackCompile)
1943 // Use DOTNET_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
1944 // especially that caused by enabling JIT stress.
1945 if (!JitConfig.JitNoForceFallback())
1947 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
1949 JITDUMP("\n\n*** forcing no-way fallback -- current jit request will be abandoned ***\n\n");
1950 NO_WAY_NOASSERT("Stress failure");
1957 /* We've finished collecting all the unwind information for the function. Now reserve
1958 space for it from the VM.
1961 compiler->unwindReserve();
1963 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
// Whether tracked stack pointers form one contiguous region differs per
// target; the GC encoder uses this to pick its reporting scheme.
1965 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1966 trackedStackPtrsContig = false;
1967 #elif defined(TARGET_ARM)
1968 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
1969 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
1971 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
1974 if (compiler->opts.disAsm && compiler->opts.disTesting)
1976 printf("; BEGIN METHOD %s\n", compiler->eeGetMethodFullName(compiler->info.compMethodHnd));
// The central call: the emitter allocates code memory from the VM and issues
// the final bytes, returning the total size plus prolog/epilog sizes and the
// hot/cold/const section pointers (stored into members used by phase 3).
1979 codeSize = GetEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, GetInterruptible(),
1980 IsFullPtrRegMapRequired(), compiler->compHndBBtabCount, &prologSize,
1981 &epilogSize, codePtr, &coldCodePtr, &consPtr DEBUGARG(&instrCount));
1984 assert(compiler->compCodeGenDone == false);
1986 /* We're done generating code for this function */
1987 compiler->compCodeGenDone = true;
1990 #if defined(DEBUG) || defined(LATE_DISASM)
1991 // Add code size information into the Perf Score
1992 // All compPerfScore calculations must be performed using doubles
1993 compiler->info.compPerfScore += ((double)compiler->info.compTotalHotCodeSize * (double)PERFSCORE_CODESIZE_COST_HOT);
1994 compiler->info.compPerfScore +=
1995 ((double)compiler->info.compTotalColdCodeSize * (double)PERFSCORE_CODESIZE_COST_COLD);
1996 #endif // DEBUG || LATE_DISASM
1998 if (compiler->opts.disAsm && compiler->opts.disTesting)
2000 printf("; END METHOD %s\n", compiler->eeGetMethodFullName(compiler->info.compMethodHnd));
2004 if (compiler->opts.disAsm || verbose)
2006 printf("\n; Total bytes of code %d, prolog size %d, PerfScore %.2f, instruction count %d, allocated bytes for "
2008 codeSize, prologSize, compiler->info.compPerfScore, instrCount,
2009 GetEmitter()->emitTotalHotCodeSize + GetEmitter()->emitTotalColdCodeSize);
2011 #if TRACK_LSRA_STATS
2012 if (JitConfig.DisplayLsraStats() == 3)
2014 compiler->m_pLinearScan->dumpLsraStatsSummary(jitstdout());
2016 #endif // TRACK_LSRA_STATS
2018 printf(" (MethodHash=%08x) for method %s (%s)\n", compiler->info.compMethodHash(), compiler->info.compFullName,
2019 compiler->compGetTieringName(true));
2021 printf("; ============================================================\n\n");
2022 printf(""); // in our logic this causes a flush
2027 printf("*************** After end code gen, before unwindEmit()\n");
2028 GetEmitter()->emitDispIGlist(/* displayInstructions */ true);
2031 if (compiler->opts.disAsm)
2033 printf("\n; Total bytes of code %d\n\n", codeSize);
// Publish the final size through the out-param captured in genGenerateCode.
2037 *nativeSizeOfCode = codeSize;
2038 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2040 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2042 // Make sure that the x86 alignment and cache prefetch optimization rules
2045 // Don't start a method in the last 7 bytes of a 16-byte alignment area
2046 // unless we are generating SMALL_CODE
2047 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
2050 //----------------------------------------------------------------------
2051 // genEmitUnwindDebugGCandEH: emit unwind, debug, gc, and EH info
// Phase 3 of codegen: now that final code addresses and sizes are known,
// emits unwind data, finalizes debug line/var tracking, (optionally) runs
// the late disassembler, reports EH clauses to the VM, creates GC info,
// dumps a hex listing when requested, and shuts down the emitter/spill/temp
// machinery. Also accumulates size statistics under DISPLAY_SIZES.
2053 void CodeGen::genEmitUnwindDebugGCandEH()
2055 /* Now that the code is issued, we can finalize and emit the unwind data */
2057 compiler->unwindEmit(*codePtr, coldCodePtr);
2059 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
2063 genReportRichDebugInfo();
2065 /* Finalize the Local Var info in terms of generated code */
// ---- LATE_DISASM support: split the final size into hot and cold parts ----
2070 unsigned finalHotCodeSize;
2071 unsigned finalColdCodeSize;
2072 if (compiler->fgFirstColdBlock != nullptr)
2074 // We did some hot/cold splitting. The hot section is always padded out to the
2075 // size we thought it would be, but the cold section is not.
2076 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2077 assert(compiler->info.compTotalHotCodeSize > 0);
2078 assert(compiler->info.compTotalColdCodeSize > 0);
2079 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2080 finalColdCodeSize = codeSize - finalHotCodeSize;
2084 // No hot/cold splitting
2085 assert(codeSize <= compiler->info.compTotalHotCodeSize);
2086 assert(compiler->info.compTotalHotCodeSize > 0);
2087 assert(compiler->info.compTotalColdCodeSize == 0);
2088 finalHotCodeSize = codeSize;
2089 finalColdCodeSize = 0;
2091 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2092 #endif // LATE_DISASM
2094 /* Report any exception handlers to the VM */
2098 #ifdef JIT32_GCENCODER
2103 // Create and store the GC info for this method.
2104 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
// ---- Optional hex dump of the generated code (debug-only tooling) ----
2107 FILE* dmpf = jitstdout();
2109 compiler->opts.dmpHex = false;
// Developer hook: edit the string below to enable a hex dump appended to
// C:\JIT.COD for one specific method.
2110 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
2113 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2118 compiler->opts.dmpHex = true;
2121 if (compiler->opts.dmpHex)
2123 size_t consSize = GetEmitter()->emitDataSize();
2125 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2126 fprintf(dmpf, "\n");
2130 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2134 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2136 #ifdef JIT32_GCENCODER
2137 size_t infoSize = compiler->compInfoBlkSize;
2139 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2140 #endif // JIT32_GCENCODER
2142 fprintf(dmpf, "\n");
2146 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
2150 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2152 #ifdef JIT32_GCENCODER
2154 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2155 #endif // JIT32_GCENCODER
// Close the dump file if we opened one (jitstdout stays open).
2160 if (dmpf != jitstdout())
2167 /* Tell the emitter that we're done with this function */
2169 GetEmitter()->emitEndFN();
2171 /* Shut down the spill logic */
2173 regSet.rsSpillDone();
2175 /* Shut down the temp logic */
2181 size_t dataSize = GetEmitter()->emitDataSize();
2182 grossVMsize += compiler->info.compILCodeSize;
2183 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2184 grossNCsize += codeSize + dataSize;
2186 #endif // DISPLAY_SIZES
2189 /*****************************************************************************
2191 * Report EH clauses to the VM
2194 void CodeGen::genReportEH()
2196 if (compiler->compHndBBtabCount == 0)
2202 if (compiler->opts.dspEHTable)
2204 printf("*************** EH table for %s\n", compiler->info.compFullName);
2210 bool isNativeAOT = compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI);
2212 unsigned EHCount = compiler->compHndBBtabCount;
2214 #if defined(FEATURE_EH_FUNCLETS)
2215 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
2217 unsigned duplicateClauseCount = 0;
2218 unsigned enclosingTryIndex;
2220 // Duplicate clauses are not used by NativeAOT ABI
2223 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2225 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2226 // ignoring 'mutual protect' trys
2227 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2228 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2230 ++duplicateClauseCount;
2233 EHCount += duplicateClauseCount;
2236 #if FEATURE_EH_CALLFINALLY_THUNKS
2237 unsigned clonedFinallyCount = 0;
2239 // Duplicate clauses are not used by NativeAOT ABI
2242 // We don't keep track of how many cloned finally there are. So, go through and count.
2243 // We do a quick pass first through the EH table to see if there are any try/finally
2244 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2246 bool anyFinallys = false;
2247 for (EHblkDsc* const HBtab : EHClauses(compiler))
2249 if (HBtab->HasFinallyHandler())
2257 for (BasicBlock* const block : compiler->Blocks())
2259 if (block->bbJumpKind == BBJ_CALLFINALLY)
2261 ++clonedFinallyCount;
2265 EHCount += clonedFinallyCount;
2268 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2270 #endif // FEATURE_EH_FUNCLETS
2273 if (compiler->opts.dspEHTable)
2275 #if defined(FEATURE_EH_FUNCLETS)
2276 #if FEATURE_EH_CALLFINALLY_THUNKS
2277 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2278 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2279 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2280 #else // !FEATURE_EH_CALLFINALLY_THUNKS
2281 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2282 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2283 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2284 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
2285 #else // !FEATURE_EH_FUNCLETS
2286 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2287 assert(compiler->compHndBBtabCount == EHCount);
2288 #endif // !FEATURE_EH_FUNCLETS
2292 // Tell the VM how many EH clauses to expect.
2293 compiler->eeSetEHcount(EHCount);
2295 XTnum = 0; // This is the index we pass to the VM
2297 for (EHblkDsc* const HBtab : EHClauses(compiler))
2299 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2301 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2302 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2304 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2305 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2306 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2307 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2309 if (HBtab->HasFilter())
2311 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2315 hndTyp = HBtab->ebdTyp;
2318 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2322 // CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2323 // try block as the previous one. The runtime cannot reliably infer this information from
2324 // native code offsets because of different try blocks can have same offsets. Alternative
2325 // solution to this problem would be inserting extra nops to ensure that different try
2326 // blocks have different offsets.
2327 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2329 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2330 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2331 // IL as "try { try {} catch {} catch {} } finally {}".
2332 assert(HBtab->HasCatchHandler());
2333 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2337 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2338 // the fields aren't accurate.
2340 CORINFO_EH_CLAUSE clause;
2341 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2342 clause.Flags = flags;
2343 clause.TryOffset = tryBeg;
2344 clause.TryLength = tryEnd;
2345 clause.HandlerOffset = hndBeg;
2346 clause.HandlerLength = hndEnd;
2348 assert(XTnum < EHCount);
2350 // Tell the VM about this EH clause.
2351 compiler->eeSetEHinfo(XTnum, &clause);
2356 #if defined(FEATURE_EH_FUNCLETS)
2357 // Now output duplicated clauses.
2359 // If a funclet has been created by moving a handler out of a try region that it was originally nested
2360 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2361 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2362 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2363 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2364 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2365 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2368 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2369 // try or handler region):
2387 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2388 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2389 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2390 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2391 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2392 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2393 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2394 // representing try (1) protecting the new funclets catch (3) and (4).
2395 // The code will be generated as follows:
2397 // ABCFH // "main" code
2402 // The EH regions are:
2407 // D -> G // "duplicate" clause
2408 // E -> G // "duplicate" clause
2410 // Note that we actually need to generate one of these additional "duplicate" clauses for every
2411 // region the funclet is nested in. Take this example:
2443 // When we pull out funclets, we get the following generated code:
2445 // ABCDEHJMO // "main" function
2453 // And the EH regions we report to the VM are (in order; main clauses
2454 // first in most-to-least nested order, funclets ("duplicated clauses")
2455 // last, in most-to-least nested) are:
2463 // F -> I // funclet clause #1 for F
2464 // F -> K // funclet clause #2 for F
2465 // F -> L // funclet clause #3 for F
2466 // F -> N // funclet clause #4 for F
2467 // G -> I // funclet clause #1 for G
2468 // G -> K // funclet clause #2 for G
2469 // G -> L // funclet clause #3 for G
2470 // G -> N // funclet clause #4 for G
2471 // I -> K // funclet clause #1 for I
2472 // I -> L // funclet clause #2 for I
2473 // I -> N // funclet clause #3 for I
2474 // K -> N // funclet clause #1 for K
2475 // L -> N // funclet clause #1 for L
2477 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2478 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2479 // to add a clause "F -> G" because F is NOT protected by G, but we still have
2480 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2482 // The overall ordering of the clauses is still the same most-to-least nesting
2483 // after front-to-back start offset. Because we place the funclets at the end
2484 // these new clauses should also go at the end by this ordering.
2487 if (duplicateClauseCount > 0)
2489 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2492 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2494 unsigned enclosingTryIndex;
2496 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2498 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2499 // ignoring 'mutual protect' trys
2500 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2501 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2503 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2504 // that will have the enclosing try protecting the funclet.
2506 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2507 // greater EH table index
2509 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2511 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2512 // filter region, only the filter handler region. This is because exceptions in filters never
2513 // escape; the VM swallows them.
2515 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2516 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2518 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2519 BasicBlock* bbHndLast = encTab->ebdHndLast;
2521 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2523 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2524 hndBeg = compiler->ehCodeOffset(bbHndBeg);
2526 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2527 : compiler->ehCodeOffset(bbTryLast->bbNext);
2528 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2529 : compiler->ehCodeOffset(bbHndLast->bbNext);
2531 if (encTab->HasFilter())
2533 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2537 hndTyp = encTab->ebdTyp;
2540 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2542 // Tell the VM this is an extra clause caused by moving funclets out of line.
2543 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2545 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2546 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2547 // instruction immediately after the 'try' body. So, it really could be more accurately named
2550 CORINFO_EH_CLAUSE clause;
2551 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2552 clause.Flags = flags;
2553 clause.TryOffset = tryBeg;
2554 clause.TryLength = tryEnd;
2555 clause.HandlerOffset = hndBeg;
2556 clause.HandlerLength = hndEnd;
2558 assert(XTnum < EHCount);
2560 // Tell the VM about this EH clause (a duplicated clause).
2561 compiler->eeSetEHinfo(XTnum, &clause);
2564 ++reportedDuplicateClauseCount;
2567 if (duplicateClauseCount == reportedDuplicateClauseCount)
2569 break; // we've reported all of them; no need to continue looking
2573 } // for each 'true' enclosing 'try'
2574 } // for each EH table entry
2576 assert(duplicateClauseCount == reportedDuplicateClauseCount);
2577 } // if (duplicateClauseCount > 0)
2579 #if FEATURE_EH_CALLFINALLY_THUNKS
2580 if (clonedFinallyCount > 0)
2582 unsigned reportedClonedFinallyCount = 0;
2583 for (BasicBlock* const block : compiler->Blocks())
2585 if (block->bbJumpKind == BBJ_CALLFINALLY)
2587 UNATIVE_OFFSET hndBeg, hndEnd;
2589 hndBeg = compiler->ehCodeOffset(block);
2591 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
2592 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
2593 BasicBlock* bbLabel = block->bbNext;
2594 if (block->isBBCallAlwaysPair())
2596 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
2598 if (bbLabel == nullptr)
2600 hndEnd = compiler->info.compNativeCodeSize;
2604 assert(bbLabel->bbEmitCookie != nullptr);
2605 hndEnd = compiler->ehCodeOffset(bbLabel);
2608 CORINFO_EH_CLAUSE clause;
2609 clause.ClassToken = 0; // unused
2610 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
2611 clause.TryOffset = hndBeg;
2612 clause.TryLength = hndBeg;
2613 clause.HandlerOffset = hndBeg;
2614 clause.HandlerLength = hndEnd;
2616 assert(XTnum < EHCount);
2618 // Tell the VM about this EH clause (a cloned finally clause).
2619 compiler->eeSetEHinfo(XTnum, &clause);
2622 ++reportedClonedFinallyCount;
2625 if (clonedFinallyCount == reportedClonedFinallyCount)
2627 break; // we're done; no need to keep looking
2630 } // block is BBJ_CALLFINALLY
2633 assert(clonedFinallyCount == reportedClonedFinallyCount);
2634 } // if (clonedFinallyCount > 0)
2635 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2637 #endif // FEATURE_EH_FUNCLETS
2639 assert(XTnum == EHCount);
2642 //----------------------------------------------------------------------
2643 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2644 // helper should be used.
2647 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
2650 // true if an optimized write barrier helper should be used, false otherwise.
2651 // Note: only x86 implements register-specific source optimized write
2652 // barriers currently.
2654 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
2656 #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS
2658 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2667 //----------------------------------------------------------------------
2668 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2669 // helper should be used.
2671 // This has the same functionality as the version of
2672 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
2673 // determining what the required write barrier form is, if possible.
2676 // store - the GT_STOREIND node
2679 // true if an optimized write barrier helper should be used, false otherwise.
2680 // Note: only x86 implements register-specific source optimized write
2681 // barriers currently.
2683 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTreeStoreInd* store)
2685 #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS
2687 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(store);
2688 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2697 //----------------------------------------------------------------------
2698 // genWriteBarrierHelperForWriteBarrierForm: Given a write barrier form
2699 // return the corresponding helper.
2702 // wbf - the write barrier form
2705 // Write barrier helper to use.
2707 // Note: do not call this function to get an optimized write barrier helper (e.g.,
2710 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GCInfo::WriteBarrierForm wbf)
2714 case GCInfo::WBF_BarrierChecked:
2715 return CORINFO_HELP_CHECKED_ASSIGN_REF;
2717 case GCInfo::WBF_BarrierUnchecked:
2718 return CORINFO_HELP_ASSIGN_REF;
2721 case GCInfo::WBF_NoBarrier_CheckNotHeapInDebug:
2722 return CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
2730 //----------------------------------------------------------------------
2731 // genGCWriteBarrier: Generate a write barrier for a node.
2734 // store - the GT_STOREIND node
2735 // wbf - already computed write barrier form to use
2737 void CodeGen::genGCWriteBarrier(GenTreeStoreInd* store, GCInfo::WriteBarrierForm wbf)
2739 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(wbf);
2741 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
2742 // Under FEATURE_COUNT_GC_WRITE_BARRIERS, we will add an extra argument to the
2743 // checked write barrier call denoting the kind of address being written to.
2745 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
2747 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
2748 GenTree* tgtAddr = store->Addr();
2750 while (tgtAddr->OperIs(GT_ADD, GT_LEA))
2752 if (tgtAddr->OperIs(GT_LEA) && tgtAddr->AsAddrMode()->HasBase())
2754 tgtAddr = tgtAddr->AsAddrMode()->Base();
2756 else if (tgtAddr->OperIs(GT_ADD) && tgtAddr->AsOp()->gtGetOp2()->IsCnsIntOrI())
2758 tgtAddr = tgtAddr->AsOp()->gtGetOp1();
2766 if (tgtAddr->OperIs(GT_LCL_VAR))
2768 unsigned lclNum = tgtAddr->AsLclVar()->GetLclNum();
2769 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
2770 if (lclNum == compiler->info.compRetBuffArg)
2772 wbKind = CWBKind_RetBuf
2774 else if (varDsc->TypeGet() == TYP_BYREF)
2776 wbKind = varDsc->lvIsParam ? CWBKind_ByRefArg : CWBKind_OtherByRefLocal;
2779 else if (tgtAddr->OperIs(GT_LCL_ADDR))
2781 // Ideally, we should have eliminated the barrier for this case.
2782 wbKind = CWBKind_AddrOfLocal;
2787 // Enable this to sample the unclassified trees.
2788 static int unclassifiedBarrierSite = 0;
2789 if (wbKind == CWBKind_Unclassified)
2791 unclassifiedBarrierSite++;
2792 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite);
2793 compiler->gtDispTree(store);
2794 printf(""); // Flush.
2801 inst_IV(INS_push, wbKind);
2802 genEmitHelperCall(helper,
2804 EA_PTRSIZE); // retSize
2805 SubtractStackLevel(4);
2808 #endif // FEATURE_COUNT_GC_WRITE_BARRIERS
2810 genEmitHelperCall(helper,
2812 EA_PTRSIZE); // retSize
2816 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2817 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2819 XX Prolog / Epilog XX
2821 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2822 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2825 /*****************************************************************************
2827 * Generates code for moving incoming register arguments to their
2828 * assigned location, in the function prolog.
2832 #pragma warning(push)
2833 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2836 #if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
2837 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
2842 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
2846 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
2847 unsigned argNum; // current argNum, always in [0..argMax-1]
2848 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
2849 unsigned regArgNum; // index into the regArgTab[] table
2850 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
2851 bool doingFloat = regState->rsIsFloat;
2853 // We should be generating the prolog block when we are called
2854 assert(compiler->compGeneratingProlog);
2856 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
2857 noway_assert(regArgMaskLive != 0);
2859 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
2860 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
2862 // The regArgTab can always have unused entries,
2863 // for example if an architecture always increments the arg register number but uses either
2864 // an integer register or a floating point register to hold the next argument
2865 // then with a mix of float and integer args you could have:
2867 // sampleMethod(int i, float x, int j, float y, int k, float z);
2868 // r0, r2 and r4 as valid integer arguments with argMax as 5
2869 //  and f1, f3 and f5 as valid floating point arguments with argMax as 6
2870 // The first one is doingFloat==false and the second one is doingFloat==true
2872 // If a fixed return buffer (in r8) was also present then the first one would become:
2873 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
2876 argMax = regState->rsCalleeRegArgCount;
2877 fixedRetBufIndex = (unsigned)-1; // Invalid value
2879 // If necessary we will select a correct xtraReg for circular floating point args later.
2883 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
2885 else // we are doing the integer registers
2887 noway_assert(argMax <= MAX_REG_ARG);
2888 if (hasFixedRetBuffReg())
2890 fixedRetBufIndex = theFixedRetBuffArgNum();
2891 // We have an additional integer register argument when hasFixedRetBuffReg() is true
2892 argMax = fixedRetBufIndex + 1;
2893 assert(argMax == (MAX_REG_ARG + 1));
2898 // Construct a table with the register arguments, for detecting circular and
2899 // non-circular dependencies between the register arguments. A dependency is when
2900 // an argument register Rn needs to be moved to register Rm that is also an argument
2901 // register. The table is constructed in the order the arguments are passed in
2902 // registers: the first register argument is in regArgTab[0], the second in
2903 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
2904 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
2905 // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
2906 // we have increased the allocated size of the regArgTab[] by one.
2910 unsigned varNum; // index into compiler->lvaTable[] for this register argument
2911 var_types type; // the Jit type of this regArgTab entry
2912 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
2913 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
2914 // argument register number 'x'. Only used when circular = true.
2915 char slot; // 0 means the register is not used for a register argument
2916 // 1 means the first part of a register argument
2917 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
2918 bool stackArg; // true if the argument gets homed to the stack
2919 bool writeThru; // true if the argument gets homed to both stack and register
2920 bool processed; // true after we've processed the argument (and it is in its final location)
2921 bool circular; // true if this register participates in a circular dependency loop.
2922 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
2927 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
2929 varDsc = compiler->lvaGetDesc(varNum);
2931 // Is this variable a register arg?
2932 if (!varDsc->lvIsParam)
2937 if (!varDsc->lvIsRegArg)
2942 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
2943 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
2944 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
2945 // use the original TYP_STRUCT argument.
2947 if (varDsc->lvPromoted || varDsc->lvIsStructField)
2949 LclVarDsc* parentVarDsc = varDsc;
2950 if (varDsc->lvIsStructField)
2952 assert(!varDsc->lvPromoted);
2953 parentVarDsc = compiler->lvaGetDesc(varDsc->lvParentLcl);
2956 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
2958 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
2960 // For register arguments that are independent promoted structs we put the promoted field varNum in the
2962 if (varDsc->lvPromoted)
2969 // For register arguments that are not independent promoted structs we put the parent struct varNum in
2971 if (varDsc->lvIsStructField)
2978 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
2979 // Change regType to the HFA type when we have a HFA argument
2980 if (varDsc->lvIsHfaRegArg())
2982 #if defined(TARGET_ARM64)
2983 if (TargetOS::IsWindows && compiler->info.compIsVarArgs)
2985 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
2987 #endif // defined(TARGET_ARM64)
2988 regType = varDsc->GetHfaType();
2991 #if defined(UNIX_AMD64_ABI)
2992 if (!varTypeIsStruct(regType))
2993 #endif // defined(UNIX_AMD64_ABI)
2995 bool isFloatReg = emitter::isFloatReg(varDsc->GetArgReg());
2997 if (isFloatReg != doingFloat)
2999 // A struct might be passed partially in XMM register for System V calls.
3000 // So a single arg might use both register files.
3003 else if (isFloatReg != varTypeUsesFloatArgReg(regType))
3005 if (regType == TYP_FLOAT)
3011 assert(regType == TYP_DOUBLE);
3019 #if defined(UNIX_AMD64_ABI)
3020 if (varTypeIsStruct(varDsc))
3022 CORINFO_CLASS_HANDLE typeHnd;
3023 if (varDsc->lvIsStructField)
3025 // The only case we currently permit is a wrapped SIMD field,
3026 // where we won't have the class handle available, so get it
3027 // from the parent struct -- they will agree on ABI details.
3028 LclVarDsc* parentDsc = compiler->lvaGetDesc(varDsc->lvParentLcl);
3029 assert(varTypeIsSIMD(varDsc) && (parentDsc->lvFieldCnt == 1));
3030 typeHnd = parentDsc->GetLayout()->GetClassHandle();
3034 typeHnd = varDsc->GetLayout()->GetClassHandle();
3036 assert(typeHnd != nullptr);
3037 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3038 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3039 if (!structDesc.passedInRegisters)
3041 // The var is not passed in registers.
3045 unsigned firstRegSlot = 0;
3046 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3048 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3053 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3054 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3055 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3056 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3059 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
3060 // registers or on stack, the upper most 4-bytes will be zero.
3062 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
3063 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
3066 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3067 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3068 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
3069 // there is no need to clear upper 4-bytes of Vector3 type args.
3071 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3072 // Vector3 return values are returned two return registers and Caller assembles them into a
3073 // single xmm reg. Hence RyuJIT explicitly generates code to clear upper 4-bytes of Vector3
3074 // type args in prolog and Vector3 type return value of a call
3076 if (varDsc->lvType == TYP_SIMD12)
3078 regType = TYP_DOUBLE;
3083 regType = compiler->GetEightByteType(structDesc, slotCounter);
3086 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3088 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3089 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3091 // Store the reg for the first slot.
3094 firstRegSlot = regArgNum;
3097 // Bingo - add it to our table
3098 noway_assert(regArgNum < argMax);
3099 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3100 // not be multiple vars representing this argument
3102 regArgTab[regArgNum].varNum = varNum;
3103 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3104 regArgTab[regArgNum].type = regType;
3111 continue; // Nothing to do for this regState set.
3114 regArgNum = firstRegSlot;
3117 #endif // defined(UNIX_AMD64_ABI)
3119 // Bingo - add it to our table
3120 regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType);
3123 if (TargetArchitecture::IsArm32)
3125 int lclSize = compiler->lvaLclSize(varNum);
3126 if (lclSize > REGSIZE_BYTES)
3128 slots = lclSize / REGSIZE_BYTES;
3131 #if FEATURE_MULTIREG_ARGS
3132 else if (varDsc->lvIsMultiRegArg)
3134 if (varDsc->lvIsHfaRegArg())
3136 // We have an HFA argument, set slots to the number of registers used
3137 slots = varDsc->lvHfaSlots();
3141 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3142 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3143 // We have a non-HFA multireg argument, set slots to two
3147 #endif // FEATURE_MULTIREG_ARGS
3149 // Handle args split between registers and stack. The arm64 fixed ret buf arg is never split.
3150 if (compFeatureArgSplit() && (fixedRetBufIndex != regArgNum))
3152 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3153 if (regArgNum + slots > maxRegArgNum)
3155 JITDUMP("Splitting V%02u: %u registers, %u stack slots\n", varNum, maxRegArgNum - regArgNum,
3156 regArgNum + slots - maxRegArgNum);
3157 slots = maxRegArgNum - regArgNum;
3161 // Note that regArgNum + 1 represents an argument index not an actual argument register;
3162 // see genMapRegArgNumToRegNum().
3164 for (int i = 0; i < slots; i++)
3166 noway_assert((regArgNum + i) < argMax);
3168 // We better not have added it already (there better not be multiple vars representing this argument
3170 noway_assert(regArgTab[regArgNum + i].slot == 0);
3172 regArgTab[regArgNum + i].varNum = varNum;
3173 regArgTab[regArgNum + i].slot = static_cast<char>(i + 1);
3175 regArgTab[regArgNum + i].type = regType; // Set the register type.
3179 for (int i = 0; i < slots; i++)
3181 regType = regArgTab[regArgNum + i].type;
3182 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3184 #if !defined(UNIX_AMD64_ABI)
3185 assert((i > 0) || (regNum == varDsc->GetArgReg()));
3186 #endif // defined(UNIX_AMD64_ABI)
3188 // Is the arg dead on entry to the method ?
3190 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3192 if (varDsc->lvTrackedNonStruct())
3194 // We may now see some tracked locals with zero refs.
3195 // See Lowering::DoPhase. Tolerate these.
3196 if (varDsc->lvRefCnt() > 0)
3198 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3204 noway_assert(varDsc->lvType == TYP_STRUCT);
3205 #else // !TARGET_X86
3206 // For LSRA, it may not be in regArgMaskLive if it has a zero
3207 // refcnt. This is in contrast with the non-LSRA case in which all
3208 // non-tracked args are assumed live on entry.
3209 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3210 (varDsc->IsAddressExposed() && compiler->info.compIsVarArgs) ||
3211 (varDsc->IsAddressExposed() && compiler->opts.compUseSoftFP));
3212 #endif // !TARGET_X86
3214 // Mark it as processed and be done with it
3215 regArgTab[regArgNum + i].processed = true;
3220 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3221 // could be equal to GetArgReg(). The pre-spilled registers are also not considered live either since
3222 // they've already been spilled.
3224 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3225 #endif // TARGET_ARM
3227 #if !defined(UNIX_AMD64_ABI)
3228 noway_assert(xtraReg != (varDsc->GetArgReg() + i));
3230 noway_assert(regArgMaskLive & genRegMask(regNum));
3233 regArgTab[regArgNum + i].processed = false;
3234 regArgTab[regArgNum + i].writeThru = (varDsc->lvIsInReg() && varDsc->lvLiveInOutOfHndlr);
3236 /* mark stack arguments since we will take care of those first */
3237 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3239 /* If it goes on the stack or in a register that doesn't hold
3240 * an argument anymore -> CANNOT form a circular dependency */
3242 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3244 /* will trash another argument -> possible dependency
3245 * We may need several passes after the table is constructed
3246 * to decide on that */
3248 /* Maybe the argument stays in the register (IDEAL) */
3250 if ((i == 0) && (varDsc->GetRegNum() == regNum))
3255 #if !defined(TARGET_64BIT)
3256 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->GetOtherReg() == regNum))
3260 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->GetOtherReg() == regNum))
3265 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3266 (REG_NEXT(varDsc->GetRegNum()) == regNum))
3270 #endif // !defined(TARGET_64BIT)
3271 regArgTab[regArgNum + i].circular = true;
3276 regArgTab[regArgNum + i].circular = false;
3278 /* mark the argument register as free */
3279 regArgMaskLive &= ~genRegMask(regNum);
3284 /* Find the circular dependencies for the argument registers, if any.
3285 * A circular dependency is a set of registers R1, R2, ..., Rn
3286 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
3291 /* Possible circular dependencies still exist; the previous pass was not enough
3292 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3298 for (argNum = 0; argNum < argMax; argNum++)
3300 // If we already marked the argument as non-circular then continue
3302 if (!regArgTab[argNum].circular)
3307 if (regArgTab[argNum].slot == 0) // Not a register argument
3312 varNum = regArgTab[argNum].varNum;
3313 varDsc = compiler->lvaGetDesc(varNum);
3314 const var_types varRegType = varDsc->GetRegisterType();
3315 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3317 /* cannot possibly have stack arguments */
3318 noway_assert(varDsc->lvIsInReg());
3319 noway_assert(!regArgTab[argNum].stackArg);
3321 var_types regType = regArgTab[argNum].type;
3322 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3324 regNumber destRegNum = REG_NA;
3325 if (varTypeIsPromotable(varDsc) &&
3326 (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT))
3328 assert(regArgTab[argNum].slot <= varDsc->lvFieldCnt);
3329 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + regArgTab[argNum].slot - 1);
3330 destRegNum = fieldVarDsc->GetRegNum();
3332 else if (regArgTab[argNum].slot == 1)
3334 destRegNum = varDsc->GetRegNum();
3336 #if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
3337 else if (varDsc->lvIsHfa())
3339 // This must be a SIMD type that's fully enregistered, but is passed as an HFA.
3340 // Each field will be inserted into the same destination register.
3341 assert(varTypeIsSIMD(varDsc));
3342 assert(regArgTab[argNum].slot <= (int)varDsc->lvHfaSlots());
3344 assert(regArgTab[argNum - 1].varNum == varNum);
3345 regArgMaskLive &= ~genRegMask(regNum);
3346 regArgTab[argNum].circular = false;
3350 #elif defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
3353 assert(regArgTab[argNum].slot == 2);
3355 assert(regArgTab[argNum - 1].slot == 1);
3356 assert(regArgTab[argNum - 1].varNum == varNum);
3357 assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
3358 regArgMaskLive &= ~genRegMask(regNum);
3359 regArgTab[argNum].circular = false;
3363 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
3364 #if !defined(TARGET_64BIT)
3365 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3367 destRegNum = varDsc->GetOtherReg();
3371 assert(regArgTab[argNum].slot == 2);
3372 assert(varDsc->TypeGet() == TYP_DOUBLE);
3373 destRegNum = REG_NEXT(varDsc->GetRegNum());
3375 #endif // !defined(TARGET_64BIT)
3376 noway_assert(destRegNum != REG_NA);
3377 if (genRegMask(destRegNum) & regArgMaskLive)
3379 /* we are trashing a live argument register - record it */
3380 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3381 noway_assert(destRegArgNum < argMax);
3382 regArgTab[destRegArgNum].trashBy = argNum;
3386 /* argument goes to a free register */
3387 regArgTab[argNum].circular = false;
3390 /* mark the argument register as free */
3391 regArgMaskLive &= ~genRegMask(regNum);
3397 /* At this point, everything that has the "circular" flag
3398 * set to "true" forms a circular dependency */
3399 CLANG_FORMAT_COMMENT_ANCHOR;
3406 printf("Circular dependencies found while home-ing the incoming arguments.\n");
3411 // LSRA allocates registers to incoming parameters in order and will not overwrite
3412 // a register still holding a live parameter.
3414 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3415 "Homing of float argument registers with circular dependencies not implemented.");
3417 // Now move the arguments to their locations.
3418 // First consider ones that go on the stack since they may free some registers.
3419 // Also home writeThru args, since they're also homed to the stack.
3421 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3422 for (argNum = 0; argNum < argMax; argNum++)
3426 #if defined(UNIX_AMD64_ABI)
3427 // If this is the wrong register file, just continue.
3428 if (regArgTab[argNum].type == TYP_UNDEF)
3430 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3431 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3432 // The next register file processing will process it.
3435 #endif // defined(UNIX_AMD64_ABI)
3437 // If the arg is dead on entry to the method, skip it
3439 if (regArgTab[argNum].processed)
3444 if (regArgTab[argNum].slot == 0) // Not a register argument
3449 varNum = regArgTab[argNum].varNum;
3450 varDsc = compiler->lvaGetDesc(varNum);
3452 #ifndef TARGET_64BIT
3453 // If this arg is never on the stack, go to the next one.
3454 if (varDsc->lvType == TYP_LONG)
3456 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru)
3460 else if (varDsc->GetOtherReg() != REG_STK)
3466 #endif // !TARGET_64BIT
3468 // If this arg is never on the stack, go to the next one.
3469 if (!regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru)
3475 #if defined(TARGET_ARM)
3476 if (varDsc->lvType == TYP_DOUBLE)
3478 if (regArgTab[argNum].slot == 2)
3480 // We handled the entire double when processing the first half (slot == 1)
3486 noway_assert(regArgTab[argNum].circular == false);
3488 noway_assert(varDsc->lvIsParam);
3489 noway_assert(varDsc->lvIsRegArg);
3490 noway_assert(varDsc->lvIsInReg() == false || varDsc->lvLiveInOutOfHndlr ||
3491 (varDsc->lvType == TYP_LONG && varDsc->GetOtherReg() == REG_STK && regArgTab[argNum].slot == 2));
3493 var_types storeType = TYP_UNDEF;
3494 unsigned slotSize = TARGET_POINTER_SIZE;
3496 if (varTypeIsStruct(varDsc))
3498 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3499 #if FEATURE_MULTIREG_ARGS
3500 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3501 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3502 #endif // FEATURE_MULTIREG_ARGS
3503 #ifdef UNIX_AMD64_ABI
3504 storeType = regArgTab[argNum].type;
3505 #endif // !UNIX_AMD64_ABI
3506 if (varDsc->lvIsHfaRegArg())
3509 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3510 storeType = TYP_FLOAT;
3511 slotSize = (unsigned)emitActualTypeSize(storeType);
3512 #else // TARGET_ARM64
3513 storeType = genActualType(varDsc->GetHfaType());
3514 slotSize = (unsigned)emitActualTypeSize(storeType);
3515 #endif // TARGET_ARM64
3518 else // Not a struct type
3520 storeType = genActualType(regArgTab[argNum].type);
3522 size = emitActualTypeSize(storeType);
3524 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
3525 #endif // TARGET_X86
3527 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
3529 // Stack argument - if the ref count is 0 don't care about it
3531 if (!varDsc->lvOnFrame)
3533 noway_assert(varDsc->lvRefCnt() == 0);
3537 // Since slot is typically 1, baseOffset is typically 0
3538 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
3540 GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
3542 #ifndef UNIX_AMD64_ABI
3543 // Check if we are writing past the end of the struct
3544 if (varTypeIsStruct(varDsc))
3546 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
3548 #endif // !UNIX_AMD64_ABI
3551 // Mark the argument as processed, and set it as no longer live in srcRegNum,
3552 // unless it is a writeThru var, in which case we home it to the stack, but
3553 // don't mark it as processed until below.
3554 if (!regArgTab[argNum].writeThru)
3556 regArgTab[argNum].processed = true;
3557 regArgMaskLive &= ~genRegMask(srcRegNum);
3560 #if defined(TARGET_ARM)
3561 if ((storeType == TYP_DOUBLE) && !regArgTab[argNum].writeThru)
3563 regArgTab[argNum + 1].processed = true;
3564 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
3569 /* Process any circular dependencies */
3572 unsigned begReg, destReg, srcReg;
3573 unsigned varNumDest, varNumSrc;
3574 LclVarDsc* varDscDest;
3575 LclVarDsc* varDscSrc;
3576 instruction insCopy = INS_mov;
3580 #ifndef UNIX_AMD64_ABI
3581 if (GlobalJitOptions::compFeatureHfa)
3582 #endif // !UNIX_AMD64_ABI
3584 insCopy = ins_Copy(TYP_DOUBLE);
3585 // Compute xtraReg here when we have a float argument
3586 assert(xtraReg == REG_NA);
3588 regMaskTP fpAvailMask;
3590 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
3591 if (GlobalJitOptions::compFeatureHfa)
3593 fpAvailMask &= RBM_ALLDOUBLE;
3596 if (fpAvailMask == RBM_NONE)
3598 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
3599 if (GlobalJitOptions::compFeatureHfa)
3601 fpAvailMask &= RBM_ALLDOUBLE;
3605 assert(fpAvailMask != RBM_NONE);
3607 // We pick the lowest avail register number
3608 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
3609 xtraReg = genRegNumFromMask(tempMask);
3611 #if defined(TARGET_X86)
3612 // This case shouldn't occur on x86 since NYI gets converted to an assert
3613 NYI("Homing circular FP registers via xtraReg");
3617 for (argNum = 0; argNum < argMax; argNum++)
3619 // If not a circular dependency then continue
3620 if (!regArgTab[argNum].circular)
3625 // If already processed the dependency then continue
3627 if (regArgTab[argNum].processed)
3632 if (regArgTab[argNum].slot == 0) // Not a register argument
3637 destReg = begReg = argNum;
3638 srcReg = regArgTab[argNum].trashBy;
3640 varNumDest = regArgTab[destReg].varNum;
3641 varDscDest = compiler->lvaGetDesc(varNumDest);
3642 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
3644 noway_assert(srcReg < argMax);
3645 varNumSrc = regArgTab[srcReg].varNum;
3646 varDscSrc = compiler->lvaGetDesc(varNumSrc);
3647 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
3649 emitAttr size = EA_PTRSIZE;
3653 // The following code relies upon the target architecture having an
3654 // 'xchg' instruction which directly swaps the values held in two registers.
3655 // On the ARM architecture we do not have such an instruction.
3657 if (destReg == regArgTab[srcReg].trashBy)
3659 /* only 2 registers form the circular dependency - use "xchg" */
3661 varNum = regArgTab[argNum].varNum;
3662 varDsc = compiler->lvaGetDesc(varNum);
3663 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3665 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
3667 /* Set "size" to indicate GC if one and only one of
3668 * the operands is a pointer
3669 * RATIONALE: If both are pointers, nothing changes in
3670 * the GC pointer tracking. If only one is a pointer we
3671 * have to "swap" the registers in the GC reg pointer mask
3674 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
3679 noway_assert(varDscDest->GetArgReg() == varDscSrc->GetRegNum());
3681 GetEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->GetRegNum(), varDscSrc->GetArgReg());
3682 regSet.verifyRegUsed(varDscSrc->GetRegNum());
3683 regSet.verifyRegUsed(varDscSrc->GetArgReg());
3685 /* mark both arguments as processed */
3686 regArgTab[destReg].processed = true;
3687 regArgTab[srcReg].processed = true;
3689 regArgMaskLive &= ~genRegMask(varDscSrc->GetArgReg());
3690 regArgMaskLive &= ~genRegMask(varDscDest->GetArgReg());
3693 #endif // TARGET_XARCH
3695 var_types destMemType = varDscDest->TypeGet();
3698 bool cycleAllDouble = true; // assume the best
3700 unsigned iter = begReg;
3703 if (compiler->lvaGetDesc(regArgTab[iter].varNum)->TypeGet() != TYP_DOUBLE)
3705 cycleAllDouble = false;
3708 iter = regArgTab[iter].trashBy;
3709 } while (iter != begReg);
3711 // We may treat doubles as floats for ARM because we could have partial circular
3712 // dependencies of a float with a lo/hi part of the double. We mark the
3713 // trashBy values for each slot of the double, so let the circular dependency
3714 // logic work its way out for floats rather than doubles. If a cycle has all
3715 // doubles, then optimize so that instead of two vmov.f32's to move a double,
3716 // we can use one vmov.f64.
3718 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
3720 destMemType = TYP_FLOAT;
3722 #endif // TARGET_ARM
3724 if (destMemType == TYP_REF)
3728 else if (destMemType == TYP_BYREF)
3732 else if (destMemType == TYP_DOUBLE)
3736 else if (destMemType == TYP_FLOAT)
3741 /* move the dest reg (begReg) in the extra reg */
3743 assert(xtraReg != REG_NA);
3745 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
3747 GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false);
3749 regSet.verifyRegUsed(xtraReg);
3751 *pXtraRegClobbered = true;
3752 /* start moving everything to its right place */
3754 while (srcReg != begReg)
3758 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
3759 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
3761 GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, srcRegNum, /* canSkip */ false);
3763 regSet.verifyRegUsed(destRegNum);
3765 /* mark 'src' as processed */
3766 noway_assert(srcReg < argMax);
3767 regArgTab[srcReg].processed = true;
3769 if (size == EA_8BYTE)
3770 regArgTab[srcReg + 1].processed = true;
3772 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
3774 /* move to the next pair */
3776 srcReg = regArgTab[srcReg].trashBy;
3778 varDscDest = varDscSrc;
3779 destMemType = varDscDest->TypeGet();
3781 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
3783 destMemType = TYP_FLOAT;
3786 varNumSrc = regArgTab[srcReg].varNum;
3787 varDscSrc = compiler->lvaGetDesc(varNumSrc);
3788 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
3790 if (destMemType == TYP_REF)
3794 else if (destMemType == TYP_DOUBLE)
3804 /* take care of the beginning register */
3806 noway_assert(srcReg == begReg);
3808 /* move the dest reg (begReg) in the extra reg */
3810 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
3812 GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, xtraReg, /* canSkip */ false);
3814 regSet.verifyRegUsed(destRegNum);
3815 /* mark the beginning register as processed */
3817 regArgTab[srcReg].processed = true;
3819 if (size == EA_8BYTE)
3820 regArgTab[srcReg + 1].processed = true;
3822 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
3827 /* Finally take care of the remaining arguments that must be enregistered */
3828 while (regArgMaskLive)
3830 regMaskTP regArgMaskLiveSave = regArgMaskLive;
3832 for (argNum = 0; argNum < argMax; argNum++)
3834 /* If already processed go to the next one */
3835 if (regArgTab[argNum].processed)
3840 if (regArgTab[argNum].slot == 0)
3841 { // Not a register argument
3845 varNum = regArgTab[argNum].varNum;
3846 varDsc = compiler->lvaGetDesc(varNum);
3847 const var_types regType = regArgTab[argNum].type;
3848 const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3849 const var_types varRegType = varDsc->GetRegisterType();
3851 #if defined(UNIX_AMD64_ABI)
3852 if (regType == TYP_UNDEF)
3854 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3855 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3856 // The next register file processing will process it.
3857 regArgMaskLive &= ~genRegMask(regNum);
3860 #endif // defined(UNIX_AMD64_ABI)
3862 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3864 // On x86 we don't enregister args that are not pointer sized.
3865 noway_assert(genTypeSize(varDsc->GetStackSlotHomeType()) == TARGET_POINTER_SIZE);
3866 #endif // TARGET_X86
3868 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
3870 /* Register argument - hopefully it stays in the same register */
3871 regNumber destRegNum = REG_NA;
3872 var_types destMemType = varDsc->GetRegisterType();
3874 if (regArgTab[argNum].slot == 1)
3876 destRegNum = varDsc->GetRegNum();
3879 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
3881 // The second half of the double has already been processed! Treat this as a single.
3882 destMemType = TYP_FLOAT;
3884 #endif // TARGET_ARM
3886 #ifndef TARGET_64BIT
3887 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
3889 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
3890 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
3892 destRegNum = regNum;
3896 destRegNum = varDsc->GetOtherReg();
3899 assert(destRegNum != REG_STK);
3903 assert(regArgTab[argNum].slot == 2);
3904 assert(destMemType == TYP_DOUBLE);
3906 // For doubles, we move the entire double using the argNum representing
3907 // the first half of the double. There are two things we won't do:
3908 // (1) move the double when the 1st half of the destination is free but the
3909 // 2nd half is occupied, and (2) move the double when the 2nd half of the
3910 // destination is free but the 1st half is occupied. Here we consider the
3911 // case where the first half can't be moved initially because its target is
3912 // still busy, but the second half can be moved. We wait until the entire
3913 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
3914 // and F2 single moving to F16. When we process F0, its target F2 is busy,
3915 // so we skip it on the first pass. When we process F1, its target F3 is
3916 // available. However, we want to move F0/F1 all at once, so we skip it here.
3917 // We process F2, which frees up F2. The next pass through, we process F0 and
3918 // F2/F3 are empty, so we move it. Note that if half of a double is involved
3919 // in a circularity with a single, then we will have already moved that half
3920 // above, so we go ahead and move the remaining half as a single.
3921 // Because there are no circularities left, we are guaranteed to terminate.
3924 assert(regArgTab[argNum - 1].slot == 1);
3926 if (!regArgTab[argNum - 1].processed)
3928 // The first half of the double hasn't been processed; try to be processed at the same time
3932 // The first half of the double has been processed but the second half hasn't!
3933 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
3934 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
3935 // dependency logic above will move them as singles, leaving just F3 to move. Treat
3936 // it as a single to finish the shuffling.
3938 destMemType = TYP_FLOAT;
3939 destRegNum = REG_NEXT(varDsc->GetRegNum());
3941 #endif // !TARGET_64BIT
3942 #if (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
3945 assert(regArgTab[argNum].slot == 2);
3947 assert(regArgTab[argNum - 1].slot == 1);
3948 assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
3949 destRegNum = varDsc->GetRegNum();
3950 noway_assert(regNum != destRegNum);
3953 #endif // (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
3954 noway_assert(destRegNum != REG_NA);
3955 if (destRegNum != regNum)
3957 /* Cannot trash a currently live register argument.
3958 * Skip this one until its target will be free
3959 * which is guaranteed to happen since we have no circular dependencies. */
3961 regMaskTP destMask = genRegMask(destRegNum);
3963 // Don't process the double until both halves of the destination are clear.
3964 if (genActualType(destMemType) == TYP_DOUBLE)
3966 assert((destMask & RBM_DBL_REGS) != 0);
3967 destMask |= genRegMask(REG_NEXT(destRegNum));
3971 if (destMask & regArgMaskLive)
3976 /* Move it to the new register */
3978 emitAttr size = emitActualTypeSize(destMemType);
3980 #if defined(TARGET_ARM64)
3981 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
3983 // For a SIMD type that is passed in two integer registers,
3984 // Limit the copy below to the first 8 bytes from the first integer register.
3985 // Handle the remaining 8 bytes from the second slot in the code further below
3986 assert(EA_SIZE(size) >= 8);
3990 inst_Mov(destMemType, destRegNum, regNum, /* canSkip */ false, size);
3993 /* mark the argument as processed */
3995 assert(!regArgTab[argNum].processed);
3996 regArgTab[argNum].processed = true;
3997 regArgMaskLive &= ~genRegMask(regNum);
3998 #if FEATURE_MULTIREG_ARGS
3999 int argRegCount = 1;
4001 if (genActualType(destMemType) == TYP_DOUBLE)
4006 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4007 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4010 int nextArgNum = argNum + 1;
4011 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4012 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4013 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4014 // and moves the 0th element of the src reg into the 1st element of the dest reg.
4015 GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0);
4016 // Set destRegNum to regNum so that we skip the setting of the register below,
4017 // but mark argNum as processed and clear regNum from the live mask.
4018 destRegNum = regNum;
4020 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4021 #ifdef TARGET_ARMARCH
4022 if (varDsc->lvIsHfa())
4024 // This includes both fixed-size SIMD types that are independently promoted, as well
4025 // as other HFA structs.
4026 argRegCount = varDsc->lvHfaSlots();
4027 if (argNum < (argMax - argRegCount + 1))
4029 if (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT)
4031 // For an HFA type that is passed in multiple registers and promoted, we copy each field to its
4032 // destination register.
4033 for (int i = 0; i < argRegCount; i++)
4035 int nextArgNum = argNum + i;
4036 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i);
4037 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4038 destRegNum = fieldVarDsc->GetRegNum();
4039 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4040 noway_assert(genIsValidFloatReg(nextRegNum));
4041 noway_assert(genIsValidFloatReg(destRegNum));
4042 GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, destRegNum, nextRegNum, /* canSkip */ false);
4045 #if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
4048 // For a SIMD type that is passed in multiple registers but enregistered as a vector,
4049 // the code above copies the first argument register into the lower 4 or 8 bytes
4050 // of the target register. Here we must handle the subsequent fields by
4051 // inserting them into the upper bytes of the target SIMD floating point register.
4052 argRegCount = varDsc->lvHfaSlots();
4053 for (int i = 1; i < argRegCount; i++)
4055 int nextArgNum = argNum + i;
4056 regArgElem* nextArgElem = ®ArgTab[nextArgNum];
4057 var_types nextArgType = nextArgElem->type;
4058 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, nextArgType);
4059 noway_assert(nextArgElem->varNum == varNum);
4060 noway_assert(genIsValidFloatReg(nextRegNum));
4061 noway_assert(genIsValidFloatReg(destRegNum));
4062 GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, nextRegNum, i, 0);
4065 #endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD)
4068 #endif // TARGET_ARMARCH
4070 // Mark the rest of the argument registers corresponding to this multi-reg type as
4071 // being processed and no longer live.
4072 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4074 int nextArgNum = argNum + regSlot;
4075 assert(!regArgTab[nextArgNum].processed);
4076 regArgTab[nextArgNum].processed = true;
4077 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
4078 regArgMaskLive &= ~genRegMask(nextRegNum);
4080 #endif // FEATURE_MULTIREG_ARGS
4083 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4086 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
4089 #pragma warning(pop)
4092 /*****************************************************************************
4093 * If any incoming stack arguments live in registers, load them.
//------------------------------------------------------------------------
// genEnregisterIncomingStackArgs: In the prolog, load incoming stack-homed
// parameters into their assigned registers.
//
// Walks the local variable table and, for each parameter that (a) is not a
// register argument (unless it was prespilled for profiling on Arm), (b) has
// been assigned a register, and (c) is live on entry to the method, emits a
// load from its stack home into varDsc->GetArgInitReg().
//
// NOTE(review): this view of the file elides some lines (braces, a few
// statements such as the `continue`s and the FPbased/tmp_offset declarations);
// the comments below describe only the visible code.
4095 void CodeGen::genEnregisterIncomingStackArgs()
4100 printf("*************** In genEnregisterIncomingStackArgs()\n");
4104 // OSR handles this specially -- see genEnregisterOSRArgsAndLocals
4106 assert(!compiler->opts.IsOSR());
4108 assert(compiler->compGeneratingProlog);
4110 unsigned varNum = 0;
4112 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// Scratch base register used to address frame slots whose offset doesn't fit
// in a load-instruction immediate; stays REG_NA until first needed.
4114 regNumber tmp_reg = REG_NA;
4117 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4119 /* Is this variable a parameter? */
4121 if (!varDsc->lvIsParam)
4126 /* If it's a register argument then it's already been taken care of.
4127 But, on Arm when under a profiler, we would have prespilled a register argument
4128 and hence here we need to load it from its prespilled location.
4130 bool isPrespilledForProfiling = false;
4131 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
4132 isPrespilledForProfiling =
4133 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4136 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4141 /* Has the parameter been assigned to a register? */
4143 if (!varDsc->lvIsInReg())
4148 /* Is the variable dead on entry */
4150 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4155 /* Load the incoming parameter into the register */
4157 /* Figure out the home offset of the incoming argument */
4159 regNumber regNum = varDsc->GetArgInitReg();
4160 assert(regNum != REG_STK);
4162 var_types regType = varDsc->GetStackSlotHomeType();
4163 #ifdef TARGET_LOONGARCH64
// On LoongArch64 a frame offset may not fit in the signed 12-bit immediate of
// the load instruction; large offsets are handled via REG_R21 below.
4166 int base = compiler->lvaFrameAddress(varNum, &FPbased);
4168 if (emitter::isValidSimm12(base))
// Offset fits in a simm12: load directly from the variable's frame slot.
4170 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0);
4174 if (tmp_reg == REG_NA)
4176 regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
// Materialize the large offset into REG_R21 and add the frame base (FP or SP)
// so the subsequent load can use a small fixed displacement (-8).
4180 GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base);
4181 GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2);
4182 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8);
// Reuse the previously computed base address: adjust by the delta from the
// earlier offset. NOTE(review): tmp_offset is declared/assigned on lines
// elided from this view -- confirm against the full file.
4186 int baseOffset = -(base - tmp_offset) - 8;
4187 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset);
4191 #else // !TARGET_LOONGARCH64
4192 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0);
4193 #endif // !TARGET_LOONGARCH64
4195 regSet.verifyRegUsed(regNum);
4199 /*-------------------------------------------------------------------------
4201 * We have to decide whether we're going to use block initialization
4202 * in the prolog before we assign final stack offsets. This is because
4203 * when using block initialization we may need additional callee-saved
4204 * registers which need to be saved on the frame, thus increasing the
4207 * We'll count the number of locals we have to initialize,
4208 * and if there are lots of them we'll use block initialization.
4209 * Thus, the local variable table must have accurate register location
4210 * information for enregistered locals for their register state on entry
4213 * At the same time we set lvMustInit for locals (enregistered or on stack)
4214 * that must be initialized (e.g. initialize memory (compInitMem),
4215 * untracked pointers or disable DFA)
//------------------------------------------------------------------------
// genCheckUseBlockInit: Decide whether the prolog will zero-initialize stack
// locals with a block operation, and mark which locals must be initialized.
//
// Counts, in int-sized slots, the stack locals that need zeroing; sets
// lvMustInit on locals that require initialization; records the count in
// genInitStkLclCnt; and sets genUseBlockInit when the count exceeds a
// per-target threshold. Must run before final stack offsets are assigned
// (block init may require extra callee-saved registers -- see file comment).
//
// NOTE(review): this view of the file elides some lines (braces, a few
// statements such as the varNum/varDsc declarations and parts of the
// initStkLclCnt accumulation); comments describe only the visible code.
4217 void CodeGen::genCheckUseBlockInit()
4219 assert(!compiler->compGeneratingProlog);
4221 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
4222 // larger than int count for more than 1).
4227 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4229 // The logic below is complex. Make sure we are not
4230 // double-counting the initialization impact of any locals.
4231 bool counted = false;
// A local that is neither in a register nor on the frame is unreferenced;
// it needs no initialization at all.
4233 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4235 noway_assert(varDsc->lvRefCnt() == 0);
4236 varDsc->lvMustInit = 0;
// Locals that are explicitly excluded from prolog zero-init: clear any stale
// lvMustInit flag and skip them.
4240 if (compiler->fgVarIsNeverZeroInitializedInProlog(varNum))
4242 varDsc->lvMustInit = 0;
4246 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4248 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4249 // initialized by the parent struct. No need to set the lvMustInit bit in the
4251 varDsc->lvMustInit = 0;
// A local with an explicit initialization in the code doesn't need prolog
// zeroing either.
4255 if (varDsc->lvHasExplicitInit)
4257 varDsc->lvMustInit = 0;
4261 const bool isTemp = varDsc->lvIsTemp;
4262 const bool hasGCPtr = varDsc->HasGCPtr();
4263 const bool isTracked = varDsc->lvTracked;
4264 const bool isStruct = varTypeIsStruct(varDsc);
4265 const bool compInitMem = compiler->info.compInitMem;
// Compiler temps without GC pointers never need zero-initialization.
4267 if (isTemp && !hasGCPtr)
4269 varDsc->lvMustInit = 0;
// From here on the local potentially needs initialization: either the method
// requires zero-init memory (compInitMem), the local holds GC pointers, or it
// was already flagged lvMustInit.
4273 if (compInitMem || hasGCPtr || varDsc->lvMustInit)
4277 /* For uninitialized use of tracked variables, the liveness
4278 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
4280 if (varDsc->lvMustInit ||
4281 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4283 /* This var must be initialized */
4285 varDsc->lvMustInit = 1;
4287 /* See if the variable that lives on the stack will be initialized
4288 * using rep stos - compute the total size to be zero-ed */
4290 if (varDsc->lvOnFrame)
4292 if (!varDsc->lvRegister)
4294 if (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)
4296 // Var is on the stack at entry.
4298 roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4304 // Var is partially enregistered
4305 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) &&
4306 varDsc->GetOtherReg() == REG_STK);
4307 initStkLclCnt += genTypeStSz(TYP_INT);
4314 if (varDsc->lvOnFrame)
4316 bool mustInitThisVar = false;
// Untracked GC locals must always be zeroed so the GC never sees garbage.
4317 if (hasGCPtr && !isTracked)
4319 JITDUMP("must init V%02u because it has a GC ref\n", varNum);
4320 mustInitThisVar = true;
4322 else if (hasGCPtr && isStruct)
4324 // TODO-1stClassStructs: support precise liveness reporting for such structs.
4325 JITDUMP("must init a tracked V%02u because it a struct with a GC ref\n", varNum);
4326 mustInitThisVar = true;
4330 // We are done with tracked or GC vars, now look at untracked vars without GC refs.
4333 assert(!hasGCPtr && !isTemp);
4336 JITDUMP("must init V%02u because compInitMem is set and it is not a temp\n", varNum);
4337 mustInitThisVar = true;
4341 if (mustInitThisVar)
4343 varDsc->lvMustInit = true;
4347 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4355 /* Don't forget about spill temps that hold pointers */
4356 assert(regSet.tmpAllFree());
4357 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4359 if (varTypeIsGC(tempThis->tdTempType()))
4365 // Record number of 4 byte slots that need zeroing.
4366 genInitStkLclCnt = initStkLclCnt;
4368 // Decide if we will do block initialization in the prolog, or use
4369 // a series of individual stores.
4371 // Primary factor is the number of slots that need zeroing. We've
4372 // been counting by sizeof(int) above. We assume for now we can
4373 // only zero register width bytes per store.
4375 // Current heuristic is to use block init when more than 4 stores
4378 // TODO: Consider taking into account the presence of large structs that
4379 // potentially only need some fields set to zero.
4381 // Compiler::fgVarNeedsExplicitZeroInit relies on this logic to
4382 // find structs that are guaranteed to be block initialized.
4383 // If this logic changes, Compiler::fgVarNeedsExplicitZeroInit needs
4385 CLANG_FORMAT_COMMENT_ANCHOR;
4388 #if defined(TARGET_AMD64)
4390 // We can clear using aligned SIMD so the threshold is lower,
4391 // and clears in order which is better for auto-prefetching
4392 genUseBlockInit = (genInitStkLclCnt > 4);
4394 #else // !defined(TARGET_AMD64)
4396 genUseBlockInit = (genInitStkLclCnt > 8);
4400 genUseBlockInit = (genInitStkLclCnt > 4);
4402 #endif // TARGET_64BIT
4404 if (genUseBlockInit)
4406 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4408 // If there is a secret stub param, don't count it, as it will no longer
4409 // be live when we do block init.
4410 if (compiler->info.compPublishStubParam)
4412 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4417 // On the Arm if we are using a block init to initialize, then we
4418 // must force spill R4/R5/R6 so that we can use them during
4419 // zero-initialization process.
4421 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
4422 if (forceSpillRegCount > 0)
4423 regSet.rsSetRegsModified(RBM_R4);
4424 if (forceSpillRegCount > 1)
4425 regSet.rsSetRegsModified(RBM_R5);
4426 if (forceSpillRegCount > 2)
4427 regSet.rsSetRegsModified(RBM_R6);
4428 #endif // TARGET_ARM
4432 /*****************************************************************************
4434 * initFltRegs -- The mask of float regs to be zeroed.
4435 * initDblRegs -- The mask of double regs to be zeroed.
4436 * initReg -- A zero initialized integer reg to copy from.
4438 * Does best effort to move between VFP/xmm regs if one is already
4439 * initialized to 0. (Arm Only) Else copies from the integer register which
//------------------------------------------------------------------------
// genZeroInitFltRegs: Zero-initialize the requested float/double registers
// in the prolog.
//
// Arguments:
//    initFltRegs - mask of float registers to set to zero
//    initDblRegs - mask of double registers to set to zero
//    initReg     - an integer register already holding zero, used as a
//                  copy source where the target has no cheaper way
//
// The first float (fltInitReg) and first double (dblInitReg) registers
// zeroed are remembered so subsequent registers can be initialized by a
// register-to-register copy/convert instead (the Arm path); xarch/arm64/
// loongarch64/riscv64 zero each register directly.
//
// NOTE(review): this view of the file elides some lines (braces, the
// assignments that record fltInitReg/dblInitReg, and some #if/#endif lines);
// comments describe only the visible code.
4442 void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
4444 assert(compiler->compGeneratingProlog);
4446 // The first float/double reg that is initialized to 0. So they can be used to
4447 // initialize the remaining registers.
4448 regNumber fltInitReg = REG_NA;
4449 regNumber dblInitReg = REG_NA;
4451 // Iterate through float/double registers and initialize them to 0 or
4452 // copy from already initialized register of the same type.
4453 regMaskTP regMask = genRegMask(REG_FP_FIRST);
4454 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
4456 if (regMask & initFltRegs)
4458 // Do we have a float register already set to 0?
4459 if (fltInitReg != REG_NA)
// Cheapest option: copy the existing zero float register.
4462 inst_Mov(TYP_FLOAT, reg, fltInitReg, /* canSkip */ false)
4467 // Do we have a double register initialized to 0?
4468 if (dblInitReg != REG_NA)
4470 // Copy from double.
4471 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
// Fall back to copying the zero from the integer register.
4476 inst_Mov(TYP_FLOAT, reg, initReg, /* canSkip */ false);
4478 #elif defined(TARGET_XARCH)
4479 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
4480 GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
4482 #elif defined(TARGET_ARM64)
4483 // We will just zero out the entire vector register. This sets it to a double/float zero value
4484 GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
4485 #elif defined(TARGET_LOONGARCH64)
4486 // We will just zero out the entire vector register. This sets it to a double/float zero value
4487 GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0);
4488 #elif defined(TARGET_RISCV64)
4489 GetEmitter()->emitIns_R_R(INS_fmv_w_x, EA_4BYTE, reg, REG_R0);
4491 #error Unsupported or unset target architecture
4496 else if (regMask & initDblRegs)
4498 // Do we have a double register already set to 0?
4499 if (dblInitReg != REG_NA)
4501 // Copy from double.
4502 inst_Mov(TYP_DOUBLE, reg, dblInitReg, /* canSkip */ false);
4507 // Do we have a float register initialized to 0?
4508 if (fltInitReg != REG_NA)
// Widen the existing zero float register into this double register.
4511 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
// Arm fallback: move the zero integer register into both halves of the
// double register.
4516 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
4518 #elif defined(TARGET_XARCH)
4519 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
4520 GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
4522 #elif defined(TARGET_ARM64)
4523 // We will just zero out the entire vector register. This sets it to a double/float zero value
4524 GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
4525 #elif defined(TARGET_LOONGARCH64)
4526 GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0);
4527 #elif defined(TARGET_RISCV64)
4528 GetEmitter()->emitIns_R_R(INS_fmv_d_x, EA_8BYTE, reg, REG_R0);
4530 #error Unsupported or unset target architecture
4538 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
4539 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
//------------------------------------------------------------------------
// genGetZeroReg: Return a register that holds the value zero.
//
// Arguments:
//    initReg        - a scratch register this method may zero
//    pInitRegZeroed - in/out; set to 'true' when this call zeroes initReg
//
// On ARM64/LoongArch64/RISC-V the architectural zero register is returned and
// initReg is left untouched (the per-target 'return' statements are elided
// from this view of the file -- confirm against the full source); on other
// targets initReg is zeroed on first use and returned.
4540 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
4544 #elif defined(TARGET_LOONGARCH64)
4546 #elif defined(TARGET_RISCV64)
4548 #else // !TARGET_ARM64
// Zero initReg only once per prolog; callers share *pInitRegZeroed so later
// calls can reuse the already-zeroed register.
4549 if (*pInitRegZeroed == false)
4551 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
4552 *pInitRegZeroed = true;
4555 #endif // !TARGET_ARM64
4558 //-----------------------------------------------------------------------------
4559 // genZeroInitFrame: Zero any untracked pointer locals and/or initialize memory for locspace
4562 // untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init
4563 // code will end initializing memory (not inclusive).
4564 // untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will
4565 // start zero initializing memory.
4566 // initReg - A scratch register (that gets set to zero on some platforms).
4567 // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero,
4568 // 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched.
4569 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
4571 assert(compiler->compGeneratingProlog);
// Fast path: a single block-init sequence covers the whole [untrLclLo, untrLclHi) range.
4573 if (genUseBlockInit)
4575 genZeroInitFrameUsingBlockInit(untrLclHi, untrLclLo, initReg, pInitRegZeroed);
// Otherwise, zero each lvMustInit stack local individually.
4577 else if (genInitStkLclCnt > 0)
4579 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // initReg is not a live incoming
4582 /* Initialize any lvMustInit vars on the stack */
4587 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4589 if (!varDsc->lvMustInit)
4594 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
4595 // partially-enregistered vars in the case where we don't use a block init.
4596 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame)&#59;
4598 // lvMustInit can only be set for GC types or TYP_STRUCT types
4599 // or when compInitMem is true
4600 // or when in debug code
4602 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
4603 compiler->info.compInitMem || compiler->opts.compDbgCode);
4605 if (!varDsc->lvOnFrame)
// Case 1: a struct local when compInitMem is false — only its GC-ref slots
// must be zeroed (the GC would otherwise scan garbage), not the whole struct.
4610 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
4611 (varDsc->lvExactSize() >= TARGET_POINTER_SIZE))
4613 // We only initialize the GC variables in the TYP_STRUCT
4614 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
4615 ClassLayout* layout = varDsc->GetLayout();
4617 for (unsigned i = 0; i < slots; i++)
4619 if (layout->IsGCPtr(i))
4621 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
4622 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
// Case 2: zero the entire local, pointer-sized stores first, then a trailing
// 4-byte store if the (int-rounded) size is not a multiple of REGSIZE_BYTES.
4628 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
4630 // zero out the whole thing rounded up to a single stack slot size
4631 unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
4633 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
4635 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
4639 assert(i == lclSize || (i + sizeof(int) == lclSize));
4642 GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
4645 #endif // TARGET_64BIT
4646 assert(i == lclSize);
// Also zero any GC-typed spill temps; like untracked locals, the GC will
// scan these slots, so they must not contain stale pointers.
4650 assert(regSet.tmpAllFree());
4651 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4653 if (!varTypeIsGC(tempThis->tdTempType()))
4658 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
4660 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
4665 //-----------------------------------------------------------------------------
4666 // genEnregisterOSRArgsAndLocals: Initialize any enregistered args or locals
4667 // that get values from the tier0 frame.
4670 // initReg -- scratch register to use if needed
4671 // pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit)
4673 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
4674 void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed)
4676 void CodeGen::genEnregisterOSRArgsAndLocals()
4679 assert(compiler->opts.IsOSR());
4680 PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
4682 // basic sanity checks (make sure we're OSRing the right method)
4683 assert(patchpointInfo->NumberOfLocals() == compiler->info.compLocalsCount);
4685 const int originalFrameSize = patchpointInfo->TotalFrameSize();
4686 const unsigned patchpointInfoLen = patchpointInfo->NumberOfLocals();
// Walk every local; skip the ones that need no prolog work, and load the
// rest from their tier0 frame slot into their assigned OSR register.
4688 for (unsigned varNum = 0; varNum < compiler->lvaCount; varNum++)
4690 if (!compiler->lvaIsOSRLocal(varNum))
4692 // This local was not part of the tier0 method's state.
4693 // No work required.
4698 LclVarDsc* const varDsc = compiler->lvaGetDesc(varNum);
4700 if (!varDsc->lvIsInReg())
4702 // For args/locals in memory, the OSR frame will continue to access
4703 // that memory location. No work required.
4705 JITDUMP("---OSR--- V%02u in memory\n", varNum);
4709 // This local was part of the live tier0 state and is enregistered in the
4710 // OSR method. Initialize the register from the right frame slot.
4712 // If we ever enable promotion we'll need to generalize what follows to copy each
4713 // field from the tier0 frame to its OSR home.
4715 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4717 // This arg or local is not live at entry to the OSR method.
4718 // No work required.
4720 JITDUMP("---OSR--- V%02u (reg) not live at entry\n", varNum);
// For a promoted field, the patchpoint info is keyed by the parent local;
// the field's offset within the parent is added to the parent's slot offset.
4724 int fieldOffset = 0;
4725 unsigned lclNum = varNum;
4727 if (varDsc->lvIsStructField)
4729 lclNum = varDsc->lvParentLcl;
4730 assert(lclNum < patchpointInfoLen);
4732 fieldOffset = varDsc->lvFldOffset;
4733 JITDUMP("---OSR--- V%02u is promoted field of V%02u at offset %d\n", varNum, lclNum, fieldOffset);
4736 // Note we are always reading from the tier0 frame here
4738 const var_types lclTyp = varDsc->GetStackSlotHomeType();
4739 const emitAttr size = emitActualTypeSize(lclTyp);
4740 const int stkOffs = patchpointInfo->Offset(lclNum) + fieldOffset;
4742 #if defined(TARGET_AMD64)
4744 // Original frames always use frame pointers, so
4745 // stkOffs is the tier0 frame's frame-relative offset
4748 // We need to determine the stack or frame-pointer relative
4749 // offset for this variable in the current frame.
4751 // If current frame does not use a frame pointer, we need to
4752 // add the SP-to-FP delta of this frame and the SP-to-FP delta
4753 // of the original frame; that translates from this frame's
4754 // stack pointer to the old frame's frame pointer.
4756 // We then add the original frame's frame-pointer relative
4757 // offset (note this offset is usually negative -- the stack
4758 // grows down, so locals are below the frame pointer).
4760 // /-----original frame-----/
4761 // / return address /
4762 // / saved RBP --+ / <--- Original frame ptr --+
4764 // / ... (stkOffs) / |
4766 // / variable --+ / |
4767 // / ... / (original frame sp-fp delta)
4769 // /-----OSR frame ---------/ |
4770 // / pseudo return address / --+
4772 // / ... / (this frame sp-fp delta)
4774 // /------------------------/ <--- Stack ptr --+
4776 // If the current frame is using a frame pointer, we need to
4777 // add the SP-to-FP delta of the original frame and then add
4778 // the original frame's frame-pointer relative offset.
4780 // /-----original frame-----/
4781 // / return address /
4782 // / saved RBP --+ / <--- Original frame ptr --+
4784 // / ... (stkOffs) / |
4786 // / variable --+ / |
4787 // / ... / (original frame sp-fp delta)
4789 // /-----OSR frame ---------/ |
4790 // / pseudo return address / --+
4791 // / saved RBP / <--- Frame ptr --+
4795 // /------------------------/
4797 int offset = originalFrameSize + stkOffs;
4799 if (isFramePointerUsed())
4801 // also adjust for saved RBP on this frame
4802 offset += TARGET_POINTER_SIZE;
4806 offset += genSPtoFPdelta();
4809 JITDUMP("---OSR--- V%02u (reg) old rbp offset %d old frame %d this frame sp-fp %d new offset %d (0x%02x)\n",
4810 varNum, stkOffs, originalFrameSize, genSPtoFPdelta(), offset, offset);
4812 GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset);
4814 #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
4816 // Patchpoint offset is from top of Tier0 frame
4818 // We need to determine the frame-pointer relative
4819 // offset for this variable in the osr frame.
4821 // First add the Tier0 frame size
4823 const int tier0FrameSize = compiler->info.compPatchpointInfo->TotalFrameSize();
4825 // then add the OSR frame size
4827 const int osrFrameSize = genTotalFrameSize();
4829 // then subtract OSR SP-FP delta
4831 const int osrSpToFpDelta = genSPtoFPdelta();
4833 // | => tier0 top of frame relative
4834 // | + => tier0 bottom of frame relative
4835 // | | + => osr bottom of frame (sp) relative
4836 // | | | - => osr fp relative
4838 const int offset = stkOffs + tier0FrameSize + osrFrameSize - osrSpToFpDelta;
4840 JITDUMP("---OSR--- V%02u (reg) Tier0 virtual offset %d OSR frame size %d OSR sp-fp "
4841 "delta %d total offset %d (0x%x)\n",
4842 varNum, stkOffs, osrFrameSize, osrSpToFpDelta, offset, offset);
// The load may need a large-immediate addressing sequence; that clobbers
// initReg, so record that it no longer holds zero.
4844 genInstrWithConstant(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset, initReg);
4845 *pInitRegZeroed = false;
4846 #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
4850 /*-----------------------------------------------------------------------------
4852 * Save the generic context argument.
4854 * We need to do this within the "prolog" in case anyone tries to inspect
4855 * the param-type-arg/this (which can be done after the prolog) using
4856 * ICodeManager::GetParamTypeArg().
// Arguments:
//    initReg        - scratch register that may be used to load the context arg
//    pInitRegZeroed - [IN,OUT] set to 'false' here if initReg gets clobbered
4859 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
4861 assert(compiler->compGeneratingProlog);
4863 const bool reportArg = compiler->lvaReportParamTypeArg();
// OSR methods don't store the context themselves: the Tier0 frame slot is
// reused, so only assert that the patchpoint info recorded it.
4865 if (compiler->opts.IsOSR())
4867 PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo;
4870 // OSR method will use Tier0 slot to report context arg.
4872 assert(ppInfo->HasGenericContextArgOffset());
4873 JITDUMP("OSR method will use Tier0 frame slot for generics context arg.\n");
4875 else if (compiler->lvaKeepAliveAndReportThis())
4877 // OSR method will use Tier0 slot to report `this` as context.
4879 assert(ppInfo->HasKeptAliveThis());
4880 JITDUMP("OSR method will use Tier0 frame slot for generics context `this`.\n");
4886 // We should report either generic context arg or "this" when used so.
4889 #ifndef JIT32_GCENCODER
4890 if (!compiler->lvaKeepAliveAndReportThis())
4897 // For JIT32_GCENCODER, we won't be here if reportArg is false.
4898 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
4900 noway_assert(contextArg != BAD_VAR_NUM);
4901 LclVarDsc* varDsc = compiler->lvaGetDesc(contextArg);
4903 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
4904 // moved to its final home location. So we need to use it from the
4905 // incoming location.
4909 bool isPrespilledForProfiling = false;
4910 #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
4911 isPrespilledForProfiling =
4912 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
4915 // Load from the argument register only if it is not prespilled.
4916 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
4918 reg = varDsc->GetArgReg();
// Context arg came in on the stack (or was prespilled): load it from its
// incoming stack location into initReg.
4922 if (isFramePointerUsed())
4924 #if defined(TARGET_ARM)
4925 // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
4926 // will become enregistered.
4927 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
4928 noway_assert((2 * REGSIZE_BYTES <= varDsc->GetStackOffset()) &&
4929 (size_t(varDsc->GetStackOffset()) < compiler->compArgSize + 2 * REGSIZE_BYTES));
4931 // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
4932 // will become enregistered.
4933 noway_assert((0 < varDsc->GetStackOffset()) && (size_t(varDsc->GetStackOffset()) < compiler->compArgSize));
4937 // We will just use the initReg since it is an available register
4938 // and we are probably done using it anyway...
4940 *pInitRegZeroed = false;
4942 // mov reg, [compiler->info.compTypeCtxtArg]
4943 GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4944 varDsc->GetStackOffset());
4945 regSet.verifyRegUsed(reg);
// Store the context into its cached frame slot, using whatever addressing
// helper each target needs for a possibly-large frame offset.
4948 #if defined(TARGET_ARM64)
4949 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4950 compiler->lvaCachedGenericContextArgOffset(), rsGetRsvdReg());
4951 #elif defined(TARGET_ARM)
4952 // ARM's emitIns_R_R_I automatically uses the reserved register if necessary.
4953 GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4954 compiler->lvaCachedGenericContextArgOffset());
4955 #elif defined(TARGET_LOONGARCH64)
4956 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4957 compiler->lvaCachedGenericContextArgOffset(), REG_R21);
4958 #elif defined(TARGET_RISCV64)
4959 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4960 compiler->lvaCachedGenericContextArgOffset(), rsGetRsvdReg());
4961 #else // !ARM64 !ARM !LOONGARCH64 !RISCV64
4962 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
4963 GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
4964 compiler->lvaCachedGenericContextArgOffset());
4965 #endif // !ARM64 !ARM !LOONGARCH64 !RISCV64
4968 /*****************************************************************************
4973 These instructions are just a reordering of the instructions used today.
4979 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
4981 add esp, LOCALS_SIZE / pop dummyReg
4991 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
4992 Everything else is similar, though in a different order.
4994 The security object will no longer be at a fixed offset. However, the
4995 offset can still be determined by looking up the GC-info and determining
4996 how many callee-saved registers are pushed.
5003 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
5005 add esp, LOCALS_SIZE / pop dummyReg
5009 (mov esp, ebp if there are no callee-saved registers)
5013 Double-aligned frame :
5014 --------------------
5016 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
5017 of callee-saved registers are pushed on the stack so that the locals
5018 themselves are qword-aligned. The instructions are the same as today,
5019 just in a different order.
5027 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
5029 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
5038 localloc (with ebp) frames :
5039 --------------------------
5041 The instructions are the same as today, just in a different order.
5042 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
5043 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
5050 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
5052 lea esp, [ebp-calleeSavedRegsPushedSize]
5056 (mov esp, ebp if there are no callee-saved registers)
5060 *****************************************************************************/
5062 /*****************************************************************************
5064 * Reserve space for a function prolog.
// Arguments:
//    block - the first basic block of the method; the prolog placeholder
//            instruction group is inserted ahead of it.
5067 void CodeGen::genReserveProlog(BasicBlock* block)
5069 assert(block != nullptr);
5071 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
5073 /* Nothing is live on entry to the prolog */
// Empty GC var set and zero gcref/byref register masks: the prolog itself
// establishes all liveness.
5075 GetEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
5078 /*****************************************************************************
5080 * Reserve space for a function epilog.
5083 void CodeGen::genReserveEpilog(BasicBlock* block)
// Start from the current GC register liveness; the return value register may
// be added below so it stays reported live through the epilog.
5085 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
5086 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
5088 /* The return value is special-cased: make sure it goes live for the epilog */
5090 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
5092 if (IsFullPtrRegMapRequired() && !jmpEpilog)
5094 if (varTypeIsGC(compiler->info.compRetNativeType))
5096 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
5098 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
// REG_INTRET goes into the gcref or byref mask depending on the return type.
5100 switch (compiler->info.compRetNativeType)
5103 gcrefRegsArg |= RBM_INTRET;
5106 byrefRegsArg |= RBM_INTRET;
5112 JITDUMP("Extending return value GC liveness to epilog\n");
5116 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
5118 assert(block != nullptr);
5119 const VARSET_TP& gcrefVarsArg(GetEmitter()->emitThisGCrefVars);
// 'last' tells the emitter whether this placeholder ends the method.
5120 bool last = (block->bbNext == nullptr);
5121 GetEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
5124 #if defined(FEATURE_EH_FUNCLETS)
5126 /*****************************************************************************
5128 * Reserve space for a funclet prolog.
5131 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
5133 assert(block != nullptr);
5135 /* Currently, no registers are live on entry to the prolog, except maybe
5136 the exception object. There might be some live stack vars, but they
5137 cannot be accessed until after the frame pointer is re-established.
5138 In order to potentially prevent emitting a death before the prolog
5139 and a birth right after it, we just report it as live during the
5140 prolog, and rely on the prolog being non-interruptible. Trust
5141 genCodeForBBlist to correctly initialize all the sets.
5143 We might need to relax these asserts if the VM ever starts
5144 restoring any registers, then we could have live-in reg vars...
// At most the exception object register may be live-in, and no byrefs.
5147 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
5148 noway_assert(gcInfo.gcRegByrefSetCur == 0);
5150 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
5152 GetEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
5153 gcInfo.gcRegByrefSetCur, false);
5156 /*****************************************************************************
5158 * Reserve space for a funclet epilog.
5161 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
5163 assert(block != nullptr);
5165 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
// Pass the current GC liveness through; 'last' marks the method's final IG.
5167 bool last = (block->bbNext == nullptr);
5168 GetEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
5169 gcInfo.gcRegByrefSetCur, last);
5172 #endif // FEATURE_EH_FUNCLETS
5174 /*****************************************************************************
5175 * Finalize the frame size and offset assignments.
5177 * No changes can be made to the modified register set after this, since that can affect how many
5178 * callee-saved registers get saved.
5180 void CodeGen::genFinalizeFrame()
5182 JITDUMP("Finalizing stack frame\n");
5184 // Initializations need to happen based on the var locations at the start
5185 // of the first basic block, so load those up. In particular, the determination
5186 // of whether or not to use block init in the prolog is dependent on the variable
5187 // locations on entry to the function.
5188 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
5190 genCheckUseBlockInit();
5192 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
5193 CLANG_FORMAT_COMMENT_ANCHOR;
5195 #if defined(TARGET_X86)
5197 if (compiler->compTailCallUsed)
5199 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
5200 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
5201 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
5202 // actually get saved.
5204 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
5206 #endif // TARGET_X86
5209 // Make sure that the callee-saved registers used by the stack-probing helper call are pushed on the stack.
5210 if (compiler->compLclFrameSize >= compiler->eeGetPageSize())
5212 regSet.rsSetRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
5213 RBM_STACK_PROBE_HELPER_TRASH);
5216 // If there are any reserved registers, add them to the modified set.
5217 if (regSet.rsMaskResvd != RBM_NONE)
5219 regSet.rsSetRegsModified(regSet.rsMaskResvd);
5221 #endif // TARGET_ARM
5226 printf("Modified regs: ");
5227 dspRegMask(regSet.rsGetModifiedRegsMask());
5232 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
5233 if (compiler->opts.compDbgEnC)
5235 // We always save FP.
5236 noway_assert(isFramePointerUsed());
5237 #if defined(TARGET_AMD64) || defined(TARGET_ARM64)
// EnC restricts the modified set to registers the runtime can remap.
5238 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED);
5239 if (RBM_ENC_CALLEE_SAVED != 0)
5241 regSet.rsSetRegsModified(RBM_ENC_CALLEE_SAVED);
5243 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
5244 #else // !TARGET_AMD64 && !TARGET_ARM64
5245 // On x86 we save all callee saved regs so the saved reg area size is consistent
5246 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
5247 #endif // !TARGET_AMD64 && !TARGET_ARM64
5250 /* If we have any pinvoke calls, we might potentially trash everything */
5251 if (compiler->compMethodRequiresPInvokeFrame())
5253 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
5254 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
5257 #ifdef UNIX_AMD64_ABI
5258 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
5259 if (compiler->compIsProfilerHookNeeded())
5261 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
5265 /* Count how many callee-saved registers will actually be saved (pushed) */
5267 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
5268 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
5271 // EBP cannot be (directly) modified
5272 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
5275 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5277 #ifdef TARGET_ARMARCH
5278 if (isFramePointerUsed())
5280 // For a FP based frame we have to push/pop the FP register
5282 maskCalleeRegsPushed |= RBM_FPBASE;
5284 // This assert checks that we are not using REG_FP
5285 // as both the frame pointer and as a codegen register
5287 assert(!regSet.rsRegsModified(RBM_FPBASE));
5290 // we always push LR. See genPushCalleeSavedRegisters
5292 maskCalleeRegsPushed |= RBM_LR;
5294 #if defined(TARGET_ARM)
5295 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
5296 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
5297 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
5299 if ((maskPushRegsFloat != RBM_NONE) ||
5300 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
5302 // Here we try to keep stack double-aligned before the vpush
5303 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
// Push one extra integer register (first free one from R4 upward) so
// the integer push count is even, keeping 8-byte stack alignment.
5305 regNumber extraPushedReg = REG_R4;
5306 while (maskPushRegsInt & genRegMask(extraPushedReg))
5308 extraPushedReg = REG_NEXT(extraPushedReg);
5310 if (extraPushedReg < REG_R11)
5312 maskPushRegsInt |= genRegMask(extraPushedReg);
5313 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
5316 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
5319 // We currently only expect to push/pop consecutive FP registers
5320 // and these have to be double-sized registers as well.
5321 // Here we will ensure that maskPushRegsFloat obeys these requirements.
5323 if (maskPushRegsFloat != RBM_NONE)
// Grow a contiguous register mask starting at F16 (two registers at a
// time) until it covers every float register we intend to push.
5325 regMaskTP contiguousMask = genRegMaskFloat(REG_F16);
5326 while (maskPushRegsFloat > contiguousMask)
5328 contiguousMask <<= 2;
5329 contiguousMask |= genRegMaskFloat(REG_F16);
5331 if (maskPushRegsFloat != contiguousMask)
5333 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
5334 maskPushRegsFloat |= maskExtraRegs;
5335 regSet.rsSetRegsModified(maskExtraRegs);
5336 maskCalleeRegsPushed |= maskExtraRegs;
5339 #endif // TARGET_ARM
5340 #endif // TARGET_ARMARCH
5342 #if defined(TARGET_XARCH)
5343 // Compute the count of callee saved float regs saved on stack.
5344 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm31)
5345 // regs are stack allocated and preserved in their stack locations.
5346 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
5347 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
5348 #endif // defined(TARGET_XARCH)
5350 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5351 if (isFramePointerUsed())
5353 // For a FP based frame we have to push/pop the FP register
5355 maskCalleeRegsPushed |= RBM_FPBASE;
5357 // This assert checks that we are not using REG_FP
5358 // as both the frame pointer and as a codegen register
5360 assert(!regSet.rsRegsModified(RBM_FPBASE));
5363 // we always push RA. See genPushCalleeSavedRegisters
5364 maskCalleeRegsPushed |= RBM_RA;
5365 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
5367 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
5372 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
5373 dspRegMask(maskCalleeRegsPushed);
5378 /* Assign the final offsets to things living on the stack frame */
5380 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
5383 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
5385 compiler->lvaTableDump();
5390 /*****************************************************************************
5392 * Generates code for a function prolog.
5394 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
5396 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
5397 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
5398 * only instructions which result in control not going to the next instruction. Basically, any time execution would
5399 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
5400 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
5401 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
5403 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
5404 * debugger team to ensure that stepping still works.
5406 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
5410 #pragma warning(push)
5411 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5413 void CodeGen::genFnProlog()
5415 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
5417 compiler->funSetCurrentFunc(0);
5422 printf("*************** In genFnProlog()\n");
5427 genInterruptibleUsed = true;
5430 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
5432 /* Ready to start on the prolog proper */
5434 GetEmitter()->emitBegProlog();
5435 compiler->unwindBegProlog();
5437 // Do this so we can put the prolog instruction group ahead of
5438 // other instruction groups
5439 genIPmappingAddToFront(IPmappingDscKind::Prolog, DebugInfo(), true);
5442 if (compiler->opts.dspCode)
5444 printf("\n__prolog:\n");
5448 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
5450 // Create new scopes for the method-parameters for the prolog-block.
5454 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5455 // For arm64 OSR, emit a "phantom prolog" to account for the actions taken
5456 // in the tier0 frame that impact FP and SP on entry to the OSR method.
5458 // x64 handles this differently; the phantom prolog unwind is emitted in
5459 // genOSRRecordTier0CalleeSavedRegistersAndFrame.
5461 if (compiler->opts.IsOSR())
5463 PatchpointInfo* patchpointInfo = compiler->info.compPatchpointInfo;
5464 const int tier0FrameSize = patchpointInfo->TotalFrameSize();
5466 // SP is tier0 method's SP.
5467 compiler->unwindAllocStack(tier0FrameSize);
5469 #endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5473 if (compiler->compJitHaltMethod())
5475 /* put a nop first because the debugger and other tools are likely to
5476 put an int3 at the beginning and we don't want to confuse them */
5479 instGen(INS_BREAKPOINT);
5481 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5482 // Avoid asserts in the unwind info because these instructions aren't accounted for.
5483 compiler->unwindPadding();
5484 #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
5488 #if defined(FEATURE_EH_FUNCLETS) && defined(DEBUG)
5490 // We cannot force 0-initialization of the PSPSym
5491 // as it will overwrite the real value
5492 if (compiler->lvaPSPSym != BAD_VAR_NUM)
5494 const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->lvaPSPSym);
5495 assert(!varDsc->lvMustInit);
5498 #endif // FEATURE_EH_FUNCLETS && DEBUG
5500 /*-------------------------------------------------------------------------
5502 * Record the stack frame ranges that will cover all of the tracked
5503 * and untracked pointer variables.
5504 * Also find which registers will need to be zero-initialized.
5506 * 'initRegs': - Generally, enregistered variables should not need to be
5507 * zero-inited. They only need to be zero-inited when they
5508 * have a possibly uninitialized read on some control
5509 * flow path. Apparently some of the IL_STUBs that we
5510 * generate have this property.
5513 int untrLclLo = +INT_MAX;
5514 int untrLclHi = -INT_MAX;
5515 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
5516 // Note that they may be tracked, but simply not allocated to a register.
5517 bool hasUntrLcl = false;
5519 int GCrefLo = +INT_MAX;
5520 int GCrefHi = -INT_MAX;
5521 bool hasGCRef = false;
5523 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
5524 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
5525 regMaskTP initDblRegs = RBM_NONE;
5530 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
5532 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
5537 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
5539 noway_assert(varDsc->lvRefCnt() == 0);
5543 signed int loOffs = varDsc->GetStackOffset();
5544 signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclSize(varNum);
5546 /* We need to know the offset range of tracked stack GC refs */
5547 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
5549 if (varDsc->HasGCPtr() && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
5551 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
5552 // taken care of by the parent struct.
5553 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5557 if (loOffs < GCrefLo)
5561 if (hiOffs > GCrefHi)
5568 /* For lvMustInit vars, gather pertinent info */
5570 if (!varDsc->lvMustInit)
5575 bool isInReg = varDsc->lvIsInReg();
5576 bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr;
5578 // Note that 'lvIsInReg()' will only be accurate for variables that are actually live-in to
5579 // the first block. This will include all possibly-uninitialized locals, whose liveness
5580 // will naturally propagate up to the entry block. However, we also set 'lvMustInit' for
5581 // locals that are live-in to a finally block, and those may not be live-in to the first
5582 // block. For those, we don't want to initialize the register, as it will not actually be
5583 // occupying it on entry.
5586 if (compiler->lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr)
5588 isInReg = VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex);
5592 assert(VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
5598 regNumber regForVar = varDsc->GetRegNum();
5599 regMaskTP regMask = genRegMask(regForVar);
5600 if (!genIsValidFloatReg(regForVar))
5602 initRegs |= regMask;
5604 if (varTypeIsMultiReg(varDsc))
5606 if (varDsc->GetOtherReg() != REG_STK)
5608 initRegs |= genRegMask(varDsc->GetOtherReg());
5612 /* Upper DWORD is on the stack, and needs to be inited */
5614 loOffs += sizeof(int);
5619 else if (varDsc->TypeGet() == TYP_DOUBLE)
5621 initDblRegs |= regMask;
5625 initFltRegs |= regMask;
5634 if (loOffs < untrLclLo)
5638 if (hiOffs > untrLclHi)
5645 /* Don't forget about spill temps that hold pointers */
5647 assert(regSet.tmpAllFree());
5648 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
5650 if (!varTypeIsGC(tempThis->tdTempType()))
5655 signed int loOffs = tempThis->tdTempOffs();
5656 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
5658 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
5659 // previous frame pointer. Thus, stkOffs can't be zero.
5660 CLANG_FORMAT_COMMENT_ANCHOR;
5662 #if !defined(TARGET_AMD64)
5663 // However, on amd64 there is no requirement to chain frame pointers.
5665 noway_assert(!isFramePointerUsed() || loOffs != 0);
5666 #endif // !defined(TARGET_AMD64)
5668 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
5672 if (loOffs < untrLclLo)
5676 if (hiOffs > untrLclHi)
5682 // TODO-Cleanup: Add suitable assert for the OSR case.
5683 assert(compiler->opts.IsOSR() || ((genInitStkLclCnt > 0) == hasUntrLcl));
5688 if (genInitStkLclCnt > 0)
5690 printf("Found %u lvMustInit int-sized stack slots, frame offsets %d through %d\n", genInitStkLclCnt,
5691 -untrLclLo, -untrLclHi);
5697 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
5698 // Ditto for all enregistered user arguments in a varargs method.
5699 // These registers will be available to use for the initReg. We just remove
5700 // all of these registers from the rsCalleeRegArgMaskLiveIn.
5702 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
5705 /* Choose the register to use for zero initialization */
5707 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
5709 // Track if initReg holds non-zero value. Start conservative and assume it has non-zero value.
5710 // If initReg is ever set to zero, this variable is set to true and zero initializing initReg
5712 bool initRegZeroed = false;
5713 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
5716 // We should not use the special PINVOKE registers as the initReg
5717 // since they are trashed by the jithelper call to setup the PINVOKE frame
5718 if (compiler->compMethodRequiresPInvokeFrame())
5720 excludeMask |= RBM_PINVOKE_FRAME;
5722 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
5723 if (!compiler->opts.ShouldUsePInvokeHelpers())
5725 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
5727 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
5729 const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->info.compLvFrameListRoot);
5730 if (varDsc->lvRegister)
5732 excludeMask |= genRegMask(varDsc->GetRegNum());
5738 // If we have a variable sized frame (compLocallocUsed is true)
5739 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
5740 if (compiler->compLocallocUsed)
5742 excludeMask |= RBM_SAVED_LOCALLOC_SP;
5744 #endif // TARGET_ARM
5746 const bool isRoot = (compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT);
5749 const bool isOSRx64Root = isRoot && compiler->opts.IsOSR();
5751 const bool isOSRx64Root = false;
5752 #endif // TARGET_AMD64
5754 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
5756 if (tempMask != RBM_NONE)
5758 // We will use one of the registers that we were planning to zero init anyway.
5759 // We pick the lowest register number.
5760 tempMask = genFindLowestBit(tempMask);
5761 initReg = genRegNumFromMask(tempMask);
5763 // Next we prefer to use one of the unused argument registers.
5764 // If they aren't available we use one of the caller-saved integer registers.
5767 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
5768 if (tempMask != RBM_NONE)
5770 // We pick the lowest register number
5771 tempMask = genFindLowestBit(tempMask);
5772 initReg = genRegNumFromMask(tempMask);
5776 #if defined(TARGET_AMD64)
5777 // For x64 OSR root frames, we can't use any as of yet unsaved
5778 // callee save as initReg, as we defer saving these until later in
5779 // the prolog, and we don't have normal arg regs.
5782 initReg = REG_SCRATCH; // REG_EAX
5784 #elif defined(TARGET_ARM64)
5785 // For arm64 OSR root frames, we may need a scratch register for large
5786 // offset addresses. Use a register that won't be allocated.
5788 if (isRoot && compiler->opts.IsOSR())
5792 #elif defined(TARGET_RISCV64)
5793 // For RISC-V64 OSR root frames, we may need a scratch register for large
5794 // offset addresses. Use a register that won't be allocated.
5795 if (isRoot && compiler->opts.IsOSR())
5797 initReg = REG_SCRATCH; // REG_T0
5801 #if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
5802 // For LoongArch64's OSR root frames, we may need a scratch register for large
5803 // offset addresses. But this does not conflict with the REG_PINVOKE_FRAME.
5805 // RISC-V64's OSR root frames are similar to LoongArch64's. In this case
5806 // REG_SCRATCH also shouldn't conflict with REG_PINVOKE_FRAME, even if
5807 // technically they are the same register - REG_T0.
5809 noway_assert(!compiler->compMethodRequiresPInvokeFrame() || (initReg != REG_PINVOKE_FRAME));
5810 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
5812 #if defined(TARGET_AMD64)
5813 // If we are a varargs call, in order to set up the arguments correctly this
5814 // must be done in a 2 step process. As per the x64 ABI:
5815 // a) The caller sets up the argument shadow space (just before the return
5816 // address, 4 pointer sized slots).
5817 // b) The callee is responsible to home the arguments on the shadow space
5818 // provided by the caller.
5819 // This way, the varargs iterator will be able to retrieve the
5820 // call arguments properly since both the arg regs and the stack allocated
5821 // args will be contiguous.
5823 // OSR methods can skip this, as the setup is done by the original method.
5824 if (compiler->info.compIsVarArgs && !compiler->opts.IsOSR())
5826 GetEmitter()->spillIntArgRegsToShadowSlots();
5829 #endif // TARGET_AMD64
5832 /*-------------------------------------------------------------------------
5834 * Now start emitting the part of the prolog which sets up the frame
5837 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
5839 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
5840 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
5842 #endif // TARGET_ARM
5844 unsigned extraFrameSize = 0;
5851 // Account for the Tier0 callee saves
5853 genOSRRecordTier0CalleeSavedRegistersAndFrame();
5855 // We don't actually push any callee saves on the OSR frame,
5856 // but we still reserve space, so account for this when
5857 // allocating the local frame.
5859 extraFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
5861 #endif // TARGET_AMD64
5863 if (doubleAlignOrFramePointerUsed())
5865 // OSR methods handle "saving" FP specially.
5867 // For epilog and unwind, we restore the RBP saved by the
5868 // Tier0 method. The save we do here is just to set up a
5869 // proper RBP-based frame chain link.
5871 if (isOSRx64Root && isFramePointerUsed())
5873 GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0);
5874 inst_RV(INS_push, initReg, TYP_REF);
5875 initRegZeroed = false;
5877 // We account for the SP movement in unwind, but not for
5878 // the "save" of RBP.
5880 compiler->unwindAllocStack(REGSIZE_BYTES);
5884 inst_RV(INS_push, REG_FPBASE, TYP_REF);
5885 compiler->unwindPush(REG_FPBASE);
5887 #ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp"
5888 genEstablishFramePointer(0, /*reportUnwindData*/ true);
5889 #endif // !TARGET_AMD64
5892 if (compiler->genDoubleAlign())
5894 noway_assert(isFramePointerUsed() == false);
5895 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
5897 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
5899 #endif // DOUBLE_ALIGN
5901 #endif // TARGET_XARCH
5903 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
5904 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
5906 #else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5910 genPushCalleeSavedRegisters();
5912 #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5915 bool needToEstablishFP = false;
5916 int afterLclFrameSPtoFPdelta = 0;
5917 if (doubleAlignOrFramePointerUsed())
5919 needToEstablishFP = true;
5921 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
5922 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
5923 // too big, we go ahead and do it here.
5925 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
5926 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
5927 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
5929 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
5930 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
5931 needToEstablishFP = false;
5934 #endif // TARGET_ARM
5936 //-------------------------------------------------------------------------
5938 // Subtract the local frame size from SP.
5940 //-------------------------------------------------------------------------
5941 CLANG_FORMAT_COMMENT_ANCHOR;
5943 #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
5944 regMaskTP maskStackAlloc = RBM_NONE;
5947 maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize + extraFrameSize,
5948 regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
5949 #endif // TARGET_ARM
5951 if (maskStackAlloc == RBM_NONE)
5953 genAllocLclFrame(compiler->compLclFrameSize + extraFrameSize, initReg, &initRegZeroed,
5954 intRegState.rsCalleeRegArgMaskLiveIn);
5956 #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
5959 // For x64 OSR we have to finish saving int callee saves.
5963 genOSRSaveRemainingCalleeSavedRegisters();
5965 #endif // TARGET_AMD64
5967 //-------------------------------------------------------------------------
5970 if (compiler->compLocallocUsed)
5972 GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE, /* canSkip */ false);
5973 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
5974 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
5976 #endif // TARGET_ARMARCH
5978 #if defined(TARGET_XARCH)
5979 // Preserve callee saved float regs to stack.
5980 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
5981 #endif // defined(TARGET_XARCH)
5984 // Establish the AMD64 frame pointer after the OS-reported prolog.
5985 if (doubleAlignOrFramePointerUsed())
5987 const bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
5988 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
5990 #endif // TARGET_AMD64
5991 compiler->unwindEndProlog();
5993 //-------------------------------------------------------------------------
5995 // This is the end of the OS-reported prolog for purposes of unwinding
5997 //-------------------------------------------------------------------------
6000 if (needToEstablishFP)
6002 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
6003 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
6005 #endif // TARGET_ARM
6007 if (compiler->info.compPublishStubParam)
6009 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM,
6010 compiler->lvaStubArgumentVar, 0);
6011 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
6013 // It's no longer live; clear it out so it can be used after this in the prolog
6014 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
6018 // Zero out the frame as needed
6021 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
6023 #if defined(FEATURE_EH_FUNCLETS)
6025 genSetPSPSym(initReg, &initRegZeroed);
6027 #else // !FEATURE_EH_FUNCLETS
6029 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
6030 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
6032 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
6033 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
6035 // Zero out the slot for nesting level 0
6036 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
6040 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6041 initRegZeroed = true;
6044 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
6048 #endif // !FEATURE_EH_FUNCLETS
6050 genReportGenericContextArg(initReg, &initRegZeroed);
6052 #ifdef JIT32_GCENCODER
6053 // Initialize the LocalAllocSP slot if there is localloc in the function.
6054 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
6056 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
6058 #endif // JIT32_GCENCODER
6060 // Set up the GS security cookie
6062 genSetGSSecurityCookie(initReg, &initRegZeroed);
6064 #ifdef PROFILING_SUPPORTED
6066 // Insert a function entry callback for profiling, if requested.
6067 // OSR methods aren't called, so don't have enter hooks.
6068 if (!compiler->opts.IsOSR())
6070 genProfilingEnterCallback(initReg, &initRegZeroed);
6073 #endif // PROFILING_SUPPORTED
6075 // For OSR we may have a zero-length prolog. That's not supported
6076 // when the method must report a generics context, so add a nop if so.
6078 if (compiler->opts.IsOSR() && (GetEmitter()->emitGetPrologOffsetEstimate() == 0) &&
6079 (compiler->lvaReportParamTypeArg() || compiler->lvaKeepAliveAndReportThis()))
6081 JITDUMP("OSR: prolog was zero length and has generic context to report: adding nop to pad prolog.\n");
6085 if (!GetInterruptible())
6087 // The 'real' prolog ends here for non-interruptible methods.
6088 // For fully-interruptible methods, we extend the prolog so that
6089 // we do not need to track GC information while shuffling the
6091 GetEmitter()->emitMarkPrologEnd();
6094 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
6095 // The unused bits of Vector3 arguments must be cleared
6096 // since native compiler doesn't initialize the upper bits to zeros.
6098 // TODO-Cleanup: This logic can be implemented in
6099 // genFnPrologCalleeRegArgs() for argument registers and
6100 // genEnregisterIncomingStackArgs() for stack arguments.
6101 genClearStackVec3ArgUpperBits();
6102 #endif // UNIX_AMD64_ABI && FEATURE_SIMD
6104 /*-----------------------------------------------------------------------------
6105 * Take care of register arguments first
6108 // Home incoming arguments and generate any required inits.
6109 // OSR handles this by moving the values from the original frame.
6111 // Update the arg initial register locations.
6113 if (compiler->opts.IsOSR())
6115 // For OSR we defer updating "initial reg" for args until
6116 // we've set the live-in regs with values from the Tier0 frame.
6118 // Otherwise we'll do some of these fetches twice.
6120 CLANG_FORMAT_COMMENT_ANCHOR;
6121 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6122 genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed);
6124 genEnregisterOSRArgsAndLocals();
6126 compiler->lvaUpdateArgsWithInitialReg();
6130 compiler->lvaUpdateArgsWithInitialReg();
6132 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6133 if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn)
6135 initRegZeroed = false;
6136 genFnPrologCalleeRegArgs();
6139 auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) {
6140 if (regState->rsCalleeRegArgMaskLiveIn)
6142 // If we need an extra register to shuffle around the incoming registers
6143 // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
6144 // if we don't need to use the xtraReg then this flag will stay false
6147 bool xtraRegClobbered = false;
6149 if (genRegMask(initReg) & RBM_ARG_REGS)
6155 xtraReg = REG_SCRATCH;
6156 initRegZeroed = false;
6159 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
6161 if (xtraRegClobbered)
6163 initRegZeroed = false;
6168 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM)
6169 assignIncomingRegisterArgs(&intRegState);
6170 assignIncomingRegisterArgs(&floatRegState);
6172 assignIncomingRegisterArgs(&intRegState);
6173 #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
6175 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
6177 // Home the incoming arguments.
6178 genEnregisterIncomingStackArgs();
6181 /* Initialize any must-init registers variables now */
6185 regMaskTP regMask = 0x1;
6187 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
6189 if (regMask & initRegs)
6191 // Check if we have already zeroed this register
6192 if ((reg == initReg) && initRegZeroed)
6198 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
6201 initRegZeroed = true;
6208 if (initFltRegs | initDblRegs)
6210 // If initReg is not in initRegs then we will use REG_SCRATCH
6211 if ((genRegMask(initReg) & initRegs) == 0)
6213 initReg = REG_SCRATCH;
6214 initRegZeroed = false;
6218 // This is needed only for Arm since it can use a zero initialized int register
6219 // to initialize vfp registers.
6222 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6223 initRegZeroed = true;
6225 #endif // TARGET_ARM
6227 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
6230 //-----------------------------------------------------------------------------
6233 // Increase the prolog size here only if fully interruptible.
6236 if (GetInterruptible())
6238 GetEmitter()->emitMarkPrologEnd();
6240 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
6247 GetEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
6251 noway_assert(GCrefLo == +INT_MAX);
6252 noway_assert(GCrefHi == -INT_MAX);
6256 if (compiler->opts.dspCode)
6263 // On non-x86 the VARARG cookie does not need any special treatment.
6265 // Load up the VARARG argument pointer register so it doesn't get clobbered.
6266 // only do this if we actually access any statically declared args
6267 // (our argument pointer register has a refcount > 0).
6268 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
6270 if (compiler->info.compIsVarArgs && compiler->lvaGetDesc(argsStartVar)->lvRefCnt() > 0)
6272 varDsc = compiler->lvaGetDesc(argsStartVar);
6274 noway_assert(compiler->info.compArgsCount > 0);
6276 // MOV EAX, <VARARGS HANDLE>
6277 GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
6278 regSet.verifyRegUsed(REG_EAX);
6281 GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
6283 // EDX might actually be holding something here. So make sure to only use EAX for this code
6286 const LclVarDsc* lastArg = compiler->lvaGetDesc(compiler->info.compArgsCount - 1);
6287 noway_assert(!lastArg->lvRegister);
6288 signed offset = lastArg->GetStackOffset();
6289 assert(offset != BAD_STK_OFFS);
6290 noway_assert(lastArg->lvFramePointerBased);
6292 // LEA EAX, &<VARARGS HANDLE> + EAX
6293 GetEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
6295 if (varDsc->lvIsInReg())
6297 GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_EAX, /* canSkip */ true);
6298 regSet.verifyRegUsed(varDsc->GetRegNum());
6302 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
6306 #endif // TARGET_X86
6308 #if defined(DEBUG) && defined(TARGET_XARCH)
6309 if (compiler->opts.compStackCheckOnRet)
6311 assert(compiler->lvaReturnSpCheck != BAD_VAR_NUM);
6312 assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvDoNotEnregister);
6313 assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvOnFrame);
6314 GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
6316 #endif // defined(DEBUG) && defined(TARGET_XARCH)
6318 GetEmitter()->emitEndProlog();
6321 #pragma warning(pop)
6324 //------------------------------------------------------------------------
6325 // getCallTarget - Get the node that evaluates to the call target
6328 // call - the GT_CALL node
6331 // The node. Note that for direct calls this may still return non-null if the direct call
6332 // requires a 'complex' tree to load the target (e.g. in R2R or because we go through a stub).
6334 GenTree* CodeGen::getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd)
6336 // all virtuals should have been expanded into a control expression by this point.
6337 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
6339 if (call->gtCallType == CT_INDIRECT)
6341 assert(call->gtControlExpr == nullptr);
6343 if (methHnd != nullptr)
6348 return call->gtCallAddr;
6351 if (methHnd != nullptr)
6353 *methHnd = call->gtCallMethHnd;
6356 return call->gtControlExpr;
6359 //------------------------------------------------------------------------
6360 // getCallIndirectionCellReg - Get the register containing the indirection cell for a call
6366 // The register containing the indirection cell, or REG_NA if this call does not use an indirection cell argument.
6369 // We currently use indirection cells for VSD on all platforms and for R2R calls on ARM architectures.
6371 regNumber CodeGen::getCallIndirectionCellReg(GenTreeCall* call)
6373 regNumber result = REG_NA;
6374 switch (call->GetIndirectionCellArgKind())
6376 case WellKnownArg::None:
6378 case WellKnownArg::R2RIndirectionCell:
6379 result = REG_R2R_INDIRECT_PARAM;
6381 case WellKnownArg::VirtualStubCell:
6382 result = compiler->virtualStubParamInfo->GetReg();
6389 if (call->GetIndirectionCellArgKind() != WellKnownArg::None)
6391 CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(call->GetIndirectionCellArgKind());
6392 assert((indirCellArg != nullptr) && (indirCellArg->AbiInfo.GetRegNum() == result));
6399 //------------------------------------------------------------------------
6400 // genDefinePendingLabel - If necessary, define the pending call label after a
6401 // call instruction was emitted.
6404 // call - the call node
6406 void CodeGen::genDefinePendingCallLabel(GenTreeCall* call)
6408 // for pinvoke/intrinsic/tailcalls we may have needed to get the address of
6410 if (!genPendingCallLabel)
6415 // For certain indirect calls we may introduce helper calls before that we need to skip:
6416 // - CFG may introduce a call to the validator first
6417 // - Generic virtual methods may compute the target dynamically through a separate helper call
6418 if (call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL) ||
6419 call->IsHelperCall(compiler, CORINFO_HELP_VIRTUAL_FUNC_PTR))
6424 genDefineInlineTempLabel(genPendingCallLabel);
6425 genPendingCallLabel = nullptr;
6428 /*****************************************************************************
6430 * Generates code for all the function and funclet prologs and epilogs.
6433 void CodeGen::genGeneratePrologsAndEpilogs()
6438 printf("*************** Before prolog / epilog generation\n");
6439 GetEmitter()->emitDispIGlist(/* displayInstructions */ false);
6443 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
6444 // This affects our code that determines which untracked locals need to be zero initialized.
6445 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
6447 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
6449 GetEmitter()->emitStartPrologEpilogGeneration();
6451 gcInfo.gcResetForBB();
6454 // Generate all the prologs and epilogs.
6455 CLANG_FORMAT_COMMENT_ANCHOR;
6457 #if defined(FEATURE_EH_FUNCLETS)
6459 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
6460 // information computed during codegen, or during function prolog generation, like
6461 // frame offsets. It must run after main function prolog generation.
6463 genCaptureFuncletPrologEpilogInfo();
6465 #endif // FEATURE_EH_FUNCLETS
6467 // Walk the list of prologs and epilogs and generate them.
6468 // We maintain a list of prolog and epilog basic blocks in
6469 // the insGroup structure in the emitter. This list was created
6470 // during code generation by the genReserve*() functions.
6472 // TODO: it seems like better design would be to create a list of prologs/epilogs
6473 // in the code generator (not the emitter), and then walk that list. But we already
6474 // have the insGroup list, which serves well, so we don't need the extra allocations
6475 // for a prolog/epilog list in the code generator.
6477 GetEmitter()->emitGeneratePrologEpilog();
6479 // Tell the emitter we're done with all prolog and epilog generation.
6481 GetEmitter()->emitFinishPrologEpilogGeneration();
6486 printf("*************** After prolog / epilog generation\n");
6487 GetEmitter()->emitDispIGlist(/* displayInstructions */ false);
6493 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6494 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6496 XX End Prolog / Epilog XX
6498 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6499 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6502 //-----------------------------------------------------------------------------------
6503 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
6506 // hClass - type handle
6509 // true if type is returned in multiple registers, false otherwise.
6511 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallConvExtension callConv)
6513 if (hClass == NO_CLASS_HANDLE)
6518 structPassingKind howToReturnStruct;
6519 var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct);
6521 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6522 return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType));
6524 return (varTypeIsStruct(returnType));
6528 //----------------------------------------------
6529 // Methods that support HFA's for ARM32/ARM64
6530 //----------------------------------------------
6532 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
6534 return varTypeIsValidHfaType(GetHfaType(hClass));
6537 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
6539 if (GlobalJitOptions::compFeatureHfa)
6541 if (hClass != NO_CLASS_HANDLE)
6543 CorInfoHFAElemType elemKind = info.compCompHnd->getHFAType(hClass);
6544 if (elemKind != CORINFO_HFA_ELEM_NONE)
6546 // This type may not appear elsewhere, but it will occupy a floating point register.
6547 compFloatingPointUsed = true;
6549 return HfaTypeFromElemKind(elemKind);
6555 //------------------------------------------------------------------------
6556 // GetHfaCount: Given a class handle for an HFA struct
6557 // return the number of registers needed to hold the HFA
6559 // Note that on ARM32 the single precision registers overlap with
6560 // the double precision registers and for that reason each
6561 // double register is considered to be two single registers.
6562 // Thus for ARM32 an HFA of 4 doubles this function will return 8.
6563 // On ARM64 given an HFA of 4 singles or 4 doubles this function will
6564 // will return 4 for both.
6566 // hClass: the class handle of a HFA struct
6568 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
6570 assert(IsHfa(hClass));
6572 // A HFA of doubles is twice as large as an HFA of singles for ARM32
6573 // (i.e. uses twice the number of single precision registers)
6574 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
6575 #else // TARGET_ARM64
6576 var_types hfaType = GetHfaType(hClass);
6577 unsigned classSize = info.compCompHnd->getClassSize(hClass);
6578 // Note that the retail build issues a warning about a potential division by zero without the Max function
6579 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
6580 return classSize / elemSize;
6581 #endif // TARGET_ARM64
6584 //------------------------------------------------------------------------------------------------ //
6585 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
6588 // The number of the first argument with stack slot on the caller's frame.
6591 // On x64 Windows the caller always creates slots (homing space) in its frame for the
6592 // first 4 arguments of a callee (register passed args). So, the variable number
6593 // (lclNum) for the first argument with a stack slot is always 0.
6594 // For System V systems or armarch, there is no such calling convention requirement, and the code
6595 // needs to find the first stack passed argument from the caller. This is done by iterating over
6596 // all the lvParam variables and finding the first with GetArgReg() equals to REG_STK.
6598 unsigned CodeGen::getFirstArgWithStackSlot()
6600 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
6601 unsigned baseVarNum = 0;
6602 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
6603 // that's passed on the stack.
6604 LclVarDsc* varDsc = nullptr;
6605 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
6607 varDsc = compiler->lvaGetDesc(i);
6609 // We should have found a stack parameter (and broken out of this loop) before
6610 // we find any non-parameters.
6611 assert(varDsc->lvIsParam);
6613 if (varDsc->GetArgReg() == REG_STK)
6619 assert(varDsc != nullptr);
6622 #elif defined(TARGET_AMD64)
6625 // Not implemented for x86.
6626 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
6628 #endif // TARGET_X86
6631 //------------------------------------------------------------------------
6632 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
6634 void CodeGen::genSinglePush()
6636 AddStackLevel(REGSIZE_BYTES);
6639 //------------------------------------------------------------------------
6640 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
6642 void CodeGen::genSinglePop()
6644 SubtractStackLevel(REGSIZE_BYTES);
6647 //------------------------------------------------------------------------
6648 // genPushRegs: Push the given registers.
6651 //    regs - mask or registers to push
6652 //    byrefRegs - OUT arg. Set to byref registers that were pushed.
6653 //    noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
6656 //    Mask of registers pushed.
6659 //    This function does not check if the register is marked as used, etc.
6661 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
6663 *byrefRegs = RBM_NONE;
6664 *noRefRegs = RBM_NONE;
// Nothing to do when no registers were requested.
6666 if (regs == RBM_NONE)
// With a fixed outgoing-arg area we never push/pop registers around calls.
6671 #if FEATURE_FIXED_OUT_ARGS
6673 NYI("Don't call genPushRegs with real regs!");
6676 #else // FEATURE_FIXED_OUT_ARGS
// GC refs and byrefs must each occupy exactly one stack slot for the
// push/pop protocol below to preserve GC tracking correctly.
6678 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
6679 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
6681 regMaskTP pushedRegs = regs;
// Walk integer registers in ascending order; bits are cleared from 'regs'
// as they are handled, so the loop terminates when the mask is empty.
6683 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
6685 regMaskTP regBit = regMaskTP(1) << reg;
// Skip registers not in the requested mask.
6687 if ((regBit & regs) == RBM_NONE)
// Classify the register by its current GC-ness so the matching genPopRegs
// call can restore the GC tracking state afterwards.
6691 if (regBit & gcInfo.gcRegGCrefSetCur)
6695 else if (regBit & gcInfo.gcRegByrefSetCur)
6697 *byrefRegs |= regBit;
6700 else if (noRefRegs != NULL)
6702 *noRefRegs |= regBit;
6710 inst_RV(INS_push, reg, type);
// The value now lives on the stack: the register no longer holds a GC pointer.
6713 gcInfo.gcMarkRegSetNpt(regBit);
6720 #endif // FEATURE_FIXED_OUT_ARGS
6723 //------------------------------------------------------------------------
6724 // genPopRegs: Pop the registers that were pushed by genPushRegs().
6727 //    regs - mask of registers to pop
6728 //    byrefRegs - The byref registers that were pushed by genPushRegs().
6729 //    noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
6734 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
6736 if (regs == RBM_NONE)
6741 #if FEATURE_FIXED_OUT_ARGS
6743 NYI("Don't call genPopRegs with real regs!");
6745 #else // FEATURE_FIXED_OUT_ARGS
// The byref/noRef masks must be subsets of the mask being popped, and none
// of the target registers may currently be tracked as live GC pointers.
6747 noway_assert((regs & byrefRegs) == byrefRegs);
6748 noway_assert((regs & noRefRegs) == noRefRegs);
6749 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
6751 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
6752 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
6754 // Walk the registers in the reverse order as genPushRegs()
6755 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
6757 regMaskTP regBit = regMaskTP(1) << reg;
6759 if ((regBit & regs) == RBM_NONE)
// Recover the GC-ness that was recorded at push time.
6763 if (regBit & byrefRegs)
6767 else if (regBit & noRefRegs)
6776 inst_RV(INS_pop, reg, type);
// Re-mark the register as holding a GC ref/byref if it did before the push.
6779 if (type != TYP_INT)
6780 gcInfo.gcMarkRegPtrVal(reg, type);
6785 #endif // FEATURE_FIXED_OUT_ARGS
6788 /*****************************************************************************
// genSetScopeInfo: Report variable-home (scope) information to the EE for
// debugging. Counts the live ranges collected by the VariableLiveKeeper,
// sizes the EE-side table, then emits each range.
6791  * This function should be called only after the sizes of the emitter blocks
6792  * have been finalized.
6795 void CodeGen::genSetScopeInfo()
// Nothing to report when scope info was not requested.
6797 if (!compiler->opts.compScopeInfo)
6805 printf("*************** In genSetScopeInfo()\n");
6809 unsigned varsLocationsCount = 0;
6811 varsLocationsCount = (unsigned int)varLiveKeeper->getLiveRangesCount();
6813 if (varsLocationsCount == 0)
6815 // No variable home to report
6816 compiler->eeSetLVcount(0);
6817 compiler->eeSetLVdone();
6821 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
6823 // Initialize the table where the reported variables' home will be placed.
6824 compiler->eeSetLVcount(varsLocationsCount);
// Debug-only translation table used by siRegVarName/siStackVarName.
6827 genTrnslLocalVarCount = varsLocationsCount;
6828 if (varsLocationsCount)
6830 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[varsLocationsCount];
6834 // We can have either flag defined, both, or none - especially if we need to compare
6835 // the results of both. But we cannot report both to the debugger, since there would be
6836 // overlapping intervals, and they may not indicate the same variable location.
6838 genSetScopeInfoUsingVariableRanges();
6840 compiler->eeSetLVdone();
6843 //------------------------------------------------------------------------
6844 // genSetScopeInfoUsingVariableRanges: Call "genSetScopeInfo" with the
6845 // "VariableLiveRanges" created for the arguments, special arguments and
6846 // IL local variables.
6849 // This function is called from "genSetScopeInfo" once the code is generated
6850 // and we want to send debug info to the debugger.
6852 void CodeGen::genSetScopeInfoUsingVariableRanges()
6854 unsigned int liveRangeIndex = 0;
6856 for (unsigned int varNum = 0; varNum < compiler->info.compLocalsCount; varNum++)
6858 LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
// Skip compiler-introduced locals that have no corresponding IL variable.
6860 if (compiler->compMap2ILvarNum(varNum) == (unsigned int)ICorDebugInfo::UNKNOWN_ILNUM)
// Helper that forwards one coalesced [start, end) range to genSetScopeInfo,
// advancing the shared output index.
6865 auto reportRange = [this, varDsc, varNum, &liveRangeIndex](siVarLoc* loc, UNATIVE_OFFSET start,
6866 UNATIVE_OFFSET end) {
6867 if (varDsc->lvIsParam && (start == end))
6869 // If the length is zero, it means that the prolog is empty. In that case,
6870 // CodeGen::genSetScopeInfo will report the liveness of all arguments
6871 // as spanning the first instruction in the method, so that they can
6872 // at least be inspected on entry to the method.
6878 genSetScopeInfo(liveRangeIndex, start, end - start, varNum, varNum, true, loc);
// Current coalescing state: the range being accumulated, if any.
6883 siVarLoc* curLoc = nullptr;
6884 UNATIVE_OFFSET curStart = 0;
6885 UNATIVE_OFFSET curEnd = 0;
// Pass 0 walks the prolog ranges, pass 1 the body ranges.
6887 for (int rangeIndex = 0; rangeIndex < 2; rangeIndex++)
6889 VariableLiveKeeper::LiveRangeList* liveRanges;
6890 if (rangeIndex == 0)
6892 liveRanges = varLiveKeeper->getLiveRangesForVarForProlog(varNum);
6896 liveRanges = varLiveKeeper->getLiveRangesForVarForBody(varNum);
6899 for (VariableLiveKeeper::VariableLiveRange& liveRange : *liveRanges)
6901 UNATIVE_OFFSET startOffs = liveRange.m_StartEmitLocation.CodeOffset(GetEmitter());
6902 UNATIVE_OFFSET endOffs = liveRange.m_EndEmitLocation.CodeOffset(GetEmitter());
// Ranges arrive sorted and non-overlapping.
6904 assert(startOffs <= endOffs);
6905 assert(startOffs >= curEnd);
// Adjacent range in the same location: merge instead of reporting separately.
6906 if ((curLoc != nullptr) && (startOffs == curEnd) && siVarLoc::Equals(curLoc, &liveRange.m_VarLocation))
6908 // Extend current range.
6913 // Report old range if any.
6914 if (curLoc != nullptr)
6916 reportRange(curLoc, curStart, curEnd);
6919 // Start a new range.
6920 curLoc = &liveRange.m_VarLocation;
6921 curStart = startOffs;
6926 // Report last range
6927 if (curLoc != nullptr)
6929 reportRange(curLoc, curStart, curEnd);
// Record how many entries were actually emitted (may be fewer than counted
// up front because adjacent equal-location ranges were coalesced).
6933 compiler->eeVarsCount = liveRangeIndex;
6936 //------------------------------------------------------------------------
6937 // genSetScopeInfo: Record scope information for debug info
6941 //    startOffs - the starting offset for this scope
6942 //    length - the length of this scope
6943 //    varNum - the lclVar for this scope info
6945 //    avail - a bool indicating if it has a home
6946 //    varLoc - the position (reg or stack) of the variable
6949 //    Called for every scope info piece to record by the main genSetScopeInfo()
6951 void CodeGen::genSetScopeInfo(unsigned which,
6952 UNATIVE_OFFSET startOffs,
6953 UNATIVE_OFFSET length,
6959 // We need to do some mapping while reporting back these variables.
6961 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
6962 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
6965 // Non-x86 platforms are allowed to access all arguments directly
6966 // so we don't need this code.
6968 // Is this a varargs function?
6969 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
6970 varNum < compiler->info.compArgsCount && !compiler->lvaGetDesc(varNum)->lvIsRegArg)
// x86 varargs stack arguments are always stack-homed.
6972 noway_assert(varLoc->vlType == VLT_STK || varLoc->vlType == VLT_STK2);
6974 // All stack arguments (except the varargs handle) have to be
6975 // accessed via the varargs cookie. Discard generated info,
6976 // and just find its position relative to the varargs handle
6978 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
6979 if (!compiler->lvaGetDesc(compiler->lvaVarargsHandleArg)->lvOnFrame)
6981 noway_assert(!compiler->opts.compDbgCode);
6985 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
6986 // arguments of vararg functions to avoid reporting them to GC.
6987 noway_assert(!compiler->lvaGetDesc(varNum)->lvRegister);
6988 unsigned cookieOffset = compiler->lvaGetDesc(compiler->lvaVarargsHandleArg)->GetStackOffset();
6989 unsigned varOffset = compiler->lvaGetDesc(varNum)->GetStackOffset();
6991 noway_assert(cookieOffset < varOffset);
// Rebase the argument's offset to be relative to the varargs cookie.
6992 unsigned offset = varOffset - cookieOffset;
6993 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
6994 noway_assert(offset < stkArgSize);
6995 offset = stkArgSize - offset;
6997 varLoc->vlType = VLT_FIXED_VA;
6998 varLoc->vlFixedVarArg.vlfvOffset = offset;
7001 #endif // TARGET_X86
// Look up the variable's source name (debug dumps only need this).
7003 VarName name = nullptr;
7007 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
7009 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
7011 name = compiler->info.compVarScopes[scopeNum].vsdName;
7015 // Hang on to this compiler->info.
7017 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
7019 tlvi.tlviVarNum = ilVarNum;
7020 tlvi.tlviLVnum = LVnum;
7021 tlvi.tlviName = name;
7022 tlvi.tlviStartPC = startOffs;
7023 tlvi.tlviLength = length;
7024 tlvi.tlviAvailable = avail;
7025 tlvi.tlviVarLoc = *varLoc;
// Hand the (possibly rewritten) location off to the EE.
7029 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, *varLoc);
7032 /*****************************************************************************/
7035 /*****************************************************************************
// siRegVarName: Return the source name of the variable living in register
// 'reg' over the native code range [offs, offs+size), or NULL if no tracked
// variable matches. Used by the late disassembler to annotate operands.
7038 * Can be called only after lviSetLocalVarInfo() has been called
7042 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
7044 if (!compiler->opts.compScopeInfo)
7047 if (compiler->info.compVarScopesCount == 0)
7050 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
// Linear scan of the translated scope table for a live, available entry
// homed in 'reg' whose PC range overlaps the queried instruction.
7052 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
7054 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
7055 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
7056 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
7058 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
7065 /*****************************************************************************
// siStackVarName: Return the source name of the variable living at stack
// offset 'stkOffs' (relative to register 'reg') over the native code range
// [offs, offs+size), or NULL if no tracked variable matches.
7068 * Can be called only after lviSetLocalVarInfo() has been called
7072 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
7074 if (!compiler->opts.compScopeInfo)
7077 if (compiler->info.compVarScopesCount == 0)
7080 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
// Linear scan mirroring siRegVarName, but matching a stack home instead.
7082 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
7084 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStack((regNumber)reg, stkOffs)) &&
7085 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
7086 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
7088 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
7096 #endif // defined(DEBUG)
7097 #endif // LATE_DISASM
7101 /*****************************************************************************
7102 * Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
7105 void CodeGen::genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping)
7107 if (mappingNum != unsigned(-1))
7109 printf("%d: ", mappingNum);
// Print the IL-side description of the mapping.
7112 switch (ipMapping->ipmdKind)
7114 case IPmappingDscKind::Prolog:
7117 case IPmappingDscKind::Epilog:
7120 case IPmappingDscKind::NoMapping:
7123 case IPmappingDscKind::Normal:
7124 const ILLocation& loc = ipMapping->ipmdLoc;
7125 Compiler::eeDispILOffs(loc.GetOffset());
7126 if (loc.IsStackEmpty())
7128 printf(" STACK_EMPTY");
7133 printf(" CALL_INSTRUCTION");
// Print the native-side location the IL offset maps to.
7140 ipMapping->ipmdNativeLoc.Print(compiler->compMethodID);
7141 // We can only call this after code generation. Is there any way to tell when it's legal to call?
7142 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(GetEmitter()));
7144 if (ipMapping->ipmdIsLabel)
7152 void CodeGen::genIPmappingListDisp()
7154 unsigned mappingNum = 0;
7156 for (IPmappingDsc& dsc : compiler->genIPmappings)
7158 genIPmappingDisp(mappingNum, &dsc);
7165 /*****************************************************************************
7167 * Append an IPmappingDsc struct to the list that we're maintaining
7169 * Record the instr offset as being at the current code gen position.
7172 void CodeGen::genIPmappingAdd(IPmappingDscKind kind, const DebugInfo& di, bool isLabel)
7174 if (!compiler->opts.compDbgInfo)
// A Normal mapping must carry valid debug info; special kinds must not.
7179 assert((kind == IPmappingDscKind::Normal) == di.IsValid());
7183 case IPmappingDscKind::Prolog:
7184 case IPmappingDscKind::Epilog:
7189 if (kind == IPmappingDscKind::Normal)
7191 noway_assert(di.GetLocation().GetOffset() <= compiler->info.compILCodeSize);
7194 // Ignore this one if it's the same IL location as the last one we saw.
7195 // Note that we'll let through two identical IL offsets if the flag bits
7196 // differ, or two identical "special" mappings (e.g., PROLOG).
7197 if ((compiler->genIPmappings.size() > 0) && (kind == compiler->genIPmappings.back().ipmdKind) &&
7198 (di.GetLocation() == compiler->genIPmappings.back().ipmdLoc))
7200 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", di.GetLocation().GetOffset());
// Build the new entry at the current emitter position and append it.
7206 IPmappingDsc addMapping;
7207 addMapping.ipmdNativeLoc.CaptureLocation(GetEmitter());
7208 addMapping.ipmdKind = kind;
7209 addMapping.ipmdLoc = di.GetLocation();
7210 addMapping.ipmdIsLabel = isLabel;
7212 assert((kind == IPmappingDscKind::Normal) == addMapping.ipmdLoc.IsValid());
7213 compiler->genIPmappings.push_back(addMapping);
7218 printf("Added IP mapping: ");
7219 genIPmappingDisp(unsigned(-1), &addMapping);
7224 /*****************************************************************************
7226 * Prepend an IPmappingDsc struct to the list that we're maintaining
7229 void CodeGen::genIPmappingAddToFront(IPmappingDscKind kind, const DebugInfo& di, bool isLabel)
7231 if (!compiler->opts.compDbgInfo)
// Normal mappings must have valid debug info within the IL code bounds.
7236 noway_assert((kind != IPmappingDscKind::Normal) ||
7237 (di.IsValid() && (di.GetLocation().GetOffset() <= compiler->info.compILCodeSize)));
7239 /* Create a mapping entry and prepend it to the list */
7241 IPmappingDsc addMapping;
7242 addMapping.ipmdNativeLoc.CaptureLocation(GetEmitter());
7243 addMapping.ipmdKind = kind;
7244 addMapping.ipmdLoc = di.GetLocation();
7245 addMapping.ipmdIsLabel = isLabel;
// Unlike genIPmappingAdd, no duplicate filtering: the entry goes to the front.
7246 compiler->genIPmappings.push_front(addMapping);
7251 printf("Added IP mapping to front: ");
7252 genIPmappingDisp(unsigned(-1), &addMapping);
7257 /*****************************************************************************/
// genEnsureCodeEmitted: Under debuggable codegen, verify/ensure that native
// code exists for the most recently reported IL offset 'di', so the debugger
// never sees a reported offset with no instruction behind it.
7259 void CodeGen::genEnsureCodeEmitted(const DebugInfo& di)
7261 if (!compiler->opts.compDbgCode)
7271 // If other IL offsets were reported since, skip
7273 if (compiler->genIPmappings.size() <= 0)
7278 const IPmappingDsc& prev = compiler->genIPmappings.back();
// Only act when 'di' is still the last mapping that was reported.
7279 if (prev.ipmdLoc != di.GetLocation())
7284 // di represents the last reported offset. Make sure that we generated native code
7286 if (prev.ipmdNativeLoc.IsCurrentLocation(GetEmitter()))
7292 //------------------------------------------------------------------------
7293 // genIPmappingGen: Shut down the IP-mapping logic, report the info to the EE.
7295 void CodeGen::genIPmappingGen()
7297 if (!compiler->opts.compDbgInfo)
7302 JITDUMP("*************** In genIPmappingGen()\n");
7304 if (compiler->genIPmappings.size() <= 0)
7306 compiler->eeSetLIcount(0);
7307 compiler->eeSetLIdone();
// Dedup pass: collapse consecutive mappings that resolve to the same native
// offset, deciding which of the pair to keep based on kind/flags below.
7311 UNATIVE_OFFSET prevNativeOfs = UNATIVE_OFFSET(~0);
7312 for (jitstd::list<IPmappingDsc>::iterator it = compiler->genIPmappings.begin();
7313 it != compiler->genIPmappings.end();)
7315 UNATIVE_OFFSET dscNativeOfs = it->ipmdNativeLoc.CodeOffset(GetEmitter());
7316 if (dscNativeOfs != prevNativeOfs)
7318 prevNativeOfs = dscNativeOfs;
7323 // If we have a previous offset we should have a previous mapping.
7324 assert(it != compiler->genIPmappings.begin());
7325 jitstd::list<IPmappingDsc>::iterator prev = it;
7328 // Prev and current mappings have same native offset.
7329 // If one does not map to IL then remove that one.
7330 if (prev->ipmdKind == IPmappingDscKind::NoMapping)
7332 compiler->genIPmappings.erase(prev);
7337 if (it->ipmdKind == IPmappingDscKind::NoMapping)
7339 it = compiler->genIPmappings.erase(it);
7343 // Both have mappings.
7344 // If previous is the prolog, keep both if this one is at IL offset 0.
7345 // (TODO: Why? Debugger has no problem breaking on the prolog mapping
7347 if ((prev->ipmdKind == IPmappingDscKind::Prolog) && (it->ipmdKind == IPmappingDscKind::Normal) &&
7348 (it->ipmdLoc.GetOffset() == 0))
7354 // For the special case of an IL instruction with no body followed by
7355 // the epilog (say ret void immediately preceding the method end), we
7356 // leave both entries in, so that we'll stop at the (empty) ret
7357 // statement if the user tries to put a breakpoint there, and then have
7358 // the option of seeing the epilog or not based on SetUnmappedStopMask
7360 if (it->ipmdKind == IPmappingDscKind::Epilog)
7366 // For managed return values we store all calls. Keep both in this case
7368 if (((prev->ipmdKind == IPmappingDscKind::Normal) && (prev->ipmdLoc.IsCall())) ||
7369 ((it->ipmdKind == IPmappingDscKind::Normal) && (it->ipmdLoc.IsCall())))
7375 // Otherwise report the higher offset unless the previous mapping is a
7377 if (prev->ipmdIsLabel)
7379 it = compiler->genIPmappings.erase(it);
7383 compiler->genIPmappings.erase(prev);
7388 // Tell them how many mapping records we've got
7390 compiler->eeSetLIcount(static_cast<unsigned int>(compiler->genIPmappings.size()));
7392 // Now tell them about the mappings
7393 unsigned int mappingIdx = 0;
7394 for (const IPmappingDsc& dsc : compiler->genIPmappings)
7396 compiler->eeSetLIinfo(mappingIdx++, dsc.ipmdNativeLoc.CodeOffset(GetEmitter()), dsc.ipmdKind, dsc.ipmdLoc);
7401 //This check is disabled. It is always true that any time this check asserts, the debugger would have a
7402 //problem with IL source level debugging. However, for a C# file, it only matters if things are on
7403 //different source lines. As a result, we have all sorts of latent problems with how we emit debug
7404 //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
7406 if (compiler->opts.compDbgCode)
7408 //Assert that the first instruction of every basic block with more than one incoming edge has a
7409 //different sequence point from each incoming block.
7411 //It turns out that the only thing we really have to assert is that the first statement in each basic
7412 //block has an IL offset and appears in eeBoundaries.
7413 for (BasicBlock* const block : compiler->Blocks())
7415 Statement* stmt = block->firstStmt();
7416 if ((block->bbRefs > 1) && (stmt != nullptr))
7419 DebugInfo rootInfo = stmt->GetDebugInfo().GetRoot();
7420 if (rootInfo.IsValid())
7422 for (unsigned i = 0; i < compiler->eeBoundariesCount; ++i)
7424 if (compiler->eeBoundaries[i].ilOffset == rootInfo.GetLocation().GetOffset())
7431 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
7437 compiler->eeSetLIdone();
7441 //------------------------------------------------------------------------
7442 // genReportRichDebugInfoInlineTreeToFile:
7443 // Recursively process a context in the inline tree and write information about it to a file.
7447 // context - the context
7448 // first - whether this is the first of the siblings being written out
7450 void CodeGen::genReportRichDebugInfoInlineTreeToFile(FILE* file, InlineContext* context, bool* first)
// Siblings are written first so the JSON array lists them in order.
7452 if (context->GetSibling() != nullptr)
7454 genReportRichDebugInfoInlineTreeToFile(file, context->GetSibling(), first);
// Only successfully inlined contexts are reported.
7457 if (context->IsSuccess())
7466 fprintf(file, "{\"Ordinal\":%u,", context->GetOrdinal());
7467 fprintf(file, "\"MethodID\":%lld,", (int64_t)context->GetCallee());
7468 fprintf(file, "\"ILOffset\":%u,", context->GetLocation().GetOffset());
7469 fprintf(file, "\"LocationFlags\":%u,", (uint32_t)context->GetLocation().EncodeSourceTypes());
7470 fprintf(file, "\"ExactILOffset\":%u,", context->GetActualCallOffset());
7471 auto append = [&]() {
7473 const char* methodName = compiler->eeGetMethodName(context->GetCallee(), buffer, sizeof(buffer));
7474 fprintf(file, "\"MethodName\":\"%s\",", methodName);
// Children become the nested "Inlinees" array.
7477 fprintf(file, "\"Inlinees\":[");
7478 if (context->GetChild() != nullptr)
7480 bool childFirst = true;
7481 genReportRichDebugInfoInlineTreeToFile(file, context->GetChild(), &childFirst);
7483 fprintf(file, "]}");
7487 //------------------------------------------------------------------------
7488 // genReportRichDebugInfoToFile:
7489 // Write rich debug info in JSON format to file specified by environment variable.
7491 void CodeGen::genReportRichDebugInfoToFile()
7493 if (JitConfig.WriteRichDebugInfoFile() == nullptr)
// Serialize writers: multiple compilations may append to the same file.
7498 static CritSecObject s_critSect;
7499 CritSecHolder holder(s_critSect);
// Append mode so each compiled method adds one JSON line.
7501 FILE* file = _wfopen(JitConfig.WriteRichDebugInfoFile(), W("a"));
7502 if (file == nullptr)
7507 // MethodID in ETW events are the method handles.
7508 fprintf(file, "{\"MethodID\":%lld,", (INT64)compiler->info.compMethodHnd);
7509 // Print inline tree.
7510 fprintf(file, "\"InlineTree\":");
7513 genReportRichDebugInfoInlineTreeToFile(file, compiler->compInlineContext, &first);
7514 fprintf(file, ",\"Mappings\":[");
7516 for (RichIPMapping& mapping : compiler->genRichIPmappings)
7525 fprintf(file, "{\"NativeOffset\":%u,\"InlineContext\":%u,\"ILOffset\":%u}",
7526 mapping.nativeLoc.CodeOffset(GetEmitter()), mapping.debugInfo.GetInlineContext()->GetOrdinal(),
7527 mapping.debugInfo.GetLocation().GetOffset());
7530 fprintf(file, "]}\n");
7537 //------------------------------------------------------------------------
7538 // genRecordRichDebugInfoInlineTree:
7539 // Recursively process a context in the inline tree and record information
7543 // context - the inline context
7544 // nodes - the array to record into
7546 void CodeGen::genRecordRichDebugInfoInlineTree(InlineContext* context, ICorDebugInfo::InlineTreeNode* nodes)
7548 if (context->IsSuccess())
7550 // We expect 1 + NumInlines unique ordinals
7551 assert(context->GetOrdinal() <= compiler->m_inlineStrategy->GetInlineCount());
// Each context is stored at its ordinal; child/sibling links are stored as
// ordinals too (0 meaning "none", since 0 is the root's own slot).
7553 ICorDebugInfo::InlineTreeNode* node = &nodes[context->GetOrdinal()];
7554 node->Method = context->GetCallee();
7555 node->ILOffset = context->GetActualCallOffset();
7556 node->Child = context->GetChild() == nullptr ? 0 : context->GetChild()->GetOrdinal();
7557 node->Sibling = context->GetSibling() == nullptr ? 0 : context->GetSibling()->GetOrdinal();
// Recurse over the rest of the tree regardless of this node's success.
7560 if (context->GetSibling() != nullptr)
7562 genRecordRichDebugInfoInlineTree(context->GetSibling(), nodes);
7565 if (context->GetChild() != nullptr)
7567 genRecordRichDebugInfoInlineTree(context->GetChild(), nodes);
7571 //------------------------------------------------------------------------
7572 // genReportRichDebugInfo:
7573 // If enabled, report rich debugging information to file and/or EE.
7575 void CodeGen::genReportRichDebugInfo()
7577 INDEBUG(genReportRichDebugInfoToFile());
7579 if (JitConfig.RichDebugInfo() == 0)
// One node per inline context, plus the root method itself.
7584 unsigned numContexts = 1 + compiler->m_inlineStrategy->GetInlineCount();
7585 unsigned numRichMappings = static_cast<unsigned>(compiler->genRichIPmappings.size());
// Both arrays are EE-allocated since the EE consumes and owns them.
7587 ICorDebugInfo::InlineTreeNode* inlineTree = static_cast<ICorDebugInfo::InlineTreeNode*>(
7588 compiler->info.compCompHnd->allocateArray(numContexts * sizeof(ICorDebugInfo::InlineTreeNode)));
7589 ICorDebugInfo::RichOffsetMapping* mappings = static_cast<ICorDebugInfo::RichOffsetMapping*>(
7590 compiler->info.compCompHnd->allocateArray(numRichMappings * sizeof(ICorDebugInfo::RichOffsetMapping)));
7592 memset(inlineTree, 0, numContexts * sizeof(ICorDebugInfo::InlineTreeNode));
7593 memset(mappings, 0, numRichMappings * sizeof(ICorDebugInfo::RichOffsetMapping));
7595 genRecordRichDebugInfoInlineTree(compiler->compInlineContext, inlineTree);
// Verify every slot in the tree array was filled by the recursion above.
7598 for (unsigned i = 0; i < numContexts; i++)
7600 assert(inlineTree[i].Method != NO_METHOD_HANDLE);
// Flatten the recorded rich mappings into the EE-side array.
7604 size_t mappingIndex = 0;
7605 for (const RichIPMapping& richMapping : compiler->genRichIPmappings)
7607 ICorDebugInfo::RichOffsetMapping* mapping = &mappings[mappingIndex];
7608 assert(richMapping.debugInfo.IsValid());
7609 mapping->NativeOffset = richMapping.nativeLoc.CodeOffset(GetEmitter());
7610 mapping->Inlinee = richMapping.debugInfo.GetInlineContext()->GetOrdinal();
7611 mapping->ILOffset = richMapping.debugInfo.GetLocation().GetOffset();
7612 mapping->Source = richMapping.debugInfo.GetLocation().EncodeSourceTypes();
7617 compiler->info.compCompHnd->reportRichMappings(inlineTree, numContexts, mappings, numRichMappings);
7620 //------------------------------------------------------------------------
7621 // genAddRichIPMappingHere:
7622 // Create a rich IP mapping at the current emit location using the specified
7623 // debug information.
7626 // di - the debug information
7628 void CodeGen::genAddRichIPMappingHere(const DebugInfo& di)
7630 RichIPMapping mapping;
7631 mapping.nativeLoc.CaptureLocation(GetEmitter());
7632 mapping.debugInfo = di;
7633 compiler->genRichIPmappings.push_back(mapping);
7636 /*============================================================================
7638 * These are empty stubs to help the late dis-assembler to compile
7639 * if the late disassembler is being built into a non-DEBUG build.
7641 *============================================================================
7644 #if defined(LATE_DISASM)
// Non-DEBUG stub: no scope info is tracked, so no register-variable name is
// available (body elided in this view; presumably returns no name — TODO confirm).
7648 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
// Non-DEBUG stub counterpart for stack-homed variables (body elided in this view).
7654 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
7659 /*****************************************************************************/
7660 #endif // !defined(DEBUG)
7661 #endif // defined(LATE_DISASM)
7663 //------------------------------------------------------------------------
7664 // indirForm: Make a temporary indir we can feed to pattern matching routines
7665 // in cases where we don't want to instantiate all the indirs that happen.
7667 /* static */ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
// Stack-constructed node: never part of the IR, so it carries no register.
7669 GenTreeIndir i(GT_IND, type, base, nullptr);
7670 i.SetRegNum(REG_NA);
7675 //------------------------------------------------------------------------
7676 // storeIndirForm: Make a temporary store-indir we can feed to pattern matching
7677 // routines in cases where we don't want to instantiate all the indirs that happen.
7679 /* static */ GenTreeStoreInd CodeGen::storeIndirForm(var_types type, GenTree* base, GenTree* data)
// Stack-constructed node: never part of the IR, so it carries no register.
7681 GenTreeStoreInd i(type, base, data);
7682 i.SetRegNum(REG_NA);
7686 //------------------------------------------------------------------------
7687 // intForm: Make a temporary int we can feed to pattern matching routines
7688 // in cases where we don't want to instantiate.
7690 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
// Stack-constructed constant node: never part of the IR, so no register.
7692 GenTreeIntCon i(type, value);
7693 i.SetRegNum(REG_NA);
#if defined(TARGET_X86) || defined(TARGET_ARM)
//------------------------------------------------------------------------
// genLongReturn: Generates code for long return statement for x86 and arm.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
//
// Return Value:
//    None
//
// Note: treeNode's and op1's registers are already consumed.
//
void CodeGen::genLongReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    assert(treeNode->TypeGet() == TYP_LONG);

    var_types returnType = treeNode->TypeGet();
    GenTree*  longValue  = treeNode->gtGetOp1();

    assert(longValue != nullptr);
    assert(longValue->OperGet() == GT_LONG);

    // On 32-bit targets the 64-bit value comes apart as two register halves.
    GenTree* lowHalf  = longValue->gtGetOp1();
    GenTree* highHalf = longValue->gtGetOp2();
    assert((lowHalf->GetRegNum() != REG_NA) && (highHalf->GetRegNum() != REG_NA));

    genConsumeReg(lowHalf);
    genConsumeReg(highHalf);

    // Move each half into its ABI-mandated return register; the move is
    // elided when the value is already there.
    inst_Mov(returnType, REG_LNGRET_LO, lowHalf->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT));
    inst_Mov(returnType, REG_LNGRET_HI, highHalf->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT));
}
#endif // TARGET_X86 || TARGET_ARM
7730 //------------------------------------------------------------------------
7731 // genReturn: Generates code for return statement.
7732 // In case of struct return, delegates to the genStructReturn method.
7735 // treeNode - The GT_RETURN or GT_RETFILT tree node.
7740 void CodeGen::genReturn(GenTree* treeNode)
7742 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
7743 GenTree* op1 = treeNode->gtGetOp1();
7744 var_types targetType = treeNode->TypeGet();
7746 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
7747 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
7748 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
7749 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
7752 if (targetType == TYP_VOID)
7754 assert(op1 == nullptr);
// 32-bit targets return TYP_LONG as a register pair; handled separately.
7758 #if defined(TARGET_X86) || defined(TARGET_ARM)
7759 if (targetType == TYP_LONG)
7761 genLongReturn(treeNode);
7764 #endif // TARGET_X86 || TARGET_ARM
7766 if (isStructReturn(treeNode))
7768 genStructReturn(treeNode);
7770 else if (targetType != TYP_VOID)
7772 assert(op1 != nullptr);
7773 noway_assert(op1->GetRegNum() != REG_NA);
7775 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
7776 // consumed a reg for the operand. This is because the variable
7777 // is dead after return. But we are issuing more instructions
7778 // like "profiler leave callback" after this consumption. So
7779 // if you are issuing more instructions after this point,
7780 // remember to keep the variable live up until the new method
7781 // exit point where it is actually dead.
7784 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7785 genSimpleReturn(treeNode);
7786 #else // !TARGET_ARM64 || !TARGET_LOONGARCH64 || !TARGET_RISCV64
7787 #if defined(TARGET_X86)
7788 if (varTypeUsesFloatReg(treeNode))
7790 genFloatReturn(treeNode);
7793 #elif defined(TARGET_ARM)
// Soft-FP / varargs ARM: float results are returned in integer registers,
// so move the bits out of the VFP register(s).
7794 if (varTypeUsesFloatReg(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
7796 if (targetType == TYP_FLOAT)
7798 GetEmitter()->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->GetRegNum(),
7799 /* canSkip */ false);
7803 assert(targetType == TYP_DOUBLE);
7804 GetEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
7809 #endif // TARGET_ARM
// General case: pick the ABI return register by register class and move
// (with any needed widening) the result into it.
7813 if (varTypeUsesIntReg(treeNode))
7815 retReg = REG_INTRET;
7819 assert(varTypeUsesFloatReg(treeNode));
7820 retReg = REG_FLOATRET;
7823 inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true);
7825 #endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 || !TARGET_RISCV64
7829 #ifdef PROFILING_SUPPORTED
7831 // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
7832 // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
7833 // in the handling of the GT_RETURN statement.
7834 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
7835 // for the return registers containing GC refs.
7837 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
7838 // In flowgraph and other places assert that the last node of a block marked as
7839 // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
7840 // maintain such an invariant irrespective of whether profiler hook needed or not.
7841 // Also, there is not much to be gained by materializing it as an explicit node.
7843 // There should be a single return block while generating profiler ELT callbacks,
7844 // so we just look for that block to trigger insertion of the profile hook.
7845 if ((compiler->compCurBB == compiler->genReturnBB) && compiler->compIsProfilerHookNeeded())
7848 // Since we are invalidating the assumption that we would slip into the epilog
7849 // right after the "return", we need to preserve the return reg's GC state
7850 // across the call until actual method return.
7852 ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
7853 unsigned retRegCount = retTypeDesc.GetReturnRegCount();
7855 if (compiler->compMethodReturnsRetBufAddr())
7857 gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
// Mark each GC-typed return register live across the leave callback.
7861 for (unsigned i = 0; i < retRegCount; ++i)
7863 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
7865 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
7870 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_LEAVE);
// Undo the temporary GC marking now that the callback has been emitted.
7872 if (compiler->compMethodReturnsRetBufAddr())
7874 gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
7878 for (unsigned i = 0; i < retRegCount; ++i)
7880 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
7882 gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
7887 #endif // PROFILING_SUPPORTED
7889 #if defined(DEBUG) && defined(TARGET_XARCH)
7890 bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
7892 #if defined(FEATURE_EH_FUNCLETS)
7893 // Don't do stack pointer check at the return from a funclet; only for the main function.
7894 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7896 doStackPointerCheck = false;
7898 #else // !FEATURE_EH_FUNCLETS
7899 // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
7900 // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
7901 if (compiler->compCurBB->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET))
7903 doStackPointerCheck = false;
7905 #endif // !FEATURE_EH_FUNCLETS
7907 genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
7908 #endif // defined(DEBUG) && defined(TARGET_XARCH)
7911 //------------------------------------------------------------------------
7912 // isStructReturn: Returns whether the 'treeNode' is returning a struct.
7915 // treeNode - The tree node to evaluate whether is a struct return.
7918 // Returns true if the 'treeNode' is a GT_RETURN node of type struct.
7919 // Otherwise returns false.
7921 bool CodeGen::isStructReturn(GenTree* treeNode)
7923 // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
7924 // For the GT_RET_FILT, the return is always a bool or a void, for the end of a finally block.
7925 noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
7926 if (treeNode->OperGet() != GT_RETURN)
// On Windows x64 a struct is never returned by a struct-typed GT_RETURN (the assert below
// enforces this) -- presumably such returns use a return buffer or a single scalar register;
// confirm against the ABI classification code.
7931 #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
7932 assert(!varTypeIsStruct(treeNode));
// Other targets: a struct return is a struct-typed node whose native return type was
// classified as TYP_STRUCT (i.e. not retyped to a primitive).
7935 return varTypeIsStruct(treeNode) && (compiler->info.compRetNativeType == TYP_STRUCT);
7939 //------------------------------------------------------------------------
7940 // genStructReturn: Generates code for returning a struct.
7943 // treeNode - The GT_RETURN tree node.
7949 // op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
7951 void CodeGen::genStructReturn(GenTree* treeNode)
7953 assert(treeNode->OperGet() == GT_RETURN);
// Consume the source's registers up front; the per-register moves/loads below then read them.
7955 genConsumeRegs(treeNode->gtGetOp1());
7957 GenTree* op1 = treeNode->gtGetOp1();
7958 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
7960 ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
7961 const unsigned regCount = retTypeDesc.GetReturnRegCount();
7962 assert(regCount <= MAX_RET_REG_COUNT);
7964 #if FEATURE_MULTIREG_RET
7965 // Right now the only enregisterable structs supported are SIMD vector types.
7966 if (genIsRegCandidateLocal(actualOp1))
7969 const GenTreeLclVar* lclVar = actualOp1->AsLclVar();
7970 const LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar);
7971 assert(varTypeIsSIMD(varDsc->GetRegisterType()));
7972 assert(!lclVar->IsMultiReg());
// Split the single SIMD register into the multiple ABI return registers.
7976 genSIMDSplitReturn(op1, &retTypeDesc);
7977 #endif // FEATURE_SIMD
// Case: the source is a (non-multi-reg) local living on the frame -- load each
// return register directly from the local's stack home.
7979 else if (actualOp1->OperIs(GT_LCL_VAR) && !actualOp1->AsLclVar()->IsMultiReg())
7981 GenTreeLclVar* lclNode = actualOp1->AsLclVar();
7982 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
7983 assert(varDsc->lvIsMultiRegRet);
7985 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7986 // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }",
7987 // i. e. not include the padding for the first field, and so the general loop below won't work.
7988 var_types type = retTypeDesc.GetReturnRegType(0);
7989 regNumber toReg = retTypeDesc.GetABIReturnReg(0);
7990 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0);
7993 assert(regCount == 2);
7994 int offset = genTypeSize(type);
7995 type = retTypeDesc.GetReturnRegType(1);
// Bump the second field's offset up to at least its own size, accounting for the
// inter-field alignment padding described in the comment above.
7996 offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset);
7997 toReg = retTypeDesc.GetABIReturnReg(1);
7998 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
8000 #else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
// General targets: return-register fields are laid out back-to-back in the local.
8002 for (unsigned i = 0; i < regCount; ++i)
8004 var_types type = retTypeDesc.GetReturnRegType(i);
8005 regNumber toReg = retTypeDesc.GetABIReturnReg(i);
8006 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
8007 offset += genTypeSize(type);
8009 #endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
// Case: multi-reg source (call or multi-reg local) -- move each source register to its
// ABI return register, or reload spilled fields from the frame.
8013 for (unsigned i = 0; i < regCount; ++i)
8015 var_types type = retTypeDesc.GetReturnRegType(i);
8016 regNumber toReg = retTypeDesc.GetABIReturnReg(i);
8017 regNumber fromReg = op1->GetRegByIndex(i);
8018 if ((fromReg == REG_NA) && op1->OperIs(GT_COPY))
8020 // A copy that doesn't copy this field will have REG_NA.
8021 // TODO-Cleanup: It would probably be better to always have a valid reg
8022 // on a GT_COPY, unless the operand is actually spilled. Then we wouldn't have
8023 // to check for this case (though we'd have to check in the genRegCopy that the
8025 fromReg = actualOp1->GetRegByIndex(i);
8027 if (fromReg == REG_NA)
8029 // This is a spilled field of a multi-reg lclVar.
8030 // We currently only mark a lclVar operand as RegOptional, since we don't have a way
8031 // to mark a multi-reg tree node as used from spill (GTF_NOREG_AT_USE) on a per-reg basis.
8032 LclVarDsc* varDsc = compiler->lvaGetDesc(actualOp1->AsLclVar());
8033 assert(varDsc->lvPromoted);
8034 unsigned fieldVarNum = varDsc->lvFieldLclStart + i;
8035 assert(compiler->lvaGetDesc(fieldVarNum)->lvOnFrame);
8037 GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, fieldVarNum, 0);
8041 // Note that ins_Copy(fromReg, type) will return the appropriate register to copy
8042 // between register files if needed.
8043 inst_Mov(type, toReg, fromReg, /* canSkip */ true);
8047 #else // !FEATURE_MULTIREG_RET
8052 //----------------------------------------------------------------------------------
8053 // genMultiRegStoreToLocal: store multi-reg value to a local
8056 // lclNode - GenTree of GT_STORE_LCL_VAR
8062 // The child of store is a multi-reg node.
8064 void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
8066 assert(lclNode->OperIs(GT_STORE_LCL_VAR));
8067 assert(varTypeIsStruct(lclNode) || varTypeIsMultiReg(lclNode));
8069 GenTree* op1 = lclNode->gtGetOp1();
8070 assert(op1->IsMultiRegNode());
8071 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
8072 unsigned regCount = actualOp1->GetMultiRegCount(compiler);
8073 assert(regCount > 1);
8075 // Assumption: current implementation requires that a multi-reg
8076 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
8077 // being promoted, unless compiler->lvaEnregMultiRegVars is true.
8079 unsigned lclNum = lclNode->GetLclNum();
8080 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
8081 if (op1->OperIs(GT_CALL))
8083 assert(regCount <= MAX_RET_REG_COUNT);
8084 noway_assert(varDsc->lvIsMultiRegRet);
8088 // Check for the case of an enregistered SIMD type that's returned in multiple registers.
8089 if (varDsc->lvIsRegCandidate() && (lclNode->GetRegNum() != REG_NA))
8091 assert(varTypeIsSIMD(lclNode));
// Reassemble the multiple return registers into the single SIMD register, then done.
8092 genMultiRegStoreToSIMDLocal(lclNode);
8095 #endif // FEATURE_SIMD
8097 // We have either a multi-reg local or a local with multiple fields in memory.
8099 // The liveness model is as follows:
8100 // use reg #0 from src, including any reload or copy
8102 // use reg #1 from src, including any reload or copy
8105 // Imagine the following scenario:
8106 // There are 3 registers used. Prior to this node, they occupy registers r3, r2 and r1.
8107 // There are 3 registers defined by this node. They need to be placed in r1, r2 and r3,
8110 // If we defined the as using all the source registers at once, we'd have to adopt one
8111 // of the following models:
8112 // - All (or all but one) of the incoming sources are marked "delayFree" so that they won't
8113 // get the same register as any of the registers being defined. This would result in copies for
8114 // the common case where the source and destination registers are the same (e.g. when a CALL
8115 // result is assigned to a lclVar, which is then returned).
8116 // - For our example (and for many/most cases) we would have to copy or spill all sources.
8117 // - We allow circular dependencies between source and destination registers. This would require
8118 // the code generator to determine the order in which the copies must be generated, and would
8119 // require a temp register in case a swap is required. This complexity would have to be handled
8120 // in both the normal code generation case, as well as for copies & reloads, as they are currently
8121 // modeled by the register allocator to happen just prior to the use.
8122 // - For our example, a temp would be required to swap r1 and r3, unless a swap instruction is
8123 // available on the target.
8125 // By having a multi-reg local use and define each field in order, we avoid these issues, and the
8126 // register allocator will ensure that any conflicts are resolved via spill or inserted COPYs.
8127 // For our example, the register allocator would simple spill r1 because the first def requires it.
8128 // The code generator would move r3 to r1, leave r2 alone, and then load the spilled value into r3.
// 'offset' tracks the running frame offset for fields stored to memory (non-multi-reg-var path).
8130 unsigned offset = 0;
8131 bool isMultiRegVar = lclNode->IsMultiRegLclVar();
8132 bool hasRegs = false;
8136 assert(compiler->lvaEnregMultiRegVars);
8137 assert(regCount == varDsc->lvFieldCnt);
8140 for (unsigned i = 0; i < regCount; ++i)
8142 regNumber reg = genConsumeReg(op1, i);
8143 var_types srcType = actualOp1->GetRegTypeByIndex(i);
8144 // genConsumeReg will return the valid register, either from the COPY
8145 // or from the original source.
8146 assert(reg != REG_NA);
8150 // Each field is passed in its own register, use the field types.
8151 regNumber varReg = lclNode->GetRegByIndex(i);
8152 unsigned fieldLclNum = varDsc->lvFieldLclStart + i;
8153 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum);
8154 var_types destType = fieldVarDsc->TypeGet();
8155 if (varReg != REG_NA)
8159 // We may need a cross register-file copy here.
8160 inst_Mov(destType, varReg, reg, /* canSkip */ true);
8166 if ((varReg == REG_STK) || fieldVarDsc->IsAlwaysAliveInMemory())
8168 if (!lclNode->IsLastUse(i))
8170 // A byte field passed in a long register should be written on the stack as a byte.
8171 instruction storeIns = ins_StoreFromSrc(reg, destType);
8172 GetEmitter()->emitIns_S_R(storeIns, emitTypeSize(destType), reg, fieldLclNum, 0);
// Record the field's new home register (REG_STK if it lives only in memory).
8175 fieldVarDsc->SetRegNum(varReg);
8179 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
8180 // should consider the padding field within a struct.
8181 offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset;
8183 // Several fields could be passed in one register, copy using the register type.
8184 // It could rewrite memory outside of the fields but local on the stack are rounded to POINTER_SIZE so
8185 // it is safe to store a long register into a byte field as it is known that we have enough padding after.
8186 GetEmitter()->emitIns_S_R(ins_Store(srcType), emitTypeSize(srcType), reg, lclNum, offset);
8187 offset += genTypeSize(srcType);
8191 assert(offset <= varDsc->lvSize());
8192 #else // !TARGET_64BIT
8193 if (varTypeIsStruct(varDsc))
8195 assert(offset <= varDsc->lvSize());
// 32-bit: a multi-reg store to a non-struct local must be a TYP_LONG split across two regs.
8199 assert(varDsc->TypeGet() == TYP_LONG);
8200 assert(offset <= genTypeSize(TYP_LONG));
8202 #endif // !TARGET_64BIT
8207 // Update variable liveness.
// NOTE(review): the branch conditions guarding the three calls below are elided in this
// excerpt; presumably genProduceReg is used when the local got registers (hasRegs) and the
// genUpdateLife + SetRegNum(REG_STK) path when it lives only in memory -- confirm against
// the full source.
8212 genProduceReg(lclNode);
8216 genUpdateLife(lclNode);
8221 genUpdateLife(lclNode);
8222 varDsc->SetRegNum(REG_STK);
8226 //------------------------------------------------------------------------
8227 // genRegCopy: Produce code for a GT_COPY node.
8230 // tree - the GT_COPY node
8233 // This will copy the register produced by this node's source, to
8234 // the register allocated to this GT_COPY node.
8235 // It has some special handling for these cases:
8236 // - when the source and target registers are in different register files
8237 // (note that this is *not* a conversion).
8238 // - when the source is a lclVar whose home location is being moved to a new
8239 // register (rather than just being copied for temporary use).
8241 void CodeGen::genRegCopy(GenTree* treeNode)
8243 assert(treeNode->OperGet() == GT_COPY);
8244 GenTree* op1 = treeNode->AsOp()->gtOp1;
8246 if (op1->IsMultiRegNode())
8248 // Register allocation assumes that any reload and copy are done in operand order.
8249 // That is, we can have:
8250 // (reg0, reg1) = COPY(V0,V1) where V0 is in reg1 and V1 is in memory
8251 // The register allocation model assumes:
8252 // First, V0 is moved to reg0 (v1 can't be in reg0 because it is still live, which would be a conflict).
8253 // Then, V1 is moved to reg1
8254 // However, if we call genConsumeRegs on op1, it will do the reload of V1 before we do the copy of V0.
8255 // So we need to handle that case first.
8257 // There should never be any circular dependencies, and we will check that here.
8259 // GenTreeCopyOrReload only reports the highest index that has a valid register.
8260 // However, we need to ensure that we consume all the registers of the child node,
8261 // so we use its regCount.
8262 unsigned regCount = op1->GetMultiRegCount(compiler);
8263 assert(regCount <= MAX_MULTIREG_COUNT);
8265 // First set the source registers as busy if they haven't been spilled.
8266 // (Note that this is just for verification that we don't have circular dependencies.)
8267 regMaskTP busyRegs = RBM_NONE;
8268 for (unsigned i = 0; i < regCount; ++i)
8270 if ((op1->GetRegSpillFlagByIdx(i) & GTF_SPILLED) == 0)
8272 busyRegs |= genRegMask(op1->GetRegByIndex(i));
// Copy each register in order; the busyRegs bookkeeping asserts that no target
// register is still occupied by a not-yet-moved source (no circular dependency).
8275 for (unsigned i = 0; i < regCount; ++i)
8277 regNumber sourceReg = op1->GetRegByIndex(i);
8278 // genRegCopy will consume the source register, perform any required reloads,
8279 // and will return either the register copied to, or the original register if there's no copy.
8280 regNumber targetReg = genRegCopy(treeNode, i);
8281 if (targetReg != sourceReg)
8283 regMaskTP targetRegMask = genRegMask(targetReg);
8284 assert((busyRegs & targetRegMask) == 0);
8285 // Clear sourceReg from the busyRegs, and add targetReg.
8286 busyRegs &= ~genRegMask(sourceReg);
8288 busyRegs |= genRegMask(targetReg);
// Single-register case: consume the source (performing any reload), then emit the move.
8293 regNumber srcReg = genConsumeReg(op1);
8294 var_types targetType = treeNode->TypeGet();
8295 regNumber targetReg = treeNode->GetRegNum();
8296 assert(srcReg != REG_NA);
8297 assert(targetReg != REG_NA);
8298 assert(targetType != TYP_STRUCT);
8300 inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false);
8304 // The lclVar will never be a def.
8305 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
8306 // appropriately set the gcInfo for the copied value.
8307 // If not, there are two cases we need to handle:
8308 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
8309 // will remain live in its original register.
8310 // genProduceReg() will appropriately set the gcInfo for the copied value,
8311 // and genConsumeReg will reset it.
8312 // - Otherwise, we need to update register info for the lclVar.
8314 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
8315 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
8317 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
8319 LclVarDsc* varDsc = compiler->lvaGetDesc(lcl);
8321 // If we didn't just spill it (in genConsumeReg, above), then update the register info
8322 if (varDsc->GetRegNum() != REG_STK)
8324 // The old location is dying
8325 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
8327 gcInfo.gcMarkRegSetNpt(genRegMask(op1->GetRegNum()));
// Move the variable's home to the copy's target register.
8329 genUpdateVarReg(varDsc, treeNode);
8331 // Report the home change for this variable
8332 varLiveKeeper->siUpdateVariableLiveRange(varDsc, lcl->GetLclNum());
8334 // The new location is going live
8335 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
8340 genProduceReg(treeNode);
8343 //------------------------------------------------------------------------
8344 // genRegCopy: Produce code for a single register of a multireg copy node.
8347 // tree - The GT_COPY node
8348 // multiRegIndex - The index of the register to be copied
8351 // This will copy the corresponding register produced by this node's source, to
8352 // the register allocated to the register specified by this GT_COPY node.
8353 // A multireg copy doesn't support moving between register files, as the GT_COPY
8354 // node does not retain separate types for each index.
8355 // - when the source is a lclVar whose home location is being moved to a new
8356 // register (rather than just being copied for temporary use).
8359 // Either the register copied to, or the original register if there's no copy.
8361 regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex)
8363 assert(treeNode->OperGet() == GT_COPY);
8364 GenTree* op1 = treeNode->gtGetOp1();
8365 assert(op1->IsMultiRegNode());
8367 GenTreeCopyOrReload* copyNode = treeNode->AsCopyOrReload();
8368 assert(copyNode->GetRegCount() <= MAX_MULTIREG_COUNT);
8370 // Consume op1's register, which will perform any necessary reloads.
8371 genConsumeReg(op1, multiRegIndex);
8373 regNumber sourceReg = op1->GetRegByIndex(multiRegIndex);
8374 regNumber targetReg = copyNode->GetRegNumByIdx(multiRegIndex);
8375 // GenTreeCopyOrReload only reports the highest index that has a valid register.
8376 // However there may be lower indices that have no valid register (i.e. the register
8377 // on the source is still valid at the consumer).
// NOTE: 'type' used below is declared at function scope; its declaration line is
// elided in this excerpt.
8378 if (targetReg != REG_NA)
8380 // We shouldn't specify a no-op move.
8381 assert(sourceReg != targetReg);
8383 if (op1->IsMultiRegLclVar())
// The source is a promoted multi-reg local: the copy moves this field's home register,
// so liveness and debug info for the field local must be updated.
8385 LclVarDsc* parentVarDsc = compiler->lvaGetDesc(op1->AsLclVar());
8386 unsigned fieldVarNum = parentVarDsc->lvFieldLclStart + multiRegIndex;
8387 LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldVarNum);
8388 type = fieldVarDsc->TypeGet();
8389 inst_Mov(type, targetReg, sourceReg, /* canSkip */ false);
8390 if (!op1->AsLclVar()->IsLastUse(multiRegIndex) && fieldVarDsc->GetRegNum() != REG_STK)
8392 // The old location is dying
8393 genUpdateRegLife(fieldVarDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
8394 gcInfo.gcMarkRegSetNpt(genRegMask(sourceReg));
8395 genUpdateVarReg(fieldVarDsc, treeNode);
8397 // Report the home change for this variable
8398 varLiveKeeper->siUpdateVariableLiveRange(fieldVarDsc, fieldVarNum);
8400 // The new location is going live
8401 genUpdateRegLife(fieldVarDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
// Non-lclVar source: plain move, then publish GC info for the defined register.
8406 type = op1->GetRegTypeByIndex(multiRegIndex);
8407 inst_Mov(type, targetReg, sourceReg, /* canSkip */ false);
8408 // We never spill after a copy, so to produce the single register, we simply need to
8409 // update the GC info for the defined register.
8410 gcInfo.gcMarkRegPtrVal(targetReg, type);
8420 #if defined(DEBUG) && defined(TARGET_XARCH)
8422 //------------------------------------------------------------------------
8423 // genStackPointerCheck: Generate code to check the stack pointer against a saved value.
8424 // This is a debug check.
8427 // doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
8428 // lvaStackPointerVar - The local variable number that holds the value of the stack pointer
8429 // we are comparing against.
8430 // offset - the offset from the stack pointer to expect
8431 // regTmp - register we can use for computation if `offset` != 0
8436 void CodeGen::genStackPointerCheck(bool doStackPointerCheck,
8437 unsigned lvaStackPointerVar,
8441 if (doStackPointerCheck)
// The saved-SP local must be addressable on the frame (not enregistered) for the
// memory-operand compares below.
8443 assert(lvaStackPointerVar != BAD_VAR_NUM);
8444 assert(compiler->lvaGetDesc(lvaStackPointerVar)->lvDoNotEnregister);
8445 assert(compiler->lvaGetDesc(lvaStackPointerVar)->lvOnFrame);
// Non-zero expected offset: materialize SP - offset in regTmp and compare that.
8449 assert(regTmp != REG_NA);
8450 GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, regTmp, REG_SPBASE, /* canSkip */ false);
8451 GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, regTmp, offset);
8452 GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regTmp, lvaStackPointerVar, 0);
// Zero offset: compare SP against the saved value directly.
8456 GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);
// On match, jump over the breakpoint; on mismatch, fall into it and stop in the debugger.
8459 BasicBlock* sp_check = genCreateTempLabel();
8460 GetEmitter()->emitIns_J(INS_je, sp_check);
8461 instGen(INS_BREAKPOINT);
8462 genDefineTempLabel(sp_check);
8466 #endif // defined(DEBUG) && defined(TARGET_XARCH)
//------------------------------------------------------------------------
// getCurrentStackLevel: Return the code generator's currently tracked stack
// level ("genStackLevel"). The units and maintenance of genStackLevel are
// not visible in this excerpt -- presumably bytes of pending pushes on
// targets that track them; confirm at its definition.
8468 unsigned CodeGenInterface::getCurrentStackLevel() const
8470 return genStackLevel;
8474 //------------------------------------------------------------------------
8475 // VariableLiveRanges dumpers
8476 //------------------------------------------------------------------------
8478 // Dump "VariableLiveRange" when code has not been generated and we don't yet have the assembly native offsets,
8479 // but at least "emitLocation"s and "siVarLoc"
8480 void CodeGenInterface::VariableLiveKeeper::VariableLiveRange::dumpVariableLiveRange(
8481 const CodeGenInterface* codeGen) const
// Print the variable's location (register/stack home) followed by the range's
// start and (if already closed) end emit locations.
8483 codeGen->dumpSiVarLoc(&m_VarLocation);
8486 m_StartEmitLocation.Print(codeGen->GetCompiler()->compMethodID);
8488 if (m_EndEmitLocation.Valid())
8490 m_EndEmitLocation.Print(codeGen->GetCompiler()->compMethodID);
8499 // Dump "VariableLiveRange" when code has been generated and we have the assembly native offset of each "emitLocation"
8500 void CodeGenInterface::VariableLiveKeeper::VariableLiveRange::dumpVariableLiveRange(
8501 emitter* emit, const CodeGenInterface* codeGen) const
8503 assert(emit != nullptr);
8505 // "VariableLiveRanges" are created setting its location ("m_VarLocation") and the initial native offset
8506 // ("m_StartEmitLocation")
8507 codeGen->dumpSiVarLoc(&m_VarLocation);
8509 // If this is an open "VariableLiveRange", "m_EndEmitLocation" is non-valid and print -1
8510 UNATIVE_OFFSET endAssemblyOffset = m_EndEmitLocation.Valid() ? m_EndEmitLocation.CodeOffset(emit) : -1;
8512 printf(" [%X, %X)", m_StartEmitLocation.CodeOffset(emit), m_EndEmitLocation.CodeOffset(emit));
8515 //------------------------------------------------------------------------
8517 //------------------------------------------------------------------------
8518 //------------------------------------------------------------------------
8519 // resetDumper: If the "liveRange" has its last "VariableLiveRange" closed, it makes
8520 // the "LiveRangeDumper" points to end of "liveRange" (nullptr). In other case,
8521 // it makes the "LiveRangeDumper" points to the last "VariableLiveRange" of
8522 // "liveRange", which is opened.
8525 // liveRanges - the "LiveRangeList" of the "VariableLiveDescriptor" we want to
8526 // update its "LiveRangeDumper".
8529 // This method is expected to be called once the code for a BasicBlock has been
8530 // generated and all the new "VariableLiveRange"s of the variable during this block
8532 void CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::resetDumper(const LiveRangeList* liveRanges)
8534 // Something must have been reported in order to reset
8535 assert(m_hasLiveRangestoDump);
8537 if (liveRanges->back().m_EndEmitLocation.Valid())
8539 // the last "VariableLiveRange" is closed and the variable
8540 // is no longer alive
8541 m_hasLiveRangestoDump = false;
8545 // the last "VariableLiveRange" remains opened because it is
8546 // live at "BasicBlock"s "bbLiveOut".
8547 m_StartingLiveRange = liveRanges->backPosition();
8551 //------------------------------------------------------------------------
8552 // setDumperStartAt: Make the "LiveRangeDumper" instance point to the last "VariableLiveRange"
8553 // added so we can start dumping from there after the actual "BasicBlock"s code is generated.
8556 // liveRangeIt - an iterator to a position in "VariableLiveDescriptor::m_VariableLiveRanges"
// NOTE(review): the "Return Value" text below does not match this void function; it appears
// to be copy-pasted from another method's header -- confirm and clean up in the full source.
8559 // A const pointer to the "LiveRangeList" containing all the "VariableLiveRange"s
8560 // of the variable with index "varNum".
8563 // "varNum" should be always a valid index ("varnum" < "m_LiveDscCount")
8564 void CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::setDumperStartAt(const LiveRangeListIterator liveRangeIt)
8566 m_hasLiveRangestoDump = true;
8567 m_StartingLiveRange = liveRangeIt;
8570 //------------------------------------------------------------------------
8571 // getStartForDump: Return an iterator to the first "VariableLiveRange" edited/added
8572 // during the current "BasicBlock"
8575 // A LiveRangeListIterator to the first "VariableLiveRange" in "LiveRangeList" which
8576 // was used during last "BasicBlock".
// Simple accessor for the barrier position recorded by setDumperStartAt/resetDumper.
8578 CodeGenInterface::VariableLiveKeeper::LiveRangeListIterator CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::
8579 getStartForDump() const
8581 return m_StartingLiveRange;
8584 //------------------------------------------------------------------------
8585 // hasLiveRangesToDump: Return whether at least a "VariableLiveRange" was alive during
8586 // the current "BasicBlock"'s code generation
8589 // A boolean indicating if there is at least a "VariableLiveRange"
8590 // that has been used for the variable during last "BasicBlock".
8592 bool CodeGenInterface::VariableLiveKeeper::LiveRangeDumper::hasLiveRangesToDump() const
8594 return m_hasLiveRangestoDump;
8598 //------------------------------------------------------------------------
8599 // VariableLiveDescriptor
8600 //------------------------------------------------------------------------
// Constructor: allocate an empty live-range list; the dump barrier exists only in
// DEBUG builds (note the INDEBUG wrapper).
8602 CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::VariableLiveDescriptor(CompAllocator allocator)
8604 // Initialize an empty list
8605 m_VariableLiveRanges = new (allocator) LiveRangeList(allocator);
8607 INDEBUG(m_VariableLifeBarrier = new (allocator) LiveRangeDumper(m_VariableLiveRanges));
8610 //------------------------------------------------------------------------
8611 // hasVariableLiveRangeOpen: Return true if the variable is still alive,
8612 // false in other case.
// "Open" means the last recorded range has no valid end location yet.
8614 bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVariableLiveRangeOpen() const
8616 return !m_VariableLiveRanges->empty() && !m_VariableLiveRanges->back().m_EndEmitLocation.Valid();
8619 //------------------------------------------------------------------------
8620 // getLiveRanges: Return the list of variable locations for this variable.
8623 // A const LiveRangeList* pointing to the first variable location if it has
8624 // any or the end of the list in other case.
// NOTE(review): this const member function returns a non-const pointer to the
// member list, so callers can mutate it -- confirm this is intentional.
8626 CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::
8627 getLiveRanges() const
8629 return m_VariableLiveRanges;
8632 //------------------------------------------------------------------------
8633 // startLiveRangeFromEmitter: Report this variable as being born in "varLocation"
8634 // since the instruction where "emit" is located.
8637 // varLocation - the home of the variable.
8638 // emit - an emitter* instance located at the first instruction from
8639 // where "varLocation" becomes valid.
8642 // This variable is being born so it should be dead.
8645 // The position of "emit" matters to ensure intervals inclusive of the
8646 // beginning and exclusive of the end.
8648 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::startLiveRangeFromEmitter(
8649 CodeGenInterface::siVarLoc varLocation, emitter* emit) const
8651 noway_assert(emit != nullptr);
8653 // Is the first "VariableLiveRange" or the previous one has been closed so its "m_EndEmitLocation" is valid
8654 noway_assert(m_VariableLiveRanges->empty() || m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
8656 if (!m_VariableLiveRanges->empty() &&
8657 siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation)) &&
8658 m_VariableLiveRanges->back().m_EndEmitLocation.IsPreviousInsNum(emit))
8660 JITDUMP("Extending debug range...\n");
8662 // The variable is being born just after the instruction at which it died.
8663 // In this case, i.e. an update of the variable's value, we coalesce the live ranges.
// Re-open the previous range by invalidating its end location.
8664 m_VariableLiveRanges->back().m_EndEmitLocation.Init();
8668 JITDUMP("New debug range: %s\n",
8669 m_VariableLiveRanges->empty()
8671 : siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation))
8672 ? "new var or location"
8674 // Creates new live range with invalid end
8675 m_VariableLiveRanges->emplace_back(varLocation, emitLocation(), emitLocation())
8676 m_VariableLiveRanges->back().m_StartEmitLocation.CaptureLocation(emit);
// DEBUG-only: anchor the dump barrier at this (first) new range for the current block.
8680 if (!m_VariableLifeBarrier->hasLiveRangesToDump())
8682 m_VariableLifeBarrier->setDumperStartAt(m_VariableLiveRanges->backPosition());
8686 // m_StartEmitLocation has to be Valid and m_EndEmitLocation not
8687 noway_assert(m_VariableLiveRanges->back().m_StartEmitLocation.Valid());
8688 noway_assert(!m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
8691 //------------------------------------------------------------------------
8692 // endLiveRangeAtEmitter: Report this variable as becoming dead since the
8693 // instruction where "emit" is located.
8696 // emit - an emitter* instance located at the first instruction from
8697 // this variable becomes dead.
8700 // This variable is becoming dead so it should be alive.
8703 // The position of "emit" matters to ensure intervals inclusive of the
8704 // beginning and exclusive of the end.
8706 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::endLiveRangeAtEmitter(emitter* emit) const
8708 noway_assert(emit != nullptr);
8709 noway_assert(hasVariableLiveRangeOpen());
8711 // Using [close, open) ranges so as to not compute the size of the last instruction
8712 m_VariableLiveRanges->back().m_EndEmitLocation.CaptureLocation(emit);
8714 JITDUMP("Closing debug range.\n");
8715 // Now m_EndEmitLocation has to be Valid
8716 noway_assert(m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
8719 //------------------------------------------------------------------------
8720 // UpdateLiveRangeAtEmitter: Report this variable as changing its variable
8721 // home to "varLocation" since the instruction where "emit" is located.
8724 // varLocation - the new variable location.
8725 // emit - an emitter* instance located at the first instruction from
8726 // where "varLocation" becomes valid.
8729 // This variable is changing its home, so it must currently be alive (it has an open range;
8730 // see the noway_asserts below).
8732 // The position of "emit" matters to ensure intervals inclusive of the
8733 // beginning and exclusive of the end.
8735 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::updateLiveRangeAtEmitter(
8736 CodeGenInterface::siVarLoc varLocation, emitter* emit) const
8738 // This variable is changing home so it has been started before during this block
8739 noway_assert(m_VariableLiveRanges != nullptr && !m_VariableLiveRanges->empty());
8741 // And its last m_EndEmitLocation has to be invalid
8742 noway_assert(!m_VariableLiveRanges->back().m_EndEmitLocation.Valid());
8744 // If we are reporting again the same home, that means we are doing something twice?
8745 // noway_assert(! CodeGenInterface::siVarLoc::Equals(&m_VariableLiveRanges->back().m_VarLocation, varLocation));
8747 // Close previous live range
8748 endLiveRangeAtEmitter(emit);
// ...and immediately open a new one at the new home (coalesced if same location/position).
8750 startLiveRangeFromEmitter(varLocation, emit);
// Dump every recorded range for this variable, using native code offsets (code has been
// generated, so an emitter is available).
8754 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::dumpAllRegisterLiveRangesForBlock(
8755 emitter* emit, const CodeGenInterface* codeGen) const
8758 for (LiveRangeListIterator it = m_VariableLiveRanges->begin(); it != m_VariableLiveRanges->end(); it++)
8764 it->dumpVariableLiveRange(emit, codeGen);
//------------------------------------------------------------------------
// dumpRegisterLiveRangesForBlockBeforeCodeGenerated: Debug helper that dumps
// only the "VariableLiveRange"s recorded since the last call to
// "endBlockLiveRanges", i.e. those belonging to the block currently being
// generated.
//
// Arguments:
//    codeGen - the CodeGenInterface used to print the variable locations.
8769 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::dumpRegisterLiveRangesForBlockBeforeCodeGenerated(
8770 const CodeGenInterface* codeGen) const
// Start from the barrier (first range of the current block) and walk to the end of the list
8773 for (LiveRangeListIterator it = m_VariableLifeBarrier->getStartForDump(); it != m_VariableLiveRanges->end(); it++)
8779 it->dumpVariableLiveRange(codeGen);
8784 // Returns true if at least one live range for this variable has been recorded
// during the whole method (used to decide whether to dump anything at all).
8785 bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVarLiveRangesToDump() const
8787 return !m_VariableLiveRanges->empty();
8790 // Returns true if a live range for this variable has been recorded since the last call to endBlockLiveRanges,
// i.e. there is something new to dump for the current block.
8791 bool CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::hasVarLiveRangesFromLastBlockToDump() const
8793 return m_VariableLifeBarrier->hasLiveRangesToDump();
8796 // Reset the barrier so as to dump only the next block's changes on the next block
8797 void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::endBlockLiveRanges()
8799 // make "m_VariableLifeBarrier->m_StartingLiveRange" now point past the recorded ranges for printing purposes
8800 m_VariableLifeBarrier->resetDumper(m_VariableLiveRanges);
8804 //------------------------------------------------------------------------
8805 // VariableLiveKeeper
8806 //------------------------------------------------------------------------
8807 // Initialize structures for VariableLiveRanges
//
// Notes:
//    Allocates the keeper with the JIT's CMK_VariableLiveRanges allocator.
8808 void CodeGenInterface::initializeVariableLiveKeeper()
8810 CompAllocator allocator = compiler->getAllocator(CMK_VariableLiveRanges);
// When no debug info is requested, track zero variables/args so the keeper is effectively disabled
8812 int amountTrackedVariables = compiler->opts.compDbgInfo ? compiler->info.compLocalsCount : 0;
8813 int amountTrackedArgs = compiler->opts.compDbgInfo ? compiler->info.compArgsCount : 0;
8815 varLiveKeeper = new (allocator) VariableLiveKeeper(amountTrackedVariables, amountTrackedArgs, compiler, allocator);
// Accessor for the keeper created by initializeVariableLiveKeeper.
8818 CodeGenInterface::VariableLiveKeeper* CodeGenInterface::getVariableLiveKeeper() const
8820 return varLiveKeeper;
8823 //------------------------------------------------------------------------
8824 // VariableLiveKeeper: Create an instance of the object in charge of managing
8825 // VariableLiveRanges and initialize the array "m_vlrLiveDsc".
//
// Arguments:
8828 // totalLocalCount - the count of args, special args and IL Local
8829 // variables in the method.
8830 // argsCount - the count of args and special args in the method.
8831 // compiler - a compiler instance
// allocator - the allocator used for the per-variable descriptor arrays
//
8833 CodeGenInterface::VariableLiveKeeper::VariableLiveKeeper(unsigned int totalLocalCount,
8834 unsigned int argsCount,
8836 CompAllocator allocator)
8837 : m_LiveDscCount(totalLocalCount)
8838 , m_LiveArgsCount(argsCount)
8840 , m_LastBasicBlockHasBeenEmitted(false)
8842 if (m_LiveDscCount > 0)
8844 // Allocate memory for "m_vlrLiveDsc" and initialize each "VariableLiveDescriptor"
// (one array for the method body, a parallel one for prolog-time parameter locations)
8845 m_vlrLiveDsc = allocator.allocate<VariableLiveDescriptor>(m_LiveDscCount);
8846 m_vlrLiveDscForProlog = allocator.allocate<VariableLiveDescriptor>(m_LiveDscCount);
8848 for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
// Placement-new each descriptor so it is constructed in the allocator-provided storage
8850 new (m_vlrLiveDsc + varNum, jitstd::placement_t()) VariableLiveDescriptor(allocator);
8851 new (m_vlrLiveDscForProlog + varNum, jitstd::placement_t()) VariableLiveDescriptor(allocator);
8856 //------------------------------------------------------------------------
8857 // siStartOrCloseVariableLiveRange: Reports the given variable as being born
8858 // or becoming dead.
//
// Arguments:
8861 // varDsc - the variable for which a location changed will be reported
8862 // varNum - the index of the variable in the "compiler->lvaTable"
8863 // isBorn - whether the variable is being born from where the emitter is located.
8864 // isDying - whether the variable is dying from where the emitter is located.
//
// Assumptions:
8867 // The emitter should be located on the first instruction from which it is true that
8868 // the variable is becoming valid (when isBorn is true) or invalid (when isDying is true).
//
// Notes:
8871 // This method is being called from treeLifeUpdater when the variable is being born,
8872 // becoming dead, or both.
//
8874 void CodeGenInterface::VariableLiveKeeper::siStartOrCloseVariableLiveRange(const LclVarDsc* varDsc,
8875 unsigned int varNum,
8879 noway_assert(varDsc != nullptr);
8881 // Only the variables that exist in the IL, "this", and special arguments
// (indices below m_LiveDscCount) are reported, and only when debug info is on.
8883 if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount)
8885 if (isBorn && !isDying)
8887 // "varDsc" is valid from this point
8888 siStartVariableLiveRange(varDsc, varNum);
// When isBorn and isDying are both set, neither branch fires and no range change is reported.
8890 if (isDying && !isBorn)
8892 // this variable live range is no longer valid from this point
8893 siEndVariableLiveRange(varNum);
8898 //------------------------------------------------------------------------
8899 // siStartOrCloseVariableLiveRanges: Iterates the given set of variables
8900 // calling "siStartOrCloseVariableLiveRange" with each one.
//
// Arguments:
8903 // varsIndexSet - the set of variables to report start/end "VariableLiveRange"
8904 // isBorn - whether the set is being born from where the emitter is located.
8905 // isDying - whether the set is dying from where the emitter is located.
//
// Assumptions:
8908 // The emitter should be located on the first instruction from which it is true that
8909 // the variable is becoming valid (when isBorn is true) or invalid (when isDying is true).
//
// Notes:
8912 // This method is being called from treeLifeUpdater when a set of variables
8913 // is being born, becoming dead, or both.
//
8915 void CodeGenInterface::VariableLiveKeeper::siStartOrCloseVariableLiveRanges(VARSET_VALARG_TP varsIndexSet,
8919 if (m_Compiler->opts.compDbgInfo)
8921 VarSetOps::Iter iter(m_Compiler, varsIndexSet);
8922 unsigned varIndex = 0;
8923 while (iter.NextElem(&varIndex))
// Tracked index -> lclNum -> descriptor; report each one individually
8925 unsigned int varNum = m_Compiler->lvaTrackedIndexToLclNum(varIndex);
8926 const LclVarDsc* varDsc = m_Compiler->lvaGetDesc(varNum);
8927 siStartOrCloseVariableLiveRange(varDsc, varNum, isBorn, isDying);
8932 //------------------------------------------------------------------------
8933 // siStartVariableLiveRange: Reports the given variable as being born.
//
// Arguments:
8936 // varDsc - the variable for which a location changed will be reported
8937 // varNum - the index of the variable to report home in lvLiveDsc
//
// Assumptions:
8940 // The emitter should be pointing to the first instruction from where the VariableLiveRange is
// becoming valid.
8942 // The given "varDsc" should have its VariableRangeLists initialized.
//
// Notes:
8945 // This method should be called on every place a Variable is becoming alive.
8946 void CodeGenInterface::VariableLiveKeeper::siStartVariableLiveRange(const LclVarDsc* varDsc, unsigned int varNum)
8948 noway_assert(varDsc != nullptr);
8950 // Only the variables that exist in the IL, "this", and special arguments are reported, as long as they were
// allocated a home (a register or a frame slot).
8952 if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && (varDsc->lvIsInReg() || varDsc->lvOnFrame))
8954 // Build siVarLoc for this born "varDsc"
8955 CodeGenInterface::siVarLoc varLocation =
8956 m_Compiler->codeGen->getSiVarLoc(varDsc, m_Compiler->codeGen->getCurrentStackLevel());
8958 VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDsc[varNum];
8959 // this variable live range is valid from this point
8960 varLiveDsc->startLiveRangeFromEmitter(varLocation, m_Compiler->GetEmitter());
8964 //------------------------------------------------------------------------
8965 // siEndVariableLiveRange: Reports the variable as becoming dead.
//
// Arguments:
8968 // varNum - the index of the variable at m_vlrLiveDsc or lvaTable in that
8969 // is becoming dead.
//
// Assumptions:
8972 // The given variable should be alive.
8973 // The emitter should be pointing to the first instruction from where the VariableLiveRange is
8974 // becoming invalid.
//
// Notes:
8977 // This method should be called on every place a Variable is becoming dead.
8978 void CodeGenInterface::VariableLiveKeeper::siEndVariableLiveRange(unsigned int varNum)
8980 // Only the variables that exist in the IL, "this", and special arguments
8981 // will be reported.
8983 // This method is being called from genUpdateLife, and that one is called after
8984 // code for BasicBlock has been generated, but the emitter no longer has
8985 // a valid IG so we don't report the close of a "VariableLiveRange" after code is
// emitted (guarded by m_LastBasicBlockHasBeenEmitted below).
8988 if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && !m_LastBasicBlockHasBeenEmitted &&
8989 m_vlrLiveDsc[varNum].hasVariableLiveRangeOpen())
8991 // this variable live range is no longer valid from this point
8992 m_vlrLiveDsc[varNum].endLiveRangeAtEmitter(m_Compiler->GetEmitter());
8996 //------------------------------------------------------------------------
8997 // siUpdateVariableLiveRange: Reports the change of variable location for the
// given variable.
//
// Arguments:
9001 // varDsc - the variable for which its home has changed.
9002 // varNum - the index of the variable to report home in lvLiveDsc
//
// Assumptions:
9005 // The given variable should be alive.
9006 // The emitter should be pointing to the first instruction from where
9007 // the new variable location is becoming valid.
//
9009 void CodeGenInterface::VariableLiveKeeper::siUpdateVariableLiveRange(const LclVarDsc* varDsc, unsigned int varNum)
9011 noway_assert(varDsc != nullptr);
9013 // Only the variables that exist in the IL, "this", and special arguments
9014 // will be reported. These are locals and arguments, and are counted in
9015 // "info.compLocalsCount".
9017 // This method is being called when the prolog is being generated, and
9018 // the emitter no longer has a valid IG so we don't report the close of
9019 // a "VariableLiveRange" after code is emitted.
9020 if (m_Compiler->opts.compDbgInfo && varNum < m_LiveDscCount && !m_LastBasicBlockHasBeenEmitted)
9022 // Build the location of the variable
9023 CodeGenInterface::siVarLoc siVarLoc =
9024 m_Compiler->codeGen->getSiVarLoc(varDsc, m_Compiler->codeGen->getCurrentStackLevel());
9026 // Report the home change for this variable
9027 VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDsc[varNum];
9028 varLiveDsc->updateLiveRangeAtEmitter(siVarLoc, m_Compiler->GetEmitter());
9032 //------------------------------------------------------------------------
9033 // siEndAllVariableLiveRange: Reports the set of variables as becoming dead.
//
// Arguments:
9036 // newLife - the set of variables that are becoming dead.
//
// Assumptions:
9039 // All the variables in the set are alive.
//
// Notes:
9042 // This method is called when the last block is being generated, to kill all
9043 // the live variables and set a flag to avoid reporting variable locations
9044 // on next calls to methods that update variable liveness.
9045 void CodeGenInterface::VariableLiveKeeper::siEndAllVariableLiveRange(VARSET_VALARG_TP varsToClose)
9047 if (m_Compiler->opts.compDbgInfo)
9049 if (m_Compiler->lvaTrackedCount > 0 || !m_Compiler->opts.OptimizationDisabled())
// We have liveness info: close exactly the variables in the given set
9051 VarSetOps::Iter iter(m_Compiler, varsToClose);
9052 unsigned varIndex = 0;
9053 while (iter.NextElem(&varIndex))
9055 unsigned int varNum = m_Compiler->lvaTrackedIndexToLclNum(varIndex);
9056 siEndVariableLiveRange(varNum);
9061 // It seems we are jitting debug code, so we don't have variable
// liveness info; close every open range instead (parameterless overload).
9063 siEndAllVariableLiveRange();
// From here on, no further live-range updates are reported (the emitter's IG is gone)
9067 m_LastBasicBlockHasBeenEmitted = true;
9070 //------------------------------------------------------------------------
9071 // siEndAllVariableLiveRange: Reports all live variables as dead.
//
// Notes:
9074 // This overload exists for the case we are jitting code compiled in
9075 // debug mode. When that happens we don't have variable liveness info
9076 // as "BasicBlock::bbLiveIn" or "BasicBlock::bbLiveOut" and there is no
9077 // tracked variable.
//
9079 void CodeGenInterface::VariableLiveKeeper::siEndAllVariableLiveRange()
9081 // TODO: we can improve this keeping a set for the variables with
9082 // open VariableLiveRanges
9084 for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
9086 const VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;
9087 if (varLiveDsc->hasVariableLiveRangeOpen())
9089 siEndVariableLiveRange(varNum);
9094 //------------------------------------------------------------------------
9095 // getLiveRangesForVarForBody: Return the "VariableLiveRange" that correspond to
9096 // the given "varNum".
//
// Arguments:
9099 // varNum - the index of the variable in m_vlrLiveDsc, which is the same as
// its index in the "lvaTable".
//
// Return Value:
9103 // A const pointer to the list of variable locations reported for the variable.
//
// Assumptions:
9106 // This variable should be an argument, a special argument or an IL local
// variable (i.e. varNum < m_LiveDscCount).
9108 CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::getLiveRangesForVarForBody(
9109 unsigned int varNum) const
9111 // There should be at least one variable for which its liveness is tracked
9112 noway_assert(varNum < m_LiveDscCount);
9114 return m_vlrLiveDsc[varNum].getLiveRanges();
9117 //------------------------------------------------------------------------
9118 // getLiveRangesForVarForProlog: Return the "VariableLiveRange" that correspond to
9119 // the given "varNum" during the prolog.
//
// Arguments:
9122 // varNum - the index of the variable in m_vlrLiveDsc, which is the same as
// its index in the "lvaTable".
//
// Return Value:
9126 // A const pointer to the list of variable locations reported for the variable.
//
// Assumptions:
9129 // This variable should be an argument, a special argument or an IL local
// variable (i.e. varNum < m_LiveDscCount).
9131 CodeGenInterface::VariableLiveKeeper::LiveRangeList* CodeGenInterface::VariableLiveKeeper::getLiveRangesForVarForProlog(
9132 unsigned int varNum) const
9134 // There should be at least one variable for which its liveness is tracked
9135 noway_assert(varNum < m_LiveDscCount);
9137 return m_vlrLiveDscForProlog[varNum].getLiveRanges();
9140 //------------------------------------------------------------------------
9141 // getLiveRangesCount: Returns the count of variable locations reported for the tracked
9142 // variables, which are arguments, special arguments, and local IL variables.
//
// Return Value:
9145 // size_t - the count of variable locations
//
// Notes:
9148 // This method is being called from "genSetScopeInfo" to know the count of
9149 // "varResultInfo" that should be created on eeSetLVcount.
//
9151 size_t CodeGenInterface::VariableLiveKeeper::getLiveRangesCount() const
9153 size_t liveRangesCount = 0;
9155 if (m_Compiler->opts.compDbgInfo)
9157 for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
// i == 0: prolog descriptors; i == 1: method-body descriptors
9159 for (int i = 0; i < 2; i++)
9161 VariableLiveDescriptor* varLiveDsc = (i == 0 ? m_vlrLiveDscForProlog : m_vlrLiveDsc) + varNum;
// Only count variables that map back to an IL variable number
9163 if (m_Compiler->compMap2ILvarNum(varNum) != (unsigned int)ICorDebugInfo::UNKNOWN_ILNUM)
9165 liveRangesCount += varLiveDsc->getLiveRanges()->size();
9170 return liveRangesCount;
9173 //------------------------------------------------------------------------
9174 // psiStartVariableLiveRange: Reports the given variable as being born.
//
// Arguments:
9177 // varLocation - the variable location
9178 // varNum - the index of the variable in "compiler->lvaTable" or
9179 // "VariableLivekeeper->m_vlrLiveDsc"
//
// Notes:
9182 // This function is expected to be called from "psiBegProlog" during
9183 // prolog code generation.
//
9185 void CodeGenInterface::VariableLiveKeeper::psiStartVariableLiveRange(CodeGenInterface::siVarLoc varLocation,
9186 unsigned int varNum)
9188 // This descriptor has to correspond to a parameter. The first slots in lvaTable
9189 // are arguments and special arguments.
9190 noway_assert(varNum < m_LiveArgsCount);
// Prolog-time parameter locations are kept in the dedicated prolog descriptor array
9192 VariableLiveDescriptor* varLiveDsc = &m_vlrLiveDscForProlog[varNum];
9193 varLiveDsc->startLiveRangeFromEmitter(varLocation, m_Compiler->GetEmitter());
9196 //------------------------------------------------------------------------
9197 // psiClosePrologVariableRanges: Report all the parameters as becoming dead.
//
// Notes:
9200 // This function is expected to be called from "psiEndProlog" after
9201 // code for the prolog has been generated.
//
9203 void CodeGenInterface::VariableLiveKeeper::psiClosePrologVariableRanges()
9205 noway_assert(m_LiveArgsCount <= m_LiveDscCount);
9207 for (unsigned int varNum = 0; varNum < m_LiveArgsCount; varNum++)
9209 VariableLiveDescriptor* varLiveDsc = m_vlrLiveDscForProlog + varNum;
// Only close descriptors that were actually opened during the prolog
9211 if (varLiveDsc->hasVariableLiveRangeOpen())
9213 varLiveDsc->endLiveRangeAtEmitter(m_Compiler->GetEmitter());
//------------------------------------------------------------------------
// dumpBlockVariableLiveRanges: Debug dump of the live ranges recorded for
// the given basic block (only variables with changes since the last block).
//
// Arguments:
//    block - the basic block whose live-range history is printed.
9219 void CodeGenInterface::VariableLiveKeeper::dumpBlockVariableLiveRanges(const BasicBlock* block)
9221 assert(block != nullptr);
9223 bool hasDumpedHistory = false;
9225 printf("\nVariable Live Range History Dump for " FMT_BB "\n", block->bbNum);
9227 if (m_Compiler->opts.compDbgInfo)
9229 for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
9231 VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;
9233 if (varLiveDsc->hasVarLiveRangesFromLastBlockToDump())
9235 hasDumpedHistory = true;
9236 m_Compiler->gtDispLclVar(varNum, false);
9238 varLiveDsc->dumpRegisterLiveRangesForBlockBeforeCodeGenerated(m_Compiler->codeGen);
// Reset the per-block barrier so the next dump only shows the next block's changes
9239 varLiveDsc->endBlockLiveRanges();
9245 if (!hasDumpedHistory)
9247 printf("..None..\n");
//------------------------------------------------------------------------
// dumpLvaVariableLiveRanges: Debug dump of every live range recorded for
// every tracked variable over the whole method.
9251 void CodeGenInterface::VariableLiveKeeper::dumpLvaVariableLiveRanges() const
9253 bool hasDumpedHistory = false;
9255 printf("VARIABLE LIVE RANGES:\n");
9257 if (m_Compiler->opts.compDbgInfo)
9259 for (unsigned int varNum = 0; varNum < m_LiveDscCount; varNum++)
9261 VariableLiveDescriptor* varLiveDsc = m_vlrLiveDsc + varNum;
9263 if (varLiveDsc->hasVarLiveRangesToDump())
9265 hasDumpedHistory = true;
9266 m_Compiler->gtDispLclVar(varNum, false);
9268 varLiveDsc->dumpAllRegisterLiveRangesForBlock(m_Compiler->GetEmitter(), m_Compiler->codeGen);
9274 if (!hasDumpedHistory)
9276 printf("..None..\n");
9281 //-----------------------------------------------------------------------------
9282 // genPoisonFrame: Generate code that places a recognizable value into address exposed variables.
//
// Arguments:
//    regLiveIn - the registers live on entry; asserted not to overlap the
//                registers this routine clobbers.
//
// Notes:
9285 // This function emits code to poison address exposed non-zero-inited local variables. We expect this function
9286 // to be called when emitting code for the scratch BB that comes right after the prolog.
9287 // The variables are poisoned using 0xcdcdcdcd.
9288 void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
9290 assert(compiler->compShouldPoisonFrame());
9291 #if defined(TARGET_XARCH)
9292 regNumber poisonValReg = REG_EAX;
9293 assert((regLiveIn & (RBM_EDI | RBM_ECX | RBM_EAX)) == 0);
// Non-XARCH path: use the scratch register and the first three argument registers
9295 regNumber poisonValReg = REG_SCRATCH;
9296 assert((regLiveIn & (genRegMask(REG_SCRATCH) | RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2)) == 0);
// Poison pattern: 0xcd repeated to fill a pointer-sized value (64-bit vs 32-bit)
9300 const ssize_t poisonVal = (ssize_t)0xcdcdcdcdcdcdcdcd;
9302 const ssize_t poisonVal = (ssize_t)0xcdcdcdcd;
9305 // The first time we need to poison something we will initialize a register to the largest immediate cccccccc that
// we can fit in a register, and reuse it for subsequent locals when still valid.
9307 bool hasPoisonImm = false;
9308 for (unsigned varNum = 0; varNum < compiler->info.compLocalsCount; varNum++)
9310 LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
// Skip parameters, locals that must be zero-inited, and locals that are not address exposed
9311 if (varDsc->lvIsParam || varDsc->lvMustInit || !varDsc->IsAddressExposed())
9316 assert(varDsc->lvOnFrame);
9318 unsigned int size = compiler->lvaLclSize(varNum);
9319 if ((size / TARGET_POINTER_SIZE) > 16)
9321 // This will require more than 16 instructions, switch to rep stosd/memset call.
9322 CLANG_FORMAT_COMMENT_ANCHOR;
9323 #if defined(TARGET_XARCH)
9324 GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0);
9325 assert(size % 4 == 0);
9326 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ECX, size / 4);
9327 // On xarch we can leave the value in eax and only set eax once
9328 // since rep stosd does not kill eax.
9331 instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_EAX, poisonVal);
9332 hasPoisonImm = true;
9334 instGen(INS_r_stosd);
// Non-XARCH path: call the memset helper (address, value, size in arg registers)
9336 GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
9337 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
9338 instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_2, size);
9339 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
9340 // May kill REG_SCRATCH, so we need to reload it.
9341 hasPoisonImm = false;
// Small local: materialize the poison value once, then store it word by word
9348 instGen_Set_Reg_To_Imm(EA_PTRSIZE, poisonValReg, poisonVal);
9349 hasPoisonImm = true;
9352 // For 64-bit we check if the local is 8-byte aligned. For 32-bit, we assume everything is always 4-byte aligned.
9355 int addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
9359 int end = addr + (int)size;
9360 for (int offs = addr; offs < end;)
// Prefer 8-byte stores when the offset is 8-aligned and at least 8 bytes remain
9363 if ((offs % 8) == 0 && end - offs >= 8)
9365 GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
// Otherwise fall back to a 4-byte store
9371 assert((offs % 4) == 0 && end - offs >= 4);
9372 GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
9379 //----------------------------------------------------------------------
9380 // genBitCast - Generate the instruction to move a value between register files
//
// Arguments:
9383 // targetType - the destination type
9384 // targetReg - the destination register
9385 // srcType - the source type
9386 // srcReg - the source register
//
9388 void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg)
// The register class (float vs integer) must match each operand's type
9390 const bool srcFltReg = varTypeUsesFloatReg(srcType);
9391 assert(srcFltReg == genIsValidFloatReg(srcReg));
9393 const bool dstFltReg = varTypeUsesFloatReg(targetType);
9394 assert(dstFltReg == genIsValidFloatReg(targetReg));
// inst_Mov handles same-register moves (canSkip) and cross-register-file moves
9396 inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true);
9399 //----------------------------------------------------------------------
9400 // genCodeForBitCast - Generate code for a GT_BITCAST that is not contained
//
// Arguments:
9403 // treeNode - the GT_BITCAST for which we're generating code
//
9405 void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
9407 assert(treeNode->TypeGet() == genActualType(treeNode));
9408 regNumber targetReg = treeNode->GetRegNum();
9409 var_types targetType = treeNode->TypeGet();
9410 GenTree* op1 = treeNode->gtGetOp1();
9411 genConsumeRegs(op1);
9413 if (op1->isContained())
// Contained operand must be a local var: load it from its frame slot directly
9415 assert(op1->OperIs(GT_LCL_VAR));
9416 unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
9417 instruction loadIns = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lclNum));
9418 GetEmitter()->emitIns_R_S(loadIns, emitTypeSize(targetType), targetReg, lclNum, 0);
9423 if (compiler->opts.compUseSoftFP && (targetType == TYP_LONG))
9425 // This is a special arm-softFP case when a TYP_LONG node was introduced during lowering
9426 // for a call argument, so it was not handled by decomposelongs phase as all other TYP_LONG nodes.
9427 // Example foo(double LclVar V01), LclVar V01 has to be passed in general registers r0, r1,
9428 // so lowering will add `BITCAST long(LclVar double V01)` and codegen has to support it here.
9429 const regNumber srcReg = op1->GetRegNum();
9430 const regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
9431 assert(otherReg != REG_NA);
// vmov d -> two integer registers (the 64-bit result occupies a register pair)
9432 inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, srcReg, EA_8BYTE);
9435 #endif // TARGET_ARM
// General case: a plain register-to-register move (possibly cross register file)
9437 genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
9440 genProduceReg(treeNode);
9443 //----------------------------------------------------------------------
9444 // genCanOmitNormalizationForBswap16:
9445 // Small peephole to check if a bswap16 node can omit normalization.
//
// Arguments:
9448 // tree - The BSWAP16 node
//
// Remarks:
9451 // BSWAP16 nodes are required to zero extend the upper 16 bits, but since the
9452 // importer always inserts a normalizing cast (either sign or zero extending)
9453 // we almost never need to actually do this.
//
9455 bool CodeGen::genCanOmitNormalizationForBswap16(GenTree* tree)
// With optimizations disabled we don't rely on the peephole
9457 if (compiler->opts.OptimizationDisabled())
9462 assert(tree->OperIs(GT_BSWAP16));
// The immediately following node must be a cast consuming this BSWAP16
9463 if ((tree->gtNext == nullptr) || !tree->gtNext->OperIs(GT_CAST))
9468 GenTreeCast* cast = tree->gtNext->AsCast();
9469 if (cast->gtOverflow() || (cast->CastOp() != tree))
// A following small-int cast (to SHORT/USHORT) will normalize for us
9474 return (cast->gtCastType == TYP_USHORT) || (cast->gtCastType == TYP_SHORT);
9477 //----------------------------------------------------------------------
9478 // genCodeForReuseVal: Generate code for a node marked with re-using a register.
//
// Arguments:
9481 // tree - The node marked with re-using a register
//
// Remarks:
9484 // Generates nothing, except for when the node is a CNS_INT(0) where
9485 // we will define a new label to propagate GC info. We want to do this
9486 // because if the node is a CNS_INT(0) and is re-using a register,
9487 // that register could have been used for a CNS_INT(ref null) that is GC
// tracked; the label forces the GC info to be updated.
//
9490 void CodeGen::genCodeForReuseVal(GenTree* treeNode)
9492 assert(treeNode->IsReuseRegVal());
9494 // For now, this is only used for constant nodes.
9495 assert(treeNode->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC));
9496 JITDUMP(" TreeNode is marked ReuseReg\n");
9498 if (treeNode->IsIntegralConst(0) && GetEmitter()->emitCurIGnonEmpty())
9500 genDefineTempLabel(genCreateTempLabel());