1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Code Generator Common: XX
9 XX Methods common to all architectures and register allocation strategies XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16 // identical, and which should probably be moved here.
27 #ifndef JIT32_GCENCODER
28 #include "gcinfoencoder.h"
31 /*****************************************************************************/
// Per-type lookup tables generated from the DEF_TP entries in typelist.h.
// Each table redefines DEF_TP to pluck one column out of the type list:
// size, alignment, stack-slot size, and the "actual" (stack-normalized) type.
// NOTE(review): the #include "typelist.h" / #undef DEF_TP / closing "};"
// lines between these definitions are elided in this excerpt of the file.
33 const BYTE genTypeSizes[] = {
// Column 'sz': size in bytes of each var_types entry.
34 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
39 const BYTE genTypeAlignments[] = {
// Column 'al': required alignment in bytes of each var_types entry.
40 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
45 const BYTE genTypeStSzs[] = {
// Column 'st': size in stack slots of each var_types entry.
46 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
51 const BYTE genActualTypes[] = {
// Column 'jitType': the stack-normalized ("actual") type for each entry.
52 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
// Record that EH constructs require a frame pointer for this method.
// Outside JIT32_GCENCODER builds this also forces the method to be fully
// interruptible (m_cgInterruptible = true), for the reason given below.
// NOTE(review): braces and the verbose/dump guard around the printf are
// elided in this excerpt -- confirm the guard in the full source.
57 void CodeGenInterface::setFramePointerRequiredEH(bool value)
59 m_cgFramePointerRequired = value;
61 #ifndef JIT32_GCENCODER
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
73 printf("Method has EH, marking method as fully interruptible\n");
77 m_cgInterruptible = true;
79 #endif // JIT32_GCENCODER
82 /*****************************************************************************/
// Factory for the code generator: allocates the concrete CodeGen out of the
// compiler's arena (CMK_Codegen memory kind) and returns it through the
// CodeGenInterface abstraction.
83 CodeGenInterface* getCodeGenerator(Compiler* comp)
85 return new (comp, CMK_Codegen) CodeGen(comp);
88 // CodeGenInterface constructor: binds gcInfo, regSet (which also receives
// a reference to gcInfo), and the owning Compiler instance.
89 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler)
94 /*****************************************************************************/
// CodeGen constructor: initializes per-compilation code generation state --
// lazily-materialized FP constant bitmasks, the emitter, the register
// tracker, IP-mapping lists, and the interruptibility flags.
// Fix applied: "&regSet" had been corrupted to the registered-trademark
// character (HTML "&reg;" entity mangling) on the two lines below; restored.
// NOTE(review): blank lines and some braces/#endif lines are elided in this
// excerpt of the file.
96 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
98 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
// The SSE constant bitmasks are created on demand; start them out null.
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
106 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
110 #if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
// Encoding fix: these two lines previously read "(R)Set" instead of "&regSet".
115 regTracker.rsTrackInit(compiler, &regSet);
116 gcInfo.regSet = &regSet;
// Create the emitter in the compiler's arena and cross-link it with this
// code generator and the GC info tracker.
117 m_cgEmitter = new (compiler->getAllocator()) emitter();
118 m_cgEmitter->codeGen = this;
119 m_cgEmitter->gcInfo = &gcInfo;
122 setVerbose(compiler->verbose);
128 #if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
129 // This appears to be x86-specific. It's attempting to make sure all offsets to temps
130 // are large. For ARM, this doesn't interact well with our decision about whether to use
131 // R10 or not as a reserved register.
132 if (regSet.rsStressRegs())
133 compiler->tmpIntSpillMax = (SCHAR_MAX / sizeof(int));
134 #endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
139 #ifdef LEGACY_BACKEND
140 // TODO-Cleanup: These used to be set in rsInit() - should they be moved to RegSet??
141 // They are also accessed by the register allocators and fgMorphLclVar().
142 intRegState.rsCurRegArgNum = 0;
143 floatRegState.rsCurRegArgNum = 0;
144 #endif // LEGACY_BACKEND
147 getDisAssembler().disInit(compiler);
151 genTempLiveChg = true;
152 genTrnslLocalVarCount = 0;
154 // Shouldn't be used before it is set in genFnProlog()
155 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>();
157 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
158 // Shouldn't be used before it is set in genFnProlog()
159 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
160 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
163 #ifdef _TARGET_AMD64_
164 // This will be set before final frame layout.
165 compiler->compVSQuirkStackPaddingNeeded = 0;
167 // Set to true if we perform the Quirk that fixes the PPP issue
168 compiler->compQuirkForPPPflag = false;
169 #endif // _TARGET_AMD64_
171 #ifdef LEGACY_BACKEND
172 genFlagsEqualToNone();
173 #endif // LEGACY_BACKEND
175 // Initialize the IP-mapping logic.
176 compiler->genIPmappingList = nullptr;
177 compiler->genIPmappingLast = nullptr;
178 compiler->genCallSite2ILOffsetMap = nullptr;
180 /* Assume that we are not fully interruptible */
182 genInterruptible = false;
184 genInterruptibleUsed = false;
185 genCurDispOffset = (unsigned)-1;
// Associate "tree" with register "reg" by recording it in gtRegNum.
// NOTE(review): the LEGACY_BACKEND-only statement inside the #ifdef is
// elided in this excerpt.
189 void CodeGenInterface::genMarkTreeInReg(GenTreePtr tree, regNumber reg)
191 tree->gtRegNum = reg;
192 #ifdef LEGACY_BACKEND
194 #endif // LEGACY_BACKEND
197 #if CPU_LONG_USES_REGPAIR
// Associate "tree" with register pair "regPair" (targets where longs live
// in a pair of registers) by recording it in gtRegPair.
// NOTE(review): the LEGACY_BACKEND-only statement inside the #ifdef is
// elided in this excerpt.
198 void CodeGenInterface::genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair)
200 tree->gtRegPair = regPair;
201 #ifdef LEGACY_BACKEND
203 #endif // LEGACY_BACKEND
207 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
209 //---------------------------------------------------------------------
210 // genTotalFrameSize - return the "total" size of the stack frame, including local size
211 // and callee-saved register size. There are a few things "missing" depending on the
212 // platform. The function genCallerSPtoInitialSPdelta() includes those things.
214 // For ARM, this doesn't include the prespilled registers.
216 // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
217 // It also doesn't include the pushed return address.
// Return the "total" stack frame size: callee-saved register area plus the
// local frame (compLclFrameSize). See the banner comment above for what is
// deliberately excluded per platform. Requires that compCalleeRegsPushed
// has already been set (in genFnProlog, per the constructor's comment).
222 int CodeGenInterface::genTotalFrameSize()
224 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
226 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
228 assert(totalFrameSize >= 0);
229 return totalFrameSize;
232 //---------------------------------------------------------------------
233 // genSPtoFPdelta - return the offset from SP to the frame pointer.
234 // This number is going to be positive, since SP must be at the lowest
237 // There must be a frame pointer to call this function!
// Return the offset from SP to the frame pointer, computed as
// genCallerSPtoFPdelta() - genCallerSPtoInitialSPdelta(). Must only be
// called when a frame pointer is in use.
// NOTE(review): the declaration of 'delta' and the assert/return lines are
// elided in this excerpt.
239 int CodeGenInterface::genSPtoFPdelta()
241 assert(isFramePointerUsed());
245 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
251 //---------------------------------------------------------------------
252 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
253 // This number is going to be negative, since the Caller-SP is at a higher
254 // address than the frame pointer.
256 // There must be a frame pointer to call this function!
// Return the (non-positive) offset from Caller-SP to the frame pointer:
// ARM subtracts the prespill registers plus the LR/R11 pair; x86 subtracts
// the pushed return address plus the pushed EBP. Requires a frame pointer.
258 int CodeGenInterface::genCallerSPtoFPdelta()
260 assert(isFramePointerUsed());
261 int callerSPtoFPdelta = 0;
263 #if defined(_TARGET_ARM_)
264 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
265 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
266 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
267 #elif defined(_TARGET_X86_)
268 // Thanks to ebp chaining, the difference between ebp-based addresses
269 // and caller-SP-relative addresses is just the 2 pointers:
272 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
274 #error "Unknown _TARGET_"
277 assert(callerSPtoFPdelta <= 0);
278 return callerSPtoFPdelta;
281 //---------------------------------------------------------------------
282 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
284 // This number will be negative.
// Return the (non-positive) offset from Caller-SP to the Initial SP:
// the total frame size, plus the ARM prespill area or the x86 pushed
// return address and (if used) frame pointer.
286 int CodeGenInterface::genCallerSPtoInitialSPdelta()
288 int callerSPtoSPdelta = 0;
290 #if defined(_TARGET_ARM_)
291 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
292 callerSPtoSPdelta -= genTotalFrameSize();
293 #elif defined(_TARGET_X86_)
294 callerSPtoSPdelta -= genTotalFrameSize();
295 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
297 // compCalleeRegsPushed does not account for the frame pointer
298 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
299 if (isFramePointerUsed())
301 callerSPtoSPdelta -= REGSIZE_BYTES;
304 #error "Unknown _TARGET_"
307 assert(callerSPtoSPdelta <= 0);
308 return callerSPtoSPdelta;
311 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
313 /*****************************************************************************
314 * Should we round simple operations (assignments, arithmetic operations, etc.)
// Decide whether simple FP operations should round their result to the
// declared precision, based on the current round-float level.
// NOTE(review): the switch statement, its other case labels, and the return
// statements are elided in this excerpt; only the ROUND_CMP_CONST label and
// the default-path assert(roundLevel == ROUND_ALWAYS) are visible.
319 bool CodeGen::genShouldRoundFP()
321 RoundLevel roundLevel = getRoundFloatLevel();
326 case ROUND_CMP_CONST:
331 assert(roundLevel == ROUND_ALWAYS);
336 /*****************************************************************************
338 * Initialize some global variables.
// One-time codegen setup: builds raRegVarsMask (tracked vars enregistered
// for their entire lifetime) and gcInfo.gcTrkStkPtrLcls (tracked GC vars
// that live on the stack for some or all of their lifetime), then resets
// the last-live tracking state and snapshots the basic-block count.
341 void CodeGen::genPrepForCompiler()
346 /* Figure out which non-register variables hold pointers */
348 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
350 // Figure out which variables live in registers.
351 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
352 // in a register (i.e. they live on the stack for all or part of their lifetime).
353 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
355 VarSetOps::AssignNoCopy(compiler, compiler->raRegVarsMask, VarSetOps::MakeEmpty(compiler));
// Walk the whole local-variable table, partitioning tracked vars into the
// "fully enregistered" and "GC-tracked on stack" sets.
357 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
359 if (varDsc->lvTracked
360 #ifndef LEGACY_BACKEND
361 || varDsc->lvIsRegCandidate()
362 #endif // !LEGACY_BACKEND
365 if (varDsc->lvRegister
366 #if FEATURE_STACK_FP_X87
367 && !varDsc->IsFloatRegType()
371 VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
373 else if (compiler->lvaIsGCTracked(varDsc))
375 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
379 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
380 genLastLiveMask = RBM_NONE;
382 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
386 /*****************************************************************************
387 * To report exception handling information to the VM, we need the size of the exception
388 * handling regions. To compute that, we need to emit labels for the beginning block of
389 * an EH region, and the block that immediately follows a region. Go through the EH
390 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
392 * The beginning blocks of the EH regions already should have this flag set.
394 * No blocks should be added or removed after this.
396 * This code is closely coupled with genReportEH() in the sense that any block
397 * that this procedure has determined it needs to have a label has to be selected
398 * using the same logic both here and in genReportEH(), so basically any time there is
399 * a change in the way we handle EH reporting, we have to keep the logic of these two
// Mark with BBF_HAS_LABEL every block whose address genReportEH() will need:
// the block following each try region and each handler region, and (AMD64)
// the block following each BBJ_CALLFINALLY(/BBJ_ALWAYS pair). See the banner
// comment above for the coupling with genReportEH().
403 void CodeGen::genPrepForEHCodegen()
405 assert(!compiler->fgSafeBasicBlockCreation);
410 bool anyFinallys = false;
// Walk the EH descriptor table.
412 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
413 HBtab < HBtabEnd; HBtab++)
415 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
416 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
418 if (HBtab->ebdTryLast->bbNext != nullptr)
420 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
423 if (HBtab->ebdHndLast->bbNext != nullptr)
425 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
428 if (HBtab->HasFilter())
430 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
431 // The block after the last block of the filter is
432 // the handler begin block, which we already asserted
433 // has BBF_HAS_LABEL set.
436 #ifdef _TARGET_AMD64_
437 if (HBtab->HasFinallyHandler())
441 #endif // _TARGET_AMD64_
// AMD64 only: label the continuation block of every BBJ_CALLFINALLY so the
// finally's "where to return to" address is emittable.
444 #ifdef _TARGET_AMD64_
447 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
449 if (block->bbJumpKind == BBJ_CALLFINALLY)
451 BasicBlock* bbToLabel = block->bbNext;
452 if (block->isBBCallAlwaysPair())
454 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
456 if (bbToLabel != nullptr)
458 bbToLabel->bbFlags |= BBF_HAS_LABEL;
460 } // block is BBJ_CALLFINALLY
462 } // if (anyFinallys)
463 #endif // _TARGET_AMD64_
// Update variable and register liveness for the given node (codegen flavor
// of compUpdateLife).
466 void CodeGenInterface::genUpdateLife(GenTreePtr tree)
468 compiler->compUpdateLife</*ForCodeGen*/ true>(tree);
// Set the live-variable set directly to "newLife" (codegen flavor of
// compUpdateLife).
471 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
473 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
476 #ifdef LEGACY_BACKEND
477 // Returns the liveSet after tree has executed.
478 // "tree" MUST occur in the current statement, AFTER the most recent
479 // update of compiler->compCurLifeTree and compiler->compCurLife.
// Compute the live-variable set after "tree" by walking forward from the
// last point at which liveness was updated (compCurLifeTree, or the start
// of the current statement when none). See the precondition comment above.
481 VARSET_VALRET_TP CodeGen::genUpdateLiveSetForward(GenTreePtr tree)
483 VARSET_TP startLiveSet(VarSetOps::MakeCopy(compiler, compiler->compCurLife));
484 GenTreePtr startNode;
485 assert(tree != compiler->compCurLifeTree);
486 if (compiler->compCurLifeTree == nullptr)
488 assert(compiler->compCurStmt != nullptr);
489 startNode = compiler->compCurStmt->gtStmt.gtStmtList;
493 startNode = compiler->compCurLifeTree->gtNext;
495 return compiler->fgUpdateLiveSet(startLiveSet, startNode, tree);
498 // Determine the registers that are live after "second" has been evaluated,
499 // but which are not live after "first".
501 // 1. "first" must occur after compiler->compCurLifeTree in execution order for the current statement
502 // 2. "second" must occur after "first" in the current statement
// Return the mask of registers holding variables that are live after
// "second" but not after "first" (see preconditions in the comment above).
// NOTE(review): the return statement is elided in this excerpt; 'newLiveMask'
// is presumably returned -- confirm in the full source.
504 regMaskTP CodeGen::genNewLiveRegMask(GenTreePtr first, GenTreePtr second)
506 // First, compute the liveset after "first"
507 VARSET_TP firstLiveSet = genUpdateLiveSetForward(first);
508 // Now, update the set forward from "first" to "second"
509 VARSET_TP secondLiveSet = compiler->fgUpdateLiveSet(firstLiveSet, first->gtNext, second);
510 regMaskTP newLiveMask = genLiveMask(VarSetOps::Diff(compiler, secondLiveSet, firstLiveSet));
515 // Return the register mask for the given register variable
// Return the register mask for an enregistered variable: the float mask for
// floating types, otherwise the integer register (plus the second register
// of a register pair, where applicable).
// NOTE(review): the "return regMask;" line is elided in this excerpt.
517 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
519 regMaskTP regMask = RBM_NONE;
521 assert(varDsc->lvIsInReg());
523 if (varTypeIsFloating(varDsc->TypeGet()))
525 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
529 regMask = genRegMask(varDsc->lvRegNum);
530 if (isRegPairType(varDsc->lvType))
532 regMask |= genRegMask(varDsc->lvOtherReg);
538 // Return the register mask for the given lclVar or regVar tree node
// Return the register mask for a GT_LCL_VAR/GT_REG_VAR node. For a promoted
// struct, union the masks of all enregistered fields; otherwise use the
// variable's own mask when it is in a register.
540 regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree)
542 assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
544 regMaskTP regMask = RBM_NONE;
545 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
546 if (varDsc->lvPromoted)
548 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
550 noway_assert(compiler->lvaTable[i].lvIsStructField);
551 if (compiler->lvaTable[i].lvIsInReg())
553 regMask |= genGetRegMask(&compiler->lvaTable[i]);
557 else if (varDsc->lvIsInReg())
559 regMask = genGetRegMask(varDsc);
564 // The given lclVar is either going live (being born) or dying.
565 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
566 // Update regSet.rsMaskVars accordingly.
// Add or remove the variable's register mask from regSet.rsMaskVars as the
// enregistered variable is born or dies (see the banner comment above; a
// dead store under MinOpts may be both).
568 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree))
570 #if FEATURE_STACK_FP_X87
571 // The stack fp reg vars are handled elsewhere
572 if (varTypeIsFloating(varDsc->TypeGet()))
576 regMaskTP regMask = genGetRegMask(varDsc);
// Debug dump of the liveness transition (guard lines elided in this excerpt).
579 if (compiler->verbose)
581 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
582 varDsc->PrintVarReg();
583 printf(" is becoming %s ", (isDying) ? "dead" : "live");
584 Compiler::printTreeID(tree);
591 // We'd like to be able to assert the following, however if we are walking
592 // through a qmark/colon tree, we may encounter multiple last-use nodes.
593 // assert((regSet.rsMaskVars & regMask) == regMask);
594 regSet.RemoveMaskVars(regMask);
// Born path: the registers must not already be marked as holding live vars.
598 assert((regSet.rsMaskVars & regMask) == 0);
599 regSet.AddMaskVars(regMask);
603 // Gets a register mask that represent the kill set for a helper call since
604 // not all JIT Helper calls follow the standard ABI on the target architecture.
606 // TODO-CQ: Currently this list is incomplete (not all helpers calls are
607 // enumerated) and not 100% accurate (some killsets are bigger than
608 // what they really are).
609 // There's some work to be done in several places in the JIT to
610 // accurately track the registers that are getting killed by
612 // a) LSRA needs several changes to accommodate more precise killsets
613 // for every helper call it sees (both explicitly [easy] and
614 // implicitly [hard])
615 // b) Currently for AMD64, when we generate code for a helper call
616 // we're independently over-pessimizing the killsets of the call
617 // (independently from LSRA) and this needs changes
618 // both in CodeGenAmd64.cpp and emitx86.cpp.
620 // The best solution for this problem would be to try to centralize
621 // the killset information in a single place but then make the
622 // corresponding changes so every code generation phase is in sync
625 // The interim solution is to only add known helper calls that don't
626 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
// Return the register kill set for a JIT helper call; see the banner comment
// above for the caveats (the list is incomplete and deliberately
// conservative). Unlisted helpers default to RBM_CALLEE_TRASH.
// NOTE(review): the switch skeleton, #else lines, and some break/default
// lines are elided in this excerpt.
627 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
631 case CORINFO_HELP_ASSIGN_BYREF:
632 #if defined(_TARGET_AMD64_)
633 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
634 #elif defined(_TARGET_ARM64_)
635 return RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF | RBM_CALLEE_TRASH_NOGC;
636 #elif defined(_TARGET_X86_)
637 return RBM_ESI | RBM_EDI | RBM_ECX;
638 #elif defined(_TARGET_ARM_)
639 return RBM_ARG_1 | RBM_ARG_0 | RBM_CALLEE_TRASH_NOGC;
641 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
642 return RBM_CALLEE_TRASH;
645 case CORINFO_HELP_PROF_FCN_ENTER:
646 #ifdef RBM_PROFILER_ENTER_TRASH
647 return RBM_PROFILER_ENTER_TRASH;
649 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
652 case CORINFO_HELP_PROF_FCN_LEAVE:
653 #ifdef RBM_PROFILER_LEAVE_TRASH
654 return RBM_PROFILER_LEAVE_TRASH;
656 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
659 case CORINFO_HELP_PROF_FCN_TAILCALL:
660 #ifdef RBM_PROFILER_TAILCALL_TRASH
661 return RBM_PROFILER_TAILCALL_TRASH;
663 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
666 case CORINFO_HELP_STOP_FOR_GC:
667 return RBM_STOP_FOR_GC_TRASH;
669 case CORINFO_HELP_INIT_PINVOKE_FRAME:
670 return RBM_INIT_PINVOKE_FRAME_TRASH;
// Default: assume the full callee-trash set.
673 return RBM_CALLEE_TRASH;
678 // Gets a register mask that represents the kill set for "NO GC" helper calls since
679 // not all JIT Helper calls follow the standard ABI on the target architecture.
681 // Note: This list may not be complete and defaults to the default NOGC registers.
// Return the register kill set for a "no GC" helper call; helpers not listed
// default to RBM_CALLEE_TRASH_NOGC (see the banner comment above).
// NOTE(review): the switch skeleton and the x86 return under the ASSIGN_BYREF
// case are elided in this excerpt.
683 regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
685 assert(emitter::emitNoGChelper(helper));
689 #if defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
690 case CORINFO_HELP_PROF_FCN_ENTER:
691 return RBM_PROFILER_ENTER_TRASH;
693 case CORINFO_HELP_PROF_FCN_LEAVE:
694 return RBM_PROFILER_LEAVE_TRASH;
696 case CORINFO_HELP_PROF_FCN_TAILCALL:
697 return RBM_PROFILER_TAILCALL_TRASH;
698 #endif // defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
700 case CORINFO_HELP_ASSIGN_BYREF:
701 #if defined(_TARGET_AMD64_)
702 // this helper doesn't trash RSI and RDI
703 return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI);
704 #elif defined(_TARGET_X86_)
705 // This helper only trashes ECX.
707 #elif defined(_TARGET_ARM64_)
708 return RBM_CALLEE_TRASH_NOGC & ~(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
710 return RBM_CALLEE_TRASH_NOGC;
711 #endif // defined(_TARGET_AMD64_)
// Default for all other no-GC helpers.
714 return RBM_CALLEE_TRASH_NOGC;
718 // Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e.,
719 // regSet.rsMaskVars as well)
720 // if the given lclVar (or indir(addr(local)))/regVar node is going live (being born) or dying.
// Update variable liveness (compCurLife) -- and, when ForCodeGen, register
// liveness (regSet.rsMaskVars) and GC stack-var tracking
// (gcInfo.gcVarPtrSetCur) -- for a lclVar/regVar node (or indir(addr(local)))
// that is being born, dying, or both (dead store under MinOpts). If
// pLastUseVars is non-null it receives the set of last-use variables.
// NOTE(review): this is a large function with many braces, #ifdef VERBOSE
// guards, and blank lines elided in this excerpt of the file.
721 template <bool ForCodeGen>
722 void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
724 GenTreePtr indirAddrLocal = fgIsIndirOfAddrOfLocal(tree);
725 assert(tree->OperIsNonPhiLocal() || indirAddrLocal != nullptr);
727 // Get the local var tree -- if "tree" is "Ldobj(addr(x))", or "ind(addr(x))" this is "x", else it's "tree".
728 GenTreePtr lclVarTree = indirAddrLocal;
729 if (lclVarTree == nullptr)
733 unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
734 LclVarDsc* varDsc = lvaTable + lclNum;
// Debug-only consistency checking that var nodes are seen in execution order
// (non-AMD64 only; see the explanatory comments below).
737 #if !defined(_TARGET_AMD64_)
738 // There are no addr nodes on ARM and we are experimenting with encountering vars in 'random' order.
739 // Struct fields are not traversed in a consistent order, so ignore them when
740 // verifying that we see the var nodes in execution order
743 if (tree->OperIsIndir())
745 assert(indirAddrLocal != NULL);
747 else if (tree->gtNext != NULL && tree->gtNext->gtOper == GT_ADDR &&
748 ((tree->gtNext->gtNext == NULL || !tree->gtNext->gtNext->OperIsIndir())))
750 assert(tree->IsLocal()); // Can only take the address of a local.
751 // The ADDR might occur in a context where the address it contributes is eventually
752 // dereferenced, so we can't say that this is not a use or def.
755 // TODO-ARM64-Bug?: These asserts don't seem right for ARM64: I don't understand why we have to assert
756 // two consecutive lclvars (in execution order) can only be observed if the first one is a struct field.
757 // It seems to me this is code only applicable to the legacy JIT and not RyuJIT (and therefore why it was
758 // ifdef'ed out for AMD64).
759 else if (!varDsc->lvIsStructField)
762 for (prevTree = tree->gtPrev;
763 prevTree != NULL && prevTree != compCurLifeTree;
764 prevTree = prevTree->gtPrev)
766 if ((prevTree->gtOper == GT_LCL_VAR) || (prevTree->gtOper == GT_REG_VAR))
768 LclVarDsc * prevVarDsc = lvaTable + prevTree->gtLclVarCommon.gtLclNum;
770 // These are the only things for which this method MUST be called
771 assert(prevVarDsc->lvIsStructField);
774 assert(prevTree == compCurLifeTree);
778 #endif // !_TARGET_AMD64_
781 compCurLifeTree = tree;
782 VARSET_TP newLife(VarSetOps::MakeCopy(this, compCurLife));
784 // By codegen, a struct may not be TYP_STRUCT, so we have to
785 // check lvPromoted, for the case where the fields are being
787 if (!varDsc->lvTracked && !varDsc->lvPromoted)
792 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0); // if it's "x <op>=
793 // ..." then variable
794 // "x" must have had a
795 // previous, original,
797 bool isDying = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
798 #ifndef LEGACY_BACKEND
799 bool spill = ((tree->gtFlags & GTF_SPILL) != 0);
800 #endif // !LEGACY_BACKEND
802 #ifndef LEGACY_BACKEND
803 // For RyuJIT backend, since all tracked vars are register candidates, but not all are in registers at all times,
804 // we maintain two separate sets of variables - the total set of variables that are either
805 // born or dying here, and the subset of those that are on the stack
806 VARSET_TP stackVarDeltaSet(VarSetOps::MakeEmpty(this));
807 #endif // !LEGACY_BACKEND
// Compute varDeltaSet: the tracked variables changing liveness here, either
// the variable itself or (for a promoted struct) its tracked fields.
809 if (isBorn || isDying)
811 bool hasDeadTrackedFieldVars = false; // If this is true, then, for a LDOBJ(ADDR(<promoted struct local>)),
812 VARSET_TP* deadTrackedFieldVars =
813 nullptr; // *deadTrackedFieldVars indicates which tracked field vars are dying.
814 VARSET_TP varDeltaSet(VarSetOps::MakeEmpty(this));
816 if (varDsc->lvTracked)
818 VarSetOps::AddElemD(this, varDeltaSet, varDsc->lvVarIndex);
821 #ifndef LEGACY_BACKEND
822 if (isBorn && varDsc->lvIsRegCandidate() && tree->gtHasReg())
824 codeGen->genUpdateVarReg(varDsc, tree);
826 #endif // !LEGACY_BACKEND
827 if (varDsc->lvIsInReg()
828 #ifndef LEGACY_BACKEND
829 && tree->gtRegNum != REG_NA
830 #endif // !LEGACY_BACKEND
833 codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree));
835 #ifndef LEGACY_BACKEND
838 VarSetOps::AddElemD(this, stackVarDeltaSet, varDsc->lvVarIndex);
840 #endif // !LEGACY_BACKEND
843 else if (varDsc->lvPromoted)
845 if (indirAddrLocal != nullptr && isDying)
847 assert(!isBorn); // GTF_VAR_DEATH only set for LDOBJ last use.
848 hasDeadTrackedFieldVars = GetPromotedStructDeathVars()->Lookup(indirAddrLocal, &deadTrackedFieldVars);
849 if (hasDeadTrackedFieldVars)
851 VarSetOps::Assign(this, varDeltaSet, *deadTrackedFieldVars);
855 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
857 LclVarDsc* fldVarDsc = &(lvaTable[i]);
858 noway_assert(fldVarDsc->lvIsStructField);
859 if (fldVarDsc->lvTracked)
861 unsigned fldVarIndex = fldVarDsc->lvVarIndex;
862 noway_assert(fldVarIndex < lvaTrackedCount);
863 if (!hasDeadTrackedFieldVars)
865 VarSetOps::AddElemD(this, varDeltaSet, fldVarIndex);
868 // We repeat this call here and below to avoid the VarSetOps::IsMember
869 // test in this, the common case, where we have no deadTrackedFieldVars.
870 if (fldVarDsc->lvIsInReg())
872 #ifndef LEGACY_BACKEND
875 codeGen->genUpdateVarReg(fldVarDsc, tree);
877 #endif // !LEGACY_BACKEND
878 codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
880 #ifndef LEGACY_BACKEND
883 VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
885 #endif // !LEGACY_BACKEND
888 else if (ForCodeGen && VarSetOps::IsMember(this, varDeltaSet, fldVarIndex))
890 if (lvaTable[i].lvIsInReg())
892 #ifndef LEGACY_BACKEND
895 codeGen->genUpdateVarReg(fldVarDsc, tree);
897 #endif // !LEGACY_BACKEND
898 codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
900 #ifndef LEGACY_BACKEND
903 VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
905 #endif // !LEGACY_BACKEND
// Apply varDeltaSet to newLife: removed on death, added on birth.
911 // First, update the live set
914 // We'd like to be able to assert the following, however if we are walking
915 // through a qmark/colon tree, we may encounter multiple last-use nodes.
916 // assert (VarSetOps::IsSubset(compiler, regVarDeltaSet, newLife));
917 VarSetOps::DiffD(this, newLife, varDeltaSet);
918 if (pLastUseVars != nullptr)
920 VarSetOps::Assign(this, *pLastUseVars, varDeltaSet);
925 // This shouldn't be in newLife, unless this is debug code, in which
926 // case we keep vars live everywhere, OR the variable is address-exposed,
927 // OR this block is part of a try block, in which case it may be live at the handler
928 // Could add a check that, if it's in newLife, that it's also in
929 // fgGetHandlerLiveVars(compCurBB), but seems excessive
931 // For a dead store, it can be the case that we set both isBorn and isDying to true.
932 // (We don't eliminate dead stores under MinOpts, so we can't assume they're always
933 // eliminated.) If it's both, we handled it above.
934 VarSetOps::UnionD(this, newLife, varDeltaSet);
// If liveness actually changed, commit it and update GC stack-var tracking.
938 if (!VarSetOps::Equal(this, compCurLife, newLife))
943 printf("\t\t\t\t\t\t\tLive vars: ");
944 dumpConvertedVarSet(this, compCurLife);
946 dumpConvertedVarSet(this, newLife);
951 VarSetOps::Assign(this, compCurLife, newLife);
955 #ifndef LEGACY_BACKEND
957 // Only add vars to the gcInfo.gcVarPtrSetCur if they are currently on stack, since the
958 // gcInfo.gcTrkStkPtrLcls
959 // includes all TRACKED vars that EVER live on the stack (i.e. are not always in a register).
960 VARSET_TP gcTrkStkDeltaSet(
961 VarSetOps::Intersection(this, codeGen->gcInfo.gcTrkStkPtrLcls, stackVarDeltaSet));
962 if (!VarSetOps::IsEmpty(this, gcTrkStkDeltaSet))
967 printf("\t\t\t\t\t\t\tGCvars: ");
968 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
975 VarSetOps::UnionD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
979 VarSetOps::DiffD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
985 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
991 #else // LEGACY_BACKEND
996 VARSET_TP gcVarPtrSetNew(VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
997 if (!VarSetOps::Equal(this, codeGen->gcInfo.gcVarPtrSetCur, gcVarPtrSetNew))
999 printf("\t\t\t\t\t\t\tGCvars: ");
1000 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
1002 dumpConvertedVarSet(this, gcVarPtrSetNew);
1008 VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
1009 VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
1011 #endif // LEGACY_BACKEND
1013 codeGen->siUpdate();
// RyuJIT only: spill the variable if GTF_SPILL is set, keeping the GC
// stack-var set in sync.
1017 #ifndef LEGACY_BACKEND
1018 if (ForCodeGen && spill)
1020 assert(!varDsc->lvPromoted);
1021 codeGen->genSpillVar(tree);
1022 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
1024 if (!VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
1026 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
1030 printf("\t\t\t\t\t\t\tVar V%02u becoming live\n", varDsc - lvaTable);
1036 #endif // !LEGACY_BACKEND
1039 // Need an explicit instantiation.
1040 template void Compiler::compUpdateLifeVar<false>(GenTreePtr tree, VARSET_TP* pLastUseVars);
1042 template <bool ForCodeGen>
1043 void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
1050 if (tree != nullptr)
1052 Compiler::printTreeID(tree);
1054 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
1055 dumpConvertedVarSet(this, compCurLife);
1056 printf(" -> %s ", VarSetOps::ToString(this, newLife));
1057 dumpConvertedVarSet(this, newLife);
1062 /* We should only be called when the live set has actually changed */
1064 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
1068 VarSetOps::Assign(this, compCurLife, newLife);
1072 /* Figure out which variables are becoming live/dead at this point */
1074 // deadSet = compCurLife - newLife
1075 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
1077 // bornSet = newLife - compCurLife
1078 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
1080 /* Can't simultaneously become live and dead at the same time */
1082 // (deadSet UNION bornSet) != EMPTY
1083 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
1084 // (deadSet INTERSECTION bornSet) == EMPTY
1085 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
1087 #ifdef LEGACY_BACKEND
1088 // In the LEGACY_BACKEND case, we only consider variables that are fully enregisterd
1089 // and there may be none.
1090 VarSetOps::IntersectionD(this, deadSet, raRegVarsMask);
1091 VarSetOps::IntersectionD(this, bornSet, raRegVarsMask);
1092 // And all gcTrkStkPtrLcls that are now live will be on the stack
1093 VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
1094 VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
1095 #endif // LEGACY_BACKEND
1097 VarSetOps::Assign(this, compCurLife, newLife);
1099 // Handle the dying vars first, then the newly live vars.
1100 // This is because, in the RyuJIT backend case, they may occupy registers that
1101 // will be occupied by another var that is newly live.
1102 VarSetOps::Iter deadIter(this, deadSet);
1103 unsigned deadVarIndex = 0;
1104 while (deadIter.NextElem(&deadVarIndex))
1106 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
1107 varDsc = lvaTable + varNum;
1108 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
1109 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
1111 if (varDsc->lvIsInReg())
1113 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
1115 regMaskTP regMask = varDsc->lvRegMask();
1118 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
1122 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
1124 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(tree));
1126 #ifndef LEGACY_BACKEND
1127 // This isn't in a register, so update the gcVarPtrSetCur.
1128 // (Note that in the LEGACY_BACKEND case gcVarPtrSetCur is updated above unconditionally
1129 // for all gcTrkStkPtrLcls in newLife, because none of them ever live in a register.)
1130 else if (isGCRef || isByRef)
1132 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
1133 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
1135 #endif // !LEGACY_BACKEND
1138 VarSetOps::Iter bornIter(this, bornSet);
1139 unsigned bornVarIndex = 0;
1140 while (bornIter.NextElem(&bornVarIndex))
1142 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
1143 varDsc = lvaTable + varNum;
1144 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
1145 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
1147 if (varDsc->lvIsInReg())
1149 #ifndef LEGACY_BACKEND
1151 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
1153 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
1156 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
1157 #endif // !LEGACY_BACKEND
1158 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(tree));
1159 regMaskTP regMask = varDsc->lvRegMask();
1162 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
1166 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
1169 #ifndef LEGACY_BACKEND
1170 // This isn't in a register, so update the gcVarPtrSetCur
1171 else if (lvaIsGCTracked(varDsc))
1173 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
1174 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
1176 #endif // !LEGACY_BACKEND
1179 codeGen->siUpdate();
1182 // Need an explicit instantiation.
// compChangeLife is a template; force the <true> instantiation to be emitted
// in this translation unit for callers in other files.
1183 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
1185 #ifdef LEGACY_BACKEND
1187 /*****************************************************************************
// Legacy-backend only: compute the integer-register mask of live enregistered
// locals at the point just after 'tree', by replaying variable births and
// deaths from the last point where regSet.rsMaskVars is known accurate
// (compCurLifeTree, or the start of the current statement).
1189 * Get the mask of integer registers that contain 'live' enregistered
1190 * local variables after "tree".
1192 * The output is the mask of integer registers that are currently
1193 * alive and holding the enregistered local variables.
1195 regMaskTP CodeGenInterface::genLiveMask(GenTreePtr tree)
// Start from the mask as of the most recently processed life-update point.
1197 regMaskTP liveMask = regSet.rsMaskVars;
1199 GenTreePtr nextNode;
1200 if (compiler->compCurLifeTree == nullptr)
// No life updates yet in this statement: replay from its first node.
1202 assert(compiler->compCurStmt != nullptr);
1203 nextNode = compiler->compCurStmt->gtStmt.gtStmtList;
1207 nextNode = compiler->compCurLifeTree->gtNext;
1210 // Theoretically, we should always be able to find "tree" by walking
1211 // forward in execution order. But unfortunately, there is at least
1212 // one case (addressing) where a node may be evaluated out of order
1213 // So, we have to handle that case
1214 bool outOfOrder = false;
1215 for (; nextNode != tree->gtNext; nextNode = nextNode->gtNext)
1217 if (nextNode == nullptr)
// Ran off the end of the list: 'tree' was evaluated out of order.
1222 if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
// NOTE(review): 'isBorn' tests tree->gtFlags while 'isDying' tests
// nextNode->gtFlags — verify this asymmetry is intentional.
1224 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
1225 bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
1226 if (isBorn || isDying)
1228 regMaskTP regMask = genGetRegMask(nextNode);
1229 if (regMask != RBM_NONE)
// Walking forward: births add register bits, deaths remove them.
1233 liveMask |= regMask;
1237 liveMask &= ~(regMask);
// Out-of-order case: replay backward from compCurLifeTree instead.
1245 assert(compiler->compCurLifeTree != nullptr);
1246 liveMask = regSet.rsMaskVars;
1247 // We were unable to find "tree" by traversing forward. We must now go
1248 // backward from compiler->compCurLifeTree instead. We have to start with compiler->compCurLifeTree,
1249 // since regSet.rsMaskVars reflects its completed execution
1250 for (nextNode = compiler->compCurLifeTree; nextNode != tree; nextNode = nextNode->gtPrev)
1252 assert(nextNode != nullptr);
1254 if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
1256 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
1257 bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
1258 if (isBorn || isDying)
1260 regMaskTP regMask = genGetRegMask(nextNode);
1261 if (regMask != RBM_NONE)
1263 // We're going backward - so things born are removed
1267 liveMask &= ~(regMask);
1271 liveMask |= regMask;
1281 /*****************************************************************************
// Legacy-backend only: map a live-variable set to the mask of registers
// holding those enregistered variables. The last query is cached in
// genLastLiveSet/genLastLiveMask, since consecutive queries often repeat.
1283 * Get the mask of integer registers that contain 'live' enregistered
1286 * The input is a liveSet which contains a set of local
1287 * variables that are currently alive
1289 * The output is the mask of x86 integer registers that are currently
1290 * alive and holding the enregistered local variables
1293 regMaskTP CodeGenInterface::genLiveMask(VARSET_VALARG_TP liveSet)
1295 // Check for the zero LiveSet mask
1296 if (VarSetOps::IsEmpty(compiler, liveSet))
1301 // set if our liveSet matches the one we have cached: genLastLiveSet -> genLastLiveMask
1302 if (VarSetOps::Equal(compiler, liveSet, genLastLiveSet))
1304 return genLastLiveMask;
1307 regMaskTP liveMask = 0;
// Walk every tracked variable in the live set and accumulate its register bits.
1309 VarSetOps::Iter iter(compiler, liveSet);
1310 unsigned varIndex = 0;
1311 while (iter.NextElem(&varIndex))
1314 // If the variable is not enregistered, then it can't contribute to the liveMask
1315 if (!VarSetOps::IsMember(compiler, compiler->raRegVarsMask, varIndex))
1320 // Find the variable in compiler->lvaTable
1321 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
1322 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1324 #if !FEATURE_FP_REGALLOC
1325 // If the variable is a floating point type, then it can't contribute to the liveMask
1326 if (varDsc->IsFloatRegType())
1332 noway_assert(compiler->lvaTable[varNum].lvRegister);
// Compute the register bit(s) this variable occupies.
1335 if (varTypeIsFloating(varDsc->TypeGet()))
1337 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
1341 regBit = genRegMask(varDsc->lvRegNum);
1343 // For longs we may have two regs
1344 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
1346 regBit |= genRegMask(varDsc->lvOtherReg);
1350 noway_assert(regBit != 0);
1352 // We should not already have any of these bits set
1353 noway_assert((liveMask & regBit) == 0);
1355 // Update the liveMask with the register bits that are live
1359 // cache the last mapping between gtLiveSet -> liveMask
1360 VarSetOps::Assign(compiler, genLastLiveSet, liveSet);
1361 genLastLiveMask = liveMask;
1368 /*****************************************************************************
// Generate a spill: store register 'reg' (of type 'type') into the spill
// temp 'tmp' at offset 0.
1372 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
1374 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
1377 /*****************************************************************************
1379 * Generate a reload.
// Load spill temp 'tmp' (offset 0) back into register 'reg' as type 'type'.
1381 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
1383 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
1386 #ifdef LEGACY_BACKEND
1387 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
// Legacy backend (ARM/AMD64 only): reload a floating-point spill temp into
// 'reg'. Note the load instruction is selected from 'type' while the operand
// size comes from the temp's own type — the two may legitimately differ.
1388 void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg)
1390 var_types tmpType = tmp->tdTempType();
1391 getEmitter()->emitIns_R_S(ins_FloatLoad(type), emitActualTypeSize(tmpType), reg, tmp->tdTempNum(), 0);
1394 #endif // LEGACY_BACKEND
// Return the register in which the "this" argument is passed for 'call'.
// NOTE(review): body elided in this excerpt — presumably returns the fixed
// this-pointer argument register; confirm against the full source.
1397 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
1402 //----------------------------------------------------------------------
1403 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
1406 // tree - spilled GenTree node
1409 // TempDsc corresponding to tree
1410 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
1412 // tree must be in spilled state.
1413 assert((tree->gtFlags & GTF_SPILLED) != 0);
1415 // Get the tree's SpillDsc.
1416 RegSet::SpillDsc* prevDsc;
1417 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
1418 assert(spillDsc != nullptr);
1420 // Get the temp desc.
// NOTE(review): presumably rsGetSpillTempWord also consumes/unlinks the
// SpillDsc entry for this register — confirm in RegSet.
1421 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
1425 #ifdef _TARGET_XARCH_
1427 #ifdef _TARGET_AMD64_
1428 // Returns relocation type hint for an addr.
1429 // Note that there are no reloc hints on x86.
1432 // addr - data address
1435 // relocation type hint
// Thin wrapper over the VM's eeGetRelocTypeHint for a raw address value.
1437 unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
1439 return compiler->eeGetRelocTypeHint((void*)addr);
1441 #endif //_TARGET_AMD64_
1443 // Return true if an absolute indirect data address can be encoded as IP-relative.
1444 // offset. Note that this method should be used only when the caller knows that
1445 // the address is an icon value that VM has given and there is no GenTree node
1446 // representing it. Otherwise, one should always use FitsInAddrBase().
1449 // addr - an absolute indirect data address
1452 // true if indir data addr could be encoded as IP-relative offset.
1454 bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
1456 #ifdef _TARGET_AMD64_
// AMD64: the VM's reloc hint tells us whether a REL32 encoding is possible.
1457 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
1459 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
1464 // Return true if an indirect code address can be encoded as IP-relative offset.
1465 // Note that this method should be used only when the caller knows that the
1466 // address is an icon value that VM has given and there is no GenTree node
1467 // representing it. Otherwise, one should always use FitsInAddrBase().
1470 // addr - an absolute indirect code address
1473 // true if indir code addr could be encoded as IP-relative offset.
1475 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
1477 #ifdef _TARGET_AMD64_
// AMD64: the VM's reloc hint tells us whether a REL32 encoding is possible.
1478 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
1480 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
1485 // Return true if an indirect code address can be encoded as 32-bit displacement
1486 // relative to zero. Note that this method should be used only when the caller
1487 // knows that the address is an icon value that VM has given and there is no
1488 // GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
1491 // addr - absolute indirect code address
1494 // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
1496 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
// i.e. the address value itself fits in a signed 32-bit immediate.
1498 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
1501 // Return true if an absolute indirect code address needs a relocation recorded with VM.
1504 // addr - an absolute indirect code address
1507 // true if indir code addr needs a relocation recorded with VM
1509 bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
1511 // If generating relocatable ngen code, then all code addr should go through relocation
1512 if (compiler->opts.compReloc)
1517 #ifdef _TARGET_AMD64_
1518 // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
1519 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
1524 // It could be possible that the code indir addr could be encoded as 32-bit displacement relative
1525 // to zero. But we don't need to emit a relocation in that case.
1527 #else //_TARGET_X86_
1528 // On x86 there is no need for recording relocations during jitting,
1529 // because all addrs fit within 32-bits.
1531 #endif //_TARGET_X86_
1534 // Return true if a direct code address needs to be marked as relocatable.
1537 // addr - absolute direct code address
1540 // true if direct code addr needs a relocation recorded with VM
1542 bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
1544 // If generating relocatable ngen code, then all code addr should go through relocation
1545 if (compiler->opts.compReloc)
1550 #ifdef _TARGET_AMD64_
1551 // By default all direct code addresses go through relocation so that VM will setup
1552 // a jump stub if addr cannot be encoded as pc-relative offset.
1554 #else //_TARGET_X86_
1555 // On x86 there is no need for recording relocations during jitting,
1556 // because all addrs fit within 32-bits.
1558 #endif //_TARGET_X86_
1562 /*****************************************************************************
1564 * The following can be used to create basic blocks that serve as labels for
1565 * the emitter. Use with caution - these are not real basic blocks!
1570 BasicBlock* CodeGen::genCreateTempLabel()
1573 // These blocks don't affect FP
// Temporarily allow basic-block creation this late in the JIT.
1574 compiler->fgSafeBasicBlockCreation = true;
1577 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1580 compiler->fgSafeBasicBlockCreation = false;
1583 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
1585 // Use coldness of current block, as this label will
1586 // be contained in it.
1587 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
// Record the stack depth at which this label will be reached
// (NOTE(review): presumably only meaningful with x86 stack-level tracking).
1591 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1593 block->bbTgtStkDepth = genStackLevel / sizeof(int);
// Bind 'label' at the current emit location and capture the current GC
// liveness state (vars and gcref/byref register sets) for the label.
1600 void CodeGen::genDefineTempLabel(BasicBlock* label)
1603 if (compiler->opts.dspCode)
1605 printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
1609 label->bbEmitCookie =
1610 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1612 /* gcInfo.gcRegGCrefSetCur does not account for redundant load-suppression
1613 of GC vars, and the emitter will not know about */
// Conservatively forget tracked pointer values held in registers.
1615 regTracker.rsTrackRegClrPtr();
1618 /*****************************************************************************
1620 * Adjust the stack pointer by the given value; assumes that this follows
1621 * a call so only callee-saved registers (and registers that may hold a
1622 * return value) are used at this point.
1625 void CodeGen::genAdjustSP(ssize_t delta)
1627 #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
// x86 (non-Unix): a 4-byte adjustment is encoded more compactly as "pop ecx";
// ECX is safe to clobber here per the header comment above.
1628 if (delta == sizeof(int))
1629 inst_RV(INS_pop, REG_ECX, TYP_INT);
1632 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1635 //------------------------------------------------------------------------
1636 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1639 // block - The BasicBlock for which we are about to generate code.
1642 // Must be called just prior to generating code for 'block'.
1645 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1646 // and if 'block' is a throw helper block with a non-zero stack level.
1648 void CodeGen::genAdjustStackLevel(BasicBlock* block)
1650 #if !FEATURE_FIXED_OUT_ARGS
1651 // Check for inserted throw blocks and adjust genStackLevel.
1652 CLANG_FORMAT_COMMENT_ANCHOR;
1654 #if defined(UNIX_X86_ABI)
1655 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1657 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1658 // at this point if a jump to this block is made in the middle of pushing arguments.
1660 // Here we restore SP to prevent potential stack alignment issues.
1661 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1665 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1667 noway_assert(block->bbFlags & BBF_JMP_TARGET);
// Reset our notion of the current stack depth to what the throw helper expects.
1669 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1671 if (genStackLevel != 0)
1674 getEmitter()->emitMarkStackLvl(genStackLevel);
// Pop the outstanding pushed arguments off the stack.
1675 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1677 #else // _TARGET_X86_
1678 NYI("Need emitMarkStackLvl()");
1679 #endif // _TARGET_X86_
1682 #endif // !FEATURE_FIXED_OUT_ARGS
1687 // alignmentWB is out param
// Compute the size (returned) and alignment (*alignmentWB) of the value
// produced by 'op'. Structs and copy-block ops are delegated to
// InferStructOpSizeAlign; all other types use the genTypeSizes /
// genTypeAlignments tables.
1688 unsigned CodeGenInterface::InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
1690 unsigned alignment = 0;
1691 unsigned opSize = 0;
1693 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1695 opSize = InferStructOpSizeAlign(op, &alignment);
1699 alignment = genTypeAlignments[op->TypeGet()];
1700 opSize = genTypeSizes[op->TypeGet()];
1703 assert(opSize != 0);
1704 assert(alignment != 0);
1706 (*alignmentWB) = alignment;
1710 // alignmentWB is out param
// Compute the size (returned) and required alignment (*alignmentWB) of a
// struct-typed operand 'op' (compiled for ARM only — see #endif below).
// Handles GT_OBJ, struct locals, copy-block ops, GT_MKREFANY and argument
// placeholders; anything else asserts and falls back to pointer size/align.
1711 unsigned CodeGenInterface::InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
1713 unsigned alignment = 0;
1714 unsigned opSize = 0;
// Skip past comma side-effect wrappers to the value-producing node.
1716 while (op->gtOper == GT_COMMA)
1718 op = op->gtOp.gtOp2;
1721 if (op->gtOper == GT_OBJ)
// GT_OBJ: ask the VM for the class's size and alignment requirement.
1723 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1724 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1725 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1727 else if (op->gtOper == GT_LCL_VAR)
// Struct local: size and alignment come from the local's descriptor.
1729 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1730 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1731 assert(varDsc->lvType == TYP_STRUCT);
1732 opSize = varDsc->lvSize();
1733 if (varDsc->lvStructDoubleAlign)
1735 alignment = TARGET_POINTER_SIZE * 2;
1739 alignment = TARGET_POINTER_SIZE;
1742 else if (op->OperIsCopyBlkOp())
1744 GenTreePtr op2 = op->gtOp.gtOp2;
// Only constant-size copy blocks are expected here (see noway_assert below).
1746 if (op2->OperGet() == GT_CNS_INT)
1748 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
// Size operand is a class handle: query the VM for size/alignment.
1750 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1751 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1753 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1757 opSize = op2->gtIntCon.gtIconVal;
// Raw size constant: infer the alignment from the destination address.
1758 GenTreePtr op1 = op->gtOp.gtOp1;
1759 assert(op1->OperGet() == GT_LIST);
1760 GenTreePtr dstAddr = op1->gtOp.gtOp1;
1761 if (dstAddr->OperGet() == GT_ADDR)
1763 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1767 assert(!"Unhandle dstAddr node");
1768 alignment = TARGET_POINTER_SIZE;
1774 noway_assert(!"Variable sized COPYBLK register arg!");
1776 alignment = TARGET_POINTER_SIZE;
1779 else if (op->gtOper == GT_MKREFANY)
// GT_MKREFANY produces two pointer-sized fields.
1781 opSize = TARGET_POINTER_SIZE * 2;
1782 alignment = TARGET_POINTER_SIZE;
1784 else if (op->IsArgPlaceHolderNode())
1786 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1787 assert(clsHnd != 0);
1788 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1789 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1793 assert(!"Unhandled gtOper");
1794 opSize = TARGET_POINTER_SIZE;
1795 alignment = TARGET_POINTER_SIZE;
1798 assert(opSize != 0);
1799 assert(alignment != 0);
1801 (*alignmentWB) = alignment;
1805 #endif // _TARGET_ARM_
1807 /*****************************************************************************
1809 * Take an address expression and try to find the best set of components to
1810 * form an address mode; returns non-zero if this is successful.
1812 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1813 * Refactor this code so that the underlying analysis can be used in
1814 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1815 * option to not generate the code.
1817 * 'fold' specifies if it is OK to fold the array index which hangs off
1820 * If successful, the parameters will be set to the following values:
1822 * *rv1Ptr ... base operand
1823 * *rv2Ptr ... optional operand
1824 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1825 * #if SCALED_ADDR_MODES
1826 * *mulPtr ... optional multiplier (2/4/8) for rv2
1827 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1829 * *cnsPtr ... integer constant [optional]
1831 * The 'mode' parameter may have one of the following values:
1834 * +1 ... we're trying to compute a value via 'LEA'
1837 * 0 ... we're trying to form an address mode
1839 * -1 ... we're generating code for an address mode,
1840 * and thus the address must already form an
1841 * address mode (without any further work)
1843 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1844 * identifies the components that might be used to
1845 * form an address mode later on.
1848 bool CodeGen::genCreateAddrMode(GenTreePtr addr,
1855 #if SCALED_ADDR_MODES
1861 #ifndef LEGACY_BACKEND
// RyuJIT only analyzes — it never generates code from this routine.
1862 assert(nogen == true);
1863 #endif // !LEGACY_BACKEND
1866 The following indirections are valid address modes on x86/x64:
1868 [ icon] * not handled here
1872 [reg1 + reg2 + icon]
1879 [reg1 + 2 * reg2 + icon]
1880 [reg1 + 4 * reg2 + icon]
1881 [reg1 + 8 * reg2 + icon]
1883 The following indirections are valid address modes on arm64:
1888 [reg1 + reg2 * natural-scale]
1892 /* All indirect address modes require the address to be an addition */
1894 if (addr->gtOper != GT_ADD)
1899 // Can't use indirect addressing mode as we need to check for overflow.
1900 // Also, can't use 'lea' as it doesn't set the flags.
1902 if (addr->gtOverflow())
1907 GenTreePtr rv1 = nullptr;
1908 GenTreePtr rv2 = nullptr;
1914 #if SCALED_ADDR_MODES
1920 /* What order are the sub-operands to be evaluated */
1922 if (addr->gtFlags & GTF_REVERSE_OPS)
1924 op1 = addr->gtOp.gtOp2;
1925 op2 = addr->gtOp.gtOp1;
1929 op1 = addr->gtOp.gtOp1;
1930 op2 = addr->gtOp.gtOp2;
1933 bool rev = false; // Is op2 first in the evaluation order?
1936 A complex address mode can combine the following operands:
1938 op1 ... base address
1939 op2 ... optional scaled index
1940 #if SCALED_ADDR_MODES
1941 mul ... optional multiplier (2/4/8) for op2
1943 cns ... optional displacement
1945 Here we try to find such a set of operands and arrange for these
1946 to sit in registers.
1950 #if SCALED_ADDR_MODES
1955 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1956 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1957 here if we find a scaled index.
1959 CLANG_FORMAT_COMMENT_ANCHOR;
1961 #if SCALED_ADDR_MODES
1965 #ifdef LEGACY_BACKEND
1966 /* Check both operands as far as being register variables */
1970 if (op1->gtOper == GT_LCL_VAR)
1972 if (op2->gtOper == GT_LCL_VAR)
1975 #endif // LEGACY_BACKEND
1977 /* Special case: keep constants as 'op2' */
1979 if (op1->IsCnsIntOrI())
1981 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1987 /* Check for an addition of a constant */
// Fold a constant addend into the displacement 'cns' (must stay in 32 bits,
// and must not be a GC reference).
1989 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1991 /* We're adding a constant */
1993 cns += op2->gtIntConCommon.IconValue();
1995 #ifdef LEGACY_BACKEND
1996 /* Can (and should) we use "add reg, icon" ? */
1998 if (op1->InReg() && mode == 1 && !nogen)
2000 regNumber reg1 = op1->gtRegNum;
2002 if ((regMask == 0 || (regMask & genRegMask(reg1))) && genRegTrashable(reg1, addr))
2004 // In case genMarkLclVar(op1) bashed it above and it is
2005 // the last use of the variable.
2009 /* 'reg1' is trashable, so add "icon" into it */
2011 genIncRegBy(reg1, cns, addr, addr->TypeGet());
2013 genUpdateLife(addr);
2017 #endif // LEGACY_BACKEND
2019 #if defined(_TARGET_ARM64_) || (defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND))
2023 /* Inspect the operand the constant is being added to */
2025 switch (op1->gtOper)
2029 if (op1->gtOverflow())
2034 op2 = op1->gtOp.gtOp2;
2035 op1 = op1->gtOp.gtOp1;
2039 #if SCALED_ADDR_MODES && !defined(_TARGET_ARM64_) && !(defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND))
2040 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2042 if (op1->gtOverflow())
2044 return false; // Need overflow check
2051 mul = op1->GetScaledIndex();
2054 /* We can use "[mul*rv2 + icon]" */
2057 rv2 = op1->gtOp.gtOp1;
2069 /* The best we can do is "[rv1 + icon]" */
2077 // op2 is not a constant. So keep on trying.
2078 CLANG_FORMAT_COMMENT_ANCHOR;
2080 #ifdef LEGACY_BACKEND
2081 // Does op1 or op2 already sit in a register?
2084 /* op1 is sitting in a register */
2086 else if (op2->InReg())
2088 /* op2 is sitting in a register. Keep the enregistered value as op1 */
2094 noway_assert(rev == false);
2098 #endif // LEGACY_BACKEND
2100 /* Neither op1 nor op2 are sitting in a register right now */
// Try to peel a folded constant or a scaled index out of op1.
2102 switch (op1->gtOper)
2104 #if !defined(_TARGET_ARM64_) && !(defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND))
2105 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2108 if (op1->gtOverflow())
2113 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
2115 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
2116 op1 = op1->gtOp.gtOp1;
2123 #if SCALED_ADDR_MODES
2127 if (op1->gtOverflow())
2136 mul = op1->GetScaledIndex();
2139 /* 'op1' is a scaled value */
2142 rv2 = op1->gtOp.gtOp1;
// Collapse nested scales (e.g. (x*2)*4) while the product is still encodable.
2145 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2147 if (jitIsScaleIndexMul(argScale * mul))
2149 mul = mul * argScale;
2150 rv2 = rv2->gtOp.gtOp1;
2158 noway_assert(rev == false);
2165 #endif // SCALED_ADDR_MODES
2166 #endif // !_TARGET_ARM64_ && !(_TARGET_ARM_ && !LEGACY_BACKEND)
2175 op1 = op1->gtOp.gtOp1;
2185 op1 = op1->gtOp.gtOp2;
// op1 could not be simplified further; try the same peeling on op2.
2193 switch (op2->gtOper)
2195 #if !defined(_TARGET_ARM64_) && !(defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND))
2196 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2199 if (op2->gtOverflow())
2204 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
2206 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
2207 op2 = op2->gtOp.gtOp1;
2214 #if SCALED_ADDR_MODES
2218 if (op2->gtOverflow())
2227 mul = op2->GetScaledIndex();
2230 // 'op2' is a scaled value...is its argument also scaled?
2232 rv2 = op2->gtOp.gtOp1;
2233 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2235 if (jitIsScaleIndexMul(argScale * mul))
2237 mul = mul * argScale;
2238 rv2 = rv2->gtOp.gtOp1;
2252 #endif // SCALED_ADDR_MODES
2253 #endif // !_TARGET_ARM64_ && !(_TARGET_ARM_ && !LEGACY_BACKEND)
2262 op2 = op2->gtOp.gtOp1;
2272 op2 = op2->gtOp.gtOp2;
2282 #ifdef LEGACY_BACKEND
2283 // op1 is in a register.
2284 // Note that this case only occurs during codegen for LEGACY_BACKEND.
2286 // Is op2 an addition or a scaled value?
2290 switch (op2->gtOper)
2294 if (op2->gtOverflow())
2299 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
2301 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
2302 op2 = op2->gtOp.gtOp1;
2308 #if SCALED_ADDR_MODES
2312 if (op2->gtOverflow())
2321 mul = op2->GetScaledIndex();
2325 rv2 = op2->gtOp.gtOp1;
2327 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2329 if (jitIsScaleIndexMul(argScale * mul))
2331 mul = mul * argScale;
2332 rv2 = rv2->gtOp.gtOp1;
2344 #endif // SCALED_ADDR_MODES
2349 #endif // LEGACY_BACKEND
2353 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
2357 #ifdef _TARGET_ARM64_
2363 #ifdef LEGACY_BACKEND
2364 /* Check for register variables */
2368 if (rv1 && rv1->gtOper == GT_LCL_VAR)
2370 if (rv2 && rv2->gtOper == GT_LCL_VAR)
2373 #endif // LEGACY_BACKEND
2377 /* Make sure a GC address doesn't end up in 'rv2' */
// The base (rv1) must carry the GC-ness; swap so rv2 is the non-GC operand.
2379 if (varTypeIsGC(rv2->TypeGet()))
2381 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
2390 /* Special case: constant array index (that is range-checked) */
2391 CLANG_FORMAT_COMMENT_ANCHOR;
2393 #if defined(LEGACY_BACKEND)
2394 // If we've already placed rv2 in a register, we are probably being called in a context that has already
2395 // presumed that an addressing mode will be created, even if rv2 is constant, and if we fold we may not find a
2396 // useful addressing mode (e.g. if we had [mul * rv2 + cns] it might happen to fold to [cns2].
2397 if (mode == -1 && rv2->InReg())
2408 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
2410 /* For valuetype arrays where we can't use the scaled address
2411 mode, rv2 will point to the scaled index. So we have to do
2414 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
2422 /* May be a simple array. rv2 will points to the actual index */
2428 /* Get hold of the array index and see if it's a constant */
2429 if (index->IsIntCnsFitsInI32())
2431 /* Get hold of the index value */
2432 ssize_t ixv = index->AsIntConCommon()->IconValue();
2434 #if SCALED_ADDR_MODES
2435 /* Scale the index if necessary */
2442 if (FitsIn<INT32>(cns + ixv))
2444 /* Add the scaled index to the offset value */
2448 #if SCALED_ADDR_MODES
2449 /* There is no scaled operand any more */
2458 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
2459 noway_assert(rv1 || mul != 1);
// The accumulated displacement must fit in a 32-bit immediate.
2461 noway_assert(FitsIn<INT32>(cns));
2463 if (rv1 == nullptr && rv2 == nullptr)
2468 /* Success - return the various components to the caller */
2473 #if SCALED_ADDR_MODES
2476 // TODO-Cleanup: The offset is signed and it should be returned as such. See also
2477 // GenTreeAddrMode::gtOffset and its associated cleanup note.
2478 *cnsPtr = (unsigned)cns;
2483 /*****************************************************************************
2484 * The condition to use for (the jmp/set for) the given type of operation
2486 * In case of amd64, this routine should be used when there is no gentree available
2487 * and one needs to generate jumps based on integer comparisons. When gentree is
2488 * available always use its overloaded version.
// Maps a relational GenTree operator (GT_EQ..GT_GT, plus GT_TEST_EQ/NE on
// RyuJIT) to an emitter jump kind, for signed, unsigned, or "logical"
// (flags set by an ALU op rather than a compare) conditions. Each table is
// indexed by (cmp - GT_EQ); the asserts below verify the table ordering.
2493 emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
2495 const static BYTE genJCCinsSigned[] = {
2496 #if defined(_TARGET_XARCH_)
2503 #ifndef LEGACY_BACKEND
2504 EJ_je, // GT_TEST_EQ
2505 EJ_jne, // GT_TEST_NE
2507 #elif defined(_TARGET_ARMARCH_)
2514 #if defined(_TARGET_ARM64_)
2515 EJ_eq, // GT_TEST_EQ
2516 EJ_ne, // GT_TEST_NE
2521 const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
2523 #if defined(_TARGET_XARCH_)
2530 #ifndef LEGACY_BACKEND
2531 EJ_je, // GT_TEST_EQ
2532 EJ_jne, // GT_TEST_NE
2534 #elif defined(_TARGET_ARMARCH_)
2541 #if defined(_TARGET_ARM64_)
2542 EJ_eq, // GT_TEST_EQ
2543 EJ_ne, // GT_TEST_NE
2548 const static BYTE genJCCinsLogical[] = /* logical operation */
2550 #if defined(_TARGET_XARCH_)
2551 EJ_je, // GT_EQ (Z == 1)
2552 EJ_jne, // GT_NE (Z == 0)
2553 EJ_js, // GT_LT (S == 1)
2555 EJ_jns, // GT_GE (S == 0)
2557 #ifndef LEGACY_BACKEND
2558 EJ_NONE, // GT_TEST_EQ
2559 EJ_NONE, // GT_TEST_NE
2561 #elif defined(_TARGET_ARMARCH_)
2562 EJ_eq, // GT_EQ (Z == 1)
2563 EJ_ne, // GT_NE (Z == 0)
2564 EJ_mi, // GT_LT (N == 1)
2566 EJ_pl, // GT_GE (N == 0)
2568 #if defined(_TARGET_ARM64_)
2569 EJ_eq, // GT_TEST_EQ
2570 EJ_ne, // GT_TEST_NE
// Sanity-check that each table entry lines up with its GT_* operator index.
2575 #if defined(_TARGET_XARCH_)
2576 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
2577 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
2578 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
2579 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
2580 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
2581 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
2582 #ifndef LEGACY_BACKEND
2583 assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
2584 assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
2587 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
2588 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
2589 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
2590 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
2591 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
2592 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
2593 #ifndef LEGACY_BACKEND
2594 assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
2595 assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
2598 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
2599 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
2600 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
2601 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
2602 #elif defined(_TARGET_ARMARCH_)
2603 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
2604 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
2605 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
2606 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
2607 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
2608 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
2610 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
2611 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
2612 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
2613 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
2614 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
2615 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
2617 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
2618 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
2619 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
2620 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
2622 assert(!"unknown arch");
2624 assert(GenTree::OperIsCompare(cmp));
// Select the table by comparison kind; the EJ_COUNT sentinel catches any
// missed case in the assert below.
2626 emitJumpKind result = EJ_COUNT;
2628 if (compareKind == CK_UNSIGNED)
2630 result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
2632 else if (compareKind == CK_SIGNED)
2634 result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
2636 else if (compareKind == CK_LOGICAL)
2638 result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
2640 assert(result != EJ_COUNT);
2644 #ifndef LEGACY_BACKEND
2645 #ifdef _TARGET_ARMARCH_
2646 //------------------------------------------------------------------------
2647 // genEmitGSCookieCheck: Generate code to check that the GS cookie
2648 // wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
//
// Arguments:
//    pushReg - when false and the method returns a GC ref, the return register
//              is reported live across the check (see comment below); the
//              helper-call path otherwise does not preserve it.
//
// Emits: load of the expected GS cookie (constant or via indirection for the
// Ngen case), a compare against the cookie slot on this frame, and a call to
// CORINFO_HELP_FAIL_FAST on mismatch.
2650 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2652 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
2654 // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
2655 // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
2656 if (!pushReg && (compiler->info.compRetType == TYP_REF))
2657 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
// Two scratch registers: one for the expected cookie, one for the frame's copy.
2659 regNumber regGSConst = REG_TMP_0;
2660 regNumber regGSValue = REG_TMP_1;
2662 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
2664 // load the GS cookie constant into a reg
2666 genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
2670 // Ngen case - GS cookie constant needs to be accessed through an indirection.
2671 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2672 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
2674 // Load this method's GS value from the stack frame
2675 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
2676 // Compare with the GS cookie constant
2677 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
// On match, jump over the fail-fast call; on mismatch, fall into it.
2679 BasicBlock* gsCheckBlk = genCreateTempLabel();
2680 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2681 inst_JMP(jmpEqual, gsCheckBlk);
2682 // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
2683 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
2684 genDefineTempLabel(gsCheckBlk);
2686 #endif // _TARGET_ARMARCH_
2687 #endif // !LEGACY_BACKEND
2689 /*****************************************************************************
2691 * Generate an exit sequence for a return from a method (note: when compiling
2692 * for speed there might be multiple exit points).
 *
 * Records the epilog IP-mapping for the debugger, emits the GS cookie check
 * when the method needs one (fixing up live GC register state afterwards),
 * and reserves space for the epilog itself.
2695 void CodeGen::genExitCode(BasicBlock* block)
2697 /* Just wrote the first instruction of the epilog - inform debugger
2698 Note that this may result in a duplicate IPmapping entry, and
2701 // For non-optimized debuggable code, there is only one epilog.
2702 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
// BBF_HAS_JMP means this is a tail-call-via-jmp epilog; passed through to the
// GS cookie check as its pushReg argument.
2704 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
2705 if (compiler->getNeedsGSSecurityCookie())
2707 genEmitGSCookieCheck(jmpEpilog);
2712 // The GS cookie check created a temp label that has no live
2713 // incoming GC registers, we need to fix that
2718 /* Figure out which register parameters hold pointers */
2720 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
2723 noway_assert(varDsc->lvIsParam);
2725 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
// Re-seed the emitter's notion of live GC-ref/byref registers from gcInfo.
2728 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
2729 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
2733 genReserveEpilog(block);
2736 /*****************************************************************************
2738 * Generate code for an out-of-line exception.
2739 * For debuggable code, we generate the 'throw' inline.
2740 * For non-dbg code, we share the helper blocks created by fgAddCodeRef().
 *
 * Arguments:
 *    jumpKind - condition under which the exception should be raised
 *    codeKind - which kind of exception helper (e.g. SCK_OVERFLOW)
 *    failBlk  - optional GT_LABEL node naming the target throw block;
 *               when null, the shared helper block is looked up instead
2743 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk)
2745 bool useThrowHlpBlk = !compiler->opts.compDbgCode;
2747 #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
2748 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
2749 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
2750 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
2754 /* For non-debuggable code, find and use the helper block for
2755 raising the exception. The block may be shared by other trees too. */
2761 /* We already know which block to jump to. Use that. */
2763 noway_assert(failBlk->gtOper == GT_LABEL);
2764 tgtBlk = failBlk->gtLabel.gtLabBB;
2767 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB))->acdDstBlk);
2771 /* Find the helper-block which raises the exception. */
2773 Compiler::AddCodeDsc* add =
2774 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2775 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2776 tgtBlk = add->acdDstBlk;
2779 noway_assert(tgtBlk);
2781 // Jump to the exception-throwing block on error.
2783 inst_JMP(jumpKind, tgtBlk);
2787 /* The code to throw the exception will be generated inline, and
2788 we will jump around it in the normal non-exception case */
// Inline path: jump around the helper call with the reversed condition
// (when the condition is reversible; otherwise the call is unconditional).
2790 BasicBlock* tgtBlk = nullptr;
2791 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2792 if (reverseJumpKind != jumpKind)
2794 tgtBlk = genCreateTempLabel();
2795 inst_JMP(reverseJumpKind, tgtBlk);
2798 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
2800 /* Define the spot for the normal non-exception case to jump to */
2801 if (tgtBlk != nullptr)
2803 assert(reverseJumpKind != jumpKind);
2804 genDefineTempLabel(tgtBlk);
2809 /*****************************************************************************
2811 * The last operation done was generating code for "tree" and that would
2812 * have set the flags. Check if the operation caused an overflow.
 *
 * Emits a conditional jump to the SCK_OVERFLOW throw block, choosing the
 * condition from the target architecture and from whether the operation
 * was signed or unsigned (GTF_UNSIGNED).
2816 void CodeGen::genCheckOverflow(GenTreePtr tree)
2818 // Overflow-check should be asked for this tree
2819 noway_assert(tree->gtOverflow());
2821 const var_types type = tree->TypeGet();
2823 // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
2824 noway_assert(!varTypeIsSmall(type));
2826 emitJumpKind jumpKind;
2828 #ifdef _TARGET_ARM64_
// ARM64 GT_MUL overflow is handled specially (body elided here).
2829 if (tree->OperGet() == GT_MUL)
2836 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
2838 #if defined(_TARGET_XARCH_)
// x86/x64: unsigned overflow sets CF (jb); signed overflow sets OF (jo).
2840 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2842 #elif defined(_TARGET_ARMARCH_)
// ARM: unsigned overflow -> carry clear (lo); signed overflow -> V set (vs).
2844 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2846 if (jumpKind == EJ_lo)
// On ARM, for unsigned non-subtract operations the carry sense differs
// from subtract (borrow), hence the GT_SUB/GT_ASG_SUB special-casing.
2848 if ((tree->OperGet() != GT_SUB) && (tree->gtOper != GT_ASG_SUB))
2854 #endif // defined(_TARGET_ARMARCH_)
2857 // Jump to the block which will throw the exception
2859 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2862 #if FEATURE_EH_FUNCLETS
2864 /*****************************************************************************
2866 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
2867 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
 * index is still consistent with the block being generated (filter region,
 * root, or handler region membership, as appropriate).
2872 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2874 if (block->bbFlags & BBF_FUNCLET_BEG)
// Entering a new funclet: switch the compiler's current function index.
2876 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2877 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2879 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2883 // We shouldn't see FUNC_ROOT
2884 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2885 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
// Not a funclet entry: verify (debug-only) that 'block' lies in the region
// of whichever function we currently think we are generating.
2890 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2891 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2893 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2895 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2897 assert(!block->hasHndIndex());
2901 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2902 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2907 #if defined(_TARGET_ARM_)
// genInsertNopForUnwinder: On ARM32, emit a NOP before a finally-return
// target block so the unwinder is not confused by the call-funclet
// convention (details in the comment below).
2908 void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2910 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2911 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2912 // calls the funclet during non-exceptional control flow.
2913 if (block->bbFlags & BBF_FINALLY_TARGET)
2915 assert(block->bbFlags & BBF_JMP_TARGET)
2918 if (compiler->verbose)
2920 printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
2923 // Create a label that we'll use for computing the start of an EH region, if this block is
2924 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
2925 // determining the EH regions, then this NOP would end up outside of the region, if this
2926 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
2927 // would be executed, which we would prefer not to do.
2929 block->bbUnwindNopEmitCookie =
2930 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
2937 #endif // FEATURE_EH_FUNCLETS
2939 /*****************************************************************************
2941 * Generate code for the function.
 *
 * Top-level code-generation driver: prepares the emitter, estimates (legacy
 * backend) or uses (RyuJIT) the final frame layout, generates the method body,
 * prologs/epilogs, binds jumps, allocates code memory via the emitter, emits
 * unwind info, reports EH clauses, and produces GC info. Outputs the code
 * pointer in *codePtr and its size in *nativeSizeOfCode.
2944 void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
2949 printf("*************** In genGenerateCode()\n");
2950 compiler->fgDispBasicBlocks(compiler->verboseTrees);
2955 unsigned prologSize;
2956 unsigned epilogSize;
2961 genInterruptibleUsed = true;
2964 genNeedPrologStackProbe = false;
2967 compiler->fgDebugCheckBBlist();
2970 /* This is the real thing */
2972 genPrepForCompiler();
2974 /* Prepare the emitter */
2975 getEmitter()->Init();
2977 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
// Force disassembly on if we spilled and disAsmSpilled was requested.
2981 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
2983 compiler->opts.disAsm = true;
// Disassembly banner: code-opt level, CPU, optimization flags, frame kind,
// interruptibility, and profile-data status.
2986 if (compiler->opts.disAsm)
2988 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2990 printf("; Emitting ");
2992 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2994 printf("SMALL_CODE");
2996 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2998 printf("FAST_CODE");
3002 printf("BLENDED_CODE");
3007 if (compiler->info.genCPU == CPU_X86)
3009 printf("generic X86 CPU");
3011 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
3013 printf("Pentium 4");
3015 else if (compiler->info.genCPU == CPU_X64)
3017 if (compiler->canUseAVX())
3019 printf("X64 CPU with AVX");
3023 printf("X64 CPU with SSE2");
3027 else if (compiler->info.genCPU == CPU_ARM)
3029 printf("generic ARM CPU");
3034 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
3036 printf("; optimized code\n");
3038 else if (compiler->opts.compDbgCode)
3040 printf("; debuggable code\n");
3042 else if (compiler->opts.MinOpts())
3044 printf("; compiler->opts.MinOpts() is true\n");
3048 printf("; unknown optimization flags\n");
3052 if (compiler->genDoubleAlign())
3053 printf("; double-aligned frame\n");
3056 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
3058 if (genInterruptible)
3060 printf("; fully interruptible\n");
3064 printf("; partially interruptible\n");
3067 if (compiler->fgHaveProfileData())
3069 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
3070 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
3073 if (compiler->fgProfileData_ILSizeMismatch)
3075 printf("; discarded IBC profile data due to mismatch in ILSize\n");
3080 #ifndef LEGACY_BACKEND
3082 // For RyuJIT backend, we compute the final frame layout before code generation. This is because LSRA
3083 // has already computed exactly the maximum concurrent number of spill temps of each type that are
3084 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
3085 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
3086 // memory from the VM.
3090 unsigned maxTmpSize = compiler->tmpSize; // This is precise after LSRA has pre-allocated the temps.
3092 #else // LEGACY_BACKEND
3094 // Estimate the frame size: first, estimate the number of spill temps needed by taking the register
3095 // predictor spill temp estimates and stress levels into consideration. Then, compute the tentative
3096 // frame layout using conservative callee-save register estimation (namely, guess they'll all be used
3097 // and thus saved on the frame).
3099 // Compute the maximum estimated spill temp size.
3100 unsigned maxTmpSize = sizeof(double) + sizeof(float) + sizeof(__int64) + sizeof(void*);
3102 maxTmpSize += (compiler->tmpDoubleSpillMax * sizeof(double)) + (compiler->tmpIntSpillMax * sizeof(int));
3106 /* When StressRegs is >=1, there will be a bunch of spills not predicted by
3107 the predictor (see logic in rsPickReg). It will be very hard to teach
3108 the predictor about the behavior of rsPickReg for StressRegs >= 1, so
3109 instead let's make maxTmpSize large enough so that we won't be wrong.
3110 This means that at StressRegs >= 1, we will not be testing the logic
3111 that sets the maxTmpSize size.
3114 if (regSet.rsStressRegs() >= 1)
3116 maxTmpSize += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
3119 // JIT uses 2 passes when assigning stack variable (i.e. args, temps, and locals) locations in varDsc->lvStkOffs.
3120 // During the 1st pass (in genGenerateCode), it estimates the maximum possible size for stack temps
3121 // and put it in maxTmpSize. Then it calculates the varDsc->lvStkOffs for each variable based on this estimation.
3122 // However during stress mode, we might spill more temps on the stack, which might grow the
3123 // size of the temp area.
3124 // This might cause varDsc->lvStkOffs to change during the 2nd pass (in emitEndCodeGen).
3125 // If the change of varDsc->lvStkOffs crosses the threshold for the instruction size,
3126 // we will then have a mismatched estimated code size (during the 1st pass) and the actual emitted code size
3127 // (during the 2nd pass).
3128 // Also, if STRESS_UNSAFE_BUFFER_CHECKS is turned on, we might reorder the stack variable locations,
3129 // which could cause the mismatch too.
3131 // The following code is simply bump the maxTmpSize up to at least BYTE_MAX+1 during the stress mode, so that
3132 // we don't run into code size problem during stress.
3134 if (getJitStressLevel() != 0)
3136 if (maxTmpSize < BYTE_MAX + 1)
3138 maxTmpSize = BYTE_MAX + 1;
3143 /* Estimate the offsets of locals/arguments and size of frame */
3145 unsigned lclSize = compiler->lvaFrameSize(Compiler::TENTATIVE_FRAME_LAYOUT);
3149 // Display the local frame offsets that we have tentatively decided upon
3153 compiler->lvaTableDump();
3157 #endif // LEGACY_BACKEND
3159 getEmitter()->emitBegFN(isFramePointerUsed()
3162 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
3163 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
3165 #ifdef LEGACY_BACKEND
3168 #endif // LEGACY_BACKEND
3172 /* Now generate code for the function */
3175 #ifndef LEGACY_BACKEND
3177 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
3178 // generation hasn't touched it (it shouldn't!).
3181 compiler->lvaTableDump();
3184 #endif // !LEGACY_BACKEND
3186 /* We can now generate the function prolog and epilog */
3188 genGeneratePrologsAndEpilogs();
3190 /* Bind jump distances */
3192 getEmitter()->emitJumpDistBind();
3194 /* The code is now complete and final; it should not change after this. */
3196 /* Compute the size of the code sections that we are going to ask the VM
3197 to allocate. Note that this might not be precisely the size of the
3198 code we emit, though it's fatal if we emit more code than the size we
3200 (Note: an example of a case where we emit less code would be useful.)
3203 getEmitter()->emitComputeCodeSizes();
3207 // Code to test or stress our ability to run a fallback compile.
3208 // We trigger the fallback here, before asking the VM for any memory,
3209 // because if not, we will leak mem, as the current codebase can't free
3210 // the mem after the emitter asks the VM for it. As this is only a stress
3211 // mode, we only want the functionality, and don't care about the relative
3212 // ugliness of having the failure here.
3213 if (!compiler->jitFallbackCompile)
3215 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
3216 // especially that caused by enabling JIT stress.
3217 if (!JitConfig.JitNoForceFallback())
3219 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
3221 NO_WAY_NOASSERT("Stress failure");
3228 /* We've finished collecting all the unwind information for the function. Now reserve
3229 space for it from the VM.
3232 compiler->unwindReserve();
3236 size_t dataSize = getEmitter()->emitDataSize();
3238 #endif // DISPLAY_SIZES
3242 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
3244 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
3245 trackedStackPtrsContig = false;
3246 #elif defined(_TARGET_ARM_)
3247 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
3248 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
3250 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
3254 /* We're done generating code for this function */
3255 compiler->compCodeGenDone = true;
3258 compiler->EndPhase(PHASE_GENERATE_CODE);
// emitEndCodeGen allocates the code buffer from the VM and performs the
// final emission pass; it returns the actual emitted code size.
3260 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
3261 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
3262 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
3264 compiler->EndPhase(PHASE_EMIT_CODE);
3267 if (compiler->opts.disAsm)
3269 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
3270 compiler->info.compFullName);
3271 printf("; ============================================================\n");
3272 printf(""); // in our logic this causes a flush
3277 printf("*************** After end code gen, before unwindEmit()\n");
3278 getEmitter()->emitDispIGlist(true);
3282 #if EMIT_TRACK_STACK_DEPTH
3283 /* Check our max stack level. Needed for fgAddCodeRef().
3284 We need to relax the assert as our estimation won't include code-gen
3285 stack changes (which we know don't affect fgAddCodeRef()) */
3287 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
3288 compiler->compHndBBtabCount + // Return address for locally-called finallys
3289 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
3290 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
3291 #if defined(UNIX_X86_ABI)
3292 maxAllowedStackDepth += maxNestedAlignment;
3294 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
3296 #endif // EMIT_TRACK_STACK_DEPTH
3298 *nativeSizeOfCode = codeSize;
3299 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
3301 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
3303 // Make sure that the x86 alignment and cache prefetch optimization rules
3306 // Don't start a method in the last 7 bytes of a 16-byte alignment area
3307 // unless we are generating SMALL_CODE
3308 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
3310 /* Now that the code is issued, we can finalize and emit the unwind data */
3312 compiler->unwindEmit(*codePtr, coldCodePtr);
3314 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
3318 /* Finalize the Local Var info in terms of generated code */
// LATE_DISASM support: split total size into hot and cold sections for the
// disassembler. Hot section is padded to its predicted size; cold is not.
3323 unsigned finalHotCodeSize;
3324 unsigned finalColdCodeSize;
3325 if (compiler->fgFirstColdBlock != nullptr)
3327 // We did some hot/cold splitting. The hot section is always padded out to the
3328 // size we thought it would be, but the cold section is not.
3329 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
3330 assert(compiler->info.compTotalHotCodeSize > 0);
3331 assert(compiler->info.compTotalColdCodeSize > 0);
3332 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
3333 finalColdCodeSize = codeSize - finalHotCodeSize;
3337 // No hot/cold splitting
3338 assert(codeSize <= compiler->info.compTotalHotCodeSize);
3339 assert(compiler->info.compTotalHotCodeSize > 0);
3340 assert(compiler->info.compTotalColdCodeSize == 0);
3341 finalHotCodeSize = codeSize;
3342 finalColdCodeSize = 0;
3344 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
3345 #endif // LATE_DISASM
3347 /* Report any exception handlers to the VM */
3351 #ifdef JIT32_GCENCODER
3356 // Create and store the GC info for this method.
3357 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
// Optional hex dump of the generated code/data, gated on dmpHex; can append
// to C:\JIT.COD when the method name matches the placeholder below.
3360 FILE* dmpf = jitstdout;
3362 compiler->opts.dmpHex = false;
3363 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
3366 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
3371 compiler->opts.dmpHex = true;
3374 if (compiler->opts.dmpHex)
3376 size_t consSize = getEmitter()->emitDataSize();
3377 size_t infoSize = compiler->compInfoBlkSize;
3379 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
3380 fprintf(dmpf, "\n");
3384 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
3388 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
3390 #ifdef JIT32_GCENCODER
3392 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
3393 #endif // JIT32_GCENCODER
3395 fprintf(dmpf, "\n");
3399 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
3403 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
3405 #ifdef JIT32_GCENCODER
3407 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
3408 #endif // JIT32_GCENCODER
3413 if (dmpf != jitstdout)
3420 /* Tell the emitter that we're done with this function */
3422 getEmitter()->emitEndFN();
3424 /* Shut down the spill logic */
3426 regSet.rsSpillDone();
3428 /* Shut down the temp logic */
3430 compiler->tmpDone();
3434 grossVMsize += compiler->info.compILCodeSize;
3435 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
3436 grossNCsize += codeSize + dataSize;
3438 #endif // DISPLAY_SIZES
3440 compiler->EndPhase(PHASE_EMIT_GCEH);
3443 /*****************************************************************************
3445 * Report EH clauses to the VM
3448 void CodeGen::genReportEH()
3450 if (compiler->compHndBBtabCount == 0)
3456 if (compiler->opts.dspEHTable)
3458 printf("*************** EH table for %s\n", compiler->info.compFullName);
3466 bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
3468 unsigned EHCount = compiler->compHndBBtabCount;
3470 #if FEATURE_EH_FUNCLETS
3471 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
3473 unsigned duplicateClauseCount = 0;
3474 unsigned enclosingTryIndex;
3476 // Duplicate clauses are not used by CoreRT ABI
3479 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
3481 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
3482 // ignoring 'mutual protect' trys
3483 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
3484 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
3486 ++duplicateClauseCount;
3489 EHCount += duplicateClauseCount;
3492 #if FEATURE_EH_CALLFINALLY_THUNKS
3493 unsigned clonedFinallyCount = 0;
3495 // Duplicate clauses are not used by CoreRT ABI
3498 // We don't keep track of how many cloned finally there are. So, go through and count.
3499 // We do a quick pass first through the EH table to see if there are any try/finally
3500 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
3502 bool anyFinallys = false;
3503 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
3504 HBtab < HBtabEnd; HBtab++)
3506 if (HBtab->HasFinallyHandler())
3514 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3516 if (block->bbJumpKind == BBJ_CALLFINALLY)
3518 ++clonedFinallyCount;
3522 EHCount += clonedFinallyCount;
3525 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3527 #endif // FEATURE_EH_FUNCLETS
3530 if (compiler->opts.dspEHTable)
3532 #if FEATURE_EH_FUNCLETS
3533 #if FEATURE_EH_CALLFINALLY_THUNKS
3534 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
3535 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
3536 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
3537 #else // !FEATURE_EH_CALLFINALLY_THUNKS
3538 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
3539 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
3540 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
3541 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
3542 #else // !FEATURE_EH_FUNCLETS
3543 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
3544 assert(compiler->compHndBBtabCount == EHCount);
3545 #endif // !FEATURE_EH_FUNCLETS
3549 // Tell the VM how many EH clauses to expect.
3550 compiler->eeSetEHcount(EHCount);
3552 XTnum = 0; // This is the index we pass to the VM
3554 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
3555 HBtab < HBtabEnd; HBtab++)
3557 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
3559 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
3560 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
3562 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3563 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
3564 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3565 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
3567 if (HBtab->HasFilter())
3569 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
3573 hndTyp = HBtab->ebdTyp;
3576 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
3578 if (isCoreRTABI && (XTnum > 0))
3580 // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
3581 // try block as the previous one. The runtime cannot reliably infer this information from
3582 // native code offsets because of different try blocks can have same offsets. Alternative
3583 // solution to this problem would be inserting extra nops to ensure that different try
3584 // blocks have different offsets.
3585 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
3587 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
3588 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
3589 // IL as "try { try {} catch {} catch {} } finally {}".
3590 assert(HBtab->HasCatchHandler());
3591 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
3595 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
3596 // the fields aren't accurate.
3598 CORINFO_EH_CLAUSE clause;
3599 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
3600 clause.Flags = flags;
3601 clause.TryOffset = tryBeg;
3602 clause.TryLength = tryEnd;
3603 clause.HandlerOffset = hndBeg;
3604 clause.HandlerLength = hndEnd;
3606 assert(XTnum < EHCount);
3608 // Tell the VM about this EH clause.
3609 compiler->eeSetEHinfo(XTnum, &clause);
3614 #if FEATURE_EH_FUNCLETS
3615 // Now output duplicated clauses.
3617 // If a funclet has been created by moving a handler out of a try region that it was originally nested
3618 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
3619 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
3620 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
3621 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
3622 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
3623 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
3626 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
3627 // try or handler region):
3645 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
3646 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
3647 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
3648 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
3649 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
3650 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
3651 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
3652 // representing try (1) protecting the new funclets catch (3) and (4).
3653 // The code will be generated as follows:
3655 // ABCFH // "main" code
3660 // The EH regions are:
3665 // D -> G // "duplicate" clause
3666 // E -> G // "duplicate" clause
3668 // Note that we actually need to generate one of these additional "duplicate" clauses for every
3669 // region the funclet is nested in. Take this example:
3701 // When we pull out funclets, we get the following generated code:
3703 // ABCDEHJMO // "main" function
3711 // And the EH regions we report to the VM are (in order; main clauses
3712 // first in most-to-least nested order, funclets ("duplicated clauses")
3713 // last, in most-to-least nested) are:
3721 // F -> I // funclet clause #1 for F
3722 // F -> K // funclet clause #2 for F
3723 // F -> L // funclet clause #3 for F
3724 // F -> N // funclet clause #4 for F
3725 // G -> I // funclet clause #1 for G
3726 // G -> K // funclet clause #2 for G
3727 // G -> L // funclet clause #3 for G
3728 // G -> N // funclet clause #4 for G
3729 // I -> K // funclet clause #1 for I
3730 // I -> L // funclet clause #2 for I
3731 // I -> N // funclet clause #3 for I
3732 // K -> N // funclet clause #1 for K
3733 // L -> N // funclet clause #1 for L
3735 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
3736 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
3737 // to add a clause "F -> G" because F is NOT protected by G, but we still have
3738 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
3740 // The overall ordering of the clauses is still the same most-to-least nesting
3741 // after front-to-back start offset. Because we place the funclets at the end
3742 // these new clauses should also go at the end by this ordering.
3745 if (duplicateClauseCount > 0)
3747 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
3749 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
3751 unsigned enclosingTryIndex;
3753 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
3755 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
3756 // ignoring 'mutual protect' trys
3757 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
3758 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
3760 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
3761 // that will have the enclosing try protecting the funclet.
3763 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
3764 // greater EH table index
3766 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
3768 // The try region is the handler of the funclet. Note that for filters, we don't protect the
3769 // filter region, only the filter handler region. This is because exceptions in filters never
3770 // escape; the VM swallows them.
3772 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
3773 BasicBlock* bbTryLast = fletTab->ebdHndLast;
3775 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
3776 BasicBlock* bbHndLast = encTab->ebdHndLast;
3778 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
3780 tryBeg = compiler->ehCodeOffset(bbTryBeg);
3781 hndBeg = compiler->ehCodeOffset(bbHndBeg);
3783 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3784 : compiler->ehCodeOffset(bbTryLast->bbNext);
3785 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3786 : compiler->ehCodeOffset(bbHndLast->bbNext);
3788 if (encTab->HasFilter())
3790 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
3794 hndTyp = encTab->ebdTyp;
3797 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
3799 // Tell the VM this is an extra clause caused by moving funclets out of line.
3800 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
3802 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
3803 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
3804 // instruction immediately after the 'try' body. So, it really could be more accurately named
3807 CORINFO_EH_CLAUSE clause;
3808 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
3809 clause.Flags = flags;
3810 clause.TryOffset = tryBeg;
3811 clause.TryLength = tryEnd;
3812 clause.HandlerOffset = hndBeg;
3813 clause.HandlerLength = hndEnd;
3815 assert(XTnum < EHCount);
3817 // Tell the VM about this EH clause (a duplicated clause).
3818 compiler->eeSetEHinfo(XTnum, &clause);
3821 ++reportedDuplicateClauseCount;
3824 if (duplicateClauseCount == reportedDuplicateClauseCount)
3826 break; // we've reported all of them; no need to continue looking
3830 } // for each 'true' enclosing 'try'
3831 } // for each EH table entry
3833 assert(duplicateClauseCount == reportedDuplicateClauseCount);
3834 } // if (duplicateClauseCount > 0)
3836 #if FEATURE_EH_CALLFINALLY_THUNKS
3837 if (clonedFinallyCount > 0)
3839 unsigned reportedClonedFinallyCount = 0;
3840 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3842 if (block->bbJumpKind == BBJ_CALLFINALLY)
3844 UNATIVE_OFFSET hndBeg, hndEnd;
3846 hndBeg = compiler->ehCodeOffset(block);
3848 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3849 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3850 BasicBlock* bbLabel = block->bbNext;
3851 if (block->isBBCallAlwaysPair())
3853 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3855 if (bbLabel == nullptr)
3857 hndEnd = compiler->info.compNativeCodeSize;
3861 assert(bbLabel->bbEmitCookie != nullptr);
3862 hndEnd = compiler->ehCodeOffset(bbLabel);
3865 CORINFO_EH_CLAUSE clause;
3866 clause.ClassToken = 0; // unused
3867 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
3868 clause.TryOffset = hndBeg;
3869 clause.TryLength = hndBeg;
3870 clause.HandlerOffset = hndBeg;
3871 clause.HandlerLength = hndEnd;
3873 assert(XTnum < EHCount);
3875 // Tell the VM about this EH clause (a cloned finally clause).
3876 compiler->eeSetEHinfo(XTnum, &clause);
3879 ++reportedClonedFinallyCount;
3882 if (clonedFinallyCount == reportedClonedFinallyCount)
3884 break; // we're done; no need to keep looking
3887 } // block is BBJ_CALLFINALLY
3890 assert(clonedFinallyCount == reportedClonedFinallyCount);
3891 } // if (clonedFinallyCount > 0)
3892 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3894 #endif // FEATURE_EH_FUNCLETS
3896 assert(XTnum == EHCount);
//------------------------------------------------------------------------
// genGCWriteBarrier: Emit the call to the proper GC write-barrier VM helper
// for a store of a GC reference.
//
// Arguments:
//    tgt - the store node requiring the barrier (GT_STOREIND for RyuJIT;
//          GT_IND or GT_CLS_VAR for the legacy backend)
//    wbf - the write-barrier form previously determined for this store
//
// Notes:
//    Chooses between the unchecked helper (CORINFO_HELP_ASSIGN_REF), the
//    checked helper (CORINFO_HELP_CHECKED_ASSIGN_REF - used when the target
//    address may lie outside the GC heap), and, in DEBUG only, the
//    CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP helper for stores proven not to
//    need a barrier.
//
void CodeGen::genGCWriteBarrier(GenTreePtr tgt, GCInfo::WriteBarrierForm wbf)
{
#ifndef LEGACY_BACKEND
    noway_assert(tgt->gtOper == GT_STOREIND);
#else  // LEGACY_BACKEND
    noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); // enforced by gcIsWriteBarrierCandidate
#endif // LEGACY_BACKEND

    /* Call the proper vm helper */
    int helper = CORINFO_HELP_ASSIGN_REF;
#ifdef DEBUG
    if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
    {
        // Debug-only helper that verifies the target really is not in the GC heap.
        helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
    }
    else
#endif
        if (tgt->gtOper != GT_CLS_VAR)
    {
        if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
        {
            if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
            {
                // Target address could be anywhere (not necessarily in the GC heap),
                // so the helper must range-check it before updating the card table.
                helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
            }
            else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
            {
                // A native-int address may likewise point outside the GC heap.
                helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
            }
        }
    }

    // The helper chosen must be consistent with the write-barrier form we were given.
    assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
           ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
            (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
           ((helper == CORINFO_HELP_ASSIGN_REF) &&
            (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));

#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
    // We classify the "tgt" trees as follows:
    // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
    //    IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, (GT_LCL_VAR,
    //    GT_REG_VAR)) }
    // then let "v" be the GT_LCL_VAR or GT_REG_VAR.
    //   * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
    //   * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
    //   * Otherwise, classify as CWBKind_OtherByRefLocal.
    // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, classify as CWBKind_AddrOfLocal.
    // Otherwise, classify as CWBKind_Unclassified.

    CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
    if (tgt->gtOper == GT_IND)
    {
        GenTreePtr lcl = NULL;

        GenTreePtr indArg = tgt->gtOp.gtOp1;
        if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
        {
            // Skip past an ADDR -> IND pair to the underlying address expression.
            indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
        }
        if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
        {
            lcl = indArg;
        }
        else if (indArg->gtOper == GT_ADD)
        {
            // Look for a local variable on either side of the ADD.
            if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
            {
                lcl = indArg->gtOp.gtOp1;
            }
            else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
            {
                lcl = indArg->gtOp.gtOp2;
            }
        }
        if (lcl != NULL)
        {
            wbKind          = CWBKind_OtherByRefLocal; // Unclassified local variable.
            unsigned lclNum = 0;
            if (lcl->gtOper == GT_LCL_VAR)
                lclNum = lcl->gtLclVarCommon.gtLclNum;
            else
            {
                assert(lcl->gtOper == GT_REG_VAR);
                lclNum = lcl->gtRegVar.gtLclNum;
            }
            if (lclNum == compiler->info.compRetBuffArg)
            {
                wbKind = CWBKind_RetBuf; // Ret buff.  Can happen if the struct exceeds the size limit.
            }
            else
            {
                // Refine the classification: by-ref parameters get their own bucket.
                LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
                if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
                {
                    wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
                }
            }
        }
        else
        {
            // We should have eliminated the barrier for this case.
            assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
        }
    }

    if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
    {
#if 0
#ifdef DEBUG
        // Enable this to sample the unclassified trees.
        static int unclassifiedBarrierSite = 0;
        if (wbKind == CWBKind_Unclassified)
        {
            unclassifiedBarrierSite++;
            printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite);
            compiler->gtDispTree(tgt);
            printf(""); // Flush.
            printf("\n");
        }
#endif // DEBUG
#endif // 0

        // Pass the classification to the helper so the per-kind counts can be accumulated.
        inst_IV(INS_push, wbKind);
        genEmitHelperCall(helper,
                          4,           // argSize
                          EA_PTRSIZE); // retSize
        SubtractStackLevel(4); // NOTE(review): assumes the helper consumes the pushed wbKind - confirm
    }
    else
    {
        genEmitHelperCall(helper,
                          0,           // argSize
                          EA_PTRSIZE); // retSize
    }
#else  // !FEATURE_COUNT_GC_WRITE_BARRIERS
    genEmitHelperCall(helper,
                      0,           // argSize
                      EA_PTRSIZE); // retSize
#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
}
4039 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4040 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4042 XX Prolog / Epilog XX
4044 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4045 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4048 /*****************************************************************************
4050 * Generates code for moving incoming register arguments to their
4051 * assigned location, in the function prolog.
4055 #pragma warning(push)
4056 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
4058 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
4063 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
4067 #ifdef _TARGET_ARM64_
4068 if (compiler->info.compIsVarArgs)
4070 // We've already saved all int registers at the top of stack in the prolog.
4071 // No need further action.
4076 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
4077 unsigned argNum; // current argNum, always in [0..argMax-1]
4078 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
4079 unsigned regArgNum; // index into the regArgTab[] table
4080 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
4081 bool doingFloat = regState->rsIsFloat;
4083 // We should be generating the prolog block when we are called
4084 assert(compiler->compGeneratingProlog);
4086 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
4087 noway_assert(regArgMaskLive != 0);
4089 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
4090 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
4092 // The regArgTab can always have unused entries,
4093 // for example if an architecture always increments the arg register number but uses either
4094 // an integer register or a floating point register to hold the next argument
4095 // then with a mix of float and integer args you could have:
4097 // sampleMethod(int i, float x, int j, float y, int k, float z);
4098 // r0, r2 and r4 as valid integer arguments with argMax as 5
// and f1, f3 and f5 as valid floating point arguments with argMax as 6
4100 // The first one is doingFloat==false and the second one is doingFloat==true
4102 // If a fixed return buffer (in r8) was also present then the first one would become:
4103 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
4106 argMax = regState->rsCalleeRegArgCount;
4107 fixedRetBufIndex = (unsigned)-1; // Invalid value
4109 // If necessary we will select a correct xtraReg for circular floating point args later.
4113 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
4115 else // we are doing the integer registers
4117 noway_assert(argMax <= MAX_REG_ARG);
4118 if (hasFixedRetBuffReg())
4120 fixedRetBufIndex = theFixedRetBuffArgNum();
4121 // We have an additional integer register argument when hasFixedRetBuffReg() is true
4122 argMax = fixedRetBufIndex + 1;
4123 assert(argMax == (MAX_REG_ARG + 1));
4128 // Construct a table with the register arguments, for detecting circular and
4129 // non-circular dependencies between the register arguments. A dependency is when
4130 // an argument register Rn needs to be moved to register Rm that is also an argument
4131 // register. The table is constructed in the order the arguments are passed in
4132 // registers: the first register argument is in regArgTab[0], the second in
4133 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
4134 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
4135 // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
4136 // we have increased the allocated size of the regArgTab[] by one.
4140 unsigned varNum; // index into compiler->lvaTable[] for this register argument
4141 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4142 var_types type; // the Jit type of this regArgTab entry
4143 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4144 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
4145 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
4146 // argument register number 'x'. Only used when circular = true.
4147 char slot; // 0 means the register is not used for a register argument
4148 // 1 means the first part of a register argument
4149 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
4150 bool stackArg; // true if the argument gets homed to the stack
4151 bool processed; // true after we've processed the argument (and it is in its final location)
4152 bool circular; // true if this register participates in a circular dependency loop.
4154 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4156 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
4157 // the type of the lclVar in ways that are not ascertainable from lvType.
4158 // So, for that case we retain the type of the register in the regArgTab.
4160 var_types getRegType(Compiler* compiler)
4162 return type; // UNIX_AMD64 implementation
4165 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4167 // In other cases, we simply use the type of the lclVar to determine the type of the register.
4168 var_types getRegType(Compiler* compiler)
4170 LclVarDsc varDsc = compiler->lvaTable[varNum];
4171 // Check if this is an HFA register arg and return the HFA type
4172 if (varDsc.lvIsHfaRegArg())
4174 return varDsc.GetHfaType();
4176 return varDsc.lvType;
4179 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4180 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
4184 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4186 // Is this variable a register arg?
4187 if (!varDsc->lvIsParam)
4192 if (!varDsc->lvIsRegArg)
4197 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
4198 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
4199 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
// use the original TYP_STRUCT argument.
4202 if (varDsc->lvPromoted || varDsc->lvIsStructField)
4204 LclVarDsc* parentVarDsc = varDsc;
4205 if (varDsc->lvIsStructField)
4207 assert(!varDsc->lvPromoted);
4208 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
4211 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
4213 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
4215 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
4217 // For register arguments that are independent promoted structs we put the promoted field varNum in the
4219 if (varDsc->lvPromoted)
4226 // For register arguments that are not independent promoted structs we put the parent struct varNum in
4228 if (varDsc->lvIsStructField)
4235 var_types regType = varDsc->TypeGet();
4236 // Change regType to the HFA type when we have a HFA argument
4237 if (varDsc->lvIsHfaRegArg())
4239 regType = varDsc->GetHfaType();
4242 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4243 if (!varTypeIsStruct(regType))
4244 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4246 // A struct might be passed partially in XMM register for System V calls.
4247 // So a single arg might use both register files.
4248 if (isFloatRegType(regType) != doingFloat)
4256 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4257 if (varTypeIsStruct(varDsc))
4259 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
4260 assert(typeHnd != nullptr);
4261 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
4262 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
4263 if (!structDesc.passedInRegisters)
4265 // The var is not passed in registers.
4269 unsigned firstRegSlot = 0;
4270 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
4272 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
4277 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
4278 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
4279 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
4280 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
4283 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
4284 // registers or on stack, the upper most 4-bytes will be zero.
4286 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
4287 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
4290 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
4291 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
4292 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
4293 // there is no need to clear upper 4-bytes of Vector3 type args.
// RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
4296 // Vector3 return values are returned two return registers and Caller assembles them into a
// single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4-bytes of Vector3
4298 // type args in prolog and Vector3 type return value of a call
4300 if (varDsc->lvType == TYP_SIMD12)
4302 regType = TYP_DOUBLE;
4307 regType = compiler->GetEightByteType(structDesc, slotCounter);
4310 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
4312 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
4313 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
4315 // Store the reg for the first slot.
4318 firstRegSlot = regArgNum;
4321 // Bingo - add it to our table
4322 noway_assert(regArgNum < argMax);
4323 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
4324 // not be multiple vars representing this argument
4326 regArgTab[regArgNum].varNum = varNum;
4327 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
4328 regArgTab[regArgNum].type = regType;
4335 continue; // Nothing to do for this regState set.
4338 regArgNum = firstRegSlot;
4341 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4343 // Bingo - add it to our table
4344 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
4346 noway_assert(regArgNum < argMax);
4347 // We better not have added it already (there better not be multiple vars representing this argument
4349 noway_assert(regArgTab[regArgNum].slot == 0);
4351 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4352 // Set the register type.
4353 regArgTab[regArgNum].type = regType;
4354 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4356 regArgTab[regArgNum].varNum = varNum;
4357 regArgTab[regArgNum].slot = 1;
4361 #if FEATURE_MULTIREG_ARGS
4362 if (compiler->lvaIsMultiregStruct(varDsc))
4364 if (varDsc->lvIsHfaRegArg())
4366 // We have an HFA argument, set slots to the number of registers used
4367 slots = varDsc->lvHfaSlots();
4371 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
4372 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
4373 // We have a non-HFA multireg argument, set slots to two
4377 // Note that regArgNum+1 represents an argument index not an actual argument register.
4378 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
4380 // This is the setup for the rest of a multireg struct arg
4382 for (int i = 1; i < slots; i++)
4384 noway_assert((regArgNum + i) < argMax);
4386 // We better not have added it already (there better not be multiple vars representing this argument
4388 noway_assert(regArgTab[regArgNum + i].slot == 0);
4390 regArgTab[regArgNum + i].varNum = varNum;
4391 regArgTab[regArgNum + i].slot = (char)(i + 1);
4394 #endif // FEATURE_MULTIREG_ARGS
4398 int lclSize = compiler->lvaLclSize(varNum);
4400 if (lclSize > REGSIZE_BYTES)
4402 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
4403 slots = lclSize / REGSIZE_BYTES;
4404 if (regArgNum + slots > maxRegArgNum)
4406 slots = maxRegArgNum - regArgNum;
4409 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
4410 assert(slots < INT8_MAX);
4411 for (char i = 1; i < slots; i++)
4413 regArgTab[regArgNum + i].varNum = varNum;
4414 regArgTab[regArgNum + i].slot = i + 1;
4416 #endif // _TARGET_ARM_
4418 for (int i = 0; i < slots; i++)
4420 regType = regArgTab[regArgNum + i].getRegType(compiler);
4421 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
4423 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4424 // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
4425 // The type of the register depends on the classification of the first eightbyte
4426 // of the struct. For information on classification refer to the System V x86_64 ABI at:
4427 // http://www.x86-64.org/documentation/abi.pdf
4429 assert((i > 0) || (regNum == varDsc->lvArgReg));
4430 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4431 // Is the arg dead on entry to the method ?
4433 if ((regArgMaskLive & genRegMask(regNum)) == 0)
4435 if (varDsc->lvTrackedNonStruct())
4437 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
4442 noway_assert(varDsc->lvType == TYP_STRUCT);
4443 #else // !_TARGET_X86_
4444 #ifndef LEGACY_BACKEND
4445 // For LSRA, it may not be in regArgMaskLive if it has a zero
4446 // refcnt. This is in contrast with the non-LSRA case in which all
4447 // non-tracked args are assumed live on entry.
4448 noway_assert((varDsc->lvRefCnt == 0) || (varDsc->lvType == TYP_STRUCT) ||
4449 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs));
4450 #else // LEGACY_BACKEND
4452 varDsc->lvType == TYP_STRUCT ||
4453 (varDsc->lvAddrExposed && (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)));
4454 #endif // LEGACY_BACKEND
4455 #endif // !_TARGET_X86_
4457 // Mark it as processed and be done with it
4458 regArgTab[regArgNum + i].processed = true;
4463 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
4464 // could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
4465 // they've already been spilled.
4467 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
4468 #endif // _TARGET_ARM_
4470 #if !defined(UNIX_AMD64_ABI)
4471 noway_assert(xtraReg != (varDsc->lvArgReg + i));
4473 noway_assert(regArgMaskLive & genRegMask(regNum));
4476 regArgTab[regArgNum + i].processed = false;
4478 /* mark stack arguments since we will take care of those first */
4479 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
4481 /* If it goes on the stack or in a register that doesn't hold
4482 * an argument anymore -> CANNOT form a circular dependency */
4484 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
4486 /* will trash another argument -> possible dependency
4487 * We may need several passes after the table is constructed
4488 * to decide on that */
4490 /* Maybe the argument stays in the register (IDEAL) */
4492 if ((i == 0) && (varDsc->lvRegNum == regNum))
4497 #if !defined(_TARGET_64BIT_)
4498 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
4502 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
4507 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
4508 (REG_NEXT(varDsc->lvRegNum) == regNum))
4512 #endif // !defined(_TARGET_64BIT_)
4513 regArgTab[regArgNum + i].circular = true;
4518 regArgTab[regArgNum + i].circular = false;
4520 /* mark the argument register as free */
4521 regArgMaskLive &= ~genRegMask(regNum);
4526 /* Find the circular dependencies for the argument registers, if any.
4527 * A circular dependency is a set of registers R1, R2, ..., Rn
4528 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
4533 /* Possible circular dependencies still exist; the previous pass was not enough
4534 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
4540 for (argNum = 0; argNum < argMax; argNum++)
4542 // If we already marked the argument as non-circular then continue
4544 if (!regArgTab[argNum].circular)
4549 if (regArgTab[argNum].slot == 0) // Not a register argument
4554 varNum = regArgTab[argNum].varNum;
4555 noway_assert(varNum < compiler->lvaCount);
4556 varDsc = compiler->lvaTable + varNum;
4557 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4559 /* cannot possibly have stack arguments */
4560 noway_assert(varDsc->lvIsInReg());
4561 noway_assert(!regArgTab[argNum].stackArg);
4563 var_types regType = regArgTab[argNum].getRegType(compiler);
4564 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4566 regNumber destRegNum = REG_NA;
4567 if (regArgTab[argNum].slot == 1)
4569 destRegNum = varDsc->lvRegNum;
4571 #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
4574 assert(regArgTab[argNum].slot == 2);
4576 assert(regArgTab[argNum - 1].slot == 1);
4577 assert(regArgTab[argNum - 1].varNum == varNum);
4578 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4579 regArgMaskLive &= ~genRegMask(regNum);
4580 regArgTab[argNum].circular = false;
4584 #elif !defined(_TARGET_64BIT_)
4585 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
4587 destRegNum = varDsc->lvOtherReg;
4591 assert(regArgTab[argNum].slot == 2);
4592 assert(varDsc->TypeGet() == TYP_DOUBLE);
4593 destRegNum = REG_NEXT(varDsc->lvRegNum);
4595 #endif // !defined(_TARGET_64BIT_)
4596 noway_assert(destRegNum != REG_NA);
4597 if (genRegMask(destRegNum) & regArgMaskLive)
4599 /* we are trashing a live argument register - record it */
4600 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
4601 noway_assert(destRegArgNum < argMax);
4602 regArgTab[destRegArgNum].trashBy = argNum;
4606 /* argument goes to a free register */
4607 regArgTab[argNum].circular = false;
4610 /* mark the argument register as free */
4611 regArgMaskLive &= ~genRegMask(regNum);
4617 /* At this point, everything that has the "circular" flag
4618 * set to "true" forms a circular dependency */
4619 CLANG_FORMAT_COMMENT_ANCHOR;
4626 printf("Circular dependencies found while home-ing the incoming arguments.\n");
4631 // LSRA allocates registers to incoming parameters in order and will not overwrite
4632 // a register still holding a live parameter.
4633 CLANG_FORMAT_COMMENT_ANCHOR;
4635 #ifndef LEGACY_BACKEND
4636 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
4637 "Homing of float argument registers with circular dependencies not implemented.");
4638 #endif // LEGACY_BACKEND
4640 /* Now move the arguments to their locations.
4641 * First consider ones that go on the stack since they may
4642 * free some registers. */
4644 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
4645 for (argNum = 0; argNum < argMax; argNum++)
4649 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4650 // If this is the wrong register file, just continue.
4651 if (regArgTab[argNum].type == TYP_UNDEF)
4653 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4654 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4655 // The next register file processing will process it.
4658 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4659 // If the arg is dead on entry to the method, skip it
4661 if (regArgTab[argNum].processed)
4666 if (regArgTab[argNum].slot == 0) // Not a register argument
4671 varNum = regArgTab[argNum].varNum;
4672 noway_assert(varNum < compiler->lvaCount);
4673 varDsc = compiler->lvaTable + varNum;
4675 #ifndef _TARGET_64BIT_
4676 // If not a stack arg go to the next one
4677 if (varDsc->lvType == TYP_LONG)
4679 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
4683 else if (varDsc->lvOtherReg != REG_STK)
4689 #endif // !_TARGET_64BIT_
4691 // If not a stack arg go to the next one
4692 if (!regArgTab[argNum].stackArg)
4698 #if defined(_TARGET_ARM_)
4699 if (varDsc->lvType == TYP_DOUBLE)
4701 if (regArgTab[argNum].slot == 2)
4703 // We handled the entire double when processing the first half (slot == 1)
4709 noway_assert(regArgTab[argNum].circular == false);
4711 noway_assert(varDsc->lvIsParam);
4712 noway_assert(varDsc->lvIsRegArg);
4713 noway_assert(varDsc->lvIsInReg() == false ||
4714 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
4716 var_types storeType = TYP_UNDEF;
4717 unsigned slotSize = TARGET_POINTER_SIZE;
4719 if (varTypeIsStruct(varDsc))
4721 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
4722 #if FEATURE_MULTIREG_ARGS
4723 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
4724 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
4725 #endif // FEATURE_MULTIREG_ARGS
4726 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4727 storeType = regArgTab[argNum].type;
4728 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4729 if (varDsc->lvIsHfaRegArg())
4732 // On ARM32 the storeType for HFA args is always TYP_FLOAT
4733 storeType = TYP_FLOAT;
4734 slotSize = (unsigned)emitActualTypeSize(storeType);
4735 #else // _TARGET_ARM64_
4736 storeType = genActualType(varDsc->GetHfaType());
4737 slotSize = (unsigned)emitActualTypeSize(storeType);
4738 #endif // _TARGET_ARM64_
4741 else // Not a struct type
4743 storeType = genActualType(varDsc->TypeGet());
4745 size = emitActualTypeSize(storeType);
4747 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4748 #endif //_TARGET_X86_
4750 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4752 // Stack argument - if the ref count is 0 don't care about it
4754 if (!varDsc->lvOnFrame)
4756 noway_assert(varDsc->lvRefCnt == 0);
4760 // Since slot is typically 1, baseOffset is typically 0
4761 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
4763 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4765 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4766 // Check if we are writing past the end of the struct
4767 if (varTypeIsStruct(varDsc))
4769 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4771 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4773 if (regArgTab[argNum].slot == 1)
4775 psiMoveToStack(varNum);
4779 /* mark the argument as processed */
4781 regArgTab[argNum].processed = true;
4782 regArgMaskLive &= ~genRegMask(srcRegNum);
4784 #if defined(_TARGET_ARM_)
4785 if (storeType == TYP_DOUBLE)
4787 regArgTab[argNum + 1].processed = true;
4788 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4793 /* Process any circular dependencies */
4796 unsigned begReg, destReg, srcReg;
4797 unsigned varNumDest, varNumSrc;
4798 LclVarDsc* varDscDest;
4799 LclVarDsc* varDscSrc;
4800 instruction insCopy = INS_mov;
4804 #if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4805 insCopy = ins_Copy(TYP_DOUBLE);
4806 // Compute xtraReg here when we have a float argument
4807 assert(xtraReg == REG_NA);
4809 regMaskTP fpAvailMask;
4811 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4812 #if defined(FEATURE_HFA)
4813 fpAvailMask &= RBM_ALLDOUBLE;
4815 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4816 #error Error. Wrong architecture.
4817 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4818 #endif // defined(FEATURE_HFA)
4820 if (fpAvailMask == RBM_NONE)
4822 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4823 #if defined(FEATURE_HFA)
4824 fpAvailMask &= RBM_ALLDOUBLE;
4826 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4827 #error Error. Wrong architecture.
4828 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4829 #endif // defined(FEATURE_HFA)
4832 assert(fpAvailMask != RBM_NONE);
4834 // We pick the lowest avail register number
4835 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
4836 xtraReg = genRegNumFromMask(tempMask);
4837 #elif defined(_TARGET_X86_)
4838 // This case shouldn't occur on x86 since NYI gets converted to an assert
4839 NYI("Homing circular FP registers via xtraReg");
4843 for (argNum = 0; argNum < argMax; argNum++)
4845 // If not a circular dependency then continue
4846 if (!regArgTab[argNum].circular)
4851 // If already processed the dependency then continue
4853 if (regArgTab[argNum].processed)
4858 if (regArgTab[argNum].slot == 0) // Not a register argument
4863 destReg = begReg = argNum;
4864 srcReg = regArgTab[argNum].trashBy;
4866 varNumDest = regArgTab[destReg].varNum;
4867 noway_assert(varNumDest < compiler->lvaCount);
4868 varDscDest = compiler->lvaTable + varNumDest;
4869 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
4871 noway_assert(srcReg < argMax);
4872 varNumSrc = regArgTab[srcReg].varNum;
4873 noway_assert(varNumSrc < compiler->lvaCount);
4874 varDscSrc = compiler->lvaTable + varNumSrc;
4875 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4877 emitAttr size = EA_PTRSIZE;
4879 #ifdef _TARGET_XARCH_
4881 // The following code relies upon the target architecture having an
4882 // 'xchg' instruction which directly swaps the values held in two registers.
4883 // On the ARM architecture we do not have such an instruction.
4885 if (destReg == regArgTab[srcReg].trashBy)
4887 /* only 2 registers form the circular dependency - use "xchg" */
4889 varNum = regArgTab[argNum].varNum;
4890 noway_assert(varNum < compiler->lvaCount);
4891 varDsc = compiler->lvaTable + varNum;
4892 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4894 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
4896 /* Set "size" to indicate GC if one and only one of
4897 * the operands is a pointer
4898 * RATIONALE: If both are pointers, nothing changes in
4899 * the GC pointer tracking. If only one is a pointer we
4900 * have to "swap" the registers in the GC reg pointer mask
4903 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
4908 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
4910 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
4911 regTracker.rsTrackRegTrash(varDscSrc->lvRegNum);
4912 regTracker.rsTrackRegTrash(varDscSrc->lvArgReg);
4914 /* mark both arguments as processed */
4915 regArgTab[destReg].processed = true;
4916 regArgTab[srcReg].processed = true;
4918 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
4919 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
4921 psiMoveToReg(varNumSrc);
4922 psiMoveToReg(varNumDest);
4925 #endif // _TARGET_XARCH_
4927 var_types destMemType = varDscDest->TypeGet();
4930 bool cycleAllDouble = true; // assume the best
4932 unsigned iter = begReg;
4935 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
4937 cycleAllDouble = false;
4940 iter = regArgTab[iter].trashBy;
4941 } while (iter != begReg);
4943 // We may treat doubles as floats for ARM because we could have partial circular
4944 // dependencies of a float with a lo/hi part of the double. We mark the
4945 // trashBy values for each slot of the double, so let the circular dependency
4946 // logic work its way out for floats rather than doubles. If a cycle has all
4947 // doubles, then optimize so that instead of two vmov.f32's to move a double,
4948 // we can use one vmov.f64.
4950 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4952 destMemType = TYP_FLOAT;
4954 #endif // _TARGET_ARM_
4956 if (destMemType == TYP_REF)
4960 else if (destMemType == TYP_BYREF)
4964 else if (destMemType == TYP_DOUBLE)
4968 else if (destMemType == TYP_FLOAT)
4973 /* move the dest reg (begReg) in the extra reg */
4975 assert(xtraReg != REG_NA);
4977 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4979 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4981 regTracker.rsTrackRegCopy(xtraReg, begRegNum);
4983 *pXtraRegClobbered = true;
4985 psiMoveToReg(varNumDest, xtraReg);
4987 /* start moving everything to its right place */
4989 while (srcReg != begReg)
4993 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4994 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4996 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4998 regTracker.rsTrackRegCopy(destRegNum, srcRegNum);
5000 /* mark 'src' as processed */
5001 noway_assert(srcReg < argMax);
5002 regArgTab[srcReg].processed = true;
5004 if (size == EA_8BYTE)
5005 regArgTab[srcReg + 1].processed = true;
5007 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
5009 /* move to the next pair */
5011 srcReg = regArgTab[srcReg].trashBy;
5013 varDscDest = varDscSrc;
5014 destMemType = varDscDest->TypeGet();
5016 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
5018 destMemType = TYP_FLOAT;
5021 varNumSrc = regArgTab[srcReg].varNum;
5022 noway_assert(varNumSrc < compiler->lvaCount);
5023 varDscSrc = compiler->lvaTable + varNumSrc;
5024 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
5026 if (destMemType == TYP_REF)
5030 else if (destMemType == TYP_DOUBLE)
5040 /* take care of the beginning register */
5042 noway_assert(srcReg == begReg);
5044 /* move the dest reg (begReg) in the extra reg */
5046 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
5048 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
5050 regTracker.rsTrackRegCopy(destRegNum, xtraReg);
5052 psiMoveToReg(varNumSrc);
5054 /* mark the beginning register as processed */
5056 regArgTab[srcReg].processed = true;
5058 if (size == EA_8BYTE)
5059 regArgTab[srcReg + 1].processed = true;
5061 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
5066 /* Finally take care of the remaining arguments that must be enregistered */
5067 while (regArgMaskLive)
5069 regMaskTP regArgMaskLiveSave = regArgMaskLive;
5071 for (argNum = 0; argNum < argMax; argNum++)
5073 /* If already processed go to the next one */
5074 if (regArgTab[argNum].processed)
5079 if (regArgTab[argNum].slot == 0)
5080 { // Not a register argument
5084 varNum = regArgTab[argNum].varNum;
5085 noway_assert(varNum < compiler->lvaCount);
5086 varDsc = compiler->lvaTable + varNum;
5087 var_types regType = regArgTab[argNum].getRegType(compiler);
5088 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
5090 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5091 if (regType == TYP_UNDEF)
5093 // This could happen if the reg in regArgTab[argNum] is of the other register file -
5094 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
5095 // The next register file processing will process it.
5096 regArgMaskLive &= ~genRegMask(regNum);
5099 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5101 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
5102 #ifndef _TARGET_64BIT_
5103 #ifndef _TARGET_ARM_
5104 // Right now we think that incoming arguments are not pointer sized. When we eventually
5105 // understand the calling convention, this still won't be true. But maybe we'll have a better
5106 // idea of how to ignore it.
5108 // On Arm, a long can be passed in register
5109 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void*));
5111 #endif //_TARGET_64BIT_
5113 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
5115 /* Register argument - hopefully it stays in the same register */
5116 regNumber destRegNum = REG_NA;
5117 var_types destMemType = varDsc->TypeGet();
5119 if (regArgTab[argNum].slot == 1)
5121 destRegNum = varDsc->lvRegNum;
5124 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
5126 // The second half of the double has already been processed! Treat this as a single.
5127 destMemType = TYP_FLOAT;
5129 #endif // _TARGET_ARM_
5131 #ifndef _TARGET_64BIT_
5132 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
5134 #ifndef LEGACY_BACKEND
5135 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
5136 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
5138 destRegNum = regNum;
5141 #endif // !LEGACY_BACKEND
5142 destRegNum = varDsc->lvOtherReg;
5144 assert(destRegNum != REG_STK);
5148 assert(regArgTab[argNum].slot == 2);
5149 assert(destMemType == TYP_DOUBLE);
5151 // For doubles, we move the entire double using the argNum representing
5152 // the first half of the double. There are two things we won't do:
5153 // (1) move the double when the 1st half of the destination is free but the
5154 // 2nd half is occupied, and (2) move the double when the 2nd half of the
5155 // destination is free but the 1st half is occupied. Here we consider the
5156 // case where the first half can't be moved initially because its target is
5157 // still busy, but the second half can be moved. We wait until the entire
5158 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
5159 // and F2 single moving to F16. When we process F0, its target F2 is busy,
5160 // so we skip it on the first pass. When we process F1, its target F3 is
5161 // available. However, we want to move F0/F1 all at once, so we skip it here.
5162 // We process F2, which frees up F2. The next pass through, we process F0 and
5163 // F2/F3 are empty, so we move it. Note that if half of a double is involved
5164 // in a circularity with a single, then we will have already moved that half
5165 // above, so we go ahead and move the remaining half as a single.
5166 // Because there are no circularities left, we are guaranteed to terminate.
5169 assert(regArgTab[argNum - 1].slot == 1);
5171 if (!regArgTab[argNum - 1].processed)
5173 // The first half of the double hasn't been processed yet; wait so both halves can be processed at the same time
5177 // The first half of the double has been processed but the second half hasn't!
5178 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
5179 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
5180 // dependency logic above will move them as singles, leaving just F3 to move. Treat
5181 // it as a single to finish the shuffling.
5183 destMemType = TYP_FLOAT;
5184 destRegNum = REG_NEXT(varDsc->lvRegNum);
5186 #endif // !_TARGET_64BIT_
5187 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5190 assert(regArgTab[argNum].slot == 2);
5192 assert(regArgTab[argNum - 1].slot == 1);
5193 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
5194 destRegNum = varDsc->lvRegNum;
5195 noway_assert(regNum != destRegNum);
5198 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5199 noway_assert(destRegNum != REG_NA);
5200 if (destRegNum != regNum)
5202 /* Cannot trash a currently live register argument.
5203 * Skip this one until its target will be free
5204 * which is guaranteed to happen since we have no circular dependencies. */
5206 regMaskTP destMask = genRegMask(destRegNum);
5208 // Don't process the double until both halves of the destination are clear.
5209 if (genActualType(destMemType) == TYP_DOUBLE)
5211 assert((destMask & RBM_DBL_REGS) != 0);
5212 destMask |= genRegMask(REG_NEXT(destRegNum));
5216 if (destMask & regArgMaskLive)
5221 /* Move it to the new register */
5223 emitAttr size = emitActualTypeSize(destMemType);
5225 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
5227 psiMoveToReg(varNum);
5230 /* mark the argument as processed */
5232 assert(!regArgTab[argNum].processed);
5233 regArgTab[argNum].processed = true;
5234 regArgMaskLive &= ~genRegMask(regNum);
5235 #if FEATURE_MULTIREG_ARGS
5236 int argRegCount = 1;
5238 if (genActualType(destMemType) == TYP_DOUBLE)
5243 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5244 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
5247 int nextArgNum = argNum + 1;
5248 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
5249 noway_assert(regArgTab[nextArgNum].varNum == varNum);
5250 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
5251 // and moves the 0th element of the src reg into the 1st element of the dest reg.
5252 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
5253 // Set destRegNum to regNum so that we skip the setting of the register below,
5254 // but mark argNum as processed and clear regNum from the live mask.
5255 destRegNum = regNum;
5257 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5258 // Mark the rest of the argument registers corresponding to this multi-reg type as
5259 // being processed and no longer live.
5260 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
5262 int nextArgNum = argNum + regSlot;
5263 assert(!regArgTab[nextArgNum].processed);
5264 regArgTab[nextArgNum].processed = true;
5265 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
5266 regArgMaskLive &= ~genRegMask(nextRegNum);
5268 #endif // FEATURE_MULTIREG_ARGS
5271 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
5275 #pragma warning(pop)
5278 /*****************************************************************************
5279 * If any incoming stack arguments live in registers, load them.
5281 void CodeGen::genEnregisterIncomingStackArgs()
// Prolog helper: for each parameter that arrived on the stack (or was prespilled)
// but whose home for the method body is a register, emit a load from its stack
// home into that register, and record the move for the register tracker and
// the debug-info (psi) machinery.
5286 printf("*************** In genEnregisterIncomingStackArgs()\n");
5290 assert(compiler->compGeneratingProlog);
5292 unsigned varNum = 0;
5294 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
5296 /* Is this variable a parameter? */
5298 if (!varDsc->lvIsParam)
5303 /* If it's a register argument then it's already been taken care of.
5304 But, on Arm when under a profiler, we would have prespilled a register argument
5305 and hence here we need to load it from its prespilled location.
5307 bool isPrespilledForProfiling = false;
5308 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
// Under a profiler hook on ARM, register args are spilled to the stack before the
// prolog body runs, so they must be re-loaded here like ordinary stack args.
5309 isPrespilledForProfiling =
5310 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
5313 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
5318 /* Has the parameter been assigned to a register? */
5320 if (!varDsc->lvIsInReg())
5325 var_types type = genActualType(varDsc->TypeGet());
5327 #if FEATURE_STACK_FP_X87
5328 // Floating point locals are loaded onto the x86-FPU in the next section
5329 if (varTypeIsFloating(type))
5333 /* Is the variable dead on entry */
5335 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
5340 /* Load the incoming parameter into the register */
5342 /* Figure out the home offset of the incoming argument */
5347 #ifndef LEGACY_BACKEND
// RyuJIT: the prolog-entry register assignment comes from lvArgInitReg /
// lvArgInitRegPair (a TYP_LONG may live in a register pair on 32-bit ARM).
5349 if (type == TYP_LONG)
5351 regPairNo regPair = varDsc->lvArgInitRegPair;
5352 regNum = genRegPairLo(regPair);
5353 otherReg = genRegPairHi(regPair);
5356 #endif // _TARGET_ARM_
5358 regNum = varDsc->lvArgInitReg;
5361 #else // LEGACY_BACKEND
// Legacy JIT: the home register(s) are the variable's assigned lvRegNum/lvOtherReg.
5362 regNum = varDsc->lvRegNum;
5363 if (type == TYP_LONG)
5365 otherReg = varDsc->lvOtherReg;
5371 #endif // LEGACY_BACKEND
5373 assert(regNum != REG_STK);
5375 #ifndef _TARGET_64BIT_
5376 if (type == TYP_LONG)
5378 /* long - at least the low half must be enregistered */
5380 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, regNum, varNum, 0);
5381 regTracker.rsTrackRegTrash(regNum);
5383 /* Is the upper half also enregistered? */
5385 if (otherReg != REG_STK)
// Load the high 32 bits from the second int slot of the long's stack home.
5387 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, otherReg, varNum, sizeof(int));
5388 regTracker.rsTrackRegTrash(otherReg);
5392 #endif // _TARGET_64BIT_
5394 /* Loading a single register - this is the easy/common case */
5396 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
5397 regTracker.rsTrackRegTrash(regNum);
// Tell the debug-info (psi) tracking that the parameter now lives in a register.
5400 psiMoveToReg(varNum);
5404 /*-------------------------------------------------------------------------
5406 * We have to decide whether we're going to use block initialization
5407 * in the prolog before we assign final stack offsets. This is because
5408 * when using block initialization we may need additional callee-saved
5409 * registers which need to be saved on the frame, thus increasing the
5412 * We'll count the number of locals we have to initialize,
5413 * and if there are lots of them we'll use block initialization.
5414 * Thus, the local variable table must have accurate register location
5415 * information for enregistered locals for their register state on entry
5418 * At the same time we set lvMustInit for locals (enregistered or on stack)
5419 * that must be initialized (e.g. initialize memory (comInitMem),
5420 * untracked pointers or disable DFA)
5422 void CodeGen::genCheckUseBlockInit()
// Decide whether the prolog should zero-initialize must-init stack locals with a
// single block operation (e.g. "rep stos") rather than per-slot stores. Counts the
// int-sized slots needing initialization, sets lvMustInit on locals that require
// it, and — if block init is chosen — marks as modified the extra callee-saved
// registers the block-init sequence will need.
5424 #ifndef LEGACY_BACKEND // this is called before codegen in RyuJIT backend
5425 assert(!compiler->compGeneratingProlog);
5426 #else // LEGACY_BACKEND
5427 assert(compiler->compGeneratingProlog);
5428 #endif // LEGACY_BACKEND
5430 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
5431 // larger than int count for more than 1).
5432 unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
5433 // determine whether to use block init.
5438 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
// Parameters are initialized by the caller/prolog homing, never zero-init'ed here.
5440 if (varDsc->lvIsParam)
5445 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
5447 noway_assert(varDsc->lvRefCnt == 0);
// Special JIT-created frame locals below get real values in the prolog, so
// they never need zero initialization.
5451 if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
5456 #if FEATURE_FIXED_OUT_ARGS
5457 if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
5461 if (varNum == compiler->lvaOutgoingArgSpaceVar)
5467 #if FEATURE_EH_FUNCLETS
5468 // There's no need to force 0-initialization of the PSPSym, it will be
5469 // initialized with a real value in the prolog
5470 if (varNum == compiler->lvaPSPSym)
5476 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5478 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
5479 // initialized by the parent struct. No need to set the lvMustInit bit in the
5484 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
5487 if (varDsc->lvTracked)
5489 /* For uninitialized use of tracked variables, the liveness
5490 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
5492 if (varDsc->lvMustInit ||
5493 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
5495 /* This var must be initialized */
5497 varDsc->lvMustInit = 1;
5499 /* See if the variable that is on the stack will be initialized
5500 * using rep stos - compute the total size to be zero-ed */
5502 if (varDsc->lvOnFrame)
5504 if (!varDsc->lvRegister)
5506 #ifndef LEGACY_BACKEND
5507 if (!varDsc->lvIsInReg())
5508 #endif // !LEGACY_BACKEND
5510 // Var is completely on the stack, in the legacy JIT case, or
5511 // on the stack at entry, in the RyuJIT case.
5512 initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
5517 // Var is partially enregistered
5518 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
5519 initStkLclCnt += genTypeStSz(TYP_INT);
5525 /* With compInitMem, all untracked vars will have to be init'ed */
5526 /* VSW 102460 - Do not force initialization of compiler generated temps,
5527 unless they are untracked GC type or structs that contain GC pointers */
5528 CLANG_FORMAT_COMMENT_ANCHOR;
5531 // TODO-1stClassStructs
5532 // This is here to duplicate previous behavior, where TYP_SIMD8 locals
5533 // were not being re-typed correctly.
5534 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
5535 #else // !FEATURE_SIMD
5536 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
5537 #endif // !FEATURE_SIMD
5538 varDsc->lvOnFrame &&
5539 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
5541 varDsc->lvMustInit = true;
5543 initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
5549 /* Ignore if not a pointer variable or value class with a GC field */
5551 if (!compiler->lvaTypeIsGC(varNum))
5556 /* If we don't know lifetimes of variables, must be conservative */
5557 if (!compiler->backendRequiresLocalVarLifetimes())
5559 varDsc->lvMustInit = true;
5560 noway_assert(!varDsc->lvRegister);
5564 if (!varDsc->lvTracked)
5566 varDsc->lvMustInit = true;
5570 /* Is this a 'must-init' stack pointer local? */
5572 if (varDsc->lvMustInit && varDsc->lvOnFrame)
5574 initStkLclCnt += varDsc->lvStructGcCount;
// Track up to a handful of "large" GC structs; they bias the heuristic below
// against block init (zeroing them individually may be cheaper overall).
5577 if ((compiler->lvaLclSize(varNum) > (3 * sizeof(void*))) && (largeGcStructs <= 4))
5583 /* Don't forget about spill temps that hold pointers */
5585 if (!TRACK_GC_TEMP_LIFETIMES)
5587 assert(compiler->tmpAllFree());
5588 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
5590 if (varTypeIsGC(tempThis->tdTempType()))
5597 // After debugging this further it was found that this logic is incorrect:
5598 // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
5599 // and this also double counts variables (we saw this in the debugger) around line 4829.
5600 // Even though this doesn't pose a problem with correctness it will improperly decide to
5601 // zero init the stack using a block operation instead of a 'case by case' basis.
5602 genInitStkLclCnt = initStkLclCnt;
5604 /* If we have more than 4 untracked locals, use block initialization */
5605 /* TODO-Review: If we have large structs, bias toward not using block initialization since
5606 we waste all the other slots. Really need to compute the correct cost
5607 and compare that against zeroing the slots individually */
5609 genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
5611 if (genUseBlockInit)
5613 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
5615 // If there is a secret stub param, don't count it, as it will no longer
5616 // be live when we do block init.
5617 if (compiler->info.compPublishStubParam)
5619 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
5622 #ifdef _TARGET_XARCH_
5623 // If we're going to use "REP STOS", remember that we will trash EDI
5624 // For fastcall we will have to save ECX, EAX
5625 // so reserve two extra callee saved
5626 // This is better than pushing eax, ecx, because later
5627 // we will mess up already computed offsets on the stack (for ESP frames)
5628 regSet.rsSetRegsModified(RBM_EDI);
5630 #ifdef UNIX_AMD64_ABI
5631 // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
5632 // In such case use R12 and R13 registers.
5633 if (maskCalleeRegArgMask & RBM_RCX)
5635 regSet.rsSetRegsModified(RBM_R12);
5638 if (maskCalleeRegArgMask & RBM_RDI)
5640 regSet.rsSetRegsModified(RBM_R13);
5642 #else // !UNIX_AMD64_ABI
5643 if (maskCalleeRegArgMask & RBM_ECX)
5645 regSet.rsSetRegsModified(RBM_ESI);
5647 #endif // !UNIX_AMD64_ABI
5649 if (maskCalleeRegArgMask & RBM_EAX)
5651 regSet.rsSetRegsModified(RBM_EBX);
5654 #endif // _TARGET_XARCH_
5657 // On the Arm if we are using a block init to initialize, then we
5658 // must force spill R4/R5/R6 so that we can use them during
5659 // zero-initialization process.
// Only as many of R4/R5/R6 are spilled as the zero-init loop will actually need,
// based on how many incoming register args must be kept live around it.
5661 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
5662 if (forceSpillRegCount > 0)
5663 regSet.rsSetRegsModified(RBM_R4);
5664 if (forceSpillRegCount > 1)
5665 regSet.rsSetRegsModified(RBM_R5);
5666 if (forceSpillRegCount > 2)
5667 regSet.rsSetRegsModified(RBM_R6);
5668 #endif // _TARGET_ARM_
5672 /*-----------------------------------------------------------------------------
5674 * Push any callee-saved registers we have used
5677 #if defined(_TARGET_ARM64_)
5678 void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
5680 void CodeGen::genPushCalleeSavedRegisters()
5683 assert(compiler->compGeneratingProlog);
5685 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
5686 // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
5687 // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
5689 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
5690 #else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
5691 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5695 if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
5697 noway_assert(!"Used register RBM_FPBASE as a scratch register!");
5701 #ifdef _TARGET_XARCH_
5702 // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
5703 if (isFramePointerUsed())
5705 rsPushRegs &= ~RBM_FPBASE;
5709 #ifdef _TARGET_ARMARCH_
5710 // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
5711 if (isFramePointerUsed())
5712 rsPushRegs |= RBM_FPBASE;
5715 // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
5716 // changes in GC suspension architecture.
5718 // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
5719 // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
5720 // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
5721 // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
5722 // be saved on the stack and the GC suspension would time out.
5724 // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
5725 // the following to make GC suspension work in the above scenario:
5726 // - Make return address hijacking work even when lr is not saved on the stack.
5727 // - Generate fully interruptible code for loops that contains calls
5728 // - Generate fully interruptible code for leaf methods
5730 // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
5733 rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
5735 regSet.rsMaskCalleeSaved = rsPushRegs;
5736 #endif // _TARGET_ARMARCH_
5739 if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
5741 printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
5742 compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
5743 dspRegMask(rsPushRegs);
5745 assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
5749 #if defined(_TARGET_ARM_)
5750 regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5751 regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
5753 maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
5755 assert(FitsIn<int>(maskPushRegsInt));
5756 inst_IV(INS_push, (int)maskPushRegsInt);
5757 compiler->unwindPushMaskInt(maskPushRegsInt);
5759 if (maskPushRegsFloat != 0)
5761 genPushFltRegs(maskPushRegsFloat);
5762 compiler->unwindPushMaskFloat(maskPushRegsFloat);
5764 #elif defined(_TARGET_ARM64_)
5765 // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
5766 // options. Case numbers in comments here refer to this document.
5768 // For most frames, generate, e.g.:
5769 // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
5770 // // ensures stack stays aligned.
5771 // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
5772 // // at top of frame (highest addresses).
5773 // stp r21, r22, [sp, 0x70]
5776 // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
5777 // at the top of the frame.
5778 // 2. If we save FP, then the first store is FP, LR.
5779 // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
5780 // preserve their lower 8 bytes, by calling convention.
5781 // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
5783 // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
5785 int totalFrameSize = genTotalFrameSize();
5787 int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
5789 regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5790 regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
5792 if (compiler->info.compIsVarArgs)
5794 assert(maskSaveRegsFloat == RBM_NONE);
5797 int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
5798 // generate based on various sizes.
5799 int calleeSaveSPDelta = 0;
5800 int calleeSaveSPDeltaUnaligned = 0;
5802 if (isFramePointerUsed())
5804 // We need to save both FP and LR.
5806 assert((maskSaveRegsInt & RBM_FP) != 0);
5807 assert((maskSaveRegsInt & RBM_LR) != 0);
5809 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
5814 // stp fp,lr,[sp,#-framesz]!
5816 // The (totalFrameSize < 512) condition ensures that both the predecrement
5817 // and the postincrement of SP can occur with STP.
5819 // After saving callee-saved registers, we establish the frame pointer with:
5821 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5825 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
5826 INS_OPTS_PRE_INDEX);
5827 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5829 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5830 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5832 else if (totalFrameSize <= 512)
5837 // sub sp,sp,#framesz
5838 // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
5840 // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
5841 // signed offset encoding.
5843 // After saving callee-saved registers, we establish the frame pointer with:
5845 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5849 assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
5851 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5852 compiler->unwindAllocStack(totalFrameSize);
5854 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5855 compiler->lvaOutgoingArgSpaceSize);
5856 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5858 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5859 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5865 // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
5866 // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
5867 // stored. For example:
5868 // stp r19,r20,[sp,#-96]!
5869 // stp d8,d9,[sp,#16]
5870 // ... save varargs incoming integer registers ...
5871 // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
5872 // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
5873 // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
5877 // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
5878 // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
5879 // above them. If that is preferable, we could implement it.
5880 // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
5882 // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
5883 // padding from above).
5884 // Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
5887 // sub sp,sp,#remainingFrameSz
5888 // or, for large frames:
5889 // mov rX, #remainingFrameSz // maybe multiple instructions
5893 // stp fp,lr,[sp,#outsz]
5896 // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
5897 // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
5898 // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
5899 // following sequences:
5901 // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
5903 // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
5906 // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
5910 // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
5911 // // always guaranteed to be 8 byte aligned).
5912 // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
5914 // sub sp,sp,#outsz - #8
5916 // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
5917 // mov rX, #outsz - #8 // maybe multiple instructions
5923 calleeSaveSPDeltaUnaligned =
5924 totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
5925 assert(calleeSaveSPDeltaUnaligned >= 0);
5926 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5927 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5929 offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5930 assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
5931 // store the callee-saved registers.
5933 // We'll take care of these later, but callee-saved regs code shouldn't see them.
5934 maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
5939 // No frame pointer (no chaining).
5940 assert((maskSaveRegsInt & RBM_FP) == 0);
5941 assert((maskSaveRegsInt & RBM_LR) != 0);
5943 // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
5944 // if we only have one callee-saved register plus LR to save.
5946 NYI("Frame without frame pointer");
5950 assert(frameType != 0);
5952 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
5954 offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
5956 // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
5957 // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
5958 // need to add codes at all.
5960 if (compiler->info.compIsVarArgs)
5962 // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
5963 assert((offset % 16) == 0);
5964 for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
5966 regNumber reg2 = REG_NEXT(reg1);
5967 // stp REG, REG + 1, [SP, #offset]
5968 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
5969 compiler->unwindNop();
5970 offset += 2 * REGSIZE_BYTES;
5976 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
5977 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5979 else if (frameType == 2)
5981 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5982 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5984 else if (frameType == 3)
5986 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5987 assert(remainingFrameSz > 0);
5988 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
5989 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5991 if (compiler->lvaOutgoingArgSpaceSize >= 504)
5993 // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
5994 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5995 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5996 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5997 int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
5998 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5999 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
6001 genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
6002 offset += spAdjustment2;
6004 // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
6007 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
6008 assert(spAdjustment3 > 0);
6009 assert((spAdjustment3 % 16) == 0);
6011 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
6012 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
6014 genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
6015 offset += spAdjustment3;
6019 genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
6021 offset += remainingFrameSz;
6023 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
6024 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
6028 assert(offset == totalFrameSize);
6030 #elif defined(_TARGET_XARCH_)
6031 // Push backwards so we match the order we will pop them in the epilog
6032 // and all the other code that expects it to be in this order.
6033 for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
6035 regMaskTP regBit = genRegMask(reg);
6037 if ((regBit & rsPushRegs) != 0)
6039 inst_RV(INS_push, reg, TYP_REF);
6040 compiler->unwindPush(reg);
6042 if (!doubleAlignOrFramePointerUsed())
6044 psiAdjustStackLevel(REGSIZE_BYTES);
6047 rsPushRegs &= ~regBit;
6052 assert(!"Unknown TARGET");
6056 /*-----------------------------------------------------------------------------
6058 * Probe the stack and allocate the local stack frame: subtract from SP.
6059 * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
    assert(compiler->compGeneratingProlog);

    const size_t pageSize = compiler->eeGetPageSize();

    // initReg must not alias the secret stub parameter register, which is still live here.
    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
#endif // _TARGET_ARM_

#ifdef _TARGET_XARCH_
    if (frameSize == REGSIZE_BYTES)
        // Frame size is the same as register size.
        inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
#endif // _TARGET_XARCH_
    if (frameSize < pageSize)
#ifndef _TARGET_ARM64_
        // Frame size is (0x0008..0x1000): small enough that no probing is needed.
        inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
#endif // !_TARGET_ARM64_
    else if (frameSize < compiler->getVeryLargeFrameSize())
        // Frame size is (0x1000..0x3000): probe each page explicitly before allocating.
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_LOAD_STORE_ARCH
        // Probe the first page with a load at [sp - pageSize].
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
        getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
        regTracker.rsTrackRegTrash(initReg);
        *pInitRegZeroed = false; // The initReg does not contain zero
        getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
        if (frameSize >= 0x2000)
            // The frame spans a second page; probe that one too.
#if CPU_LOAD_STORE_ARCH
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
            getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
            regTracker.rsTrackRegTrash(initReg);
            getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
#ifdef _TARGET_ARM64_
        // On ARM64 the frame itself is allocated when callee-saved registers are saved;
        // the probes above produce no unwind codes.
        compiler->unwindPadding();
#else  // !_TARGET_ARM64_
#if CPU_LOAD_STORE_ARCH
        // The immediate may not fit the sub encoding; materialize it in initReg first.
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
        compiler->unwindPadding();
        getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
        inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
#endif // !_TARGET_ARM64_
        // Frame size >= 0x3000
        assert(frameSize >= compiler->getVeryLargeFrameSize());

        // Emit the following sequence to 'tickle' the pages.
        // Note it is important that stack pointer not change until this is
        // complete since the tickles could cause a stack overflow, and we
        // need to be able to crawl the stack afterward (which means the
        // stack pointer needs to be known).
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_XARCH_
        bool pushedStubParam = false;
        if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
            // push register containing the StubParam
            inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
            pushedStubParam = true;
#endif // _TARGET_XARCH_

#if CPU_LOAD_STORE_ARCH
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
#endif // CPU_LOAD_STORE_ARCH

        // Can't have a label inside the ReJIT padding area
        genPrologPadForReJit();

#if CPU_LOAD_STORE_ARCH
        // TODO-ARM64-Bug?: set the availMask properly!
        regMaskTP availMask =
            (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
        availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

        regNumber rOffset = initReg;

        // We pick the next lowest register number for rTemp
        noway_assert(availMask != RBM_NONE);
        tempMask = genFindLowestBit(availMask);
        rTemp    = genRegNumFromMask(tempMask);
        availMask &= ~tempMask;

        // We pick the next lowest register number for rLimit
        noway_assert(availMask != RBM_NONE);
        tempMask = genFindLowestBit(availMask);
        rLimit   = genRegNumFromMask(tempMask);
        availMask &= ~tempMask;

        // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
        // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.

        // Probe loop: touch one page per iteration until rOffset reaches rLimit.
        //      mov rLimit, -frameSize
        //      ldr rTemp, [sp+rOffset]
        //      sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
        //      cmp rOffset, rLimit

        noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
        getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
        regTracker.rsTrackRegTrash(rTemp);
#if defined(_TARGET_ARM_)
        getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
#elif defined(_TARGET_ARM64_)
        getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
#endif // _TARGET_ARM64_
        getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
        getEmitter()->emitIns_J(INS_bhi, NULL, -4); // branch back over the 4-instruction loop body

#else // !CPU_LOAD_STORE_ARCH

        // Code size for each instruction. We need this because the
        // backward branch is hard-coded with the number of bytes to branch.
        // The encoding differs based on the architecture and what register is
        // used (namely, using RAX has a smaller encoding).
        //
        //      lea eax, [esp - frameSize]
        //      lea esp, [esp - pageSize]   7
        //      test [esp], eax             3
        //      lea rsp, [rbp + frameSize]
        //
        // For AMD64 using RAX
        //      lea rax, [rsp - frameSize]
        //      lea rsp, [rsp - pageSize]   8
        //      test [rsp], rax             4
        //      lea rsp, [rax + frameSize]
        //
        // For AMD64 using RBP
        //      lea rbp, [rsp - frameSize]
        //      lea rsp, [rsp - pageSize]   8
        //      test [rsp], rbp             4
        //      lea rsp, [rbp + frameSize]

        int sPageSize = (int)pageSize;

        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border
        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize);
        getEmitter()->emitIns_R_AR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, 0);
        inst_RV_RV(INS_cmp, REG_SPBASE, initReg);

        int bytesForBackwardJump;
#ifdef _TARGET_AMD64_
        assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
        bytesForBackwardJump = -17;
#else  // !_TARGET_AMD64_
        assert(initReg == REG_EAX);
        bytesForBackwardJump = -14;
#endif // !_TARGET_AMD64_

        inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop

        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer
#endif // !CPU_LOAD_STORE_ARCH

        *pInitRegZeroed = false; // The initReg does not contain zero

#ifdef _TARGET_XARCH_
        if (pushedStubParam)
            inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
            regTracker.rsTrackRegTrash(REG_SECRET_STUB_PARAM);
#endif // _TARGET_XARCH_

#if CPU_LOAD_STORE_ARCH
        compiler->unwindPadding();

#if CPU_LOAD_STORE_ARCH
#ifndef _TARGET_ARM64_
        // rLimit still holds -frameSize; reuse it to perform the final SP adjustment.
        inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
#endif // !_TARGET_ARM64_
        //      sub esp, frameSize   6
        inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);

#ifndef _TARGET_ARM64_
    compiler->unwindAllocStack(frameSize);

    if (!doubleAlignOrFramePointerUsed())
        psiAdjustStackLevel(frameSize);
#endif // !_TARGET_ARM64_
6298 #if defined(_TARGET_ARM_)
6300 void CodeGen::genPushFltRegs(regMaskTP regMask)
6302 assert(regMask != 0); // Don't call uness we have some registers to push
6303 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
6305 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
6306 int slots = genCountBits(regMask);
6307 // regMask should be contiguously set
6308 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
6309 assert((tmpMask & (tmpMask - 1)) == 0);
6310 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
6312 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
6313 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
6314 noway_assert((slots % 2) == 0);
6316 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
6319 void CodeGen::genPopFltRegs(regMaskTP regMask)
6321 assert(regMask != 0); // Don't call uness we have some registers to pop
6322 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
6324 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
6325 int slots = genCountBits(regMask);
6326 // regMask should be contiguously set
6327 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
6328 assert((tmpMask & (tmpMask - 1)) == 0);
6330 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
6331 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
6332 noway_assert((slots % 2) == 0);
6334 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
6337 /*-----------------------------------------------------------------------------
6339 * If we have a jmp call, then the argument registers cannot be used in the
6340 * epilog. So return the current call's argument registers as the argument
6341 * registers for the jmp call.
regMaskTP CodeGen::genJmpCallArgMask()
    assert(compiler->compGeneratingEpilog);

    // Accumulate the registers used to pass the current method's arguments;
    // a jmp call reuses exactly these registers.
    regMaskTP argMask = RBM_NONE;
    for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
        const LclVarDsc& desc = compiler->lvaTable[varNum];
        if (desc.lvIsRegArg)
            argMask |= genRegMask(desc.lvArgReg);
6359 /*-----------------------------------------------------------------------------
6361 * Free the local stack frame: add to SP.
6362 * If epilog unwind hasn't been started, and we generate code, we start unwind
6363 * and set *pUnwindStarted = true.
void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
    assert(compiler->compGeneratingEpilog);

    // Add 'frameSize' to SP.
    //
    // Unfortunately, we can't just use:
    //
    //      inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
    //
    // because we need to generate proper unwind codes for each instruction generated,
    // and large frame sizes might generate a temp register load which might
    // need an unwind code. We don't want to generate a "NOP" code for this
    // temp register load; we want the unwind codes to start after that.

    if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
        // The frame size fits in an 'add' immediate: a single instruction suffices.
        if (!*pUnwindStarted)
            compiler->unwindBegEpilog();
            *pUnwindStarted = true;

        getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);

    // Otherwise the constant must first be materialized in a scratch register.
    regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
    // Do not use argument registers as scratch registers in the jmp epilog.
    grabMask &= ~genJmpCallArgMask();
#ifndef LEGACY_BACKEND
#else // LEGACY_BACKEND
    regNumber tmpReg = regSet.rsGrabReg(grabMask);
#endif // LEGACY_BACKEND
    instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
    // If unwind codes already started, the constant load must be reported as padding.
    if (*pUnwindStarted)
        compiler->unwindPadding();

    // We're going to generate an unwindable instruction, so check again if
    // we need to start the unwind codes.
    if (!*pUnwindStarted)
        compiler->unwindBegEpilog();
        *pUnwindStarted = true;

    getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);

    compiler->unwindAllocStack(frameSize);
6429 /*-----------------------------------------------------------------------------
6431 * Move of relocatable displacement value to register
6433 void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
6435 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
6436 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
6438 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6440 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
6444 /*-----------------------------------------------------------------------------
6446 * Move of relocatable data-label to register
6448 void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
6450 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
6451 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
6453 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6455 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
6459 /*-----------------------------------------------------------------------------
6461 * Move of relocatable immediate to register
6463 void CodeGen::genMov32RelocatableImmediate(emitAttr size, unsigned value, regNumber reg)
6465 _ASSERTE(EA_IS_RELOC(size));
6467 getEmitter()->emitIns_R_I(INS_movw, size, reg, value);
6468 getEmitter()->emitIns_R_I(INS_movt, size, reg, value);
6470 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6472 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
6476 /*-----------------------------------------------------------------------------
6478 * Returns register mask to push/pop to allocate a small stack frame,
6479 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
6480 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
    assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);

    // We can't do this optimization with callee saved floating point registers because
    // the stack would be allocated in a wrong spot.
    if (maskCalleeSavedFloat != RBM_NONE)

    // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
    // than extra sub sp,XXX/add sp,XXX.
    // R0 and R1 may be used by return value. Keep things simple and just skip the optimization
    // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
    // significant negative side-effects (more memory bus traffic).
        // A two-slot frame can be covered by pushing the R2/R3 scratch pair.
        case 2 * REGSIZE_BYTES:
            return RBM_R2 | RBM_R3;
6507 #endif // _TARGET_ARM_
6509 #if !FEATURE_STACK_FP_X87
6511 /*****************************************************************************
6513 * initFltRegs -- The mask of float regs to be zeroed.
6514 * initDblRegs -- The mask of double regs to be zeroed.
6515 * initReg -- A zero initialized integer reg to copy from.
6517 * Does best effort to move between VFP/xmm regs if one is already
6518 * initialized to 0. (Arm Only) Else copies from the integer register which
//------------------------------------------------------------------------
// genZeroInitFltRegs: Zero-initialize the requested float/double registers in
// the prolog, preferring a register-to-register copy from an FP register that
// has already been zeroed over re-materializing zero from 'initReg'.
//
void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
    assert(compiler->compGeneratingProlog);

    // The first float/double reg that is initialized to 0. So they can be used to
    // initialize the remaining registers.
    regNumber fltInitReg = REG_NA;
    regNumber dblInitReg = REG_NA;

    // Iterate through float/double registers and initialize them to 0 or
    // copy from already initialized register of the same type.
    regMaskTP regMask = genRegMask(REG_FP_FIRST);
    for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
        if (regMask & initFltRegs)
            // Do we have a float register already set to 0?
            if (fltInitReg != REG_NA)
                // Copy from float.
                inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
                // Do we have a double register initialized to 0?
                if (dblInitReg != REG_NA)
                    // Copy from double.
                    inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
                    // No zeroed FP register yet: copy zero from the integer initReg.
                    inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
#elif defined(_TARGET_XARCH_)
            // Xorpd xmmreg, xmmreg is the fastest way to initialize a float register to
            // zero instead of moving constant 0.0f. Though we just need to initialize just the 32-bits
            // we will use xorpd to initialize 64-bits of the xmm register so that it can be
            // used to zero initialize xmm registers that hold double values.
            inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
#elif defined(_TARGET_ARM64_)
            NYI("Initialize floating-point register to zero");
#error Unsupported or unset target architecture
        else if (regMask & initDblRegs)
            // Do we have a double register already set to 0?
            if (dblInitReg != REG_NA)
                // Copy from double.
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
                // Do we have a float register initialized to 0?
                if (fltInitReg != REG_NA)
                    // Copy from float.
                    inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
                    // No zeroed FP register yet: build the double zero from initReg twice.
                    inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
#elif defined(_TARGET_XARCH_)
            // Xorpd xmmreg, xmmreg is the fastest way to initialize a double register to
            // zero than moving constant 0.0d. We can also use lower 32-bits of 'reg'
            // for zero initializing xmm registers subsequently that contain float values.
            inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
#elif defined(_TARGET_ARM64_)
            // We will just zero out the entire vector register. This sets it to a double zero value
            getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
#error Unsupported or unset target architecture
6611 #endif // !FEATURE_STACK_FP_X87
6613 /*-----------------------------------------------------------------------------
6615 * Restore any callee-saved registers we have used
6618 #if defined(_TARGET_ARM_)
//------------------------------------------------------------------------
// genCanUsePopToReturn: Decide whether the epilog can return by popping
// directly into PC instead of popping LR and branching.
//
bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
    assert(compiler->compGeneratingEpilog);

    // Pop-to-PC is only viable when this isn't a jmp epilog and no registers
    // were pre-spilled in the prolog.
    if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
//------------------------------------------------------------------------
// genPopCalleeSavedRegisters: ARM epilog: restore the callee-saved float and
// integer registers that the prolog saved, possibly returning via pop-to-PC.
//
void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
    assert(compiler->compGeneratingEpilog);

    regMaskTP maskPopRegs      = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
    regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
    regMaskTP maskPopRegsInt   = maskPopRegs & ~maskPopRegsFloat;

    // First, pop float registers

    if (maskPopRegsFloat != RBM_NONE)
        genPopFltRegs(maskPopRegsFloat);
        compiler->unwindPopMaskFloat(maskPopRegsFloat);

    // Next, pop integer registers

    // If the frame was allocated by pushing extra registers, pop them back too.
    regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
    maskPopRegsInt |= maskStackAlloc;

    if (isFramePointerUsed())
        assert(!regSet.rsRegsModified(RBM_FPBASE));
        maskPopRegsInt |= RBM_FPBASE;

    if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
        maskPopRegsInt |= RBM_PC;
        // Record the fact that we use a pop to the PC to perform the return
        genUsedPopToReturn = true;
        maskPopRegsInt |= RBM_LR;
        // Record the fact that we did not use a pop to the PC to perform the return
        genUsedPopToReturn = false;

    assert(FitsIn<int>(maskPopRegsInt));
    inst_IV(INS_pop, (int)maskPopRegsInt);
    compiler->unwindPopMaskInt(maskPopRegsInt);
6678 #elif defined(_TARGET_ARM64_)
//------------------------------------------------------------------------
// genPopCalleeSavedRegistersAndFreeLclFrame: ARM64 epilog: restore the
// callee-saved registers and deallocate the local frame, mirroring the
// frame styles ("frameType") established by the prolog.
//
void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
    assert(compiler->compGeneratingEpilog);

    regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;

    if (isFramePointerUsed())
        rsRestoreRegs |= RBM_FPBASE;

    rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)

    regMaskTP regsToRestoreMask = rsRestoreRegs;

    int totalFrameSize = genTotalFrameSize();

    int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
    int frameType                  = 0; // An indicator of what type of frame we are popping.
    int calleeSaveSPDelta          = 0;
    int calleeSaveSPDeltaUnaligned = 0;

    if (isFramePointerUsed())
        if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
            // frameType 1: FP/LR were stored with a single pre-indexed stp at the frame bottom.
            if (compiler->compLocallocUsed)
                // Restore sp from fp
                //      mov sp, fp
                inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
                compiler->unwindSetFrameReg(REG_FPBASE, 0);

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.

            // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
            calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
        else if (totalFrameSize <= 512)
            // frameType 2: frame allocated with "sub sp"; FP/LR stored at #outsz.
            if (compiler->compLocallocUsed)
                // Restore sp from fp
                //      sub sp, fp, #outsz
                getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
                                            compiler->lvaOutgoingArgSpaceSize);
                compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.

            // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
            calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
            // frameType 3: large frame; mirror the prolog's multi-step SP adjustments.
            calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
                                         2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
            assert(calleeSaveSPDeltaUnaligned >= 0);
            assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
            calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.

            int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
            assert(remainingFrameSz > 0);

            if (compiler->lvaOutgoingArgSpaceSize >= 504)
                // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
                // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
                assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
                int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
                int spAdjustment2          = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
                int alignmentAdjustment2   = spAdjustment2 - spAdjustment2Unaligned;
                assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));

                if (compiler->compLocallocUsed)
                    // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
                    //      sub sp, fp, #alignmentAdjustment2
                    getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
                    compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
                    //      add sp,sp,#outsz             ; if #outsz is not 16-byte aligned, we need to be more
                    int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
                    assert(spAdjustment3 > 0);
                    assert((spAdjustment3 % 16) == 0);
                    genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);

                //      add sp,sp,#remainingFrameSz
                genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP0, nullptr);
                if (compiler->compLocallocUsed)
                    // Restore sp from fp
                    //      sub sp, fp, #outsz
                    getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
                                                compiler->lvaOutgoingArgSpaceSize);
                    compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);

                //      ldp fp,lr,[sp,#outsz]
                //      add sp,sp,#remainingFrameSz      ; might need to load this constant in a scratch register if
                genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0,

            // Unlike frameType=1 or frameType=2 that restore SP at the end,
            // frameType=3 already adjusted SP above to delete local frame.
            // There is at most one alignment slot between SP and where we store the callee-saved registers.
            calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
            assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
        // No frame pointer (no chaining).
        NYI("Frame without frame pointer");
        calleeSaveSPOffset = 0;

    genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);

        // frameType 1: restore FP/LR and free the whole frame with a single post-indexed ldp.
        //      ldp fp,lr,[sp],#framesz
        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
                                      INS_OPTS_POST_INDEX);
        compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
    else if (frameType == 2)
        //      ldr fp,lr,[sp,#outsz]
        //      add sp,sp,#framesz
        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
                                      compiler->lvaOutgoingArgSpaceSize);
        compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);

        getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
        compiler->unwindAllocStack(totalFrameSize);
    else if (frameType == 3)
        // Nothing to do after restoring callee-saved registers.
6857 #elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
6859 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
6861 assert(compiler->compGeneratingEpilog);
6863 unsigned popCount = 0;
6864 if (regSet.rsRegsModified(RBM_EBX))
6867 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
6869 if (regSet.rsRegsModified(RBM_FPBASE))
6871 // EBP cannot be directly modified for EBP frame and double-aligned frames
6872 assert(!doubleAlignOrFramePointerUsed());
6875 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
6878 #ifndef UNIX_AMD64_ABI
6879 // For System V AMD64 calling convention ESI and EDI are volatile registers.
6880 if (regSet.rsRegsModified(RBM_ESI))
6883 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
6885 if (regSet.rsRegsModified(RBM_EDI))
6888 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
6890 #endif // !defined(UNIX_AMD64_ABI)
6892 #ifdef _TARGET_AMD64_
6893 if (regSet.rsRegsModified(RBM_R12))
6896 inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
6898 if (regSet.rsRegsModified(RBM_R13))
6901 inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
6903 if (regSet.rsRegsModified(RBM_R14))
6906 inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
6908 if (regSet.rsRegsModified(RBM_R15))
6911 inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
6913 #endif // _TARGET_AMD64_
6915 // Amd64/x86 doesn't support push/pop of xmm registers.
6916 // These will get saved to stack separately after allocating
6917 // space on stack in prolog sequence. PopCount is essentially
6918 // tracking the count of integer registers pushed.
6920 noway_assert(compiler->compCalleeRegsPushed == popCount);
6923 #elif defined(_TARGET_X86_)
6925 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
6927 assert(compiler->compGeneratingEpilog);
6929 unsigned popCount = 0;
6931 /* NOTE: The EBP-less frame code below depends on the fact that
6932 all of the pops are generated right at the start and
6933 each takes one byte of machine code.
6936 if (regSet.rsRegsModified(RBM_FPBASE))
6938 // EBP cannot be directly modified for EBP frame and double-aligned frames
6939 noway_assert(!doubleAlignOrFramePointerUsed());
6941 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
6944 if (regSet.rsRegsModified(RBM_EBX))
6947 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
6949 if (regSet.rsRegsModified(RBM_ESI))
6952 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
6954 if (regSet.rsRegsModified(RBM_EDI))
6957 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
6959 noway_assert(compiler->compCalleeRegsPushed == popCount);
6964 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
6965 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
6966 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
6968 #ifdef _TARGET_ARM64_
6970 #else // !_TARGET_ARM64_
6971 if (*pInitRegZeroed == false)
6973 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6974 *pInitRegZeroed = true;
6977 #endif // !_TARGET_ARM64_
6980 /*-----------------------------------------------------------------------------
6982 * Do we have any untracked pointer locals at all,
6983 * or do we need to initialize memory for locspace?
6985 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
6986 * initializing memory (not inclusive).
6987 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
6988 * initializing memory.
6989 * initReg - A scratch register (that gets set to zero on some platforms).
6990 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
6992 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
6994 assert(compiler->compGeneratingProlog);
6996 if (genUseBlockInit)
6998 assert(untrLclHi > untrLclLo);
6999 #ifdef _TARGET_ARMARCH_
7001 Generate the following code:
7003 For cnt less than 10
7008 stm <rZero1,rZero2>,[rAddr!]
7009 <optional> stm <rZero1,rZero2>,[rAddr!]
7010 <optional> stm <rZero1,rZero2>,[rAddr!]
7011 <optional> stm <rZero1,rZero2>,[rAddr!]
7012 <optional> str rZero1,[rAddr]
7014 For rCnt greater than or equal to 10
7022 stm <rZero1,rZero2>,[rAddr!]
7026 <optional> str rZero1,[rAddr] // When cnt is odd
7028 NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
7032 regNumber rCnt = REG_NA; // Invalid
7035 regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
7036 availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
7038 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
7039 // a large constant.
7041 #if defined(_TARGET_ARM_)
7043 if (compiler->compLocallocUsed)
7045 availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
7048 regNumber rZero1; // We're going to use initReg for rZero1
7051 // We pick the next lowest register number for rZero2
7052 noway_assert(availMask != RBM_NONE);
7053 regMask = genFindLowestBit(availMask);
7054 rZero2 = genRegNumFromMask(regMask);
7055 availMask &= ~regMask;
7056 assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7057 0); // rZero2 is not a live incoming argument reg
7059 // We pick the next lowest register number for rAddr
7060 noway_assert(availMask != RBM_NONE);
7061 regMask = genFindLowestBit(availMask);
7062 rAddr = genRegNumFromMask(regMask);
7063 availMask &= ~regMask;
7065 #else // !define(_TARGET_ARM_)
7067 regNumber rZero1 = REG_ZR;
7069 *pInitRegZeroed = false;
7071 #endif // !defined(_TARGET_ARM_)
7073 bool useLoop = false;
7074 unsigned uCntBytes = untrLclHi - untrLclLo;
7075 assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
7076 unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
7078 // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
7079 // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
7080 // In both of these cases the stm/stp instruction will write two zeros to memory
7081 // and we will use a single str instruction at the end whenever we have an odd count.
7082 if (uCntSlots >= 10)
7087 // We pick the next lowest register number for rCnt
7088 noway_assert(availMask != RBM_NONE);
7089 regMask = genFindLowestBit(availMask);
7090 rCnt = genRegNumFromMask(regMask);
7091 availMask &= ~regMask;
7094 assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7095 0); // rAddr is not a live incoming argument reg
7096 #if defined(_TARGET_ARM_)
7097 if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
7098 #else // !_TARGET_ARM_
7099 if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
7100 #endif // !_TARGET_ARM_
7102 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
7106 // Load immediate into the InitReg register
7107 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
7108 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
7109 *pInitRegZeroed = false;
7114 noway_assert(uCntSlots >= 2);
7115 assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7116 0); // rCnt is not a live incoming argument reg
7117 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
7120 #if defined(_TARGET_ARM_)
7121 rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
7122 instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
7123 ssize_t stmImm = (ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
7124 #endif // _TARGET_ARM_
7128 while (uCntBytes >= REGSIZE_BYTES * 2)
7131 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
7132 #else // !_TARGET_ARM_
7133 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
7134 INS_OPTS_POST_INDEX);
7135 #endif // !_TARGET_ARM_
7136 uCntBytes -= REGSIZE_BYTES * 2;
7139 else // useLoop is true
7142 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
7143 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
7144 #else // !_TARGET_ARM_
7145 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
7146 INS_OPTS_POST_INDEX); // zero stack slots
7147 getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
7148 #endif // !_TARGET_ARM_
7149 getEmitter()->emitIns_J(INS_bhi, NULL, -3);
7150 uCntBytes %= REGSIZE_BYTES * 2;
7153 if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
7156 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
7157 #else // _TARGET_ARM_
7158 if ((uCntBytes - REGSIZE_BYTES) == 0)
7160 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
7164 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
7166 #endif // !_TARGET_ARM_
7167 uCntBytes -= REGSIZE_BYTES;
7169 #ifdef _TARGET_ARM64_
7172 assert(uCntBytes == sizeof(int));
7173 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
7174 uCntBytes -= sizeof(int);
7176 #endif // _TARGET_ARM64_
7177 noway_assert(uCntBytes == 0);
7179 #elif defined(_TARGET_XARCH_)
7181 Generate the following code:
7183 lea edi, [ebp/esp-OFFS]
7189 noway_assert(regSet.rsRegsModified(RBM_EDI));
7191 #ifdef UNIX_AMD64_ABI
7192 // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
7193 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
7195 noway_assert(regSet.rsRegsModified(RBM_R12));
7196 inst_RV_RV(INS_mov, REG_R12, REG_RCX);
7197 regTracker.rsTrackRegTrash(REG_R12);
7200 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
7202 noway_assert(regSet.rsRegsModified(RBM_R13));
7203 inst_RV_RV(INS_mov, REG_R13, REG_RDI);
7204 regTracker.rsTrackRegTrash(REG_R13);
7206 #else // !UNIX_AMD64_ABI
7207 // For register arguments we may have to save ECX
7208 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
7210 noway_assert(regSet.rsRegsModified(RBM_ESI));
7211 inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
7212 regTracker.rsTrackRegTrash(REG_ESI);
7214 #endif // !UNIX_AMD64_ABI
7216 noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
7218 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
7219 regTracker.rsTrackRegTrash(REG_EDI);
7221 inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
7222 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
7223 instGen(INS_r_stosd);
7225 #ifdef UNIX_AMD64_ABI
7226 // Move back the argument registers
7227 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
7229 inst_RV_RV(INS_mov, REG_RCX, REG_R12);
7232 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
7234 inst_RV_RV(INS_mov, REG_RDI, REG_R13);
7236 #else // !UNIX_AMD64_ABI
7237 // Move back the argument registers
7238 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
7240 inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
7242 #endif // !UNIX_AMD64_ABI
7245 #error Unsupported or unset target architecture
7248 else if (genInitStkLclCnt > 0)
7250 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7251 0); // initReg is not a live incoming argument reg
7253 /* Initialize any lvMustInit vars on the stack */
7258 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7260 if (!varDsc->lvMustInit)
7265 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
7266 // partially-enregistered vars in the case where we don't use a block init.
7267 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
7269 // lvMustInit can only be set for GC types or TYP_STRUCT types
7270 // or when compInitMem is true
7271 // or when in debug code
7273 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
7274 compiler->info.compInitMem || compiler->opts.compDbgCode);
7276 #ifndef LEGACY_BACKEND
7277 if (!varDsc->lvOnFrame)
7281 #else // LEGACY_BACKEND
7282 if (varDsc->lvRegister)
7284 if (varDsc->lvOnFrame)
7286 /* This is a partially enregistered TYP_LONG var */
7287 noway_assert(varDsc->lvOtherReg == REG_STK);
7288 noway_assert(varDsc->lvType == TYP_LONG);
7290 noway_assert(compiler->info.compInitMem);
7292 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, genGetZeroReg(initReg, pInitRegZeroed),
7293 varNum, sizeof(int));
7297 #endif // LEGACY_BACKEND
7299 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
7300 (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
7302 // We only initialize the GC variables in the TYP_STRUCT
7303 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
7304 const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
7306 for (unsigned i = 0; i < slots; i++)
7308 if (gcPtrs[i] != TYPE_GC_NONE)
7310 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
7311 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
7317 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
7319 // zero out the whole thing rounded up to a single stack slot size
7320 unsigned lclSize = (unsigned)roundUp(compiler->lvaLclSize(varNum), sizeof(int));
7322 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
7324 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
7327 #ifdef _TARGET_64BIT_
7328 assert(i == lclSize || (i + sizeof(int) == lclSize));
7331 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
7334 #endif // _TARGET_64BIT_
7335 assert(i == lclSize);
7339 if (!TRACK_GC_TEMP_LIFETIMES)
7341 assert(compiler->tmpAllFree());
7342 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr;
7343 tempThis = compiler->tmpListNxt(tempThis))
7345 if (!varTypeIsGC(tempThis->tdTempType()))
7350 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
7352 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
7358 /*-----------------------------------------------------------------------------
7360 * Save the generic context argument.
7362 * We need to do this within the "prolog" in case anyone tries to inspect
7363 * the param-type-arg/this (which can be done after the prolog) using
7364 * ICodeManager::GetParamTypeArg().
7367 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
7369 assert(compiler->compGeneratingProlog);
7371 bool reportArg = compiler->lvaReportParamTypeArg();
7373 // We should report either generic context arg or "this" when used so.
7376 #ifndef JIT32_GCENCODER
7377 if (!compiler->lvaKeepAliveAndReportThis())
7384 // For JIT32_GCENCODER, we won't be here if reportArg is false.
7385 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
7387 noway_assert(contextArg != BAD_VAR_NUM);
7388 LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
7390 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
7391 // moved to its final home location. So we need to use it from the
7392 // incoming location.
7396 bool isPrespilledForProfiling = false;
7397 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
7398 isPrespilledForProfiling =
7399 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
7402 // Load from the argument register only if it is not prespilled.
7403 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
7405 reg = varDsc->lvArgReg;
7409 if (isFramePointerUsed())
7411 #if defined(_TARGET_ARM_)
7412 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
7413 // will become enregistered.
7414 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
7415 noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
7416 (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
7418 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
7419 // will become enregistered.
7420 noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
7424 // We will just use the initReg since it is an available register
7425 // and we are probably done using it anyway...
7427 *pInitRegZeroed = false;
7429 // mov reg, [compiler->info.compTypeCtxtArg]
7430 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
7431 regTracker.rsTrackRegTrash(reg);
7434 #if CPU_LOAD_STORE_ARCH
7435 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
7436 compiler->lvaCachedGenericContextArgOffset());
7437 #else // CPU_LOAD_STORE_ARCH
7438 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
7439 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
7440 compiler->lvaCachedGenericContextArgOffset());
7441 #endif // !CPU_LOAD_STORE_ARCH
7444 /*-----------------------------------------------------------------------------
7446 * Set the "GS" security cookie in the prolog.
7449 void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
7451 assert(compiler->compGeneratingProlog);
7453 if (!compiler->getNeedsGSSecurityCookie())
7458 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
7460 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
7462 #ifdef _TARGET_AMD64_
7463 // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
7464 getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
7465 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
7467 // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
7468 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
7469 compiler->lvaGSSecurityCookie, 0, initReg);
7475 #ifdef _TARGET_XARCH_
7476 // Always use EAX on x86 and x64
7477 // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
7480 // We will just use the initReg since it is an available register
7484 *pInitRegZeroed = false;
7486 #if CPU_LOAD_STORE_ARCH
7487 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
7488 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
7489 regTracker.rsTrackRegTrash(reg);
7491 // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
7492 // mov dword ptr [frame.GSSecurityCookie], reg
7493 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
7494 regTracker.rsTrackRegTrash(reg);
7496 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
7500 #ifdef PROFILING_SUPPORTED
7502 //-----------------------------------------------------------------------------------
7503 // genProfilingEnterCallback: Generate the profiling function enter callback.
7506 // initReg - register to use as scratch register
7507 // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
7508 // not zero after this call.
7514 // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
7515 // VM\i386\asmhelpers.asm for details):
7516 // 1. The calling sequence for calling the helper is:
7517 // push FunctionIDOrClientID
7518 // call ProfileEnterHelper
7519 // 2. The calling function has an EBP frame.
7520 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
7521 // the following prolog is assumed:
7524 // 4. All registers are preserved.
7525 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
7527 void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
7529 assert(compiler->compGeneratingProlog);
7531 // Give profiler a chance to back out of hooking this method
7532 if (!compiler->compIsProfilerHookNeeded())
7537 #if defined(_TARGET_AMD64_)
7538 #if !defined(UNIX_AMD64_ABI)
7543 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
7544 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7545 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
7547 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
7548 // In case of vararg methods, arg regs are already homed.
7550 // Note: Here we don't need to worry about updating gc'info since enter
7551 // callback is generated as part of prolog which is non-gc interruptible.
7552 // Moreover GC cannot kick while executing inside profiler callback which is a
7553 // profiler requirement so it can examine arguments which could be obj refs.
7554 if (!compiler->info.compIsVarArgs)
7556 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
7558 noway_assert(varDsc->lvIsParam);
7560 if (!varDsc->lvIsRegArg)
7565 var_types storeType = varDsc->lvaArgType();
7566 regNumber argReg = varDsc->lvArgReg;
7568 instruction store_ins = ins_Store(storeType);
7571 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
7573 store_ins = INS_mov;
7575 #endif // FEATURE_SIMD
7577 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
7581 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
7582 // RCX = ProfilerMethHnd
7583 if (compiler->compProfilerMethHndIndirected)
7585 // Profiler hooks enabled during Ngen time.
7586 // Profiler handle needs to be accessed through an indirection of a pointer.
7587 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7591 // No need to record relocations, if we are generating ELT hooks under the influence
7592 // of COMPlus_JitELTHookEnabled=1
7593 if (compiler->opts.compJitELTHookEnabled)
7595 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7599 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7603 // RDX = caller's SP
7605 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
7606 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
7607 // of that offset to FramePointer to obtain caller's SP value.
7608 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7609 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7610 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
7612 // Can't have a call until we have enough padding for rejit
7613 genPrologPadForReJit();
7615 // This will emit either
7616 // "call ip-relative 32-bit offset" or
7617 // "mov rax, helper addr; call rax"
7618 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
7620 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
7621 // generation logic that moves args around as required by first BB entry point conditions
7622 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
7623 // and genEnregisterIncomingStackArgs().
7625 // Now reload arg registers from home locations.
7627 // - we need to reload only known (i.e. fixed) reg args.
7628 // - if floating point type, also reload it into corresponding integer reg
7629 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
7631 noway_assert(varDsc->lvIsParam);
7633 if (!varDsc->lvIsRegArg)
7638 var_types loadType = varDsc->lvaArgType();
7639 regNumber argReg = varDsc->lvArgReg;
7641 instruction load_ins = ins_Load(loadType);
7644 if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
7648 #endif // FEATURE_SIMD
7650 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
7653 if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
7655 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
7656 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
7657 inst_RV_RV(ins, argReg, intArgReg, loadType);
7659 #endif // FEATURE_VARARG
7662 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
7663 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
7665 *pInitRegZeroed = false;
7668 #else // !defined(UNIX_AMD64_ABI)
7670 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
7671 // R14 = ProfilerMethHnd
7672 if (compiler->compProfilerMethHndIndirected)
7674 // Profiler hooks enabled during Ngen time.
7675 // Profiler handle needs to be accessed through an indirection of a pointer.
7676 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
7677 (ssize_t)compiler->compProfilerMethHnd);
7681 // No need to record relocations, if we are generating ELT hooks under the influence
7682 // of COMPlus_JitELTHookEnabled=1
7683 if (compiler->opts.compJitELTHookEnabled)
7685 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7689 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7693 // R15 = caller's SP
7695 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
7696 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
7697 // of that offset to FramePointer to obtain caller's SP value.
7698 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7699 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7700 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
7702 // Can't have a call until we have enough padding for rejit
7703 genPrologPadForReJit();
7705 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
7706 // We use R11 here. This will emit either
7707 // "call ip-relative 32-bit offset" or
7708 // "mov r11, helper addr; call r11"
7709 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
7711 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
7712 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
7714 *pInitRegZeroed = false;
7717 #endif // !defined(UNIX_AMD64_ABI)
7719 #elif defined(_TARGET_X86_) || (defined(_TARGET_ARM_) && defined(LEGACY_BACKEND))
7721 unsigned saveStackLvl2 = genStackLevel;
7723 #if defined(_TARGET_X86_)
7724 // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
7725 // for x86 stack unwinding
7727 // Push the profilerHandle
7728 if (compiler->compProfilerMethHndIndirected)
7730 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
7734 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
7736 #elif defined(_TARGET_ARM_)
7737 // On Arm arguments are prespilled on stack, which frees r0-r3.
7738 // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
7739 // The call target register could be any free register.
7740 regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_ENTER_ARG);
7741 noway_assert(argReg == REG_PROFILER_ENTER_ARG);
7742 regSet.rsLockReg(RBM_PROFILER_ENTER_ARG);
7744 if (compiler->compProfilerMethHndIndirected)
7746 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
7747 regTracker.rsTrackRegTrash(argReg);
7751 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
7754 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
7758 // Can't have a call until we have enough padding for rejit
7760 genPrologPadForReJit();
7762 // This will emit either
7763 // "call ip-relative 32-bit offset" or
7764 // "mov rax, helper addr; call rax"
7765 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
7766 0, // argSize. Again, we have to lie about it
7767 EA_UNKNOWN); // retSize
7769 #if defined(_TARGET_X86_)
7771 // Adjust the number of stack slots used by this managed method if necessary.
7773 if (compiler->fgPtrArgCntMax < 1)
7775 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
7776 compiler->fgPtrArgCntMax = 1;
7778 #elif defined(_TARGET_ARM_)
7780 regSet.rsUnlockReg(RBM_PROFILER_ENTER_ARG);
7782 if (initReg == argReg)
7784 *pInitRegZeroed = false;
7787 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
7790 /* Restore the stack level */
7792 SetStackLevel(saveStackLvl2);
7795 NYI("Emit Profiler Enter callback");
7799 //-----------------------------------------------------------------------------------
7800 // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
7801 // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
7804 // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
7810 // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
7811 // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
7812 // 1. The calling sequence for calling the helper is:
7813 // push FunctionIDOrClientID
7814 // call ProfileLeaveHelper or ProfileTailcallHelper
7815 // 2. The calling function has an EBP frame.
7816 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
7817 // the following prolog is assumed:
7820 // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
7821 // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
7822 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
// Emits the profiler Leave (or Tailcall) ELT callout. This is invoked while generating
// code for a GT_RETURN node, not as part of the epilog proper (see header comment above
// for the per-platform helper calling contract).
//
// Arguments:
//    helper - CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
7824 void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
7826 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
7828 // Only hook if profiler says it's okay.
7829 if (!compiler->compIsProfilerHookNeeded())
// Record that this method emits a profiler callback.
7834 compiler->info.compProfilerCallback = true;
7836 // Need to save on to the stack level, since the helper call will pop the argument
7837 unsigned saveStackLvl2 = genStackLevel;
7839 #if defined(_TARGET_AMD64_)
7840 #if !defined(UNIX_AMD64_ABI)
7842 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
7843 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7844 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
7846 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
7847 // registers that profiler callback kills.
7848 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
7850 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
7851 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
7854 // At this point return value is computed and stored in RAX or XMM0.
7855 // On Amd64, Leave callback preserves the return register. We keep
7856 // RAX alive by not reporting as trashed by helper call. Also note
7857 // that GC cannot kick-in while executing inside profiler callback,
7858 // which is a requirement of profiler as well since it needs to examine
7859 // return value which could be an obj ref.
7861 // RCX = ProfilerMethHnd
7862 if (compiler->compProfilerMethHndIndirected)
7864 // Profiler hooks enabled during Ngen time.
7865 // Profiler handle needs to be accessed through an indirection of an address.
7866 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7870 // Don't record relocations, if we are generating ELT hooks under the influence
7871 // of COMPlus_JitELTHookEnabled=1
7872 if (compiler->opts.compJitELTHookEnabled)
7874 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7878 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7882 // RDX = caller's SP
7883 // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
7884 // of the stmnts to execute unconditionally and clean-up rest.
7885 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7887 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
7888 // value of that offset to FramePointer to obtain caller's SP value.
7889 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7890 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
7894 // If we are here means that it is a tentative frame layout during which we
7895 // cannot use caller's SP offset since it is an estimate. For now we require the
7896 // method to have at least a single arg so that we can use it to obtain caller's
7898 LclVarDsc* varDsc = compiler->lvaTable;
7899 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
7901 // lea rdx, [FramePointer + Arg0's offset]
7902 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
7905 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
7906 // We use R8 here. This will emit either
7907 // "call ip-relative 32-bit offset" or
7908 // "mov r8, helper addr; call r8"
7909 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
7911 #else // !defined(UNIX_AMD64_ABI)
7913 // RDI = ProfilerMethHnd
7914 if (compiler->compProfilerMethHndIndirected)
7916 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7920 if (compiler->opts.compJitELTHookEnabled)
7922 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7926 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7930 // RSI = caller's SP
7931 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7933 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7934 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
// Tentative frame layout: fall back to Arg0's frame offset (see matching Enter path above).
7938 LclVarDsc* varDsc = compiler->lvaTable;
7939 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
7941 // lea rdx, [FramePointer + Arg0's offset]
7942 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
7945 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
7946 // We use R11 here. This will emit either
7947 // "call ip-relative 32-bit offset" or
7948 // "mov r11, helper addr; call r11"
7949 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET)/* NOTE: on UNIX_AMD64_ABI the helper call target register differs from the Windows path above */;
7951 #endif // !defined(UNIX_AMD64_ABI)
7953 #elif defined(_TARGET_X86_)
7956 // Push the profilerHandle
7959 if (compiler->compProfilerMethHndIndirected)
7961 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd)/* indirected handle: push [addr] */;
7965 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
// The x86 leave/tailcall helper pops its single stack argument (see header comment).
7969 genEmitHelperCall(helper,
7970 sizeof(int) * 1, // argSize
7971 EA_UNKNOWN); // retSize
7974 // Adjust the number of stack slots used by this managed method if necessary.
7976 if (compiler->fgPtrArgCntMax < 1)
7978 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
7979 compiler->fgPtrArgCntMax = 1;
7982 #elif defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
7985 // Push the profilerHandle
7988 // We could optimize register usage based on return value is int/long/void. But to keep it simple we will lock
7989 // RBM_PROFILER_RET_USED always.
7990 regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
7991 noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
7992 regSet.rsLockReg(RBM_PROFILER_RET_USED);
7994 // Contract between JIT and Profiler Leave callout on arm:
7995 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
7996 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
7997 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
7998 // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
8000 // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
8003 emitAttr attr = EA_UNKNOWN;
8005 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
8006 (varTypeIsFloating(compiler->info.compRetType) ||
8007 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
8013 // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
8014 // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
8015 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
8018 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH)/* carry GC-ref-ness over to the scratch reg */;
8020 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
8023 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH)/* carry byref-ness over to the scratch reg */;
8030 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
8031 regTracker.rsTrackRegTrash(REG_PROFILER_RET_SCRATCH);
8032 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
8036 if (compiler->compProfilerMethHndIndirected)
8038 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8039 regTracker.rsTrackRegTrash(REG_ARG_0);
8043 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8046 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
8048 EA_UNKNOWN); // retSize
8050 // Restore state that existed before profiler callback
// Move the (possibly GC-tracked) return value back from the scratch register into r0.
8053 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
8054 regTracker.rsTrackRegTrash(REG_ARG_0);
8055 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
8058 regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
8061 NYI("Emit Profiler Leave callback");
8064 /* Restore the stack level */
8065 SetStackLevel(saveStackLvl2);
8068 #endif // PROFILING_SUPPORTED
8070 /*****************************************************************************
8075 These instructions are just a reordering of the instructions used today.
8081 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8083 add esp, LOCALS_SIZE / pop dummyReg
8093 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
8094 Everything else is similar, though in a different order.
8096 The security object will no longer be at a fixed offset. However, the
8097 offset can still be determined by looking up the GC-info and determining
8098 how many callee-saved registers are pushed.
8105 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8107 add esp, LOCALS_SIZE / pop dummyReg
8111 (mov esp, ebp if there are no callee-saved registers)
8115 Double-aligned frame :
8116 --------------------
8118 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
8119 of callee-saved registers are pushed on the stack so that the locals
8120 themselves are qword-aligned. The instructions are the same as today,
8121 just in a different order.
8129 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
8131 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
8140 localloc (with ebp) frames :
8141 --------------------------
8143 The instructions are the same as today, just in a different order.
8144 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
8145 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
8152 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8154 lea esp, [ebp-calleeSavedRegsPushedSize]
8158 (mov esp, ebp if there are no callee-saved registers)
8162 *****************************************************************************/
8164 /*****************************************************************************
8166 * Generates appropriate NOP padding for a function prolog to support ReJIT.
// Generates appropriate NOP padding for the function prolog so that the profiler
// ReJIT infrastructure has room to patch the method entry. Only emitted on xarch,
// and only when the JIT_FLAG_PROF_REJIT_NOPS jit flag is set.
8169 void CodeGen::genPrologPadForReJit()
8171 assert(compiler->compGeneratingProlog);
8173 #ifdef _TARGET_XARCH_
// Bail out unless the profiler asked for ReJIT nop padding.
8174 if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
8179 #if FEATURE_EH_FUNCLETS
8181 // No need to generate pad (nops) for funclets.
8182 // When compiling the main function (and not a funclet)
8183 // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
8184 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
8189 #endif // FEATURE_EH_FUNCLETS
// Estimate the current prolog size to decide how much padding is needed.
8191 unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
8199 /*****************************************************************************
8201 * Reserve space for a function prolog.
// Reserves an instruction-group placeholder for the function prolog, ahead of the
// code for 'block' (the first block of the method). The prolog code itself is
// generated later, once the frame layout is final.
8204 void CodeGen::genReserveProlog(BasicBlock* block)
8206 assert(block != nullptr);
8208 JITDUMP("Reserving prolog IG for block BB%02u\n", block->bbNum);
8210 /* Nothing is live on entry to the prolog */
// Empty GC var set and zero GC/byref register masks: no GC state is live yet.
8212 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
8215 /*****************************************************************************
8217 * Reserve space for a function epilog.
// Reserves an instruction-group placeholder for a function epilog following 'block'.
// The GC liveness recorded on the placeholder is the current GC state, adjusted so
// that a GC-typed return value in REG_INTRET is reported live through the epilog.
8220 void CodeGen::genReserveEpilog(BasicBlock* block)
// Start from the currently-live GC-ref and byref register sets.
8222 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
8223 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
8225 /* The return value is special-cased: make sure it goes live for the epilog */
8227 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
// GT_JMP epilogs don't return a value, so no special-casing is needed for them.
8229 if (genFullPtrRegMap && !jmpEpilog)
8231 if (varTypeIsGC(compiler->info.compRetNativeType))
8233 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
8235 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
// Add the return register to the appropriate (gcref vs byref) placeholder mask.
8237 switch (compiler->info.compRetNativeType)
8240 gcrefRegsArg |= RBM_INTRET;
8243 byrefRegsArg |= RBM_INTRET;
8251 JITDUMP("Reserving epilog IG for block BB%02u\n", block->bbNum);
8253 assert(block != nullptr);
8254 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
// 'last' tells the emitter whether this placeholder is the final one in the method.
8255 bool last = (block->bbNext == nullptr);
8256 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
8259 #if FEATURE_EH_FUNCLETS
8261 /*****************************************************************************
8263 * Reserve space for a funclet prolog.
// Reserves an instruction-group placeholder for a funclet prolog ahead of 'block'.
// Only the exception object register (at most) may hold a live GC ref on entry.
8266 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
8268 assert(block != nullptr);
8270 /* Currently, no registers are live on entry to the prolog, except maybe
8271 the exception object. There might be some live stack vars, but they
8272 cannot be accessed until after the frame pointer is re-established.
8273 In order to potentially prevent emitting a death before the prolog
8274 and a birth right after it, we just report it as live during the
8275 prolog, and rely on the prolog being non-interruptible. Trust
8276 genCodeForBBlist to correctly initialize all the sets.
8278 We might need to relax these asserts if the VM ever starts
8279 restoring any registers, then we could have live-in reg vars...
// Verify the invariant described above: at most the exception object is a live
// GC ref, and nothing is live as a byref.
8282 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
8283 noway_assert(gcInfo.gcRegByrefSetCur == 0);
8285 JITDUMP("Reserving funclet prolog IG for block BB%02u\n", block->bbNum);
// Report the current GC state as live across the (non-interruptible) prolog.
8287 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
8288 gcInfo.gcRegByrefSetCur, false);
8291 /*****************************************************************************
8293 * Reserve space for a funclet epilog.
// Reserves an instruction-group placeholder for a funclet epilog following 'block',
// carrying the current GC liveness into the placeholder.
8296 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
8298 assert(block != nullptr);
8300 JITDUMP("Reserving funclet epilog IG for block BB%02u\n", block->bbNum);
// 'last' tells the emitter whether this placeholder is the final one in the method.
8302 bool last = (block->bbNext == nullptr);
8303 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
8304 gcInfo.gcRegByrefSetCur, last);
8307 #endif // FEATURE_EH_FUNCLETS
8309 /*****************************************************************************
8310 * Finalize the frame size and offset assignments.
8312 * No changes can be made to the modified register set after this, since that can affect how many
8313 * callee-saved registers get saved.
// Finalizes the frame: decides the full set of modified (and therefore saved)
// callee-saved registers, counts how many will be pushed, and assigns final stack
// offsets. After this runs, the modified register set must not change, since that
// would alter how many callee-saved registers get saved.
8315 void CodeGen::genFinalizeFrame()
8317 JITDUMP("Finalizing stack frame\n");
8319 #ifndef LEGACY_BACKEND
8320 // Initializations need to happen based on the var locations at the start
8321 // of the first basic block, so load those up. In particular, the determination
8322 // of whether or not to use block init in the prolog is dependent on the variable
8323 // locations on entry to the function.
8324 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
8325 #endif // !LEGACY_BACKEND
8327 genCheckUseBlockInit();
8329 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
8330 CLANG_FORMAT_COMMENT_ANCHOR;
8332 #if defined(_TARGET_X86_)
8334 if (compiler->compTailCallUsed)
8336 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
8337 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
8338 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
8339 // actually get saved.
8341 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
8343 #endif // _TARGET_X86_
8345 #if defined(_TARGET_ARMARCH_)
8346 // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
8347 // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
8348 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
8350 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
8352 #endif // defined(_TARGET_ARMARCH_)
8354 #if defined(_TARGET_ARM_)
8355 // If there are any reserved registers, add them to the
8356 if (regSet.rsMaskResvd != RBM_NONE)
8358 regSet.rsSetRegsModified(regSet.rsMaskResvd);
8360 #endif // _TARGET_ARM_
8365 printf("Modified regs: ");
8366 dspRegMask(regSet.rsGetModifiedRegsMask());
8371 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
8372 if (compiler->opts.compDbgEnC)
8374 // We always save FP.
8375 noway_assert(isFramePointerUsed());
8376 #ifdef _TARGET_AMD64_
8377 // On x64 we always save exactly RBP, RSI and RDI for EnC.
8378 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
8379 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
8380 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
8381 #else // !_TARGET_AMD64_
8382 // On x86 we save all callee saved regs so the saved reg area size is consistent
8383 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
8384 #endif // !_TARGET_AMD64_
8387 /* If we have any pinvoke calls, we might potentially trash everything */
8388 if (compiler->info.compCallUnmanaged)
8390 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
8391 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
8394 #ifdef UNIX_AMD64_ABI
8395 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
8396 if (compiler->compIsProfilerHookNeeded())
8398 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
8402 /* Count how many callee-saved registers will actually be saved (pushed) */
8404 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
8405 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
8408 // EBP cannot be (directly) modified
8409 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
// The callee-saved registers that must be pushed are exactly the modified ones.
8412 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
8414 #ifdef _TARGET_ARMARCH_
8415 if (isFramePointerUsed())
8417 // For a FP based frame we have to push/pop the FP register
8419 maskCalleeRegsPushed |= RBM_FPBASE;
8421 // This assert check that we are not using REG_FP
8422 // as both the frame pointer and as a codegen register
8424 assert(!regSet.rsRegsModified(RBM_FPBASE));
8427 // we always push LR. See genPushCalleeSavedRegisters
8429 maskCalleeRegsPushed |= RBM_LR;
8431 #if defined(_TARGET_ARM_)
8432 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
8433 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
8434 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
8436 if ((maskPushRegsFloat != RBM_NONE) ||
8437 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
8439 // Here we try to keep stack double-aligned before the vpush
8440 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
// Find an otherwise-unpushed register starting at r4 to push as alignment padding.
8442 regNumber extraPushedReg = REG_R4;
8443 while (maskPushRegsInt & genRegMask(extraPushedReg))
8445 extraPushedReg = REG_NEXT(extraPushedReg);
8447 if (extraPushedReg < REG_R11)
8449 maskPushRegsInt |= genRegMask(extraPushedReg);
8450 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
8453 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
8456 // We currently only expect to push/pop consecutive FP registers
8457 // and these have to be double-sized registers as well.
8458 // Here we will ensure that maskPushRegsFloat obeys these requirements.
8460 if (maskPushRegsFloat != RBM_NONE)
// Grow a contiguous double-register mask (starting at REG_F16) until it covers
// all float registers that need pushing; then pad the push mask to match it.
8462 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
8463 while (maskPushRegsFloat > contiguousMask)
8465 contiguousMask <<= 2;
8466 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
8468 if (maskPushRegsFloat != contiguousMask)
8470 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
8471 maskPushRegsFloat |= maskExtraRegs;
8472 regSet.rsSetRegsModified(maskExtraRegs);
8473 maskCalleeRegsPushed |= maskExtraRegs;
8476 #endif // _TARGET_ARM_
8477 #endif // _TARGET_ARMARCH_
8479 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
8480 // Compute the count of callee saved float regs saved on stack.
8481 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
8482 // regs are stack allocated and preserved in their stack locations.
8483 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
8484 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
8485 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
8487 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
8492 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
8493 dspRegMask(maskCalleeRegsPushed);
8498 /* Assign the final offsets to things living on the stack frame */
8500 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
8502 /* We want to make sure that the prolog size calculated here is accurate
8503 (that is instructions will not shrink because of conservative stack
8504 frame approximations). We do this by filling in the correct size
8505 here (where we have committed to the final numbers for the frame offsets)
8506 This will ensure that the prolog size is always correct
8508 getEmitter()->emitMaxTmpSize = compiler->tmpSize;
8511 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
8513 compiler->lvaTableDump();
8518 //------------------------------------------------------------------------
8519 // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
8522 // delta - the offset to add to the current stack pointer to establish the frame pointer
8523 // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
// Sets up the frame pointer from the stack pointer during the prolog.
//
// Arguments:
//    delta            - offset added to the current SP to form the frame pointer
//    reportUnwindData - whether to record this in the OS unwind data
8525 void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
8527 assert(compiler->compGeneratingProlog);
8529 #if defined(_TARGET_XARCH_)
// Zero delta: a plain "mov rbp, rsp" suffices.
8533 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
// Non-zero delta: "lea rbp, [rsp + delta]".
8538 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
8539 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
8543 if (reportUnwindData)
8545 compiler->unwindSetFrameReg(REG_FPBASE, delta);
8548 #elif defined(_TARGET_ARM_)
// On ARM the delta must fit the add-to-SP immediate encoding.
8550 assert(arm_Valid_Imm_For_Add_SP(delta));
8551 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
8553 if (reportUnwindData)
8555 compiler->unwindPadding();
// Other targets are not implemented yet.
8559 NYI("establish frame pointer");
8563 /*****************************************************************************
8565 * Generates code for a function prolog.
8567 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
8569 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
8570 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
8571 * only instructions which result in control not going to the next instruction. Basically, any time execution would
8572 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
8573 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
8574 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
8576 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
8577 * debugger team to ensure that stepping still works.
8579 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
8583 #pragma warning(push)
8584 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
8586 void CodeGen::genFnProlog()
8588 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
8590 compiler->funSetCurrentFunc(0);
8595 printf("*************** In genFnProlog()\n");
8600 genInterruptibleUsed = true;
8603 #ifdef LEGACY_BACKEND
8605 #endif // LEGACY_BACKEND
8607 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
8609 /* Ready to start on the prolog proper */
8611 getEmitter()->emitBegProlog();
8612 compiler->unwindBegProlog();
8614 // Do this so we can put the prolog instruction group ahead of
8615 // other instruction groups
8616 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
8619 if (compiler->opts.dspCode)
8621 printf("\n__prolog:\n");
8625 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8627 // Create new scopes for the method-parameters for the prolog-block.
8633 if (compiler->compJitHaltMethod())
8635 /* put a nop first because the debugger and other tools are likely to
8636 put an int3 at the begining and we don't want to confuse them */
8639 instGen(INS_BREAKPOINT);
8641 #ifdef _TARGET_ARMARCH_
8642 // Avoid asserts in the unwind info because these instructions aren't accounted for.
8643 compiler->unwindPadding();
8644 #endif // _TARGET_ARMARCH_
8648 #if FEATURE_EH_FUNCLETS && defined(DEBUG)
8650 // We cannot force 0-initialization of the PSPSym
8651 // as it will overwrite the real value
8652 if (compiler->lvaPSPSym != BAD_VAR_NUM)
8654 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
8655 assert(!varDsc->lvMustInit);
8658 #endif // FEATURE_EH_FUNCLETS && DEBUG
8660 /*-------------------------------------------------------------------------
8662 * Record the stack frame ranges that will cover all of the tracked
8663 * and untracked pointer variables.
8664 * Also find which registers will need to be zero-initialized.
8666 * 'initRegs': - Generally, enregistered variables should not need to be
8667 * zero-inited. They only need to be zero-inited when they
8668 * have a possibly uninitialized read on some control
8669 * flow path. Apparently some of the IL_STUBs that we
8670 * generate have this property.
8673 int untrLclLo = +INT_MAX;
8674 int untrLclHi = -INT_MAX;
8675 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
8676 // Note that they may be tracked, but simply not allocated to a register.
8677 bool hasUntrLcl = false;
8679 int GCrefLo = +INT_MAX;
8680 int GCrefHi = -INT_MAX;
8681 bool hasGCRef = false;
8683 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
8684 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
8685 regMaskTP initDblRegs = RBM_NONE;
8690 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
8692 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
8697 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
8699 noway_assert(varDsc->lvRefCnt == 0);
8703 signed int loOffs = varDsc->lvStkOffs;
8704 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
8706 /* We need to know the offset range of tracked stack GC refs */
8707 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
8709 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
8711 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
8712 // taken care of by the parent struct.
8713 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
8717 if (loOffs < GCrefLo)
8721 if (hiOffs > GCrefHi)
8728 /* For lvMustInit vars, gather pertinent info */
8730 if (!varDsc->lvMustInit)
8735 if (varDsc->lvIsInReg())
8737 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
8738 if (!varDsc->IsFloatRegType())
8740 initRegs |= regMask;
8742 if (varTypeIsMultiReg(varDsc))
8744 if (varDsc->lvOtherReg != REG_STK)
8746 initRegs |= genRegMask(varDsc->lvOtherReg);
8750 /* Upper DWORD is on the stack, and needs to be inited */
8752 loOffs += sizeof(int);
8757 #if !FEATURE_STACK_FP_X87
8758 else if (varDsc->TypeGet() == TYP_DOUBLE)
8760 initDblRegs |= regMask;
8764 initFltRegs |= regMask;
8766 #endif // !FEATURE_STACK_FP_X87
8774 if (loOffs < untrLclLo)
8778 if (hiOffs > untrLclHi)
8785 /* Don't forget about spill temps that hold pointers */
8787 if (!TRACK_GC_TEMP_LIFETIMES)
8789 assert(compiler->tmpAllFree());
8790 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
8792 if (!varTypeIsGC(tempThis->tdTempType()))
8797 signed int loOffs = tempThis->tdTempOffs();
8798 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
8800 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
8801 // previous frame pointer. Thus, stkOffs can't be zero.
8802 CLANG_FORMAT_COMMENT_ANCHOR;
8804 #if !defined(_TARGET_AMD64_)
8805 // However, on amd64 there is no requirement to chain frame pointers.
8807 noway_assert(!isFramePointerUsed() || loOffs != 0);
8808 #endif // !defined(_TARGET_AMD64_)
8809 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
8813 if (loOffs < untrLclLo)
8817 if (hiOffs > untrLclHi)
8824 assert((genInitStkLclCnt > 0) == hasUntrLcl);
8829 if (genInitStkLclCnt > 0)
8831 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
8838 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
8839 // Ditto for all enregistered user arguments in a varargs method.
8840 // These registers will be available to use for the initReg. We just remove
8841 // all of these registers from the rsCalleeRegArgMaskLiveIn.
8843 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
8846 /* Choose the register to use for zero initialization */
8848 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
8849 bool initRegZeroed = false;
8850 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
8853 // We should not use the special PINVOKE registers as the initReg
8854 // since they are trashed by the jithelper call to setup the PINVOKE frame
8855 if (compiler->info.compCallUnmanaged)
8857 excludeMask |= RBM_PINVOKE_FRAME;
8859 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
8860 if (!compiler->opts.ShouldUsePInvokeHelpers())
8862 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
8864 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
8866 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
8868 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
8869 if (varDsc->lvRegister)
8871 excludeMask |= genRegMask(varDsc->lvRegNum);
8877 // If we have a variable sized frame (compLocallocUsed is true)
8878 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
8879 if (compiler->compLocallocUsed)
8881 excludeMask |= RBM_SAVED_LOCALLOC_SP;
8883 #endif // _TARGET_ARM_
8885 #if defined(_TARGET_XARCH_)
8886 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
8888 // We currently must use REG_EAX on x86 here
8889 // because the loop's backwards branch depends upon the size of EAX encodings
8890 assert(initReg == REG_EAX);
8893 #endif // _TARGET_XARCH_
8895 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
8897 if (tempMask != RBM_NONE)
8899 // We will use one of the registers that we were planning to zero init anyway.
8900 // We pick the lowest register number.
8901 tempMask = genFindLowestBit(tempMask);
8902 initReg = genRegNumFromMask(tempMask);
8904 // Next we prefer to use one of the unused argument registers.
8905 // If they aren't available we use one of the caller-saved integer registers.
8908 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
8909 if (tempMask != RBM_NONE)
8911 // We pick the lowest register number
8912 tempMask = genFindLowestBit(tempMask);
8913 initReg = genRegNumFromMask(tempMask);
8918 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
8920 #if defined(_TARGET_AMD64_)
8921 // If we are a varargs call, in order to set up the arguments correctly this
8922 // must be done in a 2 step process. As per the x64 ABI:
8923 // a) The caller sets up the argument shadow space (just before the return
8924 // address, 4 pointer sized slots).
8925 // b) The callee is responsible to home the arguments on the shadow space
8926 // provided by the caller.
8927 // This way, the varargs iterator will be able to retrieve the
8928 // call arguments properly since both the arg regs and the stack allocated
8929 // args will be contiguous.
8930 if (compiler->info.compIsVarArgs)
8932 getEmitter()->spillIntArgRegsToShadowSlots();
8935 #endif // _TARGET_AMD64_
8938 /*-------------------------------------------------------------------------
8940 * Now start emitting the part of the prolog which sets up the frame
8943 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8945 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
8946 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
8948 #endif // _TARGET_ARM_
8950 #ifdef _TARGET_XARCH_
8951 if (doubleAlignOrFramePointerUsed())
8953 inst_RV(INS_push, REG_FPBASE, TYP_REF);
8954 compiler->unwindPush(REG_FPBASE);
8955 psiAdjustStackLevel(REGSIZE_BYTES);
8957 #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
8958 genEstablishFramePointer(0, /*reportUnwindData*/ true);
8959 #endif // !_TARGET_AMD64_
8962 if (compiler->genDoubleAlign())
8964 noway_assert(isFramePointerUsed() == false);
8965 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
8967 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
8969 #endif // DOUBLE_ALIGN
8971 #endif // _TARGET_XARCH_
8973 #ifdef _TARGET_ARM64_
8974 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
8975 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
8976 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
8977 #else // !_TARGET_ARM64_
8978 genPushCalleeSavedRegisters();
8979 #endif // !_TARGET_ARM64_
8982 bool needToEstablishFP = false;
8983 int afterLclFrameSPtoFPdelta = 0;
8984 if (doubleAlignOrFramePointerUsed())
8986 needToEstablishFP = true;
8988 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
8989 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
8990 // too big, we go ahead and do it here.
8992 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
8993 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
8994 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
8996 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
8997 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
8998 needToEstablishFP = false;
9001 #endif // _TARGET_ARM_
9003 //-------------------------------------------------------------------------
9005 // Subtract the local frame size from SP.
9007 //-------------------------------------------------------------------------
9008 CLANG_FORMAT_COMMENT_ANCHOR;
9010 #ifndef _TARGET_ARM64_
9011 regMaskTP maskStackAlloc = RBM_NONE;
9015 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
9016 #endif // _TARGET_ARM_
9018 if (maskStackAlloc == RBM_NONE)
9020 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
9022 #endif // !_TARGET_ARM64_
9024 //-------------------------------------------------------------------------
9027 if (compiler->compLocallocUsed)
9029 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
9030 regTracker.rsTrackRegTrash(REG_SAVED_LOCALLOC_SP);
9031 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
9033 #endif // _TARGET_ARMARCH_
9035 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
9036 // Preserve callee saved float regs to stack.
9037 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
9038 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
9040 #ifdef _TARGET_AMD64_
9041 // Establish the AMD64 frame pointer after the OS-reported prolog.
9042 if (doubleAlignOrFramePointerUsed())
9044 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
9045 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
9047 #endif //_TARGET_AMD64_
9049 //-------------------------------------------------------------------------
9051 // This is the end of the OS-reported prolog for purposes of unwinding
9053 //-------------------------------------------------------------------------
9056 if (needToEstablishFP)
9058 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
9059 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
9061 #endif // _TARGET_ARM_
9063 if (compiler->info.compPublishStubParam)
9065 #if CPU_LOAD_STORE_ARCH
9066 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
9067 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
9069 // mov [lvaStubArgumentVar], EAX
9070 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
9071 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
9073 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
9075 // It's no longer live; clear it out so it can be used after this in the prolog
9076 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
9080 // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
9081 // when creating the stack frame. Don't think it's worth it, though.
9082 if (genNeedPrologStackProbe)
9085 // Can't have a call until we have enough padding for rejit
9087 genPrologPadForReJit();
9088 noway_assert(compiler->opts.compNeedStackProbes);
9089 genGenerateStackProbe();
9090 compiler->compStackProbePrologDone = true;
9092 #endif // STACK_PROBES
9095 // Zero out the frame as needed
9098 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
9100 #if FEATURE_EH_FUNCLETS
9102 genSetPSPSym(initReg, &initRegZeroed);
9104 #else // !FEATURE_EH_FUNCLETS
9106 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
9107 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
9109 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
9110 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*));
9112 // Zero out the slot for nesting level 0
9113 unsigned firstSlotOffs = filterEndOffsetSlotOffs - (sizeof(void*));
9117 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
9118 initRegZeroed = true;
9121 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
9125 #endif // !FEATURE_EH_FUNCLETS
9127 genReportGenericContextArg(initReg, &initRegZeroed);
9129 #if defined(LEGACY_BACKEND) // in RyuJIT backend this has already been expanded into trees
9130 if (compiler->info.compCallUnmanaged && !compiler->opts.ShouldUsePInvokeHelpers())
9132 getEmitter()->emitDisableRandomNops();
9133 initRegs = genPInvokeMethodProlog(initRegs);
9134 getEmitter()->emitEnableRandomNops();
9136 #endif // defined(LEGACY_BACKEND)
9138 // The local variable representing the security object must be on the stack frame
9139 // and must be 0 initialized.
9140 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
9141 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
9142 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
9144 // Initialize any "hidden" slots/locals
9146 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
9148 #ifdef _TARGET_ARM64_
9149 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
9151 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
9155 // Set up the GS security cookie
9157 genSetGSSecurityCookie(initReg, &initRegZeroed);
9159 #ifdef PROFILING_SUPPORTED
9161 // Insert a function entry callback for profiling, if requested.
9162 genProfilingEnterCallback(initReg, &initRegZeroed);
9164 #endif // PROFILING_SUPPORTED
9166 if (!genInterruptible)
9168 /*-------------------------------------------------------------------------
9170 * The 'real' prolog ends here for non-interruptible methods.
9171 * For fully-interruptible methods, we extend the prolog so that
9172 * we do not need to track GC information while shuffling the
9175 * Make sure there's enough padding for ReJIT.
9178 genPrologPadForReJit();
9179 getEmitter()->emitMarkPrologEnd();
9182 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
9183 // The unused bits of Vector3 arguments must be cleared
9184 // since native compiler doesn't initialize the upper bits to zeros.
9186 // TODO-Cleanup: This logic can be implemented in
9187 // genFnPrologCalleeRegArgs() for argument registers and
9188 // genEnregisterIncomingStackArgs() for stack arguments.
9189 genClearStackVec3ArgUpperBits();
9190 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
9192 /*-----------------------------------------------------------------------------
9193 * Take care of register arguments first
9198 #ifndef LEGACY_BACKEND
9199 // Update the arg initial register locations.
9200 compiler->lvaUpdateArgsWithInitialReg();
9201 #endif // !LEGACY_BACKEND
9203 FOREACH_REGISTER_FILE(regState)
9205 if (regState->rsCalleeRegArgMaskLiveIn)
9207 // If we need an extra register to shuffle around the incoming registers
9208 // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
9209 // if we don't need to use the xtraReg then this flag will stay false
9212 bool xtraRegClobbered = false;
9214 if (genRegMask(initReg) & RBM_ARG_REGS)
9220 xtraReg = REG_SCRATCH;
9221 initRegZeroed = false;
9224 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
9226 if (xtraRegClobbered)
9228 initRegZeroed = false;
9233 // Home the incoming arguments
9234 genEnregisterIncomingStackArgs();
9236 /* Initialize any must-init registers variables now */
9240 regMaskTP regMask = 0x1;
9242 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
9244 if (regMask & initRegs)
9246 // Check if we have already zeroed this register
9247 if ((reg == initReg) && initRegZeroed)
9253 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
9256 initRegZeroed = true;
9263 #if !FEATURE_STACK_FP_X87
9264 if (initFltRegs | initDblRegs)
9266 // If initReg is not in initRegs then we will use REG_SCRATCH
9267 if ((genRegMask(initReg) & initRegs) == 0)
9269 initReg = REG_SCRATCH;
9270 initRegZeroed = false;
9274 // This is needed only for Arm since it can use a zero initialized int register
9275 // to initialize vfp registers.
9278 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
9279 initRegZeroed = true;
9281 #endif // _TARGET_ARM_
9283 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
9285 #endif // !FEATURE_STACK_FP_X87
9287 #if FEATURE_STACK_FP_X87
9289 // Here is where we load the enregistered floating point arguments
9290 // and locals onto the x86-FPU.
9292 genCodeForPrologStackFP();
9295 //-----------------------------------------------------------------------------
9298 // Increase the prolog size here only if fully interruptible.
9299 // And again make sure it's big enough for ReJIT
9302 if (genInterruptible)
9304 genPrologPadForReJit();
9305 getEmitter()->emitMarkPrologEnd();
9308 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
9315 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
9319 noway_assert(GCrefLo == +INT_MAX);
9320 noway_assert(GCrefHi == -INT_MAX);
9324 if (compiler->opts.dspCode)
9331 // On non-x86 the VARARG cookie does not need any special treatment.
9333 // Load up the VARARG argument pointer register so it doesn't get clobbered.
9334 // only do this if we actually access any statically declared args
9335 // (our argument pointer register has a refcount > 0).
9336 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
9338 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt > 0)
9340 varDsc = &compiler->lvaTable[argsStartVar];
9342 noway_assert(compiler->info.compArgsCount > 0);
9344 // MOV EAX, <VARARGS HANDLE>
9345 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
9346 regTracker.rsTrackRegTrash(REG_EAX);
9349 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
9351 // EDX might actually be holding something here. So make sure to only use EAX for this code
9354 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
9355 noway_assert(!lastArg->lvRegister);
9356 signed offset = lastArg->lvStkOffs;
9357 assert(offset != BAD_STK_OFFS);
9358 noway_assert(lastArg->lvFramePointerBased);
9360 // LEA EAX, &<VARARGS HANDLE> + EAX
9361 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
9363 if (varDsc->lvIsInReg())
9365 if (varDsc->lvRegNum != REG_EAX)
9367 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
9368 regTracker.rsTrackRegTrash(varDsc->lvRegNum);
9373 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
9377 #endif // _TARGET_X86_
9380 if (compiler->opts.compStackCheckOnRet)
9382 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
9383 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
9384 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
9385 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
9389 getEmitter()->emitEndProlog();
9390 compiler->unwindEndProlog();
9392 noway_assert(getEmitter()->emitMaxTmpSize == compiler->tmpSize);
9395 #pragma warning(pop)
9398 /*****************************************************************************
9400 * Generates code for a function epilog.
9402 * Please consult the "debugger team notification" comment in genFnProlog().
9405 #if defined(_TARGET_ARM_)
9407 void CodeGen::genFnEpilog(BasicBlock* block)
9411 printf("*************** In genFnEpilog()\n");
9414 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9416 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
9417 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
9418 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
9421 if (compiler->opts.dspCode)
9422 printf("\n__epilog:\n");
9426 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
9427 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
9428 printf(", gcRegGCrefSetCur=");
9429 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
9430 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
9431 printf(", gcRegByrefSetCur=");
9432 printRegMaskInt(gcInfo.gcRegByrefSetCur);
9433 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
9438 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
9440 // We delay starting the unwind codes until we have an instruction which we know
9441 // needs an unwind code. In particular, for large stack frames in methods without
9442 // localloc, the sequence might look something like this:
9445 // pop {r4,r5,r6,r10,r11,pc}
9446 // In this case, the "movw" should not be part of the unwind codes, since it will
9447 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
9448 // also sets the current location as the beginning offset of the epilog, so every
9449 // instruction afterwards needs an unwind code. In the case above, if you call
9450 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
9452 bool unwindStarted = false;
9454 // Tear down the stack frame
9456 if (compiler->compLocallocUsed)
9460 compiler->unwindBegEpilog();
9461 unwindStarted = true;
9465 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
9466 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
9470 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
9473 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
9478 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
9479 compiler->unwindBegEpilog();
9480 unwindStarted = true;
9483 genPopCalleeSavedRegisters(jmpEpilog);
9485 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
9487 // We better not have used a pop PC to return otherwise this will be unreachable code
9488 noway_assert(!genUsedPopToReturn);
9490 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9491 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
9492 compiler->unwindAllocStack(preSpillRegArgSize);
9497 noway_assert(block->bbJumpKind == BBJ_RETURN);
9498 noway_assert(block->bbTreeList);
9500 // We better not have used a pop PC to return otherwise this will be unreachable code
9501 noway_assert(!genUsedPopToReturn);
9503 /* figure out what jump we have */
9505 GenTree* jmpNode = block->lastNode();
9506 noway_assert(jmpNode->gtOper == GT_JMP);
9508 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
9510 CORINFO_CONST_LOOKUP addrInfo;
9512 regNumber indCallReg;
9513 emitter::EmitCallType callType;
9515 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
9516 switch (addrInfo.accessType)
9519 if (arm_Valid_Imm_For_BL((ssize_t)addrInfo.addr))
9521 // Simple direct call
9522 callType = emitter::EC_FUNC_TOKEN;
9523 addr = addrInfo.addr;
9524 indCallReg = REG_NA;
9528 // otherwise the target address doesn't fit in an immediate
9529 // so we have to burn a register...
9533 // Load the address into a register, load indirect and call through a register
9534 // We have to use R12 since we assume the argument registers are in use
9535 callType = emitter::EC_INDIR_R;
9536 indCallReg = REG_R12;
9538 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
9539 if (addrInfo.accessType == IAT_PVALUE)
9541 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
9542 regTracker.rsTrackRegTrash(indCallReg);
9548 NO_WAY("Unsupported JMP indirection");
9551 /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
9552 * the same descriptor with some minor adjustments.
9556 getEmitter()->emitIns_Call(callType,
9558 INDEBUG_LDISASM_COMMA(nullptr)
9561 EA_UNKNOWN, // retSize
9562 gcInfo.gcVarPtrSetCur,
9563 gcInfo.gcRegGCrefSetCur,
9564 gcInfo.gcRegByrefSetCur,
9565 BAD_IL_OFFSET, // IL offset
9575 if (!genUsedPopToReturn)
9577 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
9578 // so we need a "bx lr" instruction to return from the function.
9579 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
9580 compiler->unwindBranch16();
9584 compiler->unwindEndEpilog();
9587 #elif defined(_TARGET_ARM64_)
9589 void CodeGen::genFnEpilog(BasicBlock* block)
9593 printf("*************** In genFnEpilog()\n");
9596 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9598 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
9599 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
9600 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
9603 if (compiler->opts.dspCode)
9604 printf("\n__epilog:\n");
9608 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
9609 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
9610 printf(", gcRegGCrefSetCur=");
9611 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
9612 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
9613 printf(", gcRegByrefSetCur=");
9614 printRegMaskInt(gcInfo.gcRegByrefSetCur);
9615 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
9620 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
9622 compiler->unwindBegEpilog();
9624 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
9628 noway_assert(block->bbJumpKind == BBJ_RETURN);
9629 noway_assert(block->bbTreeList != nullptr);
9631 // figure out what jump we have
9632 GenTree* jmpNode = block->lastNode();
9633 #if !FEATURE_FASTTAILCALL
9634 noway_assert(jmpNode->gtOper == GT_JMP);
9637 // If jmpNode is GT_JMP then gtNext must be null.
9638 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
9639 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
9641 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
9642 noway_assert((jmpNode->gtOper == GT_JMP) ||
9643 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
9645 // The next block is associated with this "if" stmt
9646 if (jmpNode->gtOper == GT_JMP)
9649 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9650 // the same descriptor with some minor adjustments.
9651 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
9653 CORINFO_CONST_LOOKUP addrInfo;
9654 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
9655 if (addrInfo.accessType != IAT_VALUE)
9657 NYI_ARM64("Unsupported JMP indirection");
9660 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
9662 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9663 // the same descriptor with some minor adjustments.
9666 getEmitter()->emitIns_Call(callType,
9668 INDEBUG_LDISASM_COMMA(nullptr)
9671 EA_UNKNOWN, // retSize
9672 EA_UNKNOWN, // secondRetSize
9673 gcInfo.gcVarPtrSetCur,
9674 gcInfo.gcRegGCrefSetCur,
9675 gcInfo.gcRegByrefSetCur,
9676 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
9680 #if FEATURE_FASTTAILCALL
9684 // Call target = REG_IP0.
9685 // https://github.com/dotnet/coreclr/issues/4827
9686 // Do we need a special encoding for stack walker like rex.w prefix for x64?
9687 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_IP0);
9689 #endif // FEATURE_FASTTAILCALL
9693 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
9694 compiler->unwindReturn(REG_LR);
9697 compiler->unwindEndEpilog();
9700 #elif defined(_TARGET_XARCH_)
9702 void CodeGen::genFnEpilog(BasicBlock* block)
9707 printf("*************** In genFnEpilog()\n");
9711 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9713 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
9714 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
9715 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
9717 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
9720 genInterruptibleUsed = true;
9723 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
9726 if (compiler->opts.dspCode)
9728 printf("\n__epilog:\n");
9733 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
9734 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
9735 printf(", gcRegGCrefSetCur=");
9736 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
9737 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
9738 printf(", gcRegByrefSetCur=");
9739 printRegMaskInt(gcInfo.gcRegByrefSetCur);
9740 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
9745 #if !FEATURE_STACK_FP_X87
9746 // Restore float registers that were saved to stack before SP is modified.
9747 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
9748 #endif // !FEATURE_STACK_FP_X87
9750 #ifdef JIT32_GCENCODER
9751 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
9752 // the above call to `genRestoreCalleeSavedFltRegs` because that function
9753 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
9754 // which is the only target that uses the JIT32 GC encoder
9755 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
9756 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
9757 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
9758 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
9759 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
9760 // we will need to rethink this.
9761 getEmitter()->emitStartEpilog();
9764 /* Compute the size in bytes we've pushed/popped */
9766 if (!doubleAlignOrFramePointerUsed())
9768 // We have an ESP frame */
9770 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
9772 /* Get rid of our local variables */
9774 if (compiler->compLclFrameSize)
9777 /* Add 'compiler->compLclFrameSize' to ESP */
9778 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
9780 if ((compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed)
9782 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
9783 regTracker.rsTrackRegTrash(REG_ECX);
9786 #endif // _TARGET_X86
9788 /* Add 'compiler->compLclFrameSize' to ESP */
9789 /* Generate "add esp, <stack-size>" */
9790 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
9794 genPopCalleeSavedRegisters();
9798 noway_assert(doubleAlignOrFramePointerUsed());
9800 /* Tear down the stack frame */
9802 bool needMovEspEbp = false;
9805 if (compiler->genDoubleAlign())
9808 // add esp, compLclFrameSize
9810 // We need not do anything (except the "mov esp, ebp") if
9811 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
9812 // also complicates the code manager. Hence, we ignore that case.
9814 noway_assert(compiler->compLclFrameSize != 0);
9815 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
9817 needMovEspEbp = true;
9820 #endif // DOUBLE_ALIGN
9822 bool needLea = false;
9824 if (compiler->compLocallocUsed)
9826 // ESP may be variable if a localloc was actually executed. Reset it.
9827 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
9831 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
9833 if (compiler->compLclFrameSize != 0)
9835 #ifdef _TARGET_AMD64_
9836 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
9837 // do an LEA to "pop off" the frame allocation.
9839 #else // !_TARGET_AMD64_
9840 // We will just generate "mov esp, ebp" and be done with it.
9841 needMovEspEbp = true;
9842 #endif // !_TARGET_AMD64_
9845 else if (compiler->compLclFrameSize == 0)
9847 // do nothing before popping the callee-saved registers
9850 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
9852 // "pop ecx" will make ESP point to the callee-saved registers
9853 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
9854 regTracker.rsTrackRegTrash(REG_ECX);
9856 #endif // _TARGET_X86
9859 // We need to make ESP point to the callee-saved registers
9867 #ifdef _TARGET_AMD64_
9868 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
9870 // Case 1: localloc not used.
9871 // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
9872 // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
9873 // The amount to be subtracted from RBP to point at callee saved int regs.
9875 // Case 2: localloc used
9876 // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
9877 // Offset = Amount to be aded to RBP to point at callee saved int regs.
9878 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
9880 // Offset should fit within a byte if localloc is not used.
9881 if (!compiler->compLocallocUsed)
9883 noway_assert(offset < UCHAR_MAX);
9886 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
9887 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
9888 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
9891 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
9896 // Pop the callee-saved registers (if any)
9899 genPopCalleeSavedRegisters();
9901 #ifdef _TARGET_AMD64_
9902 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
9903 #else // !_TARGET_AMD64_
9907 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
9909 #endif // !_TARGET_AMD64_
9912 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9915 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
9917 /* Check if this a special return block i.e.
9918 * CEE_JMP instruction */
9922 noway_assert(block->bbJumpKind == BBJ_RETURN);
9923 noway_assert(block->bbTreeList);
9925 // figure out what jump we have
9926 GenTree* jmpNode = block->lastNode();
9927 #if !FEATURE_FASTTAILCALL
9929 noway_assert(jmpNode->gtOper == GT_JMP);
9932 // If jmpNode is GT_JMP then gtNext must be null.
9933 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
9934 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
9936 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
9937 noway_assert((jmpNode->gtOper == GT_JMP) ||
9938 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
9940 // The next block is associated with this "if" stmt
9941 if (jmpNode->gtOper == GT_JMP)
9944 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9945 // the same descriptor with some minor adjustments.
9946 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
9948 CORINFO_CONST_LOOKUP addrInfo;
9949 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
9950 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
9952 NO_WAY("Unsupported JMP indirection");
9955 const emitter::EmitCallType callType =
9956 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
9958 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9959 // the same descriptor with some minor adjustments.
9962 getEmitter()->emitIns_Call(callType,
9964 INDEBUG_LDISASM_COMMA(nullptr)
9967 EA_UNKNOWN // retSize
9968 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
9969 gcInfo.gcVarPtrSetCur,
9970 gcInfo.gcRegGCrefSetCur,
9971 gcInfo.gcRegByrefSetCur,
9972 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
9976 #if FEATURE_FASTTAILCALL
9979 #ifdef _TARGET_AMD64_
9981 // Call target = RAX.
9982 // Stack walker requires that a register indirect tail call be rex.w prefixed.
9983 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
9985 assert(!"Fast tail call as epilog+jmp");
9987 #endif //_TARGET_AMD64_
9989 #endif // FEATURE_FASTTAILCALL
9993 unsigned stkArgSize = 0; // Zero on all platforms except x86
9995 #if defined(_TARGET_X86_)
9996 bool fCalleePop = true;
9998 // varargs has caller pop
9999 if (compiler->info.compIsVarArgs)
10000 fCalleePop = false;
10002 #ifdef UNIX_X86_ABI
10003 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
10004 fCalleePop = false;
10005 #endif // UNIX_X86_ABI
10009 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
10010 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
10012 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
10014 #endif // _TARGET_X86_
10016 /* Return, popping our arguments (if any) */
10017 instGen_Return(stkArgSize);
10022 #error Unsupported or unset target architecture
10023 #endif // _TARGET_*
10025 #if FEATURE_EH_FUNCLETS
10027 #ifdef _TARGET_ARM_
10029 /*****************************************************************************
10031 * Generates code for an EH funclet prolog.
10033 * Funclets have the following incoming arguments:
10035 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
10036 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
10037 * finally/fault: none
10039 * Funclets set the following registers on exit:
10041 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
10042 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10043 * finally/fault: none
10045 * The ARM funclet prolog sequence is:
10047 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
10048 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
10049 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
10050 * ; calculated for the entire function.
10051 * sub sp, XXX ; Establish the rest of the frame.
10052 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
10053 * ; up to preserve stack alignment. If we push an odd number of registers, we also
10054 * ; generate this, to keep the stack aligned.
10056 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
10058 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
10061 * if (this is a filter funclet)
10063 * // r1 on entry to a filter funclet is CallerSP of the containing function:
10064 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
10065 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
10066 * // a funclet. Consider:
10070 * // throw new Exception();
10071 * // } catch(Exception) {
10072 * // throw new Exception(); // The exception thrown here ...
10074 * // } filter { // ... will be processed here, while the "catch" funclet frame is
10075 * // // still on the stack
10076 * // } filter-handler {
10079 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
10080 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
10081 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
10083 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
10084 * ; the dynamically containing funclet or function)
10085 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
10086 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
10090 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
10091 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
10093 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
10094 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
10097 * The epilog sequence is then:
10099 * add sp, XXX ; if necessary
10102 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
10103 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
10105 * The funclet frame is thus:
10108 * |-----------------------|
10111 * +=======================+ <---- Caller's SP
10112 * |Callee saved registers |
10113 * |-----------------------|
10114 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
10115 * | | // in function and funclet
10116 * |-----------------------|
10117 * | PSP slot | // Omitted in CoreRT ABI
10118 * |-----------------------|
10119 * ~ possible 4 byte pad ~
10120 * ~ for alignment ~
10121 * |-----------------------|
10122 * | Outgoing arg space |
10123 * |-----------------------| <---- Ambient SP
10125 * ~ | Stack grows ~
// Generate the prolog for an ARM EH funclet: push the callee-saved registers
// (including LR), allocate the remainder of the funclet frame, then populate
// the PSP slot and (for filters) re-establish the frame pointer. See the
// large comment above this function for the full sequence and frame layout.
10130 void CodeGen::genFuncletProlog(BasicBlock* block)
10134 printf("*************** In genFuncletProlog()\n");
10137 assert(block != NULL);
10138 assert(block->bbFlags & BBF_FUNCLET_BEG); // must be the first block of a funclet
10140 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10142 gcInfo.gcResetForBB();
10144 compiler->unwindBegProlog();
// Split the registers to save into float and integer sets; they are pushed
// (and unwind-recorded) separately.
10146 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
10147 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
// If the frame is small enough, the SP allocation can be folded into the push
// by pushing additional (scratch) integer registers.
10149 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
10150 maskPushRegsInt |= maskStackAlloc;
10152 assert(FitsIn<int>(maskPushRegsInt));
10153 inst_IV(INS_push, (int)maskPushRegsInt);
10154 compiler->unwindPushMaskInt(maskPushRegsInt);
10156 if (maskPushRegsFloat != RBM_NONE)
10158 genPushFltRegs(maskPushRegsFloat);
10159 compiler->unwindPushMaskFloat(maskPushRegsFloat);
// Determine which argument registers are live on entry (see the incoming
// argument summary in the comment above this function).
10162 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
10164 regMaskTP maskArgRegsLiveIn;
10167 maskArgRegsLiveIn = RBM_R0 | RBM_R1; // filter: r0 = exception object, r1 = CallerSP
10169 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
10171 maskArgRegsLiveIn = RBM_NONE;
10175 maskArgRegsLiveIn = RBM_R0; // catch: r0 = exception object
10178 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
10179 bool initRegZeroed = false;
// Only allocate the frame explicitly if the allocation wasn't already folded
// into the register push above.
10181 if (maskStackAlloc == RBM_NONE)
10183 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
10186 // This is the end of the OS-reported prolog for purposes of unwinding
10187 compiler->unwindEndProlog();
10191 // This is the first block of a filter
// Filter: r1 is CallerSP of the containing function; load the main function's
// CallerSP from the containing frame's PSP slot, store it into our own PSP
// slot, and recompute r11 (the frame pointer) from it.
10193 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
10194 genFuncletInfo.fiPSP_slot_CallerSP_offset);
10195 regTracker.rsTrackRegTrash(REG_R1);
10196 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
10197 genFuncletInfo.fiPSP_slot_SP_offset);
10198 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
10199 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10203 // This is a non-filter funclet
// Non-filter: the VM has already re-established the frame pointer; compute
// CallerSP from it (r3 is scratch) and store it into our PSP slot.
10204 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
10205 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10206 regTracker.rsTrackRegTrash(REG_R3);
10207 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
10208 genFuncletInfo.fiPSP_slot_SP_offset);
10212 /*****************************************************************************
10214 * Generates code for an EH funclet epilog.
// Generate the epilog for an ARM EH funclet: free the funclet frame and pop
// the saved registers, popping PC in place of the saved LR so that the pop
// itself performs the return.
10217 void CodeGen::genFuncletEpilog()
10221 printf("*************** In genFuncletEpilog()\n");
10224 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10226 // Just as for the main function, we delay starting the unwind codes until we have
10227 // an instruction which we know needs an unwind code. This is to support code like
10231 // pop {r4,r5,r6,r10,r11,pc}
10232 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
10234 bool unwindStarted = false;
10236 /* The saved regs info saves the LR register. We need to pop the PC register to return */
10237 assert(genFuncletInfo.fiSaveRegs & RBM_LR)
10239 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
10240 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
// Mirror of the prolog: if the frame allocation was folded into the register
// push there, fold the free into the pop here; otherwise free it explicitly.
10242 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
10243 maskPopRegsInt |= maskStackAlloc;
10245 if (maskStackAlloc == RBM_NONE)
10247 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
10250 if (!unwindStarted)
10252 // We'll definitely generate an unwindable instruction next
10253 compiler->unwindBegEpilog();
10254 unwindStarted = true;
// Swap LR for PC in the pop mask: popping PC returns to the caller.
10257 maskPopRegsInt &= ~RBM_LR;
10258 maskPopRegsInt |= RBM_PC;
10260 if (maskPopRegsFloat != RBM_NONE)
10262 genPopFltRegs(maskPopRegsFloat);
10263 compiler->unwindPopMaskFloat(maskPopRegsFloat);
10266 assert(FitsIn<int>(maskPopRegsInt));
10267 inst_IV(INS_pop, (int)maskPopRegsInt);
10268 compiler->unwindPopMaskInt(maskPopRegsInt);
10270 compiler->unwindEndEpilog();
10273 /*****************************************************************************
10275 * Capture the information used to generate the funclet prologs and epilogs.
10276 * Note that all funclet prologs are identical, and all funclet epilogs are
10277 * identical (per type: filters are identical, and non-filters are identical).
10278 * Thus, we compute the data used for these just once.
10280 * See genFuncletProlog() for more information about the prolog/epilog sequences.
// Compute, once per function, the data shared by all ARM funclet prologs and
// epilogs: the registers to save, the SP delta, and the PSP slot offsets
// (both SP-relative and CallerSP-relative). Requires final frame layout.
10283 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10285 if (compiler->ehAnyFunclets())
10287 assert(isFramePointerUsed());
10288 assert(compiler->lvaDoneFrameLayout ==
10289 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
10291 // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
10292 // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
10293 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
10294 // (also assumed in genFnProlog()).
10295 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
10296 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
10297 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
10299 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
10300 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
10301 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
10302 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
// Funclet frame = pre-spill space + saved regs + PSP slot + outgoing args,
// rounded up to the stack alignment; the pad keeps the PSP slot addressable
// at a fixed SP-relative offset.
10303 unsigned funcletFrameSize =
10304 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
10306 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
10307 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
// spDelta excludes the register push, which is done separately in the prolog.
10308 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
10310 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
10311 int PSP_slot_CallerSP_offset =
10312 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
10314 /* Now save it for future use */
10316 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
10317 genFuncletInfo.fiSpDelta = spDelta;
10318 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
10319 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
// Debug dump of the computed values.
10325 printf("Funclet prolog / epilog info\n");
10326 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10327 printf(" Save regs: ");
10328 dspRegMask(rsMaskSaveRegs);
10330 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
10331 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
10332 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
10334 if (PSP_slot_CallerSP_offset !=
10335 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
10336 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
10337 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
10341 assert(PSP_slot_CallerSP_offset < 0);
10342 if (compiler->lvaPSPSym != BAD_VAR_NUM)
10344 assert(PSP_slot_CallerSP_offset ==
10345 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
10346 // function and funclet!
10351 #elif defined(_TARGET_AMD64_)
10353 /*****************************************************************************
10355 * Generates code for an EH funclet prolog.
10357 * Funclets have the following incoming arguments:
10359 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
10360 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
10361 * finally/fault: rcx = InitialSP
10363 * Funclets set the following registers on exit:
10365 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
10366 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10367 * finally/fault: none
10369 * The AMD64 funclet prolog sequence is:
10372 * push callee-saved regs
10373 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
10374 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
10375 * ; the entire function.
10376 * sub sp, XXX ; Establish the rest of the frame.
10377 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
10378 * ; up to preserve stack alignment. If we push an odd number of registers, we also
10379 * ; generate this, to keep the stack aligned.
10381 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
10383 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
10385 * ; Also, re-establish the frame pointer from the PSP.
10387 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
10388 * ; PSP of the dynamically containing funclet or function)
10389 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
10390 * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
10391 * ; Function_InitialSP_to_FP_delta==0, we don't need this
10394 * The epilog sequence is then:
10397 * pop callee-saved regs ; if necessary
10401 * The funclet frame is thus:
10404 * |-----------------------|
10407 * +=======================+ <---- Caller's SP
10408 * | Return address |
10409 * |-----------------------|
10411 * |-----------------------|
10412 * |Callee saved registers |
10413 * |-----------------------|
10414 * ~ possible 8 byte pad ~
10415 * ~ for alignment ~
10416 * |-----------------------|
10417 * | PSP slot | // Omitted in CoreRT ABI
10418 * |-----------------------|
10419 * | Outgoing arg space | // this only exists if the function makes a call
10420 * |-----------------------| <---- Initial SP
10422 * ~ | Stack grows ~
10426 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
10427 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
10428 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
10429 * "FRAMEPTR OFFSETS" for details.
// Generate the prolog for an AMD64 EH funclet: push RBP and the callee-saved
// integer registers, allocate the funclet frame, save callee-saved XMM regs,
// then (unless CoreRT omits it) load/store the PSPSym and re-establish the
// frame pointer. See the large comment above this function for the layout.
10432 void CodeGen::genFuncletProlog(BasicBlock* block)
10437 printf("*************** In genFuncletProlog()\n");
10441 assert(!regSet.rsRegsModified(RBM_FPBASE)); // see rsRemoveRegsModified at the end
10442 assert(block != nullptr);
10443 assert(block->bbFlags & BBF_FUNCLET_BEG); // must be the first block of a funclet
10444 assert(isFramePointerUsed());
10446 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10448 gcInfo.gcResetForBB();
10450 compiler->unwindBegProlog();
10452 // We need to push ebp, since it's callee-saved.
10453 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
10454 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
10455 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
10456 // is stored here (all temps are allocated in the parent frame).
10457 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
10458 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
10460 inst_RV(INS_push, REG_FPBASE, TYP_REF);
10461 compiler->unwindPush(REG_FPBASE);
10463 // Callee saved int registers are pushed to stack.
10464 genPushCalleeSavedRegisters();
// Incoming argument registers (see the comment above this function):
// finally/fault get only InitialSP; catch/filter also get the exception
// object in the second argument register.
10466 regMaskTP maskArgRegsLiveIn;
10467 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
10469 maskArgRegsLiveIn = RBM_ARG_0;
10473 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
10476 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
10477 bool initRegZeroed = false;
10479 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
10481 // Callee saved float registers are copied to stack in their assigned stack slots
10482 // after allocating space for them as part of funclet frame.
10483 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
10485 // This is the end of the OS-reported prolog for purposes of unwinding
10486 compiler->unwindEndProlog();
10488 // If there is no PSPSym (CoreRT ABI), we are done.
10489 if (compiler->lvaPSPSym == BAD_VAR_NUM)
// Load the main function's PSP through the Establisher Frame (InitialSP,
// passed in the first argument register), store it into our own PSP slot,
// then recompute RBP from it if the parent's FP isn't at Initial-SP.
10494 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
10496 regTracker.rsTrackRegTrash(REG_FPBASE);
10498 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
10500 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
10502 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
10503 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
10506 // We've modified EBP, but not really. Say that we haven't...
10507 regSet.rsRemoveRegsModified(RBM_FPBASE);
10510 /*****************************************************************************
10512 * Generates code for an EH funclet epilog.
10514 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
// Generate the epilog for an AMD64 EH funclet: restore XMM regs, free the
// funclet frame, then pop the callee-saved integer registers and RBP. No
// unwind codes are recorded (AMD64 unwinds from prolog info only).
10517 void CodeGen::genFuncletEpilog()
10522 printf("*************** In genFuncletEpilog()\n");
10526 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10528 // Restore callee saved XMM regs from their stack slots before modifying SP
10529 // to position at callee saved int regs.
10530 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
10531 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
10532 genPopCalleeSavedRegisters();
10533 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
10537 /*****************************************************************************
10539 * Capture the information used to generate the funclet prologs and epilogs.
// Compute, once per function, the data shared by all AMD64 funclet prologs
// and epilogs: the Initial-SP-to-FP delta of the parent frame, the funclet's
// SP delta, and the Initial-SP-relative offset of the PSP slot. Requires
// final frame layout and a finalized callee-saved float register mask.
10542 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10544 if (!compiler->ehAnyFunclets())
10549 // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
10550 // because we're not going to allocate the same size frame as the parent.
10552 assert(isFramePointerUsed());
10553 assert(compiler->lvaDoneFrameLayout ==
10554 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
10555 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
10557 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
10558 // that's ok, because we're figuring out an offset in the parent frame.
10559 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
10560 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
10563 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
10564 #ifndef UNIX_AMD64_ABI
10565 // No 4 slots for outgoing params on the stack for System V systems.
10566 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
10567 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
10568 // slots if there are any calls in the function.
10569 #endif // UNIX_AMD64_ABI
// The PSP slot sits directly above the outgoing argument space (see the
// funclet frame layout diagram above).
10570 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
10572 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
10574 // How much stack do we allocate in the funclet?
10575 // We need to 16-byte align the stack.
10577 unsigned totalFrameSize =
10578 REGSIZE_BYTES // return address
10579 + REGSIZE_BYTES // pushed EBP
10580 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
10582 // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement.
10583 // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary.
10584 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
10585 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
// The PSP slot is omitted entirely under the CoreRT ABI (no lvaPSPSym).
10587 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
10589 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
10590 + calleeFPRegsSavedSize // pushed callee-saved float regs
10591 // below calculated 'pad' will go here
10592 + PSPSymSize // PSPSym
10593 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
10596 unsigned pad = AlignmentPad(totalFrameSize, 16);
// fiSpDelta is the explicit "sub rsp" amount: everything below the pushed
// registers (which are accounted for separately by push instructions).
10598 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
10599 + calleeFPRegsSavedSize // Callee saved xmm regs
10600 + pad + PSPSymSize // PSPSym
10601 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
// Debug dump of the computed values.
10608 printf("Funclet prolog / epilog info\n");
10609 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
10610 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
10611 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
10614 if (compiler->lvaPSPSym != BAD_VAR_NUM)
10616 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
10617 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
10623 #elif defined(_TARGET_ARM64_)
10625 // Look in CodeGenArm64.cpp
10627 #elif defined(_TARGET_X86_)
10629 /*****************************************************************************
10631 * Generates code for an EH funclet prolog.
10634 * Funclets have the following incoming arguments:
10636 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
10637 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
10638 * finally/fault: none
10640 * Funclets set the following registers on exit:
10642 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
10643 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10644 * finally/fault: none
10646 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
// Generate the (minimal) prolog for an x86 EH funclet. No registers are
// saved and no PSPSym exists yet; the only code emitted is an SP adjustment
// to keep the stack 16-byte aligned.
10650 void CodeGen::genFuncletProlog(BasicBlock* block)
10655 printf("*************** In genFuncletProlog()\n");
10659 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10661 gcInfo.gcResetForBB();
10663 compiler->unwindBegProlog();
10665 // This is the end of the OS-reported prolog for purposes of unwinding
10666 compiler->unwindEndProlog();
10668 // TODO We may need EBP restore sequence here if we introduce PSPSym
10670 // Add a padding for 16-byte alignment
10671 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
10674 /*****************************************************************************
10676 * Generates code for an EH funclet epilog.
// Generate the (minimal) epilog for an x86 EH funclet: undo the alignment
// padding added by genFuncletProlog().
10679 void CodeGen::genFuncletEpilog()
10684 printf("*************** In genFuncletEpilog()\n");
10688 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10690 // Revert a padding that was added for 16-byte alignment
10691 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
10696 /*****************************************************************************
10698 * Capture the information used to generate the funclet prologs and epilogs.
// Capture funclet prolog/epilog info for x86; bails out early when the
// function has no funclets.
10701 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10703 if (!compiler->ehAnyFunclets())
10711 /*****************************************************************************
10713 * Generates code for an EH funclet prolog.
// Fallback for targets without funclet support: not yet implemented.
10716 void CodeGen::genFuncletProlog(BasicBlock* block)
10718 NYI("Funclet prolog");
10721 /*****************************************************************************
10723 * Generates code for an EH funclet epilog.
// Fallback for targets without funclet support: not yet implemented.
10726 void CodeGen::genFuncletEpilog()
10728 NYI("Funclet epilog");
10731 /*****************************************************************************
10733 * Capture the information used to generate the funclet prologs and epilogs.
// Fallback for targets without funclet support: only trips if the function
// actually has funclets.
10736 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10738 if (compiler->ehAnyFunclets())
10740 NYI("genCaptureFuncletPrologEpilogInfo()");
10744 #endif // _TARGET_*
10746 /*-----------------------------------------------------------------------------
10748 * Set the main function PSPSym value in the frame.
10749 * Funclets use different code to load the PSP sym and save it in their frame.
10750 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
10751 * The PSPSym section of that document is copied here.
10753 ***********************************
10754 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
10755 * accesses locals from the main function body.
10757 * First, two definitions.
10759 * Caller-SP is the value of the stack pointer in a function's caller before the call
10760 * instruction is executed. That is, when function A calls function B, Caller-SP for B
10761 * is the value of the stack pointer immediately before the call instruction in A
10762 * (calling B) was executed. Note that this definition holds for both AMD64, which
10763 * pushes the return address when a call instruction is executed, and for ARM, which
10764 * doesn't. For AMD64, Caller-SP is the address above the call return address.
10766 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
10767 * the frame has been allocated. That is, before any "alloca"-type allocations.
10769 * The PSPSym is a pointer-sized local variable in the frame of the main function and
10770 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
10771 * for the main function. The stack offset of the PSPSym is reported to the VM in the
10772 * GC information header. The value reported in the GC information is the offset of the
10773 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
10774 * value is reported to the VM, differs between architectures. In particular, note that
10775 * most things in the GC information header are reported as offsets relative to Caller-SP,
10776 * but PSPSym on AMD64 is one (maybe the only) exception.)
10778 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
10779 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
10780 * the frame pointer is the same value in a funclet as it is in the main function body.
10782 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
10783 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
10784 * only true for first pass funclets (currently just filters) and it is passed as the second
10785 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
10786 * frame in the exception processing system. For the CLR, it points either to the main function
10787 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
10788 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
10790 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
10791 * don't know if the Establisher Frame is from the main function or a funclet, we design the
10792 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
10793 * offset from the Establisher Frame in each case. (This is also required because we only report
10794 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
10795 * function and all of its funclets). Then, the funclet uses this known offset to compute the
10796 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
10797 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
10798 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
10799 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
10800 * for every nested funclet invocation.
10802 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
10803 * restores all non-volatile registers to their values within the parent frame. This includes
10804 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
10805 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
10807 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
10808 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
10809 * ARM this is the first argument and passed in R0.
10811 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
10812 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
10813 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
10814 * is required in all funclets as well as the main function, whereas if the establisher frame was
10815 * correctly reported, the PSPSym could be omitted in some cases.)
10816 ***********************************
// Store the main function's PSPSym value into its frame slot during the main
// prolog (funclets copy it with separate code; see genFuncletProlog). The
// value stored is Caller-SP on ARM/ARM64 and Initial-SP on AMD64 — see the
// long PSPSym comment above. 'initReg' may be trashed, in which case
// *pInitRegZeroed is cleared. No-op when there is no PSPSym (CoreRT ABI).
10818 void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
10820 assert(compiler->compGeneratingProlog);
10822 if (compiler->lvaPSPSym == BAD_VAR_NUM)
10827 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
10829 #if defined(_TARGET_ARM_)
10831 // We either generate:
10833 // str r1, [reg + PSPSymOffset]
10836 // str r1, [reg + PSPSymOffset]
10837 // depending on the smallest encoding
10839 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
// Pick whichever base register (SP or R11) yields an encodable immediate.
10844 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
10846 // use the "add <reg>, sp, imm" form
10848 callerSPOffs = SPtoCallerSPdelta;
10849 regBase = REG_SPBASE;
10853 // use the "add <reg>, r11, imm" form
10855 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
10856 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
10858 callerSPOffs = FPtoCallerSPdelta;
10859 regBase = REG_FPBASE;
10862 // We will just use the initReg since it is an available register
10863 // and we are probably done using it anyway...
10864 regNumber regTmp = initReg;
10865 *pInitRegZeroed = false;
// Compute Caller-SP into the temp and store it into the PSPSym slot.
10867 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
10868 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
10870 #elif defined(_TARGET_ARM64_)
10872 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
10874 // We will just use the initReg since it is an available register
10875 // and we are probably done using it anyway...
10876 regNumber regTmp = initReg;
10877 *pInitRegZeroed = false;
// Compute Caller-SP from SP and store it into the PSPSym slot.
10879 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
10880 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
10882 #elif defined(_TARGET_AMD64_)
10884 // The PSP sym value is Initial-SP, not Caller-SP!
10885 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
10886 // has been established.
10889 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
10891 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
10895 NYI("Set function PSP sym");
10897 #endif // _TARGET_*
10900 #endif // FEATURE_EH_FUNCLETS
10902 /*****************************************************************************
10904 * Generates code for all the function and funclet prologs and epilogs.
//------------------------------------------------------------------------
// genGeneratePrologsAndEpilogs: Generate all function and funclet prologs
// and epilogs, after main-method code generation has completed. The emitter
// holds the list of reserved prolog/epilog insGroups to walk.
//
10907 void CodeGen::genGeneratePrologsAndEpilogs()
10912 printf("*************** Before prolog / epilog generation\n");
10913 getEmitter()->emitDispIGlist(false);
10917 #ifndef LEGACY_BACKEND
10918 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
10919 // This affects our code that determines which untracked locals need to be zero initialized.
10920 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
10921 #endif // !LEGACY_BACKEND
10923 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
10925 getEmitter()->emitStartPrologEpilogGeneration();
// Reset GC tracking state before emitting prolog/epilog code.
10927 gcInfo.gcResetForBB();
10930 // Generate all the prologs and epilogs.
10931 CLANG_FORMAT_COMMENT_ANCHOR;
10933 #if FEATURE_EH_FUNCLETS
10935 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
10936 // information computed during codegen, or during function prolog generation, like
10937 // frame offsets. It must run after main function prolog generation.
10939 genCaptureFuncletPrologEpilogInfo();
10941 #endif // FEATURE_EH_FUNCLETS
10943 // Walk the list of prologs and epilogs and generate them.
10944 // We maintain a list of prolog and epilog basic blocks in
10945 // the insGroup structure in the emitter. This list was created
10946 // during code generation by the genReserve*() functions.
10948 // TODO: it seems like better design would be to create a list of prologs/epilogs
10949 // in the code generator (not the emitter), and then walk that list. But we already
10950 // have the insGroup list, which serves well, so we don't need the extra allocations
10951 // for a prolog/epilog list in the code generator.
10953 getEmitter()->emitGeneratePrologEpilog();
10955 // Tell the emitter we're done with all prolog and epilog generation.
10957 getEmitter()->emitFinishPrologEpilogGeneration();
10962 printf("*************** After prolog / epilog generation\n");
10963 getEmitter()->emitDispIGlist(false);
10969 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10970 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10972 XX End Prolog / Epilog XX
10974 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10975 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//------------------------------------------------------------------------
// genGenerateStackProbe: Emit a single stack probe (a "test" against
// [SP - probe depth]) to guarantee the required stack depth is committed.
// Only called when the EE requested probes (opts.compNeedStackProbes).
//
10979 void CodeGen::genGenerateStackProbe()
10981 noway_assert(compiler->opts.compNeedStackProbes);
10983 // If this assert fires, it means somebody has changed the value
10984 // CORINFO_STACKPROBE_DEPTH.
10985 // Why does the EE need such a deep probe? It should just need a couple
10986 // of bytes, to set up a frame in the unmanaged code..
// The probe must stay within one page, else a single touch can't commit it.
10988 static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
10990 JITDUMP("Emitting stack probe:\n");
// "test eax, [esp - depth]" reads the guard page without modifying registers.
10991 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
10992 -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
10994 #endif // STACK_PROBES
10996 /*****************************************************************************
10998 * Record the constant and return a tree node that yields its address.
//------------------------------------------------------------------------
// genMakeConst: Record a constant in the read-only data section and return
// a GT_CLS_VAR node that yields its address.
//
// Arguments:
//    cnsAddr  - pointer to the constant's value
//    cnsType  - type of the constant
//    cnsTree  - tree for the constant (used for dump selection below)
//    dblAlign - true to 8-byte align the data-section slot
//
// Return Value:
//    A GT_CLS_VAR tree referencing the emitted data-section constant.
//
11001 GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign)
11003 // Assign the constant an offset in the data section
11004 UNATIVE_OFFSET cnsSize = genTypeSize(cnsType);
11005 UNATIVE_OFFSET cnum = getEmitter()->emitDataConst(cnsAddr, cnsSize, dblAlign);
// Optionally echo the constant in assembler-listing style for the JIT dump.
11008 if (compiler->opts.dspCode)
11010 printf(" @%s%02u ", "CNS", cnum);
11015 printf("DD %d \n", *(int*)cnsAddr);
11018 printf("DQ %lld\n", *(__int64*)cnsAddr);
11021 printf("DF %f \n", *(float*)cnsAddr);
11024 printf("DQ %lf\n", *(double*)cnsAddr);
11028 noway_assert(!"unexpected constant type");
11033 // Access to inline data is 'abstracted' by a special type of static member
11034 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
11035 // to constant data, not a real static field.
11037 return new (compiler, GT_CLS_VAR) GenTreeClsVar(cnsType, compiler->eeFindJitDataOffs(cnum), nullptr);
11040 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
11041 // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
11042 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
11043 // Here offset = 16-byte aligned offset after pushing integer registers.
11046 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
11047 // non-funclet: this will be compLclFrameSize.
11048 // funclet frames: this will be FuncletInfo.fiSpDelta.
//------------------------------------------------------------------------
// genPreserveCalleeSavedFltRegs: In the prolog, save the callee-saved XMM
// registers named in compCalleeFPRegsSavedMask to the frame, working from
// the smallest register number at the highest offset downwards.
//
// Arguments:
//    lclFrameSize - fixed frame size excluding callee-pushed int regs
//                   (compLclFrameSize for non-funclets, fiSpDelta for funclets)
//
11049 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
// Prolog-side vzeroupper check (false => any AVX use triggers it).
11051 genVzeroupperIfNeeded(false);
11052 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
11054 // Only callee saved floating point registers should be in regMask
11055 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
11057 // fast path return
11058 if (regMask == RBM_NONE)
11063 #ifdef _TARGET_AMD64_
// Padding keeps the first XMM save slot 16-byte aligned when an odd number
// of integer callee-saved registers was pushed.
11064 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
11065 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
11067 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
11068 assert((offset % 16) == 0);
11069 instruction copyIns = ins_Copy(TYP_FLOAT);
11070 #else // !_TARGET_AMD64_
11071 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
11072 instruction copyIns = INS_movupd;
11073 #endif // !_TARGET_AMD64_
// Walk registers low-to-high, clearing each from the mask as it is saved.
11075 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
11077 regMaskTP regBit = genRegMask(reg);
11078 if ((regBit & regMask) != 0)
11080 // ABI requires us to preserve lower 128-bits of YMM register.
11081 getEmitter()->emitIns_AR_R(copyIns,
11082 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
11084 reg, REG_SPBASE, offset);
11085 compiler->unwindSaveReg(reg, offset);
11086 regMask &= ~regBit;
11087 offset -= XMM_REGSIZE_BYTES;
11092 // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
11093 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
11094 // Here offset = 16-byte aligned offset after pushing integer registers.
11097 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
11098 // non-funclet: this will be compLclFrameSize.
11099 // funclet frames: this will be FuncletInfo.fiSpDelta.
//------------------------------------------------------------------------
// genRestoreCalleeSavedFltRegs: In the epilog, restore the callee-saved XMM
// registers saved by genPreserveCalleeSavedFltRegs, mirroring its layout.
//
// Arguments:
//    lclFrameSize - fixed frame size excluding callee-pushed int regs
//                   (compLclFrameSize for non-funclets, fiSpDelta for funclets)
//
11100 void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
11102 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
11104 // Only callee saved floating point registers should be in regMask
11105 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
11107 // fast path return
11108 if (regMask == RBM_NONE)
11110 genVzeroupperIfNeeded();
11114 #ifdef _TARGET_AMD64_
11115 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
11116 instruction copyIns = ins_Copy(TYP_FLOAT);
11117 #else // !_TARGET_AMD64_
11118 unsigned firstFPRegPadding = 0;
11119 instruction copyIns = INS_movupd;
11120 #endif // !_TARGET_AMD64_
// With localloc, SP may have moved, so address the saves FP-relative;
// otherwise SP-relative addressing matches the prolog's layout.
11124 if (compiler->compLocallocUsed)
11126 // localloc frame: use frame pointer relative offset
11127 assert(isFramePointerUsed());
11128 regBase = REG_FPBASE;
11129 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
11133 regBase = REG_SPBASE;
11134 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
11137 #ifdef _TARGET_AMD64_
11138 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
11139 assert((offset % 16) == 0);
11140 #endif // _TARGET_AMD64_
// Walk registers low-to-high, clearing each from the mask as it is restored.
11142 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
11144 regMaskTP regBit = genRegMask(reg);
11145 if ((regBit & regMask) != 0)
11147 // ABI requires us to restore lower 128-bits of YMM register.
11148 getEmitter()->emitIns_R_AR(copyIns,
11149 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
11151 reg, regBase, offset);
11152 regMask &= ~regBit;
11153 offset -= XMM_REGSIZE_BYTES;
// Epilog-side vzeroupper check (default true => only 256-bit AVX triggers it).
11156 genVzeroupperIfNeeded();
11159 // Generate Vzeroupper instruction as needed to zero out upper 128b-bit of all YMM registers so that the
11160 // AVX/Legacy SSE transition penalties can be avoided. This function is been used in genPreserveCalleeSavedFltRegs
11161 // (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
11162 // 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
11163 // code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
11164 // if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
11167 // check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
11168 //    instruction, false to check if the function contains AVX instruction (either 128-bit or 256-bit).
//------------------------------------------------------------------------
// genVzeroupperIfNeeded: Emit a vzeroupper instruction if the method's
// generated code makes it necessary (to avoid AVX<->legacy-SSE transition
// penalties; see the comment block above).
//
// Arguments:
//    check256bitOnly - true: emit only if 256-bit AVX was used (epilog case);
//                      false: emit if any AVX was used (prolog case)
//
11170 void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
11172 bool emitVzeroUpper = false;
11173 if (check256bitOnly)
11175 emitVzeroUpper = getEmitter()->Contains256bitAVX();
11179 emitVzeroUpper = getEmitter()->ContainsAVX();
11182 if (emitVzeroUpper)
// vzeroupper requires AVX support; the emitter flags can only be set then.
11184 assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
11185 instGen(INS_vzeroupper);
11189 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
11191 //-----------------------------------------------------------------------------------
11192 // IsMultiRegPassedType: Returns true if the type is returned in multiple registers
11195 // hClass - type handle
11198 // true if type is passed in multiple registers, false otherwise.
11200 bool Compiler::IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass)
// A null class handle can never be a multi-reg passed struct.
11202 if (hClass == NO_CLASS_HANDLE)
11207 structPassingKind howToPassStruct;
11208 var_types returnType = getArgTypeForStruct(hClass, &howToPassStruct);
// getArgTypeForStruct reports TYP_STRUCT exactly when the arg needs multiple registers.
11210 return (returnType == TYP_STRUCT);
11213 //-----------------------------------------------------------------------------------
11214 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
11217 // hClass - type handle
11220 // true if type is returned in multiple registers, false otherwise.
11222 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
// A null class handle can never be a multi-reg returned struct.
11224 if (hClass == NO_CLASS_HANDLE)
11229 structPassingKind howToReturnStruct;
11230 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
// getReturnTypeForStruct reports TYP_STRUCT exactly when the return needs multiple registers.
11232 return (returnType == TYP_STRUCT);
11235 //----------------------------------------------
11236 // Methods that support HFA's for ARM32/ARM64
11237 //----------------------------------------------
// Returns true if hClass is a Homogeneous Floating-point Aggregate (HFA):
// GetHfaType yields a floating-point element type only for HFAs.
11239 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
11242 return varTypeIsFloating(GetHfaType(hClass));
// Tree overload: resolves the tree's struct handle (if any) and defers to
// the class-handle overload above.
11248 bool Compiler::IsHfa(GenTreePtr tree)
11251 return IsHfa(gtGetStructHandleIfPresent(tree));
// Tree overload: only TYP_STRUCT trees can be HFAs; for those, look up the
// HFA element type via the struct handle.
11257 var_types Compiler::GetHfaType(GenTreePtr tree)
11260 if (tree->TypeGet() == TYP_STRUCT)
11262 return GetHfaType(gtGetStructHandleIfPresent(tree));
// Tree overload: resolves the tree's struct handle and defers to the
// class-handle overload below.
11268 unsigned Compiler::GetHfaCount(GenTreePtr tree)
11270 return GetHfaCount(gtGetStructHandleIfPresent(tree));
// Returns the HFA element type of hClass (e.g. TYP_FLOAT/TYP_DOUBLE), or
// TYP_UNDEF if hClass is null or the VM reports it is not an HFA.
11273 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
11275 var_types result = TYP_UNDEF;
11276 if (hClass != NO_CLASS_HANDLE)
// Ask the VM for the HFA element type; CORINFO_TYPE_UNDEF means "not an HFA".
11279 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
11280 if (corType != CORINFO_TYPE_UNDEF)
11282 result = JITtype2varType(corType);
11284 #endif // FEATURE_HFA
11289 //------------------------------------------------------------------------
11290 // GetHfaCount: Given a class handle for an HFA struct
11291 // return the number of registers needed to hold the HFA
11293 // Note that on ARM32 the single precision registers overlap with
11294 // the double precision registers and for that reason each
11295 // double register is considered to be two single registers.
11296 // Thus for ARM32 an HFA of 4 doubles this function will return 8.
11297 // On ARM64 given an HFA of 4 singles or 4 doubles this function will
11298 // will return 4 for both.
11300 // hClass: the class handle of a HFA struct
11302 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
11304 assert(IsHfa(hClass));
11305 #ifdef _TARGET_ARM_
11306 // A HFA of doubles is twice as large as an HFA of singles for ARM32
11307 // (i.e. uses twice the number of single precision registers)
11308 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
11309 #else // _TARGET_ARM64_
// ARM64: each element (single or double) occupies one register, so the
// count is simply classSize / elementSize.
11310 var_types hfaType = GetHfaType(hClass);
11311 unsigned classSize = info.compCompHnd->getClassSize(hClass);
11312 // Note that the retail build issues a warning about a potential division by zero without the Max function
11313 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
11314 return classSize / elemSize;
11315 #endif // _TARGET_ARM64_
11318 #ifdef _TARGET_XARCH_
11320 //------------------------------------------------------------------------
11321 // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
11322 // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
11323 // X86/x64 has a special encoding for shift/rotate-by-constant-1.
11326 // ins: the base shift/rotate instruction
11327 // shiftByValue: the constant value by which we are shifting/rotating
//------------------------------------------------------------------------
// genMapShiftInsToShiftByConstantIns: Map a generic shift/rotate
// instruction to its shift-by-1 or shift-by-N constant-operand form.
// Relies on the instruction enum laying out INS_xxx, INS_xxx_1, INS_xxx_N
// consecutively (the asserts below verify that layout).
//
// Arguments:
//    ins          - the base shift/rotate instruction
//    shiftByValue - the constant shift/rotate amount
//
// Return Value:
//    The constant-form instruction (xxx_1 for a shift of 1, else xxx_N).
//
11329 instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
11331 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
11334 // Which format should we use?
11336 instruction shiftByConstantIns;
11338 if (shiftByValue == 1)
11340 // Use the shift-by-one format.
11342 assert(INS_rcl + 1 == INS_rcl_1);
11343 assert(INS_rcr + 1 == INS_rcr_1);
11344 assert(INS_rol + 1 == INS_rol_1);
11345 assert(INS_ror + 1 == INS_ror_1);
11346 assert(INS_shl + 1 == INS_shl_1);
11347 assert(INS_shr + 1 == INS_shr_1);
11348 assert(INS_sar + 1 == INS_sar_1);
11350 shiftByConstantIns = (instruction)(ins + 1);
11354 // Use the shift-by-NNN format.
11356 assert(INS_rcl + 2 == INS_rcl_N);
11357 assert(INS_rcr + 2 == INS_rcr_N);
11358 assert(INS_rol + 2 == INS_rol_N);
11359 assert(INS_ror + 2 == INS_ror_N);
11360 assert(INS_shl + 2 == INS_shl_N);
11361 assert(INS_shr + 2 == INS_shr_N);
11362 assert(INS_sar + 2 == INS_sar_N);
11364 shiftByConstantIns = (instruction)(ins + 2);
11367 return shiftByConstantIns;
11370 #endif // _TARGET_XARCH_
11372 #if !defined(LEGACY_BACKEND) && (defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_))
11374 //------------------------------------------------------------------------------------------------ //
11375 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
11378 // The number of the first argument with stack slot on the caller's frame.
11381 // On x64 Windows the caller always creates slots (homing space) in its frame for the
11382 //    first 4 arguments of a callee (register passed args). So, the variable number
11383 // (lclNum) for the first argument with a stack slot is always 0.
11384 // For System V systems or arm64, there is no such calling convention requirement, and the code needs to find
11385 // the first stack passed argument from the caller. This is done by iterating over
11386 // all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
11388 unsigned CodeGen::getFirstArgWithStackSlot()
11390 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
11391 unsigned baseVarNum = 0;
11392 #if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
11393 baseVarNum = compiler->lvaFirstStackIncomingArgNum;
11395 if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
11397 baseVarNum = compiler->lvaFirstStackIncomingArgNum;
11400 #endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
11402 // Iterate over all the local variables in the Lcl var table.
11403 // They contain all the implicit arguments - thisPtr, retBuf,
11404 // generic context, PInvoke cookie, var arg cookie,no-standard args, etc.
11405 LclVarDsc* varDsc = nullptr;
11406 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
11408 varDsc = &(compiler->lvaTable[i]);
11410 // We are iterating over the arguments only.
11411 assert(varDsc->lvIsParam);
11413 if (varDsc->lvArgReg == REG_STK)
11416 #if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
11417 compiler->lvaFirstStackIncomingArgNum = baseVarNum;
11418 #endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
11422 assert(varDsc != nullptr);
11426 #elif defined(_TARGET_AMD64_)
11429 // Not implemented for x86.
11430 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
11431 return BAD_VAR_NUM;
11432 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
11435 #endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_)
11437 //------------------------------------------------------------------------
11438 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
11440 void CodeGen::genSinglePush()
// Bump the tracked stack level by one pointer-sized slot.
11442 AddStackLevel(REGSIZE_BYTES);
11445 //------------------------------------------------------------------------
11446 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
11448 void CodeGen::genSinglePop()
// Drop the tracked stack level by one pointer-sized slot.
11450 SubtractStackLevel(REGSIZE_BYTES);
11453 //------------------------------------------------------------------------
11454 // genPushRegs: Push the given registers.
11457 // regs - mask or registers to push
11458 // byrefRegs - OUT arg. Set to byref registers that were pushed.
11459 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
11462 // Mask of registers pushed.
11465 // This function does not check if the register is marked as used, etc.
11467 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
11469 *byrefRegs = RBM_NONE;
11470 *noRefRegs = RBM_NONE;
11472 if (regs == RBM_NONE)
// With a fixed outgoing-arg area there should never be real pushes here.
11477 #if FEATURE_FIXED_OUT_ARGS
11479 NYI("Don't call genPushRegs with real regs!");
11482 #else // FEATURE_FIXED_OUT_ARGS
// Pushing relies on GC refs/byrefs occupying the same stack size as a native int.
11484 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
11485 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
11487 regMaskTP pushedRegs = regs;
// Push low-to-high, classifying each register by its current GC-ness so the
// caller can restore GC tracking in genPopRegs.
11489 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
11491 regMaskTP regBit = regMaskTP(1) << reg;
11493 if ((regBit & regs) == RBM_NONE)
11497 if (regBit & gcInfo.gcRegGCrefSetCur)
11501 else if (regBit & gcInfo.gcRegByrefSetCur)
11503 *byrefRegs |= regBit;
11506 else if (noRefRegs != NULL)
11508 *noRefRegs |= regBit;
11516 inst_RV(INS_push, reg, type);
// The register's value now lives on the stack; stop tracking it as a GC pointer.
11519 gcInfo.gcMarkRegSetNpt(regBit);
11526 #endif // FEATURE_FIXED_OUT_ARGS
11529 //------------------------------------------------------------------------
11530 // genPopRegs: Pop the registers that were pushed by genPushRegs().
11533 // regs - mask of registers to pop
11534 // byrefRegs - The byref registers that were pushed by genPushRegs().
11535 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
11540 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
11542 if (regs == RBM_NONE)
11547 #if FEATURE_FIXED_OUT_ARGS
11549 NYI("Don't call genPopRegs with real regs!");
11551 #else // FEATURE_FIXED_OUT_ARGS
// Sanity: the byref/noref masks must be subsets of regs, and none of the
// registers being popped may currently be live GC pointers.
11553 noway_assert((regs & byrefRegs) == byrefRegs);
11554 noway_assert((regs & noRefRegs) == noRefRegs);
11555 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
11557 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
11558 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
11560 // Walk the registers in the reverse order as genPushRegs()
11561 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
11563 regMaskTP regBit = regMaskTP(1) << reg;
11565 if ((regBit & regs) == RBM_NONE)
// Recover each register's GC classification from the masks recorded at push time.
11569 if (regBit & byrefRegs)
11573 else if (regBit & noRefRegs)
11582 inst_RV(INS_pop, reg, type);
// Re-mark the register as holding a GC ref/byref where applicable.
11585 if (type != TYP_INT)
11586 gcInfo.gcMarkRegPtrVal(reg, type);
11591 #endif // FEATURE_FIXED_OUT_ARGS
11594 /*****************************************************************************
11597 * This function should be called only after the sizes of the emitter blocks
11598 * have been finalized.
//------------------------------------------------------------------------
// genSetScopeInfo: Report all local-variable scope/location info (for debug
// info) to the EE, covering both the prolog scopes (psiScopeList) and the
// method-body scopes (siScopeList). Must run only after emitter block sizes
// are final, since it converts emitter locations to code offsets.
//
11601 void CodeGen::genSetScopeInfo()
11603 if (!compiler->opts.compScopeInfo)
11611 printf("*************** In genSetScopeInfo()\n");
// No source-level variables: report an empty table and finish.
11615 if (compiler->info.compVarScopesCount == 0)
11617 compiler->eeSetLVcount(0);
11618 compiler->eeSetLVdone();
11622 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
11623 noway_assert(psiOpenScopeList.scNext == nullptr);
11626 unsigned scopeCnt = siScopeCnt + psiScopeCnt;
11628 compiler->eeSetLVcount(scopeCnt);
11631 genTrnslLocalVarCount = scopeCnt;
11634 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
11638 // Record the scopes found for the parameters over the prolog.
11639 // The prolog needs to be treated differently as a variable may not
11640 // have the same info in the prolog block as is given by compiler->lvaTable.
11641 // eg. A register parameter is actually on the stack, before it is loaded to reg.
11643 CodeGen::psiScope* scopeP;
11645 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
11647 noway_assert(scopeP != nullptr);
11648 noway_assert(scopeP->scStartLoc.Valid());
11649 noway_assert(scopeP->scEndLoc.Valid());
11651 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
11652 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
11654 unsigned varNum = scopeP->scSlotNum;
11655 noway_assert(startOffs <= endOffs);
11657 // The range may be 0 if the prolog is empty. For such a case,
11658 // report the liveness of arguments to span at least the first
11659 // instruction in the method. This will be incorrect (except on
11660 // entry to the method) if the very first instruction of the method
11661 // is part of a loop. However, this should happen
11662 // very rarely, and the incorrectness is worth being able to look
11663 // at the argument on entry to the method.
11664 if (startOffs == endOffs)
11666 noway_assert(startOffs == 0);
11670 Compiler::siVarLoc varLoc;
// Prolog scopes are either a plain register or a plain stack slot.
11672 if (scopeP->scRegister)
11674 varLoc.vlType = Compiler::VLT_REG;
11675 varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
11679 varLoc.vlType = Compiler::VLT_STK;
11680 varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
11681 varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
11684 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
11687 // Record the scopes for the rest of the method.
11688 // Check that the LocalVarInfo scopes look OK
11689 noway_assert(siOpenScopeList.scNext == nullptr);
11691 CodeGen::siScope* scopeL;
11693 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
11695 noway_assert(scopeL != nullptr);
11696 noway_assert(scopeL->scStartLoc.Valid());
11697 noway_assert(scopeL->scEndLoc.Valid());
11699 // Find the start and end IP
11701 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
11702 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
11704 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
11706 // For stack vars, find the base register, and offset
11709 signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
11711 if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
11713 baseReg = REG_SPBASE;
11714 offset += scopeL->scStackLevel;
11718 baseReg = REG_FPBASE;
11721 // Now fill in the varLoc
11723 Compiler::siVarLoc varLoc;
11725 // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
11726 // for part of its lifetime, or in different registers for different parts of its lifetime.
11727 // This should only matter for non-debug code, where we do variable enregistration.
11728 // We should store the ranges of variable enregistration in the scope table.
11729 if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
11731 var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
11737 #ifdef _TARGET_64BIT_
11739 #endif // _TARGET_64BIT_
11741 varLoc.vlType = Compiler::VLT_REG;
11742 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11745 #ifndef _TARGET_64BIT_
11747 #if !CPU_HAS_FP_SUPPORT
// 32-bit targets: a long may live in a register pair (REG_REG) or split
// between a register and a stack slot (REG_STK).
11751 if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
11753 varLoc.vlType = Compiler::VLT_REG_REG;
11754 varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11755 varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
11759 varLoc.vlType = Compiler::VLT_REG_STK;
11760 varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11761 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
11762 if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
11764 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
11766 varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
11769 #endif // !_TARGET_64BIT_
11771 #ifdef _TARGET_64BIT_
11775 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
11776 // so no XMM registers can get debug information.
11777 varLoc.vlType = Compiler::VLT_REG_FP;
11778 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11781 #else // !_TARGET_64BIT_
11783 #if CPU_HAS_FP_SUPPORT
11786 if (isFloatRegType(type))
11788 varLoc.vlType = Compiler::VLT_FPSTK;
11789 varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11792 #endif // CPU_HAS_FP_SUPPORT
11794 #endif // !_TARGET_64BIT_
11796 #ifdef FEATURE_SIMD
11801 varLoc.vlType = Compiler::VLT_REG_FP;
11803 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
11804 // so no XMM registers can get debug information.
11806 // Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
11807 // in eeDispVar() --> getRegName() that regNumber is valid.
11808 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11810 #endif // FEATURE_SIMD
11813 noway_assert(!"Invalid type")
11818 assert(offset != BAD_STK_OFFS);
11819 LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
// Stack-homed variable: choose the stack location kind by actual type.
11820 switch (genActualType(varDsc->TypeGet()))
11827 case TYP_BLK: // Needed because of the TYP_BLK stress mode
11828 #ifdef FEATURE_SIMD
11834 #ifdef _TARGET_64BIT_
11837 #endif // _TARGET_64BIT_
11838 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
11839 // In the AMD64 ABI we are supposed to pass a struct by reference when its
11840 // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
11841 // the IR to comply with the ABI and therefore changes the type of the lclVar
11842 // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
11843 // this is still a struct by setting the lvIsTemp flag.
11844 // The same is true for ARM64 and structs > 16 bytes.
11845 // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
11846 // Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
11847 // to accommodate for this situation.
11848 if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
11850 assert(varDsc->lvIsParam);
11851 varLoc.vlType = Compiler::VLT_STK_BYREF;
11854 #endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
11856 varLoc.vlType = Compiler::VLT_STK;
11858 varLoc.vlStk.vlsBaseReg = baseReg;
11859 varLoc.vlStk.vlsOffset = offset;
11860 if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
11862 varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
11866 #ifndef _TARGET_64BIT_
// 32-bit targets: a long on the stack spans two slots (VLT_STK2).
11869 varLoc.vlType = Compiler::VLT_STK2;
11870 varLoc.vlStk2.vls2BaseReg = baseReg;
11871 varLoc.vlStk2.vls2Offset = offset;
11872 if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
11874 varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
11877 #endif // !_TARGET_64BIT_
11880 noway_assert(!"Invalid type");
// Body scopes are reported after the psiScopeCnt prolog entries.
11884 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
11885 scopeL->scAvailable, varLoc);
11888 compiler->eeSetLVdone();
11891 //------------------------------------------------------------------------
11892 // genSetScopeInfo: Record scope information for debug info
11896 // startOffs - the starting offset for this scope
11897 // length - the length of this scope
11898 // varNum - the lclVar for this scope info
11904 // Called for every scope info piece to record by the main genSetScopeInfo()
//------------------------------------------------------------------------
// genSetScopeInfo (per-entry overload): Record one scope entry, mapping the
// JIT's variable number to its IL number, then forward it to the EE via
// eeSetLVinfo. Called once per entry by the main genSetScopeInfo() above.
//
11906 void CodeGen::genSetScopeInfo(unsigned which,
11907 UNATIVE_OFFSET startOffs,
11908 UNATIVE_OFFSET length,
11912 Compiler::siVarLoc& varLoc)
11914 // We need to do some mapping while reporting back these variables.
11916 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
11917 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
11919 #ifdef _TARGET_X86_
11920 // Non-x86 platforms are allowed to access all arguments directly
11921 // so we don't need this code.
11923 // Is this a varargs function?
11925 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
11926 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
11928 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
11930 // All stack arguments (except the varargs handle) have to be
11931 // accessed via the varargs cookie. Discard generated info,
11932 // and just find its position relative to the varargs handle
11934 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
11935 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
11937 noway_assert(!compiler->opts.compDbgCode);
11941 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
11942 // arguments of vararg functions to avoid reporting them to GC.
11943 noway_assert(!compiler->lvaTable[varNum].lvRegister);
// Rewrite the location as an offset from the varargs cookie (VLT_FIXED_VA).
11944 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
11945 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
11947 noway_assert(cookieOffset < varOffset);
11948 unsigned offset = varOffset - cookieOffset;
11949 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
11950 noway_assert(offset < stkArgSize);
11951 offset = stkArgSize - offset;
11953 varLoc.vlType = Compiler::VLT_FIXED_VA;
11954 varLoc.vlFixedVarArg.vlfvOffset = offset;
11957 #endif // _TARGET_X86_
// Look up the source-level name for this LVnum (debug-dump support).
11959 VarName name = nullptr;
11963 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
11965 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
11967 name = compiler->info.compVarScopes[scopeNum].vsdName;
11971 // Hang on to this compiler->info.
11973 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
11975 tlvi.tlviVarNum = ilVarNum;
11976 tlvi.tlviLVnum = LVnum;
11977 tlvi.tlviName = name;
11978 tlvi.tlviStartPC = startOffs;
11979 tlvi.tlviLength = length;
11980 tlvi.tlviAvailable = avail;
11981 tlvi.tlviVarLoc = varLoc;
// Hand the finished entry off to the EE.
11985 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
11988 /*****************************************************************************/
11991 /*****************************************************************************
11994 * Can be called only after lviSetLocalVarInfo() has been called
11998 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
12000 if (!compiler->opts.compScopeInfo)
12003 if (compiler->info.compVarScopesCount == 0)
12006 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
12008 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
12010 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
12011 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
12012 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
12014 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
12021 /*****************************************************************************
12024 * Can be called only after lviSetLocalVarInfo() has been called
12028 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
12030 if (!compiler->opts.compScopeInfo)
12033 if (compiler->info.compVarScopesCount == 0)
12036 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
12038 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
12040 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
12041 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
12042 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
12044 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
12051 /*****************************************************************************/
12052 #endif // defined(DEBUG)
12053 #endif // LATE_DISASM
12057 /*****************************************************************************
12058 * Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
12061 void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
12063 if (mappingNum != unsigned(-1))
12065 printf("%d: ", mappingNum);
12068 IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
12070 if (offsx == BAD_IL_OFFSET)
12076 Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
12078 if (jitIsStackEmpty(offsx))
12080 printf(" STACK_EMPTY");
12083 if (jitIsCallInstruction(offsx))
12085 printf(" CALL_INSTRUCTION");
12090 ipMapping->ipmdNativeLoc.Print();
12091 // We can only call this after code generation. Is there any way to tell when it's legal to call?
12092 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
12094 if (ipMapping->ipmdIsLabel)
12102 void CodeGen::genIPmappingListDisp()
12104 unsigned mappingNum = 0;
12105 Compiler::IPmappingDsc* ipMapping;
12107 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
12109 genIPmappingDisp(mappingNum, ipMapping);
12116 /*****************************************************************************
12118 * Append an IPmappingDsc struct to the list that we're maintaining
12119 * for the debugger.
12120 * Record the instr offset as being at the current code gen position.
12123 void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
12125 if (!compiler->opts.compDbgInfo)
12130 assert(offsx != BAD_IL_OFFSET);
12132 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12134 case ICorDebugInfo::PROLOG:
12135 case ICorDebugInfo::EPILOG:
12140 if (offsx != ICorDebugInfo::NO_MAPPING)
12142 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
12145 // Ignore this one if it's the same IL offset as the last one we saw.
12146 // Note that we'll let through two identical IL offsets if the flag bits
12147 // differ, or two identical "special" mappings (e.g., PROLOG).
12148 if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
12150 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
12156 /* Create a mapping entry and append it to the list */
12158 Compiler::IPmappingDsc* addMapping =
12159 (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
12161 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
12162 addMapping->ipmdILoffsx = offsx;
12163 addMapping->ipmdIsLabel = isLabel;
12164 addMapping->ipmdNext = nullptr;
12166 if (compiler->genIPmappingList != nullptr)
12168 assert(compiler->genIPmappingLast != nullptr);
12169 assert(compiler->genIPmappingLast->ipmdNext == nullptr);
12170 compiler->genIPmappingLast->ipmdNext = addMapping;
12174 assert(compiler->genIPmappingLast == nullptr);
12175 compiler->genIPmappingList = addMapping;
12178 compiler->genIPmappingLast = addMapping;
12183 printf("Added IP mapping: ");
12184 genIPmappingDisp(unsigned(-1), addMapping);
12189 /*****************************************************************************
12191 * Prepend an IPmappingDsc struct to the list that we're maintaining
12192 * for the debugger.
12193 * Record the instr offset as being at the current code gen position.
12195 void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
12197 if (!compiler->opts.compDbgInfo)
12202 assert(offsx != BAD_IL_OFFSET);
12203 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
12205 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12207 case ICorDebugInfo::NO_MAPPING:
12208 case ICorDebugInfo::PROLOG:
12209 case ICorDebugInfo::EPILOG:
12213 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
12217 /* Create a mapping entry and prepend it to the list */
12219 Compiler::IPmappingDsc* addMapping =
12220 (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
12222 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
12223 addMapping->ipmdILoffsx = offsx;
12224 addMapping->ipmdIsLabel = true;
12225 addMapping->ipmdNext = nullptr;
12227 addMapping->ipmdNext = compiler->genIPmappingList;
12228 compiler->genIPmappingList = addMapping;
12230 if (compiler->genIPmappingLast == nullptr)
12232 compiler->genIPmappingLast = addMapping;
12238 printf("Added IP mapping to front: ");
12239 genIPmappingDisp(unsigned(-1), addMapping);
12244 /*****************************************************************************/
12246 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
12247 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
12248 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
12250 C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
12251 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
12252 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
12253 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
12255 //------------------------------------------------------------------------
12256 // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
12257 // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
12258 // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
12261 // offsx - the IL_OFFSETX value with the IL offset to extract.
12266 IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
12268 assert(offsx != BAD_IL_OFFSET);
12270 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12272 case ICorDebugInfo::NO_MAPPING:
12273 case ICorDebugInfo::PROLOG:
12274 case ICorDebugInfo::EPILOG:
12278 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
12282 //------------------------------------------------------------------------
12283 // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
12284 // distinguished values. Asserts if passed BAD_IL_OFFSET.
12287 // offsx - the IL_OFFSETX value with the IL offset to extract.
12292 IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
12294 assert(offsx != BAD_IL_OFFSET);
12296 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12298 case ICorDebugInfo::NO_MAPPING:
12299 case ICorDebugInfo::PROLOG:
12300 case ICorDebugInfo::EPILOG:
12301 return IL_OFFSET(offsx);
12304 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
12308 //------------------------------------------------------------------------
12309 // jitIsStackEmpty: Does the IL offset have the stack empty bit set?
12310 // Asserts if passed BAD_IL_OFFSET.
12313 // offsx - the IL_OFFSETX value to check
12316 // 'true' if the stack empty bit is set; 'false' otherwise.
12318 bool jitIsStackEmpty(IL_OFFSETX offsx)
12320 assert(offsx != BAD_IL_OFFSET);
12322 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12324 case ICorDebugInfo::NO_MAPPING:
12325 case ICorDebugInfo::PROLOG:
12326 case ICorDebugInfo::EPILOG:
12330 return (offsx & IL_OFFSETX_STKBIT) == 0;
12334 //------------------------------------------------------------------------
12335 // jitIsCallInstruction: Does the IL offset have the call instruction bit set?
12336 // Asserts if passed BAD_IL_OFFSET.
12339 // offsx - the IL_OFFSETX value to check
12342 // 'true' if the call instruction bit is set; 'false' otherwise.
12344 bool jitIsCallInstruction(IL_OFFSETX offsx)
12346 assert(offsx != BAD_IL_OFFSET);
12348 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12350 case ICorDebugInfo::NO_MAPPING:
12351 case ICorDebugInfo::PROLOG:
12352 case ICorDebugInfo::EPILOG:
12356 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
12360 /*****************************************************************************/
12362 void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
12364 if (!compiler->opts.compDbgCode)
12369 if (offsx == BAD_IL_OFFSET)
12374 /* If other IL were offsets reported, skip */
12376 if (compiler->genIPmappingLast == nullptr)
12381 if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
12386 /* offsx was the last reported offset. Make sure that we generated native code */
12388 if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
12394 /*****************************************************************************
12396 * Shut down the IP-mapping logic, report the info to the EE.
12399 void CodeGen::genIPmappingGen()
12401 if (!compiler->opts.compDbgInfo)
12409 printf("*************** In genIPmappingGen()\n");
12413 if (compiler->genIPmappingList == nullptr)
12415 compiler->eeSetLIcount(0);
12416 compiler->eeSetLIdone();
12420 Compiler::IPmappingDsc* tmpMapping;
12421 Compiler::IPmappingDsc* prevMapping;
12422 unsigned mappingCnt;
12423 UNATIVE_OFFSET lastNativeOfs;
12425 /* First count the number of distinct mapping records */
12428 lastNativeOfs = UNATIVE_OFFSET(~0);
12430 for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
12431 tmpMapping = tmpMapping->ipmdNext)
12433 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
12435 // Managed RetVal - since new sequence points are emitted to identify IL calls,
12436 // make sure that those are not filtered and do not interfere with filtering of
12437 // other sequence points.
12438 if (jitIsCallInstruction(srcIP))
12444 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
12446 if (nextNativeOfs != lastNativeOfs)
12449 lastNativeOfs = nextNativeOfs;
12450 prevMapping = tmpMapping;
12454 /* If there are mappings with the same native offset, then:
12455 o If one of them is NO_MAPPING, ignore it
12456 o If one of them is a label, report that and ignore the other one
12457 o Else report the higher IL offset
12460 PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
12461 if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
12463 // If the previous entry was NO_MAPPING, ignore it
12464 prevMapping->ipmdNativeLoc.Init();
12465 prevMapping = tmpMapping;
12467 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
12469 // If the current entry is NO_MAPPING, ignore it
12470 // Leave prevMapping unchanged as tmpMapping is no longer valid
12471 tmpMapping->ipmdNativeLoc.Init();
12473 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
12475 // counting for special cases: see below
12477 prevMapping = tmpMapping;
12481 noway_assert(prevMapping != nullptr);
12482 noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
12483 lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
12485 /* The previous block had the same native offset. We have to
12486 discard one of the mappings. Simply reinitialize ipmdNativeLoc
12487 and prevMapping will be ignored later. */
12489 if (prevMapping->ipmdIsLabel)
12491 // Leave prevMapping unchanged as tmpMapping is no longer valid
12492 tmpMapping->ipmdNativeLoc.Init();
12496 prevMapping->ipmdNativeLoc.Init();
12497 prevMapping = tmpMapping;
12502 /* Tell them how many mapping records we've got */
12504 compiler->eeSetLIcount(mappingCnt);
12506 /* Now tell them about the mappings */
12509 lastNativeOfs = UNATIVE_OFFSET(~0);
12511 for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
12513 // Do we have to skip this record ?
12514 if (!tmpMapping->ipmdNativeLoc.Valid())
12519 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
12520 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
12522 if (jitIsCallInstruction(srcIP))
12524 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
12526 else if (nextNativeOfs != lastNativeOfs)
12528 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
12529 lastNativeOfs = nextNativeOfs;
12531 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
12533 // For the special case of an IL instruction with no body
12534 // followed by the epilog (say ret void immediately preceding
12535 // the method end), we put two entries in, so that we'll stop
12536 // at the (empty) ret statement if the user tries to put a
12537 // breakpoint there, and then have the option of seeing the
12538 // epilog or not based on SetUnmappedStopMask for the stepper.
12539 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
12545 //This check is disabled. It is always true that any time this check asserts, the debugger would have a
12546 //problem with IL source level debugging. However, for a C# file, it only matters if things are on
12547 //different source lines. As a result, we have all sorts of latent problems with how we emit debug
12548 //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
12550 if (compiler->opts.compDbgCode)
12552 //Assert that the first instruction of every basic block with more than one incoming edge has a
12553 //different sequence point from each incoming block.
12555 //It turns out that the only thing we really have to assert is that the first statement in each basic
12556 //block has an IL offset and appears in eeBoundaries.
12557 for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
12559 if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
12561 noway_assert(block->bbTreeList->gtOper == GT_STMT);
12562 bool found = false;
12563 if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12565 IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
12566 for (unsigned i = 0; i < eeBoundariesCount; ++i)
12568 if (eeBoundaries[i].ilOffset == ilOffs)
12575 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
12581 compiler->eeSetLIdone();
/*============================================================================
 *
 *   These are empty stubs to help the late dis-assembler to compile
 *   if the late disassembler is being built into a non-DEBUG build.
 *
 *============================================================================
 */

#if defined(LATE_DISASM)
#if !defined(DEBUG)

/* virtual */
const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
{
    // Non-DEBUG builds carry no local-variable name information.
    return NULL;
}

/* virtual */
const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
{
    // Non-DEBUG builds carry no local-variable name information.
    return NULL;
}

/*****************************************************************************/
#endif // !defined(DEBUG)
#endif // defined(LATE_DISASM)
/*****************************************************************************/