1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Code Generator Common: XX
9 XX Methods common to all architectures and register allocation strategies XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16 // identical, and which should probably be moved here.
27 #ifndef JIT32_GCENCODER
28 #include "gcinfoencoder.h"
31 /*****************************************************************************/
33 const BYTE genTypeSizes[] = {
34 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
39 const BYTE genTypeAlignments[] = {
40 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
45 const BYTE genTypeStSzs[] = {
46 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
51 const BYTE genActualTypes[] = {
52 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
57 void CodeGenInterface::setFramePointerRequiredEH(bool value)
59 m_cgFramePointerRequired = value;
61 #ifndef JIT32_GCENCODER
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
73 printf("Method has EH, marking method as fully interruptible\n");
77 m_cgInterruptible = true;
79 #endif // JIT32_GCENCODER
82 /*****************************************************************************/
83 CodeGenInterface* getCodeGenerator(Compiler* comp)
85 return new (comp, CMK_Codegen) CodeGen(comp);
88 // CodeGen constructor
89 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler)
94 /*****************************************************************************/
96 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
98 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
106 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
110 #if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
115 regTracker.rsTrackInit(compiler, ®Set);
116 gcInfo.regSet = ®Set;
117 m_cgEmitter = new (compiler->getAllocator()) emitter();
118 m_cgEmitter->codeGen = this;
119 m_cgEmitter->gcInfo = &gcInfo;
122 setVerbose(compiler->verbose);
128 #if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
129 // This appears to be x86-specific. It's attempting to make sure all offsets to temps
130 // are large. For ARM, this doesn't interact well with our decision about whether to use
131 // R10 or not as a reserved register.
132 if (regSet.rsStressRegs())
133 compiler->tmpIntSpillMax = (SCHAR_MAX / sizeof(int));
134 #endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
139 #ifdef LEGACY_BACKEND
140 // TODO-Cleanup: These used to be set in rsInit() - should they be moved to RegSet??
141 // They are also accessed by the register allocators and fgMorphLclVar().
142 intRegState.rsCurRegArgNum = 0;
143 floatRegState.rsCurRegArgNum = 0;
144 #endif // LEGACY_BACKEND
147 getDisAssembler().disInit(compiler);
151 genTempLiveChg = true;
152 genTrnslLocalVarCount = 0;
154 // Shouldn't be used before it is set in genFnProlog()
155 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
157 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
158 // Shouldn't be used before it is set in genFnProlog()
159 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
160 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
163 #ifdef _TARGET_AMD64_
164 // This will be set before final frame layout.
165 compiler->compVSQuirkStackPaddingNeeded = 0;
167 // Set to true if we perform the Quirk that fixes the PPP issue
168 compiler->compQuirkForPPPflag = false;
169 #endif // _TARGET_AMD64_
171 #ifdef LEGACY_BACKEND
172 genFlagsEqualToNone();
173 #endif // LEGACY_BACKEND
175 // Initialize the IP-mapping logic.
176 compiler->genIPmappingList = nullptr;
177 compiler->genIPmappingLast = nullptr;
178 compiler->genCallSite2ILOffsetMap = nullptr;
180 /* Assume that we not fully interruptible */
182 genInterruptible = false;
183 #ifdef _TARGET_ARMARCH_
184 hasTailCalls = false;
185 #endif // _TARGET_ARMARCH_
187 genInterruptibleUsed = false;
188 genCurDispOffset = (unsigned)-1;
192 void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
194 tree->gtRegNum = reg;
195 #ifdef LEGACY_BACKEND
197 #endif // LEGACY_BACKEND
200 #if CPU_LONG_USES_REGPAIR
201 void CodeGenInterface::genMarkTreeInRegPair(GenTree* tree, regPairNo regPair)
203 tree->gtRegPair = regPair;
204 #ifdef LEGACY_BACKEND
206 #endif // LEGACY_BACKEND
210 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
212 //---------------------------------------------------------------------
213 // genTotalFrameSize - return the "total" size of the stack frame, including local size
214 // and callee-saved register size. There are a few things "missing" depending on the
215 // platform. The function genCallerSPtoInitialSPdelta() includes those things.
217 // For ARM, this doesn't include the prespilled registers.
219 // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
220 // It also doesn't include the pushed return address.
225 int CodeGenInterface::genTotalFrameSize()
227 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
229 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
231 assert(totalFrameSize >= 0);
232 return totalFrameSize;
235 //---------------------------------------------------------------------
236 // genSPtoFPdelta - return the offset from SP to the frame pointer.
237 // This number is going to be positive, since SP must be at the lowest
240 // There must be a frame pointer to call this function!
242 int CodeGenInterface::genSPtoFPdelta()
244 assert(isFramePointerUsed());
248 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
254 //---------------------------------------------------------------------
255 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
256 // This number is going to be negative, since the Caller-SP is at a higher
257 // address than the frame pointer.
259 // There must be a frame pointer to call this function!
261 int CodeGenInterface::genCallerSPtoFPdelta()
263 assert(isFramePointerUsed());
264 int callerSPtoFPdelta = 0;
266 #if defined(_TARGET_ARM_)
267 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
268 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
269 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
270 #elif defined(_TARGET_X86_)
271 // Thanks to ebp chaining, the difference between ebp-based addresses
272 // and caller-SP-relative addresses is just the 2 pointers:
275 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
277 #error "Unknown _TARGET_"
280 assert(callerSPtoFPdelta <= 0);
281 return callerSPtoFPdelta;
284 //---------------------------------------------------------------------
285 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
287 // This number will be negative.
289 int CodeGenInterface::genCallerSPtoInitialSPdelta()
291 int callerSPtoSPdelta = 0;
293 #if defined(_TARGET_ARM_)
294 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
295 callerSPtoSPdelta -= genTotalFrameSize();
296 #elif defined(_TARGET_X86_)
297 callerSPtoSPdelta -= genTotalFrameSize();
298 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
300 // compCalleeRegsPushed does not account for the frame pointer
301 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
302 if (isFramePointerUsed())
304 callerSPtoSPdelta -= REGSIZE_BYTES;
307 #error "Unknown _TARGET_"
310 assert(callerSPtoSPdelta <= 0);
311 return callerSPtoSPdelta;
314 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
316 /*****************************************************************************
317 * Should we round simple operations (assignments, arithmetic operations, etc.)
322 bool CodeGen::genShouldRoundFP()
324 RoundLevel roundLevel = getRoundFloatLevel();
329 case ROUND_CMP_CONST:
334 assert(roundLevel == ROUND_ALWAYS);
339 /*****************************************************************************
341 * Initialize some global variables.
344 void CodeGen::genPrepForCompiler()
349 /* Figure out which non-register variables hold pointers */
351 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
353 // Figure out which variables live in registers.
354 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
355 // in a register (i.e. they live on the stack for all or part of their lifetime).
356 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
358 VarSetOps::AssignNoCopy(compiler, compiler->raRegVarsMask, VarSetOps::MakeEmpty(compiler));
360 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
362 if (varDsc->lvTracked
363 #ifndef LEGACY_BACKEND
364 || varDsc->lvIsRegCandidate()
365 #endif // !LEGACY_BACKEND
368 if (varDsc->lvRegister
369 #if FEATURE_STACK_FP_X87
370 && !varDsc->IsFloatRegType()
374 VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
376 else if (compiler->lvaIsGCTracked(varDsc))
378 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
382 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
383 genLastLiveMask = RBM_NONE;
385 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
389 /*****************************************************************************
390 * To report exception handling information to the VM, we need the size of the exception
391 * handling regions. To compute that, we need to emit labels for the beginning block of
392 * an EH region, and the block that immediately follows a region. Go through the EH
393 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
395 * The beginning blocks of the EH regions already should have this flag set.
397 * No blocks should be added or removed after this.
399 * This code is closely couple with genReportEH() in the sense that any block
400 * that this procedure has determined it needs to have a label has to be selected
401 * using the same logic both here and in genReportEH(), so basically any time there is
402 * a change in the way we handle EH reporting, we have to keep the logic of these two
406 void CodeGen::genPrepForEHCodegen()
408 assert(!compiler->fgSafeBasicBlockCreation);
413 bool anyFinallys = false;
415 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
416 HBtab < HBtabEnd; HBtab++)
418 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
419 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
421 if (HBtab->ebdTryLast->bbNext != nullptr)
423 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
426 if (HBtab->ebdHndLast->bbNext != nullptr)
428 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
431 if (HBtab->HasFilter())
433 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
434 // The block after the last block of the filter is
435 // the handler begin block, which we already asserted
436 // has BBF_HAS_LABEL set.
439 #if FEATURE_EH_CALLFINALLY_THUNKS
440 if (HBtab->HasFinallyHandler())
444 #endif // FEATURE_EH_CALLFINALLY_THUNKS
447 #if FEATURE_EH_CALLFINALLY_THUNKS
450 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
452 if (block->bbJumpKind == BBJ_CALLFINALLY)
454 BasicBlock* bbToLabel = block->bbNext;
455 if (block->isBBCallAlwaysPair())
457 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
459 if (bbToLabel != nullptr)
461 bbToLabel->bbFlags |= BBF_HAS_LABEL;
463 } // block is BBJ_CALLFINALLY
465 } // if (anyFinallys)
466 #endif // FEATURE_EH_CALLFINALLY_THUNKS
469 void CodeGenInterface::genUpdateLife(GenTree* tree)
471 compiler->compUpdateLife</*ForCodeGen*/ true>(tree);
474 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
476 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
479 #ifdef LEGACY_BACKEND
480 // Returns the liveSet after tree has executed.
481 // "tree" MUST occur in the current statement, AFTER the most recent
482 // update of compiler->compCurLifeTree and compiler->compCurLife.
484 VARSET_VALRET_TP CodeGen::genUpdateLiveSetForward(GenTree* tree)
486 VARSET_TP startLiveSet(VarSetOps::MakeCopy(compiler, compiler->compCurLife));
488 assert(tree != compiler->compCurLifeTree);
489 if (compiler->compCurLifeTree == nullptr)
491 assert(compiler->compCurStmt != nullptr);
492 startNode = compiler->compCurStmt->gtStmt.gtStmtList;
496 startNode = compiler->compCurLifeTree->gtNext;
498 return compiler->fgUpdateLiveSet(startLiveSet, startNode, tree);
501 // Determine the registers that are live after "second" has been evaluated,
502 // but which are not live after "first".
504 // 1. "first" must occur after compiler->compCurLifeTree in execution order for the current statement
505 // 2. "second" must occur after "first" in the current statement
507 regMaskTP CodeGen::genNewLiveRegMask(GenTree* first, GenTree* second)
509 // First, compute the liveset after "first"
510 VARSET_TP firstLiveSet = genUpdateLiveSetForward(first);
511 // Now, update the set forward from "first" to "second"
512 VARSET_TP secondLiveSet = compiler->fgUpdateLiveSet(firstLiveSet, first->gtNext, second);
513 regMaskTP newLiveMask = genLiveMask(VarSetOps::Diff(compiler, secondLiveSet, firstLiveSet));
518 // Return the register mask for the given register variable
520 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
522 regMaskTP regMask = RBM_NONE;
524 assert(varDsc->lvIsInReg());
526 if (varTypeIsFloating(varDsc->TypeGet()))
528 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
532 regMask = genRegMask(varDsc->lvRegNum);
533 if (isRegPairType(varDsc->lvType))
535 regMask |= genRegMask(varDsc->lvOtherReg);
541 // Return the register mask for the given lclVar or regVar tree node
543 regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
545 assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
547 regMaskTP regMask = RBM_NONE;
548 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
549 if (varDsc->lvPromoted)
551 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
553 noway_assert(compiler->lvaTable[i].lvIsStructField);
554 if (compiler->lvaTable[i].lvIsInReg())
556 regMask |= genGetRegMask(&compiler->lvaTable[i]);
560 else if (varDsc->lvIsInReg())
562 regMask = genGetRegMask(varDsc);
567 // The given lclVar is either going live (being born) or dying.
568 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
569 // Update regSet.rsMaskVars accordingly.
571 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
573 #if FEATURE_STACK_FP_X87
574 // The stack fp reg vars are handled elsewhere
575 if (varTypeIsFloating(varDsc->TypeGet()))
579 regMaskTP regMask = genGetRegMask(varDsc);
582 if (compiler->verbose)
584 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
585 varDsc->PrintVarReg();
586 printf(" is becoming %s ", (isDying) ? "dead" : "live");
587 Compiler::printTreeID(tree);
594 // We'd like to be able to assert the following, however if we are walking
595 // through a qmark/colon tree, we may encounter multiple last-use nodes.
596 // assert((regSet.rsMaskVars & regMask) == regMask);
597 regSet.RemoveMaskVars(regMask);
601 assert((regSet.rsMaskVars & regMask) == 0);
602 regSet.AddMaskVars(regMask);
606 //----------------------------------------------------------------------
607 // compNoGCHelperCallKillSet:
609 // Gets a register mask that represents the kill set for a helper call.
610 // Not all JIT Helper calls follow the standard ABI on the target architecture.
612 // TODO-CQ: Currently this list is incomplete (not all helpers calls are
613 // enumerated) and not 100% accurate (some killsets are bigger than
614 // what they really are).
615 // There's some work to be done in several places in the JIT to
616 // accurately track the registers that are getting killed by
618 // a) LSRA needs several changes to accomodate more precise killsets
619 // for every helper call it sees (both explicitly [easy] and
620 // implicitly [hard])
621 // b) Currently for AMD64, when we generate code for a helper call
622 // we're independently over-pessimizing the killsets of the call
623 // (independently from LSRA) and this needs changes
624 // both in CodeGenAmd64.cpp and emitx86.cpp.
626 // The best solution for this problem would be to try to centralize
627 // the killset information in a single place but then make the
628 // corresponding changes so every code generation phase is in sync
631 // The interim solution is to only add known helper calls that don't
632 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
635 // helper - The helper being inquired about
638 // Mask of register kills -- registers whose values are no longer guaranteed to be the same.
640 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
644 case CORINFO_HELP_ASSIGN_BYREF:
645 #if defined(_TARGET_AMD64_)
646 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
647 #elif defined(_TARGET_ARMARCH_)
648 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
649 #elif defined(_TARGET_X86_)
650 return RBM_ESI | RBM_EDI | RBM_ECX;
652 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
653 return RBM_CALLEE_TRASH;
656 #if defined(_TARGET_ARMARCH_)
657 case CORINFO_HELP_ASSIGN_REF:
658 case CORINFO_HELP_CHECKED_ASSIGN_REF:
659 return RBM_CALLEE_TRASH_WRITEBARRIER;
662 case CORINFO_HELP_PROF_FCN_ENTER:
663 #ifdef RBM_PROFILER_ENTER_TRASH
664 return RBM_PROFILER_ENTER_TRASH;
666 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
669 case CORINFO_HELP_PROF_FCN_LEAVE:
670 #ifdef RBM_PROFILER_LEAVE_TRASH
671 return RBM_PROFILER_LEAVE_TRASH;
673 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
676 case CORINFO_HELP_PROF_FCN_TAILCALL:
677 #ifdef RBM_PROFILER_TAILCALL_TRASH
678 return RBM_PROFILER_TAILCALL_TRASH;
680 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
684 case CORINFO_HELP_ASSIGN_REF_EAX:
685 case CORINFO_HELP_ASSIGN_REF_ECX:
686 case CORINFO_HELP_ASSIGN_REF_EBX:
687 case CORINFO_HELP_ASSIGN_REF_EBP:
688 case CORINFO_HELP_ASSIGN_REF_ESI:
689 case CORINFO_HELP_ASSIGN_REF_EDI:
691 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
692 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
693 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
694 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
695 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
696 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
699 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
700 case CORINFO_HELP_ASSIGN_REF:
701 case CORINFO_HELP_CHECKED_ASSIGN_REF:
702 return RBM_EAX | RBM_EDX;
703 #endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
706 case CORINFO_HELP_STOP_FOR_GC:
707 return RBM_STOP_FOR_GC_TRASH;
709 case CORINFO_HELP_INIT_PINVOKE_FRAME:
710 return RBM_INIT_PINVOKE_FRAME_TRASH;
713 return RBM_CALLEE_TRASH;
717 //----------------------------------------------------------------------
718 // compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
719 // contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
720 // what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
721 // GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
722 // compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
723 // address registers as killed for liveness purposes, since their values change. However, they still are
724 // valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
726 // Note: This list may not be complete and defaults to the default RBM_CALLEE_TRASH_NOGC registers.
729 // helper - The helper being inquired about
732 // Mask of GC register kills
734 regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
736 assert(emitter::emitNoGChelper(helper));
740 #if defined(_TARGET_XARCH_)
741 case CORINFO_HELP_PROF_FCN_ENTER:
742 return RBM_PROFILER_ENTER_TRASH;
744 case CORINFO_HELP_PROF_FCN_LEAVE:
745 return RBM_PROFILER_LEAVE_TRASH;
747 case CORINFO_HELP_PROF_FCN_TAILCALL:
748 return RBM_PROFILER_TAILCALL_TRASH;
749 #endif // defined(_TARGET_XARCH_)
751 #if defined(_TARGET_X86_)
752 case CORINFO_HELP_ASSIGN_BYREF:
753 // This helper only trashes ECX.
755 #endif // defined(_TARGET_X86_)
757 #if defined(_TARGET_ARMARCH_)
758 case CORINFO_HELP_ASSIGN_BYREF:
759 return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
761 case CORINFO_HELP_ASSIGN_REF:
762 case CORINFO_HELP_CHECKED_ASSIGN_REF:
763 return RBM_CALLEE_GCTRASH_WRITEBARRIER;
766 #if defined(_TARGET_X86_)
767 case CORINFO_HELP_INIT_PINVOKE_FRAME:
768 return RBM_INIT_PINVOKE_FRAME_TRASH;
769 #endif // defined(_TARGET_X86_)
772 return RBM_CALLEE_TRASH_NOGC;
776 // Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e.,
777 // regSet.rsMaskVars as well)
778 // if the given lclVar (or indir(addr(local)))/regVar node is going live (being born) or dying.
779 template <bool ForCodeGen>
780 void Compiler::compUpdateLifeVar(GenTree* tree, VARSET_TP* pLastUseVars)
782 GenTree* indirAddrLocal = fgIsIndirOfAddrOfLocal(tree);
783 assert(tree->OperIsNonPhiLocal() || indirAddrLocal != nullptr);
785 // Get the local var tree -- if "tree" is "Ldobj(addr(x))", or "ind(addr(x))" this is "x", else it's "tree".
786 GenTree* lclVarTree = indirAddrLocal;
787 if (lclVarTree == nullptr)
791 unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
792 LclVarDsc* varDsc = lvaTable + lclNum;
795 #if !defined(_TARGET_AMD64_)
796 // There are no addr nodes on ARM and we are experimenting with encountering vars in 'random' order.
797 // Struct fields are not traversed in a consistent order, so ignore them when
798 // verifying that we see the var nodes in execution order
801 if (tree->OperIsIndir())
803 assert(indirAddrLocal != NULL);
805 else if (tree->gtNext != NULL && tree->gtNext->gtOper == GT_ADDR &&
806 ((tree->gtNext->gtNext == NULL || !tree->gtNext->gtNext->OperIsIndir())))
808 assert(tree->IsLocal()); // Can only take the address of a local.
809 // The ADDR might occur in a context where the address it contributes is eventually
810 // dereferenced, so we can't say that this is not a use or def.
813 // TODO-ARM64-Bug?: These asserts don't seem right for ARM64: I don't understand why we have to assert
814 // two consecutive lclvars (in execution order) can only be observed if the first one is a struct field.
815 // It seems to me this is code only applicable to the legacy JIT and not RyuJIT (and therefore why it was
816 // ifdef'ed out for AMD64).
817 else if (!varDsc->lvIsStructField)
820 for (prevTree = tree->gtPrev;
821 prevTree != NULL && prevTree != compCurLifeTree;
822 prevTree = prevTree->gtPrev)
824 if ((prevTree->gtOper == GT_LCL_VAR) || (prevTree->gtOper == GT_REG_VAR))
826 LclVarDsc * prevVarDsc = lvaTable + prevTree->gtLclVarCommon.gtLclNum;
828 // These are the only things for which this method MUST be called
829 assert(prevVarDsc->lvIsStructField);
832 assert(prevTree == compCurLifeTree);
836 #endif // !_TARGET_AMD64_
839 compCurLifeTree = tree;
840 VARSET_TP newLife(VarSetOps::MakeCopy(this, compCurLife));
842 // By codegen, a struct may not be TYP_STRUCT, so we have to
843 // check lvPromoted, for the case where the fields are being
845 if (!varDsc->lvTracked && !varDsc->lvPromoted)
850 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0); // if it's "x <op>=
851 // ..." then variable
852 // "x" must have had a
853 // previous, original,
855 bool isDying = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
856 #ifndef LEGACY_BACKEND
857 bool spill = ((tree->gtFlags & GTF_SPILL) != 0);
858 #endif // !LEGACY_BACKEND
860 #ifndef LEGACY_BACKEND
861 // For RyuJIT backend, since all tracked vars are register candidates, but not all are in registers at all times,
862 // we maintain two separate sets of variables - the total set of variables that are either
863 // born or dying here, and the subset of those that are on the stack
864 VARSET_TP stackVarDeltaSet(VarSetOps::MakeEmpty(this));
865 #endif // !LEGACY_BACKEND
867 if (isBorn || isDying)
869 bool hasDeadTrackedFieldVars = false; // If this is true, then, for a LDOBJ(ADDR(<promoted struct local>)),
870 VARSET_TP* deadTrackedFieldVars =
871 nullptr; // *deadTrackedFieldVars indicates which tracked field vars are dying.
872 VARSET_TP varDeltaSet(VarSetOps::MakeEmpty(this));
874 if (varDsc->lvTracked)
876 VarSetOps::AddElemD(this, varDeltaSet, varDsc->lvVarIndex);
879 #ifndef LEGACY_BACKEND
880 if (isBorn && varDsc->lvIsRegCandidate() && tree->gtHasReg())
882 codeGen->genUpdateVarReg(varDsc, tree);
884 #endif // !LEGACY_BACKEND
885 if (varDsc->lvIsInReg()
886 #ifndef LEGACY_BACKEND
887 && tree->gtRegNum != REG_NA
888 #endif // !LEGACY_BACKEND
891 codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree));
893 #ifndef LEGACY_BACKEND
896 VarSetOps::AddElemD(this, stackVarDeltaSet, varDsc->lvVarIndex);
898 #endif // !LEGACY_BACKEND
901 else if (varDsc->lvPromoted)
903 if (indirAddrLocal != nullptr && isDying)
905 assert(!isBorn); // GTF_VAR_DEATH only set for LDOBJ last use.
906 hasDeadTrackedFieldVars = GetPromotedStructDeathVars()->Lookup(indirAddrLocal, &deadTrackedFieldVars);
907 if (hasDeadTrackedFieldVars)
909 VarSetOps::Assign(this, varDeltaSet, *deadTrackedFieldVars);
913 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
915 LclVarDsc* fldVarDsc = &(lvaTable[i]);
916 noway_assert(fldVarDsc->lvIsStructField);
917 if (fldVarDsc->lvTracked)
919 unsigned fldVarIndex = fldVarDsc->lvVarIndex;
920 noway_assert(fldVarIndex < lvaTrackedCount);
921 if (!hasDeadTrackedFieldVars)
923 VarSetOps::AddElemD(this, varDeltaSet, fldVarIndex);
926 // We repeat this call here and below to avoid the VarSetOps::IsMember
927 // test in this, the common case, where we have no deadTrackedFieldVars.
928 if (fldVarDsc->lvIsInReg())
930 #ifndef LEGACY_BACKEND
933 codeGen->genUpdateVarReg(fldVarDsc, tree);
935 #endif // !LEGACY_BACKEND
936 codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
938 #ifndef LEGACY_BACKEND
941 VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
943 #endif // !LEGACY_BACKEND
946 else if (ForCodeGen && VarSetOps::IsMember(this, varDeltaSet, fldVarIndex))
948 if (lvaTable[i].lvIsInReg())
950 #ifndef LEGACY_BACKEND
953 codeGen->genUpdateVarReg(fldVarDsc, tree);
955 #endif // !LEGACY_BACKEND
956 codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
958 #ifndef LEGACY_BACKEND
961 VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
963 #endif // !LEGACY_BACKEND
969 // First, update the live set
972 // We'd like to be able to assert the following, however if we are walking
973 // through a qmark/colon tree, we may encounter multiple last-use nodes.
974 // assert (VarSetOps::IsSubset(compiler, regVarDeltaSet, newLife));
975 VarSetOps::DiffD(this, newLife, varDeltaSet);
976 if (pLastUseVars != nullptr)
978 VarSetOps::Assign(this, *pLastUseVars, varDeltaSet);
983 // This shouldn't be in newLife, unless this is debug code, in which
984 // case we keep vars live everywhere, OR the variable is address-exposed,
985 // OR this block is part of a try block, in which case it may be live at the handler
986 // Could add a check that, if it's in newLife, that it's also in
987 // fgGetHandlerLiveVars(compCurBB), but seems excessive
989 // For a dead store, it can be the case that we set both isBorn and isDying to true.
990 // (We don't eliminate dead stores under MinOpts, so we can't assume they're always
991 // eliminated.) If it's both, we handled it above.
992 VarSetOps::UnionD(this, newLife, varDeltaSet);
996 if (!VarSetOps::Equal(this, compCurLife, newLife))
1001 printf("\t\t\t\t\t\t\tLive vars: ");
1002 dumpConvertedVarSet(this, compCurLife);
1004 dumpConvertedVarSet(this, newLife);
1009 VarSetOps::Assign(this, compCurLife, newLife);
1013 #ifndef LEGACY_BACKEND
1015 // Only add vars to the gcInfo.gcVarPtrSetCur if they are currently on stack, since the
1016 // gcInfo.gcTrkStkPtrLcls
1017 // includes all TRACKED vars that EVER live on the stack (i.e. are not always in a register).
1018 VARSET_TP gcTrkStkDeltaSet(
1019 VarSetOps::Intersection(this, codeGen->gcInfo.gcTrkStkPtrLcls, stackVarDeltaSet));
1020 if (!VarSetOps::IsEmpty(this, gcTrkStkDeltaSet))
1025 printf("\t\t\t\t\t\t\tGCvars: ");
1026 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
1033 VarSetOps::UnionD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
1037 VarSetOps::DiffD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
1043 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
1049 #else // LEGACY_BACKEND
1054 VARSET_TP gcVarPtrSetNew(VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
1055 if (!VarSetOps::Equal(this, codeGen->gcInfo.gcVarPtrSetCur, gcVarPtrSetNew))
1057 printf("\t\t\t\t\t\t\tGCvars: ");
1058 dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
1060 dumpConvertedVarSet(this, gcVarPtrSetNew);
1066 VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
1067 VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
1069 #endif // LEGACY_BACKEND
1071 codeGen->siUpdate();
1075 #ifndef LEGACY_BACKEND
1076 if (ForCodeGen && spill)
1078 assert(!varDsc->lvPromoted);
1079 codeGen->genSpillVar(tree);
1080 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
1082 if (!VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
1084 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
1088 printf("\t\t\t\t\t\t\tVar V%02u becoming live\n", varDsc - lvaTable);
1094 #endif // !LEGACY_BACKEND
1097 // Need an explicit instantiation.
1098 template void Compiler::compUpdateLifeVar<false>(GenTree* tree, VARSET_TP* pLastUseVars);
1100 template <bool ForCodeGen>
1101 void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
1108 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
1109 dumpConvertedVarSet(this, compCurLife);
1110 printf(" -> %s ", VarSetOps::ToString(this, newLife));
1111 dumpConvertedVarSet(this, newLife);
1116 /* We should only be called when the live set has actually changed */
1118 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
1122 VarSetOps::Assign(this, compCurLife, newLife);
1126 /* Figure out which variables are becoming live/dead at this point */
1128 // deadSet = compCurLife - newLife
1129 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
1131 // bornSet = newLife - compCurLife
1132 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
1134 /* Can't simultaneously become live and dead at the same time */
1136 // (deadSet UNION bornSet) != EMPTY
1137 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
1138 // (deadSet INTERSECTION bornSet) == EMPTY
1139 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
1141 #ifdef LEGACY_BACKEND
1142 // In the LEGACY_BACKEND case, we only consider variables that are fully enregisterd
1143 // and there may be none.
1144 VarSetOps::IntersectionD(this, deadSet, raRegVarsMask);
1145 VarSetOps::IntersectionD(this, bornSet, raRegVarsMask);
1146 // And all gcTrkStkPtrLcls that are now live will be on the stack
1147 VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
1148 VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
1149 #endif // LEGACY_BACKEND
1151 VarSetOps::Assign(this, compCurLife, newLife);
1153 // Handle the dying vars first, then the newly live vars.
1154 // This is because, in the RyuJIT backend case, they may occupy registers that
1155 // will be occupied by another var that is newly live.
1156 VarSetOps::Iter deadIter(this, deadSet);
1157 unsigned deadVarIndex = 0;
1158 while (deadIter.NextElem(&deadVarIndex))
1160 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
1161 varDsc = lvaTable + varNum;
1162 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
1163 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
1165 if (varDsc->lvIsInReg())
1167 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
1169 regMaskTP regMask = varDsc->lvRegMask();
1172 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
1176 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
1178 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
1180 #ifndef LEGACY_BACKEND
1181 // This isn't in a register, so update the gcVarPtrSetCur.
1182 // (Note that in the LEGACY_BACKEND case gcVarPtrSetCur is updated above unconditionally
1183 // for all gcTrkStkPtrLcls in newLife, because none of them ever live in a register.)
1184 else if (isGCRef || isByRef)
1186 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
1187 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
1189 #endif // !LEGACY_BACKEND
1192 VarSetOps::Iter bornIter(this, bornSet);
1193 unsigned bornVarIndex = 0;
1194 while (bornIter.NextElem(&bornVarIndex))
1196 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
1197 varDsc = lvaTable + varNum;
1198 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
1199 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
1201 if (varDsc->lvIsInReg())
1203 #ifndef LEGACY_BACKEND
1205 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
1207 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
1210 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
1211 #endif // !LEGACY_BACKEND
1212 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
1213 regMaskTP regMask = varDsc->lvRegMask();
1216 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
1220 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
1223 #ifndef LEGACY_BACKEND
1224 // This isn't in a register, so update the gcVarPtrSetCur
1225 else if (lvaIsGCTracked(varDsc))
1227 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
1228 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
1230 #endif // !LEGACY_BACKEND
1233 codeGen->siUpdate();
1236 // Need an explicit instantiation.
1237 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
1239 #ifdef LEGACY_BACKEND
1241 /*****************************************************************************
1243 * Get the mask of integer registers that contain 'live' enregistered
1244 * local variables after "tree".
1246 * The output is the mask of integer registers that are currently
1247 * alive and holding the enregistered local variables.
1249 regMaskTP CodeGenInterface::genLiveMask(GenTree* tree)
1251 regMaskTP liveMask = regSet.rsMaskVars;
1254 if (compiler->compCurLifeTree == nullptr)
1256 assert(compiler->compCurStmt != nullptr);
1257 nextNode = compiler->compCurStmt->gtStmt.gtStmtList;
1261 nextNode = compiler->compCurLifeTree->gtNext;
1264 // Theoretically, we should always be able to find "tree" by walking
1265 // forward in execution order. But unfortunately, there is at least
1266 // one case (addressing) where a node may be evaluated out of order
1267 // So, we have to handle that case
1268 bool outOfOrder = false;
1269 for (; nextNode != tree->gtNext; nextNode = nextNode->gtNext)
1271 if (nextNode == nullptr)
1276 if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
1278 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
1279 bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
1280 if (isBorn || isDying)
1282 regMaskTP regMask = genGetRegMask(nextNode);
1283 if (regMask != RBM_NONE)
1287 liveMask |= regMask;
1291 liveMask &= ~(regMask);
1299 assert(compiler->compCurLifeTree != nullptr);
1300 liveMask = regSet.rsMaskVars;
1301 // We were unable to find "tree" by traversing forward. We must now go
1302 // backward from compiler->compCurLifeTree instead. We have to start with compiler->compCurLifeTree,
1303 // since regSet.rsMaskVars reflects its completed execution
1304 for (nextNode = compiler->compCurLifeTree; nextNode != tree; nextNode = nextNode->gtPrev)
1306 assert(nextNode != nullptr);
1308 if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
1310 bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
1311 bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
1312 if (isBorn || isDying)
1314 regMaskTP regMask = genGetRegMask(nextNode);
1315 if (regMask != RBM_NONE)
1317 // We're going backward - so things born are removed
1321 liveMask &= ~(regMask);
1325 liveMask |= regMask;
1335 /*****************************************************************************
1337 * Get the mask of integer registers that contain 'live' enregistered
1340 * The input is a liveSet which contains a set of local
1341 * variables that are currently alive
1343 * The output is the mask of x86 integer registers that are currently
1344 * alive and holding the enregistered local variables
1347 regMaskTP CodeGenInterface::genLiveMask(VARSET_VALARG_TP liveSet)
1349 // Check for the zero LiveSet mask
1350 if (VarSetOps::IsEmpty(compiler, liveSet))
1355 // set if our liveSet matches the one we have cached: genLastLiveSet -> genLastLiveMask
1356 if (VarSetOps::Equal(compiler, liveSet, genLastLiveSet))
1358 return genLastLiveMask;
1361 regMaskTP liveMask = 0;
1363 VarSetOps::Iter iter(compiler, liveSet);
1364 unsigned varIndex = 0;
1365 while (iter.NextElem(&varIndex))
1368 // If the variable is not enregistered, then it can't contribute to the liveMask
1369 if (!VarSetOps::IsMember(compiler, compiler->raRegVarsMask, varIndex))
1374 // Find the variable in compiler->lvaTable
1375 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
1376 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1378 #if !FEATURE_FP_REGALLOC
1379 // If the variable is a floating point type, then it can't contribute to the liveMask
1380 if (varDsc->IsFloatRegType())
1386 noway_assert(compiler->lvaTable[varNum].lvRegister);
1389 if (varTypeIsFloating(varDsc->TypeGet()))
1391 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
1395 regBit = genRegMask(varDsc->lvRegNum);
1397 // For longs we may have two regs
1398 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
1400 regBit |= genRegMask(varDsc->lvOtherReg);
1404 noway_assert(regBit != 0);
1406 // We should not already have any of these bits set
1407 noway_assert((liveMask & regBit) == 0);
1409 // Update the liveMask with the register bits that are live
1413 // cache the last mapping between gtLiveSet -> liveMask
1414 VarSetOps::Assign(compiler, genLastLiveSet, liveSet);
1415 genLastLiveMask = liveMask;
1422 /*****************************************************************************
1426 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
1428 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
1431 /*****************************************************************************
1433 * Generate a reload.
1435 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
1437 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
1440 #ifdef LEGACY_BACKEND
1441 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
1442 void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg)
1444 var_types tmpType = tmp->tdTempType();
1445 getEmitter()->emitIns_R_S(ins_FloatLoad(type), emitActualTypeSize(tmpType), reg, tmp->tdTempNum(), 0);
1448 #endif // LEGACY_BACKEND
1451 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
1456 //----------------------------------------------------------------------
1457 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
1460 // tree - spilled GenTree node
1463 // TempDsc corresponding to tree
1464 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
1466 // tree must be in spilled state.
1467 assert((tree->gtFlags & GTF_SPILLED) != 0);
1469 // Get the tree's SpillDsc.
1470 RegSet::SpillDsc* prevDsc;
1471 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
1472 assert(spillDsc != nullptr);
1474 // Get the temp desc.
1475 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
1479 #ifdef _TARGET_XARCH_
1481 #ifdef _TARGET_AMD64_
1482 // Returns relocation type hint for an addr.
1483 // Note that there are no reloc hints on x86.
1486 // addr - data address
1489 // relocation type hint
1491 unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
1493 return compiler->eeGetRelocTypeHint((void*)addr);
1495 #endif //_TARGET_AMD64_
1497 // Return true if an absolute indirect data address can be encoded as IP-relative.
1498 // offset. Note that this method should be used only when the caller knows that
1499 // the address is an icon value that VM has given and there is no GenTree node
1500 // representing it. Otherwise, one should always use FitsInAddrBase().
1503 // addr - an absolute indirect data address
1506 // true if indir data addr could be encoded as IP-relative offset.
1508 bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
1510 #ifdef _TARGET_AMD64_
1511 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
1513 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
1518 // Return true if an indirect code address can be encoded as IP-relative offset.
1519 // Note that this method should be used only when the caller knows that the
1520 // address is an icon value that VM has given and there is no GenTree node
1521 // representing it. Otherwise, one should always use FitsInAddrBase().
1524 // addr - an absolute indirect code address
1527 // true if indir code addr could be encoded as IP-relative offset.
1529 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
1531 #ifdef _TARGET_AMD64_
1532 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
1534 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
1539 // Return true if an indirect code address can be encoded as 32-bit displacement
1540 // relative to zero. Note that this method should be used only when the caller
1541 // knows that the address is an icon value that VM has given and there is no
1542 // GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
1545 // addr - absolute indirect code address
1548 // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
1550 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
1552 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
1555 // Return true if an absolute indirect code address needs a relocation recorded with VM.
1558 // addr - an absolute indirect code address
1561 // true if indir code addr needs a relocation recorded with VM
1563 bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
1565 // If generating relocatable ngen code, then all code addr should go through relocation
1566 if (compiler->opts.compReloc)
1571 #ifdef _TARGET_AMD64_
1572 // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
1573 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
1578 // It could be possible that the code indir addr could be encoded as 32-bit displacement relative
1579 // to zero. But we don't need to emit a relocation in that case.
1581 #else //_TARGET_X86_
1582 // On x86 there is need for recording relocations during jitting,
1583 // because all addrs fit within 32-bits.
1585 #endif //_TARGET_X86_
1588 // Return true if a direct code address needs to be marked as relocatable.
1591 // addr - absolute direct code address
1594 // true if direct code addr needs a relocation recorded with VM
1596 bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
1598 // If generating relocatable ngen code, then all code addr should go through relocation
1599 if (compiler->opts.compReloc)
1604 #ifdef _TARGET_AMD64_
1605 // By default all direct code addresses go through relocation so that VM will setup
1606 // a jump stub if addr cannot be encoded as pc-relative offset.
1608 #else //_TARGET_X86_
1609 // On x86 there is no need for recording relocations during jitting,
1610 // because all addrs fit within 32-bits.
1612 #endif //_TARGET_X86_
1614 #endif //_TARGET_XARCH_
1616 /*****************************************************************************
1618 * The following can be used to create basic blocks that serve as labels for
1619 * the emitter. Use with caution - these are not real basic blocks!
1624 BasicBlock* CodeGen::genCreateTempLabel()
1627 // These blocks don't affect FP
1628 compiler->fgSafeBasicBlockCreation = true;
1631 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1634 compiler->fgSafeBasicBlockCreation = false;
1637 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
1639 // Use coldness of current block, as this label will
1640 // be contained in it.
1641 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
1645 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1647 block->bbTgtStkDepth = genStackLevel / sizeof(int);
1654 void CodeGen::genDefineTempLabel(BasicBlock* label)
1657 if (compiler->opts.dspCode)
1659 printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
1663 label->bbEmitCookie =
1664 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1666 #ifdef LEGACY_BACKEND
1667 /* gcInfo.gcRegGCrefSetCur does not account for redundant load-suppression
1668 of GC vars, and the emitter will not know about */
1670 regTracker.rsTrackRegClrPtr();
1674 /*****************************************************************************
1676 * Adjust the stack pointer by the given value; assumes that this follows
1677 * a call so only callee-saved registers (and registers that may hold a
1678 * return value) are used at this point.
1681 void CodeGen::genAdjustSP(ssize_t delta)
1683 #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
1684 if (delta == sizeof(int))
1685 inst_RV(INS_pop, REG_ECX, TYP_INT);
1688 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1691 //------------------------------------------------------------------------
1692 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1695 // block - The BasicBlock for which we are about to generate code.
1698 // Must be called just prior to generating code for 'block'.
1701 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1702 // and if 'block' is a throw helper block with a non-zero stack level.
1704 void CodeGen::genAdjustStackLevel(BasicBlock* block)
1706 #if !FEATURE_FIXED_OUT_ARGS
1707 // Check for inserted throw blocks and adjust genStackLevel.
1708 CLANG_FORMAT_COMMENT_ANCHOR;
1710 #if defined(UNIX_X86_ABI)
1711 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1713 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1714 // at this point if a jump to this block is made in the middle of pushing arugments.
1716 // Here we restore SP to prevent potential stack alignment issues.
1717 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1721 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1723 noway_assert(block->bbFlags & BBF_JMP_TARGET);
1725 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1727 if (genStackLevel != 0)
1730 getEmitter()->emitMarkStackLvl(genStackLevel);
1731 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1733 #else // _TARGET_X86_
1734 NYI("Need emitMarkStackLvl()");
1735 #endif // _TARGET_X86_
1738 #endif // !FEATURE_FIXED_OUT_ARGS
1741 #ifdef _TARGET_ARMARCH_
1743 // alignmentWB is out param
1744 unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1746 unsigned alignment = 0;
1747 unsigned opSize = 0;
1749 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1751 opSize = InferStructOpSizeAlign(op, &alignment);
1755 alignment = genTypeAlignments[op->TypeGet()];
1756 opSize = genTypeSizes[op->TypeGet()];
1759 assert(opSize != 0);
1760 assert(alignment != 0);
1762 (*alignmentWB) = alignment;
1766 // alignmentWB is out param
1767 unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1769 unsigned alignment = 0;
1770 unsigned opSize = 0;
1772 while (op->gtOper == GT_COMMA)
1774 op = op->gtOp.gtOp2;
1777 if (op->gtOper == GT_OBJ)
1779 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1780 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1782 (unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1784 else if (op->gtOper == GT_LCL_VAR)
1786 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1787 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1788 assert(varDsc->lvType == TYP_STRUCT);
1789 opSize = varDsc->lvSize();
1790 #ifndef _TARGET_64BIT_
1791 if (varDsc->lvStructDoubleAlign)
1793 alignment = TARGET_POINTER_SIZE * 2;
1796 #endif // !_TARGET_64BIT_
1798 alignment = TARGET_POINTER_SIZE;
1801 else if (op->OperIsCopyBlkOp())
1803 GenTree* op2 = op->gtOp.gtOp2;
1805 if (op2->OperGet() == GT_CNS_INT)
1807 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1809 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1810 opSize = (unsigned)roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1811 alignment = (unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd),
1812 TARGET_POINTER_SIZE);
1816 opSize = (unsigned)op2->gtIntCon.gtIconVal;
1817 GenTree* op1 = op->gtOp.gtOp1;
1818 assert(op1->OperGet() == GT_LIST);
1819 GenTree* dstAddr = op1->gtOp.gtOp1;
1820 if (dstAddr->OperGet() == GT_ADDR)
1822 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1826 assert(!"Unhandle dstAddr node");
1827 alignment = TARGET_POINTER_SIZE;
1833 noway_assert(!"Variable sized COPYBLK register arg!");
1835 alignment = TARGET_POINTER_SIZE;
1838 else if (op->gtOper == GT_MKREFANY)
1840 opSize = TARGET_POINTER_SIZE * 2;
1841 alignment = TARGET_POINTER_SIZE;
1843 else if (op->IsArgPlaceHolderNode())
1845 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1846 assert(clsHnd != 0);
1847 opSize = (unsigned)roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1849 (unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1853 assert(!"Unhandled gtOper");
1854 opSize = TARGET_POINTER_SIZE;
1855 alignment = TARGET_POINTER_SIZE;
1858 assert(opSize != 0);
1859 assert(alignment != 0);
1861 (*alignmentWB) = alignment;
1865 #endif // _TARGET_ARMARCH_
1867 /*****************************************************************************
1869 * Take an address expression and try to find the best set of components to
1870 * form an address mode; returns non-zero if this is successful.
1872 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1873 * Refactor this code so that the underlying analysis can be used in
1874 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1875 * option to not generate the code.
1877 * 'fold' specifies if it is OK to fold the array index which hangs off
1880 * If successful, the parameters will be set to the following values:
1882 * *rv1Ptr ... base operand
1883 * *rv2Ptr ... optional operand
1884 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1885 * #if SCALED_ADDR_MODES
1886 * *mulPtr ... optional multiplier (2/4/8) for rv2
1887 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1889 * *cnsPtr ... integer constant [optional]
1891 * The 'mode' parameter may have one of the following values:
1894 * +1 ... we're trying to compute a value via 'LEA'
1897 * 0 ... we're trying to form an address mode
1899 * -1 ... we're generating code for an address mode,
1900 * and thus the address must already form an
1901 * address mode (without any further work)
1903 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1904 * identifies the components that might be used to
1905 * form an address mode later on.
1908 bool CodeGen::genCreateAddrMode(GenTree* addr,
1915 #if SCALED_ADDR_MODES
1921 #ifndef LEGACY_BACKEND
1922 assert(nogen == true);
1923 #endif // !LEGACY_BACKEND
1926 The following indirections are valid address modes on x86/x64:
1928 [ icon] * not handled here
1932 [reg1 + reg2 + icon]
1939 [reg1 + 2 * reg2 + icon]
1940 [reg1 + 4 * reg2 + icon]
1941 [reg1 + 8 * reg2 + icon]
1943 The following indirections are valid address modes on arm64:
1948 [reg1 + reg2 * natural-scale]
1952 /* All indirect address modes require the address to be an addition */
1954 if (addr->gtOper != GT_ADD)
1959 // Can't use indirect addressing mode as we need to check for overflow.
1960 // Also, can't use 'lea' as it doesn't set the flags.
1962 if (addr->gtOverflow())
1967 GenTree* rv1 = nullptr;
1968 GenTree* rv2 = nullptr;
1974 #if SCALED_ADDR_MODES
1980 /* What order are the sub-operands to be evaluated */
1982 if (addr->gtFlags & GTF_REVERSE_OPS)
1984 op1 = addr->gtOp.gtOp2;
1985 op2 = addr->gtOp.gtOp1;
1989 op1 = addr->gtOp.gtOp1;
1990 op2 = addr->gtOp.gtOp2;
1993 bool rev = false; // Is op2 first in the evaluation order?
1996 A complex address mode can combine the following operands:
1998 op1 ... base address
1999 op2 ... optional scaled index
2000 #if SCALED_ADDR_MODES
2001 mul ... optional multiplier (2/4/8) for op2
2003 cns ... optional displacement
2005 Here we try to find such a set of operands and arrange for these
2006 to sit in registers.
2010 #if SCALED_ADDR_MODES
2015 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
2016 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
2017 here if we find a scaled index.
2019 CLANG_FORMAT_COMMENT_ANCHOR;
2021 #if SCALED_ADDR_MODES
2025 #ifdef LEGACY_BACKEND
2026 /* Check both operands as far as being register variables */
2030 if (op1->gtOper == GT_LCL_VAR)
2032 if (op2->gtOper == GT_LCL_VAR)
2035 #endif // LEGACY_BACKEND
2037 /* Special case: keep constants as 'op2' */
2039 if (op1->IsCnsIntOrI())
2041 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
2047 /* Check for an addition of a constant */
2049 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
2051 /* We're adding a constant */
2053 cns += op2->gtIntConCommon.IconValue();
2055 #ifdef LEGACY_BACKEND
2056 /* Can (and should) we use "add reg, icon" ? */
2058 if (op1->InReg() && mode == 1 && !nogen)
2060 regNumber reg1 = op1->gtRegNum;
2062 if ((regMask == 0 || (regMask & genRegMask(reg1))) && genRegTrashable(reg1, addr))
2064 // In case genMarkLclVar(op1) bashed it above and it is
2065 // the last use of the variable.
2069 /* 'reg1' is trashable, so add "icon" into it */
2071 genIncRegBy(reg1, cns, addr, addr->TypeGet());
2073 genUpdateLife(addr);
2077 #endif // LEGACY_BACKEND
2079 #if defined(_TARGET_ARMARCH_) && !defined(LEGACY_BACKEND)
2083 /* Inspect the operand the constant is being added to */
2085 switch (op1->gtOper)
2089 if (op1->gtOverflow())
2094 op2 = op1->gtOp.gtOp2;
2095 op1 = op1->gtOp.gtOp1;
2099 #if SCALED_ADDR_MODES && (!defined(_TARGET_ARMARCH_) || defined(LEGACY_BACKEND))
2100 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2102 if (op1->gtOverflow())
2104 return false; // Need overflow check
2111 mul = op1->GetScaledIndex();
2114 /* We can use "[mul*rv2 + icon]" */
2117 rv2 = op1->gtOp.gtOp1;
2129 /* The best we can do is "[rv1 + icon]" */
2137 // op2 is not a constant. So keep on trying.
2138 CLANG_FORMAT_COMMENT_ANCHOR;
2140 #ifdef LEGACY_BACKEND
2141 // Does op1 or op2 already sit in a register?
2144 /* op1 is sitting in a register */
2146 else if (op2->InReg())
2148 /* op2 is sitting in a register. Keep the enregistered value as op1 */
2154 noway_assert(rev == false);
2158 #endif // LEGACY_BACKEND
2160 /* Neither op1 nor op2 are sitting in a register right now */
2162 switch (op1->gtOper)
2164 #if !defined(_TARGET_ARMARCH_) || defined(LEGACY_BACKEND)
2165 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2168 if (op1->gtOverflow())
2173 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
2175 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
2176 op1 = op1->gtOp.gtOp1;
2183 #if SCALED_ADDR_MODES
2187 if (op1->gtOverflow())
2196 mul = op1->GetScaledIndex();
2199 /* 'op1' is a scaled value */
2202 rv2 = op1->gtOp.gtOp1;
2205 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2207 if (jitIsScaleIndexMul(argScale * mul))
2209 mul = mul * argScale;
2210 rv2 = rv2->gtOp.gtOp1;
2218 noway_assert(rev == false);
2225 #endif // SCALED_ADDR_MODES
2226 #endif // !_TARGET_ARMARCH || LEGACY_BACKEND
2235 op1 = op1->gtOp.gtOp1;
2245 op1 = op1->gtOp.gtOp2;
2253 switch (op2->gtOper)
2255 #if !defined(_TARGET_ARMARCH_) || defined(LEGACY_BACKEND)
2256 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
2259 if (op2->gtOverflow())
2264 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
2266 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
2267 op2 = op2->gtOp.gtOp1;
2274 #if SCALED_ADDR_MODES
2278 if (op2->gtOverflow())
2287 mul = op2->GetScaledIndex();
2290 // 'op2' is a scaled value...is it's argument also scaled?
2292 rv2 = op2->gtOp.gtOp1;
2293 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2295 if (jitIsScaleIndexMul(argScale * mul))
2297 mul = mul * argScale;
2298 rv2 = rv2->gtOp.gtOp1;
2312 #endif // SCALED_ADDR_MODES
2313 #endif // !_TARGET_ARMARCH || LEGACY_BACKEND
2322 op2 = op2->gtOp.gtOp1;
2332 op2 = op2->gtOp.gtOp2;
2342 #ifdef LEGACY_BACKEND
2343 // op1 is in a register.
2344 // Note that this case only occurs during codegen for LEGACY_BACKEND.
2346 // Is op2 an addition or a scaled value?
2350 switch (op2->gtOper)
2354 if (op2->gtOverflow())
2359 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
2361 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
2362 op2 = op2->gtOp.gtOp1;
2368 #if SCALED_ADDR_MODES
2372 if (op2->gtOverflow())
2381 mul = op2->GetScaledIndex();
2385 rv2 = op2->gtOp.gtOp1;
2387 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
2389 if (jitIsScaleIndexMul(argScale * mul))
2391 mul = mul * argScale;
2392 rv2 = rv2->gtOp.gtOp1;
2404 #endif // SCALED_ADDR_MODES
2409 #endif // LEGACY_BACKEND
2413 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
2417 #ifdef _TARGET_ARM64_
2423 #ifdef LEGACY_BACKEND
2424 /* Check for register variables */
2428 if (rv1 && rv1->gtOper == GT_LCL_VAR)
2430 if (rv2 && rv2->gtOper == GT_LCL_VAR)
2433 #endif // LEGACY_BACKEND
2437 /* Make sure a GC address doesn't end up in 'rv2' */
2439 if (varTypeIsGC(rv2->TypeGet()))
2441 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
2450 /* Special case: constant array index (that is range-checked) */
2451 CLANG_FORMAT_COMMENT_ANCHOR;
2453 #if defined(LEGACY_BACKEND)
2454 // If we've already placed rv2 in a register, we are probably being called in a context that has already
2455 // presumed that an addressing mode will be created, even if rv2 is constant, and if we fold we may not find a
2456 // useful addressing mode (e.g. if we had [mul * rv2 + cns] it might happen to fold to [cns2].
2457 if (mode == -1 && rv2->InReg())
2468 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
2470 /* For valuetype arrays where we can't use the scaled address
2471 mode, rv2 will point to the scaled index. So we have to do
2474 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
2482 /* May be a simple array. rv2 will points to the actual index */
2488 /* Get hold of the array index and see if it's a constant */
2489 if (index->IsIntCnsFitsInI32())
2491 /* Get hold of the index value */
2492 ssize_t ixv = index->AsIntConCommon()->IconValue();
2494 #if SCALED_ADDR_MODES
2495 /* Scale the index if necessary */
2502 if (FitsIn<INT32>(cns + ixv))
2504 /* Add the scaled index to the offset value */
2508 #if SCALED_ADDR_MODES
2509 /* There is no scaled operand any more */
2518 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
2519 noway_assert(rv1 || mul != 1);
2521 noway_assert(FitsIn<INT32>(cns));
2523 if (rv1 == nullptr && rv2 == nullptr)
2528 /* Success - return the various components to the caller */
2533 #if SCALED_ADDR_MODES
2536 // TODO-Cleanup: The offset is signed and it should be returned as such. See also
2537 // GenTreeAddrMode::gtOffset and its associated cleanup note.
2538 *cnsPtr = (unsigned)cns;
2543 /*****************************************************************************
2544 * The condition to use for (the jmp/set for) the given type of operation
2546 * In case of amd64, this routine should be used when there is no gentree available
2547 * and one needs to generate jumps based on integer comparisons. When gentree is
2548 * available always use its overloaded version.
2553 emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
2555 const static BYTE genJCCinsSigned[] = {
2556 #if defined(_TARGET_XARCH_)
2563 #ifndef LEGACY_BACKEND
2564 EJ_je, // GT_TEST_EQ
2565 EJ_jne, // GT_TEST_NE
2567 #elif defined(_TARGET_ARMARCH_)
2574 #if defined(_TARGET_ARM64_)
2575 EJ_eq, // GT_TEST_EQ
2576 EJ_ne, // GT_TEST_NE
2581 const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
2583 #if defined(_TARGET_XARCH_)
2590 #ifndef LEGACY_BACKEND
2591 EJ_je, // GT_TEST_EQ
2592 EJ_jne, // GT_TEST_NE
2594 #elif defined(_TARGET_ARMARCH_)
2601 #if defined(_TARGET_ARM64_)
2602 EJ_eq, // GT_TEST_EQ
2603 EJ_ne, // GT_TEST_NE
2608 const static BYTE genJCCinsLogical[] = /* logical operation */
2610 #if defined(_TARGET_XARCH_)
2611 EJ_je, // GT_EQ (Z == 1)
2612 EJ_jne, // GT_NE (Z == 0)
2613 EJ_js, // GT_LT (S == 1)
2615 EJ_jns, // GT_GE (S == 0)
2617 #ifndef LEGACY_BACKEND
2618 EJ_NONE, // GT_TEST_EQ
2619 EJ_NONE, // GT_TEST_NE
2621 #elif defined(_TARGET_ARMARCH_)
2622 EJ_eq, // GT_EQ (Z == 1)
2623 EJ_ne, // GT_NE (Z == 0)
2624 EJ_mi, // GT_LT (N == 1)
2626 EJ_pl, // GT_GE (N == 0)
2628 #if defined(_TARGET_ARM64_)
2629 EJ_eq, // GT_TEST_EQ
2630 EJ_ne, // GT_TEST_NE
2635 #if defined(_TARGET_XARCH_)
2636 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
2637 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
2638 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
2639 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
2640 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
2641 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
2642 #ifndef LEGACY_BACKEND
2643 assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
2644 assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
2647 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
2648 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
2649 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
2650 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
2651 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
2652 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
2653 #ifndef LEGACY_BACKEND
2654 assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
2655 assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
2658 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
2659 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
2660 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
2661 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
2662 #elif defined(_TARGET_ARMARCH_)
2663 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
2664 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
2665 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
2666 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
2667 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
2668 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
2670 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
2671 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
2672 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
2673 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
2674 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
2675 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
2677 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
2678 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
2679 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
2680 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
2682 assert(!"unknown arch");
2684 assert(GenTree::OperIsCompare(cmp));
2686 emitJumpKind result = EJ_COUNT;
2688 if (compareKind == CK_UNSIGNED)
2690 result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
2692 else if (compareKind == CK_SIGNED)
2694 result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
2696 else if (compareKind == CK_LOGICAL)
2698 result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
2700 assert(result != EJ_COUNT);
2704 #ifndef LEGACY_BACKEND
2705 #ifdef _TARGET_ARMARCH_
2706 //------------------------------------------------------------------------
2707 // genEmitGSCookieCheck: Generate code to check that the GS cookie
2708 // wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
2710 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2712 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
2714 // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
2715 // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
2716 if (!pushReg && (compiler->info.compRetType == TYP_REF))
2717 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2719 // We need two temporary registers, to load the GS cookie values and compare them. We can't use
2720 // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
2721 // callee-trash registers, which should not contain anything interesting at this point.
2722 // We don't have any IR node representing this check, so LSRA can't communicate registers
2725 regNumber regGSConst = REG_GSCOOKIE_TMP_0;
2726 regNumber regGSValue = REG_GSCOOKIE_TMP_1;
2728 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
2730 // load the GS cookie constant into a reg
2732 genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
2736 // Ngen case - GS cookie constant needs to be accessed through an indirection.
2737 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2738 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
2740 // Load this method's GS value from the stack frame
2741 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
2742 // Compare with the GC cookie constant
2743 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
2745 BasicBlock* gsCheckBlk = genCreateTempLabel();
2746 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2747 inst_JMP(jmpEqual, gsCheckBlk);
2748 // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
2749 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
2750 genDefineTempLabel(gsCheckBlk);
2752 #endif // _TARGET_ARMARCH_
2753 #endif // !LEGACY_BACKEND
2755 /*****************************************************************************
2757 * Generate an exit sequence for a return from a method (note: when compiling
2758 * for speed there might be multiple exit points).
2761 void CodeGen::genExitCode(BasicBlock* block)
2763 /* Just wrote the first instruction of the epilog - inform debugger
2764 Note that this may result in a duplicate IPmapping entry, and
2767 // For non-optimized debuggable code, there is only one epilog.
2768 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
2770 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
2771 if (compiler->getNeedsGSSecurityCookie())
2773 genEmitGSCookieCheck(jmpEpilog);
2778 // The GS cookie check created a temp label that has no live
2779 // incoming GC registers, we need to fix that
2784 /* Figure out which register parameters hold pointers */
2786 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
2789 noway_assert(varDsc->lvIsParam);
2791 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
2794 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
2795 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
2799 genReserveEpilog(block);
2802 //------------------------------------------------------------------------
2803 // genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
2806 // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
2807 // Otherwise, we generate the 'throw' inline.
2810 // jumpKind - jump kind to generate;
2811 // codeKind - the special throw-helper kind;
2812 // failBlk - optional fail target block, if it is already known;
2814 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk)
2816 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
2817 #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
2818 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
2819 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
2820 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
2824 // For code with throw helper blocks, find and use the helper block for
2825 // raising the exception. The block may be shared by other trees too.
2827 BasicBlock* excpRaisingBlock;
2829 if (failBlk != nullptr)
2831 // We already know which block to jump to. Use that.
2832 assert(failBlk->gtOper == GT_LABEL);
2833 excpRaisingBlock = failBlk->gtLabel.gtLabBB;
2836 Compiler::AddCodeDsc* add =
2837 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2838 assert(excpRaisingBlock == add->acdDstBlk);
2839 #if !FEATURE_FIXED_OUT_ARGS
2840 assert(add->acdStkLvlInit || isFramePointerUsed());
2841 #endif // !FEATURE_FIXED_OUT_ARGS
2846 // Find the helper-block which raises the exception.
2847 Compiler::AddCodeDsc* add =
2848 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2849 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2850 excpRaisingBlock = add->acdDstBlk;
2851 #if !FEATURE_FIXED_OUT_ARGS
2852 assert(add->acdStkLvlInit || isFramePointerUsed());
2853 #endif // !FEATURE_FIXED_OUT_ARGS
2856 noway_assert(excpRaisingBlock != nullptr);
2858 // Jump to the exception-throwing block on error.
2859 inst_JMP(jumpKind, excpRaisingBlock);
2863 // The code to throw the exception will be generated inline, and
2864 // we will jump around it in the normal non-exception case.
2866 BasicBlock* tgtBlk = nullptr;
2867 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2868 if (reverseJumpKind != jumpKind)
2870 tgtBlk = genCreateTempLabel();
2871 inst_JMP(reverseJumpKind, tgtBlk);
2874 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
2876 // Define the spot for the normal non-exception case to jump to.
2877 if (tgtBlk != nullptr)
2879 assert(reverseJumpKind != jumpKind);
2880 genDefineTempLabel(tgtBlk);
2885 /*****************************************************************************
2887 * The last operation done was generating code for "tree" and that would
2888 * have set the flags. Check if the operation caused an overflow.
2892 void CodeGen::genCheckOverflow(GenTree* tree)
2894 // Overflow-check should be asked for this tree
2895 noway_assert(tree->gtOverflow());
2897 const var_types type = tree->TypeGet();
2899 // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
2900 noway_assert(!varTypeIsSmall(type));
2902 emitJumpKind jumpKind;
2904 #ifdef _TARGET_ARM64_
2905 if (tree->OperGet() == GT_MUL)
2912 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
2914 #if defined(_TARGET_XARCH_)
2916 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2918 #elif defined(_TARGET_ARMARCH_)
2920 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2922 if (jumpKind == EJ_lo)
2924 if ((tree->OperGet() != GT_SUB)
2925 #ifdef LEGACY_BACKEND
2926 && (tree->gtOper != GT_ASG_SUB)
2934 #endif // defined(_TARGET_ARMARCH_)
2937 // Jump to the block which will throw the expection
2939 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2942 #if FEATURE_EH_FUNCLETS
2944 /*****************************************************************************
2946 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
2947 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
2952 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2954 if (block->bbFlags & BBF_FUNCLET_BEG)
2956 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2957 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2959 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2963 // We shouldn't see FUNC_ROOT
2964 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2965 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
2970 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2971 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2973 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2975 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2977 assert(!block->hasHndIndex());
2981 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2982 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2987 #if defined(_TARGET_ARM_)
2988 void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2990 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2991 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2992 // calls the funclet during non-exceptional control flow.
2993 if (block->bbFlags & BBF_FINALLY_TARGET)
2995 assert(block->bbFlags & BBF_JMP_TARGET);
2998 if (compiler->verbose)
3000 printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
3003 // Create a label that we'll use for computing the start of an EH region, if this block is
3004 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
3005 // determining the EH regions, then this NOP would end up outside of the region, if this
3006 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
3007 // would be executed, which we would prefer not to do.
3009 block->bbUnwindNopEmitCookie =
3010 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
3017 #endif // FEATURE_EH_FUNCLETS
3019 /*****************************************************************************
3021 * Generate code for the function.
3024 void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
3029 printf("*************** In genGenerateCode()\n");
3030 compiler->fgDispBasicBlocks(compiler->verboseTrees);
3035 unsigned prologSize;
3036 unsigned epilogSize;
3041 genInterruptibleUsed = true;
3044 genNeedPrologStackProbe = false;
3047 compiler->fgDebugCheckBBlist();
3050 /* This is the real thing */
3052 genPrepForCompiler();
3054 /* Prepare the emitter */
3055 getEmitter()->Init();
3057 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
3061 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
3063 compiler->opts.disAsm = true;
3066 if (compiler->opts.disAsm)
3068 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
3070 printf("; Emitting ");
3072 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
3074 printf("SMALL_CODE");
3076 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
3078 printf("FAST_CODE");
3082 printf("BLENDED_CODE");
3087 if (compiler->info.genCPU == CPU_X86)
3089 printf("generic X86 CPU");
3091 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
3093 printf("Pentium 4");
3095 else if (compiler->info.genCPU == CPU_X64)
3097 if (compiler->canUseVexEncoding())
3099 printf("X64 CPU with AVX");
3103 printf("X64 CPU with SSE2");
3107 else if (compiler->info.genCPU == CPU_ARM)
3109 printf("generic ARM CPU");
3114 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
3116 printf("; optimized code\n");
3118 else if (compiler->opts.compDbgCode)
3120 printf("; debuggable code\n");
3122 else if (compiler->opts.MinOpts())
3124 printf("; compiler->opts.MinOpts() is true\n");
3128 printf("; unknown optimization flags\n");
3132 if (compiler->genDoubleAlign())
3133 printf("; double-aligned frame\n");
3136 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
3138 if (genInterruptible)
3140 printf("; fully interruptible\n");
3144 printf("; partially interruptible\n");
3147 if (compiler->fgHaveProfileData())
3149 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
3150 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
3153 if (compiler->fgProfileData_ILSizeMismatch)
3155 printf("; discarded IBC profile data due to mismatch in ILSize\n");
3160 #ifndef LEGACY_BACKEND
3162 // For RyuJIT backend, we compute the final frame layout before code generation. This is because LSRA
3163 // has already computed exactly the maximum concurrent number of spill temps of each type that are
3164 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
3165 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
3166 // memory from the VM.
3170 unsigned maxTmpSize = compiler->tmpSize; // This is precise after LSRA has pre-allocated the temps.
3172 #else // LEGACY_BACKEND
3174 // Estimate the frame size: first, estimate the number of spill temps needed by taking the register
3175 // predictor spill temp estimates and stress levels into consideration. Then, compute the tentative
3176 // frame layout using conservative callee-save register estimation (namely, guess they'll all be used
3177 // and thus saved on the frame).
3179 // Compute the maximum estimated spill temp size.
3180 unsigned maxTmpSize = sizeof(double) + sizeof(float) + sizeof(__int64) + TARGET_POINTER_SIZE;
3182 maxTmpSize += (compiler->tmpDoubleSpillMax * sizeof(double)) + (compiler->tmpIntSpillMax * sizeof(int));
3186 /* When StressRegs is >=1, there will be a bunch of spills not predicted by
3187 the predictor (see logic in rsPickReg). It will be very hard to teach
3188 the predictor about the behavior of rsPickReg for StressRegs >= 1, so
3189 instead let's make maxTmpSize large enough so that we won't be wrong.
3190 This means that at StressRegs >= 1, we will not be testing the logic
3191 that sets the maxTmpSize size.
3194 if (regSet.rsStressRegs() >= 1)
3196 maxTmpSize += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
3199 // JIT uses 2 passes when assigning stack variable (i.e. args, temps, and locals) locations in varDsc->lvStkOffs.
3200 // During the 1st pass (in genGenerateCode), it estimates the maximum possible size for stack temps
3201 // and put it in maxTmpSize. Then it calculates the varDsc->lvStkOffs for each variable based on this estimation.
3202 // However during stress mode, we might spill more temps on the stack, which might grow the
3203 // size of the temp area.
3204 // This might cause varDsc->lvStkOffs to change during the 2nd pass (in emitEndCodeGen).
3205 // If the change of varDsc->lvStkOffs crosses the threshold for the instruction size,
3206 // we will then have a mismatched estimated code size (during the 1st pass) and the actual emitted code size
3207 // (during the 2nd pass).
3208 // Also, if STRESS_UNSAFE_BUFFER_CHECKS is turned on, we might reorder the stack variable locations,
3209 // which could cause the mismatch too.
3211 // The following code is simply bump the maxTmpSize up to at least BYTE_MAX+1 during the stress mode, so that
3212 // we don't run into code size problem during stress.
3214 if (getJitStressLevel() != 0)
3216 if (maxTmpSize < BYTE_MAX + 1)
3218 maxTmpSize = BYTE_MAX + 1;
3223 /* Estimate the offsets of locals/arguments and size of frame */
3225 unsigned lclSize = compiler->lvaFrameSize(Compiler::TENTATIVE_FRAME_LAYOUT);
3229 // Display the local frame offsets that we have tentatively decided upon
3233 compiler->lvaTableDump();
3237 #endif // LEGACY_BACKEND
3239 getEmitter()->emitBegFN(isFramePointerUsed()
3242 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
3243 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
3245 #ifdef LEGACY_BACKEND
3248 #endif // LEGACY_BACKEND
3252 /* Now generate code for the function */
3255 #ifndef LEGACY_BACKEND
3257 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
3258 // generation hasn't touched it (it shouldn't!).
3261 compiler->lvaTableDump();
3264 #endif // !LEGACY_BACKEND
3266 /* We can now generate the function prolog and epilog */
3268 genGeneratePrologsAndEpilogs();
3270 /* Bind jump distances */
3272 getEmitter()->emitJumpDistBind();
3274 /* The code is now complete and final; it should not change after this. */
3276 /* Compute the size of the code sections that we are going to ask the VM
3277 to allocate. Note that this might not be precisely the size of the
3278 code we emit, though it's fatal if we emit more code than the size we
3280 (Note: an example of a case where we emit less code would be useful.)
3283 getEmitter()->emitComputeCodeSizes();
3287 // Code to test or stress our ability to run a fallback compile.
3288 // We trigger the fallback here, before asking the VM for any memory,
3289 // because if not, we will leak mem, as the current codebase can't free
3290 // the mem after the emitter asks the VM for it. As this is only a stress
3291 // mode, we only want the functionality, and don't care about the relative
3292 // ugliness of having the failure here.
3293 if (!compiler->jitFallbackCompile)
3295 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
3296 // especially that caused by enabling JIT stress.
3297 if (!JitConfig.JitNoForceFallback())
3299 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
3301 NO_WAY_NOASSERT("Stress failure");
3308 /* We've finished collecting all the unwind information for the function. Now reserve
3309 space for it from the VM.
3312 compiler->unwindReserve();
3316 size_t dataSize = getEmitter()->emitDataSize();
3318 #endif // DISPLAY_SIZES
3322 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
3324 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
3325 trackedStackPtrsContig = false;
3326 #elif defined(_TARGET_ARM_)
3327 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
3328 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
3330 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
3334 /* We're done generating code for this function */
3335 compiler->compCodeGenDone = true;
3338 compiler->EndPhase(PHASE_GENERATE_CODE);
3340 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
3341 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
3342 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
3344 compiler->EndPhase(PHASE_EMIT_CODE);
3347 if (compiler->opts.disAsm)
3349 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
3350 compiler->info.compFullName);
3351 printf("; ============================================================\n");
3352 printf(""); // in our logic this causes a flush
3357 printf("*************** After end code gen, before unwindEmit()\n");
3358 getEmitter()->emitDispIGlist(true);
3362 #if EMIT_TRACK_STACK_DEPTH
3363 /* Check our max stack level. Needed for fgAddCodeRef().
3364 We need to relax the assert as our estimation won't include code-gen
3365 stack changes (which we know don't affect fgAddCodeRef()) */
3367 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
3368 compiler->compHndBBtabCount + // Return address for locally-called finallys
3369 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
3370 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
3371 #if defined(UNIX_X86_ABI)
3372 maxAllowedStackDepth += maxNestedAlignment;
3374 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
3376 #endif // EMIT_TRACK_STACK_DEPTH
3378 *nativeSizeOfCode = codeSize;
3379 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
3381 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
3383 // Make sure that the x86 alignment and cache prefetch optimization rules
3386 // Don't start a method in the last 7 bytes of a 16-byte alignment area
3387 // unless we are generating SMALL_CODE
3388 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
3390 /* Now that the code is issued, we can finalize and emit the unwind data */
3392 compiler->unwindEmit(*codePtr, coldCodePtr);
3394 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
3398 /* Finalize the Local Var info in terms of generated code */
3403 unsigned finalHotCodeSize;
3404 unsigned finalColdCodeSize;
3405 if (compiler->fgFirstColdBlock != nullptr)
3407 // We did some hot/cold splitting. The hot section is always padded out to the
3408 // size we thought it would be, but the cold section is not.
3409 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
3410 assert(compiler->info.compTotalHotCodeSize > 0);
3411 assert(compiler->info.compTotalColdCodeSize > 0);
3412 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
3413 finalColdCodeSize = codeSize - finalHotCodeSize;
3417 // No hot/cold splitting
3418 assert(codeSize <= compiler->info.compTotalHotCodeSize);
3419 assert(compiler->info.compTotalHotCodeSize > 0);
3420 assert(compiler->info.compTotalColdCodeSize == 0);
3421 finalHotCodeSize = codeSize;
3422 finalColdCodeSize = 0;
3424 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
3425 #endif // LATE_DISASM
3427 /* Report any exception handlers to the VM */
3431 #ifdef JIT32_GCENCODER
3436 // Create and store the GC info for this method.
3437 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
3440 FILE* dmpf = jitstdout;
3442 compiler->opts.dmpHex = false;
3443 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
3446 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
3451 compiler->opts.dmpHex = true;
3454 if (compiler->opts.dmpHex)
3456 size_t consSize = getEmitter()->emitDataSize();
3457 size_t infoSize = compiler->compInfoBlkSize;
3459 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
3460 fprintf(dmpf, "\n");
3464 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
3468 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
3470 #ifdef JIT32_GCENCODER
3472 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
3473 #endif // JIT32_GCENCODER
3475 fprintf(dmpf, "\n");
3479 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
3483 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
3485 #ifdef JIT32_GCENCODER
3487 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
3488 #endif // JIT32_GCENCODER
3493 if (dmpf != jitstdout)
3500 /* Tell the emitter that we're done with this function */
3502 getEmitter()->emitEndFN();
3504 /* Shut down the spill logic */
3506 regSet.rsSpillDone();
3508 /* Shut down the temp logic */
3510 compiler->tmpDone();
3514 grossVMsize += compiler->info.compILCodeSize;
3515 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
3516 grossNCsize += codeSize + dataSize;
3518 #endif // DISPLAY_SIZES
3520 compiler->EndPhase(PHASE_EMIT_GCEH);
3523 /*****************************************************************************
3525 * Report EH clauses to the VM
3528 void CodeGen::genReportEH()
3530 if (compiler->compHndBBtabCount == 0)
3536 if (compiler->opts.dspEHTable)
3538 printf("*************** EH table for %s\n", compiler->info.compFullName);
3546 bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
3548 unsigned EHCount = compiler->compHndBBtabCount;
3550 #if FEATURE_EH_FUNCLETS
3551 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
3553 unsigned duplicateClauseCount = 0;
3554 unsigned enclosingTryIndex;
3556 // Duplicate clauses are not used by CoreRT ABI
3559 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
3561 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
3562 // ignoring 'mutual protect' trys
3563 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
3564 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
3566 ++duplicateClauseCount;
3569 EHCount += duplicateClauseCount;
3572 #if FEATURE_EH_CALLFINALLY_THUNKS
3573 unsigned clonedFinallyCount = 0;
3575 // Duplicate clauses are not used by CoreRT ABI
3578 // We don't keep track of how many cloned finally there are. So, go through and count.
3579 // We do a quick pass first through the EH table to see if there are any try/finally
3580 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
3582 bool anyFinallys = false;
3583 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
3584 HBtab < HBtabEnd; HBtab++)
3586 if (HBtab->HasFinallyHandler())
3594 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3596 if (block->bbJumpKind == BBJ_CALLFINALLY)
3598 ++clonedFinallyCount;
3602 EHCount += clonedFinallyCount;
3605 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3607 #endif // FEATURE_EH_FUNCLETS
3610 if (compiler->opts.dspEHTable)
3612 #if FEATURE_EH_FUNCLETS
3613 #if FEATURE_EH_CALLFINALLY_THUNKS
3614 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
3615 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
3616 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
3617 #else // !FEATURE_EH_CALLFINALLY_THUNKS
3618 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
3619 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
3620 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
3621 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
3622 #else // !FEATURE_EH_FUNCLETS
3623 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
3624 assert(compiler->compHndBBtabCount == EHCount);
3625 #endif // !FEATURE_EH_FUNCLETS
3629 // Tell the VM how many EH clauses to expect.
3630 compiler->eeSetEHcount(EHCount);
3632 XTnum = 0; // This is the index we pass to the VM
3634 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
3635 HBtab < HBtabEnd; HBtab++)
3637 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
3639 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
3640 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
3642 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3643 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
3644 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3645 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
3647 if (HBtab->HasFilter())
3649 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
3653 hndTyp = HBtab->ebdTyp;
3656 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
3658 if (isCoreRTABI && (XTnum > 0))
3660 // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
3661 // try block as the previous one. The runtime cannot reliably infer this information from
3662 // native code offsets because of different try blocks can have same offsets. Alternative
3663 // solution to this problem would be inserting extra nops to ensure that different try
3664 // blocks have different offsets.
3665 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
3667 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
3668 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
3669 // IL as "try { try {} catch {} catch {} } finally {}".
3670 assert(HBtab->HasCatchHandler());
3671 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
3675 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
3676 // the fields aren't accurate.
3678 CORINFO_EH_CLAUSE clause;
3679 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
3680 clause.Flags = flags;
3681 clause.TryOffset = tryBeg;
3682 clause.TryLength = tryEnd;
3683 clause.HandlerOffset = hndBeg;
3684 clause.HandlerLength = hndEnd;
3686 assert(XTnum < EHCount);
3688 // Tell the VM about this EH clause.
3689 compiler->eeSetEHinfo(XTnum, &clause);
3694 #if FEATURE_EH_FUNCLETS
3695 // Now output duplicated clauses.
3697 // If a funclet has been created by moving a handler out of a try region that it was originally nested
3698 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
3699 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
3700 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
3701 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
3702 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
3703 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
3706 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
3707 // try or handler region):
3725 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
3726 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
3727 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
3728 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
3729 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
3730 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
3731 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
3732 // representing try (1) protecting the new funclets catch (3) and (4).
3733 // The code will be generated as follows:
3735 // ABCFH // "main" code
3740 // The EH regions are:
3745 // D -> G // "duplicate" clause
3746 // E -> G // "duplicate" clause
3748 // Note that we actually need to generate one of these additional "duplicate" clauses for every
3749 // region the funclet is nested in. Take this example:
3781 // When we pull out funclets, we get the following generated code:
3783 // ABCDEHJMO // "main" function
3791 // And the EH regions we report to the VM are (in order; main clauses
3792 // first in most-to-least nested order, funclets ("duplicated clauses")
3793 // last, in most-to-least nested) are:
3801 // F -> I // funclet clause #1 for F
3802 // F -> K // funclet clause #2 for F
3803 // F -> L // funclet clause #3 for F
3804 // F -> N // funclet clause #4 for F
3805 // G -> I // funclet clause #1 for G
3806 // G -> K // funclet clause #2 for G
3807 // G -> L // funclet clause #3 for G
3808 // G -> N // funclet clause #4 for G
3809 // I -> K // funclet clause #1 for I
3810 // I -> L // funclet clause #2 for I
3811 // I -> N // funclet clause #3 for I
3812 // K -> N // funclet clause #1 for K
3813 // L -> N // funclet clause #1 for L
3815 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
3816 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
3817 // to add a clause "F -> G" because F is NOT protected by G, but we still have
3818 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
3820 // The overall ordering of the clauses is still the same most-to-least nesting
3821 // after front-to-back start offset. Because we place the funclets at the end
3822 // these new clauses should also go at the end by this ordering.
3825 if (duplicateClauseCount > 0)
3827 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
3829 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
3831 unsigned enclosingTryIndex;
3833 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
3835 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
3836 // ignoring 'mutual protect' trys
3837 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
3838 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
3840 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
3841 // that will have the enclosing try protecting the funclet.
3843 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
3844 // greater EH table index
3846 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
3848 // The try region is the handler of the funclet. Note that for filters, we don't protect the
3849 // filter region, only the filter handler region. This is because exceptions in filters never
3850 // escape; the VM swallows them.
3852 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
3853 BasicBlock* bbTryLast = fletTab->ebdHndLast;
3855 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
3856 BasicBlock* bbHndLast = encTab->ebdHndLast;
3858 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
3860 tryBeg = compiler->ehCodeOffset(bbTryBeg);
3861 hndBeg = compiler->ehCodeOffset(bbHndBeg);
3863 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3864 : compiler->ehCodeOffset(bbTryLast->bbNext);
3865 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
3866 : compiler->ehCodeOffset(bbHndLast->bbNext);
3868 if (encTab->HasFilter())
3870 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
3874 hndTyp = encTab->ebdTyp;
3877 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
3879 // Tell the VM this is an extra clause caused by moving funclets out of line.
3880 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
3882 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
3883 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
3884 // instruction immediately after the 'try' body. So, it really could be more accurately named
3887 CORINFO_EH_CLAUSE clause;
3888 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
3889 clause.Flags = flags;
3890 clause.TryOffset = tryBeg;
3891 clause.TryLength = tryEnd;
3892 clause.HandlerOffset = hndBeg;
3893 clause.HandlerLength = hndEnd;
3895 assert(XTnum < EHCount);
3897 // Tell the VM about this EH clause (a duplicated clause).
3898 compiler->eeSetEHinfo(XTnum, &clause);
3901 ++reportedDuplicateClauseCount;
3904 if (duplicateClauseCount == reportedDuplicateClauseCount)
3906 break; // we've reported all of them; no need to continue looking
3910 } // for each 'true' enclosing 'try'
3911 } // for each EH table entry
3913 assert(duplicateClauseCount == reportedDuplicateClauseCount);
3914 } // if (duplicateClauseCount > 0)
3916 #if FEATURE_EH_CALLFINALLY_THUNKS
3917 if (clonedFinallyCount > 0)
3919 unsigned reportedClonedFinallyCount = 0;
3920 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3922 if (block->bbJumpKind == BBJ_CALLFINALLY)
3924 UNATIVE_OFFSET hndBeg, hndEnd;
3926 hndBeg = compiler->ehCodeOffset(block);
3928 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3929 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3930 BasicBlock* bbLabel = block->bbNext;
3931 if (block->isBBCallAlwaysPair())
3933 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3935 if (bbLabel == nullptr)
3937 hndEnd = compiler->info.compNativeCodeSize;
3941 assert(bbLabel->bbEmitCookie != nullptr);
3942 hndEnd = compiler->ehCodeOffset(bbLabel);
3945 CORINFO_EH_CLAUSE clause;
3946 clause.ClassToken = 0; // unused
3947 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
3948 clause.TryOffset = hndBeg;
3949 clause.TryLength = hndBeg;
3950 clause.HandlerOffset = hndBeg;
3951 clause.HandlerLength = hndEnd;
3953 assert(XTnum < EHCount);
3955 // Tell the VM about this EH clause (a cloned finally clause).
3956 compiler->eeSetEHinfo(XTnum, &clause);
3959 ++reportedClonedFinallyCount;
3962 if (clonedFinallyCount == reportedClonedFinallyCount)
3964 break; // we're done; no need to keep looking
3967 } // block is BBJ_CALLFINALLY
3970 assert(clonedFinallyCount == reportedClonedFinallyCount);
3971 } // if (clonedFinallyCount > 0)
3972 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3974 #endif // FEATURE_EH_FUNCLETS
3976 assert(XTnum == EHCount);
3979 //----------------------------------------------------------------------
3980 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3981 // helper should be used.
3984 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
3987 // true if an optimized write barrier helper should be used, false otherwise.
3988 // Note: only x86 implements register-specific source optimized write
3989 // barriers currently.
3991 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
3993 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3995 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
4004 //----------------------------------------------------------------------
4005 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
4006 // helper should be used.
4008 // This has the same functionality as the version of
4009 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
4010 // determining what the required write barrier form is, if possible.
4013 // tgt - target tree of write (e.g., GT_STOREIND)
4014 // assignVal - tree with value to write
4017 // true if an optimized write barrier helper should be used, false otherwise.
4018 // Note: only x86 implements register-specific source optimized write
4019 // barriers currently.
4021 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
4023 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
4025 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
4026 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
4035 //----------------------------------------------------------------------
4036 // genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
4037 // barrier, and the write barrier form required, determine the helper to call.
4040 // tgt - target tree of write (e.g., GT_STOREIND)
4041 // wbf - already computed write barrier form to use
4044 // Write barrier helper to use.
4046 // Note: do not call this function to get an optimized write barrier helper (e.g.,
4049 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
4051 #ifndef LEGACY_BACKEND
4052 noway_assert(tgt->gtOper == GT_STOREIND);
4053 #else // LEGACY_BACKEND
4054 noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); // enforced by gcIsWriteBarrierCandidate
4055 #endif // LEGACY_BACKEND
4057 CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
4060 if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
4062 helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
4066 if (tgt->gtOper != GT_CLS_VAR)
4068 if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
4070 if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
4072 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
4074 else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
4076 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
4080 assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
4081 ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
4082 (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
4083 ((helper == CORINFO_HELP_ASSIGN_REF) &&
4084 (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
4089 //----------------------------------------------------------------------
4090 // genGCWriteBarrier: Generate a write barrier for a node.
4093 // tgt - target tree of write (e.g., GT_STOREIND)
4094 // wbf - already computed write barrier form to use
4096 void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
4098 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
4100 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
4101 // We classify the "tgt" trees as follows:
4102 // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
4103 // IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, (GT_LCL_VAR,
4105 // then let "v" be the GT_LCL_VAR or GT_REG_VAR.
4106 // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
4107 // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
4108 // * Otherwise, classify as CWBKind_OtherByRefLocal.
4109 // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
4110 // Otherwise, classify as CWBKind_Unclassified.
4112 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
4113 if (tgt->gtOper == GT_IND)
4115 GenTree* lcl = NULL;
4117 GenTree* indArg = tgt->gtOp.gtOp1;
4118 if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
4120 indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
4122 if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
4126 else if (indArg->gtOper == GT_ADD)
4128 if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
4130 lcl = indArg->gtOp.gtOp1;
4132 else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
4134 lcl = indArg->gtOp.gtOp2;
4139 wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
4140 unsigned lclNum = 0;
4141 if (lcl->gtOper == GT_LCL_VAR)
4142 lclNum = lcl->gtLclVarCommon.gtLclNum;
4145 assert(lcl->gtOper == GT_REG_VAR);
4146 lclNum = lcl->gtRegVar.gtLclNum;
4148 if (lclNum == compiler->info.compRetBuffArg)
4150 wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
4154 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
4155 if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
4157 wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
4163 // We should have eliminated the barrier for this case.
4164 assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
4168 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
4172 // Enable this to sample the unclassified trees.
4173 static int unclassifiedBarrierSite = 0;
4174 if (wbKind == CWBKind_Unclassified)
4176 unclassifiedBarrierSite++;
4177 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
4182 inst_IV(INS_push, wbKind);
4183 genEmitHelperCall(helper,
4185 EA_PTRSIZE); // retSize
4186 SubtractStackLevel(4);
4190 genEmitHelperCall(helper,
4192 EA_PTRSIZE); // retSize
4195 #else // !FEATURE_COUNT_GC_WRITE_BARRIERS
4196 genEmitHelperCall(helper,
4198 EA_PTRSIZE); // retSize
4199 #endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
4203 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4204 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4206 XX Prolog / Epilog XX
4208 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4209 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
4212 /*****************************************************************************
4214 * Generates code for moving incoming register arguments to their
4215 * assigned location, in the function prolog.
4219 #pragma warning(push)
4220 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
4222 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
4227 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
4231 #ifdef _TARGET_ARM64_
4232 if (compiler->info.compIsVarArgs)
4234 // We've already saved all int registers at the top of stack in the prolog.
4235 // No need further action.
4240 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
4241 unsigned argNum; // current argNum, always in [0..argMax-1]
4242 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
4243 unsigned regArgNum; // index into the regArgTab[] table
4244 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
4245 bool doingFloat = regState->rsIsFloat;
4247 // We should be generating the prolog block when we are called
4248 assert(compiler->compGeneratingProlog);
4250 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
4251 noway_assert(regArgMaskLive != 0);
4253 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
4254 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
4256 // The regArgTab can always have unused entries,
4257 // for example if an architecture always increments the arg register number but uses either
4258 // an integer register or a floating point register to hold the next argument
4259 // then with a mix of float and integer args you could have:
4261 // sampleMethod(int i, float x, int j, float y, int k, float z);
4262 // r0, r2 and r4 as valid integer arguments with argMax as 5
4263 // and f1, f3 and f5 and valid floating point arguments with argMax as 6
4264 // The first one is doingFloat==false and the second one is doingFloat==true
4266 // If a fixed return buffer (in r8) was also present then the first one would become:
4267 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
4270 argMax = regState->rsCalleeRegArgCount;
4271 fixedRetBufIndex = (unsigned)-1; // Invalid value
4273 // If necessary we will select a correct xtraReg for circular floating point args later.
4277 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
4279 else // we are doing the integer registers
4281 noway_assert(argMax <= MAX_REG_ARG);
4282 if (hasFixedRetBuffReg())
4284 fixedRetBufIndex = theFixedRetBuffArgNum();
4285 // We have an additional integer register argument when hasFixedRetBuffReg() is true
4286 argMax = fixedRetBufIndex + 1;
4287 assert(argMax == (MAX_REG_ARG + 1));
4292 // Construct a table with the register arguments, for detecting circular and
4293 // non-circular dependencies between the register arguments. A dependency is when
4294 // an argument register Rn needs to be moved to register Rm that is also an argument
4295 // register. The table is constructed in the order the arguments are passed in
4296 // registers: the first register argument is in regArgTab[0], the second in
4297 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
4298 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
4299 // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
4300 // we have increased the allocated size of the regArgTab[] by one.
4304 unsigned varNum; // index into compiler->lvaTable[] for this register argument
4305 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4306 var_types type; // the Jit type of this regArgTab entry
4307 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4308 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
4309 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
4310 // argument register number 'x'. Only used when circular = true.
4311 char slot; // 0 means the register is not used for a register argument
4312 // 1 means the first part of a register argument
4313 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
4314 bool stackArg; // true if the argument gets homed to the stack
4315 bool processed; // true after we've processed the argument (and it is in its final location)
4316 bool circular; // true if this register participates in a circular dependency loop.
4318 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4320 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
4321 // the type of the lclVar in ways that are not ascertainable from lvType.
4322 // So, for that case we retain the type of the register in the regArgTab.
4324 var_types getRegType(Compiler* compiler)
4326 return type; // UNIX_AMD64 implementation
4329 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4331 // In other cases, we simply use the type of the lclVar to determine the type of the register.
4332 var_types getRegType(Compiler* compiler)
4334 LclVarDsc varDsc = compiler->lvaTable[varNum];
4335 // Check if this is an HFA register arg and return the HFA type
4336 if (varDsc.lvIsHfaRegArg())
4338 return varDsc.GetHfaType();
4340 return varDsc.lvType;
4343 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4344 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
4348 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4350 // Is this variable a register arg?
4351 if (!varDsc->lvIsParam)
4356 if (!varDsc->lvIsRegArg)
4361 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
4362 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
4363 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
4364 // use the the original TYP_STRUCT argument.
4366 if (varDsc->lvPromoted || varDsc->lvIsStructField)
4368 LclVarDsc* parentVarDsc = varDsc;
4369 if (varDsc->lvIsStructField)
4371 assert(!varDsc->lvPromoted);
4372 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
4375 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
4377 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
4379 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
4381 // For register arguments that are independent promoted structs we put the promoted field varNum in the
4383 if (varDsc->lvPromoted)
4390 // For register arguments that are not independent promoted structs we put the parent struct varNum in
4392 if (varDsc->lvIsStructField)
4399 var_types regType = varDsc->TypeGet();
4400 // Change regType to the HFA type when we have a HFA argument
4401 if (varDsc->lvIsHfaRegArg())
4403 regType = varDsc->GetHfaType();
4406 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4407 if (!varTypeIsStruct(regType))
4408 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4410 // A struct might be passed partially in XMM register for System V calls.
4411 // So a single arg might use both register files.
4412 if (isFloatRegType(regType) != doingFloat)
4420 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4421 if (varTypeIsStruct(varDsc))
4423 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
4424 assert(typeHnd != nullptr);
4425 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
4426 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
4427 if (!structDesc.passedInRegisters)
4429 // The var is not passed in registers.
4433 unsigned firstRegSlot = 0;
4434 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
4436 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
4441 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
4442 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
4443 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
4444 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
4447 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
4448 // registers or on stack, the upper most 4-bytes will be zero.
4450 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
4451 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
4454 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
4455 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
4456 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
4457 // there is no need to clear upper 4-bytes of Vector3 type args.
4459 // RyuJIT x64 Unix: arguments are treated as passed by value and read/writen as if TYP_SIMD16.
4460 // Vector3 return values are returned two return registers and Caller assembles them into a
4461 // single xmm reg. Hence RyuJIT explicitly generates code to clears upper 4-bytes of Vector3
4462 // type args in prolog and Vector3 type return value of a call
4464 if (varDsc->lvType == TYP_SIMD12)
4466 regType = TYP_DOUBLE;
4471 regType = compiler->GetEightByteType(structDesc, slotCounter);
4474 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
4476 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
4477 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
4479 // Store the reg for the first slot.
4482 firstRegSlot = regArgNum;
4485 // Bingo - add it to our table
4486 noway_assert(regArgNum < argMax);
4487 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
4488 // not be multiple vars representing this argument
4490 regArgTab[regArgNum].varNum = varNum;
4491 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
4492 regArgTab[regArgNum].type = regType;
4499 continue; // Nothing to do for this regState set.
4502 regArgNum = firstRegSlot;
4505 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4507 // Bingo - add it to our table
4508 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
4510 noway_assert(regArgNum < argMax);
4511 // We better not have added it already (there better not be multiple vars representing this argument
4513 noway_assert(regArgTab[regArgNum].slot == 0);
4515 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4516 // Set the register type.
4517 regArgTab[regArgNum].type = regType;
4518 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4520 regArgTab[regArgNum].varNum = varNum;
4521 regArgTab[regArgNum].slot = 1;
4525 #if FEATURE_MULTIREG_ARGS
4526 if (compiler->lvaIsMultiregStruct(varDsc))
4528 if (varDsc->lvIsHfaRegArg())
4530 // We have an HFA argument, set slots to the number of registers used
4531 slots = varDsc->lvHfaSlots();
4535 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
4536 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
4537 // We have a non-HFA multireg argument, set slots to two
4541 // Note that regArgNum+1 represents an argument index not an actual argument register.
4542 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
4544 // This is the setup for the rest of a multireg struct arg
4546 for (int i = 1; i < slots; i++)
4548 noway_assert((regArgNum + i) < argMax);
4550 // We better not have added it already (there better not be multiple vars representing this argument
4552 noway_assert(regArgTab[regArgNum + i].slot == 0);
4554 regArgTab[regArgNum + i].varNum = varNum;
4555 regArgTab[regArgNum + i].slot = (char)(i + 1);
4558 #endif // FEATURE_MULTIREG_ARGS
4562 int lclSize = compiler->lvaLclSize(varNum);
4564 if (lclSize > REGSIZE_BYTES)
4566 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
4567 slots = lclSize / REGSIZE_BYTES;
4568 if (regArgNum + slots > maxRegArgNum)
4570 slots = maxRegArgNum - regArgNum;
4573 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
4574 assert(slots < INT8_MAX);
4575 for (char i = 1; i < slots; i++)
4577 regArgTab[regArgNum + i].varNum = varNum;
4578 regArgTab[regArgNum + i].slot = i + 1;
4580 #endif // _TARGET_ARM_
4582 for (int i = 0; i < slots; i++)
4584 regType = regArgTab[regArgNum + i].getRegType(compiler);
4585 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
4587 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4588 // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
4589 // The type of the register depends on the classification of the first eightbyte
4590 // of the struct. For information on classification refer to the System V x86_64 ABI at:
4591 // http://www.x86-64.org/documentation/abi.pdf
4593 assert((i > 0) || (regNum == varDsc->lvArgReg));
4594 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4595 // Is the arg dead on entry to the method ?
4597 if ((regArgMaskLive & genRegMask(regNum)) == 0)
4599 if (varDsc->lvTrackedNonStruct())
4601 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
4606 noway_assert(varDsc->lvType == TYP_STRUCT);
4607 #else // !_TARGET_X86_
4608 #ifndef LEGACY_BACKEND
4609 // For LSRA, it may not be in regArgMaskLive if it has a zero
4610 // refcnt. This is in contrast with the non-LSRA case in which all
4611 // non-tracked args are assumed live on entry.
4612 noway_assert((varDsc->lvRefCnt == 0) || (varDsc->lvType == TYP_STRUCT) ||
4613 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs));
4614 #else // LEGACY_BACKEND
4616 varDsc->lvType == TYP_STRUCT ||
4617 (varDsc->lvAddrExposed && (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)));
4618 #endif // LEGACY_BACKEND
4619 #endif // !_TARGET_X86_
4621 // Mark it as processed and be done with it
4622 regArgTab[regArgNum + i].processed = true;
4627 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
4628 // could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
4629 // they've already been spilled.
4631 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
4632 #endif // _TARGET_ARM_
4634 #if !defined(UNIX_AMD64_ABI)
4635 noway_assert(xtraReg != (varDsc->lvArgReg + i));
4637 noway_assert(regArgMaskLive & genRegMask(regNum));
4640 regArgTab[regArgNum + i].processed = false;
4642 /* mark stack arguments since we will take care of those first */
4643 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
4645 /* If it goes on the stack or in a register that doesn't hold
4646 * an argument anymore -> CANNOT form a circular dependency */
4648 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
4650 /* will trash another argument -> possible dependency
4651 * We may need several passes after the table is constructed
4652 * to decide on that */
4654 /* Maybe the argument stays in the register (IDEAL) */
4656 if ((i == 0) && (varDsc->lvRegNum == regNum))
4661 #if !defined(_TARGET_64BIT_)
4662 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
4666 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
4671 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
4672 (REG_NEXT(varDsc->lvRegNum) == regNum))
4676 #endif // !defined(_TARGET_64BIT_)
4677 regArgTab[regArgNum + i].circular = true;
4682 regArgTab[regArgNum + i].circular = false;
4684 /* mark the argument register as free */
4685 regArgMaskLive &= ~genRegMask(regNum);
4690 /* Find the circular dependencies for the argument registers, if any.
4691 * A circular dependency is a set of registers R1, R2, ..., Rn
4692 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
4697 /* Possible circular dependencies still exist; the previous pass was not enough
4698 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
4704 for (argNum = 0; argNum < argMax; argNum++)
4706 // If we already marked the argument as non-circular then continue
4708 if (!regArgTab[argNum].circular)
4713 if (regArgTab[argNum].slot == 0) // Not a register argument
4718 varNum = regArgTab[argNum].varNum;
4719 noway_assert(varNum < compiler->lvaCount);
4720 varDsc = compiler->lvaTable + varNum;
4721 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4723 /* cannot possibly have stack arguments */
4724 noway_assert(varDsc->lvIsInReg());
4725 noway_assert(!regArgTab[argNum].stackArg);
4727 var_types regType = regArgTab[argNum].getRegType(compiler);
4728 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4730 regNumber destRegNum = REG_NA;
4731 if (regArgTab[argNum].slot == 1)
4733 destRegNum = varDsc->lvRegNum;
4735 #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
4738 assert(regArgTab[argNum].slot == 2);
4740 assert(regArgTab[argNum - 1].slot == 1);
4741 assert(regArgTab[argNum - 1].varNum == varNum);
4742 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4743 regArgMaskLive &= ~genRegMask(regNum);
4744 regArgTab[argNum].circular = false;
4748 #elif !defined(_TARGET_64BIT_)
4749 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
4751 destRegNum = varDsc->lvOtherReg;
4755 assert(regArgTab[argNum].slot == 2);
4756 assert(varDsc->TypeGet() == TYP_DOUBLE);
4757 destRegNum = REG_NEXT(varDsc->lvRegNum);
4759 #endif // !defined(_TARGET_64BIT_)
4760 noway_assert(destRegNum != REG_NA);
4761 if (genRegMask(destRegNum) & regArgMaskLive)
4763 /* we are trashing a live argument register - record it */
4764 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
4765 noway_assert(destRegArgNum < argMax);
4766 regArgTab[destRegArgNum].trashBy = argNum;
4770 /* argument goes to a free register */
4771 regArgTab[argNum].circular = false;
4774 /* mark the argument register as free */
4775 regArgMaskLive &= ~genRegMask(regNum);
4781 /* At this point, everything that has the "circular" flag
4782 * set to "true" forms a circular dependency */
4783 CLANG_FORMAT_COMMENT_ANCHOR;
4790 printf("Circular dependencies found while home-ing the incoming arguments.\n");
4795 // LSRA allocates registers to incoming parameters in order and will not overwrite
4796 // a register still holding a live parameter.
4797 CLANG_FORMAT_COMMENT_ANCHOR;
4799 #ifndef LEGACY_BACKEND
4800 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
4801 "Homing of float argument registers with circular dependencies not implemented.");
4802 #endif // LEGACY_BACKEND
4804 /* Now move the arguments to their locations.
4805 * First consider ones that go on the stack since they may
4806 * free some registers. */
4808 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
4809 for (argNum = 0; argNum < argMax; argNum++)
4813 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4814 // If this is the wrong register file, just continue.
4815 if (regArgTab[argNum].type == TYP_UNDEF)
4817 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4818 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4819 // The next register file processing will process it.
4822 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4823 // If the arg is dead on entry to the method, skip it
4825 if (regArgTab[argNum].processed)
4830 if (regArgTab[argNum].slot == 0) // Not a register argument
4835 varNum = regArgTab[argNum].varNum;
4836 noway_assert(varNum < compiler->lvaCount);
4837 varDsc = compiler->lvaTable + varNum;
4839 #ifndef _TARGET_64BIT_
4840 // If not a stack arg go to the next one
4841 if (varDsc->lvType == TYP_LONG)
4843 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
4847 else if (varDsc->lvOtherReg != REG_STK)
4853 #endif // !_TARGET_64BIT_
4855 // If not a stack arg go to the next one
4856 if (!regArgTab[argNum].stackArg)
4862 #if defined(_TARGET_ARM_)
4863 if (varDsc->lvType == TYP_DOUBLE)
4865 if (regArgTab[argNum].slot == 2)
4867 // We handled the entire double when processing the first half (slot == 1)
4873 noway_assert(regArgTab[argNum].circular == false);
4875 noway_assert(varDsc->lvIsParam);
4876 noway_assert(varDsc->lvIsRegArg);
4877 noway_assert(varDsc->lvIsInReg() == false ||
4878 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
4880 var_types storeType = TYP_UNDEF;
4881 unsigned slotSize = TARGET_POINTER_SIZE;
4883 if (varTypeIsStruct(varDsc))
4885 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
4886 #if FEATURE_MULTIREG_ARGS
4887 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
4888 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
4889 #endif // FEATURE_MULTIREG_ARGS
4890 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4891 storeType = regArgTab[argNum].type;
4892 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4893 if (varDsc->lvIsHfaRegArg())
4896 // On ARM32 the storeType for HFA args is always TYP_FLOAT
4897 storeType = TYP_FLOAT;
4898 slotSize = (unsigned)emitActualTypeSize(storeType);
4899 #else // _TARGET_ARM64_
4900 storeType = genActualType(varDsc->GetHfaType());
4901 slotSize = (unsigned)emitActualTypeSize(storeType);
4902 #endif // _TARGET_ARM64_
4905 else // Not a struct type
4907 storeType = genActualType(varDsc->TypeGet());
4909 size = emitActualTypeSize(storeType);
4911 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4912 #endif //_TARGET_X86_
4914 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4916 // Stack argument - if the ref count is 0 don't care about it
4918 if (!varDsc->lvOnFrame)
4920 noway_assert(varDsc->lvRefCnt == 0);
4924 // Since slot is typically 1, baseOffset is typically 0
4925 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
4927 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4929 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4930 // Check if we are writing past the end of the struct
4931 if (varTypeIsStruct(varDsc))
4933 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4935 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4937 if (regArgTab[argNum].slot == 1)
4939 psiMoveToStack(varNum);
4943 /* mark the argument as processed */
4945 regArgTab[argNum].processed = true;
4946 regArgMaskLive &= ~genRegMask(srcRegNum);
4948 #if defined(_TARGET_ARM_)
4949 if (storeType == TYP_DOUBLE)
4951 regArgTab[argNum + 1].processed = true;
4952 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4957 /* Process any circular dependencies */
4960 unsigned begReg, destReg, srcReg;
4961 unsigned varNumDest, varNumSrc;
4962 LclVarDsc* varDscDest;
4963 LclVarDsc* varDscSrc;
4964 instruction insCopy = INS_mov;
4968 #if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4969 insCopy = ins_Copy(TYP_DOUBLE);
4970 // Compute xtraReg here when we have a float argument
4971 assert(xtraReg == REG_NA);
4973 regMaskTP fpAvailMask;
4975 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4976 #if defined(FEATURE_HFA)
4977 fpAvailMask &= RBM_ALLDOUBLE;
4979 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4980 #error Error. Wrong architecture.
4981 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4982 #endif // defined(FEATURE_HFA)
4984 if (fpAvailMask == RBM_NONE)
4986 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4987 #if defined(FEATURE_HFA)
4988 fpAvailMask &= RBM_ALLDOUBLE;
4990 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4991 #error Error. Wrong architecture.
4992 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4993 #endif // defined(FEATURE_HFA)
4996 assert(fpAvailMask != RBM_NONE);
4998 // We pick the lowest avail register number
4999 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
5000 xtraReg = genRegNumFromMask(tempMask);
5001 #elif defined(_TARGET_X86_)
5002 // This case shouldn't occur on x86 since NYI gets converted to an assert
5003 NYI("Homing circular FP registers via xtraReg");
5007 for (argNum = 0; argNum < argMax; argNum++)
5009 // If not a circular dependency then continue
5010 if (!regArgTab[argNum].circular)
5015 // If already processed the dependency then continue
5017 if (regArgTab[argNum].processed)
5022 if (regArgTab[argNum].slot == 0) // Not a register argument
5027 destReg = begReg = argNum;
5028 srcReg = regArgTab[argNum].trashBy;
5030 varNumDest = regArgTab[destReg].varNum;
5031 noway_assert(varNumDest < compiler->lvaCount);
5032 varDscDest = compiler->lvaTable + varNumDest;
5033 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
5035 noway_assert(srcReg < argMax);
5036 varNumSrc = regArgTab[srcReg].varNum;
5037 noway_assert(varNumSrc < compiler->lvaCount);
5038 varDscSrc = compiler->lvaTable + varNumSrc;
5039 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
5041 emitAttr size = EA_PTRSIZE;
5043 #ifdef _TARGET_XARCH_
5045 // The following code relies upon the target architecture having an
5046 // 'xchg' instruction which directly swaps the values held in two registers.
5047 // On the ARM architecture we do not have such an instruction.
5049 if (destReg == regArgTab[srcReg].trashBy)
5051 /* only 2 registers form the circular dependency - use "xchg" */
5053 varNum = regArgTab[argNum].varNum;
5054 noway_assert(varNum < compiler->lvaCount);
5055 varDsc = compiler->lvaTable + varNum;
5056 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
5058 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
5060 /* Set "size" to indicate GC if one and only one of
5061 * the operands is a pointer
5062 * RATIONALE: If both are pointers, nothing changes in
5063 * the GC pointer tracking. If only one is a pointer we
5064 * have to "swap" the registers in the GC reg pointer mask
5067 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
5072 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
5074 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
5075 regTracker.rsTrackRegTrash(varDscSrc->lvRegNum);
5076 regTracker.rsTrackRegTrash(varDscSrc->lvArgReg);
5078 /* mark both arguments as processed */
5079 regArgTab[destReg].processed = true;
5080 regArgTab[srcReg].processed = true;
5082 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
5083 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
5085 psiMoveToReg(varNumSrc);
5086 psiMoveToReg(varNumDest);
5089 #endif // _TARGET_XARCH_
5091 var_types destMemType = varDscDest->TypeGet();
5094 bool cycleAllDouble = true; // assume the best
5096 unsigned iter = begReg;
5099 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
5101 cycleAllDouble = false;
5104 iter = regArgTab[iter].trashBy;
5105 } while (iter != begReg);
5107 // We may treat doubles as floats for ARM because we could have partial circular
5108 // dependencies of a float with a lo/hi part of the double. We mark the
5109 // trashBy values for each slot of the double, so let the circular dependency
5110 // logic work its way out for floats rather than doubles. If a cycle has all
5111 // doubles, then optimize so that instead of two vmov.f32's to move a double,
5112 // we can use one vmov.f64.
5114 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
5116 destMemType = TYP_FLOAT;
5118 #endif // _TARGET_ARM_
5120 if (destMemType == TYP_REF)
5124 else if (destMemType == TYP_BYREF)
5128 else if (destMemType == TYP_DOUBLE)
5132 else if (destMemType == TYP_FLOAT)
5137 /* move the dest reg (begReg) in the extra reg */
5139 assert(xtraReg != REG_NA);
5141 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
5143 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
5145 regTracker.rsTrackRegCopy(xtraReg, begRegNum);
5147 *pXtraRegClobbered = true;
5149 psiMoveToReg(varNumDest, xtraReg);
5151 /* start moving everything to its right place */
5153 while (srcReg != begReg)
5157 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
5158 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
5160 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
5162 regTracker.rsTrackRegCopy(destRegNum, srcRegNum);
5164 /* mark 'src' as processed */
5165 noway_assert(srcReg < argMax);
5166 regArgTab[srcReg].processed = true;
5168 if (size == EA_8BYTE)
5169 regArgTab[srcReg + 1].processed = true;
5171 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
5173 /* move to the next pair */
5175 srcReg = regArgTab[srcReg].trashBy;
5177 varDscDest = varDscSrc;
5178 destMemType = varDscDest->TypeGet();
5180 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
5182 destMemType = TYP_FLOAT;
5185 varNumSrc = regArgTab[srcReg].varNum;
5186 noway_assert(varNumSrc < compiler->lvaCount);
5187 varDscSrc = compiler->lvaTable + varNumSrc;
5188 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
5190 if (destMemType == TYP_REF)
5194 else if (destMemType == TYP_DOUBLE)
5204 /* take care of the beginning register */
5206 noway_assert(srcReg == begReg);
5208 /* move the dest reg (begReg) in the extra reg */
5210 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
5212 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
5214 regTracker.rsTrackRegCopy(destRegNum, xtraReg);
5216 psiMoveToReg(varNumSrc);
5218 /* mark the beginning register as processed */
5220 regArgTab[srcReg].processed = true;
5222 if (size == EA_8BYTE)
5223 regArgTab[srcReg + 1].processed = true;
5225 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
5230 /* Finally take care of the remaining arguments that must be enregistered */
5231 while (regArgMaskLive)
5233 regMaskTP regArgMaskLiveSave = regArgMaskLive;
5235 for (argNum = 0; argNum < argMax; argNum++)
5237 /* If already processed go to the next one */
5238 if (regArgTab[argNum].processed)
5243 if (regArgTab[argNum].slot == 0)
5244 { // Not a register argument
5248 varNum = regArgTab[argNum].varNum;
5249 noway_assert(varNum < compiler->lvaCount);
5250 varDsc = compiler->lvaTable + varNum;
5251 var_types regType = regArgTab[argNum].getRegType(compiler);
5252 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
5254 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5255 if (regType == TYP_UNDEF)
5257 // This could happen if the reg in regArgTab[argNum] is of the other register file -
5258 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
5259 // The next register file processing will process it.
5260 regArgMaskLive &= ~genRegMask(regNum);
5263 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5265 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
5266 #ifndef _TARGET_64BIT_
5267 #ifndef _TARGET_ARM_
5268 // Right now we think that incoming arguments are not pointer sized. When we eventually
5269 // understand the calling convention, this still won't be true. But maybe we'll have a better
5270 // idea of how to ignore it.
5272 // On Arm, a long can be passed in register
5273 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
5275 #endif //_TARGET_64BIT_
5277 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
5279 /* Register argument - hopefully it stays in the same register */
5280 regNumber destRegNum = REG_NA;
5281 var_types destMemType = varDsc->TypeGet();
5283 if (regArgTab[argNum].slot == 1)
5285 destRegNum = varDsc->lvRegNum;
5288 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
5290 // The second half of the double has already been processed! Treat this as a single.
5291 destMemType = TYP_FLOAT;
5293 #endif // _TARGET_ARM_
5295 #ifndef _TARGET_64BIT_
5296 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
5298 #ifndef LEGACY_BACKEND
5299 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
5300 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
5302 destRegNum = regNum;
5305 #endif // !LEGACY_BACKEND
5306 destRegNum = varDsc->lvOtherReg;
5308 assert(destRegNum != REG_STK);
5312 assert(regArgTab[argNum].slot == 2);
5313 assert(destMemType == TYP_DOUBLE);
5315 // For doubles, we move the entire double using the argNum representing
5316 // the first half of the double. There are two things we won't do:
5317 // (1) move the double when the 1st half of the destination is free but the
5318 // 2nd half is occupied, and (2) move the double when the 2nd half of the
5319 // destination is free but the 1st half is occupied. Here we consider the
5320 // case where the first half can't be moved initially because its target is
5321 // still busy, but the second half can be moved. We wait until the entire
5322 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
5323 // and F2 single moving to F16. When we process F0, its target F2 is busy,
5324 // so we skip it on the first pass. When we process F1, its target F3 is
5325 // available. However, we want to move F0/F1 all at once, so we skip it here.
5326 // We process F2, which frees up F2. The next pass through, we process F0 and
5327 // F2/F3 are empty, so we move it. Note that if half of a double is involved
5328 // in a circularity with a single, then we will have already moved that half
5329 // above, so we go ahead and move the remaining half as a single.
5330 // Because there are no circularities left, we are guaranteed to terminate.
5333 assert(regArgTab[argNum - 1].slot == 1);
5335 if (!regArgTab[argNum - 1].processed)
5337 // The first half of the double hasn't been processed; try to be processed at the same time
5341 // The first half of the double has been processed but the second half hasn't!
5342 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
5343 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
5344 // dependency logic above will move them as singles, leaving just F3 to move. Treat
5345 // it as a single to finish the shuffling.
5347 destMemType = TYP_FLOAT;
5348 destRegNum = REG_NEXT(varDsc->lvRegNum);
5350 #endif // !_TARGET_64BIT_
5351 #if (defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
5354 assert(regArgTab[argNum].slot == 2);
5356 assert(regArgTab[argNum - 1].slot == 1);
5357 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
5358 destRegNum = varDsc->lvRegNum;
5359 noway_assert(regNum != destRegNum);
5362 #endif // (defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
5363 noway_assert(destRegNum != REG_NA);
5364 if (destRegNum != regNum)
5366 /* Cannot trash a currently live register argument.
5367 * Skip this one until its target will be free
5368 * which is guaranteed to happen since we have no circular dependencies. */
5370 regMaskTP destMask = genRegMask(destRegNum);
5372 // Don't process the double until both halves of the destination are clear.
5373 if (genActualType(destMemType) == TYP_DOUBLE)
5375 assert((destMask & RBM_DBL_REGS) != 0);
5376 destMask |= genRegMask(REG_NEXT(destRegNum));
5380 if (destMask & regArgMaskLive)
5385 /* Move it to the new register */
5387 emitAttr size = emitActualTypeSize(destMemType);
5389 #if defined(_TARGET_ARM64_)
5390 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
5392 // For a SIMD type that is passed in two integer registers,
5393 // Limit the copy below to the first 8 bytes from the first integer register.
5394 // Handle the remaining 8 bytes from the second slot in the code further below
5395 assert(EA_SIZE(size) >= 8);
5400 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
5402 psiMoveToReg(varNum);
5405 /* mark the argument as processed */
5407 assert(!regArgTab[argNum].processed);
5408 regArgTab[argNum].processed = true;
5409 regArgMaskLive &= ~genRegMask(regNum);
5410 #if FEATURE_MULTIREG_ARGS
5411 int argRegCount = 1;
5413 if (genActualType(destMemType) == TYP_DOUBLE)
5418 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5419 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
5422 int nextArgNum = argNum + 1;
5423 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
5424 noway_assert(regArgTab[nextArgNum].varNum == varNum);
5425 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
5426 // and moves the 0th element of the src reg into the 1st element of the dest reg.
5427 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
5428 // Set destRegNum to regNum so that we skip the setting of the register below,
5429 // but mark argNum as processed and clear regNum from the live mask.
5430 destRegNum = regNum;
5432 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
5433 #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
5434 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
5436 // For a SIMD type that is passed in two integer registers,
5437 // Code above copies the first integer argument register into the lower 8 bytes
5438 // of the target register. Here we must handle the second 8 bytes of the slot pair by
5439 // inserting the second integer register into the upper 8 bytes of the target
5440 // SIMD floating point register.
5442 int nextArgNum = argNum + 1;
5443 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
5444 noway_assert(regArgTab[nextArgNum].varNum == varNum);
5445 noway_assert(genIsValidIntReg(nextRegNum));
5446 noway_assert(genIsValidFloatReg(destRegNum));
5447 getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
5449 #endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
5450 // Mark the rest of the argument registers corresponding to this multi-reg type as
5451 // being processed and no longer live.
5452 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
5454 int nextArgNum = argNum + regSlot;
5455 assert(!regArgTab[nextArgNum].processed);
5456 regArgTab[nextArgNum].processed = true;
5457 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
5458 regArgMaskLive &= ~genRegMask(nextRegNum);
5460 #endif // FEATURE_MULTIREG_ARGS
5463 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
5467 #pragma warning(pop)
5470 /*****************************************************************************
5471 * If any incoming stack arguments live in registers, load them.
5473 void CodeGen::genEnregisterIncomingStackArgs()
5478 printf("*************** In genEnregisterIncomingStackArgs()\n");
5482 assert(compiler->compGeneratingProlog);
5484 unsigned varNum = 0;
5486 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
5488 /* Is this variable a parameter? */
5490 if (!varDsc->lvIsParam)
5495 /* If it's a register argument then it's already been taken care of.
5496 But, on Arm when under a profiler, we would have prespilled a register argument
5497 and hence here we need to load it from its prespilled location.
5499 bool isPrespilledForProfiling = false;
5500 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
5501 isPrespilledForProfiling =
5502 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
5505 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
5510 /* Has the parameter been assigned to a register? */
5512 if (!varDsc->lvIsInReg())
5517 var_types type = genActualType(varDsc->TypeGet());
5519 #if FEATURE_STACK_FP_X87
5520 // Floating point locals are loaded onto the x86-FPU in the next section
5521 if (varTypeIsFloating(type))
5525 /* Is the variable dead on entry */
5527 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
5532 /* Load the incoming parameter into the register */
5534 /* Figure out the home offset of the incoming argument */
5539 #ifndef LEGACY_BACKEND
5541 if (type == TYP_LONG)
5543 regPairNo regPair = varDsc->lvArgInitRegPair;
5544 regNum = genRegPairLo(regPair);
5545 otherReg = genRegPairHi(regPair);
5548 #endif // _TARGET_ARM_
5550 regNum = varDsc->lvArgInitReg;
5553 #else // LEGACY_BACKEND
5554 regNum = varDsc->lvRegNum;
5555 if (type == TYP_LONG)
5557 otherReg = varDsc->lvOtherReg;
5563 #endif // LEGACY_BACKEND
5565 assert(regNum != REG_STK);
5567 #ifndef _TARGET_64BIT_
5568 if (type == TYP_LONG)
5570 /* long - at least the low half must be enregistered */
5572 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, regNum, varNum, 0);
5573 regTracker.rsTrackRegTrash(regNum);
5575 /* Is the upper half also enregistered? */
5577 if (otherReg != REG_STK)
5579 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, otherReg, varNum, sizeof(int));
5580 regTracker.rsTrackRegTrash(otherReg);
5584 #endif // _TARGET_64BIT_
5586 /* Loading a single register - this is the easy/common case */
5588 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
5589 regTracker.rsTrackRegTrash(regNum);
5592 psiMoveToReg(varNum);
5596 /*-------------------------------------------------------------------------
5598 * We have to decide whether we're going to use block initialization
5599 * in the prolog before we assign final stack offsets. This is because
5600 * when using block initialization we may need additional callee-saved
5601 * registers which need to be saved on the frame, thus increasing the
5604 * We'll count the number of locals we have to initialize,
5605 * and if there are lots of them we'll use block initialization.
5606 * Thus, the local variable table must have accurate register location
5607 * information for enregistered locals for their register state on entry
5610 * At the same time we set lvMustInit for locals (enregistered or on stack)
5611 * that must be initialized (e.g. initialize memory (comInitMem),
5612 * untracked pointers or disable DFA)
5614 void CodeGen::genCheckUseBlockInit()
5616 #ifndef LEGACY_BACKEND // this is called before codegen in RyuJIT backend
5617 assert(!compiler->compGeneratingProlog);
5618 #else // LEGACY_BACKEND
5619 assert(compiler->compGeneratingProlog);
5620 #endif // LEGACY_BACKEND
5622 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
5623 // larger than int count for more than 1).
5624 unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
5625 // determine whether to use block init.
5630 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
5632 if (varDsc->lvIsParam)
5637 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
5639 noway_assert(varDsc->lvRefCnt == 0);
5643 if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
5648 #if FEATURE_FIXED_OUT_ARGS
5649 if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
5653 if (varNum == compiler->lvaOutgoingArgSpaceVar)
5659 #if FEATURE_EH_FUNCLETS
5660 // There's no need to force 0-initialization of the PSPSym, it will be
5661 // initialized with a real value in the prolog
5662 if (varNum == compiler->lvaPSPSym)
5668 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5670 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
5671 // initialized by the parent struct. No need to set the lvMustInit bit in the
5676 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
5679 if (varDsc->lvTracked)
5681 /* For uninitialized use of tracked variables, the liveness
5682 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
5684 if (varDsc->lvMustInit ||
5685 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
5687 /* This var must be initialized */
5689 varDsc->lvMustInit = 1;
5691 /* See if the variable is on the stack will be initialized
5692 * using rep stos - compute the total size to be zero-ed */
5694 if (varDsc->lvOnFrame)
5696 if (!varDsc->lvRegister)
5698 #ifndef LEGACY_BACKEND
5699 if (!varDsc->lvIsInReg())
5700 #endif // !LEGACY_BACKEND
5702 // Var is completely on the stack, in the legacy JIT case, or
5703 // on the stack at entry, in the RyuJIT case.
5705 (unsigned)roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
5710 // Var is partially enregistered
5711 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
5712 initStkLclCnt += genTypeStSz(TYP_INT);
5718 /* With compInitMem, all untracked vars will have to be init'ed */
5719 /* VSW 102460 - Do not force initialization of compiler generated temps,
5720 unless they are untracked GC type or structs that contain GC pointers */
5721 CLANG_FORMAT_COMMENT_ANCHOR;
5724 // TODO-1stClassStructs
5725 // This is here to duplicate previous behavior, where TYP_SIMD8 locals
5726 // were not being re-typed correctly.
5727 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
5728 #else // !FEATURE_SIMD
5729 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
5730 #endif // !FEATURE_SIMD
5731 varDsc->lvOnFrame &&
5732 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
5734 varDsc->lvMustInit = true;
5736 initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
5742 /* Ignore if not a pointer variable or value class with a GC field */
5744 if (!compiler->lvaTypeIsGC(varNum))
5749 /* If we don't know lifetimes of variables, must be conservative */
5750 if (!compiler->backendRequiresLocalVarLifetimes())
5752 varDsc->lvMustInit = true;
5753 noway_assert(!varDsc->lvRegister);
5757 if (!varDsc->lvTracked)
5759 varDsc->lvMustInit = true;
5763 /* Is this a 'must-init' stack pointer local? */
5765 if (varDsc->lvMustInit && varDsc->lvOnFrame)
5767 initStkLclCnt += varDsc->lvStructGcCount;
5770 if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
5776 /* Don't forget about spill temps that hold pointers */
5778 if (!TRACK_GC_TEMP_LIFETIMES)
5780 assert(compiler->tmpAllFree());
5781 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
5783 if (varTypeIsGC(tempThis->tdTempType()))
5790 // After debugging this further it was found that this logic is incorrect:
5791 // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
5792 // and this also double counts variables (we saw this in the debugger) around line 4829.
5793 // Even though this doesn't pose a problem with correctness it will improperly decide to
5794 // zero init the stack using a block operation instead of a 'case by case' basis.
5795 genInitStkLclCnt = initStkLclCnt;
5797 /* If we have more than 4 untracked locals, use block initialization */
5798 /* TODO-Review: If we have large structs, bias toward not using block initialization since
5799 we waste all the other slots. Really need to compute the correct
5800 and compare that against zeroing the slots individually */
5802 genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
5804 if (genUseBlockInit)
5806 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
5808 // If there is a secret stub param, don't count it, as it will no longer
5809 // be live when we do block init.
5810 if (compiler->info.compPublishStubParam)
5812 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
5815 #ifdef _TARGET_XARCH_
5816 // If we're going to use "REP STOS", remember that we will trash EDI
5817 // For fastcall we will have to save ECX, EAX
5818 // so reserve two extra callee saved
5819 // This is better than pushing eax, ecx, because we in the later
5820 // we will mess up already computed offsets on the stack (for ESP frames)
5821 regSet.rsSetRegsModified(RBM_EDI);
5823 #ifdef UNIX_AMD64_ABI
5824 // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
5825 // In such case use R12 and R13 registers.
5826 if (maskCalleeRegArgMask & RBM_RCX)
5828 regSet.rsSetRegsModified(RBM_R12);
5831 if (maskCalleeRegArgMask & RBM_RDI)
5833 regSet.rsSetRegsModified(RBM_R13);
5835 #else // !UNIX_AMD64_ABI
5836 if (maskCalleeRegArgMask & RBM_ECX)
5838 regSet.rsSetRegsModified(RBM_ESI);
5840 #endif // !UNIX_AMD64_ABI
5842 if (maskCalleeRegArgMask & RBM_EAX)
5844 regSet.rsSetRegsModified(RBM_EBX);
5847 #endif // _TARGET_XARCH_
5850 // On the Arm if we are using a block init to initialize, then we
5851 // must force spill R4/R5/R6 so that we can use them during
5852 // zero-initialization process.
5854 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
5855 if (forceSpillRegCount > 0)
5856 regSet.rsSetRegsModified(RBM_R4);
5857 if (forceSpillRegCount > 1)
5858 regSet.rsSetRegsModified(RBM_R5);
5859 if (forceSpillRegCount > 2)
5860 regSet.rsSetRegsModified(RBM_R6);
5861 #endif // _TARGET_ARM_
5865 /*-----------------------------------------------------------------------------
5867 * Push any callee-saved registers we have used
5870 #if defined(_TARGET_ARM64_)
5871 void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
5873 void CodeGen::genPushCalleeSavedRegisters()
5876 assert(compiler->compGeneratingProlog);
5878 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
5879 // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
5880 // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
5882 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
5883 #else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
5884 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5888 if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
5890 noway_assert(!"Used register RBM_FPBASE as a scratch register!");
5894 #ifdef _TARGET_XARCH_
5895 // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
5896 if (isFramePointerUsed())
5898 rsPushRegs &= ~RBM_FPBASE;
5902 #ifdef _TARGET_ARMARCH_
5903 // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
5904 if (isFramePointerUsed())
5905 rsPushRegs |= RBM_FPBASE;
5908 // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
5909 // changes in GC suspension architecture.
5911 // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
5912 // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
5913 // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
5914 // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
5915 // be saved on the stack and the GC suspension would time out.
5917 // So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
5918 // the following to make GC suspension work in the above scenario:
5919 // - Make return address hijacking work even when lr is not saved on the stack.
5920 // - Generate fully interruptible code for loops that contains calls
5921 // - Generate fully interruptible code for leaf methods
5923 // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
5926 rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
5928 regSet.rsMaskCalleeSaved = rsPushRegs;
5929 #endif // _TARGET_ARMARCH_
5932 if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
5934 printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
5935 compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
5936 dspRegMask(rsPushRegs);
5938 assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
5942 #if defined(_TARGET_ARM_)
5943 regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5944 regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
5946 maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
5948 assert(FitsIn<int>(maskPushRegsInt));
5949 inst_IV(INS_push, (int)maskPushRegsInt);
5950 compiler->unwindPushMaskInt(maskPushRegsInt);
5952 if (maskPushRegsFloat != 0)
5954 genPushFltRegs(maskPushRegsFloat);
5955 compiler->unwindPushMaskFloat(maskPushRegsFloat);
5957 #elif defined(_TARGET_ARM64_)
5958 // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
5959 // options. Case numbers in comments here refer to this document.
5961 // For most frames, generate, e.g.:
5962 // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
5963 // // ensures stack stays aligned.
5964 // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
5965 // // at top of frame (highest addresses).
5966 // stp r21, r22, [sp, 0x70]
5969 // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
5970 // at the top of the frame.
5971 // 2. If we save FP, then the first store is FP, LR.
5972 // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
5973 // preserve their lower 8 bytes, by calling convention.
5974 // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
5976 // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
5978 int totalFrameSize = genTotalFrameSize();
5980 int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
5982 regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5983 regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
5985 if (compiler->info.compIsVarArgs)
5987 assert(maskSaveRegsFloat == RBM_NONE);
5990 int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
5991 // generate based on various sizes.
5992 int calleeSaveSPDelta = 0;
5993 int calleeSaveSPDeltaUnaligned = 0;
5995 if (isFramePointerUsed())
5997 // We need to save both FP and LR.
5999 assert((maskSaveRegsInt & RBM_FP) != 0);
6000 assert((maskSaveRegsInt & RBM_LR) != 0);
6002 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
6007 // stp fp,lr,[sp,#-framesz]!
6009 // The (totalFrameSize < 512) condition ensures that both the predecrement
6010 // and the postincrement of SP can occur with STP.
6012 // After saving callee-saved registers, we establish the frame pointer with:
6014 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
6018 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
6019 INS_OPTS_PRE_INDEX);
6020 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
6022 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
6023 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
6025 else if (totalFrameSize <= 512)
6030 // sub sp,sp,#framesz
6031 // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
6033 // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
6034 // signed offset encoding.
6036 // After saving callee-saved registers, we establish the frame pointer with:
6038 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
6042 assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
6044 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
6045 compiler->unwindAllocStack(totalFrameSize);
6047 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
6048 compiler->lvaOutgoingArgSpaceSize);
6049 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
6051 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
6052 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
6058 // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
6059 // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
6060 // stored. For example:
6061 // stp r19,r20,[sp,#-96]!
6062 // stp d8,d9,[sp,#16]
6063 // ... save varargs incoming integer registers ...
6064 // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
6065 // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
6066 // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
6070 // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
6071 // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
6072 // above them. If that is preferable, we could implement it.
6073 // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
6075 // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
6076 // padding from above).
6077 // Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
6080 // sub sp,sp,#remainingFrameSz
6081 // or, for large frames:
6082 // mov rX, #remainingFrameSz // maybe multiple instructions
6086 // stp fp,lr,[sp,#outsz]
6089 // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
6090 // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
6091 // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
6092 // following sequences:
6094 // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
6096 // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
6099 // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
6103 // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
6104 // // always guaranteed to be 8 byte aligned).
6105 // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
6107 // sub sp,sp,#outsz - #8
6109 // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
6110 // mov rX, #outsz - #8 // maybe multiple instructions
6116 calleeSaveSPDeltaUnaligned =
6117 totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
6118 assert(calleeSaveSPDeltaUnaligned >= 0);
6119 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
6120 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
6122 offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
6123 assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
6124 // store the callee-saved registers.
6126 // We'll take care of these later, but callee-saved regs code shouldn't see them.
6127 maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
6132 // No frame pointer (no chaining).
6133 assert((maskSaveRegsInt & RBM_FP) == 0);
6134 assert((maskSaveRegsInt & RBM_LR) != 0);
6136 // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
6137 // if we only have one callee-saved register plus LR to save.
6139 NYI("Frame without frame pointer");
6143 assert(frameType != 0);
6145 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
6147 offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
6149 // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
6150 // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
6151 // need to add codes at all.
6153 if (compiler->info.compIsVarArgs)
6155 // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
6156 assert((offset % 16) == 0);
6157 for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
6159 regNumber reg2 = REG_NEXT(reg1);
6160 // stp REG, REG + 1, [SP, #offset]
6161 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
6162 compiler->unwindNop();
6163 offset += 2 * REGSIZE_BYTES;
6169 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
6170 compiler->unwindSetFrameReg(REG_FPBASE, 0);
6172 else if (frameType == 2)
6174 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
6175 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
6177 else if (frameType == 3)
6179 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
6180 assert(remainingFrameSz > 0);
6181 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
6182 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
6184 if (compiler->lvaOutgoingArgSpaceSize >= 504)
6186 // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
6187 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
6188 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
6189 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
6190 int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
6191 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
6192 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
6194 genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
6195 offset += spAdjustment2;
6197 // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
6200 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
6201 assert(spAdjustment3 > 0);
6202 assert((spAdjustment3 % 16) == 0);
6204 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
6205 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
6207 genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
6208 offset += spAdjustment3;
6212 genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
6214 offset += remainingFrameSz;
6216 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
6217 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
6221 assert(offset == totalFrameSize);
6223 #elif defined(_TARGET_XARCH_)
6224 // Push backwards so we match the order we will pop them in the epilog
6225 // and all the other code that expects it to be in this order.
6226 for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
6228 regMaskTP regBit = genRegMask(reg);
6230 if ((regBit & rsPushRegs) != 0)
6232 inst_RV(INS_push, reg, TYP_REF);
6233 compiler->unwindPush(reg);
6235 if (!doubleAlignOrFramePointerUsed())
6237 psiAdjustStackLevel(REGSIZE_BYTES);
6240 rsPushRegs &= ~regBit;
6245 assert(!"Unknown TARGET");
6249 /*-----------------------------------------------------------------------------
6251 * Probe the stack and allocate the local stack frame: subtract from SP.
6252 * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
6255 void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
6257 assert(compiler->compGeneratingProlog);
6264 const size_t pageSize = compiler->eeGetPageSize();
6267 assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
6268 #endif // _TARGET_ARM_
6270 #ifdef _TARGET_XARCH_
6271 if (frameSize == REGSIZE_BYTES)
6273 // Frame size is the same as register size.
6274 inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
6277 #endif // _TARGET_XARCH_
6278 if (frameSize < pageSize)
6280 #ifndef _TARGET_ARM64_
6281 // Frame size is (0x0008..0x1000)
6282 inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
6283 #endif // !_TARGET_ARM64_
6285 else if (frameSize < compiler->getVeryLargeFrameSize())
6287 // Frame size is (0x1000..0x3000)
6288 CLANG_FORMAT_COMMENT_ANCHOR;
6290 #if CPU_LOAD_STORE_ARCH
6291 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
6292 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
6293 regTracker.rsTrackRegTrash(initReg);
6294 *pInitRegZeroed = false; // The initReg does not contain zero
6296 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
6299 if (frameSize >= 0x2000)
6301 #if CPU_LOAD_STORE_ARCH
6302 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
6303 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
6304 regTracker.rsTrackRegTrash(initReg);
6306 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
6310 #ifdef _TARGET_ARM64_
6311 compiler->unwindPadding();
6312 #else // !_TARGET_ARM64_
6313 #if CPU_LOAD_STORE_ARCH
6314 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
6315 compiler->unwindPadding();
6316 getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
6318 inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
6320 #endif // !_TARGET_ARM64_
6324 // Frame size >= 0x3000
6325 assert(frameSize >= compiler->getVeryLargeFrameSize());
6327 // Emit the following sequence to 'tickle' the pages.
6328 // Note it is important that stack pointer not change until this is
6329 // complete since the tickles could cause a stack overflow, and we
6330 // need to be able to crawl the stack afterward (which means the
6331 // stack pointer needs to be known).
6332 CLANG_FORMAT_COMMENT_ANCHOR;
6334 #ifdef _TARGET_XARCH_
6335 bool pushedStubParam = false;
6336 if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
6338 // push register containing the StubParam
6339 inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
6340 pushedStubParam = true;
6342 #endif // !_TARGET_XARCH_
6344 #if CPU_LOAD_STORE_ARCH || !defined(_TARGET_UNIX_)
6345 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
6349 // Can't have a label inside the ReJIT padding area
6351 genPrologPadForReJit();
6353 #if CPU_LOAD_STORE_ARCH
6355 // TODO-ARM64-Bug?: set the availMask properly!
6356 regMaskTP availMask =
6357 (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
6358 availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
6359 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
6361 regNumber rOffset = initReg;
6366 // We pick the next lowest register number for rTemp
6367 noway_assert(availMask != RBM_NONE);
6368 tempMask = genFindLowestBit(availMask);
6369 rTemp = genRegNumFromMask(tempMask);
6370 availMask &= ~tempMask;
6372 // We pick the next lowest register number for rLimit
6373 noway_assert(availMask != RBM_NONE);
6374 tempMask = genFindLowestBit(availMask);
6375 rLimit = genRegNumFromMask(tempMask);
6376 availMask &= ~tempMask;
6378 // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
6380 // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
6382 // mov rLimit, -frameSize
6384 // ldr rTemp, [sp+rOffset]
6385 // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
6386 // cmp rOffset, rLimit
6388 noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
6389 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
6390 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
6391 regTracker.rsTrackRegTrash(rTemp);
6392 #if defined(_TARGET_ARM_)
6393 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
6394 #elif defined(_TARGET_ARM64_)
6395 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
6396 #endif // _TARGET_ARM64_
6397 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
6398 getEmitter()->emitIns_J(INS_bhi, NULL, -4);
6400 #else // !CPU_LOAD_STORE_ARCH
6402 #ifndef _TARGET_UNIX_
6403 // Code size for each instruction. We need this because the
6404 // backward branch is hard-coded with the number of bytes to branch.
6405 // The encoding differs based on the architecture and what register is
6406 // used (namely, using RAX has a smaller encoding).
6410 // test [esp + eax], eax 3
6411 // sub eax, 0x1000 5
6412 // cmp EAX, -frameSize 5
6415 // For AMD64 using RAX
6416 // test [rsp + rax], rax 4
6417 // sub rax, 0x1000 6
6418 // cmp rax, -frameSize 6
6421 // For AMD64 using RBP
6422 // test [rsp + rbp], rbp 4
6423 // sub rbp, 0x1000 7
6424 // cmp rbp, -frameSize 7
6427 getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
6428 inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
6429 inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
6431 int bytesForBackwardJump;
6432 #ifdef _TARGET_AMD64_
6433 assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
6434 bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
6435 #else // !_TARGET_AMD64_
6436 assert(initReg == REG_EAX);
6437 bytesForBackwardJump = -15;
6438 #endif // !_TARGET_AMD64_
6440 inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
6441 #else // _TARGET_UNIX_
6442 // Code size for each instruction. We need this because the
6443 // backward branch is hard-coded with the number of bytes to branch.
6444 // The encoding differs based on the architecture and what register is
6445 // used (namely, using RAX has a smaller encoding).
6448 // lea eax, [esp - frameSize]
6450 // lea esp, [esp - pageSize] 7
6451 // test [esp], eax 3
6454 // lea rsp, [rbp + frameSize]
6456 // For AMD64 using RAX
6457 // lea rax, [rsp - frameSize]
6459 // lea rsp, [rsp - pageSize] 8
6460 // test [rsp], rax 4
6463 // lea rsp, [rax + frameSize]
6465 // For AMD64 using RBP
6466 // lea rbp, [rsp - frameSize]
6468 // lea rsp, [rsp - pageSize] 8
6469 // test [rsp], rbp 4
6472 // lea rsp, [rbp + frameSize]
6474 int sPageSize = (int)pageSize;
6476 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border
6478 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize);
6479 getEmitter()->emitIns_R_AR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, 0);
6480 inst_RV_RV(INS_cmp, REG_SPBASE, initReg);
6482 int bytesForBackwardJump;
6483 #ifdef _TARGET_AMD64_
6484 assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
6485 bytesForBackwardJump = -17;
6486 #else // !_TARGET_AMD64_
6487 assert(initReg == REG_EAX);
6488 bytesForBackwardJump = -14;
6489 #endif // !_TARGET_AMD64_
6491 inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
6493 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer
6494 #endif // _TARGET_UNIX_
6496 #endif // !CPU_LOAD_STORE_ARCH
6498 *pInitRegZeroed = false; // The initReg does not contain zero
6500 #ifdef _TARGET_XARCH_
6501 if (pushedStubParam)
6504 inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
6505 regTracker.rsTrackRegTrash(REG_SECRET_STUB_PARAM);
6507 #endif // _TARGET_XARCH_
6509 #if CPU_LOAD_STORE_ARCH
6510 compiler->unwindPadding();
6513 #if CPU_LOAD_STORE_ARCH
6514 #ifndef _TARGET_ARM64_
6515 inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
6516 #endif // !_TARGET_ARM64_
6518 // sub esp, frameSize 6
6519 inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
6523 #ifndef _TARGET_ARM64_
6524 compiler->unwindAllocStack(frameSize);
6526 if (!doubleAlignOrFramePointerUsed())
6528 psiAdjustStackLevel(frameSize);
6530 #endif // !_TARGET_ARM64_
6533 #if defined(_TARGET_ARM_)
6535 void CodeGen::genPushFltRegs(regMaskTP regMask)
6537 assert(regMask != 0); // Don't call uness we have some registers to push
6538 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
6540 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
6541 int slots = genCountBits(regMask);
6542 // regMask should be contiguously set
6543 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
6544 assert((tmpMask & (tmpMask - 1)) == 0);
6545 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
6547 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
6548 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
6549 noway_assert((slots % 2) == 0);
6551 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
6554 void CodeGen::genPopFltRegs(regMaskTP regMask)
6556 assert(regMask != 0); // Don't call uness we have some registers to pop
6557 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
6559 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
6560 int slots = genCountBits(regMask);
6561 // regMask should be contiguously set
6562 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
6563 assert((tmpMask & (tmpMask - 1)) == 0);
6565 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
6566 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
6567 noway_assert((slots % 2) == 0);
6569 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
6572 /*-----------------------------------------------------------------------------
6574 * If we have a jmp call, then the argument registers cannot be used in the
6575 * epilog. So return the current call's argument registers as the argument
6576 * registers for the jmp call.
6578 regMaskTP CodeGen::genJmpCallArgMask()
6580 assert(compiler->compGeneratingEpilog);
6582 regMaskTP argMask = RBM_NONE;
6583 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
6585 const LclVarDsc& desc = compiler->lvaTable[varNum];
6586 if (desc.lvIsRegArg)
6588 argMask |= genRegMask(desc.lvArgReg);
6594 /*-----------------------------------------------------------------------------
6596 * Free the local stack frame: add to SP.
6597 * If epilog unwind hasn't been started, and we generate code, we start unwind
6598 * and set *pUnwindStarted = true.
6601 void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
6603 assert(compiler->compGeneratingEpilog);
6608 // Add 'frameSize' to SP.
6610 // Unfortunately, we can't just use:
6612 // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
6614 // because we need to generate proper unwind codes for each instruction generated,
6615 // and large frame sizes might generate a temp register load which might
6616 // need an unwind code. We don't want to generate a "NOP" code for this
6617 // temp register load; we want the unwind codes to start after that.
6619 if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
6621 if (!*pUnwindStarted)
6623 compiler->unwindBegEpilog();
6624 *pUnwindStarted = true;
6627 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
6631 regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
6634 // Do not use argument registers as scratch registers in the jmp epilog.
6635 grabMask &= ~genJmpCallArgMask();
6637 #ifndef LEGACY_BACKEND
6640 #else // LEGACY_BACKEND
6641 regNumber tmpReg = regSet.rsGrabReg(grabMask);
6642 #endif // LEGACY_BACKEND
6643 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
6644 if (*pUnwindStarted)
6646 compiler->unwindPadding();
6649 // We're going to generate an unwindable instruction, so check again if
6650 // we need to start the unwind codes.
6652 if (!*pUnwindStarted)
6654 compiler->unwindBegEpilog();
6655 *pUnwindStarted = true;
6658 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
6661 compiler->unwindAllocStack(frameSize);
6664 /*-----------------------------------------------------------------------------
6666 * Move of relocatable displacement value to register
6668 void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
6670 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
6671 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
6673 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6675 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
6679 /*-----------------------------------------------------------------------------
6681 * Move of relocatable data-label to register
6683 void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
6685 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
6686 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
6688 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6690 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
6694 /*-----------------------------------------------------------------------------
6696 * Move of relocatable immediate to register
6698 void CodeGen::genMov32RelocatableImmediate(emitAttr size, size_t value, regNumber reg)
6700 _ASSERTE(EA_IS_RELOC(size));
6702 getEmitter()->emitIns_R_I(INS_movw, size, reg, value);
6703 getEmitter()->emitIns_R_I(INS_movt, size, reg, value);
6705 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
6707 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
6711 /*-----------------------------------------------------------------------------
6713 * Returns register mask to push/pop to allocate a small stack frame,
6714 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
6715 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
6717 regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
6719 assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
6721 // We can't do this optimization with callee saved floating point registers because
6722 // the stack would be allocated in a wrong spot.
6723 if (maskCalleeSavedFloat != RBM_NONE)
6726 // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
6727 // that extra sub sp,XXX/add sp,XXX.
6728 // R0 and R1 may be used by return value. Keep things simple and just skip the optimization
6729 // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
6730 // significant negative side-effects (more memory bus traffic).
6735 case 2 * REGSIZE_BYTES:
6736 return RBM_R2 | RBM_R3;
6742 #endif // _TARGET_ARM_
6744 #if !FEATURE_STACK_FP_X87
6746 /*****************************************************************************
6748 * initFltRegs -- The mask of float regs to be zeroed.
6749 * initDblRegs -- The mask of double regs to be zeroed.
6750 * initReg -- A zero initialized integer reg to copy from.
6752 * Does best effort to move between VFP/xmm regs if one is already
6753 * initialized to 0. (Arm Only) Else copies from the integer register which
6756 void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
6758 assert(compiler->compGeneratingProlog);
6760 // The first float/double reg that is initialized to 0. So they can be used to
6761 // initialize the remaining registers.
6762 regNumber fltInitReg = REG_NA;
6763 regNumber dblInitReg = REG_NA;
6765 // Iterate through float/double registers and initialize them to 0 or
6766 // copy from already initialized register of the same type.
6767 regMaskTP regMask = genRegMask(REG_FP_FIRST);
6768 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
6770 if (regMask & initFltRegs)
6772 // Do we have a float register already set to 0?
6773 if (fltInitReg != REG_NA)
6776 inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
6781 // Do we have a double register initialized to 0?
6782 if (dblInitReg != REG_NA)
6784 // Copy from double.
6785 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
6790 inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
6792 #elif defined(_TARGET_XARCH_)
6793 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
6794 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
6796 #elif defined(_TARGET_ARM64_)
6797 NYI("Initialize floating-point register to zero");
6799 #error Unsupported or unset target architecture
6804 else if (regMask & initDblRegs)
6806 // Do we have a double register already set to 0?
6807 if (dblInitReg != REG_NA)
6809 // Copy from double.
6810 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
6815 // Do we have a float register initialized to 0?
6816 if (fltInitReg != REG_NA)
6819 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
6824 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
6826 #elif defined(_TARGET_XARCH_)
6827 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
6828 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
6830 #elif defined(_TARGET_ARM64_)
6831 // We will just zero out the entire vector register. This sets it to a double zero value
6832 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
6834 #error Unsupported or unset target architecture
6841 #endif // !FEATURE_STACK_FP_X87
6843 /*-----------------------------------------------------------------------------
6845 * Restore any callee-saved registers we have used
6848 #if defined(_TARGET_ARM_)
6850 bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
6852 assert(compiler->compGeneratingEpilog);
6854 if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
6860 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
6862 assert(compiler->compGeneratingEpilog);
6864 regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
6865 regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
6866 regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
6868 // First, pop float registers
6870 if (maskPopRegsFloat != RBM_NONE)
6872 genPopFltRegs(maskPopRegsFloat);
6873 compiler->unwindPopMaskFloat(maskPopRegsFloat);
6876 // Next, pop integer registers
6880 regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
6881 maskPopRegsInt |= maskStackAlloc;
6884 if (isFramePointerUsed())
6886 assert(!regSet.rsRegsModified(RBM_FPBASE));
6887 maskPopRegsInt |= RBM_FPBASE;
6890 if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
6892 maskPopRegsInt |= RBM_PC;
6893 // Record the fact that we use a pop to the PC to perform the return
6894 genUsedPopToReturn = true;
6898 maskPopRegsInt |= RBM_LR;
6899 // Record the fact that we did not use a pop to the PC to perform the return
6900 genUsedPopToReturn = false;
6903 assert(FitsIn<int>(maskPopRegsInt));
6904 inst_IV(INS_pop, (int)maskPopRegsInt);
6905 compiler->unwindPopMaskInt(maskPopRegsInt);
6908 #elif defined(_TARGET_ARM64_)
6910 void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
6912 assert(compiler->compGeneratingEpilog);
6914 regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
6916 if (isFramePointerUsed())
6918 rsRestoreRegs |= RBM_FPBASE;
6921 rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
6923 regMaskTP regsToRestoreMask = rsRestoreRegs;
6925 int totalFrameSize = genTotalFrameSize();
6927 int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
6929 int frameType = 0; // An indicator of what type of frame we are popping.
6930 int calleeSaveSPDelta = 0;
6931 int calleeSaveSPDeltaUnaligned = 0;
6933 if (isFramePointerUsed())
6935 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
6938 if (compiler->compLocallocUsed)
6940 // Restore sp from fp
6942 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
6943 compiler->unwindSetFrameReg(REG_FPBASE, 0);
6946 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
6948 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
6950 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
6952 else if (totalFrameSize <= 512)
6955 if (compiler->compLocallocUsed)
6957 // Restore sp from fp
6958 // sub sp, fp, #outsz
6959 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
6960 compiler->lvaOutgoingArgSpaceSize);
6961 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
6964 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
6966 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
6968 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
6974 calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
6975 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
6976 assert(calleeSaveSPDeltaUnaligned >= 0);
6977 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
6978 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
6980 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
6982 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
6983 assert(remainingFrameSz > 0);
6985 if (compiler->lvaOutgoingArgSpaceSize >= 504)
6987 // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
6988 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
6989 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
6990 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
6991 int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
6992 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
6993 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
6995 if (compiler->compLocallocUsed)
6997 // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
6999 // sub sp, fp, #alignmentAdjustment2
7000 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
7001 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
7006 // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
7008 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
7009 assert(spAdjustment3 > 0);
7010 assert((spAdjustment3 % 16) == 0);
7011 genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
7016 // add sp,sp,#remainingFrameSz
7017 genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP1, nullptr);
7021 if (compiler->compLocallocUsed)
7023 // Restore sp from fp
7024 // sub sp, fp, #outsz
7025 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
7026 compiler->lvaOutgoingArgSpaceSize);
7027 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
7031 // ldp fp,lr,[sp,#outsz]
7032 // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
7035 genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP1,
7039 // Unlike frameType=1 or frameType=2 that restore SP at the end,
7040 // frameType=3 already adjusted SP above to delete local frame.
7041 // There is at most one alignment slot between SP and where we store the callee-saved registers.
7042 calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
7043 assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
7048 // No frame pointer (no chaining).
7049 NYI("Frame without frame pointer");
7050 calleeSaveSPOffset = 0;
7053 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
7058 // ldp fp,lr,[sp],#framesz
7060 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
7061 INS_OPTS_POST_INDEX);
7062 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
7064 else if (frameType == 2)
7067 // ldr fp,lr,[sp,#outsz]
7068 // add sp,sp,#framesz
7070 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
7071 compiler->lvaOutgoingArgSpaceSize);
7072 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
7074 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
7075 compiler->unwindAllocStack(totalFrameSize);
7077 else if (frameType == 3)
7079 // Nothing to do after restoring callee-saved registers.
7087 #elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
7089 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
7091 assert(compiler->compGeneratingEpilog);
7093 unsigned popCount = 0;
7094 if (regSet.rsRegsModified(RBM_EBX))
7097 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
7099 if (regSet.rsRegsModified(RBM_FPBASE))
7101 // EBP cannot be directly modified for EBP frame and double-aligned frames
7102 assert(!doubleAlignOrFramePointerUsed());
7105 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
7108 #ifndef UNIX_AMD64_ABI
7109 // For System V AMD64 calling convention ESI and EDI are volatile registers.
7110 if (regSet.rsRegsModified(RBM_ESI))
7113 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
7115 if (regSet.rsRegsModified(RBM_EDI))
7118 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
7120 #endif // !defined(UNIX_AMD64_ABI)
7122 #ifdef _TARGET_AMD64_
7123 if (regSet.rsRegsModified(RBM_R12))
7126 inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
7128 if (regSet.rsRegsModified(RBM_R13))
7131 inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
7133 if (regSet.rsRegsModified(RBM_R14))
7136 inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
7138 if (regSet.rsRegsModified(RBM_R15))
7141 inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
7143 #endif // _TARGET_AMD64_
7145 // Amd64/x86 doesn't support push/pop of xmm registers.
7146 // These will get saved to stack separately after allocating
7147 // space on stack in prolog sequence. PopCount is essentially
7148 // tracking the count of integer registers pushed.
7150 noway_assert(compiler->compCalleeRegsPushed == popCount);
7153 #elif defined(_TARGET_X86_)
7155 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
7157 assert(compiler->compGeneratingEpilog);
7159 unsigned popCount = 0;
7161 /* NOTE: The EBP-less frame code below depends on the fact that
7162 all of the pops are generated right at the start and
7163 each takes one byte of machine code.
7166 if (regSet.rsRegsModified(RBM_FPBASE))
7168 // EBP cannot be directly modified for EBP frame and double-aligned frames
7169 noway_assert(!doubleAlignOrFramePointerUsed());
7171 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
7174 if (regSet.rsRegsModified(RBM_EBX))
7177 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
7179 if (regSet.rsRegsModified(RBM_ESI))
7182 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
7184 if (regSet.rsRegsModified(RBM_EDI))
7187 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
7189 noway_assert(compiler->compCalleeRegsPushed == popCount);
7194 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
7195 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
7196 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
7198 #ifdef _TARGET_ARM64_
7200 #else // !_TARGET_ARM64_
7201 if (*pInitRegZeroed == false)
7203 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
7204 *pInitRegZeroed = true;
7207 #endif // !_TARGET_ARM64_
7210 /*-----------------------------------------------------------------------------
7212 * Do we have any untracked pointer locals at all,
7213 * or do we need to initialize memory for locspace?
7215 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
7216 * initializing memory (not inclusive).
7217 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
7218 * initializing memory.
7219 * initReg - A scratch register (that gets set to zero on some platforms).
7220 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
7222 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
7224 assert(compiler->compGeneratingProlog);
7226 if (genUseBlockInit)
7228 assert(untrLclHi > untrLclLo);
7229 #ifdef _TARGET_ARMARCH_
7231 Generate the following code:
7233 For cnt less than 10
7238 stm <rZero1,rZero2>,[rAddr!]
7239 <optional> stm <rZero1,rZero2>,[rAddr!]
7240 <optional> stm <rZero1,rZero2>,[rAddr!]
7241 <optional> stm <rZero1,rZero2>,[rAddr!]
7242 <optional> str rZero1,[rAddr]
7244 For rCnt greater than or equal to 10
7252 stm <rZero1,rZero2>,[rAddr!]
7256 <optional> str rZero1,[rAddr] // When cnt is odd
7258 NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
7262 regNumber rCnt = REG_NA; // Invalid
7265 regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
7266 availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
7268 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
7269 // a large constant.
7271 #if defined(_TARGET_ARM_)
7273 if (compiler->compLocallocUsed)
7275 availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
7278 regNumber rZero1; // We're going to use initReg for rZero1
7281 // We pick the next lowest register number for rZero2
7282 noway_assert(availMask != RBM_NONE);
7283 regMask = genFindLowestBit(availMask);
7284 rZero2 = genRegNumFromMask(regMask);
7285 availMask &= ~regMask;
7286 assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7287 0); // rZero2 is not a live incoming argument reg
7289 // We pick the next lowest register number for rAddr
7290 noway_assert(availMask != RBM_NONE);
7291 regMask = genFindLowestBit(availMask);
7292 rAddr = genRegNumFromMask(regMask);
7293 availMask &= ~regMask;
7295 #else // !define(_TARGET_ARM_)
7297 regNumber rZero1 = REG_ZR;
7299 *pInitRegZeroed = false;
7301 #endif // !defined(_TARGET_ARM_)
7303 bool useLoop = false;
7304 unsigned uCntBytes = untrLclHi - untrLclLo;
7305 assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
7306 unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
7308 // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
7309 // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
7310 // In both of these cases the stm/stp instruction will write two zeros to memory
7311 // and we will use a single str instruction at the end whenever we have an odd count.
7312 if (uCntSlots >= 10)
7317 // We pick the next lowest register number for rCnt
7318 noway_assert(availMask != RBM_NONE);
7319 regMask = genFindLowestBit(availMask);
7320 rCnt = genRegNumFromMask(regMask);
7321 availMask &= ~regMask;
7324 assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7325 0); // rAddr is not a live incoming argument reg
7326 #if defined(_TARGET_ARM_)
7327 if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
7328 #else // !_TARGET_ARM_
7329 if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
7330 #endif // !_TARGET_ARM_
7332 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
7336 // Load immediate into the InitReg register
7337 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
7338 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
7339 *pInitRegZeroed = false;
7344 noway_assert(uCntSlots >= 2);
7345 assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7346 0); // rCnt is not a live incoming argument reg
7347 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
7350 #if defined(_TARGET_ARM_)
7351 rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
7352 instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
7353 ssize_t stmImm = (ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
7354 #endif // _TARGET_ARM_
7358 while (uCntBytes >= REGSIZE_BYTES * 2)
7361 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
7362 #else // !_TARGET_ARM_
7363 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
7364 INS_OPTS_POST_INDEX);
7365 #endif // !_TARGET_ARM_
7366 uCntBytes -= REGSIZE_BYTES * 2;
7369 else // useLoop is true
7372 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
7373 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
7374 #else // !_TARGET_ARM_
7375 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
7376 INS_OPTS_POST_INDEX); // zero stack slots
7377 getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
7378 #endif // !_TARGET_ARM_
7379 getEmitter()->emitIns_J(INS_bhi, NULL, -3);
7380 uCntBytes %= REGSIZE_BYTES * 2;
7383 if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
7386 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
7387 #else // _TARGET_ARM_
7388 if ((uCntBytes - REGSIZE_BYTES) == 0)
7390 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
7394 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
7396 #endif // !_TARGET_ARM_
7397 uCntBytes -= REGSIZE_BYTES;
7399 #ifdef _TARGET_ARM64_
7402 assert(uCntBytes == sizeof(int));
7403 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
7404 uCntBytes -= sizeof(int);
7406 #endif // _TARGET_ARM64_
7407 noway_assert(uCntBytes == 0);
7409 #elif defined(_TARGET_XARCH_)
7411 Generate the following code:
7413 lea edi, [ebp/esp-OFFS]
7419 noway_assert(regSet.rsRegsModified(RBM_EDI));
7421 #ifdef UNIX_AMD64_ABI
7422 // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
7423 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
7425 noway_assert(regSet.rsRegsModified(RBM_R12));
7426 inst_RV_RV(INS_mov, REG_R12, REG_RCX);
7427 regTracker.rsTrackRegTrash(REG_R12);
7430 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
7432 noway_assert(regSet.rsRegsModified(RBM_R13));
7433 inst_RV_RV(INS_mov, REG_R13, REG_RDI);
7434 regTracker.rsTrackRegTrash(REG_R13);
7436 #else // !UNIX_AMD64_ABI
7437 // For register arguments we may have to save ECX
7438 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
7440 noway_assert(regSet.rsRegsModified(RBM_ESI));
7441 inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
7442 regTracker.rsTrackRegTrash(REG_ESI);
7444 #endif // !UNIX_AMD64_ABI
7446 noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
7448 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
7449 regTracker.rsTrackRegTrash(REG_EDI);
7451 inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
7452 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
7453 instGen(INS_r_stosd);
7455 #ifdef UNIX_AMD64_ABI
7456 // Move back the argument registers
7457 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
7459 inst_RV_RV(INS_mov, REG_RCX, REG_R12);
7462 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
7464 inst_RV_RV(INS_mov, REG_RDI, REG_R13);
7466 #else // !UNIX_AMD64_ABI
7467 // Move back the argument registers
7468 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
7470 inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
7472 #endif // !UNIX_AMD64_ABI
7475 #error Unsupported or unset target architecture
7478 else if (genInitStkLclCnt > 0)
7480 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
7481 0); // initReg is not a live incoming argument reg
7483 /* Initialize any lvMustInit vars on the stack */
7488 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7490 if (!varDsc->lvMustInit)
7495 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
7496 // partially-enregistered vars in the case where we don't use a block init.
7497 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
7499 // lvMustInit can only be set for GC types or TYP_STRUCT types
7500 // or when compInitMem is true
7501 // or when in debug code
7503 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
7504 compiler->info.compInitMem || compiler->opts.compDbgCode);
7506 #ifndef LEGACY_BACKEND
7507 if (!varDsc->lvOnFrame)
7511 #else // LEGACY_BACKEND
7512 if (varDsc->lvRegister)
7514 if (varDsc->lvOnFrame)
7516 /* This is a partially enregistered TYP_LONG var */
7517 noway_assert(varDsc->lvOtherReg == REG_STK);
7518 noway_assert(varDsc->lvType == TYP_LONG);
7520 noway_assert(compiler->info.compInitMem);
7522 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, genGetZeroReg(initReg, pInitRegZeroed),
7523 varNum, sizeof(int));
7527 #endif // LEGACY_BACKEND
7529 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
7530 (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
7532 // We only initialize the GC variables in the TYP_STRUCT
7533 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
7534 const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
7536 for (unsigned i = 0; i < slots; i++)
7538 if (gcPtrs[i] != TYPE_GC_NONE)
7540 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
7541 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
7547 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
7549 // zero out the whole thing rounded up to a single stack slot size
7550 unsigned lclSize = (unsigned)roundUp(compiler->lvaLclSize(varNum), sizeof(int));
7552 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
7554 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
7557 #ifdef _TARGET_64BIT_
7558 assert(i == lclSize || (i + sizeof(int) == lclSize));
7561 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
7564 #endif // _TARGET_64BIT_
7565 assert(i == lclSize);
7569 if (!TRACK_GC_TEMP_LIFETIMES)
7571 assert(compiler->tmpAllFree());
7572 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr;
7573 tempThis = compiler->tmpListNxt(tempThis))
7575 if (!varTypeIsGC(tempThis->tdTempType()))
7580 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
7582 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
7588 /*-----------------------------------------------------------------------------
7590 * Save the generic context argument.
7592 * We need to do this within the "prolog" in case anyone tries to inspect
7593 * the param-type-arg/this (which can be done after the prolog) using
7594 * ICodeManager::GetParamTypeArg().
7597 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
7599 assert(compiler->compGeneratingProlog);
7601 bool reportArg = compiler->lvaReportParamTypeArg();
7603 // We should report either generic context arg or "this" when used so.
7606 #ifndef JIT32_GCENCODER
7607 if (!compiler->lvaKeepAliveAndReportThis())
7614 // For JIT32_GCENCODER, we won't be here if reportArg is false.
7615 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
7617 noway_assert(contextArg != BAD_VAR_NUM);
7618 LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
7620 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
7621 // moved to its final home location. So we need to use it from the
7622 // incoming location.
7626 bool isPrespilledForProfiling = false;
7627 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
7628 isPrespilledForProfiling =
7629 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
7632 // Load from the argument register only if it is not prespilled.
7633 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
7635 reg = varDsc->lvArgReg;
7639 if (isFramePointerUsed())
7641 #if defined(_TARGET_ARM_)
7642 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
7643 // will become enregistered.
7644 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
7645 noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
7646 (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
7648 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
7649 // will become enregistered.
7650 noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
7654 // We will just use the initReg since it is an available register
7655 // and we are probably done using it anyway...
7657 *pInitRegZeroed = false;
7659 // mov reg, [compiler->info.compTypeCtxtArg]
7660 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
7661 regTracker.rsTrackRegTrash(reg);
7664 #if CPU_LOAD_STORE_ARCH
7665 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
7666 compiler->lvaCachedGenericContextArgOffset());
7667 #else // CPU_LOAD_STORE_ARCH
7668 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
7669 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
7670 compiler->lvaCachedGenericContextArgOffset());
7671 #endif // !CPU_LOAD_STORE_ARCH
7674 /*-----------------------------------------------------------------------------
7676 * Set the "GS" security cookie in the prolog.
7679 void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
7681 assert(compiler->compGeneratingProlog);
7683 if (!compiler->getNeedsGSSecurityCookie())
7688 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
7690 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
7692 #ifdef _TARGET_AMD64_
7693 // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
7694 getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
7695 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
7697 // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
7698 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
7699 compiler->lvaGSSecurityCookie, 0, initReg);
7705 #ifdef _TARGET_XARCH_
7706 // Always use EAX on x86 and x64
7707 // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
7710 // We will just use the initReg since it is an available register
7714 *pInitRegZeroed = false;
7716 #if CPU_LOAD_STORE_ARCH
7717 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
7718 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
7719 regTracker.rsTrackRegTrash(reg);
7721 // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
7722 // mov dword ptr [frame.GSSecurityCookie], reg
7723 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
7724 regTracker.rsTrackRegTrash(reg);
7726 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
7730 #ifdef PROFILING_SUPPORTED
7732 //-----------------------------------------------------------------------------------
7733 // genProfilingEnterCallback: Generate the profiling function enter callback.
7736 // initReg - register to use as scratch register
7737 // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
7738 // not zero after this call.
7744 // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
7745 // VM\i386\asmhelpers.asm for details):
7746 // 1. The calling sequence for calling the helper is:
7747 // push FunctionIDOrClientID
7748 // call ProfileEnterHelper
7749 // 2. The calling function has an EBP frame.
7750 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
7751 // the following prolog is assumed:
7754 // 4. All registers are preserved.
7755 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
7757 void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
7759 assert(compiler->compGeneratingProlog);
7761 // Give profiler a chance to back out of hooking this method
7762 if (!compiler->compIsProfilerHookNeeded())
7767 #if defined(_TARGET_AMD64_)
7768 #if !defined(UNIX_AMD64_ABI)
7773 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
7774 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7775 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
7777 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
7778 // In case of vararg methods, arg regs are already homed.
7780 // Note: Here we don't need to worry about updating gc'info since enter
7781 // callback is generated as part of prolog which is non-gc interruptible.
7782 // Moreover GC cannot kick while executing inside profiler callback which is a
7783 // profiler requirement so it can examine arguments which could be obj refs.
7784 if (!compiler->info.compIsVarArgs)
7786 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
7788 noway_assert(varDsc->lvIsParam);
7790 if (!varDsc->lvIsRegArg)
7795 var_types storeType = varDsc->lvaArgType();
7796 regNumber argReg = varDsc->lvArgReg;
7798 instruction store_ins = ins_Store(storeType);
7801 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
7803 store_ins = INS_mov;
7805 #endif // FEATURE_SIMD
7807 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
7811 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
7812 // RCX = ProfilerMethHnd
7813 if (compiler->compProfilerMethHndIndirected)
7815 // Profiler hooks enabled during Ngen time.
7816 // Profiler handle needs to be accessed through an indirection of a pointer.
7817 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7821 // No need to record relocations, if we are generating ELT hooks under the influence
7822 // of COMPlus_JitELTHookEnabled=1
7823 if (compiler->opts.compJitELTHookEnabled)
7825 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7829 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7833 // RDX = caller's SP
7835 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
7836 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
7837 // of that offset to FramePointer to obtain caller's SP value.
7838 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7839 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7840 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
7842 // Can't have a call until we have enough padding for rejit
7843 genPrologPadForReJit();
7845 // This will emit either
7846 // "call ip-relative 32-bit offset" or
7847 // "mov rax, helper addr; call rax"
7848 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
7850 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
7851 // generation logic that moves args around as required by first BB entry point conditions
7852 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
7853 // and genEnregisterIncomingStackArgs().
7855 // Now reload arg registers from home locations.
7857 // - we need to reload only known (i.e. fixed) reg args.
7858 // - if floating point type, also reload it into corresponding integer reg
7859 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
7861 noway_assert(varDsc->lvIsParam);
7863 if (!varDsc->lvIsRegArg)
7868 var_types loadType = varDsc->lvaArgType();
7869 regNumber argReg = varDsc->lvArgReg;
7871 instruction load_ins = ins_Load(loadType);
7874 if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
7878 #endif // FEATURE_SIMD
7880 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
7883 if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
7885 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
7886 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
7887 inst_RV_RV(ins, argReg, intArgReg, loadType);
7889 #endif // FEATURE_VARARG
7892 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
7893 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
7895 *pInitRegZeroed = false;
7898 #else // !defined(UNIX_AMD64_ABI)
7900 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
7901 // R14 = ProfilerMethHnd
7902 if (compiler->compProfilerMethHndIndirected)
7904 // Profiler hooks enabled during Ngen time.
7905 // Profiler handle needs to be accessed through an indirection of a pointer.
7906 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
7907 (ssize_t)compiler->compProfilerMethHnd);
7911 // No need to record relocations, if we are generating ELT hooks under the influence
7912 // of COMPlus_JitELTHookEnabled=1
7913 if (compiler->opts.compJitELTHookEnabled)
7915 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
7919 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7923 // R15 = caller's SP
7925 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
7926 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
7927 // of that offset to FramePointer to obtain caller's SP value.
7928 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7929 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
7930 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
7932 // Can't have a call until we have enough padding for rejit
7933 genPrologPadForReJit();
7935 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
7936 // We use R11 here. This will emit either
7937 // "call ip-relative 32-bit offset" or
7938 // "mov r11, helper addr; call r11"
7939 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
7941 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
7942 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
7944 *pInitRegZeroed = false;
7947 #endif // !defined(UNIX_AMD64_ABI)
7949 #elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
7951 unsigned saveStackLvl2 = genStackLevel;
7953 #if defined(_TARGET_X86_)
7954 // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
7955 // for x86 stack unwinding
7957 // Push the profilerHandle
7958 if (compiler->compProfilerMethHndIndirected)
7960 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
7964 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
7966 #elif defined(_TARGET_ARM_)
7967 // On Arm arguments are prespilled on stack, which frees r0-r3.
7968 // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
7969 // The call target register could be any free register.
7970 #ifdef LEGACY_BACKEND
7971 regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_ENTER_ARG);
7972 noway_assert(argReg == REG_PROFILER_ENTER_ARG);
7973 regSet.rsLockReg(RBM_PROFILER_ENTER_ARG);
7974 #else // !LEGACY_BACKEND
7975 regNumber argReg = REG_PROFILER_ENTER_ARG;
7976 #endif // !LEGACY_BACKEND
7978 regMaskTP argRegMask = genRegMask(argReg);
7979 assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
7981 if (compiler->compProfilerMethHndIndirected)
7983 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
7984 regTracker.rsTrackRegTrash(argReg);
7988 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
7991 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
7995 // Can't have a call until we have enough padding for rejit
7997 genPrologPadForReJit();
7999 // This will emit either
8000 // "call ip-relative 32-bit offset" or
8001 // "mov rax, helper addr; call rax"
8002 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
8003 0, // argSize. Again, we have to lie about it
8004 EA_UNKNOWN); // retSize
8006 #if defined(_TARGET_X86_)
8008 // Adjust the number of stack slots used by this managed method if necessary.
8010 if (compiler->fgPtrArgCntMax < 1)
8012 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
8013 compiler->fgPtrArgCntMax = 1;
8015 #elif defined(_TARGET_ARM_)
8016 #ifdef LEGACY_BACKEND
8018 regSet.rsUnlockReg(RBM_PROFILER_ENTER_ARG);
8019 #endif // LEGACY_BACKEND
8021 if (initReg == argReg)
8023 *pInitRegZeroed = false;
8026 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
8029 /* Restore the stack level */
8031 SetStackLevel(saveStackLvl2);
8034 NYI("Emit Profiler Enter callback");
8038 //-----------------------------------------------------------------------------------
8039 // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
8040 // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
8043 // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
8049 // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
8050 // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
8051 // 1. The calling sequence for calling the helper is:
8052 // push FunctionIDOrClientID
8053 // call ProfileLeaveHelper or ProfileTailcallHelper
8054 // 2. The calling function has an EBP frame.
8055 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
8056 // the following prolog is assumed:
8059 // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
8060 // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
8061 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
8063 void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
8065 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
8067 // Only hook if profiler says it's okay.
8068 if (!compiler->compIsProfilerHookNeeded())
8073 compiler->info.compProfilerCallback = true;
8075 // Need to save on to the stack level, since the helper call will pop the argument
8076 unsigned saveStackLvl2 = genStackLevel;
8078 #if defined(_TARGET_AMD64_)
8079 #if !defined(UNIX_AMD64_ABI)
8081 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
8082 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
8083 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
8085 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
8086 // registers that profiler callback kills.
8087 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
8089 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
8090 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
8093 // At this point return value is computed and stored in RAX or XMM0.
8094 // On Amd64, Leave callback preserves the return register. We keep
8095 // RAX alive by not reporting as trashed by helper call. Also note
8096 // that GC cannot kick-in while executing inside profiler callback,
8097 // which is a requirement of profiler as well since it needs to examine
8098 // return value which could be an obj ref.
8100 // RCX = ProfilerMethHnd
8101 if (compiler->compProfilerMethHndIndirected)
8103 // Profiler hooks enabled during Ngen time.
8104 // Profiler handle needs to be accessed through an indirection of an address.
8105 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8109 // Don't record relocations, if we are generating ELT hooks under the influence
8110 // of COMPlus_JitELTHookEnabled=1
8111 if (compiler->opts.compJitELTHookEnabled)
8113 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
8117 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8121 // RDX = caller's SP
8122 // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
8123 // of the stmnts to execute unconditionally and clean-up rest.
8124 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
8126 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
8127 // value of that offset to FramePointer to obtain caller's SP value.
8128 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
8129 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
8133 // If we are here means that it is a tentative frame layout during which we
8134 // cannot use caller's SP offset since it is an estimate. For now we require the
8135 // method to have at least a single arg so that we can use it to obtain caller's
8137 LclVarDsc* varDsc = compiler->lvaTable;
8138 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
8140 // lea rdx, [FramePointer + Arg0's offset]
8141 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
8144 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
8145 // We use R8 here. This will emit either
8146 // "call ip-relative 32-bit offset" or
8147 // "mov r8, helper addr; call r8"
8148 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
8150 #else // !defined(UNIX_AMD64_ABI)
8152 // RDI = ProfilerMethHnd
8153 if (compiler->compProfilerMethHndIndirected)
8155 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8159 if (compiler->opts.compJitELTHookEnabled)
8161 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
8165 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8169 // RSI = caller's SP
8170 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
8172 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
8173 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
8177 LclVarDsc* varDsc = compiler->lvaTable;
8178 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
8180 // lea rdx, [FramePointer + Arg0's offset]
8181 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
8184 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
8185 // We use R11 here. This will emit either
8186 // "call ip-relative 32-bit offset" or
8187 // "mov r11, helper addr; call r11"
8188 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
8190 #endif // !defined(UNIX_AMD64_ABI)
8192 #elif defined(_TARGET_X86_)
8195 // Push the profilerHandle
8198 if (compiler->compProfilerMethHndIndirected)
8200 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
8204 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
8208 genEmitHelperCall(helper,
8209 sizeof(int) * 1, // argSize
8210 EA_UNKNOWN); // retSize
8213 // Adjust the number of stack slots used by this managed method if necessary.
8215 if (compiler->fgPtrArgCntMax < 1)
8217 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
8218 compiler->fgPtrArgCntMax = 1;
8221 #elif defined(_TARGET_ARM_)
8223 // Push the profilerHandle
8226 // We could optimize register usage based on return value is int/long/void. But to keep it simple we will lock
8227 // RBM_PROFILER_RET_USED always.
8228 #ifdef LEGACY_BACKEND
8229 regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
8230 noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
8231 regSet.rsLockReg(RBM_PROFILER_RET_USED);
8232 #endif // LEGACY_BACKEND
8234 // Contract between JIT and Profiler Leave callout on arm:
8235 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
8236 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
8237 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
8238 // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
8240 // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
8243 emitAttr attr = EA_UNKNOWN;
8245 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
8246 (varTypeIsFloating(compiler->info.compRetType) ||
8247 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
8253 // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
8254 // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
8255 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
8258 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
8260 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
8263 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
8270 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
8271 regTracker.rsTrackRegTrash(REG_PROFILER_RET_SCRATCH);
8272 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
8276 if (compiler->compProfilerMethHndIndirected)
8278 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8279 regTracker.rsTrackRegTrash(REG_ARG_0);
8283 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
8286 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
8288 EA_UNKNOWN); // retSize
8290 // Restore state that existed before profiler callback
8293 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
8294 regTracker.rsTrackRegTrash(REG_ARG_0);
8295 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
8298 #ifdef LEGACY_BACKEND
8299 regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
8300 #endif // LEGACY_BACKEND
8303 NYI("Emit Profiler Leave callback");
8306 /* Restore the stack level */
8307 SetStackLevel(saveStackLvl2);
8310 #endif // PROFILING_SUPPORTED
8312 /*****************************************************************************
8317 These instructions are just a reordering of the instructions used today.
8323 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8325 add esp, LOCALS_SIZE / pop dummyReg
8335 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
8336 Everything else is similar, though in a different order.
8338 The security object will no longer be at a fixed offset. However, the
8339 offset can still be determined by looking up the GC-info and determining
8340 how many callee-saved registers are pushed.
8347 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8349 add esp, LOCALS_SIZE / pop dummyReg
8353 (mov esp, ebp if there are no callee-saved registers)
8357 Double-aligned frame :
8358 --------------------
8360 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
8361 of callee-saved registers are pushed on the stack so that the locals
8362 themselves are qword-aligned. The instructions are the same as today,
8363 just in a different order.
8371 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
8373 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
8382 localloc (with ebp) frames :
8383 --------------------------
8385 The instructions are the same as today, just in a different order.
8386 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
8387 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
8394 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
8396 lea esp, [ebp-calleeSavedRegsPushedSize]
8400 (mov esp, ebp if there are no callee-saved registers)
8404 *****************************************************************************/
8406 /*****************************************************************************
8408 * Generates appropriate NOP padding for a function prolog to support ReJIT.
8411 void CodeGen::genPrologPadForReJit()
8413 assert(compiler->compGeneratingProlog);
8415 #ifdef _TARGET_XARCH_
8416 if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
8421 #if FEATURE_EH_FUNCLETS
8423 // No need to generate pad (nops) for funclets.
8424 // When compiling the main function (and not a funclet)
8425 // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
8426 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
8431 #endif // FEATURE_EH_FUNCLETS
8433 unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
8441 /*****************************************************************************
8443 * Reserve space for a function prolog.
8446 void CodeGen::genReserveProlog(BasicBlock* block)
8448 assert(block != nullptr);
8450 JITDUMP("Reserving prolog IG for block BB%02u\n", block->bbNum);
8452 /* Nothing is live on entry to the prolog */
8454 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
8457 /*****************************************************************************
8459 * Reserve space for a function epilog.
8462 void CodeGen::genReserveEpilog(BasicBlock* block)
8464 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
8465 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
8467 /* The return value is special-cased: make sure it goes live for the epilog */
8469 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8471 if (genFullPtrRegMap && !jmpEpilog)
8473 if (varTypeIsGC(compiler->info.compRetNativeType))
8475 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
8477 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
8479 switch (compiler->info.compRetNativeType)
8482 gcrefRegsArg |= RBM_INTRET;
8485 byrefRegsArg |= RBM_INTRET;
8493 JITDUMP("Reserving epilog IG for block BB%02u\n", block->bbNum);
8495 assert(block != nullptr);
8496 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
8497 bool last = (block->bbNext == nullptr);
8498 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
8501 #if FEATURE_EH_FUNCLETS
8503 /*****************************************************************************
8505 * Reserve space for a funclet prolog.
8508 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
8510 assert(block != nullptr);
8512 /* Currently, no registers are live on entry to the prolog, except maybe
8513 the exception object. There might be some live stack vars, but they
8514 cannot be accessed until after the frame pointer is re-established.
8515 In order to potentially prevent emitting a death before the prolog
8516 and a birth right after it, we just report it as live during the
8517 prolog, and rely on the prolog being non-interruptible. Trust
8518 genCodeForBBlist to correctly initialize all the sets.
8520 We might need to relax these asserts if the VM ever starts
8521 restoring any registers, then we could have live-in reg vars...
8524 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
8525 noway_assert(gcInfo.gcRegByrefSetCur == 0);
8527 JITDUMP("Reserving funclet prolog IG for block BB%02u\n", block->bbNum);
8529 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
8530 gcInfo.gcRegByrefSetCur, false);
8533 /*****************************************************************************
8535 * Reserve space for a funclet epilog.
8538 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
8540 assert(block != nullptr);
8542 JITDUMP("Reserving funclet epilog IG for block BB%02u\n", block->bbNum);
8544 bool last = (block->bbNext == nullptr);
8545 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
8546 gcInfo.gcRegByrefSetCur, last);
8549 #endif // FEATURE_EH_FUNCLETS
8551 /*****************************************************************************
8552 * Finalize the frame size and offset assignments.
8554 * No changes can be made to the modified register set after this, since that can affect how many
8555 * callee-saved registers get saved.
8557 void CodeGen::genFinalizeFrame()
8559 JITDUMP("Finalizing stack frame\n");
8561 #ifndef LEGACY_BACKEND
8562 // Initializations need to happen based on the var locations at the start
8563 // of the first basic block, so load those up. In particular, the determination
8564 // of whether or not to use block init in the prolog is dependent on the variable
8565 // locations on entry to the function.
8566 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
8567 #endif // !LEGACY_BACKEND
8569 genCheckUseBlockInit();
8571 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
8572 CLANG_FORMAT_COMMENT_ANCHOR;
8574 #if defined(_TARGET_X86_)
8576 if (compiler->compTailCallUsed)
8578 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
8579 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
8580 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
8581 // actually get saved.
8583 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
8585 #endif // _TARGET_X86_
8587 #if defined(_TARGET_ARMARCH_)
8588 // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
8589 // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
8590 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
8592 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
8594 #endif // defined(_TARGET_ARMARCH_)
8596 #if defined(_TARGET_ARM_)
8597 // If there are any reserved registers, add them to the
8598 if (regSet.rsMaskResvd != RBM_NONE)
8600 regSet.rsSetRegsModified(regSet.rsMaskResvd);
8602 #endif // _TARGET_ARM_
8607 printf("Modified regs: ");
8608 dspRegMask(regSet.rsGetModifiedRegsMask());
8613 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
8614 if (compiler->opts.compDbgEnC)
8616 // We always save FP.
8617 noway_assert(isFramePointerUsed());
8618 #ifdef _TARGET_AMD64_
8619 // On x64 we always save exactly RBP, RSI and RDI for EnC.
8620 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
8621 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
8622 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
8623 #else // !_TARGET_AMD64_
8624 // On x86 we save all callee saved regs so the saved reg area size is consistent
8625 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
8626 #endif // !_TARGET_AMD64_
8629 /* If we have any pinvoke calls, we might potentially trash everything */
8630 if (compiler->info.compCallUnmanaged)
8632 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
8633 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
8636 #ifdef UNIX_AMD64_ABI
8637 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
8638 if (compiler->compIsProfilerHookNeeded())
8640 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
8644 /* Count how many callee-saved registers will actually be saved (pushed) */
8646 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
8647 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
8650 // EBP cannot be (directly) modified
8651 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
8654 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
8656 #ifdef _TARGET_ARMARCH_
8657 if (isFramePointerUsed())
8659 // For a FP based frame we have to push/pop the FP register
8661 maskCalleeRegsPushed |= RBM_FPBASE;
8663 // This assert check that we are not using REG_FP
8664 // as both the frame pointer and as a codegen register
8666 assert(!regSet.rsRegsModified(RBM_FPBASE));
8669 // we always push LR. See genPushCalleeSavedRegisters
8671 maskCalleeRegsPushed |= RBM_LR;
8673 #if defined(_TARGET_ARM_)
8674 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
8675 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
8676 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
8678 if ((maskPushRegsFloat != RBM_NONE) ||
8679 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
8681 // Here we try to keep stack double-aligned before the vpush
8682 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
8684 regNumber extraPushedReg = REG_R4;
8685 while (maskPushRegsInt & genRegMask(extraPushedReg))
8687 extraPushedReg = REG_NEXT(extraPushedReg);
8689 if (extraPushedReg < REG_R11)
8691 maskPushRegsInt |= genRegMask(extraPushedReg);
8692 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
8695 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
8698 // We currently only expect to push/pop consecutive FP registers
8699 // and these have to be double-sized registers as well.
8700 // Here we will insure that maskPushRegsFloat obeys these requirements.
8702 if (maskPushRegsFloat != RBM_NONE)
8704 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
8705 while (maskPushRegsFloat > contiguousMask)
8707 contiguousMask <<= 2;
8708 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
8710 if (maskPushRegsFloat != contiguousMask)
8712 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
8713 maskPushRegsFloat |= maskExtraRegs;
8714 regSet.rsSetRegsModified(maskExtraRegs);
8715 maskCalleeRegsPushed |= maskExtraRegs;
8718 #endif // _TARGET_ARM_
8719 #endif // _TARGET_ARMARCH_
8721 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
8722 // Compute the count of callee saved float regs saved on stack.
8723 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
8724 // regs are stack allocated and preserved in their stack locations.
8725 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
8726 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
8727 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
8729 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
8734 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
8735 dspRegMask(maskCalleeRegsPushed);
8740 /* Assign the final offsets to things living on the stack frame */
8742 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
8744 /* We want to make sure that the prolog size calculated here is accurate
8745 (that is instructions will not shrink because of conservative stack
8746 frame approximations). We do this by filling in the correct size
8747 here (where we have committed to the final numbers for the frame offsets)
8748 This will ensure that the prolog size is always correct
8750 getEmitter()->emitMaxTmpSize = compiler->tmpSize;
8753 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
8755 compiler->lvaTableDump();
8760 //------------------------------------------------------------------------
8761 // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
8764 // delta - the offset to add to the current stack pointer to establish the frame pointer
8765 // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
8767 void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
8769 assert(compiler->compGeneratingProlog);
8771 #if defined(_TARGET_XARCH_)
8775 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
8780 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
8781 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
8785 if (reportUnwindData)
8787 compiler->unwindSetFrameReg(REG_FPBASE, delta);
8790 #elif defined(_TARGET_ARM_)
8792 assert(arm_Valid_Imm_For_Add_SP(delta));
8793 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
8795 if (reportUnwindData)
8797 compiler->unwindPadding();
8801 NYI("establish frame pointer");
8805 /*****************************************************************************
8807 * Generates code for a function prolog.
8809 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
8811 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
8812 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
8813 * only instructions which result in control not going to the next instruction. Basically, any time execution would
8814 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
8815 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
8816 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
8818 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
8819 * debugger team to ensure that stepping still works.
8821 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
8825 #pragma warning(push)
8826 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
8828 void CodeGen::genFnProlog()
8830 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
8832 compiler->funSetCurrentFunc(0);
8837 printf("*************** In genFnProlog()\n");
8842 genInterruptibleUsed = true;
8845 #ifdef LEGACY_BACKEND
8847 #endif // LEGACY_BACKEND
8849 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
8851 /* Ready to start on the prolog proper */
8853 getEmitter()->emitBegProlog();
8854 compiler->unwindBegProlog();
8856 // Do this so we can put the prolog instruction group ahead of
8857 // other instruction groups
8858 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
8861 if (compiler->opts.dspCode)
8863 printf("\n__prolog:\n");
8867 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8869 // Create new scopes for the method-parameters for the prolog-block.
8875 if (compiler->compJitHaltMethod())
8877 /* put a nop first because the debugger and other tools are likely to
8878 put an int3 at the begining and we don't want to confuse them */
8881 instGen(INS_BREAKPOINT);
8883 #ifdef _TARGET_ARMARCH_
8884 // Avoid asserts in the unwind info because these instructions aren't accounted for.
8885 compiler->unwindPadding();
8886 #endif // _TARGET_ARMARCH_
8890 #if FEATURE_EH_FUNCLETS && defined(DEBUG)
8892 // We cannot force 0-initialization of the PSPSym
8893 // as it will overwrite the real value
8894 if (compiler->lvaPSPSym != BAD_VAR_NUM)
8896 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
8897 assert(!varDsc->lvMustInit);
8900 #endif // FEATURE_EH_FUNCLETS && DEBUG
8902 /*-------------------------------------------------------------------------
8904 * Record the stack frame ranges that will cover all of the tracked
8905 * and untracked pointer variables.
8906 * Also find which registers will need to be zero-initialized.
8908 * 'initRegs': - Generally, enregistered variables should not need to be
8909 * zero-inited. They only need to be zero-inited when they
8910 * have a possibly uninitialized read on some control
8911 * flow path. Apparently some of the IL_STUBs that we
8912 * generate have this property.
8915 int untrLclLo = +INT_MAX;
8916 int untrLclHi = -INT_MAX;
8917 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
8918 // Note that they may be tracked, but simply not allocated to a register.
8919 bool hasUntrLcl = false;
8921 int GCrefLo = +INT_MAX;
8922 int GCrefHi = -INT_MAX;
8923 bool hasGCRef = false;
8925 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
8926 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
8927 regMaskTP initDblRegs = RBM_NONE;
8932 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
8934 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
8939 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
8941 noway_assert(varDsc->lvRefCnt == 0);
8945 signed int loOffs = varDsc->lvStkOffs;
8946 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
8948 /* We need to know the offset range of tracked stack GC refs */
8949 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
8951 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
8953 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
8954 // taken care of by the parent struct.
8955 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
8959 if (loOffs < GCrefLo)
8963 if (hiOffs > GCrefHi)
8970 /* For lvMustInit vars, gather pertinent info */
8972 if (!varDsc->lvMustInit)
8977 if (varDsc->lvIsInReg())
8979 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
8980 if (!varDsc->IsFloatRegType())
8982 initRegs |= regMask;
8984 if (varTypeIsMultiReg(varDsc))
8986 if (varDsc->lvOtherReg != REG_STK)
8988 initRegs |= genRegMask(varDsc->lvOtherReg);
8992 /* Upper DWORD is on the stack, and needs to be inited */
8994 loOffs += sizeof(int);
8999 #if !FEATURE_STACK_FP_X87
9000 else if (varDsc->TypeGet() == TYP_DOUBLE)
9002 initDblRegs |= regMask;
9006 initFltRegs |= regMask;
9008 #endif // !FEATURE_STACK_FP_X87
9016 if (loOffs < untrLclLo)
9020 if (hiOffs > untrLclHi)
9027 /* Don't forget about spill temps that hold pointers */
9029 if (!TRACK_GC_TEMP_LIFETIMES)
9031 assert(compiler->tmpAllFree());
9032 for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
9034 if (!varTypeIsGC(tempThis->tdTempType()))
9039 signed int loOffs = tempThis->tdTempOffs();
9040 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
9042 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
9043 // previous frame pointer. Thus, stkOffs can't be zero.
9044 CLANG_FORMAT_COMMENT_ANCHOR;
9046 #if !defined(_TARGET_AMD64_)
9047 // However, on amd64 there is no requirement to chain frame pointers.
9049 noway_assert(!isFramePointerUsed() || loOffs != 0);
9050 #endif // !defined(_TARGET_AMD64_)
9051 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
9055 if (loOffs < untrLclLo)
9059 if (hiOffs > untrLclHi)
9066 assert((genInitStkLclCnt > 0) == hasUntrLcl);
9071 if (genInitStkLclCnt > 0)
9073 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
9080 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
9081 // Ditto for all enregistered user arguments in a varargs method.
9082 // These registers will be available to use for the initReg. We just remove
9083 // all of these registers from the rsCalleeRegArgMaskLiveIn.
9085 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
9088 /* Choose the register to use for zero initialization */
9090 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
9091 bool initRegZeroed = false;
9092 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
9095 // We should not use the special PINVOKE registers as the initReg
9096 // since they are trashed by the jithelper call to setup the PINVOKE frame
9097 if (compiler->info.compCallUnmanaged)
9099 excludeMask |= RBM_PINVOKE_FRAME;
9101 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
9102 if (!compiler->opts.ShouldUsePInvokeHelpers())
9104 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
9106 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
9108 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
9110 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
9111 if (varDsc->lvRegister)
9113 excludeMask |= genRegMask(varDsc->lvRegNum);
9119 // If we have a variable sized frame (compLocallocUsed is true)
9120 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
9121 if (compiler->compLocallocUsed)
9123 excludeMask |= RBM_SAVED_LOCALLOC_SP;
9125 #endif // _TARGET_ARM_
9127 #if defined(_TARGET_XARCH_)
9128 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
9130 // We currently must use REG_EAX on x86 here
9131 // because the loop's backwards branch depends upon the size of EAX encodings
9132 assert(initReg == REG_EAX);
9135 #endif // _TARGET_XARCH_
9137 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
9139 if (tempMask != RBM_NONE)
9141 // We will use one of the registers that we were planning to zero init anyway.
9142 // We pick the lowest register number.
9143 tempMask = genFindLowestBit(tempMask);
9144 initReg = genRegNumFromMask(tempMask);
9146 // Next we prefer to use one of the unused argument registers.
9147 // If they aren't available we use one of the caller-saved integer registers.
9150 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
9151 if (tempMask != RBM_NONE)
9153 // We pick the lowest register number
9154 tempMask = genFindLowestBit(tempMask);
9155 initReg = genRegNumFromMask(tempMask);
9160 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
9162 #if defined(_TARGET_AMD64_)
9163 // If we are a varargs call, in order to set up the arguments correctly this
9164 // must be done in a 2 step process. As per the x64 ABI:
9165 // a) The caller sets up the argument shadow space (just before the return
9166 // address, 4 pointer sized slots).
9167 // b) The callee is responsible to home the arguments on the shadow space
9168 // provided by the caller.
9169 // This way, the varargs iterator will be able to retrieve the
9170 // call arguments properly since both the arg regs and the stack allocated
9171 // args will be contiguous.
9172 if (compiler->info.compIsVarArgs)
9174 getEmitter()->spillIntArgRegsToShadowSlots();
9177 #endif // _TARGET_AMD64_
9180 /*-------------------------------------------------------------------------
9182 * Now start emitting the part of the prolog which sets up the frame
9185 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
9187 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
9188 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
9190 #endif // _TARGET_ARM_
9192 #ifdef _TARGET_XARCH_
9193 if (doubleAlignOrFramePointerUsed())
9195 inst_RV(INS_push, REG_FPBASE, TYP_REF);
9196 compiler->unwindPush(REG_FPBASE);
9197 psiAdjustStackLevel(REGSIZE_BYTES);
9199 #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
9200 genEstablishFramePointer(0, /*reportUnwindData*/ true);
9201 #endif // !_TARGET_AMD64_
9204 if (compiler->genDoubleAlign())
9206 noway_assert(isFramePointerUsed() == false);
9207 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
9209 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
9211 #endif // DOUBLE_ALIGN
9213 #endif // _TARGET_XARCH_
9215 #ifdef _TARGET_ARM64_
9216 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
9217 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
9218 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
9219 #else // !_TARGET_ARM64_
9220 genPushCalleeSavedRegisters();
9221 #endif // !_TARGET_ARM64_
9224 bool needToEstablishFP = false;
9225 int afterLclFrameSPtoFPdelta = 0;
9226 if (doubleAlignOrFramePointerUsed())
9228 needToEstablishFP = true;
9230 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
9231 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
9232 // too big, we go ahead and do it here.
9234 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
9235 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
9236 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
9238 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
9239 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
9240 needToEstablishFP = false;
9243 #endif // _TARGET_ARM_
9245 //-------------------------------------------------------------------------
9247 // Subtract the local frame size from SP.
9249 //-------------------------------------------------------------------------
9250 CLANG_FORMAT_COMMENT_ANCHOR;
9252 #ifndef _TARGET_ARM64_
9253 regMaskTP maskStackAlloc = RBM_NONE;
9257 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
9258 #endif // _TARGET_ARM_
9260 if (maskStackAlloc == RBM_NONE)
9262 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
9264 #endif // !_TARGET_ARM64_
9266 //-------------------------------------------------------------------------
9269 if (compiler->compLocallocUsed)
9271 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
9272 regTracker.rsTrackRegTrash(REG_SAVED_LOCALLOC_SP);
9273 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
9275 #endif // _TARGET_ARMARCH_
9277 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
9278 // Preserve callee saved float regs to stack.
9279 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
9280 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
9282 #ifdef _TARGET_AMD64_
9283 // Establish the AMD64 frame pointer after the OS-reported prolog.
9284 if (doubleAlignOrFramePointerUsed())
9286 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
9287 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
9289 #endif //_TARGET_AMD64_
9291 //-------------------------------------------------------------------------
9293 // This is the end of the OS-reported prolog for purposes of unwinding
9295 //-------------------------------------------------------------------------
9298 if (needToEstablishFP)
9300 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
9301 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
9303 #endif // _TARGET_ARM_
9305 if (compiler->info.compPublishStubParam)
9307 #if CPU_LOAD_STORE_ARCH
9308 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
9309 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
9311 // mov [lvaStubArgumentVar], EAX
9312 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
9313 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
9315 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
9317 // It's no longer live; clear it out so it can be used after this in the prolog
9318 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
9322 // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
9323 // when creating the stack frame. Don't think it's worth it, though.
9324 if (genNeedPrologStackProbe)
9327 // Can't have a call until we have enough padding for rejit
9329 genPrologPadForReJit();
9330 noway_assert(compiler->opts.compNeedStackProbes);
9331 genGenerateStackProbe();
9332 compiler->compStackProbePrologDone = true;
9334 #endif // STACK_PROBES
9337 // Zero out the frame as needed
9340 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
9342 #if FEATURE_EH_FUNCLETS
9344 genSetPSPSym(initReg, &initRegZeroed);
9346 #else // !FEATURE_EH_FUNCLETS
9348 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
9349 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
9351 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
9352 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
9354 // Zero out the slot for nesting level 0
9355 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
9359 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
9360 initRegZeroed = true;
9363 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
9367 #endif // !FEATURE_EH_FUNCLETS
9369 genReportGenericContextArg(initReg, &initRegZeroed);
9371 #if defined(LEGACY_BACKEND) // in RyuJIT backend this has already been expanded into trees
9372 if (compiler->info.compCallUnmanaged && !compiler->opts.ShouldUsePInvokeHelpers())
9374 getEmitter()->emitDisableRandomNops();
9375 initRegs = genPInvokeMethodProlog(initRegs);
9376 getEmitter()->emitEnableRandomNops();
9378 #endif // defined(LEGACY_BACKEND)
9380 // The local variable representing the security object must be on the stack frame
9381 // and must be 0 initialized.
9382 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
9383 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
9384 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
9386 // Initialize any "hidden" slots/locals
9388 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
9390 #ifdef _TARGET_ARM64_
9391 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
9393 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
9397 // Set up the GS security cookie
9399 genSetGSSecurityCookie(initReg, &initRegZeroed);
9401 #ifdef PROFILING_SUPPORTED
9403 // Insert a function entry callback for profiling, if requested.
9404 genProfilingEnterCallback(initReg, &initRegZeroed);
9406 #endif // PROFILING_SUPPORTED
9408 if (!genInterruptible)
9410 /*-------------------------------------------------------------------------
9412 * The 'real' prolog ends here for non-interruptible methods.
9413 * For fully-interruptible methods, we extend the prolog so that
9414 * we do not need to track GC inforation while shuffling the
9417 * Make sure there's enough padding for ReJIT.
9420 genPrologPadForReJit();
9421 getEmitter()->emitMarkPrologEnd();
9424 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
9425 // The unused bits of Vector3 arguments must be cleared
9426 // since native compiler doesn't initize the upper bits to zeros.
9428 // TODO-Cleanup: This logic can be implemented in
9429 // genFnPrologCalleeRegArgs() for argument registers and
9430 // genEnregisterIncomingStackArgs() for stack arguments.
9431 genClearStackVec3ArgUpperBits();
9432 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
9434 /*-----------------------------------------------------------------------------
9435 * Take care of register arguments first
9440 #ifndef LEGACY_BACKEND
9441 // Update the arg initial register locations.
9442 compiler->lvaUpdateArgsWithInitialReg();
9443 #endif // !LEGACY_BACKEND
9445 FOREACH_REGISTER_FILE(regState)
9447 if (regState->rsCalleeRegArgMaskLiveIn)
9449 // If we need an extra register to shuffle around the incoming registers
9450 // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
9451 // if we don't need to use the xtraReg then this flag will stay false
9454 bool xtraRegClobbered = false;
9456 if (genRegMask(initReg) & RBM_ARG_REGS)
9462 xtraReg = REG_SCRATCH;
9463 initRegZeroed = false;
9466 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
9468 if (xtraRegClobbered)
9470 initRegZeroed = false;
9475 // Home the incoming arguments
9476 genEnregisterIncomingStackArgs();
9478 /* Initialize any must-init registers variables now */
9482 regMaskTP regMask = 0x1;
9484 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
9486 if (regMask & initRegs)
9488 // Check if we have already zeroed this register
9489 if ((reg == initReg) && initRegZeroed)
9495 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
9498 initRegZeroed = true;
9505 #if !FEATURE_STACK_FP_X87
9506 if (initFltRegs | initDblRegs)
9508 // If initReg is not in initRegs then we will use REG_SCRATCH
9509 if ((genRegMask(initReg) & initRegs) == 0)
9511 initReg = REG_SCRATCH;
9512 initRegZeroed = false;
9516 // This is needed only for Arm since it can use a zero initialized int register
9517 // to initialize vfp registers.
9520 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
9521 initRegZeroed = true;
9523 #endif // _TARGET_ARM_
9525 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
9527 #endif // !FEATURE_STACK_FP_X87
9529 #if FEATURE_STACK_FP_X87
9531 // Here is where we load the enregistered floating point arguments
9532 // and locals onto the x86-FPU.
9534 genCodeForPrologStackFP();
9537 //-----------------------------------------------------------------------------
9540 // Increase the prolog size here only if fully interruptible.
9541 // And again make sure it's big enough for ReJIT
9544 if (genInterruptible)
9546 genPrologPadForReJit();
9547 getEmitter()->emitMarkPrologEnd();
9550 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
9557 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
9561 noway_assert(GCrefLo == +INT_MAX);
9562 noway_assert(GCrefHi == -INT_MAX);
9566 if (compiler->opts.dspCode)
9573 // On non-x86 the VARARG cookie does not need any special treatment.
9575 // Load up the VARARG argument pointer register so it doesn't get clobbered.
9576 // only do this if we actually access any statically declared args
9577 // (our argument pointer register has a refcount > 0).
9578 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
9580 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt > 0)
9582 varDsc = &compiler->lvaTable[argsStartVar];
9584 noway_assert(compiler->info.compArgsCount > 0);
9586 // MOV EAX, <VARARGS HANDLE>
9587 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
9588 regTracker.rsTrackRegTrash(REG_EAX);
9591 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
9593 // EDX might actually be holding something here. So make sure to only use EAX for this code
9596 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
9597 noway_assert(!lastArg->lvRegister);
9598 signed offset = lastArg->lvStkOffs;
9599 assert(offset != BAD_STK_OFFS);
9600 noway_assert(lastArg->lvFramePointerBased);
9602 // LEA EAX, &<VARARGS HANDLE> + EAX
9603 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
9605 if (varDsc->lvIsInReg())
9607 if (varDsc->lvRegNum != REG_EAX)
9609 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
9610 regTracker.rsTrackRegTrash(varDsc->lvRegNum);
9615 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
9619 #endif // _TARGET_X86_
9622 if (compiler->opts.compStackCheckOnRet)
9624 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
9625 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
9626 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
9627 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
9631 getEmitter()->emitEndProlog();
9632 compiler->unwindEndProlog();
9634 noway_assert(getEmitter()->emitMaxTmpSize == compiler->tmpSize);
9637 #pragma warning(pop)
9640 /*****************************************************************************
9642 * Generates code for a function epilog.
9644 * Please consult the "debugger team notification" comment in genFnProlog().
9647 #if defined(_TARGET_ARMARCH_)
9649 void CodeGen::genFnEpilog(BasicBlock* block)
9653 printf("*************** In genFnEpilog()\n");
9656 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9658 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
9659 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
9660 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
9663 if (compiler->opts.dspCode)
9664 printf("\n__epilog:\n");
9668 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
9669 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
9670 printf(", gcRegGCrefSetCur=");
9671 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
9672 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
9673 printf(", gcRegByrefSetCur=");
9674 printRegMaskInt(gcInfo.gcRegByrefSetCur);
9675 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
9680 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
9683 // We delay starting the unwind codes until we have an instruction which we know
9684 // needs an unwind code. In particular, for large stack frames in methods without
9685 // localloc, the sequence might look something like this:
9688 // pop {r4,r5,r6,r10,r11,pc}
9689 // In this case, the "movw" should not be part of the unwind codes, since it will
9690 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
9691 // also sets the current location as the beginning offset of the epilog, so every
9692 // instruction afterwards needs an unwind code. In the case above, if you call
9693 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
9695 bool unwindStarted = false;
9697 // Tear down the stack frame
9699 if (compiler->compLocallocUsed)
9703 compiler->unwindBegEpilog();
9704 unwindStarted = true;
9708 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
9709 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
9713 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
9716 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
9721 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
9722 compiler->unwindBegEpilog();
9723 unwindStarted = true;
9726 genPopCalleeSavedRegisters(jmpEpilog);
9728 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
9730 // We better not have used a pop PC to return otherwise this will be unreachable code
9731 noway_assert(!genUsedPopToReturn);
9733 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9734 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
9735 compiler->unwindAllocStack(preSpillRegArgSize);
9738 #else // _TARGET_ARM64_
9739 compiler->unwindBegEpilog();
9741 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
9742 #endif // _TARGET_ARM64_
9746 #ifdef _TARGET_ARMARCH_
9747 hasTailCalls = true;
9748 #endif // _TARGET_ARMARCH_
9750 noway_assert(block->bbJumpKind == BBJ_RETURN);
9751 noway_assert(block->bbTreeList != nullptr);
9754 // We better not have used a pop PC to return otherwise this will be unreachable code
9755 noway_assert(!genUsedPopToReturn);
9756 #endif // _TARGET_ARM_
9758 /* figure out what jump we have */
9759 GenTree* jmpNode = block->lastNode();
9760 #if !FEATURE_FASTTAILCALL
9761 noway_assert(jmpNode->gtOper == GT_JMP);
9762 #else // FEATURE_FASTTAILCALL
9764 // If jmpNode is GT_JMP then gtNext must be null.
9765 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
9766 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
9768 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
9769 noway_assert((jmpNode->gtOper == GT_JMP) ||
9770 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
9772 // The next block is associated with this "if" stmt
9773 if (jmpNode->gtOper == GT_JMP)
9774 #endif // FEATURE_FASTTAILCALL
9776 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9777 // the same descriptor with some minor adjustments.
9778 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
9780 CORINFO_CONST_LOOKUP addrInfo;
9781 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
9784 emitter::EmitCallType callType;
9786 regNumber indCallReg;
9787 switch (addrInfo.accessType)
9790 if (arm_Valid_Imm_For_BL((ssize_t)addrInfo.addr))
9792 // Simple direct call
9793 callType = emitter::EC_FUNC_TOKEN;
9794 addr = addrInfo.addr;
9795 indCallReg = REG_NA;
9799 // otherwise the target address doesn't fit in an immediate
9800 // so we have to burn a register...
9804 // Load the address into a register, load indirect and call through a register
9805 // We have to use R12 since we assume the argument registers are in use
9806 callType = emitter::EC_INDIR_R;
9807 indCallReg = REG_R12;
9809 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
9810 if (addrInfo.accessType == IAT_PVALUE)
9812 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
9813 regTracker.rsTrackRegTrash(indCallReg);
9819 NO_WAY("Unsupported JMP indirection");
9822 /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
9823 * the same descriptor with some minor adjustments.
9827 getEmitter()->emitIns_Call(callType,
9829 INDEBUG_LDISASM_COMMA(nullptr)
9832 EA_UNKNOWN, // retSize
9833 gcInfo.gcVarPtrSetCur,
9834 gcInfo.gcRegGCrefSetCur,
9835 gcInfo.gcRegByrefSetCur,
9836 BAD_IL_OFFSET, // IL offset
9843 CLANG_FORMAT_COMMENT_ANCHOR;
9845 #else // _TARGET_ARM64_
9846 if (addrInfo.accessType != IAT_VALUE)
9848 NYI_ARM64("Unsupported JMP indirection");
9851 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
9853 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
9854 // the same descriptor with some minor adjustments.
9857 getEmitter()->emitIns_Call(callType,
9859 INDEBUG_LDISASM_COMMA(nullptr)
9862 EA_UNKNOWN, // retSize
9863 EA_UNKNOWN, // secondRetSize
9864 gcInfo.gcVarPtrSetCur,
9865 gcInfo.gcRegGCrefSetCur,
9866 gcInfo.gcRegByrefSetCur,
9867 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
9870 CLANG_FORMAT_COMMENT_ANCHOR;
9872 #endif // _TARGET_ARM64_
9874 #if FEATURE_FASTTAILCALL
9878 // Call target = REG_FASTTAILCALL_TARGET
9879 // https://github.com/dotnet/coreclr/issues/4827
9880 // Do we need a special encoding for stack walker like rex.w prefix for x64?
9881 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
9883 #endif // FEATURE_FASTTAILCALL
9888 if (!genUsedPopToReturn)
9890 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
9891 // so we need a "bx lr" instruction to return from the function.
9892 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
9893 compiler->unwindBranch16();
9895 #else // _TARGET_ARM64_
9896 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
9897 compiler->unwindReturn(REG_LR);
9898 #endif // _TARGET_ARM64_
9901 compiler->unwindEndEpilog();
9904 #elif defined(_TARGET_XARCH_)
9906 void CodeGen::genFnEpilog(BasicBlock* block)
9911 printf("*************** In genFnEpilog()\n");
9915 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9917 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
9918 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
9919 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
9921 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
9924 genInterruptibleUsed = true;
9927 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
9930 if (compiler->opts.dspCode)
9932 printf("\n__epilog:\n");
9937 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
9938 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
9939 printf(", gcRegGCrefSetCur=");
9940 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
9941 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
9942 printf(", gcRegByrefSetCur=");
9943 printRegMaskInt(gcInfo.gcRegByrefSetCur);
9944 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
9949 #if !FEATURE_STACK_FP_X87
9950 // Restore float registers that were saved to stack before SP is modified.
9951 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
9952 #endif // !FEATURE_STACK_FP_X87
9954 #ifdef JIT32_GCENCODER
9955 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
9956 // the above call to `genRestoreCalleeSavedFltRegs` because that function
9957 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
9958 // which is the only target that uses the JIT32 GC encoder
9959 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
9960 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
9961 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
9962 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
9963 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
9964 // we will need to rethink this.
9965 getEmitter()->emitStartEpilog();
9968 /* Compute the size in bytes we've pushed/popped */
9970 if (!doubleAlignOrFramePointerUsed())
9972 // We have an ESP frame */
9974 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
9976 /* Get rid of our local variables */
9978 if (compiler->compLclFrameSize)
9981 /* Add 'compiler->compLclFrameSize' to ESP */
9982 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
9984 if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
9986 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
9987 regTracker.rsTrackRegTrash(REG_ECX);
9990 #endif // _TARGET_X86
9992 /* Add 'compiler->compLclFrameSize' to ESP */
9993 /* Generate "add esp, <stack-size>" */
9994 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
9998 genPopCalleeSavedRegisters();
10002 noway_assert(doubleAlignOrFramePointerUsed());
10004 /* Tear down the stack frame */
10006 bool needMovEspEbp = false;
10009 if (compiler->genDoubleAlign())
10012 // add esp, compLclFrameSize
10014 // We need not do anything (except the "mov esp, ebp") if
10015 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
10016 // also complicates the code manager. Hence, we ignore that case.
10018 noway_assert(compiler->compLclFrameSize != 0);
10019 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
10021 needMovEspEbp = true;
10024 #endif // DOUBLE_ALIGN
10026 bool needLea = false;
10028 if (compiler->compLocallocUsed)
10030 // ESP may be variable if a localloc was actually executed. Reset it.
10031 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
10035 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
10037 if (compiler->compLclFrameSize != 0)
10039 #ifdef _TARGET_AMD64_
10040 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
10041 // do an LEA to "pop off" the frame allocation.
10043 #else // !_TARGET_AMD64_
10044 // We will just generate "mov esp, ebp" and be done with it.
10045 needMovEspEbp = true;
10046 #endif // !_TARGET_AMD64_
10049 else if (compiler->compLclFrameSize == 0)
10051 // do nothing before popping the callee-saved registers
10053 #ifdef _TARGET_X86_
10054 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
10056 // "pop ecx" will make ESP point to the callee-saved registers
10057 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
10058 regTracker.rsTrackRegTrash(REG_ECX);
10060 #endif // _TARGET_X86
10063 // We need to make ESP point to the callee-saved registers
10071 #ifdef _TARGET_AMD64_
10072 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
10074 // Case 1: localloc not used.
10075 // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
10076 // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
10077 // The amount to be subtracted from RBP to point at callee saved int regs.
10079 // Case 2: localloc used
10080 // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
10081 // Offset = Amount to be aded to RBP to point at callee saved int regs.
10082 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
10084 // Offset should fit within a byte if localloc is not used.
10085 if (!compiler->compLocallocUsed)
10087 noway_assert(offset < UCHAR_MAX);
10090 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
10091 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
10092 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
10095 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
10100 // Pop the callee-saved registers (if any)
10103 genPopCalleeSavedRegisters();
10105 #ifdef _TARGET_AMD64_
10106 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
10107 #else // !_TARGET_AMD64_
10111 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
10113 #endif // !_TARGET_AMD64_
10116 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
10119 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
10121 /* Check if this a special return block i.e.
10122 * CEE_JMP instruction */
10126 noway_assert(block->bbJumpKind == BBJ_RETURN);
10127 noway_assert(block->bbTreeList);
10129 // figure out what jump we have
10130 GenTree* jmpNode = block->lastNode();
10131 #if !FEATURE_FASTTAILCALL
10133 noway_assert(jmpNode->gtOper == GT_JMP);
10136 // If jmpNode is GT_JMP then gtNext must be null.
10137 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
10138 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
10140 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
10141 noway_assert((jmpNode->gtOper == GT_JMP) ||
10142 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
10144 // The next block is associated with this "if" stmt
10145 if (jmpNode->gtOper == GT_JMP)
10148 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
10149 // the same descriptor with some minor adjustments.
10150 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
10152 CORINFO_CONST_LOOKUP addrInfo;
10153 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
10154 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
10156 NO_WAY("Unsupported JMP indirection");
10159 const emitter::EmitCallType callType =
10160 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
10162 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
10163 // the same descriptor with some minor adjustments.
10165 // clang-format off
10166 getEmitter()->emitIns_Call(callType,
10168 INDEBUG_LDISASM_COMMA(nullptr)
10171 EA_UNKNOWN // retSize
10172 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
10173 gcInfo.gcVarPtrSetCur,
10174 gcInfo.gcRegGCrefSetCur,
10175 gcInfo.gcRegByrefSetCur,
10176 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
10177 true); /* isJump */
10180 #if FEATURE_FASTTAILCALL
10183 #ifdef _TARGET_AMD64_
10185 // Call target = RAX.
10186 // Stack walker requires that a register indirect tail call be rex.w prefixed.
10187 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
10189 assert(!"Fast tail call as epilog+jmp");
10191 #endif //_TARGET_AMD64_
10193 #endif // FEATURE_FASTTAILCALL
10197 unsigned stkArgSize = 0; // Zero on all platforms except x86
10199 #if defined(_TARGET_X86_)
10200 bool fCalleePop = true;
10202 // varargs has caller pop
10203 if (compiler->info.compIsVarArgs)
10204 fCalleePop = false;
10206 #ifdef UNIX_X86_ABI
10207 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
10208 fCalleePop = false;
10209 #endif // UNIX_X86_ABI
10213 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
10214 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10216 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
10218 #endif // _TARGET_X86_
10220 /* Return, popping our arguments (if any) */
10221 instGen_Return(stkArgSize);
10226 #error Unsupported or unset target architecture
10227 #endif // _TARGET_*
10229 #if FEATURE_EH_FUNCLETS
10231 #ifdef _TARGET_ARM_
10233 /*****************************************************************************
10235 * Generates code for an EH funclet prolog.
10237 * Funclets have the following incoming arguments:
10239 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
10240 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
10241 * finally/fault: none
10243 * Funclets set the following registers on exit:
10245 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
10246 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10247 * finally/fault: none
10249 * The ARM funclet prolog sequence is:
10251 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
10252 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
10253 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
10254 * ; calculated for the entire function.
10255 * sub sp, XXX ; Establish the rest of the frame.
10256 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
10257 * ; up to preserve stack alignment. If we push an odd number of registers, we also
10258 * ; generate this, to keep the stack aligned.
10260 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
10262 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
10265 * if (this is a filter funclet)
10267 * // r1 on entry to a filter funclet is CallerSP of the containing function:
10268 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
10269 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
10270 * // a funclet. Consider:
10274 * // throw new Exception();
10275 * // } catch(Exception) {
10276 * // throw new Exception(); // The exception thrown here ...
10278 * // } filter { // ... will be processed here, while the "catch" funclet frame is
10279 * // // still on the stack
10280 * // } filter-handler {
10283 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
10284 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
10285 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
10287 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
10288 * ; the dynamically containing funclet or function)
10289 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
10290 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
10294 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
10295 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
10297 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
10298 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
10301 * The epilog sequence is then:
10303 * add sp, XXX ; if necessary
10306 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
10307 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
10309 * The funclet frame is thus:
10312 * |-----------------------|
10315 * +=======================+ <---- Caller's SP
10316 * |Callee saved registers |
10317 * |-----------------------|
10318 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
10319 * | | // in function and funclet
10320 * |-----------------------|
10321 * | PSP slot | // Omitted in CoreRT ABI
10322 * |-----------------------|
10323 * ~ possible 4 byte pad ~
10324 * ~ for alignment ~
10325 * |-----------------------|
10326 * | Outgoing arg space |
10327 * |-----------------------| <---- Ambient SP
10329 * ~ | Stack grows ~
10334 void CodeGen::genFuncletProlog(BasicBlock* block)
10338 printf("*************** In genFuncletProlog()\n");
10341 assert(block != NULL);
10342 assert(block->bbFlags & BBF_FUNCLET_BEG);
10344 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10346 gcInfo.gcResetForBB();
10348 compiler->unwindBegProlog();
10350 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
10351 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
10353 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
10354 maskPushRegsInt |= maskStackAlloc;
10356 assert(FitsIn<int>(maskPushRegsInt));
10357 inst_IV(INS_push, (int)maskPushRegsInt);
10358 compiler->unwindPushMaskInt(maskPushRegsInt);
10360 if (maskPushRegsFloat != RBM_NONE)
10362 genPushFltRegs(maskPushRegsFloat);
10363 compiler->unwindPushMaskFloat(maskPushRegsFloat);
10366 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
10368 regMaskTP maskArgRegsLiveIn;
10371 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
10373 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
10375 maskArgRegsLiveIn = RBM_NONE;
10379 maskArgRegsLiveIn = RBM_R0;
10382 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
10383 bool initRegZeroed = false;
10385 if (maskStackAlloc == RBM_NONE)
10387 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
10390 // This is the end of the OS-reported prolog for purposes of unwinding
10391 compiler->unwindEndProlog();
10395 // This is the first block of a filter
10397 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
10398 genFuncletInfo.fiPSP_slot_CallerSP_offset);
10399 regTracker.rsTrackRegTrash(REG_R1);
10400 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
10401 genFuncletInfo.fiPSP_slot_SP_offset);
10402 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
10403 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10407 // This is a non-filter funclet
10408 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
10409 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10410 regTracker.rsTrackRegTrash(REG_R3);
10411 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
10412 genFuncletInfo.fiPSP_slot_SP_offset);
10416 /*****************************************************************************
10418 * Generates code for an EH funclet epilog.
10421 void CodeGen::genFuncletEpilog()
10425 printf("*************** In genFuncletEpilog()\n");
10428 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10430 // Just as for the main function, we delay starting the unwind codes until we have
10431 // an instruction which we know needs an unwind code. This is to support code like
10435 // pop {r4,r5,r6,r10,r11,pc}
10436 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
10438 bool unwindStarted = false;
10440 /* The saved regs info saves the LR register. We need to pop the PC register to return */
10441 assert(genFuncletInfo.fiSaveRegs & RBM_LR);
10443 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
10444 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
10446 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
10447 maskPopRegsInt |= maskStackAlloc;
10449 if (maskStackAlloc == RBM_NONE)
10451 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
10454 if (!unwindStarted)
10456 // We'll definitely generate an unwindable instruction next
10457 compiler->unwindBegEpilog();
10458 unwindStarted = true;
10461 maskPopRegsInt &= ~RBM_LR;
10462 maskPopRegsInt |= RBM_PC;
10464 if (maskPopRegsFloat != RBM_NONE)
10466 genPopFltRegs(maskPopRegsFloat);
10467 compiler->unwindPopMaskFloat(maskPopRegsFloat);
10470 assert(FitsIn<int>(maskPopRegsInt));
10471 inst_IV(INS_pop, (int)maskPopRegsInt);
10472 compiler->unwindPopMaskInt(maskPopRegsInt);
10474 compiler->unwindEndEpilog();
10477 /*****************************************************************************
10479 * Capture the information used to generate the funclet prologs and epilogs.
10480 * Note that all funclet prologs are identical, and all funclet epilogs are
10481 * identical (per type: filters are identical, and non-filters are identical).
10482 * Thus, we compute the data used for these just once.
10484 * See genFuncletProlog() for more information about the prolog/epilog sequences.
10487 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10489 if (compiler->ehAnyFunclets())
10491 assert(isFramePointerUsed());
10492 assert(compiler->lvaDoneFrameLayout ==
10493 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
10495 // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
10496 // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
10497 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
10498 // (also assumed in genFnProlog()).
10499 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
10500 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
10501 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
10503 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
10504 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
10505 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
10506 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
10507 unsigned funcletFrameSize =
10508 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
10510 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
10511 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
10512 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
10514 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
10515 int PSP_slot_CallerSP_offset =
10516 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
10518 /* Now save it for future use */
10520 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
10521 genFuncletInfo.fiSpDelta = spDelta;
10522 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
10523 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
10529 printf("Funclet prolog / epilog info\n");
10530 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
10531 printf(" Save regs: ");
10532 dspRegMask(rsMaskSaveRegs);
10534 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
10535 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
10536 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
10538 if (PSP_slot_CallerSP_offset !=
10539 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
10540 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
10541 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
10545 assert(PSP_slot_CallerSP_offset < 0);
10546 if (compiler->lvaPSPSym != BAD_VAR_NUM)
10548 assert(PSP_slot_CallerSP_offset ==
10549 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
10550 // function and funclet!
10555 #elif defined(_TARGET_AMD64_)
10557 /*****************************************************************************
10559 * Generates code for an EH funclet prolog.
10561 * Funclets have the following incoming arguments:
10563 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
10564 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
10565 * finally/fault: rcx = InitialSP
10567 * Funclets set the following registers on exit:
10569 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
10570 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10571 * finally/fault: none
10573 * The AMD64 funclet prolog sequence is:
10576 * push callee-saved regs
10577 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
10578 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
10579 * ; the entire function.
10580 * sub sp, XXX ; Establish the rest of the frame.
10581 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
10582 * ; up to preserve stack alignment. If we push an odd number of registers, we also
10583 * ; generate this, to keep the stack aligned.
10585 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
10587 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
10589 * ; Also, re-establish the frame pointer from the PSP.
10591 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
10592 * ; PSP of the dynamically containing funclet or function)
10593 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
10594 * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
10595 * ; Function_InitialSP_to_FP_delta==0, we don't need this
10598 * The epilog sequence is then:
10601 * pop callee-saved regs ; if necessary
10605 * The funclet frame is thus:
10608 * |-----------------------|
10611 * +=======================+ <---- Caller's SP
10612 * | Return address |
10613 * |-----------------------|
10615 * |-----------------------|
10616 * |Callee saved registers |
10617 * |-----------------------|
10618 * ~ possible 8 byte pad ~
10619 * ~ for alignment ~
10620 * |-----------------------|
10621 * | PSP slot | // Omitted in CoreRT ABI
10622 * |-----------------------|
10623 * | Outgoing arg space | // this only exists if the function makes a call
10624 * |-----------------------| <---- Initial SP
10626 * ~ | Stack grows ~
10630 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
10631 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
10632 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
10633 * "FRAMEPTR OFFSETS" for details.
10636 void CodeGen::genFuncletProlog(BasicBlock* block)
10641 printf("*************** In genFuncletProlog()\n");
10645 assert(!regSet.rsRegsModified(RBM_FPBASE));
10646 assert(block != nullptr);
10647 assert(block->bbFlags & BBF_FUNCLET_BEG);
10648 assert(isFramePointerUsed());
10650 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10652 gcInfo.gcResetForBB();
10654 compiler->unwindBegProlog();
10656 // We need to push ebp, since it's callee-saved.
10657 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
10658 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
10659 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
10660 // is stored here (all temps are allocated in the parent frame).
10661 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
10662 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
10664 inst_RV(INS_push, REG_FPBASE, TYP_REF);
10665 compiler->unwindPush(REG_FPBASE);
10667 // Callee saved int registers are pushed to stack.
10668 genPushCalleeSavedRegisters();
10670 regMaskTP maskArgRegsLiveIn;
10671 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
10673 maskArgRegsLiveIn = RBM_ARG_0;
10677 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
10680 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
10681 bool initRegZeroed = false;
10683 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
10685 // Callee saved float registers are copied to stack in their assigned stack slots
10686 // after allocating space for them as part of funclet frame.
10687 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
10689 // This is the end of the OS-reported prolog for purposes of unwinding
10690 compiler->unwindEndProlog();
10692 // If there is no PSPSym (CoreRT ABI), we are done.
10693 if (compiler->lvaPSPSym == BAD_VAR_NUM)
10698 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
10700 regTracker.rsTrackRegTrash(REG_FPBASE);
10702 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
10704 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
10706 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
10707 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
10710 // We've modified EBP, but not really. Say that we haven't...
10711 regSet.rsRemoveRegsModified(RBM_FPBASE);
10714 /*****************************************************************************
10716 * Generates code for an EH funclet epilog.
10718 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
10721 void CodeGen::genFuncletEpilog()
10726 printf("*************** In genFuncletEpilog()\n");
10730 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10732 // Restore callee saved XMM regs from their stack slots before modifying SP
10733 // to position at callee saved int regs.
10734 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
10735 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
10736 genPopCalleeSavedRegisters();
10737 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
10741 /*****************************************************************************
10743 * Capture the information used to generate the funclet prologs and epilogs.
10746 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10748 if (!compiler->ehAnyFunclets())
10753 // Note that compLclFrameSize can't be used (for can we call functions that depend on it),
10754 // because we're not going to allocate the same size frame as the parent.
10756 assert(isFramePointerUsed());
10757 assert(compiler->lvaDoneFrameLayout ==
10758 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
10759 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
10761 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
10762 // that's ok, because we're figuring out an offset in the parent frame.
10763 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
10764 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
10767 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
10768 #ifndef UNIX_AMD64_ABI
10769 // No 4 slots for outgoing params on the stack for System V systems.
10770 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
10771 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
10772 // slots if there are any calls in the function.
10773 #endif // UNIX_AMD64_ABI
10774 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
10776 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
10778 // How much stack do we allocate in the funclet?
10779 // We need to 16-byte align the stack.
10781 unsigned totalFrameSize =
10782 REGSIZE_BYTES // return address
10783 + REGSIZE_BYTES // pushed EBP
10784 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
10786 // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement.
10787 // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary.
10788 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
10789 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
10791 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
10793 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
10794 + calleeFPRegsSavedSize // pushed callee-saved float regs
10795 // below calculated 'pad' will go here
10796 + PSPSymSize // PSPSym
10797 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
10800 unsigned pad = AlignmentPad(totalFrameSize, 16);
10802 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
10803 + calleeFPRegsSavedSize // Callee saved xmm regs
10804 + pad + PSPSymSize // PSPSym
10805 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
10812 printf("Funclet prolog / epilog info\n");
10813 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
10814 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
10815 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
10818 if (compiler->lvaPSPSym != BAD_VAR_NUM)
10820 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
10821 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
10827 #elif defined(_TARGET_ARM64_)
10829 // Look in CodeGenArm64.cpp
10831 #elif defined(_TARGET_X86_)
10833 /*****************************************************************************
10835 * Generates code for an EH funclet prolog.
10838 * Funclets have the following incoming arguments:
10840 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
10841 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
10842 * finally/fault: none
10844 * Funclets set the following registers on exit:
10846 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
10847 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
10848 * finally/fault: none
10850 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
10854 void CodeGen::genFuncletProlog(BasicBlock* block)
10859 printf("*************** In genFuncletProlog()\n");
10863 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
10865 gcInfo.gcResetForBB();
10867 compiler->unwindBegProlog();
10869 // This is the end of the OS-reported prolog for purposes of unwinding
10870 compiler->unwindEndProlog();
10872 // TODO We may need EBP restore sequence here if we introduce PSPSym
10874 // Add a padding for 16-byte alignment
10875 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
10878 /*****************************************************************************
10880 * Generates code for an EH funclet epilog.
10883 void CodeGen::genFuncletEpilog()
10888 printf("*************** In genFuncletEpilog()\n");
10892 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
10894 // Revert a padding that was added for 16-byte alignment
10895 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
10900 /*****************************************************************************
10902 * Capture the information used to generate the funclet prologs and epilogs.
10905 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10907 if (!compiler->ehAnyFunclets())
10915 /*****************************************************************************
10917 * Generates code for an EH funclet prolog.
10920 void CodeGen::genFuncletProlog(BasicBlock* block)
10922 NYI("Funclet prolog");
10925 /*****************************************************************************
10927 * Generates code for an EH funclet epilog.
10930 void CodeGen::genFuncletEpilog()
10932 NYI("Funclet epilog");
10935 /*****************************************************************************
10937 * Capture the information used to generate the funclet prologs and epilogs.
10940 void CodeGen::genCaptureFuncletPrologEpilogInfo()
10942 if (compiler->ehAnyFunclets())
10944 NYI("genCaptureFuncletPrologEpilogInfo()");
10948 #endif // _TARGET_*
10950 /*-----------------------------------------------------------------------------
10952 * Set the main function PSPSym value in the frame.
10953 * Funclets use different code to load the PSP sym and save it in their frame.
10954 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
10955 * The PSPSym section of that document is copied here.
10957 ***********************************
10958 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
10959 * accesses locals from the main function body.
10961 * First, two definitions.
10963 * Caller-SP is the value of the stack pointer in a function's caller before the call
10964 * instruction is executed. That is, when function A calls function B, Caller-SP for B
10965 * is the value of the stack pointer immediately before the call instruction in A
10966 * (calling B) was executed. Note that this definition holds for both AMD64, which
10967 * pushes the return value when a call instruction is executed, and for ARM, which
10968 * doesn't. For AMD64, Caller-SP is the address above the call return address.
10970 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
10971 * the frame has been allocated. That is, before any "alloca"-type allocations.
10973 * The PSPSym is a pointer-sized local variable in the frame of the main function and
10974 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
10975 * for the main function. The stack offset of the PSPSym is reported to the VM in the
10976 * GC information header. The value reported in the GC information is the offset of the
10977 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
10978 * value is reported to the VM, differs between architectures. In particular, note that
10979 * most things in the GC information header are reported as offsets relative to Caller-SP,
10980 * but PSPSym on AMD64 is one (maybe the only) exception.)
10982 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
10983 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
10984 * the frame pointer is the same value in a funclet as it is in the main function body.
10986 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
10987 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
10988 * only true for first pass funclets (currently just filters) and it is passed as the second
10989 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
10990 * frame in the exception processing system. For the CLR, it points either to the main function
10991 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
10992 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
10994 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
10995 * don't know if the Establisher Frame is from the main function or a funclet, we design the
10996 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
10997 * offset from the Establisher Frame in each case. (This is also required because we only report
10998 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
10999 * function and all of its funclets). Then, the funclet uses this known offset to compute the
11000 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
11001 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
11002 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
11003 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
11004 * for every nested funclet invocation.
11006 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
11007 * restores all non-volatile registers to their values within the parent frame. This includes
11008 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
11009 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
11011 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
11012 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
11013 * ARM this is the first argument and passed in R0.
11015 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
11016 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
11017 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
11018 * is required in all funclets as well as the main function, whereas if the establisher frame was
11019 * correctly reported, the PSPSym could be omitted in some cases.)
11020 ***********************************
11022 void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
11024 assert(compiler->compGeneratingProlog);
11026 if (compiler->lvaPSPSym == BAD_VAR_NUM)
11031 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
11033 #if defined(_TARGET_ARM_)
11035 // We either generate:
11037 // str r1, [reg + PSPSymOffset]
11040 // str r1, [reg + PSPSymOffset]
11041 // depending on the smallest encoding
11043 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
11048 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
11050 // use the "add <reg>, sp, imm" form
11052 callerSPOffs = SPtoCallerSPdelta;
11053 regBase = REG_SPBASE;
11057 // use the "add <reg>, r11, imm" form
11059 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
11060 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
11062 callerSPOffs = FPtoCallerSPdelta;
11063 regBase = REG_FPBASE;
11066 // We will just use the initReg since it is an available register
11067 // and we are probably done using it anyway...
11068 regNumber regTmp = initReg;
11069 *pInitRegZeroed = false;
11071 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
11072 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
11074 #elif defined(_TARGET_ARM64_)
11076 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
11078 // We will just use the initReg since it is an available register
11079 // and we are probably done using it anyway...
11080 regNumber regTmp = initReg;
11081 *pInitRegZeroed = false;
11083 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
11084 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
11086 #elif defined(_TARGET_AMD64_)
11088 // The PSP sym value is Initial-SP, not Caller-SP!
11089 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
11090 // has been established.
11093 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
11095 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
11099 NYI("Set function PSP sym");
11101 #endif // _TARGET_*
11104 #endif // FEATURE_EH_FUNCLETS
11106 /*****************************************************************************
11108 * Generates code for all the function and funclet prologs and epilogs.
11111 void CodeGen::genGeneratePrologsAndEpilogs()
11116 printf("*************** Before prolog / epilog generation\n");
11117 getEmitter()->emitDispIGlist(false);
11121 #ifndef LEGACY_BACKEND
11122 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
11123 // This affects our code that determines which untracked locals need to be zero initialized.
11124 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
11125 #endif // !LEGACY_BACKEND
11127 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
11129 getEmitter()->emitStartPrologEpilogGeneration();
11131 gcInfo.gcResetForBB();
11134 // Generate all the prologs and epilogs.
11135 CLANG_FORMAT_COMMENT_ANCHOR;
11137 #if FEATURE_EH_FUNCLETS
11139 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
11140 // information computed during codegen, or during function prolog generation, like
11141 // frame offsets. It must run after main function prolog generation.
11143 genCaptureFuncletPrologEpilogInfo();
11145 #endif // FEATURE_EH_FUNCLETS
11147 // Walk the list of prologs and epilogs and generate them.
11148 // We maintain a list of prolog and epilog basic blocks in
11149 // the insGroup structure in the emitter. This list was created
11150 // during code generation by the genReserve*() functions.
11152 // TODO: it seems like better design would be to create a list of prologs/epilogs
11153 // in the code generator (not the emitter), and then walk that list. But we already
11154 // have the insGroup list, which serves well, so we don't need the extra allocations
11155 // for a prolog/epilog list in the code generator.
11157 getEmitter()->emitGeneratePrologEpilog();
11159 // Tell the emitter we're done with all prolog and epilog generation.
11161 getEmitter()->emitFinishPrologEpilogGeneration();
11166 printf("*************** After prolog / epilog generation\n");
11167 getEmitter()->emitDispIGlist(false);
11173 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11174 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11176 XX End Prolog / Epilog XX
11178 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11179 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11183 void CodeGen::genGenerateStackProbe()
11185 noway_assert(compiler->opts.compNeedStackProbes);
11187 // If this assert fires, it means somebody has changed the value
11188 // CORINFO_STACKPROBE_DEPTH.
11189 // Why does the EE need such a deep probe? It should just need a couple
11190 // of bytes, to set up a frame in the unmanaged code..
11192 static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
11194 JITDUMP("Emitting stack probe:\n");
11195 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
11196 -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
11198 #endif // STACK_PROBES
11200 #ifdef LEGACY_BACKEND
11201 /*****************************************************************************
11203 * Record the constant and return a tree node that yields its address.
11206 GenTree* CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTree* cnsTree, bool dblAlign)
11208 // Assign the constant an offset in the data section
11209 UNATIVE_OFFSET cnsSize = genTypeSize(cnsType);
11210 UNATIVE_OFFSET cnum = getEmitter()->emitDataConst(cnsAddr, cnsSize, dblAlign);
11213 if (compiler->opts.dspCode)
11215 printf(" @%s%02u ", "CNS", cnum);
11220 printf("DD %d \n", *(int*)cnsAddr);
11223 printf("DQ %lld\n", *(__int64*)cnsAddr);
11226 printf("DF %f \n", *(float*)cnsAddr);
11229 printf("DQ %lf\n", *(double*)cnsAddr);
11233 noway_assert(!"unexpected constant type");
11238 // Access to inline data is 'abstracted' by a special type of static member
11239 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
11240 // to constant data, not a real static field.
11242 return new (compiler, GT_CLS_VAR) GenTreeClsVar(cnsType, compiler->eeFindJitDataOffs(cnum), nullptr);
11244 #endif // LEGACY_BACKEND
11246 #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
11247 // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
11248 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
11249 // Here offset = 16-byte aligned offset after pushing integer registers.
11252 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
11253 // non-funclet: this will be compLclFrameSize.
11254 // funclet frames: this will be FuncletInfo.fiSpDelta.
11255 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
11257 genVzeroupperIfNeeded(false);
11258 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
11260 // Only callee saved floating point registers should be in regMask
11261 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
11263 // fast path return
11264 if (regMask == RBM_NONE)
11269 #ifdef _TARGET_AMD64_
11270 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
11271 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
11273 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
11274 assert((offset % 16) == 0);
11275 instruction copyIns = ins_Copy(TYP_FLOAT);
11276 #else // !_TARGET_AMD64_
11277 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
11278 instruction copyIns = INS_movupd;
11279 #endif // !_TARGET_AMD64_
11281 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
11283 regMaskTP regBit = genRegMask(reg);
11284 if ((regBit & regMask) != 0)
11286 // ABI requires us to preserve lower 128-bits of YMM register.
11287 getEmitter()->emitIns_AR_R(copyIns,
11288 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
11290 reg, REG_SPBASE, offset);
11291 compiler->unwindSaveReg(reg, offset);
11292 regMask &= ~regBit;
11293 offset -= XMM_REGSIZE_BYTES;
11298 // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
11299 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
11300 // Here offset = 16-byte aligned offset after pushing integer registers.
11303 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
11304 // non-funclet: this will be compLclFrameSize.
11305 // funclet frames: this will be FuncletInfo.fiSpDelta.
11306 void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
11308 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
11310 // Only callee saved floating point registers should be in regMask
11311 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
11313 // fast path return
11314 if (regMask == RBM_NONE)
11316 genVzeroupperIfNeeded();
11320 #ifdef _TARGET_AMD64_
11321 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
11322 instruction copyIns = ins_Copy(TYP_FLOAT);
11323 #else // !_TARGET_AMD64_
11324 unsigned firstFPRegPadding = 0;
11325 instruction copyIns = INS_movupd;
11326 #endif // !_TARGET_AMD64_
11330 if (compiler->compLocallocUsed)
11332 // localloc frame: use frame pointer relative offset
11333 assert(isFramePointerUsed());
11334 regBase = REG_FPBASE;
11335 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
11339 regBase = REG_SPBASE;
11340 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
11343 #ifdef _TARGET_AMD64_
11344 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
11345 assert((offset % 16) == 0);
11346 #endif // _TARGET_AMD64_
11348 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
11350 regMaskTP regBit = genRegMask(reg);
11351 if ((regBit & regMask) != 0)
11353 // ABI requires us to restore lower 128-bits of YMM register.
11354 getEmitter()->emitIns_R_AR(copyIns,
11355 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
11357 reg, regBase, offset);
11358 regMask &= ~regBit;
11359 offset -= XMM_REGSIZE_BYTES;
11362 genVzeroupperIfNeeded();
11365 // Generate Vzeroupper instruction as needed to zero out upper 128b-bit of all YMM registers so that the
11366 // AVX/Legacy SSE transition penalties can be avoided. This function is been used in genPreserveCalleeSavedFltRegs
11367 // (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
11368 // 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
11369 // code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
11370 // if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
11373 // check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
11374 // instruction, false to check if the function contains AVX instruciton (either 128-bit or 256-bit).
11376 void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
11378 bool emitVzeroUpper = false;
11379 if (check256bitOnly)
11381 emitVzeroUpper = getEmitter()->Contains256bitAVX();
11385 emitVzeroUpper = getEmitter()->ContainsAVX();
11388 if (emitVzeroUpper)
11390 assert(compiler->canUseVexEncoding());
11391 instGen(INS_vzeroupper);
11395 #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
11397 //-----------------------------------------------------------------------------------
11398 // IsMultiRegPassedType: Returns true if the type is returned in multiple registers
11401 // hClass - type handle
11404 // true if type is passed in multiple registers, false otherwise.
11406 bool Compiler::IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass)
11408 if (hClass == NO_CLASS_HANDLE)
11413 structPassingKind howToPassStruct;
11414 var_types returnType = getArgTypeForStruct(hClass, &howToPassStruct);
11416 return (varTypeIsStruct(returnType));
11419 //-----------------------------------------------------------------------------------
11420 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
11423 // hClass - type handle
11426 // true if type is returned in multiple registers, false otherwise.
11428 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
11430 if (hClass == NO_CLASS_HANDLE)
11435 structPassingKind howToReturnStruct;
11436 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
11438 return (varTypeIsStruct(returnType));
11441 //----------------------------------------------
11442 // Methods that support HFA's for ARM32/ARM64
11443 //----------------------------------------------
11445 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
11448 return varTypeIsFloating(GetHfaType(hClass));
11454 bool Compiler::IsHfa(GenTree* tree)
11457 return IsHfa(gtGetStructHandleIfPresent(tree));
11463 var_types Compiler::GetHfaType(GenTree* tree)
11466 return GetHfaType(gtGetStructHandleIfPresent(tree));
11472 unsigned Compiler::GetHfaCount(GenTree* tree)
11474 return GetHfaCount(gtGetStructHandleIfPresent(tree));
11477 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
11479 var_types result = TYP_UNDEF;
11480 if (hClass != NO_CLASS_HANDLE)
11483 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
11484 if (corType != CORINFO_TYPE_UNDEF)
11486 result = JITtype2varType(corType);
11488 #endif // FEATURE_HFA
11493 //------------------------------------------------------------------------
11494 // GetHfaCount: Given a class handle for an HFA struct
11495 // return the number of registers needed to hold the HFA
11497 // Note that on ARM32 the single precision registers overlap with
11498 // the double precision registers and for that reason each
11499 // double register is considered to be two single registers.
11500 // Thus for ARM32 an HFA of 4 doubles this function will return 8.
11501 // On ARM64 given an HFA of 4 singles or 4 doubles this function will
11502 // will return 4 for both.
11504 // hClass: the class handle of a HFA struct
11506 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
11508 assert(IsHfa(hClass));
11509 #ifdef _TARGET_ARM_
11510 // A HFA of doubles is twice as large as an HFA of singles for ARM32
11511 // (i.e. uses twice the number of single precison registers)
11512 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
11513 #else // _TARGET_ARM64_
11514 var_types hfaType = GetHfaType(hClass);
11515 unsigned classSize = info.compCompHnd->getClassSize(hClass);
11516 // Note that the retail build issues a warning about a potential divsion by zero without the Max function
11517 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
11518 return classSize / elemSize;
11519 #endif // _TARGET_ARM64_
11522 #ifdef _TARGET_XARCH_
11524 //------------------------------------------------------------------------
11525 // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
11526 // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
11527 // X86/x64 has a special encoding for shift/rotate-by-constant-1.
11530 // ins: the base shift/rotate instruction
11531 // shiftByValue: the constant value by which we are shifting/rotating
11533 instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
11535 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
11538 // Which format should we use?
11540 instruction shiftByConstantIns;
11542 if (shiftByValue == 1)
11544 // Use the shift-by-one format.
11546 assert(INS_rcl + 1 == INS_rcl_1);
11547 assert(INS_rcr + 1 == INS_rcr_1);
11548 assert(INS_rol + 1 == INS_rol_1);
11549 assert(INS_ror + 1 == INS_ror_1);
11550 assert(INS_shl + 1 == INS_shl_1);
11551 assert(INS_shr + 1 == INS_shr_1);
11552 assert(INS_sar + 1 == INS_sar_1);
11554 shiftByConstantIns = (instruction)(ins + 1);
11558 // Use the shift-by-NNN format.
11560 assert(INS_rcl + 2 == INS_rcl_N);
11561 assert(INS_rcr + 2 == INS_rcr_N);
11562 assert(INS_rol + 2 == INS_rol_N);
11563 assert(INS_ror + 2 == INS_ror_N);
11564 assert(INS_shl + 2 == INS_shl_N);
11565 assert(INS_shr + 2 == INS_shr_N);
11566 assert(INS_sar + 2 == INS_sar_N);
11568 shiftByConstantIns = (instruction)(ins + 2);
11571 return shiftByConstantIns;
11574 #endif // _TARGET_XARCH_
11576 #if !defined(LEGACY_BACKEND)
11578 //------------------------------------------------------------------------------------------------ //
11579 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
11582 // The number of the first argument with stack slot on the caller's frame.
11585 // On x64 Windows the caller always creates slots (homing space) in its frame for the
11586 // first 4 arguments of a callee (register passed args). So, the the variable number
11587 // (lclNum) for the first argument with a stack slot is always 0.
11588 // For System V systems or armarch, there is no such calling convention requirement, and the code needs to find
11589 // the first stack passed argument from the caller. This is done by iterating over
11590 // all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
11592 unsigned CodeGen::getFirstArgWithStackSlot()
11594 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARMARCH_)
11595 unsigned baseVarNum = 0;
11596 #if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
11597 baseVarNum = compiler->lvaFirstStackIncomingArgNum;
11599 if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
11601 baseVarNum = compiler->lvaFirstStackIncomingArgNum;
11604 #endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
11606 // Iterate over all the local variables in the Lcl var table.
11607 // They contain all the implicit arguments - thisPtr, retBuf,
11608 // generic context, PInvoke cookie, var arg cookie,no-standard args, etc.
11609 LclVarDsc* varDsc = nullptr;
11610 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
11612 varDsc = &(compiler->lvaTable[i]);
11614 // We are iterating over the arguments only.
11615 assert(varDsc->lvIsParam);
11617 if (varDsc->lvArgReg == REG_STK)
11620 #if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
11621 compiler->lvaFirstStackIncomingArgNum = baseVarNum;
11622 #endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
11626 assert(varDsc != nullptr);
11630 #elif defined(_TARGET_AMD64_)
11632 #else // _TARGET_X86
11633 // Not implemented for x86.
11634 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
11635 return BAD_VAR_NUM;
11636 #endif // _TARGET_X86_
11639 #endif // !LEGACY_BACKEND
11641 //------------------------------------------------------------------------
11642 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
11644 void CodeGen::genSinglePush()
11646 AddStackLevel(REGSIZE_BYTES);
11649 //------------------------------------------------------------------------
11650 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
11652 void CodeGen::genSinglePop()
11654 SubtractStackLevel(REGSIZE_BYTES);
11657 //------------------------------------------------------------------------
11658 // genPushRegs: Push the given registers.
11661 // regs - mask or registers to push
11662 // byrefRegs - OUT arg. Set to byref registers that were pushed.
11663 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
11666 // Mask of registers pushed.
11669 // This function does not check if the register is marked as used, etc.
11671 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
11673 *byrefRegs = RBM_NONE;
11674 *noRefRegs = RBM_NONE;
11676 if (regs == RBM_NONE)
11681 #if FEATURE_FIXED_OUT_ARGS
11683 NYI("Don't call genPushRegs with real regs!");
11686 #else // FEATURE_FIXED_OUT_ARGS
11688 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
11689 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
11691 regMaskTP pushedRegs = regs;
11693 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
11695 regMaskTP regBit = regMaskTP(1) << reg;
11697 if ((regBit & regs) == RBM_NONE)
11701 if (regBit & gcInfo.gcRegGCrefSetCur)
11705 else if (regBit & gcInfo.gcRegByrefSetCur)
11707 *byrefRegs |= regBit;
11710 else if (noRefRegs != NULL)
11712 *noRefRegs |= regBit;
11720 inst_RV(INS_push, reg, type);
11723 gcInfo.gcMarkRegSetNpt(regBit);
11730 #endif // FEATURE_FIXED_OUT_ARGS
11733 //------------------------------------------------------------------------
11734 // genPopRegs: Pop the registers that were pushed by genPushRegs().
11737 // regs - mask of registers to pop
11738 // byrefRegs - The byref registers that were pushed by genPushRegs().
11739 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
11744 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
11746 if (regs == RBM_NONE)
11751 #if FEATURE_FIXED_OUT_ARGS
11753 NYI("Don't call genPopRegs with real regs!");
11755 #else // FEATURE_FIXED_OUT_ARGS
11757 noway_assert((regs & byrefRegs) == byrefRegs);
11758 noway_assert((regs & noRefRegs) == noRefRegs);
11759 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
11761 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
11762 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
11764 // Walk the registers in the reverse order as genPushRegs()
11765 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
11767 regMaskTP regBit = regMaskTP(1) << reg;
11769 if ((regBit & regs) == RBM_NONE)
11773 if (regBit & byrefRegs)
11777 else if (regBit & noRefRegs)
11786 inst_RV(INS_pop, reg, type);
11789 if (type != TYP_INT)
11790 gcInfo.gcMarkRegPtrVal(reg, type);
11795 #endif // FEATURE_FIXED_OUT_ARGS
11798 /*****************************************************************************
11801 * This function should be called only after the sizes of the emitter blocks
11802 * have been finalized.
11805 void CodeGen::genSetScopeInfo()
11807 if (!compiler->opts.compScopeInfo)
11815 printf("*************** In genSetScopeInfo()\n");
11819 if (compiler->info.compVarScopesCount == 0)
11821 compiler->eeSetLVcount(0);
11822 compiler->eeSetLVdone();
11826 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
11827 noway_assert(psiOpenScopeList.scNext == nullptr);
11830 unsigned scopeCnt = siScopeCnt + psiScopeCnt;
11832 compiler->eeSetLVcount(scopeCnt);
11835 genTrnslLocalVarCount = scopeCnt;
11838 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
11842 // Record the scopes found for the parameters over the prolog.
11843 // The prolog needs to be treated differently as a variable may not
11844 // have the same info in the prolog block as is given by compiler->lvaTable.
11845 // eg. A register parameter is actually on the stack, before it is loaded to reg.
11847 CodeGen::psiScope* scopeP;
11849 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
11851 noway_assert(scopeP != nullptr);
11852 noway_assert(scopeP->scStartLoc.Valid());
11853 noway_assert(scopeP->scEndLoc.Valid());
11855 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
11856 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
11858 unsigned varNum = scopeP->scSlotNum;
11859 noway_assert(startOffs <= endOffs);
11861 // The range may be 0 if the prolog is empty. For such a case,
11862 // report the liveness of arguments to span at least the first
11863 // instruction in the method. This will be incorrect (except on
11864 // entry to the method) if the very first instruction of the method
11865 // is part of a loop. However, this should happen
11866 // very rarely, and the incorrectness is worth being able to look
11867 // at the argument on entry to the method.
11868 if (startOffs == endOffs)
11870 noway_assert(startOffs == 0);
11874 Compiler::siVarLoc varLoc;
11876 if (scopeP->scRegister)
11878 varLoc.vlType = Compiler::VLT_REG;
11879 varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
11883 varLoc.vlType = Compiler::VLT_STK;
11884 varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
11885 varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
11888 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
11891 // Record the scopes for the rest of the method.
11892 // Check that the LocalVarInfo scopes look OK
11893 noway_assert(siOpenScopeList.scNext == nullptr);
11895 CodeGen::siScope* scopeL;
11897 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
11899 noway_assert(scopeL != nullptr);
11900 noway_assert(scopeL->scStartLoc.Valid());
11901 noway_assert(scopeL->scEndLoc.Valid());
11903 // Find the start and end IP
11905 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
11906 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
11908 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
11910 // For stack vars, find the base register, and offset
11913 signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
11915 if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
11917 baseReg = REG_SPBASE;
11918 offset += scopeL->scStackLevel;
11922 baseReg = REG_FPBASE;
11925 // Now fill in the varLoc
11927 Compiler::siVarLoc varLoc;
11929 // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
11930 // for part of its lifetime, or in different registers for different parts of its lifetime.
11931 // This should only matter for non-debug code, where we do variable enregistration.
11932 // We should store the ranges of variable enregistration in the scope table.
11933 if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
11935 var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
11941 #ifdef _TARGET_64BIT_
11943 #endif // _TARGET_64BIT_
11945 varLoc.vlType = Compiler::VLT_REG;
11946 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11949 #ifndef _TARGET_64BIT_
11951 #if !CPU_HAS_FP_SUPPORT
11955 if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
11957 varLoc.vlType = Compiler::VLT_REG_REG;
11958 varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11959 varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
11963 varLoc.vlType = Compiler::VLT_REG_STK;
11964 varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11965 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
11966 if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
11968 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
11970 varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
11973 #endif // !_TARGET_64BIT_
11975 #ifdef _TARGET_64BIT_
11979 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
11980 // so no XMM registers can get debug information.
11981 varLoc.vlType = Compiler::VLT_REG_FP;
11982 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11985 #else // !_TARGET_64BIT_
11987 #if CPU_HAS_FP_SUPPORT
11990 if (isFloatRegType(type))
11992 varLoc.vlType = Compiler::VLT_FPSTK;
11993 varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
11996 #endif // CPU_HAS_FP_SUPPORT
11998 #endif // !_TARGET_64BIT_
12000 #ifdef FEATURE_SIMD
12005 varLoc.vlType = Compiler::VLT_REG_FP;
12007 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
12008 // so no XMM registers can get debug information.
12010 // Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
12011 // in eeDispVar() --> getRegName() that regNumber is valid.
12012 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
12014 #endif // FEATURE_SIMD
12017 noway_assert(!"Invalid type");
12022 assert(offset != BAD_STK_OFFS);
12023 LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
12024 switch (genActualType(varDsc->TypeGet()))
12031 case TYP_BLK: // Needed because of the TYP_BLK stress mode
12032 #ifdef FEATURE_SIMD
12038 #ifdef _TARGET_64BIT_
12041 #endif // _TARGET_64BIT_
12042 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
12043 // In the AMD64 ABI we are supposed to pass a struct by reference when its
12044 // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
12045 // the IR to comply with the ABI and therefore changes the type of the lclVar
12046 // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
12047 // this is still a struct by setting the lvIsTemp flag.
12048 // The same is true for ARM64 and structs > 16 bytes.
12049 // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
12050 // Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
12051 // to accomodate for this situation.
12052 if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
12054 assert(varDsc->lvIsParam);
12055 varLoc.vlType = Compiler::VLT_STK_BYREF;
12058 #endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
12060 varLoc.vlType = Compiler::VLT_STK;
12062 varLoc.vlStk.vlsBaseReg = baseReg;
12063 varLoc.vlStk.vlsOffset = offset;
12064 if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
12066 varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
12070 #ifndef _TARGET_64BIT_
12073 varLoc.vlType = Compiler::VLT_STK2;
12074 varLoc.vlStk2.vls2BaseReg = baseReg;
12075 varLoc.vlStk2.vls2Offset = offset;
12076 if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
12078 varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
12081 #endif // !_TARGET_64BIT_
12084 noway_assert(!"Invalid type");
12088 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
12089 scopeL->scAvailable, varLoc);
12092 compiler->eeSetLVdone();
12095 //------------------------------------------------------------------------
12096 // genSetScopeInfo: Record scope information for debug info
12100 // startOffs - the starting offset for this scope
12101 // length - the length of this scope
12102 // varNum - the lclVar for this scope info
12108 // Called for every scope info piece to record by the main genSetScopeInfo()
12110 void CodeGen::genSetScopeInfo(unsigned which,
12111 UNATIVE_OFFSET startOffs,
12112 UNATIVE_OFFSET length,
12116 Compiler::siVarLoc& varLoc)
12118 // We need to do some mapping while reporting back these variables.
12120 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
12121 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
12123 #ifdef _TARGET_X86_
12124 // Non-x86 platforms are allowed to access all arguments directly
12125 // so we don't need this code.
12127 // Is this a varargs function?
12129 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
12130 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
12132 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
12134 // All stack arguments (except the varargs handle) have to be
12135 // accessed via the varargs cookie. Discard generated info,
12136 // and just find its position relative to the varargs handle
12138 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
12139 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
12141 noway_assert(!compiler->opts.compDbgCode);
12145 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
12146 // arguments of vararg functions to avoid reporting them to GC.
12147 noway_assert(!compiler->lvaTable[varNum].lvRegister);
12148 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
12149 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
12151 noway_assert(cookieOffset < varOffset);
12152 unsigned offset = varOffset - cookieOffset;
12153 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
12154 noway_assert(offset < stkArgSize);
12155 offset = stkArgSize - offset;
12157 varLoc.vlType = Compiler::VLT_FIXED_VA;
12158 varLoc.vlFixedVarArg.vlfvOffset = offset;
12161 #endif // _TARGET_X86_
12163 VarName name = nullptr;
12167 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
12169 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
12171 name = compiler->info.compVarScopes[scopeNum].vsdName;
12175 // Hang on to this compiler->info.
12177 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
12179 tlvi.tlviVarNum = ilVarNum;
12180 tlvi.tlviLVnum = LVnum;
12181 tlvi.tlviName = name;
12182 tlvi.tlviStartPC = startOffs;
12183 tlvi.tlviLength = length;
12184 tlvi.tlviAvailable = avail;
12185 tlvi.tlviVarLoc = varLoc;
12189 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
12192 /*****************************************************************************/
12195 /*****************************************************************************
12198 * Can be called only after lviSetLocalVarInfo() has been called
12202 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
12204 if (!compiler->opts.compScopeInfo)
12207 if (compiler->info.compVarScopesCount == 0)
12210 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
12212 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
12214 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
12215 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
12216 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
12218 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
12225 /*****************************************************************************
12228 * Can be called only after lviSetLocalVarInfo() has been called
12232 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
12234 if (!compiler->opts.compScopeInfo)
12237 if (compiler->info.compVarScopesCount == 0)
12240 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
12242 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
12244 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
12245 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
12246 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
12248 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
12255 /*****************************************************************************/
12256 #endif // defined(DEBUG)
12257 #endif // LATE_DISASM
12261 /*****************************************************************************
12262 * Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
12265 void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
12267 if (mappingNum != unsigned(-1))
12269 printf("%d: ", mappingNum);
12272 IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
12274 if (offsx == BAD_IL_OFFSET)
12280 Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
12282 if (jitIsStackEmpty(offsx))
12284 printf(" STACK_EMPTY");
12287 if (jitIsCallInstruction(offsx))
12289 printf(" CALL_INSTRUCTION");
12294 ipMapping->ipmdNativeLoc.Print();
12295 // We can only call this after code generation. Is there any way to tell when it's legal to call?
12296 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
12298 if (ipMapping->ipmdIsLabel)
12306 void CodeGen::genIPmappingListDisp()
12308 unsigned mappingNum = 0;
12309 Compiler::IPmappingDsc* ipMapping;
12311 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
12313 genIPmappingDisp(mappingNum, ipMapping);
12320 /*****************************************************************************
12322 * Append an IPmappingDsc struct to the list that we're maintaining
12323 * for the debugger.
12324 * Record the instr offset as being at the current code gen position.
12327 void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
12329 if (!compiler->opts.compDbgInfo)
12334 assert(offsx != BAD_IL_OFFSET);
12336 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12338 case ICorDebugInfo::PROLOG:
12339 case ICorDebugInfo::EPILOG:
12344 if (offsx != ICorDebugInfo::NO_MAPPING)
12346 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
12349 // Ignore this one if it's the same IL offset as the last one we saw.
12350 // Note that we'll let through two identical IL offsets if the flag bits
12351 // differ, or two identical "special" mappings (e.g., PROLOG).
12352 if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
12354 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
12360 /* Create a mapping entry and append it to the list */
12362 Compiler::IPmappingDsc* addMapping =
12363 (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
12365 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
12366 addMapping->ipmdILoffsx = offsx;
12367 addMapping->ipmdIsLabel = isLabel;
12368 addMapping->ipmdNext = nullptr;
12370 if (compiler->genIPmappingList != nullptr)
12372 assert(compiler->genIPmappingLast != nullptr);
12373 assert(compiler->genIPmappingLast->ipmdNext == nullptr);
12374 compiler->genIPmappingLast->ipmdNext = addMapping;
12378 assert(compiler->genIPmappingLast == nullptr);
12379 compiler->genIPmappingList = addMapping;
12382 compiler->genIPmappingLast = addMapping;
12387 printf("Added IP mapping: ");
12388 genIPmappingDisp(unsigned(-1), addMapping);
12393 /*****************************************************************************
12395 * Prepend an IPmappingDsc struct to the list that we're maintaining
12396 * for the debugger.
12397 * Record the instr offset as being at the current code gen position.
12399 void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
12401 if (!compiler->opts.compDbgInfo)
12406 assert(offsx != BAD_IL_OFFSET);
12407 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
12409 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12411 case ICorDebugInfo::NO_MAPPING:
12412 case ICorDebugInfo::PROLOG:
12413 case ICorDebugInfo::EPILOG:
12417 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
12421 /* Create a mapping entry and prepend it to the list */
12423 Compiler::IPmappingDsc* addMapping =
12424 (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
12426 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
12427 addMapping->ipmdILoffsx = offsx;
12428 addMapping->ipmdIsLabel = true;
12429 addMapping->ipmdNext = nullptr;
12431 addMapping->ipmdNext = compiler->genIPmappingList;
12432 compiler->genIPmappingList = addMapping;
12434 if (compiler->genIPmappingLast == nullptr)
12436 compiler->genIPmappingLast = addMapping;
12442 printf("Added IP mapping to front: ");
12443 genIPmappingDisp(unsigned(-1), addMapping);
12448 /*****************************************************************************/
12450 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
12451 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
12452 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
12454 C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
12455 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
12456 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
12457 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
12459 //------------------------------------------------------------------------
12460 // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
12461 // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
12462 // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
12465 // offsx - the IL_OFFSETX value with the IL offset to extract.
12470 IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
12472 assert(offsx != BAD_IL_OFFSET);
12474 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12476 case ICorDebugInfo::NO_MAPPING:
12477 case ICorDebugInfo::PROLOG:
12478 case ICorDebugInfo::EPILOG:
12482 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
12486 //------------------------------------------------------------------------
12487 // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
12488 // distinguished values. Asserts if passed BAD_IL_OFFSET.
12491 // offsx - the IL_OFFSETX value with the IL offset to extract.
12496 IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
12498 assert(offsx != BAD_IL_OFFSET);
12500 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12502 case ICorDebugInfo::NO_MAPPING:
12503 case ICorDebugInfo::PROLOG:
12504 case ICorDebugInfo::EPILOG:
12505 return IL_OFFSET(offsx);
12508 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
12512 //------------------------------------------------------------------------
12513 // jitIsStackEmpty: Does the IL offset have the stack empty bit set?
12514 // Asserts if passed BAD_IL_OFFSET.
12517 // offsx - the IL_OFFSETX value to check
12520 // 'true' if the stack empty bit is set; 'false' otherwise.
12522 bool jitIsStackEmpty(IL_OFFSETX offsx)
12524 assert(offsx != BAD_IL_OFFSET);
12526 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12528 case ICorDebugInfo::NO_MAPPING:
12529 case ICorDebugInfo::PROLOG:
12530 case ICorDebugInfo::EPILOG:
12534 return (offsx & IL_OFFSETX_STKBIT) == 0;
12538 //------------------------------------------------------------------------
12539 // jitIsCallInstruction: Does the IL offset have the call instruction bit set?
12540 // Asserts if passed BAD_IL_OFFSET.
12543 // offsx - the IL_OFFSETX value to check
12546 // 'true' if the call instruction bit is set; 'false' otherwise.
12548 bool jitIsCallInstruction(IL_OFFSETX offsx)
12550 assert(offsx != BAD_IL_OFFSET);
12552 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
12554 case ICorDebugInfo::NO_MAPPING:
12555 case ICorDebugInfo::PROLOG:
12556 case ICorDebugInfo::EPILOG:
12560 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
12564 /*****************************************************************************/
12566 void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
12568 if (!compiler->opts.compDbgCode)
12573 if (offsx == BAD_IL_OFFSET)
12578 /* If other IL were offsets reported, skip */
12580 if (compiler->genIPmappingLast == nullptr)
12585 if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
12590 /* offsx was the last reported offset. Make sure that we generated native code */
12592 if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
12598 /*****************************************************************************
12600 * Shut down the IP-mapping logic, report the info to the EE.
12603 void CodeGen::genIPmappingGen()
12605 if (!compiler->opts.compDbgInfo)
12613 printf("*************** In genIPmappingGen()\n");
12617 if (compiler->genIPmappingList == nullptr)
12619 compiler->eeSetLIcount(0);
12620 compiler->eeSetLIdone();
12624 Compiler::IPmappingDsc* tmpMapping;
12625 Compiler::IPmappingDsc* prevMapping;
12626 unsigned mappingCnt;
12627 UNATIVE_OFFSET lastNativeOfs;
12629 /* First count the number of distinct mapping records */
12632 lastNativeOfs = UNATIVE_OFFSET(~0);
12634 for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
12635 tmpMapping = tmpMapping->ipmdNext)
12637 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
12639 // Managed RetVal - since new sequence points are emitted to identify IL calls,
12640 // make sure that those are not filtered and do not interfere with filtering of
12641 // other sequence points.
12642 if (jitIsCallInstruction(srcIP))
12648 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
12650 if (nextNativeOfs != lastNativeOfs)
12653 lastNativeOfs = nextNativeOfs;
12654 prevMapping = tmpMapping;
12658 /* If there are mappings with the same native offset, then:
12659 o If one of them is NO_MAPPING, ignore it
12660 o If one of them is a label, report that and ignore the other one
12661 o Else report the higher IL offset
12664 PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
12665 if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
12667 // If the previous entry was NO_MAPPING, ignore it
12668 prevMapping->ipmdNativeLoc.Init();
12669 prevMapping = tmpMapping;
12671 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
12673 // If the current entry is NO_MAPPING, ignore it
12674 // Leave prevMapping unchanged as tmpMapping is no longer valid
12675 tmpMapping->ipmdNativeLoc.Init();
12677 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
12679 // counting for special cases: see below
12681 prevMapping = tmpMapping;
12685 noway_assert(prevMapping != nullptr);
12686 noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
12687 lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
12689 /* The previous block had the same native offset. We have to
12690 discard one of the mappings. Simply reinitialize ipmdNativeLoc
12691 and prevMapping will be ignored later. */
12693 if (prevMapping->ipmdIsLabel)
12695 // Leave prevMapping unchanged as tmpMapping is no longer valid
12696 tmpMapping->ipmdNativeLoc.Init();
12700 prevMapping->ipmdNativeLoc.Init();
12701 prevMapping = tmpMapping;
12706 /* Tell them how many mapping records we've got */
12708 compiler->eeSetLIcount(mappingCnt);
12710 /* Now tell them about the mappings */
12713 lastNativeOfs = UNATIVE_OFFSET(~0);
12715 for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
12717 // Do we have to skip this record ?
12718 if (!tmpMapping->ipmdNativeLoc.Valid())
12723 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
12724 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
12726 if (jitIsCallInstruction(srcIP))
12728 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
12730 else if (nextNativeOfs != lastNativeOfs)
12732 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
12733 lastNativeOfs = nextNativeOfs;
12735 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
12737 // For the special case of an IL instruction with no body
12738 // followed by the epilog (say ret void immediately preceding
12739 // the method end), we put two entries in, so that we'll stop
12740 // at the (empty) ret statement if the user tries to put a
12741 // breakpoint there, and then have the option of seeing the
12742 // epilog or not based on SetUnmappedStopMask for the stepper.
12743 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
12749 //This check is disabled. It is always true that any time this check asserts, the debugger would have a
12750 //problem with IL source level debugging. However, for a C# file, it only matters if things are on
12751 //different source lines. As a result, we have all sorts of latent problems with how we emit debug
12752 //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
12754 if (compiler->opts.compDbgCode)
12756 //Assert that the first instruction of every basic block with more than one incoming edge has a
12757 //different sequence point from each incoming block.
12759 //It turns out that the only thing we really have to assert is that the first statement in each basic
12760 //block has an IL offset and appears in eeBoundaries.
12761 for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
12763 if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
12765 noway_assert(block->bbTreeList->gtOper == GT_STMT);
12766 bool found = false;
12767 if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12769 IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
12770 for (unsigned i = 0; i < eeBoundariesCount; ++i)
12772 if (eeBoundaries[i].ilOffset == ilOffs)
12779 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
12785 compiler->eeSetLIdone();
12788 /*============================================================================
12790 * These are empty stubs to help the late dis-assembler to compile
12791 * if the late disassembler is being built into a non-DEBUG build.
12793 *============================================================================
12796 #if defined(LATE_DISASM)
12797 #if !defined(DEBUG)
12800 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
12806 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
12811 /*****************************************************************************/
12812 #endif // !defined(DEBUG)
12813 #endif // defined(LATE_DISASM)
12814 /*****************************************************************************/
12816 #ifndef LEGACY_BACKEND
12818 //------------------------------------------------------------------------
12819 // indirForm: Make a temporary indir we can feed to pattern matching routines
12820 // in cases where we don't want to instantiate all the indirs that happen.
12822 GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
12824 GenTreeIndir i(GT_IND, type, base, nullptr);
12825 i.gtRegNum = REG_NA;
12830 //------------------------------------------------------------------------
12831 // intForm: Make a temporary int we can feed to pattern matching routines
12832 // in cases where we don't want to instantiate.
12834 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
12836 GenTreeIntCon i(type, value);
12837 i.gtRegNum = REG_NA;
12841 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
12842 //------------------------------------------------------------------------
12843 // genLongReturn: Generates code for long return statement for x86 and arm.
12845 // Note: treeNode's and op1's registers are already consumed.
12848 // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
12853 void CodeGen::genLongReturn(GenTree* treeNode)
12855 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
12856 assert(treeNode->TypeGet() == TYP_LONG);
12857 GenTree* op1 = treeNode->gtGetOp1();
12858 var_types targetType = treeNode->TypeGet();
12860 assert(op1 != nullptr);
12861 assert(op1->OperGet() == GT_LONG);
12862 GenTree* loRetVal = op1->gtGetOp1();
12863 GenTree* hiRetVal = op1->gtGetOp2();
12864 assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
12866 genConsumeReg(loRetVal);
12867 genConsumeReg(hiRetVal);
12868 if (loRetVal->gtRegNum != REG_LNGRET_LO)
12870 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
12872 if (hiRetVal->gtRegNum != REG_LNGRET_HI)
12874 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
12877 #endif // _TARGET_X86_ || _TARGET_ARM_
12879 //------------------------------------------------------------------------
12880 // genReturn: Generates code for return statement.
12881 // In case of struct return, delegates to the genStructReturn method.
12884 // treeNode - The GT_RETURN or GT_RETFILT tree node.
12889 void CodeGen::genReturn(GenTree* treeNode)
12891 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
12892 GenTree* op1 = treeNode->gtGetOp1();
12893 var_types targetType = treeNode->TypeGet();
12895 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
12896 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
12897 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
12898 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
12901 if (targetType == TYP_VOID)
12903 assert(op1 == nullptr);
12907 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
12908 if (targetType == TYP_LONG)
12910 genLongReturn(treeNode);
12913 #endif // _TARGET_X86_ || _TARGET_ARM_
12915 if (isStructReturn(treeNode))
12917 genStructReturn(treeNode);
12919 else if (targetType != TYP_VOID)
12921 assert(op1 != nullptr);
12922 noway_assert(op1->gtRegNum != REG_NA);
12924 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
12925 // consumed a reg for the operand. This is because the variable
12926 // is dead after return. But we are issuing more instructions
12927 // like "profiler leave callback" after this consumption. So
12928 // if you are issuing more instructions after this point,
12929 // remember to keep the variable live up until the new method
12930 // exit point where it is actually dead.
12931 genConsumeReg(op1);
12933 #if defined(_TARGET_ARM64_)
12934 genSimpleReturn(treeNode);
12935 #else // !_TARGET_ARM64_
12936 #if defined(_TARGET_X86_)
12937 if (varTypeIsFloating(treeNode))
12939 genFloatReturn(treeNode);
12942 #elif defined(_TARGET_ARM_)
12943 if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
12945 if (targetType == TYP_FLOAT)
12947 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
12951 assert(targetType == TYP_DOUBLE);
12952 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
12957 #endif // _TARGET_ARM_
12959 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
12960 if (op1->gtRegNum != retReg)
12962 inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
12965 #endif // !_TARGET_ARM64_
12969 #ifdef PROFILING_SUPPORTED
12971 // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
12972 // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
12973 // in the handling of the GT_RETURN statement.
12974 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
12975 // for the return registers containing GC refs.
12977 // There will be a single return block while generating profiler ELT callbacks.
12979 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
12980 // In flowgraph and other places assert that the last node of a block marked as
12981 // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
12982 // maintain such an invariant irrespective of whether profiler hook needed or not.
12983 // Also, there is not much to be gained by materializing it as an explicit node.
12984 if (compiler->compCurBB == compiler->genReturnBB)
12987 // Since we are invalidating the assumption that we would slip into the epilog
12988 // right after the "return", we need to preserve the return reg's GC state
12989 // across the call until actual method return.
12990 ReturnTypeDesc retTypeDesc;
12991 unsigned regCount = 0;
12992 if (compiler->compMethodReturnsMultiRegRetType())
12994 if (varTypeIsLong(compiler->info.compRetNativeType))
12996 retTypeDesc.InitializeLongReturnType(compiler);
12998 else // we must have a struct return type
13000 retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
13002 regCount = retTypeDesc.GetReturnRegCount();
13005 if (varTypeIsGC(compiler->info.compRetType))
13007 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
13009 else if (compiler->compMethodReturnsMultiRegRetType())
13011 for (unsigned i = 0; i < regCount; ++i)
13013 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
13015 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
13020 genProfilingLeaveCallback();
13022 if (varTypeIsGC(compiler->info.compRetType))
13024 gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
13026 else if (compiler->compMethodReturnsMultiRegRetType())
13028 for (unsigned i = 0; i < regCount; ++i)
13030 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
13032 gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
13037 #endif // PROFILING_SUPPORTED
13040 #endif // !LEGACY_BACKEND