1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Code Generator Common: XX
9 XX Methods common to all architectures and register allocation strategies XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16 // identical, and which should probably be moved here.
27 #ifndef JIT32_GCENCODER
28 #include "gcinfoencoder.h"
31 /*****************************************************************************/
33 const BYTE genTypeSizes[] = {
34 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
39 const BYTE genTypeAlignments[] = {
40 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
45 const BYTE genTypeStSzs[] = {
46 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
51 const BYTE genActualTypes[] = {
52 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
57 void CodeGenInterface::setFramePointerRequiredEH(bool value)
59 m_cgFramePointerRequired = value;
61 #ifndef JIT32_GCENCODER
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
73 printf("Method has EH, marking method as fully interruptible\n");
77 m_cgInterruptible = true;
79 #endif // JIT32_GCENCODER
82 /*****************************************************************************/
83 CodeGenInterface* getCodeGenerator(Compiler* comp)
85 return new (comp, CMK_Codegen) CodeGen(comp);
88 // CodeGen constructor
89 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
94 /*****************************************************************************/
96 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
98 #if defined(_TARGET_XARCH_)
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104 #endif // defined(_TARGET_XARCH_)
106 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
110 #if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
115 gcInfo.regSet = ®Set;
116 m_cgEmitter = new (compiler->getAllocator()) emitter();
117 m_cgEmitter->codeGen = this;
118 m_cgEmitter->gcInfo = &gcInfo;
121 setVerbose(compiler->verbose);
129 getDisAssembler().disInit(compiler);
133 genTempLiveChg = true;
134 genTrnslLocalVarCount = 0;
136 // Shouldn't be used before it is set in genFnProlog()
137 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
139 #if defined(_TARGET_XARCH_)
140 // Shouldn't be used before it is set in genFnProlog()
141 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
142 #endif // defined(_TARGET_XARCH_)
145 #ifdef _TARGET_AMD64_
146 // This will be set before final frame layout.
147 compiler->compVSQuirkStackPaddingNeeded = 0;
149 // Set to true if we perform the Quirk that fixes the PPP issue
150 compiler->compQuirkForPPPflag = false;
151 #endif // _TARGET_AMD64_
153 // Initialize the IP-mapping logic.
154 compiler->genIPmappingList = nullptr;
155 compiler->genIPmappingLast = nullptr;
156 compiler->genCallSite2ILOffsetMap = nullptr;
158 /* Assume that we not fully interruptible */
160 genInterruptible = false;
161 #ifdef _TARGET_ARMARCH_
162 hasTailCalls = false;
163 #endif // _TARGET_ARMARCH_
165 genInterruptibleUsed = false;
166 genCurDispOffset = (unsigned)-1;
170 void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
172 tree->gtRegNum = reg;
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)

//---------------------------------------------------------------------
// genTotalFrameSize - return the "total" size of the stack frame, including local size
// and callee-saved register size. There are a few things "missing" depending on the
// platform. The function genCallerSPtoInitialSPdelta() includes those things.
//
// For ARM, this doesn't include the prespilled registers.
//
// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
// It also doesn't include the pushed return address.
//
int CodeGenInterface::genTotalFrameSize()
{
    // compCalleeRegsPushed is poisoned until genFnProlog() computes it.
    assert(!IsUninitialized(compiler->compCalleeRegsPushed));

    int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;

    assert(totalFrameSize >= 0);
    return totalFrameSize;
}

//---------------------------------------------------------------------
// genSPtoFPdelta - return the offset from SP to the frame pointer.
// This number is going to be positive, since SP must be at the lowest
// address.
//
// There must be a frame pointer to call this function!
//
int CodeGenInterface::genSPtoFPdelta()
{
    assert(isFramePointerUsed());

    int delta;

    delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();

    assert(delta >= 0);
    return delta;
}

//---------------------------------------------------------------------
// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
// This number is going to be negative, since the Caller-SP is at a higher
// address than the frame pointer.
//
// There must be a frame pointer to call this function!
//
int CodeGenInterface::genCallerSPtoFPdelta()
{
    assert(isFramePointerUsed());
    int callerSPtoFPdelta = 0;

#if defined(_TARGET_ARM_)
    // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
    callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
    callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#elif defined(_TARGET_X86_)
    // Thanks to ebp chaining, the difference between ebp-based addresses
    // and caller-SP-relative addresses is just the 2 pointers:
    // the return address and the pushed frame pointer.
    callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#else
#error "Unknown _TARGET_"
#endif // _TARGET_*

    assert(callerSPtoFPdelta <= 0);
    return callerSPtoFPdelta;
}

//---------------------------------------------------------------------
// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
//
// This number will be negative.
//
int CodeGenInterface::genCallerSPtoInitialSPdelta()
{
    int callerSPtoSPdelta = 0;

#if defined(_TARGET_ARM_)
    callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
    callerSPtoSPdelta -= genTotalFrameSize();
#elif defined(_TARGET_X86_)
    callerSPtoSPdelta -= genTotalFrameSize();
    callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address

    // compCalleeRegsPushed does not account for the frame pointer
    // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
    if (isFramePointerUsed())
    {
        callerSPtoSPdelta -= REGSIZE_BYTES;
    }
#else
#error "Unknown _TARGET_"
#endif // _TARGET_*

    assert(callerSPtoSPdelta <= 0);
    return callerSPtoSPdelta;
}

#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
281 /*****************************************************************************
282 * Should we round simple operations (assignments, arithmetic operations, etc.)
287 bool CodeGen::genShouldRoundFP()
289 RoundLevel roundLevel = getRoundFloatLevel();
294 case ROUND_CMP_CONST:
299 assert(roundLevel == ROUND_ALWAYS);
304 /*****************************************************************************
306 * Initialize some global variables.
309 void CodeGen::genPrepForCompiler()
311 treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
313 /* Figure out which non-register variables hold pointers */
315 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
317 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
318 // in a register (i.e. they live on the stack for all or part of their lifetime).
319 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
323 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
325 if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
327 if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
329 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
333 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
334 genLastLiveMask = RBM_NONE;
336 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
340 /*****************************************************************************
341 * To report exception handling information to the VM, we need the size of the exception
342 * handling regions. To compute that, we need to emit labels for the beginning block of
343 * an EH region, and the block that immediately follows a region. Go through the EH
344 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
346 * The beginning blocks of the EH regions already should have this flag set.
348 * No blocks should be added or removed after this.
350 * This code is closely couple with genReportEH() in the sense that any block
351 * that this procedure has determined it needs to have a label has to be selected
352 * using the same logic both here and in genReportEH(), so basically any time there is
353 * a change in the way we handle EH reporting, we have to keep the logic of these two
357 void CodeGen::genPrepForEHCodegen()
359 assert(!compiler->fgSafeBasicBlockCreation);
364 bool anyFinallys = false;
366 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
367 HBtab < HBtabEnd; HBtab++)
369 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
370 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
372 if (HBtab->ebdTryLast->bbNext != nullptr)
374 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
377 if (HBtab->ebdHndLast->bbNext != nullptr)
379 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
382 if (HBtab->HasFilter())
384 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
385 // The block after the last block of the filter is
386 // the handler begin block, which we already asserted
387 // has BBF_HAS_LABEL set.
390 #if FEATURE_EH_CALLFINALLY_THUNKS
391 if (HBtab->HasFinallyHandler())
395 #endif // FEATURE_EH_CALLFINALLY_THUNKS
398 #if FEATURE_EH_CALLFINALLY_THUNKS
401 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
403 if (block->bbJumpKind == BBJ_CALLFINALLY)
405 BasicBlock* bbToLabel = block->bbNext;
406 if (block->isBBCallAlwaysPair())
408 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
410 if (bbToLabel != nullptr)
412 bbToLabel->bbFlags |= BBF_HAS_LABEL;
414 } // block is BBJ_CALLFINALLY
416 } // if (anyFinallys)
417 #endif // FEATURE_EH_CALLFINALLY_THUNKS
420 void CodeGenInterface::genUpdateLife(GenTree* tree)
422 treeLifeUpdater->UpdateLife(tree);
425 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
427 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
430 // Return the register mask for the given register variable
432 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
434 regMaskTP regMask = RBM_NONE;
436 assert(varDsc->lvIsInReg());
438 if (varTypeIsFloating(varDsc->TypeGet()))
440 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
444 regMask = genRegMask(varDsc->lvRegNum);
449 // Return the register mask for the given lclVar or regVar tree node
451 regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
453 assert(tree->gtOper == GT_LCL_VAR);
455 regMaskTP regMask = RBM_NONE;
456 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
457 if (varDsc->lvPromoted)
459 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
461 noway_assert(compiler->lvaTable[i].lvIsStructField);
462 if (compiler->lvaTable[i].lvIsInReg())
464 regMask |= genGetRegMask(&compiler->lvaTable[i]);
468 else if (varDsc->lvIsInReg())
470 regMask = genGetRegMask(varDsc);
475 // The given lclVar is either going live (being born) or dying.
476 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
477 // Update regSet.rsMaskVars accordingly.
479 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
481 regMaskTP regMask = genGetRegMask(varDsc);
484 if (compiler->verbose)
486 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
487 varDsc->PrintVarReg();
488 printf(" is becoming %s ", (isDying) ? "dead" : "live");
489 Compiler::printTreeID(tree);
496 // We'd like to be able to assert the following, however if we are walking
497 // through a qmark/colon tree, we may encounter multiple last-use nodes.
498 // assert((regSet.rsMaskVars & regMask) == regMask);
499 regSet.RemoveMaskVars(regMask);
503 assert((regSet.rsMaskVars & regMask) == 0);
504 regSet.AddMaskVars(regMask);
508 //----------------------------------------------------------------------
509 // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
510 // Not all JIT Helper calls follow the standard ABI on the target architecture.
512 // TODO-CQ: Currently this list is incomplete (not all helpers calls are
513 // enumerated) and not 100% accurate (some killsets are bigger than
514 // what they really are).
515 // There's some work to be done in several places in the JIT to
516 // accurately track the registers that are getting killed by
518 // a) LSRA needs several changes to accomodate more precise killsets
519 // for every helper call it sees (both explicitly [easy] and
520 // implicitly [hard])
521 // b) Currently for AMD64, when we generate code for a helper call
522 // we're independently over-pessimizing the killsets of the call
523 // (independently from LSRA) and this needs changes
524 // both in CodeGenAmd64.cpp and emitx86.cpp.
526 // The best solution for this problem would be to try to centralize
527 // the killset information in a single place but then make the
528 // corresponding changes so every code generation phase is in sync
531 // The interim solution is to only add known helper calls that don't
532 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
535 // helper - The helper being inquired about
538 // Mask of register kills -- registers whose values are no longer guaranteed to be the same.
540 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
544 case CORINFO_HELP_ASSIGN_BYREF:
545 #if defined(_TARGET_AMD64_)
546 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
547 #elif defined(_TARGET_ARMARCH_)
548 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
549 #elif defined(_TARGET_X86_)
550 return RBM_ESI | RBM_EDI | RBM_ECX;
552 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
553 return RBM_CALLEE_TRASH;
556 #if defined(_TARGET_ARMARCH_)
557 case CORINFO_HELP_ASSIGN_REF:
558 case CORINFO_HELP_CHECKED_ASSIGN_REF:
559 return RBM_CALLEE_TRASH_WRITEBARRIER;
562 case CORINFO_HELP_PROF_FCN_ENTER:
563 #ifdef RBM_PROFILER_ENTER_TRASH
564 return RBM_PROFILER_ENTER_TRASH;
566 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
569 case CORINFO_HELP_PROF_FCN_LEAVE:
570 #ifdef RBM_PROFILER_LEAVE_TRASH
571 return RBM_PROFILER_LEAVE_TRASH;
573 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
576 case CORINFO_HELP_PROF_FCN_TAILCALL:
577 #ifdef RBM_PROFILER_TAILCALL_TRASH
578 return RBM_PROFILER_TAILCALL_TRASH;
580 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
584 case CORINFO_HELP_ASSIGN_REF_EAX:
585 case CORINFO_HELP_ASSIGN_REF_ECX:
586 case CORINFO_HELP_ASSIGN_REF_EBX:
587 case CORINFO_HELP_ASSIGN_REF_EBP:
588 case CORINFO_HELP_ASSIGN_REF_ESI:
589 case CORINFO_HELP_ASSIGN_REF_EDI:
591 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
592 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
593 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
594 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
595 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
596 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
599 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
600 case CORINFO_HELP_ASSIGN_REF:
601 case CORINFO_HELP_CHECKED_ASSIGN_REF:
602 return RBM_EAX | RBM_EDX;
603 #endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
606 case CORINFO_HELP_STOP_FOR_GC:
607 return RBM_STOP_FOR_GC_TRASH;
609 case CORINFO_HELP_INIT_PINVOKE_FRAME:
610 return RBM_INIT_PINVOKE_FRAME_TRASH;
613 return RBM_CALLEE_TRASH;
617 //----------------------------------------------------------------------
618 // compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
619 // contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
620 // what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
621 // GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
622 // compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
623 // address registers as killed for liveness purposes, since their values change. However, they still are
624 // valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
626 // Note: This list may not be complete and defaults to the default RBM_CALLEE_TRASH_NOGC registers.
629 // helper - The helper being inquired about
632 // Mask of GC register kills
634 regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
636 assert(emitter::emitNoGChelper(helper));
640 case CORINFO_HELP_ASSIGN_BYREF:
641 #if defined(_TARGET_X86_)
642 // This helper only trashes ECX.
644 #elif defined(_TARGET_AMD64_)
645 // This uses and defs RDI and RSI.
646 return RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI);
647 #elif defined(_TARGET_ARMARCH_)
648 return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
650 assert(!"unknown arch");
653 #if defined(_TARGET_XARCH_)
654 case CORINFO_HELP_PROF_FCN_ENTER:
655 return RBM_PROFILER_ENTER_TRASH;
657 case CORINFO_HELP_PROF_FCN_LEAVE:
658 return RBM_PROFILER_LEAVE_TRASH;
660 case CORINFO_HELP_PROF_FCN_TAILCALL:
661 return RBM_PROFILER_TAILCALL_TRASH;
662 #endif // defined(_TARGET_XARCH_)
664 #if defined(_TARGET_ARMARCH_)
665 case CORINFO_HELP_ASSIGN_REF:
666 case CORINFO_HELP_CHECKED_ASSIGN_REF:
667 return RBM_CALLEE_GCTRASH_WRITEBARRIER;
668 case CORINFO_HELP_PROF_FCN_LEAVE:
669 // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH on ARMARCH.
670 return RBM_CALLEE_TRASH_NOGC & ~RBM_PROFILER_RET_SCRATCH;
673 #if defined(_TARGET_X86_)
674 case CORINFO_HELP_INIT_PINVOKE_FRAME:
675 return RBM_INIT_PINVOKE_FRAME_TRASH;
676 #endif // defined(_TARGET_X86_)
679 return RBM_CALLEE_TRASH_NOGC;
683 template <bool ForCodeGen>
684 void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
691 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
692 dumpConvertedVarSet(this, compCurLife);
693 printf(" -> %s ", VarSetOps::ToString(this, newLife));
694 dumpConvertedVarSet(this, newLife);
699 /* We should only be called when the live set has actually changed */
701 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
705 VarSetOps::Assign(this, compCurLife, newLife);
709 /* Figure out which variables are becoming live/dead at this point */
711 // deadSet = compCurLife - newLife
712 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
714 // bornSet = newLife - compCurLife
715 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
717 /* Can't simultaneously become live and dead at the same time */
719 // (deadSet UNION bornSet) != EMPTY
720 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
721 // (deadSet INTERSECTION bornSet) == EMPTY
722 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
724 VarSetOps::Assign(this, compCurLife, newLife);
726 // Handle the dying vars first, then the newly live vars.
727 // This is because, in the RyuJIT backend case, they may occupy registers that
728 // will be occupied by another var that is newly live.
729 VarSetOps::Iter deadIter(this, deadSet);
730 unsigned deadVarIndex = 0;
731 while (deadIter.NextElem(&deadVarIndex))
733 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
734 varDsc = lvaTable + varNum;
735 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
736 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
738 if (varDsc->lvIsInReg())
740 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
742 regMaskTP regMask = varDsc->lvRegMask();
745 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
749 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
751 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
753 // This isn't in a register, so update the gcVarPtrSetCur.
754 else if (isGCRef || isByRef)
756 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
757 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
761 VarSetOps::Iter bornIter(this, bornSet);
762 unsigned bornVarIndex = 0;
763 while (bornIter.NextElem(&bornVarIndex))
765 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
766 varDsc = lvaTable + varNum;
767 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
768 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
770 if (varDsc->lvIsInReg())
773 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
775 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
778 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
779 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
780 regMaskTP regMask = varDsc->lvRegMask();
783 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
787 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
790 // This isn't in a register, so update the gcVarPtrSetCur
791 else if (lvaIsGCTracked(varDsc))
793 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
794 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
801 // Need an explicit instantiation.
802 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
804 /*****************************************************************************
808 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
810 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
813 /*****************************************************************************
817 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
819 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
823 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
828 //----------------------------------------------------------------------
829 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
832 // tree - spilled GenTree node
835 // TempDsc corresponding to tree
836 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
838 // tree must be in spilled state.
839 assert((tree->gtFlags & GTF_SPILLED) != 0);
841 // Get the tree's SpillDsc.
842 RegSet::SpillDsc* prevDsc;
843 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
844 assert(spillDsc != nullptr);
846 // Get the temp desc.
847 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
851 #ifdef _TARGET_XARCH_
853 #ifdef _TARGET_AMD64_
854 // Returns relocation type hint for an addr.
855 // Note that there are no reloc hints on x86.
858 // addr - data address
861 // relocation type hint
863 unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
865 return compiler->eeGetRelocTypeHint((void*)addr);
867 #endif //_TARGET_AMD64_
869 // Return true if an absolute indirect data address can be encoded as IP-relative.
870 // offset. Note that this method should be used only when the caller knows that
871 // the address is an icon value that VM has given and there is no GenTree node
872 // representing it. Otherwise, one should always use FitsInAddrBase().
875 // addr - an absolute indirect data address
878 // true if indir data addr could be encoded as IP-relative offset.
880 bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
882 #ifdef _TARGET_AMD64_
883 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
885 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
890 // Return true if an indirect code address can be encoded as IP-relative offset.
891 // Note that this method should be used only when the caller knows that the
892 // address is an icon value that VM has given and there is no GenTree node
893 // representing it. Otherwise, one should always use FitsInAddrBase().
896 // addr - an absolute indirect code address
899 // true if indir code addr could be encoded as IP-relative offset.
901 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
903 #ifdef _TARGET_AMD64_
904 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
906 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
911 // Return true if an indirect code address can be encoded as 32-bit displacement
912 // relative to zero. Note that this method should be used only when the caller
913 // knows that the address is an icon value that VM has given and there is no
914 // GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
917 // addr - absolute indirect code address
920 // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
922 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
924 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
927 // Return true if an absolute indirect code address needs a relocation recorded with VM.
930 // addr - an absolute indirect code address
933 // true if indir code addr needs a relocation recorded with VM
935 bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
937 // If generating relocatable ngen code, then all code addr should go through relocation
938 if (compiler->opts.compReloc)
943 #ifdef _TARGET_AMD64_
944 // See if the code indir addr can be encoded as 32-bit displacement relative to zero.
945 // We don't need a relocation in that case.
946 if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr))
951 // Else we need a relocation.
954 // On x86 there is no need to record or ask for relocations during jitting,
955 // because all addrs fit within 32-bits.
957 #endif //_TARGET_X86_
960 // Return true if a direct code address needs to be marked as relocatable.
963 // addr - absolute direct code address
966 // true if direct code addr needs a relocation recorded with VM
968 bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
970 // If generating relocatable ngen code, then all code addr should go through relocation
971 if (compiler->opts.compReloc)
976 #ifdef _TARGET_AMD64_
977 // By default all direct code addresses go through relocation so that VM will setup
978 // a jump stub if addr cannot be encoded as pc-relative offset.
981 // On x86 there is no need for recording relocations during jitting,
982 // because all addrs fit within 32-bits.
984 #endif //_TARGET_X86_
986 #endif //_TARGET_XARCH_
988 /*****************************************************************************
990 * The following can be used to create basic blocks that serve as labels for
991 * the emitter. Use with caution - these are not real basic blocks!
996 BasicBlock* CodeGen::genCreateTempLabel()
999 // These blocks don't affect FP
1000 compiler->fgSafeBasicBlockCreation = true;
1003 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1006 compiler->fgSafeBasicBlockCreation = false;
1009 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
1011 // Use coldness of current block, as this label will
1012 // be contained in it.
1013 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
1017 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1019 block->bbTgtStkDepth = genStackLevel / sizeof(int);
1026 void CodeGen::genDefineTempLabel(BasicBlock* label)
1029 if (compiler->opts.dspCode)
1031 printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum);
1035 label->bbEmitCookie =
1036 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1039 /*****************************************************************************
1041 * Adjust the stack pointer by the given value; assumes that this follows
1042 * a call so only callee-saved registers (and registers that may hold a
1043 * return value) are used at this point.
1046 void CodeGen::genAdjustSP(target_ssize_t delta)
1048 #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
1049 if (delta == sizeof(int))
1050 inst_RV(INS_pop, REG_ECX, TYP_INT);
1053 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1056 //------------------------------------------------------------------------
1057 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1060 // block - The BasicBlock for which we are about to generate code.
1063 // Must be called just prior to generating code for 'block'.
1066 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1067 // and if 'block' is a throw helper block with a non-zero stack level.
1069 void CodeGen::genAdjustStackLevel(BasicBlock* block)
1071 #if !FEATURE_FIXED_OUT_ARGS
1072 // Check for inserted throw blocks and adjust genStackLevel.
1073 CLANG_FORMAT_COMMENT_ANCHOR;
1075 #if defined(UNIX_X86_ABI)
1076 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1078 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1079 // at this point if a jump to this block is made in the middle of pushing arugments.
1081 // Here we restore SP to prevent potential stack alignment issues.
1082 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1086 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1088 noway_assert(block->bbFlags & BBF_JMP_TARGET);
1090 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1092 if (genStackLevel != 0)
1095 getEmitter()->emitMarkStackLvl(genStackLevel);
1096 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1098 #else // _TARGET_X86_
1099 NYI("Need emitMarkStackLvl()");
1100 #endif // _TARGET_X86_
1103 #endif // !FEATURE_FIXED_OUT_ARGS
1106 #ifdef _TARGET_ARMARCH_
1108 // alignmentWB is out param
1109 unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1111 unsigned alignment = 0;
1112 unsigned opSize = 0;
1114 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1116 opSize = InferStructOpSizeAlign(op, &alignment);
1120 alignment = genTypeAlignments[op->TypeGet()];
1121 opSize = genTypeSizes[op->TypeGet()];
1124 assert(opSize != 0);
1125 assert(alignment != 0);
1127 (*alignmentWB) = alignment;
1131 // alignmentWB is out param
1132 unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1134 unsigned alignment = 0;
1135 unsigned opSize = 0;
1137 while (op->gtOper == GT_COMMA)
1139 op = op->gtOp.gtOp2;
1142 if (op->gtOper == GT_OBJ)
1144 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1145 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1146 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1148 else if (op->gtOper == GT_LCL_VAR)
1150 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1151 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1152 assert(varDsc->lvType == TYP_STRUCT);
1153 opSize = varDsc->lvSize();
1154 #ifndef _TARGET_64BIT_
1155 if (varDsc->lvStructDoubleAlign)
1157 alignment = TARGET_POINTER_SIZE * 2;
1160 #endif // !_TARGET_64BIT_
1162 alignment = TARGET_POINTER_SIZE;
1165 else if (op->OperIsCopyBlkOp())
1167 GenTree* op2 = op->gtOp.gtOp2;
1169 if (op2->OperGet() == GT_CNS_INT)
1171 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1173 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1174 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1176 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1180 opSize = (unsigned)op2->gtIntCon.gtIconVal;
1181 GenTree* op1 = op->gtOp.gtOp1;
1182 assert(op1->OperGet() == GT_LIST);
1183 GenTree* dstAddr = op1->gtOp.gtOp1;
1184 if (dstAddr->OperGet() == GT_ADDR)
1186 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1190 assert(!"Unhandle dstAddr node");
1191 alignment = TARGET_POINTER_SIZE;
1197 noway_assert(!"Variable sized COPYBLK register arg!");
1199 alignment = TARGET_POINTER_SIZE;
1202 else if (op->gtOper == GT_MKREFANY)
1204 opSize = TARGET_POINTER_SIZE * 2;
1205 alignment = TARGET_POINTER_SIZE;
1207 else if (op->IsArgPlaceHolderNode())
1209 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1210 assert(clsHnd != 0);
1211 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1212 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1216 assert(!"Unhandled gtOper");
1217 opSize = TARGET_POINTER_SIZE;
1218 alignment = TARGET_POINTER_SIZE;
1221 assert(opSize != 0);
1222 assert(alignment != 0);
1224 (*alignmentWB) = alignment;
1228 #endif // _TARGET_ARMARCH_
1230 /*****************************************************************************
1232 * Take an address expression and try to find the best set of components to
1233 * form an address mode; returns non-zero if this is successful.
1235 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1236 * Refactor this code so that the underlying analysis can be used in
1237 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1238 * option to not generate the code.
1240 * 'fold' specifies if it is OK to fold the array index which hangs off
1243 * If successful, the parameters will be set to the following values:
1245 * *rv1Ptr ... base operand
1246 * *rv2Ptr ... optional operand
1247 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1248 * #if SCALED_ADDR_MODES
1249 * *mulPtr ... optional multiplier (2/4/8) for rv2
1250 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1252 * *cnsPtr ... integer constant [optional]
1254 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1255 * identifies the components that might be used to
1256 * form an address mode later on.
// NOTE(review): this span is a garbled extract of genCreateAddrMode — original source line
// numbers are fused into the text, and many lines (the remaining parameters of the signature,
// braces, the 'AGAIN:' label, and the final out-parameter assembly) were dropped by the
// extraction. Kept byte-identical; restore from the upstream file before compiling.
1259 bool CodeGen::genCreateAddrMode(GenTree* addr,
1264 #if SCALED_ADDR_MODES
1266 #endif // SCALED_ADDR_MODES
1270 The following indirections are valid address modes on x86/x64:
1272 [ icon] * not handled here
1276 [reg1 + reg2 + icon]
1283 [reg1 + 2 * reg2 + icon]
1284 [reg1 + 4 * reg2 + icon]
1285 [reg1 + 8 * reg2 + icon]
1287 The following indirections are valid address modes on arm64:
1292 [reg1 + reg2 * natural-scale]
1296 /* All indirect address modes require the address to be an addition */
1298 if (addr->gtOper != GT_ADD)
1303 // Can't use indirect addressing mode as we need to check for overflow.
1304 // Also, can't use 'lea' as it doesn't set the flags.
1306 if (addr->gtOverflow())
1311 GenTree* rv1 = nullptr;
1312 GenTree* rv2 = nullptr;
1318 #if SCALED_ADDR_MODES
1320 #endif // SCALED_ADDR_MODES
1324 /* What order are the sub-operands to be evaluated */
1326 if (addr->gtFlags & GTF_REVERSE_OPS)
1328 op1 = addr->gtOp.gtOp2;
1329 op2 = addr->gtOp.gtOp1;
1333 op1 = addr->gtOp.gtOp1;
1334 op2 = addr->gtOp.gtOp2;
1337 bool rev = false; // Is op2 first in the evaluation order?
1340 A complex address mode can combine the following operands:
1342 op1 ... base address
1343 op2 ... optional scaled index
1344 #if SCALED_ADDR_MODES
1345 mul ... optional multiplier (2/4/8) for op2
1347 cns ... optional displacement
1349 Here we try to find such a set of operands and arrange for these
1350 to sit in registers.
1354 #if SCALED_ADDR_MODES
1356 #endif // SCALED_ADDR_MODES
1359 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1360 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1361 here if we find a scaled index.
1363 CLANG_FORMAT_COMMENT_ANCHOR;
1365 #if SCALED_ADDR_MODES
1367 #endif // SCALED_ADDR_MODES
1369 /* Special case: keep constants as 'op2' */
1371 if (op1->IsCnsIntOrI())
1373 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1379 /* Check for an addition of a constant */
1381 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1383 /* We're adding a constant */
1385 cns += op2->gtIntConCommon.IconValue();
1387 #if defined(_TARGET_ARMARCH_)
1391 /* Inspect the operand the constant is being added to */
1393 switch (op1->gtOper)
1397 if (op1->gtOverflow())
1402 op2 = op1->gtOp.gtOp2;
1403 op1 = op1->gtOp.gtOp1;
1407 #if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1408 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1410 if (op1->gtOverflow())
1412 return false; // Need overflow check
1419 mul = op1->GetScaledIndex();
1422 /* We can use "[mul*rv2 + icon]" */
1425 rv2 = op1->gtOp.gtOp1;
1430 #endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1437 /* The best we can do is "[rv1 + icon]" */
1445 // op2 is not a constant. So keep on trying.
1447 /* Neither op1 nor op2 are sitting in a register right now */
1449 switch (op1->gtOper)
1451 #if !defined(_TARGET_ARMARCH_)
1452 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1455 if (op1->gtOverflow())
1460 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
1462 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
1463 op1 = op1->gtOp.gtOp1;
1470 #if SCALED_ADDR_MODES
1474 if (op1->gtOverflow())
1483 mul = op1->GetScaledIndex();
1486 /* 'op1' is a scaled value */
1489 rv2 = op1->gtOp.gtOp1;
// Fold nested GT_MUL/GT_LSH scales into a single multiplier while it stays a legal index scale.
1492 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1494 if (jitIsScaleIndexMul(argScale * mul))
1496 mul = mul * argScale;
1497 rv2 = rv2->gtOp.gtOp1;
1505 noway_assert(rev == false);
1512 #endif // SCALED_ADDR_MODES
1513 #endif // !_TARGET_ARMARCH
1517 op1 = op1->gtOp.gtOp1;
1522 op1 = op1->gtOp.gtOp2;
1530 switch (op2->gtOper)
1532 #if !defined(_TARGET_ARMARCH_)
1533 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1536 if (op2->gtOverflow())
1541 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
1543 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1544 op2 = op2->gtOp.gtOp1;
1551 #if SCALED_ADDR_MODES
1555 if (op2->gtOverflow())
1564 mul = op2->GetScaledIndex();
1567 // 'op2' is a scaled value...is its argument also scaled?
1569 rv2 = op2->gtOp.gtOp1;
1570 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1572 if (jitIsScaleIndexMul(argScale * mul))
1574 mul = mul * argScale;
1575 rv2 = rv2->gtOp.gtOp1;
1589 #endif // SCALED_ADDR_MODES
1590 #endif // !_TARGET_ARMARCH
1594 op2 = op2->gtOp.gtOp1;
1599 op2 = op2->gtOp.gtOp2;
1606 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
1610 #ifdef _TARGET_ARM64_
1618 /* Make sure a GC address doesn't end up in 'rv2' */
1620 if (varTypeIsGC(rv2->TypeGet()))
1622 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
1631 /* Special case: constant array index (that is range-checked) */
1638 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
1640 /* For valuetype arrays where we can't use the scaled address
1641 mode, rv2 will point to the scaled index. So we have to do
1644 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1652 /* May be a simple array. rv2 points to the actual index */
1658 /* Get hold of the array index and see if it's a constant */
1659 if (index->IsIntCnsFitsInI32())
1661 /* Get hold of the index value */
1662 ssize_t ixv = index->AsIntConCommon()->IconValue();
1664 #if SCALED_ADDR_MODES
1665 /* Scale the index if necessary */
1672 if (FitsIn<INT32>(cns + ixv))
1674 /* Add the scaled index to the offset value */
1678 #if SCALED_ADDR_MODES
1679 /* There is no scaled operand any more */
1688 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
1689 noway_assert(rv1 || mul != 1);
1691 noway_assert(FitsIn<INT32>(cns));
1693 if (rv1 == nullptr && rv2 == nullptr)
1698 /* Success - return the various components to the caller */
1703 #if SCALED_ADDR_MODES
1711 /*****************************************************************************
1712 * The condition to use for (the jmp/set for) the given type of operation
1714 * In case of amd64, this routine should be used when there is no gentree available
1715 * and one needs to generate jumps based on integer comparisons. When gentree is
1716 * available always use its overloaded version.
1721 emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
1723 const static BYTE genJCCinsSigned[] = {
1724 #if defined(_TARGET_XARCH_)
1731 EJ_je, // GT_TEST_EQ
1732 EJ_jne, // GT_TEST_NE
1733 #elif defined(_TARGET_ARMARCH_)
1740 #if defined(_TARGET_ARM64_)
1741 EJ_eq, // GT_TEST_EQ
1742 EJ_ne, // GT_TEST_NE
1747 const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
1749 #if defined(_TARGET_XARCH_)
1756 EJ_je, // GT_TEST_EQ
1757 EJ_jne, // GT_TEST_NE
1758 #elif defined(_TARGET_ARMARCH_)
1765 #if defined(_TARGET_ARM64_)
1766 EJ_eq, // GT_TEST_EQ
1767 EJ_ne, // GT_TEST_NE
1772 const static BYTE genJCCinsLogical[] = /* logical operation */
1774 #if defined(_TARGET_XARCH_)
1775 EJ_je, // GT_EQ (Z == 1)
1776 EJ_jne, // GT_NE (Z == 0)
1777 EJ_js, // GT_LT (S == 1)
1779 EJ_jns, // GT_GE (S == 0)
1781 EJ_NONE, // GT_TEST_EQ
1782 EJ_NONE, // GT_TEST_NE
1783 #elif defined(_TARGET_ARMARCH_)
1784 EJ_eq, // GT_EQ (Z == 1)
1785 EJ_ne, // GT_NE (Z == 0)
1786 EJ_mi, // GT_LT (N == 1)
1788 EJ_pl, // GT_GE (N == 0)
1790 #if defined(_TARGET_ARM64_)
1791 EJ_eq, // GT_TEST_EQ
1792 EJ_ne, // GT_TEST_NE
1797 #if defined(_TARGET_XARCH_)
1798 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
1799 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
1800 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
1801 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
1802 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
1803 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
1804 assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1805 assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1807 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
1808 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
1809 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
1810 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
1811 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
1812 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
1813 assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1814 assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1816 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
1817 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
1818 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
1819 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
1820 #elif defined(_TARGET_ARMARCH_)
1821 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
1822 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
1823 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
1824 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
1825 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
1826 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
1828 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
1829 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
1830 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
1831 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
1832 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
1833 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
1835 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
1836 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
1837 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
1838 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
1840 assert(!"unknown arch");
1842 assert(GenTree::OperIsCompare(cmp));
1844 emitJumpKind result = EJ_COUNT;
1846 if (compareKind == CK_UNSIGNED)
1848 result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
1850 else if (compareKind == CK_SIGNED)
1852 result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
1854 else if (compareKind == CK_LOGICAL)
1856 result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
1858 assert(result != EJ_COUNT);
#ifdef _TARGET_ARMARCH_
//------------------------------------------------------------------------
// genEmitGSCookieCheck: Generate code to check that the GS cookie
// wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
//
// Arguments:
//    pushReg - true if we may be in a JMP-call situation, in which case the
//              argument registers may not be used as temporaries.
//
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

    // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
    // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
    if (!pushReg && (compiler->info.compRetType == TYP_REF))
    {
        gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
    }

    // We need two temporary registers, to load the GS cookie values and compare them. We can't use
    // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
    // callee-trash registers, which should not contain anything interesting at this point.
    // We don't have any IR node representing this check, so LSRA can't communicate registers
    // for us to use.

    regNumber regGSConst = REG_GSCOOKIE_TMP_0;
    regNumber regGSValue = REG_GSCOOKIE_TMP_1;

    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
    {
        // load the GS cookie constant into a reg
        //
        genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
    }
    else
    {
        // Ngen case - GS cookie constant needs to be accessed through an indirection.
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
    }
    // Load this method's GS value from the stack frame
    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
    // Compare with the GC cookie constant
    getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);

    BasicBlock*  gsCheckBlk = genCreateTempLabel();
    emitJumpKind jmpEqual   = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, gsCheckBlk);
    // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
    genDefineTempLabel(gsCheckBlk);
}
#endif // _TARGET_ARMARCH_
1911 /*****************************************************************************
1913 * Generate an exit sequence for a return from a method (note: when compiling
1914 * for speed there might be multiple exit points).
1917 void CodeGen::genExitCode(BasicBlock* block)
1919 /* Just wrote the first instruction of the epilog - inform debugger
1920 Note that this may result in a duplicate IPmapping entry, and
1923 // For non-optimized debuggable code, there is only one epilog.
1924 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
1926 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
1927 if (compiler->getNeedsGSSecurityCookie())
1929 genEmitGSCookieCheck(jmpEpilog);
1934 // The GS cookie check created a temp label that has no live
1935 // incoming GC registers, we need to fix that
1940 /* Figure out which register parameters hold pointers */
1942 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1945 noway_assert(varDsc->lvIsParam);
1947 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
1950 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1951 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1955 genReserveEpilog(block);
1958 //------------------------------------------------------------------------
1959 // genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1962 // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1963 // Otherwise, we generate the 'throw' inline.
1966 // jumpKind - jump kind to generate;
1967 // codeKind - the special throw-helper kind;
1968 // failBlk - optional fail target block, if it is already known;
1970 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk)
1972 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1973 #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
1974 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1975 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
1976 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1980 // For code with throw helper blocks, find and use the helper block for
1981 // raising the exception. The block may be shared by other trees too.
1983 BasicBlock* excpRaisingBlock;
1985 if (failBlk != nullptr)
1987 // We already know which block to jump to. Use that.
1988 assert(failBlk->gtOper == GT_LABEL);
1989 excpRaisingBlock = failBlk->gtLabel.gtLabBB;
1992 Compiler::AddCodeDsc* add =
1993 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1994 assert(excpRaisingBlock == add->acdDstBlk);
1995 #if !FEATURE_FIXED_OUT_ARGS
1996 assert(add->acdStkLvlInit || isFramePointerUsed());
1997 #endif // !FEATURE_FIXED_OUT_ARGS
2002 // Find the helper-block which raises the exception.
2003 Compiler::AddCodeDsc* add =
2004 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2005 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2006 excpRaisingBlock = add->acdDstBlk;
2007 #if !FEATURE_FIXED_OUT_ARGS
2008 assert(add->acdStkLvlInit || isFramePointerUsed());
2009 #endif // !FEATURE_FIXED_OUT_ARGS
2012 noway_assert(excpRaisingBlock != nullptr);
2014 // Jump to the exception-throwing block on error.
2015 inst_JMP(jumpKind, excpRaisingBlock);
2019 // The code to throw the exception will be generated inline, and
2020 // we will jump around it in the normal non-exception case.
2022 BasicBlock* tgtBlk = nullptr;
2023 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2024 if (reverseJumpKind != jumpKind)
2026 tgtBlk = genCreateTempLabel();
2027 inst_JMP(reverseJumpKind, tgtBlk);
2030 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
2032 // Define the spot for the normal non-exception case to jump to.
2033 if (tgtBlk != nullptr)
2035 assert(reverseJumpKind != jumpKind);
2036 genDefineTempLabel(tgtBlk);
2041 /*****************************************************************************
2043 * The last operation done was generating code for "tree" and that would
2044 * have set the flags. Check if the operation caused an overflow.
2048 void CodeGen::genCheckOverflow(GenTree* tree)
2050 // Overflow-check should be asked for this tree
2051 noway_assert(tree->gtOverflow());
2053 const var_types type = tree->TypeGet();
2055 // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
2056 noway_assert(!varTypeIsSmall(type));
2058 emitJumpKind jumpKind;
2060 #ifdef _TARGET_ARM64_
2061 if (tree->OperGet() == GT_MUL)
2068 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
2070 #if defined(_TARGET_XARCH_)
2072 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2074 #elif defined(_TARGET_ARMARCH_)
2076 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2078 if (jumpKind == EJ_lo)
2080 if (tree->OperGet() != GT_SUB)
2086 #endif // defined(_TARGET_ARMARCH_)
2089 // Jump to the block which will throw the expection
2091 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2094 #if FEATURE_EH_FUNCLETS
2096 /*****************************************************************************
2098 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
2099 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
2104 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2106 if (block->bbFlags & BBF_FUNCLET_BEG)
2108 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2109 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2111 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2115 // We shouldn't see FUNC_ROOT
2116 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2117 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
2122 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2123 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2125 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2127 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2129 assert(!block->hasHndIndex());
2133 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2134 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2139 #if defined(_TARGET_ARM_)
2140 void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2142 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2143 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2144 // calls the funclet during non-exceptional control flow.
2145 if (block->bbFlags & BBF_FINALLY_TARGET)
2147 assert(block->bbFlags & BBF_JMP_TARGET);
2150 if (compiler->verbose)
2152 printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n", block->bbNum);
2155 // Create a label that we'll use for computing the start of an EH region, if this block is
2156 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
2157 // determining the EH regions, then this NOP would end up outside of the region, if this
2158 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
2159 // would be executed, which we would prefer not to do.
2161 block->bbUnwindNopEmitCookie =
2162 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
2169 #endif // FEATURE_EH_FUNCLETS
2171 /*****************************************************************************
2173 * Generate code for the function.
2176 void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
2181 printf("*************** In genGenerateCode()\n");
2182 compiler->fgDispBasicBlocks(compiler->verboseTrees);
2187 unsigned prologSize;
2188 unsigned epilogSize;
2193 genInterruptibleUsed = true;
2196 genNeedPrologStackProbe = false;
2199 compiler->fgDebugCheckBBlist();
2202 /* This is the real thing */
2204 genPrepForCompiler();
2206 /* Prepare the emitter */
2207 getEmitter()->Init();
2209 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
2213 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
2215 compiler->opts.disAsm = true;
2218 if (compiler->opts.disAsm)
2220 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2222 printf("; Emitting ");
2224 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2226 printf("SMALL_CODE");
2228 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2230 printf("FAST_CODE");
2234 printf("BLENDED_CODE");
2239 if (compiler->info.genCPU == CPU_X86)
2241 printf("generic X86 CPU");
2243 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
2245 printf("Pentium 4");
2247 else if (compiler->info.genCPU == CPU_X64)
2249 if (compiler->canUseVexEncoding())
2251 printf("X64 CPU with AVX");
2255 printf("X64 CPU with SSE2");
2258 else if (compiler->info.genCPU == CPU_ARM)
2260 printf("generic ARM CPU");
2262 else if (compiler->info.genCPU == CPU_ARM64)
2264 printf("generic ARM64 CPU");
2268 printf("unknown architecture");
2271 #if defined(_TARGET_WINDOWS_)
2272 printf(" - Windows");
2273 #elif defined(_TARGET_UNIX_)
2279 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2281 printf("; Tier-0 compilation\n");
2283 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
2285 printf("; Tier-1 compilation\n");
2288 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
2290 printf("; optimized code\n");
2292 else if (compiler->opts.compDbgCode)
2294 printf("; debuggable code\n");
2296 else if (compiler->opts.MinOpts())
2298 printf("; compiler->opts.MinOpts() is true\n");
2302 printf("; unknown optimization flags\n");
2306 if (compiler->genDoubleAlign())
2307 printf("; double-aligned frame\n");
2310 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
2312 if (genInterruptible)
2314 printf("; fully interruptible\n");
2318 printf("; partially interruptible\n");
2321 if (compiler->fgHaveProfileData())
2323 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
2324 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
2327 if (compiler->fgProfileData_ILSizeMismatch)
2329 printf("; discarded IBC profile data due to mismatch in ILSize\n");
2334 // We compute the final frame layout before code generation. This is because LSRA
2335 // has already computed exactly the maximum concurrent number of spill temps of each type that are
2336 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
2337 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
2338 // memory from the VM.
2342 unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
2344 getEmitter()->emitBegFN(isFramePointerUsed()
2347 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
2348 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
2353 /* Now generate code for the function */
2357 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
2358 // generation hasn't touched it (it shouldn't!).
2361 compiler->lvaTableDump();
2365 /* We can now generate the function prolog and epilog */
2367 genGeneratePrologsAndEpilogs();
2369 /* Bind jump distances */
2371 getEmitter()->emitJumpDistBind();
2373 /* The code is now complete and final; it should not change after this. */
2375 /* Compute the size of the code sections that we are going to ask the VM
2376 to allocate. Note that this might not be precisely the size of the
2377 code we emit, though it's fatal if we emit more code than the size we
2379 (Note: an example of a case where we emit less code would be useful.)
2382 getEmitter()->emitComputeCodeSizes();
2386 // Code to test or stress our ability to run a fallback compile.
2387 // We trigger the fallback here, before asking the VM for any memory,
2388 // because if not, we will leak mem, as the current codebase can't free
2389 // the mem after the emitter asks the VM for it. As this is only a stress
2390 // mode, we only want the functionality, and don't care about the relative
2391 // ugliness of having the failure here.
2392 if (!compiler->jitFallbackCompile)
2394 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
2395 // especially that caused by enabling JIT stress.
2396 if (!JitConfig.JitNoForceFallback())
2398 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
2400 NO_WAY_NOASSERT("Stress failure");
2407 /* We've finished collecting all the unwind information for the function. Now reserve
2408 space for it from the VM.
2411 compiler->unwindReserve();
2415 size_t dataSize = getEmitter()->emitDataSize();
2417 #endif // DISPLAY_SIZES
2421 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
2423 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2424 trackedStackPtrsContig = false;
2425 #elif defined(_TARGET_ARM_)
2426 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
2427 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
2429 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
2433 /* We're done generating code for this function */
2434 compiler->compCodeGenDone = true;
2437 compiler->EndPhase(PHASE_GENERATE_CODE);
2439 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
2440 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
2441 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
2443 compiler->EndPhase(PHASE_EMIT_CODE);
2446 if (compiler->opts.disAsm)
2448 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
2449 compiler->info.compFullName);
2450 printf("; ============================================================\n");
2451 printf(""); // in our logic this causes a flush
2456 printf("*************** After end code gen, before unwindEmit()\n");
2457 getEmitter()->emitDispIGlist(true);
2461 #if EMIT_TRACK_STACK_DEPTH
2462 // Check our max stack level. Needed for fgAddCodeRef().
2463 // We need to relax the assert as our estimation won't include code-gen
2464 // stack changes (which we know don't affect fgAddCodeRef()).
2465 // NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a
2466 // count of DWORD-sized arguments, NOT argument size in bytes.
2468 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
2469 compiler->compHndBBtabCount + // Return address for locally-called finallys
2470 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
2471 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
2472 #if defined(UNIX_X86_ABI)
2473 // Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth.
2474 assert(maxNestedAlignment % sizeof(int) == 0);
2475 maxAllowedStackDepth += maxNestedAlignment / sizeof(int);
2477 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
2479 #endif // EMIT_TRACK_STACK_DEPTH
2481 *nativeSizeOfCode = codeSize;
2482 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2484 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2486 // Make sure that the x86 alignment and cache prefetch optimization rules
2489 // Don't start a method in the last 7 bytes of a 16-byte alignment area
2490 // unless we are generating SMALL_CODE
2491 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
2493 /* Now that the code is issued, we can finalize and emit the unwind data */
2495 compiler->unwindEmit(*codePtr, coldCodePtr);
2497 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
2501 /* Finalize the Local Var info in terms of generated code */
2506 unsigned finalHotCodeSize;
2507 unsigned finalColdCodeSize;
2508 if (compiler->fgFirstColdBlock != nullptr)
2510 // We did some hot/cold splitting. The hot section is always padded out to the
2511 // size we thought it would be, but the cold section is not.
2512 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2513 assert(compiler->info.compTotalHotCodeSize > 0);
2514 assert(compiler->info.compTotalColdCodeSize > 0);
2515 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2516 finalColdCodeSize = codeSize - finalHotCodeSize;
2520 // No hot/cold splitting
2521 assert(codeSize <= compiler->info.compTotalHotCodeSize);
2522 assert(compiler->info.compTotalHotCodeSize > 0);
2523 assert(compiler->info.compTotalColdCodeSize == 0);
2524 finalHotCodeSize = codeSize;
2525 finalColdCodeSize = 0;
2527 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2528 #endif // LATE_DISASM
2530 /* Report any exception handlers to the VM */
2534 #ifdef JIT32_GCENCODER
2539 // Create and store the GC info for this method.
2540 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
2543 FILE* dmpf = jitstdout;
2545 compiler->opts.dmpHex = false;
2546 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
2549 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2554 compiler->opts.dmpHex = true;
2557 if (compiler->opts.dmpHex)
2559 size_t consSize = getEmitter()->emitDataSize();
2560 size_t infoSize = compiler->compInfoBlkSize;
2562 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2563 fprintf(dmpf, "\n");
2567 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2571 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2573 #ifdef JIT32_GCENCODER
2575 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2576 #endif // JIT32_GCENCODER
2578 fprintf(dmpf, "\n");
2582 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
2586 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2588 #ifdef JIT32_GCENCODER
2590 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2591 #endif // JIT32_GCENCODER
2596 if (dmpf != jitstdout)
2603 /* Tell the emitter that we're done with this function */
2605 getEmitter()->emitEndFN();
2607 /* Shut down the spill logic */
2609 regSet.rsSpillDone();
2611 /* Shut down the temp logic */
2617 grossVMsize += compiler->info.compILCodeSize;
2618 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2619 grossNCsize += codeSize + dataSize;
2621 #endif // DISPLAY_SIZES
2623 compiler->EndPhase(PHASE_EMIT_GCEH);
2626 /*****************************************************************************
2628 * Report EH clauses to the VM
//------------------------------------------------------------------------
// genReportEH: Report the method's exception handling clauses to the VM.
//
// Reports every EH table entry via eeSetEHcount()/eeSetEHinfo(). Under
// FEATURE_EH_FUNCLETS it additionally reports "duplicate" clauses for
// funclets that were moved out of their enclosing 'try' regions, and under
// FEATURE_EH_CALLFINALLY_THUNKS one extra clause per cloned finally call
// site (BBJ_CALLFINALLY block). All clause counts are computed up front so
// the VM can be told the total (EHCount) before individual clauses are set.
//
2631 void CodeGen::genReportEH()
2633     if (compiler->compHndBBtabCount == 0)
2639     if (compiler->opts.dspEHTable)
2641         printf("*************** EH table for %s\n", compiler->info.compFullName);
         // CoreRT ABI does not use duplicate clauses or cloned finallys (checked below),
         // but does use the SAMETRY flag for mutually-protect catches.
2649     bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
         // Running total of clauses to report; starts with the EH table entries themselves.
2651     unsigned EHCount = compiler->compHndBBtabCount;
2653 #if FEATURE_EH_FUNCLETS
2654     // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
2656     unsigned duplicateClauseCount = 0;
2657     unsigned enclosingTryIndex;
2659     // Duplicate clauses are not used by CoreRT ABI
2662         for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2664             for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2665                                                                                  // ignoring 'mutual protect' trys
2666                  enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2667                  enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
                 // One duplicate clause is needed for every (true) enclosing 'try' of each handler.
2669                 ++duplicateClauseCount;
2672     EHCount += duplicateClauseCount;
2675 #if FEATURE_EH_CALLFINALLY_THUNKS
2676     unsigned clonedFinallyCount = 0;
2678     // Duplicate clauses are not used by CoreRT ABI
2681         // We don't keep track of how many cloned finally there are. So, go through and count.
2682         // We do a quick pass first through the EH table to see if there are any try/finally
2683         // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2685         bool anyFinallys = false;
2686         for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2687              HBtab < HBtabEnd; HBtab++)
2689             if (HBtab->HasFinallyHandler())
             // Each BBJ_CALLFINALLY block is a call site of a (possibly cloned) finally; count them all.
2697             for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2699                 if (block->bbJumpKind == BBJ_CALLFINALLY)
2701                     ++clonedFinallyCount;
2705         EHCount += clonedFinallyCount;
2708 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2710 #endif // FEATURE_EH_FUNCLETS
2713     if (compiler->opts.dspEHTable)
2715 #if FEATURE_EH_FUNCLETS
2716 #if FEATURE_EH_CALLFINALLY_THUNKS
2717         printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2718                compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2719         assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2720 #else  // !FEATURE_EH_CALLFINALLY_THUNKS
2721         printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2722                compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2723         assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2724 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
2725 #else  // !FEATURE_EH_FUNCLETS
2726         printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2727         assert(compiler->compHndBBtabCount == EHCount);
2728 #endif // !FEATURE_EH_FUNCLETS
2732     // Tell the VM how many EH clauses to expect.
2733     compiler->eeSetEHcount(EHCount);
2735     XTnum = 0; // This is the index we pass to the VM
     // First, report each EH table entry, in table order.
2737     for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2738          HBtab < HBtabEnd; HBtab++)
2740         UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2742         tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2743         hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
         // A region whose last block is the method's last block ends at the end of the
         // generated code; otherwise it ends at the start of the block after its last block.
2745         tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2746                                                            : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2747         hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2748                                                            : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2750         if (HBtab->HasFilter())
             // For filter clauses the "type" slot carries the filter's code offset instead.
2752             hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2756             hndTyp = HBtab->ebdTyp;
2759         CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2761         if (isCoreRTABI && (XTnum > 0))
2763             // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2764             // try block as the previous one. The runtime cannot reliably infer this information from
2765             // native code offsets because different try blocks can have the same offsets. Alternative
2766             // solution to this problem would be inserting extra nops to ensure that different try
2767             // blocks have different offsets.
2768             if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2770                 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2771                 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2772                 // IL as "try { try {} catch {} catch {} } finally {}".
2773                 assert(HBtab->HasCatchHandler());
2774                 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2778         // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2779         // the fields aren't accurate.
2781         CORINFO_EH_CLAUSE clause;
2782         clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2783         clause.Flags = flags;
2784         clause.TryOffset = tryBeg;
2785         clause.TryLength = tryEnd;
2786         clause.HandlerOffset = hndBeg;
2787         clause.HandlerLength = hndEnd;
2789         assert(XTnum < EHCount);
2791         // Tell the VM about this EH clause.
2792         compiler->eeSetEHinfo(XTnum, &clause);
2797 #if FEATURE_EH_FUNCLETS
2798     // Now output duplicated clauses.
2800     // If a funclet has been created by moving a handler out of a try region that it was originally nested
2801     // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2802     // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2803     // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2804     // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2805     // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2806     // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2809     // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2810     // try or handler region):
2828     // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2829     // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2830     // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2831     // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2832     // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2833     // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2834     // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2835     // representing try (1) protecting the new funclets catch (3) and (4).
2836     // The code will be generated as follows:
2838     // ABCFH // "main" code
2843     // The EH regions are:
2848     //  D -> G // "duplicate" clause
2849     //  E -> G // "duplicate" clause
2851     // Note that we actually need to generate one of these additional "duplicate" clauses for every
2852     // region the funclet is nested in. Take this example:
2884     // When we pull out funclets, we get the following generated code:
2886     // ABCDEHJMO // "main" function
2894     // And the EH regions we report to the VM are (in order; main clauses
2895     // first in most-to-least nested order, funclets ("duplicated clauses")
2896     // last, in most-to-least nested) are:
2904     //  F -> I // funclet clause #1 for F
2905     //  F -> K // funclet clause #2 for F
2906     //  F -> L // funclet clause #3 for F
2907     //  F -> N // funclet clause #4 for F
2908     //  G -> I // funclet clause #1 for G
2909     //  G -> K // funclet clause #2 for G
2910     //  G -> L // funclet clause #3 for G
2911     //  G -> N // funclet clause #4 for G
2912     //  I -> K // funclet clause #1 for I
2913     //  I -> L // funclet clause #2 for I
2914     //  I -> N // funclet clause #3 for I
2915     //  K -> N // funclet clause #1 for K
2916     //  L -> N // funclet clause #1 for L
2918     // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2919     // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2920     // to add a clause "F -> G" because F is NOT protected by G, but we still have
2921     // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2923     // The overall ordering of the clauses is still the same most-to-least nesting
2924     // after front-to-back start offset. Because we place the funclets at the end
2925     // these new clauses should also go at the end by this ordering.
2928     if (duplicateClauseCount > 0)
2930         unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2932         for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2934             unsigned enclosingTryIndex;
2936             EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2938             for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2939                                                                                   // ignoring 'mutual protect' trys
2940                  enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2941                  enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2943                 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2944                 // that will have the enclosing try protecting the funclet.
2946                 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2947                                                           // greater EH table index
2949                 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2951                 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2952                 // filter region, only the filter handler region. This is because exceptions in filters never
2953                 // escape; the VM swallows them.
2955                 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2956                 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2958                 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2959                 BasicBlock* bbHndLast = encTab->ebdHndLast;
2961                 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2963                 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2964                 hndBeg = compiler->ehCodeOffset(bbHndBeg);
                 // Same end-offset rule as in the main loop above.
2966                 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2967                                                            : compiler->ehCodeOffset(bbTryLast->bbNext);
2968                 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2969                                                            : compiler->ehCodeOffset(bbHndLast->bbNext);
2971                 if (encTab->HasFilter())
2973                     hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2977                     hndTyp = encTab->ebdTyp;
2980                 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2982                 // Tell the VM this is an extra clause caused by moving funclets out of line.
2983                 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2985                 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2986                 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2987                 // instruction immediately after the 'try' body. So, it really could be more accurately named
2990                 CORINFO_EH_CLAUSE clause;
2991                 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2992                 clause.Flags = flags;
2993                 clause.TryOffset = tryBeg;
2994                 clause.TryLength = tryEnd;
2995                 clause.HandlerOffset = hndBeg;
2996                 clause.HandlerLength = hndEnd;
2998                 assert(XTnum < EHCount);
3000                 // Tell the VM about this EH clause (a duplicated clause).
3001                 compiler->eeSetEHinfo(XTnum, &clause);
3004                 ++reportedDuplicateClauseCount;
                 // Once every counted duplicate has been reported, stop scanning early.
3007                 if (duplicateClauseCount == reportedDuplicateClauseCount)
3009                     break; // we've reported all of them; no need to continue looking
3013             } // for each 'true' enclosing 'try'
3014         } // for each EH table entry
3016         assert(duplicateClauseCount == reportedDuplicateClauseCount);
3017     } // if (duplicateClauseCount > 0)
3019 #if FEATURE_EH_CALLFINALLY_THUNKS
3020     if (clonedFinallyCount > 0)
3022         unsigned reportedClonedFinallyCount = 0;
3023         for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3025             if (block->bbJumpKind == BBJ_CALLFINALLY)
3027                 UNATIVE_OFFSET hndBeg, hndEnd;
3029                 hndBeg = compiler->ehCodeOffset(block);
3031                 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3032                 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3033                 BasicBlock* bbLabel = block->bbNext;
3034                 if (block->isBBCallAlwaysPair())
3036                     bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3038                 if (bbLabel == nullptr)
3040                     hndEnd = compiler->info.compNativeCodeSize;
3044                     assert(bbLabel->bbEmitCookie != nullptr);
3045                     hndEnd = compiler->ehCodeOffset(bbLabel);
3048                 CORINFO_EH_CLAUSE clause;
3049                 clause.ClassToken = 0; // unused
3050                 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
                 // Per the comment above, "TryLength" carries an offset here, not a length:
                 // the 'try' range for a cloned finally is the degenerate range at hndBeg.
3051                 clause.TryOffset = hndBeg;
3052                 clause.TryLength = hndBeg;
3053                 clause.HandlerOffset = hndBeg;
3054                 clause.HandlerLength = hndEnd;
3056                 assert(XTnum < EHCount);
3058                 // Tell the VM about this EH clause (a cloned finally clause).
3059                 compiler->eeSetEHinfo(XTnum, &clause);
3062                 ++reportedClonedFinallyCount;
                 // Once every counted cloned finally has been reported, stop scanning early.
3065                 if (clonedFinallyCount == reportedClonedFinallyCount)
3067                     break; // we're done; no need to keep looking
3070             } // block is BBJ_CALLFINALLY
3073         assert(clonedFinallyCount == reportedClonedFinallyCount);
3074     } // if (clonedFinallyCount > 0)
3075 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3077 #endif // FEATURE_EH_FUNCLETS
     // Every clause counted up front must have been reported exactly once.
3079     assert(XTnum == EHCount);
3082 //----------------------------------------------------------------------
3083 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3084 // helper should be used.
3087 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
3090 // true if an optimized write barrier helper should be used, false otherwise.
3091 // Note: only x86 implements register-specific source optimized write
3092 // barriers currently.
3094 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
     // Optimized (register-specific source) write barriers only exist on x86 builds
     // with NOGC_WRITE_BARRIERS (see the function header comment above).
3096 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
     // Every barrier form except the debug-only not-in-heap check can use an optimized barrier.
3098     return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3107 //----------------------------------------------------------------------
3108 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3109 // helper should be used.
3111 // This has the same functionality as the version of
3112 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
3113 // determining what the required write barrier form is, if possible.
3116 // tgt - target tree of write (e.g., GT_STOREIND)
3117 // assignVal - tree with value to write
3120 // true if an optimized write barrier helper should be used, false otherwise.
3121 // Note: only x86 implements register-specific source optimized write
3122 // barriers currently.
3124 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
3126 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
     // Compute the write barrier form for this store, then apply the same test as
     // the WriteBarrierForm-taking overload of this function.
3128     GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3129     return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3138 //----------------------------------------------------------------------
3139 // genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
3140 // barrier, and the write barrier form required, determine the helper to call.
3143 // tgt - target tree of write (e.g., GT_STOREIND)
3144 // wbf - already computed write barrier form to use
3147 // Write barrier helper to use.
3149 // Note: do not call this function to get an optimized write barrier helper (e.g.,
3152 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
     // Only GT_STOREIND write nodes are expected here.
3154     noway_assert(tgt->gtOper == GT_STOREIND);
     // Default to the unchecked write barrier (per the assert below, valid for
     // WBF_BarrierUnchecked and WBF_BarrierUnknown).
3156     CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
3159     if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
         // Debug-only helper paired with WBF_NoBarrier_CheckNotHeapInDebug.
3161         helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
3165     if (tgt->gtOper != GT_CLS_VAR)
3167         if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
             // Use the checked barrier when the target address is flagged as possibly
             // pointing anywhere (GTF_IND_TGTANYWHERE) ...
3169             if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
3171                 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
             // ... or when the address expression is a native int (TYP_I_IMPL) rather
             // than a managed ref/byref.
3173             else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
3175                 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
     // Sanity check: the chosen helper must be consistent with the requested barrier form.
3179     assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
3180            ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
3181             (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
3182            ((helper == CORINFO_HELP_ASSIGN_REF) &&
3183             (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
3188 //----------------------------------------------------------------------
3189 // genGCWriteBarrier: Generate a write barrier for a node.
3192 // tgt - target tree of write (e.g., GT_STOREIND)
3193 // wbf - already computed write barrier form to use
3195 void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
     // Select the appropriate write-barrier helper for this store, then emit the helper call.
3197     CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
3199 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
3200     // We classify the "tgt" trees as follows:
3201     // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
3202     //    IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, (GT_LCL_VAR)) }
3203     // then let "v" be the GT_LCL_VAR.
3204     //   * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
3205     //   * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
3206     //   * Otherwise, classify as CWBKind_OtherByRefLocal.
3207     // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, classify as CWBKind_AddrOfLocal.
3208     // Otherwise, classify as CWBKind_Unclassified.
3210     CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
3211     if (tgt->gtOper == GT_IND)
3213         GenTree* lcl = NULL;
3215         GenTree* indArg = tgt->gtOp.gtOp1;
         // Look through an ADDR -> IND wrapper to find the underlying address expression.
3216         if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
3218             indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
3220         if (indArg->gtOper == GT_LCL_VAR)
3224         else if (indArg->gtOper == GT_ADD)
             // For an ADD, the local may appear as either operand.
3226             if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3228                 lcl = indArg->gtOp.gtOp1;
3230             else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR)
3232                 lcl = indArg->gtOp.gtOp2;
3237             wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
3238             unsigned lclNum = lcl->AsLclVar()->GetLclNum();
3239             if (lclNum == compiler->info.compRetBuffArg)
3241                 wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
3245                 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
3246                 if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
3248                     wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
3254             // We should have eliminated the barrier for this case.
3255             assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
3259     if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
3263         // Enable this to sample the unclassified trees.
3264         static int unclassifiedBarrierSite = 0;
3265         if (wbKind == CWBKind_Unclassified)
3267             unclassifiedBarrierSite++;
3268             printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
         // Push the classification kind as an extra argument to the instrumented helper,
         // then undo the push's effect on our tracked stack level after the call.
3273         inst_IV(INS_push, wbKind);
3274         genEmitHelperCall(helper,
3276                           EA_PTRSIZE); // retSize
3277         SubtractStackLevel(4);
3281         genEmitHelperCall(helper,
3283                           EA_PTRSIZE); // retSize
3286 #else  // !FEATURE_COUNT_GC_WRITE_BARRIERS
3287     genEmitHelperCall(helper,
3289                       EA_PTRSIZE); // retSize
3290 #endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
3294 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3295 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3297 XX Prolog / Epilog XX
3299 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3300 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3303 /*****************************************************************************
3305 * Generates code for moving incoming register arguments to their
3306 * assigned location, in the function prolog.
3310 #pragma warning(push)
3311 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3313 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
3318 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
3322 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
3323 unsigned argNum; // current argNum, always in [0..argMax-1]
3324 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
3325 unsigned regArgNum; // index into the regArgTab[] table
3326 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
3327 bool doingFloat = regState->rsIsFloat;
3329 // We should be generating the prolog block when we are called
3330 assert(compiler->compGeneratingProlog);
3332 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
3333 noway_assert(regArgMaskLive != 0);
3335 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
3336 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
3338 // The regArgTab can always have unused entries,
3339 // for example if an architecture always increments the arg register number but uses either
3340 // an integer register or a floating point register to hold the next argument
3341 // then with a mix of float and integer args you could have:
3343 // sampleMethod(int i, float x, int j, float y, int k, float z);
3344 // r0, r2 and r4 as valid integer arguments with argMax as 5
3345 //  and f1, f3 and f5 as valid floating point arguments with argMax as 6
3346 // The first one is doingFloat==false and the second one is doingFloat==true
3348 // If a fixed return buffer (in r8) was also present then the first one would become:
3349 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
3352 argMax = regState->rsCalleeRegArgCount;
3353 fixedRetBufIndex = (unsigned)-1; // Invalid value
3355 // If necessary we will select a correct xtraReg for circular floating point args later.
3359 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
3361 else // we are doing the integer registers
3363 noway_assert(argMax <= MAX_REG_ARG);
3364 if (hasFixedRetBuffReg())
3366 fixedRetBufIndex = theFixedRetBuffArgNum();
3367 // We have an additional integer register argument when hasFixedRetBuffReg() is true
3368 argMax = fixedRetBufIndex + 1;
3369 assert(argMax == (MAX_REG_ARG + 1));
3374 // Construct a table with the register arguments, for detecting circular and
3375 // non-circular dependencies between the register arguments. A dependency is when
3376 // an argument register Rn needs to be moved to register Rm that is also an argument
3377 // register. The table is constructed in the order the arguments are passed in
3378 // registers: the first register argument is in regArgTab[0], the second in
3379 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
3380 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
3381 // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
3382 // we have increased the allocated size of the regArgTab[] by one.
3386 unsigned varNum; // index into compiler->lvaTable[] for this register argument
3387 #if defined(UNIX_AMD64_ABI)
3388 var_types type; // the Jit type of this regArgTab entry
3389 #endif // defined(UNIX_AMD64_ABI)
3390 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
3391 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
3392 // argument register number 'x'. Only used when circular = true.
3393 char slot; // 0 means the register is not used for a register argument
3394 // 1 means the first part of a register argument
3395 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
3396 bool stackArg; // true if the argument gets homed to the stack
3397 bool processed; // true after we've processed the argument (and it is in its final location)
3398 bool circular; // true if this register participates in a circular dependency loop.
3400 #ifdef UNIX_AMD64_ABI
3402 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
3403 // the type of the lclVar in ways that are not ascertainable from lvType.
3404 // So, for that case we retain the type of the register in the regArgTab.
3406 var_types getRegType(Compiler* compiler)
3408 return type; // UNIX_AMD64 implementation
3411 #else // !UNIX_AMD64_ABI
3413 // In other cases, we simply use the type of the lclVar to determine the type of the register.
3414 var_types getRegType(Compiler* compiler)
3416 const LclVarDsc& varDsc = compiler->lvaTable[varNum];
3417 // Check if this is an HFA register arg and return the HFA type
3418 if (varDsc.lvIsHfaRegArg())
3420 #if defined(_TARGET_WINDOWS_)
3421 // Cannot have hfa types on windows arm targets
3422 // in vararg methods.
3423 assert(!compiler->info.compIsVarArgs);
3424 #endif // defined(_TARGET_WINDOWS_)
3425 return varDsc.GetHfaType();
3427 return compiler->mangleVarArgsType(varDsc.lvType);
3430 #endif // !UNIX_AMD64_ABI
3431 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
3436 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
3438 varDsc = compiler->lvaTable + varNum;
3440 // Is this variable a register arg?
3441 if (!varDsc->lvIsParam)
3446 if (!varDsc->lvIsRegArg)
3451 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
3452 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
3453 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
3454     // use the original TYP_STRUCT argument.
3456 if (varDsc->lvPromoted || varDsc->lvIsStructField)
3458 LclVarDsc* parentVarDsc = varDsc;
3459 if (varDsc->lvIsStructField)
3461 assert(!varDsc->lvPromoted);
3462 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
3465 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
3467 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
3469 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
3471 // For register arguments that are independent promoted structs we put the promoted field varNum in the
3473 if (varDsc->lvPromoted)
3480 // For register arguments that are not independent promoted structs we put the parent struct varNum in
3482 if (varDsc->lvIsStructField)
3489 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
3490 // Change regType to the HFA type when we have a HFA argument
3491 if (varDsc->lvIsHfaRegArg())
3493 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3494 if (compiler->info.compIsVarArgs)
3496 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
3498 #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3499 regType = varDsc->GetHfaType();
3502 #if defined(UNIX_AMD64_ABI)
3503 if (!varTypeIsStruct(regType))
3504 #endif // defined(UNIX_AMD64_ABI)
3506 // A struct might be passed partially in XMM register for System V calls.
3507 // So a single arg might use both register files.
3508 if (isFloatRegType(regType) != doingFloat)
3516 #if defined(UNIX_AMD64_ABI)
3517 if (varTypeIsStruct(varDsc))
3519 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
3520 assert(typeHnd != nullptr);
3521 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3522 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3523 if (!structDesc.passedInRegisters)
3525 // The var is not passed in registers.
3529 unsigned firstRegSlot = 0;
3530 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3532 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3537 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3538 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3539 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3540 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3543 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
3544 // registers or on stack, the upper most 4-bytes will be zero.
3546 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
3547 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
3550 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3551 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3552 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
3553 // there is no need to clear upper 4-bytes of Vector3 type args.
4555 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3556 // Vector3 return values are returned two return registers and Caller assembles them into a
4557 // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4-bytes of Vector3
3558 // type args in prolog and Vector3 type return value of a call
3560 if (varDsc->lvType == TYP_SIMD12)
3562 regType = TYP_DOUBLE;
3567 regType = compiler->GetEightByteType(structDesc, slotCounter);
3570 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3572 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3573 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3575 // Store the reg for the first slot.
3578 firstRegSlot = regArgNum;
3581 // Bingo - add it to our table
3582 noway_assert(regArgNum < argMax);
3583 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3584 // not be multiple vars representing this argument
3586 regArgTab[regArgNum].varNum = varNum;
3587 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3588 regArgTab[regArgNum].type = regType;
3595 continue; // Nothing to do for this regState set.
3598 regArgNum = firstRegSlot;
3601 #endif // defined(UNIX_AMD64_ABI)
3603 // Bingo - add it to our table
3604 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
3606 noway_assert(regArgNum < argMax);
3607 // We better not have added it already (there better not be multiple vars representing this argument
3609 noway_assert(regArgTab[regArgNum].slot == 0);
3611 #if defined(UNIX_AMD64_ABI)
3612 // Set the register type.
3613 regArgTab[regArgNum].type = regType;
3614 #endif // defined(UNIX_AMD64_ABI)
3616 regArgTab[regArgNum].varNum = varNum;
3617 regArgTab[regArgNum].slot = 1;
3621 #if FEATURE_MULTIREG_ARGS
3622 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
3624 if (varDsc->lvIsHfaRegArg())
3626 // We have an HFA argument, set slots to the number of registers used
3627 slots = varDsc->lvHfaSlots();
3631 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3632 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3633 // We have a non-HFA multireg argument, set slots to two
3637 // Note that regArgNum+1 represents an argument index not an actual argument register.
3638 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
3640 // This is the setup for the rest of a multireg struct arg
3642 for (int i = 1; i < slots; i++)
3644 noway_assert((regArgNum + i) < argMax);
3646 // We better not have added it already (there better not be multiple vars representing this argument
3648 noway_assert(regArgTab[regArgNum + i].slot == 0);
3650 regArgTab[regArgNum + i].varNum = varNum;
3651 regArgTab[regArgNum + i].slot = (char)(i + 1);
3654 #endif // FEATURE_MULTIREG_ARGS
3658 int lclSize = compiler->lvaLclSize(varNum);
3660 if (lclSize > REGSIZE_BYTES)
3662 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3663 slots = lclSize / REGSIZE_BYTES;
3664 if (regArgNum + slots > maxRegArgNum)
3666 slots = maxRegArgNum - regArgNum;
3669 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
3670 assert(slots < INT8_MAX);
3671 for (char i = 1; i < slots; i++)
3673 regArgTab[regArgNum + i].varNum = varNum;
3674 regArgTab[regArgNum + i].slot = i + 1;
3676 #endif // _TARGET_ARM_
3678 for (int i = 0; i < slots; i++)
3680 regType = regArgTab[regArgNum + i].getRegType(compiler);
3681 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3683 #if !defined(UNIX_AMD64_ABI)
3684 assert((i > 0) || (regNum == varDsc->lvArgReg));
3685 #endif // defined(UNIX_AMD64_ABI)
3687 // Is the arg dead on entry to the method ?
3689 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3691 if (varDsc->lvTrackedNonStruct())
3693 // We may now see some tracked locals with zero refs.
3694 // See Lowering::DoPhase. Tolerate these.
3695 if (varDsc->lvRefCnt() > 0)
3697 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3703 noway_assert(varDsc->lvType == TYP_STRUCT);
3704 #else // !_TARGET_X86_
3705 // For LSRA, it may not be in regArgMaskLive if it has a zero
3706 // refcnt. This is in contrast with the non-LSRA case in which all
3707 // non-tracked args are assumed live on entry.
3708 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3709 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs) ||
3710 (varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
3711 #endif // !_TARGET_X86_
3713 // Mark it as processed and be done with it
3714 regArgTab[regArgNum + i].processed = true;
3719 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3720 // could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
3721 // they've already been spilled.
3723 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3724 #endif // _TARGET_ARM_
3726 #if !defined(UNIX_AMD64_ABI)
3727 noway_assert(xtraReg != (varDsc->lvArgReg + i));
3729 noway_assert(regArgMaskLive & genRegMask(regNum));
3732 regArgTab[regArgNum + i].processed = false;
3734 /* mark stack arguments since we will take care of those first */
3735 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3737 /* If it goes on the stack or in a register that doesn't hold
3738 * an argument anymore -> CANNOT form a circular dependency */
3740 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3742 /* will trash another argument -> possible dependency
3743 * We may need several passes after the table is constructed
3744 * to decide on that */
3746 /* Maybe the argument stays in the register (IDEAL) */
3748 if ((i == 0) && (varDsc->lvRegNum == regNum))
3753 #if !defined(_TARGET_64BIT_)
3754 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
3758 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
3763 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3764 (REG_NEXT(varDsc->lvRegNum) == regNum))
3768 #endif // !defined(_TARGET_64BIT_)
3769 regArgTab[regArgNum + i].circular = true;
3774 regArgTab[regArgNum + i].circular = false;
3776 /* mark the argument register as free */
3777 regArgMaskLive &= ~genRegMask(regNum);
3782 /* Find the circular dependencies for the argument registers, if any.
3783 * A circular dependency is a set of registers R1, R2, ..., Rn
3784 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
3789 /* Possible circular dependencies still exist; the previous pass was not enough
3790 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3796 for (argNum = 0; argNum < argMax; argNum++)
3798 // If we already marked the argument as non-circular then continue
3800 if (!regArgTab[argNum].circular)
3805 if (regArgTab[argNum].slot == 0) // Not a register argument
3810 varNum = regArgTab[argNum].varNum;
3811 noway_assert(varNum < compiler->lvaCount);
3812 varDsc = compiler->lvaTable + varNum;
3813 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3815 /* cannot possibly have stack arguments */
3816 noway_assert(varDsc->lvIsInReg());
3817 noway_assert(!regArgTab[argNum].stackArg);
3819 var_types regType = regArgTab[argNum].getRegType(compiler);
3820 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3822 regNumber destRegNum = REG_NA;
3823 if (regArgTab[argNum].slot == 1)
3825 destRegNum = varDsc->lvRegNum;
3827 #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
3830 assert(regArgTab[argNum].slot == 2);
3832 assert(regArgTab[argNum - 1].slot == 1);
3833 assert(regArgTab[argNum - 1].varNum == varNum);
3834 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
3835 regArgMaskLive &= ~genRegMask(regNum);
3836 regArgTab[argNum].circular = false;
3840 #elif !defined(_TARGET_64BIT_)
3841 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3843 destRegNum = varDsc->lvOtherReg;
3847 assert(regArgTab[argNum].slot == 2);
3848 assert(varDsc->TypeGet() == TYP_DOUBLE);
3849 destRegNum = REG_NEXT(varDsc->lvRegNum);
3851 #endif // !defined(_TARGET_64BIT_)
3852 noway_assert(destRegNum != REG_NA);
3853 if (genRegMask(destRegNum) & regArgMaskLive)
3855 /* we are trashing a live argument register - record it */
3856 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3857 noway_assert(destRegArgNum < argMax);
3858 regArgTab[destRegArgNum].trashBy = argNum;
3862 /* argument goes to a free register */
3863 regArgTab[argNum].circular = false;
3866 /* mark the argument register as free */
3867 regArgMaskLive &= ~genRegMask(regNum);
3873 /* At this point, everything that has the "circular" flag
3874 * set to "true" forms a circular dependency */
3875 CLANG_FORMAT_COMMENT_ANCHOR;
3882 printf("Circular dependencies found while home-ing the incoming arguments.\n");
3887 // LSRA allocates registers to incoming parameters in order and will not overwrite
3888 // a register still holding a live parameter.
3890 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3891 "Homing of float argument registers with circular dependencies not implemented.");
3893 /* Now move the arguments to their locations.
3894 * First consider ones that go on the stack since they may
3895 * free some registers. */
3897 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3898 for (argNum = 0; argNum < argMax; argNum++)
3902 #if defined(UNIX_AMD64_ABI)
3903 // If this is the wrong register file, just continue.
3904 if (regArgTab[argNum].type == TYP_UNDEF)
3906 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3907 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3908 // The next register file processing will process it.
3911 #endif // defined(UNIX_AMD64_ABI)
3913 // If the arg is dead on entry to the method, skip it
3915 if (regArgTab[argNum].processed)
3920 if (regArgTab[argNum].slot == 0) // Not a register argument
3925 varNum = regArgTab[argNum].varNum;
3926 noway_assert(varNum < compiler->lvaCount);
3927 varDsc = compiler->lvaTable + varNum;
3929 #ifndef _TARGET_64BIT_
3930 // If not a stack arg go to the next one
3931 if (varDsc->lvType == TYP_LONG)
3933 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
3937 else if (varDsc->lvOtherReg != REG_STK)
3943 #endif // !_TARGET_64BIT_
3945 // If not a stack arg go to the next one
3946 if (!regArgTab[argNum].stackArg)
3952 #if defined(_TARGET_ARM_)
3953 if (varDsc->lvType == TYP_DOUBLE)
3955 if (regArgTab[argNum].slot == 2)
3957 // We handled the entire double when processing the first half (slot == 1)
3963 noway_assert(regArgTab[argNum].circular == false);
3965 noway_assert(varDsc->lvIsParam);
3966 noway_assert(varDsc->lvIsRegArg);
3967 noway_assert(varDsc->lvIsInReg() == false ||
3968 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
3970 var_types storeType = TYP_UNDEF;
3971 unsigned slotSize = TARGET_POINTER_SIZE;
3973 if (varTypeIsStruct(varDsc))
3975 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3976 #if FEATURE_MULTIREG_ARGS
3977 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3978 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3979 #endif // FEATURE_MULTIREG_ARGS
3980 #ifdef UNIX_AMD64_ABI
3981 storeType = regArgTab[argNum].type;
3982 #endif // !UNIX_AMD64_ABI
3983 if (varDsc->lvIsHfaRegArg())
3986 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3987 storeType = TYP_FLOAT;
3988 slotSize = (unsigned)emitActualTypeSize(storeType);
3989 #else // _TARGET_ARM64_
3990 storeType = genActualType(varDsc->GetHfaType());
3991 slotSize = (unsigned)emitActualTypeSize(storeType);
3992 #endif // _TARGET_ARM64_
3995 else // Not a struct type
3997 storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
3999 size = emitActualTypeSize(storeType);
4001 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4002 #endif //_TARGET_X86_
4004 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4006 // Stack argument - if the ref count is 0 don't care about it
4008 if (!varDsc->lvOnFrame)
4010 noway_assert(varDsc->lvRefCnt() == 0);
4014 // Since slot is typically 1, baseOffset is typically 0
4015 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
4017 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4019 #ifndef UNIX_AMD64_ABI
4020 // Check if we are writing past the end of the struct
4021 if (varTypeIsStruct(varDsc))
4023 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4025 #endif // !UNIX_AMD64_ABI
4027 if (regArgTab[argNum].slot == 1)
4029 psiMoveToStack(varNum);
4033 /* mark the argument as processed */
4035 regArgTab[argNum].processed = true;
4036 regArgMaskLive &= ~genRegMask(srcRegNum);
4038 #if defined(_TARGET_ARM_)
4039 if (storeType == TYP_DOUBLE)
4041 regArgTab[argNum + 1].processed = true;
4042 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4047 /* Process any circular dependencies */
4050 unsigned begReg, destReg, srcReg;
4051 unsigned varNumDest, varNumSrc;
4052 LclVarDsc* varDscDest;
4053 LclVarDsc* varDscSrc;
4054 instruction insCopy = INS_mov;
4058 #if defined(FEATURE_HFA) || defined(UNIX_AMD64_ABI)
4059 insCopy = ins_Copy(TYP_DOUBLE);
4060 // Compute xtraReg here when we have a float argument
4061 assert(xtraReg == REG_NA);
4063 regMaskTP fpAvailMask;
4065 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4066 #if defined(FEATURE_HFA)
4067 fpAvailMask &= RBM_ALLDOUBLE;
4069 #if !defined(UNIX_AMD64_ABI)
4070 #error Error. Wrong architecture.
4071 #endif // !defined(UNIX_AMD64_ABI)
4072 #endif // defined(FEATURE_HFA)
4074 if (fpAvailMask == RBM_NONE)
4076 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4077 #if defined(FEATURE_HFA)
4078 fpAvailMask &= RBM_ALLDOUBLE;
4080 #if !defined(UNIX_AMD64_ABI)
4081 #error Error. Wrong architecture.
4082 #endif // !defined(UNIX_AMD64_ABI)
4083 #endif // defined(FEATURE_HFA)
4086 assert(fpAvailMask != RBM_NONE);
4088 // We pick the lowest avail register number
4089 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
4090 xtraReg = genRegNumFromMask(tempMask);
4091 #elif defined(_TARGET_X86_)
4092 // This case shouldn't occur on x86 since NYI gets converted to an assert
4093 NYI("Homing circular FP registers via xtraReg");
4097 for (argNum = 0; argNum < argMax; argNum++)
4099 // If not a circular dependency then continue
4100 if (!regArgTab[argNum].circular)
4105 // If already processed the dependency then continue
4107 if (regArgTab[argNum].processed)
4112 if (regArgTab[argNum].slot == 0) // Not a register argument
4117 destReg = begReg = argNum;
4118 srcReg = regArgTab[argNum].trashBy;
4120 varNumDest = regArgTab[destReg].varNum;
4121 noway_assert(varNumDest < compiler->lvaCount);
4122 varDscDest = compiler->lvaTable + varNumDest;
4123 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
4125 noway_assert(srcReg < argMax);
4126 varNumSrc = regArgTab[srcReg].varNum;
4127 noway_assert(varNumSrc < compiler->lvaCount);
4128 varDscSrc = compiler->lvaTable + varNumSrc;
4129 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4131 emitAttr size = EA_PTRSIZE;
4133 #ifdef _TARGET_XARCH_
4135 // The following code relies upon the target architecture having an
4136 // 'xchg' instruction which directly swaps the values held in two registers.
4137 // On the ARM architecture we do not have such an instruction.
4139 if (destReg == regArgTab[srcReg].trashBy)
4141 /* only 2 registers form the circular dependency - use "xchg" */
4143 varNum = regArgTab[argNum].varNum;
4144 noway_assert(varNum < compiler->lvaCount);
4145 varDsc = compiler->lvaTable + varNum;
4146 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4148 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
4150 /* Set "size" to indicate GC if one and only one of
4151 * the operands is a pointer
4152 * RATIONALE: If both are pointers, nothing changes in
4153 * the GC pointer tracking. If only one is a pointer we
4154 * have to "swap" the registers in the GC reg pointer mask
4157 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
4162 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
4164 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
4165 regSet.verifyRegUsed(varDscSrc->lvRegNum);
4166 regSet.verifyRegUsed(varDscSrc->lvArgReg);
4168 /* mark both arguments as processed */
4169 regArgTab[destReg].processed = true;
4170 regArgTab[srcReg].processed = true;
4172 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
4173 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
4175 psiMoveToReg(varNumSrc);
4176 psiMoveToReg(varNumDest);
4179 #endif // _TARGET_XARCH_
4181 var_types destMemType = varDscDest->TypeGet();
4184 bool cycleAllDouble = true; // assume the best
4186 unsigned iter = begReg;
4189 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
4191 cycleAllDouble = false;
4194 iter = regArgTab[iter].trashBy;
4195 } while (iter != begReg);
4197 // We may treat doubles as floats for ARM because we could have partial circular
4198 // dependencies of a float with a lo/hi part of the double. We mark the
4199 // trashBy values for each slot of the double, so let the circular dependency
4200 // logic work its way out for floats rather than doubles. If a cycle has all
4201 // doubles, then optimize so that instead of two vmov.f32's to move a double,
4202 // we can use one vmov.f64.
4204 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4206 destMemType = TYP_FLOAT;
4208 #endif // _TARGET_ARM_
4210 if (destMemType == TYP_REF)
4214 else if (destMemType == TYP_BYREF)
4218 else if (destMemType == TYP_DOUBLE)
4222 else if (destMemType == TYP_FLOAT)
4227 /* move the dest reg (begReg) in the extra reg */
4229 assert(xtraReg != REG_NA);
4231 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4233 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4235 regSet.verifyRegUsed(xtraReg);
4237 *pXtraRegClobbered = true;
4239 psiMoveToReg(varNumDest, xtraReg);
4241 /* start moving everything to its right place */
4243 while (srcReg != begReg)
4247 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4248 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4250 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4252 regSet.verifyRegUsed(destRegNum);
4254 /* mark 'src' as processed */
4255 noway_assert(srcReg < argMax);
4256 regArgTab[srcReg].processed = true;
4258 if (size == EA_8BYTE)
4259 regArgTab[srcReg + 1].processed = true;
4261 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4263 /* move to the next pair */
4265 srcReg = regArgTab[srcReg].trashBy;
4267 varDscDest = varDscSrc;
4268 destMemType = varDscDest->TypeGet();
4270 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4272 destMemType = TYP_FLOAT;
4275 varNumSrc = regArgTab[srcReg].varNum;
4276 noway_assert(varNumSrc < compiler->lvaCount);
4277 varDscSrc = compiler->lvaTable + varNumSrc;
4278 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4280 if (destMemType == TYP_REF)
4284 else if (destMemType == TYP_DOUBLE)
4294 /* take care of the beginning register */
4296 noway_assert(srcReg == begReg);
4298 /* move the dest reg (begReg) in the extra reg */
4300 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4302 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
4304 regSet.verifyRegUsed(destRegNum);
4306 psiMoveToReg(varNumSrc);
4308 /* mark the beginning register as processed */
4310 regArgTab[srcReg].processed = true;
4312 if (size == EA_8BYTE)
4313 regArgTab[srcReg + 1].processed = true;
4315 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4320 /* Finally take care of the remaining arguments that must be enregistered */
4321 while (regArgMaskLive)
4323 regMaskTP regArgMaskLiveSave = regArgMaskLive;
4325 for (argNum = 0; argNum < argMax; argNum++)
4327 /* If already processed go to the next one */
4328 if (regArgTab[argNum].processed)
4333 if (regArgTab[argNum].slot == 0)
4334 { // Not a register argument
4338 varNum = regArgTab[argNum].varNum;
4339 noway_assert(varNum < compiler->lvaCount);
4340 varDsc = compiler->lvaTable + varNum;
4341 var_types regType = regArgTab[argNum].getRegType(compiler);
4342 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4344 #if defined(UNIX_AMD64_ABI)
4345 if (regType == TYP_UNDEF)
4347 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4348 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4349 // The next register file processing will process it.
4350 regArgMaskLive &= ~genRegMask(regNum);
4353 #endif // defined(UNIX_AMD64_ABI)
4355 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4356 #ifndef _TARGET_64BIT_
4357 #ifndef _TARGET_ARM_
4358 // Right now we think that incoming arguments are not pointer sized. When we eventually
4359 // understand the calling convention, this still won't be true. But maybe we'll have a better
4360 // idea of how to ignore it.
4362 // On Arm, a long can be passed in register
4363 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
4365 #endif //_TARGET_64BIT_
4367 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
4369 /* Register argument - hopefully it stays in the same register */
4370 regNumber destRegNum = REG_NA;
4371 var_types destMemType = varDsc->TypeGet();
4373 if (regArgTab[argNum].slot == 1)
4375 destRegNum = varDsc->lvRegNum;
4378 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
4380 // The second half of the double has already been processed! Treat this as a single.
4381 destMemType = TYP_FLOAT;
4383 #endif // _TARGET_ARM_
4385 #ifndef _TARGET_64BIT_
4386 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
4388 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
4389 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
4391 destRegNum = regNum;
4395 destRegNum = varDsc->lvOtherReg;
4398 assert(destRegNum != REG_STK);
4402 assert(regArgTab[argNum].slot == 2);
4403 assert(destMemType == TYP_DOUBLE);
4405 // For doubles, we move the entire double using the argNum representing
4406 // the first half of the double. There are two things we won't do:
4407 // (1) move the double when the 1st half of the destination is free but the
4408 // 2nd half is occupied, and (2) move the double when the 2nd half of the
4409 // destination is free but the 1st half is occupied. Here we consider the
4410 // case where the first half can't be moved initially because its target is
4411 // still busy, but the second half can be moved. We wait until the entire
4412 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
4413 // and F2 single moving to F16. When we process F0, its target F2 is busy,
4414 // so we skip it on the first pass. When we process F1, its target F3 is
4415 // available. However, we want to move F0/F1 all at once, so we skip it here.
4416 // We process F2, which frees up F2. The next pass through, we process F0 and
4417 // F2/F3 are empty, so we move it. Note that if half of a double is involved
4418 // in a circularity with a single, then we will have already moved that half
4419 // above, so we go ahead and move the remaining half as a single.
4420 // Because there are no circularities left, we are guaranteed to terminate.
4423 assert(regArgTab[argNum - 1].slot == 1);
4425 if (!regArgTab[argNum - 1].processed)
4427 // The first half of the double hasn't been processed; try to be processed at the same time
4431 // The first half of the double has been processed but the second half hasn't!
4432 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
4433 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
4434 // dependency logic above will move them as singles, leaving just F3 to move. Treat
4435 // it as a single to finish the shuffling.
4437 destMemType = TYP_FLOAT;
4438 destRegNum = REG_NEXT(varDsc->lvRegNum);
4440 #endif // !_TARGET_64BIT_
4441 #if (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4444 assert(regArgTab[argNum].slot == 2);
4446 assert(regArgTab[argNum - 1].slot == 1);
4447 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4448 destRegNum = varDsc->lvRegNum;
4449 noway_assert(regNum != destRegNum);
4452 #endif // (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4453 noway_assert(destRegNum != REG_NA);
4454 if (destRegNum != regNum)
4456 /* Cannot trash a currently live register argument.
4457 * Skip this one until its target will be free
4458 * which is guaranteed to happen since we have no circular dependencies. */
4460 regMaskTP destMask = genRegMask(destRegNum);
4462 // Don't process the double until both halves of the destination are clear.
4463 if (genActualType(destMemType) == TYP_DOUBLE)
4465 assert((destMask & RBM_DBL_REGS) != 0);
4466 destMask |= genRegMask(REG_NEXT(destRegNum));
4470 if (destMask & regArgMaskLive)
4475 /* Move it to the new register */
4477 emitAttr size = emitActualTypeSize(destMemType);
4479 #if defined(_TARGET_ARM64_)
4480 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4482 // For a SIMD type that is passed in two integer registers,
4483 // Limit the copy below to the first 8 bytes from the first integer register.
4484 // Handle the remaining 8 bytes from the second slot in the code further below
4485 assert(EA_SIZE(size) >= 8);
4490 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
4492 psiMoveToReg(varNum);
4495 /* mark the argument as processed */
4497 assert(!regArgTab[argNum].processed);
4498 regArgTab[argNum].processed = true;
4499 regArgMaskLive &= ~genRegMask(regNum);
4500 #if FEATURE_MULTIREG_ARGS
4501 int argRegCount = 1;
4503 if (genActualType(destMemType) == TYP_DOUBLE)
4508 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4509 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4512 int nextArgNum = argNum + 1;
4513 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4514 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4515 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4516 // and moves the 0th element of the src reg into the 1st element of the dest reg.
4517 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
4518 // Set destRegNum to regNum so that we skip the setting of the register below,
4519 // but mark argNum as processed and clear regNum from the live mask.
4520 destRegNum = regNum;
4522 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4523 #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4524 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4526 // For a SIMD type that is passed in two integer registers,
4527 // Code above copies the first integer argument register into the lower 8 bytes
4528 // of the target register. Here we must handle the second 8 bytes of the slot pair by
4529 // inserting the second integer register into the upper 8 bytes of the target
4530 // SIMD floating point register.
4532 int nextArgNum = argNum + 1;
4533 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4534 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4535 noway_assert(genIsValidIntReg(nextRegNum));
4536 noway_assert(genIsValidFloatReg(destRegNum));
4537 getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
4539 #endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4541 // Mark the rest of the argument registers corresponding to this multi-reg type as
4542 // being processed and no longer live.
4543 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4545 int nextArgNum = argNum + regSlot;
4546 assert(!regArgTab[nextArgNum].processed);
4547 regArgTab[nextArgNum].processed = true;
4548 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4549 regArgMaskLive &= ~genRegMask(nextRegNum);
4551 #endif // FEATURE_MULTIREG_ARGS
4554 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4558 #pragma warning(pop)
4561 /*****************************************************************************
4562 * If any incoming stack arguments live in registers, load them.
4564 void CodeGen::genEnregisterIncomingStackArgs()
// Debug-only trace so the prolog phases are identifiable in JitDump output.
4569 printf("*************** In genEnregisterIncomingStackArgs()\n");
// This runs only while the prolog is being generated.
4573 assert(compiler->compGeneratingProlog);
4575 unsigned varNum = 0;
// Walk every local; only parameters are candidates for an incoming-stack-arg load.
4577 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4579 /* Is this variable a parameter? */
4581 if (!varDsc->lvIsParam)
4586 /* If it's a register argument then it's already been taken care of.
4587 But, on Arm when under a profiler, we would have prespilled a register argument
4588 and hence here we need to load it from its prespilled location.
4590 bool isPrespilledForProfiling = false;
4591 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
// On ARM with a profiler hook, register args may have been pre-spilled to the
// stack, in which case they must be reloaded here like ordinary stack args.
4592 isPrespilledForProfiling =
4593 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4596 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4601 /* Has the parameter been assigned to a register? */
// Stack-homed parameters that stay on the stack need no prolog load.
4603 if (!varDsc->lvIsInReg())
4608 var_types type = genActualType(varDsc->TypeGet());
4610 /* Is the variable dead on entry */
// If the param is not live into the first block, skip the load entirely.
4612 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4617 /* Load the incoming parameter into the register */
4619 /* Figure out the home offset of the incoming argument */
// lvArgInitReg is the register chosen to hold this incoming stack argument;
// it must be a real register at this point (not REG_STK).
4621 regNumber regNum = varDsc->lvArgInitReg;
4622 assert(regNum != REG_STK);
// Emit the load from the argument's stack home (offset 0 within the local).
4624 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
4625 regSet.verifyRegUsed(regNum);
// Record the variable's move into a register for debug/scope info.
4627 psiMoveToReg(varNum);
4631 /*-------------------------------------------------------------------------
4633 * We have to decide whether we're going to use block initialization
4634 * in the prolog before we assign final stack offsets. This is because
4635 * when using block initialization we may need additional callee-saved
4636 * registers which need to be saved on the frame, thus increasing the
4639 * We'll count the number of locals we have to initialize,
4640 * and if there are lots of them we'll use block initialization.
4641 * Thus, the local variable table must have accurate register location
4642 * information for enregistered locals for their register state on entry
4645 * At the same time we set lvMustInit for locals (enregistered or on stack)
4646 * that must be initialized (e.g. initialize memory (comInitMem),
4647 * untracked pointers or disable DFA)
// genCheckUseBlockInit: decide, BEFORE final stack offsets are assigned, whether
// the prolog will zero-initialize stack locals with a block operation (e.g.
// "rep stos" on xarch) instead of clearing slots one at a time. As a side effect
// it sets lvMustInit on every local that requires zero-initialization, and when
// block init is chosen it marks the extra registers the init sequence will need
// as modified so they get saved in the prolog.
void CodeGen::genCheckUseBlockInit()
assert(!compiler->compGeneratingProlog);
unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
// larger than int count for more than 1).
unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
// determine whether to use block init.
for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
// Incoming parameters are initialized by the caller; they never need zero-init.
if (varDsc->lvIsParam)
// A local that is neither enregistered nor on the frame must be unreferenced.
if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
noway_assert(varDsc->lvRefCnt() == 0);
// Specially-allocated frame slots are initialized by dedicated prolog code.
if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
#if FEATURE_FIXED_OUT_ARGS
if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
if (varNum == compiler->lvaOutgoingArgSpaceVar)
#if FEATURE_EH_FUNCLETS
// There's no need to force 0-initialization of the PSPSym, it will be
// initialized with a real value in the prolog
if (varNum == compiler->lvaPSPSym)
if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
// For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
// initialized by the parent struct. No need to set the lvMustInit bit in the
if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
if (varDsc->lvTracked)
/* For uninitialized use of tracked variables, the liveness
 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
 */
if (varDsc->lvMustInit ||
VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
/* This var must be initialized */
varDsc->lvMustInit = 1;
/* See if the variable is on the stack will be initialized
 * using rep stos - compute the total size to be zero-ed */
if (varDsc->lvOnFrame)
if (!varDsc->lvRegister)
if (!varDsc->lvIsInReg())
// Var is on the stack at entry.
// Count int-sized pieces: sizes are rounded up to pointer size first.
roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
// Var is partially enregistered
noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
initStkLclCnt += genTypeStSz(TYP_INT);
/* With compInitMem, all untracked vars will have to be init'ed */
/* VSW 102460 - Do not force initialization of compiler generated temps,
    unless they are untracked GC type or structs that contain GC pointers */
CLANG_FORMAT_COMMENT_ANCHOR;
// TODO-1stClassStructs
// This is here to duplicate previous behavior, where TYP_SIMD8 locals
// were not being re-typed correctly.
if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
#else // !FEATURE_SIMD
if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
#endif // !FEATURE_SIMD
varDsc->lvOnFrame &&
(!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
varDsc->lvMustInit = true;
initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
/* Ignore if not a pointer variable or value class with a GC field */
if (!compiler->lvaTypeIsGC(varNum))
/* If we don't know lifetimes of variables, must be conservative */
if (!compiler->backendRequiresLocalVarLifetimes())
varDsc->lvMustInit = true;
noway_assert(!varDsc->lvRegister);
if (!varDsc->lvTracked)
varDsc->lvMustInit = true;
/* Is this a 'must-init' stack pointer local? */
if (varDsc->lvMustInit && varDsc->lvOnFrame)
initStkLclCnt += varDsc->lvStructGcCount;
// Track "large" GC structs separately: block init over them wastes work on
// the non-GC slots, so they bias the heuristic below against block init.
if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
/* Don't forget about spill temps that hold pointers */
if (!TRACK_GC_TEMP_LIFETIMES)
assert(regSet.tmpAllFree());
for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
if (varTypeIsGC(tempThis->tdTempType()))
// After debugging this further it was found that this logic is incorrect:
// it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
// and this also double counts variables (we saw this in the debugger) around line 4829.
// Even though this doesn't pose a problem with correctness it will improperly decide to
// zero init the stack using a block operation instead of a 'case by case' basis.
genInitStkLclCnt = initStkLclCnt;
/* If we have more than 4 untracked locals, use block initialization */
/* TODO-Review: If we have large structs, bias toward not using block initialization since
       we waste all the other slots.  Really need to compute the correct
       and compare that against zeroing the slots individually */
genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
if (genUseBlockInit)
regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
// If there is a secret stub param, don't count it, as it will no longer
// be live when we do block init.
if (compiler->info.compPublishStubParam)
maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
#ifdef _TARGET_XARCH_
// If we're going to use "REP STOS", remember that we will trash EDI
// For fastcall we will have to save ECX, EAX
// so reserve two extra callee saved
// This is better than pushing eax, ecx, because we in the later
// we will mess up already computed offsets on the stack (for ESP frames)
regSet.rsSetRegsModified(RBM_EDI);
#ifdef UNIX_AMD64_ABI
// For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
// In such case use R12 and R13 registers.
if (maskCalleeRegArgMask & RBM_RCX)
regSet.rsSetRegsModified(RBM_R12);
if (maskCalleeRegArgMask & RBM_RDI)
regSet.rsSetRegsModified(RBM_R13);
#else // !UNIX_AMD64_ABI
if (maskCalleeRegArgMask & RBM_ECX)
regSet.rsSetRegsModified(RBM_ESI);
#endif // !UNIX_AMD64_ABI
if (maskCalleeRegArgMask & RBM_EAX)
regSet.rsSetRegsModified(RBM_EBX);
#endif // _TARGET_XARCH_
// On the Arm if we are using a block init to initialize, then we
// must force spill R4/R5/R6 so that we can use them during
// zero-initialization process.
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
if (forceSpillRegCount > 0)
regSet.rsSetRegsModified(RBM_R4);
if (forceSpillRegCount > 1)
regSet.rsSetRegsModified(RBM_R5);
if (forceSpillRegCount > 2)
regSet.rsSetRegsModified(RBM_R6);
#endif // _TARGET_ARM_
4893 /*-----------------------------------------------------------------------------
4895 * Push any callee-saved registers we have used
// genPushCalleeSavedRegisters: save the callee-saved registers used by this
// method, emitting the matching unwind codes. On ARM64 this also allocates the
// frame (choosing one of three "frameType" layouts based on frame size and
// outgoing arg space) and establishes the frame pointer; on ARM/x86/x64 it only
// pushes registers. The ARM64 variant takes a scratch register (initReg) and a
// flag tracking whether that register still holds zero.
#if defined(_TARGET_ARM64_)
void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
void CodeGen::genPushCalleeSavedRegisters()
assert(compiler->compGeneratingProlog);
#if defined(_TARGET_XARCH_)
// x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
// here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
#else // !defined(_TARGET_XARCH_)
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
// FP must not be used as an ordinary scratch register when no frame pointer is used.
if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
noway_assert(!"Used register RBM_FPBASE as a scratch register!");
#ifdef _TARGET_XARCH_
// On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
if (isFramePointerUsed())
rsPushRegs &= ~RBM_FPBASE;
#ifdef _TARGET_ARMARCH_
// On ARM we push the FP (frame-pointer) here along with all other callee saved registers
if (isFramePointerUsed())
rsPushRegs |= RBM_FPBASE;
// It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
// changes in GC suspension architecture.
// We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
// generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
// method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
// on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
// be saved on the stack and the GC suspension would time out.
// So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
// the following to make GC suspension work in the above scenario:
// - Make return address hijacking work even when lr is not saved on the stack.
// - Generate fully interruptible code for loops that contains calls
// - Generate fully interruptible code for leaf methods
// Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
regSet.rsMaskCalleeSaved = rsPushRegs;
#endif // _TARGET_ARMARCH_
// Sanity check: the count computed during frame layout must match what we push now.
if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
dspRegMask(rsPushRegs);
assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
#if defined(_TARGET_ARM_)
regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
// Small frames may be allocated by pushing extra (dead) integer registers
// instead of a separate "sub sp" — see genStackAllocRegisterMask.
maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
assert(FitsIn<int>(maskPushRegsInt));
inst_IV(INS_push, (int)maskPushRegsInt);
compiler->unwindPushMaskInt(maskPushRegsInt);
if (maskPushRegsFloat != 0)
genPushFltRegs(maskPushRegsFloat);
compiler->unwindPushMaskFloat(maskPushRegsFloat);
#elif defined(_TARGET_ARM64_)
// See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
// options. Case numbers in comments here refer to this document.
// For most frames, generate, e.g.:
//      stp fp,  lr,  [sp,-0x80]!   // predecrement SP with full frame size, and store FP/LR pair. Store pair
//                                  // ensures stack stays aligned.
//      stp r19, r20, [sp, 0x60]    // store at positive offset from SP established above, into callee-saved area
//                                  // at top of frame (highest addresses).
//      stp r21, r22, [sp, 0x70]
// 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
//    at the top of the frame.
// 2. If we save FP, then the first store is FP, LR.
// 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
//    preserve their lower 8 bytes, by calling convention.
// 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
// 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
int totalFrameSize = genTotalFrameSize();
int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
// generate based on various sizes.
int calleeSaveSPDelta = 0;
int calleeSaveSPDeltaUnaligned = 0;
if (isFramePointerUsed())
// We need to save both FP and LR.
assert((maskSaveRegsInt & RBM_FP) != 0);
assert((maskSaveRegsInt & RBM_LR) != 0);
// Frame type 1: small frame, no outgoing args — single pre-indexed STP does it all.
if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
//      stp fp,lr,[sp,#-framesz]!
// The (totalFrameSize < 512) condition ensures that both the predecrement
//  and the postincrement of SP can occur with STP.
// After saving callee-saved registers, we establish the frame pointer with:
// We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
maskSaveRegsInt &= ~(RBM_FP | RBM_LR);                        // We've already saved FP/LR
offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
// Frame type 2: frame fits in STP's signed-offset range — "sub sp" then STP above the outgoing args.
else if (totalFrameSize <= 512)
//      sub sp,sp,#framesz
//      stp fp,lr,[sp,#outsz]   // note that by necessity, #outsz <= #framesz-16, so #outsz <= 496.
// The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
// signed offset encoding.
// After saving callee-saved registers, we establish the frame pointer with:
// We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
compiler->unwindAllocStack(totalFrameSize);
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
compiler->lvaOutgoingArgSpaceSize);
compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
maskSaveRegsInt &= ~(RBM_FP | RBM_LR);                        // We've already saved FP/LR
offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
// Frame type 3: large frame — allocate callee-saved area first, rest of the frame later.
// First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
// to subtract from SP as the first instruction. It must also leave space for varargs registers to be
// stored. For example:
//      stp r19,r20,[sp,#-96]!
//      stp d8,d9,[sp,#16]
//      ... save varargs incoming integer registers ...
// Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
// lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
// So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
// This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
// possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
// above them. If that is preferable, we could implement it.
// Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
// Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
// padding from above).
// Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
//      sub sp,sp,#remainingFrameSz
// or, for large frames:
//      mov rX, #remainingFrameSz // maybe multiple instructions
//      stp fp,lr,[sp,#outsz]
// However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
// handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
// STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
// following sequences:
// Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
//      sub sp,sp,#remainingFrameSz2  // if #remainingFrameSz2 is 16-byte aligned
//      sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
//      sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
//                                               // always guaranteed to be 8 byte aligned).
//      stp fp,lr,[sp,#8]                        // it will always be #8 in the unaligned case
//      sub sp,sp,#outsz - #8
// (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
//      mov rX, #outsz - #8 // maybe multiple instructions
calleeSaveSPDeltaUnaligned =
totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
assert(calleeSaveSPDeltaUnaligned >= 0);
assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
// store the callee-saved registers.
// We'll take care of these later, but callee-saved regs code shouldn't see them.
maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
// No frame pointer (no chaining).
assert((maskSaveRegsInt & RBM_FP) == 0);
assert((maskSaveRegsInt & RBM_LR) != 0);
// Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
// if we only have one callee-saved register plus LR to save.
NYI("Frame without frame pointer");
assert(frameType != 0);
// Save the remaining callee-saved registers at 'offset' (pre-indexing by
// calleeSaveSPDelta for frame type 3).
genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
// need to add codes at all.
if (compiler->info.compIsVarArgs)
// There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
assert((offset % 16) == 0);
for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
regNumber reg2 = REG_NEXT(reg1);
// stp REG, REG + 1, [SP, #offset]
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
compiler->unwindNop();
offset += 2 * REGSIZE_BYTES;
// Establish the frame pointer for frame type 1: FP = SP.
getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
compiler->unwindSetFrameReg(REG_FPBASE, 0);
else if (frameType == 2)
// Frame type 2: FP points just above the outgoing arg space.
getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
else if (frameType == 3)
// Frame type 3: allocate the remainder of the frame, saving FP/LR along the way.
int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
assert(remainingFrameSz > 0);
assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
// totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
if (compiler->lvaOutgoingArgSpaceSize >= 504)
// We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
// If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
offset += spAdjustment2;
// Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
assert(spAdjustment3 > 0);
assert((spAdjustment3 % 16) == 0);
getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
offset += spAdjustment3;
genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
offset += remainingFrameSz;
getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
assert(offset == totalFrameSize);
#elif defined(_TARGET_XARCH_)
// Push backwards so we match the order we will pop them in the epilog
// and all the other code that expects it to be in this order.
for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
regMaskTP regBit = genRegMask(reg);
if ((regBit & rsPushRegs) != 0)
inst_RV(INS_push, reg, TYP_REF);
compiler->unwindPush(reg);
if (!doubleAlignOrFramePointerUsed())
psiAdjustStackLevel(REGSIZE_BYTES);
rsPushRegs &= ~regBit;
assert(!"Unknown TARGET");
5272 #if defined(_TARGET_ARM_)
5274 void CodeGen::genPushFltRegs(regMaskTP regMask)
5276 assert(regMask != 0); // Don't call uness we have some registers to push
5277 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5279 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5280 int slots = genCountBits(regMask);
5281 // regMask should be contiguously set
5282 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5283 assert((tmpMask & (tmpMask - 1)) == 0);
5284 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
5286 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
5287 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5288 noway_assert((slots % 2) == 0);
5290 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
5293 void CodeGen::genPopFltRegs(regMaskTP regMask)
5295 assert(regMask != 0); // Don't call uness we have some registers to pop
5296 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5298 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5299 int slots = genCountBits(regMask);
5300 // regMask should be contiguously set
5301 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5302 assert((tmpMask & (tmpMask - 1)) == 0);
5304 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
5305 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5306 noway_assert((slots % 2) == 0);
5308 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
5311 /*-----------------------------------------------------------------------------
5313 * If we have a jmp call, then the argument registers cannot be used in the
5314 * epilog. So return the current call's argument registers as the argument
5315 * registers for the jmp call.
5317 regMaskTP CodeGen::genJmpCallArgMask()
5319 assert(compiler->compGeneratingEpilog);
5321 regMaskTP argMask = RBM_NONE;
5322 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
5324 const LclVarDsc& desc = compiler->lvaTable[varNum];
5325 if (desc.lvIsRegArg)
5327 argMask |= genRegMask(desc.lvArgReg);
5333 /*-----------------------------------------------------------------------------
5335 * Free the local stack frame: add to SP.
5336 * If epilog unwind hasn't been started, and we generate code, we start unwind
5337 * and set *pUnwindStarted = true.
// genFreeLclFrame: free the local stack frame by adding 'frameSize' to SP in the
// epilog. If epilog unwind has not been started and we generate code, we start
// unwind here and set *pUnwindStarted = true. 'jmpEpilog' indicates a jmp-call
// epilog, in which case argument registers must not be used as scratch.
void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
assert(compiler->compGeneratingEpilog);
// Add 'frameSize' to SP.
// Unfortunately, we can't just use:
//   inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
// because we need to generate proper unwind codes for each instruction generated,
// and large frame sizes might generate a temp register load which might
// need an unwind code. We don't want to generate a "NOP" code for this
// temp register load; we want the unwind codes to start after that.
// Fast path: frameSize fits in an 'add' immediate, a single unwindable instruction.
if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
if (!*pUnwindStarted)
compiler->unwindBegEpilog();
*pUnwindStarted = true;
getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
// Slow path: materialize frameSize in a temp register first.
regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
// Do not use argument registers as scratch registers in the jmp epilog.
grabMask &= ~genJmpCallArgMask();
regNumber tmpReg = REG_TMP_0;
instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
// If unwind already started, the immediate load above is unwind padding.
if (*pUnwindStarted)
compiler->unwindPadding();
// We're going to generate an unwindable instruction, so check again if
// we need to start the unwind codes.
if (!*pUnwindStarted)
compiler->unwindBegEpilog();
*pUnwindStarted = true;
getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
compiler->unwindAllocStack(frameSize);
5398 /*-----------------------------------------------------------------------------
5400 * Move of relocatable displacement value to register
5402 void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
5404 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
5405 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
5407 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5409 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
5413 /*-----------------------------------------------------------------------------
5415 * Move of relocatable data-label to register
5417 void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
5419 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
5420 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
5422 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5424 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
5428 /*-----------------------------------------------------------------------------
5430 * Move of relocatable immediate to register
5432 void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg)
5434 _ASSERTE(EA_IS_RELOC(size));
5436 getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr);
5437 getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr);
5439 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5441 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
5445 /*-----------------------------------------------------------------------------
5447 * Returns register mask to push/pop to allocate a small stack frame,
5448 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
5449 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
// genStackAllocRegisterMask: return the register mask to push/pop in order to
// allocate a small stack frame, instead of using "sub sp" / "add sp". Returns
// RBM_NONE if the frame size is zero, or if "sub sp" / "add sp" should be used.
regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
// We can't do this optimization with callee saved floating point registers because
// the stack would be allocated in a wrong spot.
if (maskCalleeSavedFloat != RBM_NONE)
// Allocate space for small frames by pushing extra registers. It generates smaller and faster code
// that extra sub sp,XXX/add sp,XXX.
// R0 and R1 may be used by return value. Keep things simple and just skip the optimization
// for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
// significant negative side-effects (more memory bus traffic).
// Two-slot frame: pushing R2/R3 (dead at this point) allocates it for free.
case 2 * REGSIZE_BYTES:
return RBM_R2 | RBM_R3;
5476 #endif // _TARGET_ARM_
5478 /*****************************************************************************
5480 * initFltRegs -- The mask of float regs to be zeroed.
5481 * initDblRegs -- The mask of double regs to be zeroed.
5482 * initReg -- A zero initialized integer reg to copy from.
5484 * Does best effort to move between VFP/xmm regs if one is already
5485 * initialized to 0. (Arm Only) Else copies from the integer register which
// genZeroInitFltRegs: zero-initialize float/double registers in the prolog.
//
// Arguments:
//    initFltRegs - The mask of float regs to be zeroed.
//    initDblRegs - The mask of double regs to be zeroed.
//    initReg     - A zero initialized integer reg to copy from.
//
// Does best effort to move between VFP/xmm regs if one is already initialized
// to 0 (Arm only); else copies from the integer register which holds zero.
void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
assert(compiler->compGeneratingProlog);
// The first float/double reg that is initialized to 0. So they can be used to
// initialize the remaining registers.
regNumber fltInitReg = REG_NA;
regNumber dblInitReg = REG_NA;
// Iterate through float/double registers and initialize them to 0 or
// copy from already initialized register of the same type.
regMaskTP regMask = genRegMask(REG_FP_FIRST);
for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
if (regMask & initFltRegs)
// Do we have a float register already set to 0?
if (fltInitReg != REG_NA)
inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
// Do we have a double register initialized to 0?
if (dblInitReg != REG_NA)
// Copy from double.
inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
// No zeroed FP register yet: copy the zero from the integer register.
inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
#elif defined(_TARGET_XARCH_)
// XORPS is the fastest and smallest way to initialize a XMM register to zero.
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
#elif defined(_TARGET_ARM64_)
// We will just zero out the entire vector register. This sets it to a double/float zero value
getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
#error Unsupported or unset target architecture
else if (regMask & initDblRegs)
// Do we have a double register already set to 0?
if (dblInitReg != REG_NA)
// Copy from double.
inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
// Do we have a float register initialized to 0?
if (fltInitReg != REG_NA)
inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
// No zeroed FP register yet: copy the zero from the integer register (both halves).
inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
#elif defined(_TARGET_XARCH_)
// XORPS is the fastest and smallest way to initialize a XMM register to zero.
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
#elif defined(_TARGET_ARM64_)
// We will just zero out the entire vector register. This sets it to a double/float zero value
getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
#error Unsupported or unset target architecture
5575 /*-----------------------------------------------------------------------------
5577 * Restore any callee-saved registers we have used
5580 #if defined(_TARGET_ARM_)
5582 bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
5584 assert(compiler->compGeneratingEpilog);
5586 if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
// genPopCalleeSavedRegisters: restore the callee-saved registers in the epilog.
// Pops float registers first, then the integer registers; when possible the
// return itself is folded into the integer pop by popping into PC
// (see genCanUsePopToReturn), recorded in genUsedPopToReturn for the epilog code.
void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
assert(compiler->compGeneratingEpilog);
regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
// First, pop float registers
if (maskPopRegsFloat != RBM_NONE)
genPopFltRegs(maskPopRegsFloat);
compiler->unwindPopMaskFloat(maskPopRegsFloat);
// Next, pop integer registers
// If the frame was allocated by pushing extra registers (small frames),
// free it by popping those same registers.
regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
maskPopRegsInt |= maskStackAlloc;
if (isFramePointerUsed())
assert(!regSet.rsRegsModified(RBM_FPBASE));
maskPopRegsInt |= RBM_FPBASE;
if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
maskPopRegsInt |= RBM_PC;
// Record the fact that we use a pop to the PC to perform the return
genUsedPopToReturn = true;
maskPopRegsInt |= RBM_LR;
// Record the fact that we did not use a pop to the PC to perform the return
genUsedPopToReturn = false;
assert(FitsIn<int>(maskPopRegsInt));
inst_IV(INS_pop, (int)maskPopRegsInt);
compiler->unwindPopMaskInt(maskPopRegsInt);
5640 #elif defined(_TARGET_ARM64_)
// ARM64 epilog: restore the callee-saved registers and deallocate the local
// frame. The code mirrors (in reverse) the three prolog frame shapes:
//   frameType 1: no outgoing args, totalFrameSize < 512  -> "ldp fp,lr,[sp],#framesz"
//   frameType 2: totalFrameSize <= 512                   -> "ldp fp,lr,[sp,#outsz]; add sp,sp,#framesz"
//   frameType 3: large frame; SP restored in pieces before/while restoring FP/LR
void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
{
    assert(compiler->compGeneratingEpilog);

    regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;

    if (isFramePointerUsed())
    {
        rsRestoreRegs |= RBM_FPBASE;
    }

    rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)

    regMaskTP regsToRestoreMask = rsRestoreRegs;

    int totalFrameSize = genTotalFrameSize();

    int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
                            // order of their offsets.
    int frameType                  = 0; // An indicator of what type of frame we are popping.
    int calleeSaveSPDelta          = 0;
    int calleeSaveSPDeltaUnaligned = 0;

    if (isFramePointerUsed())
    {
        if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
        {
            frameType = 1;
            if (compiler->compLocallocUsed)
            {
                // Restore sp from fp
                //      mov sp, fp
                inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
                compiler->unwindSetFrameReg(REG_FPBASE, 0);
            }

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.

            // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
            // of stack.
            calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
        }
        else if (totalFrameSize <= 512)
        {
            frameType = 2;
            if (compiler->compLocallocUsed)
            {
                // Restore sp from fp
                //      sub sp, fp, #outsz
                getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
                                            compiler->lvaOutgoingArgSpaceSize);
                compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
            }

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.

            // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
            // of stack.
            calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
        }
        else
        {
            frameType = 3;

            calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
                                         2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
            assert(calleeSaveSPDeltaUnaligned >= 0);
            assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
            calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);

            regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.

            int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
            assert(remainingFrameSz > 0);

            if (compiler->lvaOutgoingArgSpaceSize >= 504)
            {
                // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
                // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
                assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
                int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
                int spAdjustment2          = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
                int alignmentAdjustment2   = spAdjustment2 - spAdjustment2Unaligned;
                assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));

                if (compiler->compLocallocUsed)
                {
                    // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
                    // prolog.
                    //      sub sp, fp, #alignmentAdjustment2
                    getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
                    compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
                }
                else
                {
                    // Generate:
                    //      add sp,sp,#outsz  ; if #outsz is not 16-byte aligned, we need to be more careful
                    int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
                    assert(spAdjustment3 > 0);
                    assert((spAdjustment3 % 16) == 0);
                    genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
                }

                // Generate:
                //      ldp fp,lr,[sp]
                //      add sp,sp,#remainingFrameSz
                genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP1, nullptr);
            }
            else
            {
                if (compiler->compLocallocUsed)
                {
                    // Restore sp from fp
                    //      sub sp, fp, #outsz
                    getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
                                                compiler->lvaOutgoingArgSpaceSize);
                    compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
                }

                // Generate:
                //      ldp fp,lr,[sp,#outsz]
                //      add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
                //                                  ; it's large
                genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP1,
                                        nullptr);
            }

            // Unlike frameType=1 or frameType=2 that restore SP at the end,
            // frameType=3 already adjusted SP above to delete local frame.
            // There is at most one alignment slot between SP and where we store the callee-saved registers.
            calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
            assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
        }
    }
    else
    {
        // No frame pointer (no chaining).
        NYI("Frame without frame pointer");
        calleeSaveSPOffset = 0;
    }

    genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);

    if (frameType == 1)
    {
        // Generate:
        //      ldp fp,lr,[sp],#framesz

        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
                                      INS_OPTS_POST_INDEX);
        compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
    }
    else if (frameType == 2)
    {
        // Generate:
        //      ldr fp,lr,[sp,#outsz]
        //      add sp,sp,#framesz

        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
                                      compiler->lvaOutgoingArgSpaceSize);
        compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);

        getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
        compiler->unwindAllocStack(totalFrameSize);
    }
    else if (frameType == 3)
    {
        // Nothing to do after restoring callee-saved registers.
    }
    else
    {
        unreached();
    }
}
5819 #elif defined(_TARGET_XARCH_)
// Pop the callee-saved integer registers in the x64/xarch epilog, in the
// reverse of the push order used by the prolog, and verify the count matches
// what frame layout (compCalleeRegsPushed) expected.
void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
{
    assert(compiler->compGeneratingEpilog);

    unsigned popCount = 0;
    if (regSet.rsRegsModified(RBM_EBX))
    {
        popCount++;
        inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_FPBASE))
    {
        // EBP cannot be directly modified for EBP frame and double-aligned frames
        assert(!doubleAlignOrFramePointerUsed());

        popCount++;
        inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
    }

#ifndef UNIX_AMD64_ABI
    // For System V AMD64 calling convention ESI and EDI are volatile registers.
    if (regSet.rsRegsModified(RBM_ESI))
    {
        popCount++;
        inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_EDI))
    {
        popCount++;
        inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
    }
#endif // !defined(UNIX_AMD64_ABI)

#ifdef _TARGET_AMD64_
    if (regSet.rsRegsModified(RBM_R12))
    {
        popCount++;
        inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_R13))
    {
        popCount++;
        inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_R14))
    {
        popCount++;
        inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_R15))
    {
        popCount++;
        inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
    }
#endif // _TARGET_AMD64_

    // Amd64/x86 doesn't support push/pop of xmm registers.
    // These will get saved to stack separately after allocating
    // space on stack in prolog sequence. PopCount is essentially
    // tracking the count of integer registers pushed.

    noway_assert(compiler->compCalleeRegsPushed == popCount);
}
5885 #elif defined(_TARGET_X86_)
// Pop the callee-saved registers in the x86 epilog. The pops must be emitted
// first and each must encode in a single byte; the EBP-less frame unwinding
// code depends on that property (see the NOTE below).
void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
{
    assert(compiler->compGeneratingEpilog);

    unsigned popCount = 0;

    /*  NOTE:   The EBP-less frame code below depends on the fact that
                all of the pops are generated right at the start and
                each takes one byte of machine code.
     */

    if (regSet.rsRegsModified(RBM_FPBASE))
    {
        // EBP cannot be directly modified for EBP frame and double-aligned frames
        noway_assert(!doubleAlignOrFramePointerUsed());

        inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
        popCount++;
    }
    if (regSet.rsRegsModified(RBM_EBX))
    {
        popCount++;
        inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_ESI))
    {
        popCount++;
        inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
    }
    if (regSet.rsRegsModified(RBM_EDI))
    {
        popCount++;
        inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
    }
    noway_assert(compiler->compCalleeRegsPushed == popCount);
}
// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
{
#ifdef _TARGET_ARM64_
    // ARM64 has a dedicated zero register; no work needed.
    return REG_ZR;
#else  // !_TARGET_ARM64_
    // Zero initReg only if the caller hasn't already done so.
    if (*pInitRegZeroed == false)
    {
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
        *pInitRegZeroed = true;
    }
    return initReg;
#endif // !_TARGET_ARM64_
}
5942 /*-----------------------------------------------------------------------------
5944 * Do we have any untracked pointer locals at all,
5945 * or do we need to initialize memory for locspace?
5947 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
5948 * initializing memory (not inclusive).
5949 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
5950 * initializing memory.
5951 * initReg - A scratch register (that gets set to zero on some platforms).
5952 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
// Zero-initialize the untracked-pointer / must-init region of the frame in
// the prolog. Either a block init is emitted over [untrLclLo, untrLclHi)
// (stm/stp loop on ARM/ARM64, "rep stosd" on xarch), or each lvMustInit
// local and GC spill temp is zeroed individually.
void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
{
    assert(compiler->compGeneratingProlog);

    if (genUseBlockInit)
    {
        assert(untrLclHi > untrLclLo);
#ifdef _TARGET_ARMARCH_
        /*
            Generate the following code:

            For cnt less than 10

                mov     rZero1, 0
                mov     rZero2, 0
                mov     rCnt,  <cnt>
                stm     <rZero1,rZero2>,[rAddr!]
    <optional>  stm     <rZero1,rZero2>,[rAddr!]
    <optional>  stm     <rZero1,rZero2>,[rAddr!]
    <optional>  stm     <rZero1,rZero2>,[rAddr!]
    <optional>  str     rZero1,[rAddr]

            For rCnt greater than or equal to 10

                mov     rZero1, 0
                mov     rZero2, 0
                mov     rCnt,  <cnt/2>
                sub     rAddr, sp, OFFS

            loop:
                stm     <rZero1,rZero2>,[rAddr!]
                sub     rCnt, rCnt, 1
                jnz     loop

    <optional>  str     rZero1,[rAddr]   // When cnt is odd

            NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
         */

        regNumber rAddr;
        regNumber rCnt = REG_NA; // Invalid
        regMaskTP regMask;

        regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
        availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
                                                            // currently live
        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
                                           // a large constant.

#if defined(_TARGET_ARM_)

        if (compiler->compLocallocUsed)
        {
            availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
        }

        regNumber rZero1; // We're going to use initReg for rZero1
        regNumber rZero2;

        // We pick the next lowest register number for rZero2
        noway_assert(availMask != RBM_NONE);
        regMask = genFindLowestBit(availMask);
        rZero2  = genRegNumFromMask(regMask);
        availMask &= ~regMask;
        assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
               0); // rZero2 is not a live incoming argument reg

        // We pick the next lowest register number for rAddr
        noway_assert(availMask != RBM_NONE);
        regMask = genFindLowestBit(availMask);
        rAddr   = genRegNumFromMask(regMask);
        availMask &= ~regMask;

#else // !define(_TARGET_ARM_)

        // On ARM64 the zero register serves as the source, and initReg is
        // free to be used as the store address.
        regNumber rZero1 = REG_ZR;
        rAddr            = initReg;
        *pInitRegZeroed  = false;

#endif // !defined(_TARGET_ARM_)

        bool     useLoop   = false;
        unsigned uCntBytes = untrLclHi - untrLclLo;
        assert((uCntBytes % sizeof(int)) == 0);         // The smallest stack slot is always 4 bytes.
        unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.

        // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
        // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
        // In both of these cases the stm/stp instruction will write two zeros to memory
        // and we will use a single str instruction at the end whenever we have an odd count.
        if (uCntSlots >= 10)
            useLoop = true;

        if (useLoop)
        {
            // We pick the next lowest register number for rCnt
            noway_assert(availMask != RBM_NONE);
            regMask = genFindLowestBit(availMask);
            rCnt    = genRegNumFromMask(regMask);
            availMask &= ~regMask;
        }

        assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
               0); // rAddr is not a live incoming argument reg
#if defined(_TARGET_ARM_)
        if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
#else  // !_TARGET_ARM_
        if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
#endif // !_TARGET_ARM_
        {
            // rAddr = frame pointer + untrLclLo (offset fits in the add immediate)
            getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
        }
        else
        {
            // Load immediate into the InitReg register
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
            getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
            *pInitRegZeroed = false;
        }

        if (useLoop)
        {
            noway_assert(uCntSlots >= 2);
            assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
                   0); // rCnt is not a live incoming argument reg
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
        }

#if defined(_TARGET_ARM_)
        rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
        target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
#endif // _TARGET_ARM_

        if (!useLoop)
        {
            // Unrolled: one stm/stp per pair of slots.
            while (uCntBytes >= REGSIZE_BYTES * 2)
            {
#ifdef _TARGET_ARM_
                getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
#else  // !_TARGET_ARM_
                getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
                                              INS_OPTS_POST_INDEX);
#endif // !_TARGET_ARM_
                uCntBytes -= REGSIZE_BYTES * 2;
            }
        }
        else // useLoop is true
        {
#ifdef _TARGET_ARM_
            getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
            getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
#else  // !_TARGET_ARM_
            getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
                                          INS_OPTS_POST_INDEX); // zero stack slots
            getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
#endif // !_TARGET_ARM_
            // Branch back over the store+decrement while rCnt > 0.
            getEmitter()->emitIns_J(INS_bhi, NULL, -3);
            uCntBytes %= REGSIZE_BYTES * 2;
        }

        if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
        {
#ifdef _TARGET_ARM_
            getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
#else  // _TARGET_ARM_
            if ((uCntBytes - REGSIZE_BYTES) == 0)
            {
                getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
            }
            else
            {
                getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
            }
#endif // !_TARGET_ARM_
            uCntBytes -= REGSIZE_BYTES;
        }
#ifdef _TARGET_ARM64_
        if (uCntBytes > 0)
        {
            // A final 4-byte slot can remain on ARM64 (REGSIZE_BYTES is 8).
            assert(uCntBytes == sizeof(int));
            getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
            uCntBytes -= sizeof(int);
        }
#endif // _TARGET_ARM64_
        noway_assert(uCntBytes == 0);

#elif defined(_TARGET_XARCH_)
        /*
            Generate the following code:

                lea     edi, [ebp/esp-OFFS]
                mov     ecx, <size>
                xor     eax, eax
                rep     stosd
         */

        noway_assert(regSet.rsRegsModified(RBM_EDI));

#ifdef UNIX_AMD64_ABI
        // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
        {
            noway_assert(regSet.rsRegsModified(RBM_R12));
            inst_RV_RV(INS_mov, REG_R12, REG_RCX);
            regSet.verifyRegUsed(REG_R12);
        }

        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
        {
            noway_assert(regSet.rsRegsModified(RBM_R13));
            inst_RV_RV(INS_mov, REG_R13, REG_RDI);
            regSet.verifyRegUsed(REG_R13);
        }
#else  // !UNIX_AMD64_ABI
        // For register arguments we may have to save ECX
        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
        {
            noway_assert(regSet.rsRegsModified(RBM_ESI));
            inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
            regSet.verifyRegUsed(REG_ESI);
        }
#endif // !UNIX_AMD64_ABI

        noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);

        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
        regSet.verifyRegUsed(REG_EDI);

        inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
        instGen(INS_r_stosd);

#ifdef UNIX_AMD64_ABI
        // Move back the argument registers
        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
        {
            inst_RV_RV(INS_mov, REG_RCX, REG_R12);
        }

        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
        {
            inst_RV_RV(INS_mov, REG_RDI, REG_R13);
        }
#else  // !UNIX_AMD64_ABI
        // Move back the argument registers
        if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
        {
            inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
        }
#endif // !UNIX_AMD64_ABI

#else // _TARGET_*
#error Unsupported or unset target architecture
#endif // _TARGET_*
    }
    else if (genInitStkLclCnt > 0)
    {
        assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
               0); // initReg is not a live incoming argument reg

        /* Initialize any lvMustInit vars on the stack */

        LclVarDsc* varDsc;
        unsigned   varNum;

        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
        {
            if (!varDsc->lvMustInit)
            {
                continue;
            }

            // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
            // partially-enregistered vars in the case where we don't use a block init.
            noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);

            // lvMustInit can only be set for GC types or TYP_STRUCT types
            // or when compInitMem is true
            // or when in debug code

            noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
                         compiler->info.compInitMem || compiler->opts.compDbgCode);

            if (!varDsc->lvOnFrame)
            {
                continue;
            }

            if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
                (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
            {
                // We only initialize the GC variables in the TYP_STRUCT
                const unsigned slots  = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
                const BYTE*    gcPtrs = compiler->lvaGetGcLayout(varNum);

                for (unsigned i = 0; i < slots; i++)
                {
                    if (gcPtrs[i] != TYPE_GC_NONE)
                    {
                        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
                                                  genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
                    }
                }
            }
            else
            {
                regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);

                // zero out the whole thing rounded up to a single stack slot size
                unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
                unsigned i;
                for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
                {
                    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
                }

#ifdef _TARGET_64BIT_
                // On 64-bit a trailing 4-byte remainder is possible; zero it with a 4-byte store.
                assert(i == lclSize || (i + sizeof(int) == lclSize));
                if (i != lclSize)
                {
                    getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
                    i += sizeof(int);
                }
#endif // _TARGET_64BIT_
                assert(i == lclSize);
            }
        }

        if (!TRACK_GC_TEMP_LIFETIMES)
        {
            assert(regSet.tmpAllFree());
            for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
            {
                if (!varTypeIsGC(tempThis->tdTempType()))
                {
                    continue;
                }

                // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);

                inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
            }
        }
    }
}
6301 /*-----------------------------------------------------------------------------
6303 * Save the generic context argument.
6305 * We need to do this within the "prolog" in case anyone tries to inspect
6306 * the param-type-arg/this (which can be done after the prolog) using
6307 * ICodeManager::GetParamTypeArg().
// Store the generic context argument (or "this") into its cached frame slot
// during the prolog, so ICodeManager::GetParamTypeArg() can find it even
// before the argument reaches its final home.
void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
{
    assert(compiler->compGeneratingProlog);

    bool reportArg = compiler->lvaReportParamTypeArg();

    // We should report either generic context arg or "this" when used so.
    if (!reportArg)
    {
#ifndef JIT32_GCENCODER
        if (!compiler->lvaKeepAliveAndReportThis())
#endif
        {
            return;
        }
    }

    // For JIT32_GCENCODER, we won't be here if reportArg is false.
    unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;

    noway_assert(contextArg != BAD_VAR_NUM);
    LclVarDsc* varDsc = &compiler->lvaTable[contextArg];

    // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
    // moved to its final home location. So we need to use it from the
    // incoming location.

    regNumber reg;

    bool isPrespilledForProfiling = false;
#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
    isPrespilledForProfiling =
        compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
#endif

    // Load from the argument register only if it is not prespilled.
    if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
    {
        reg = varDsc->lvArgReg;
    }
    else
    {
        if (isFramePointerUsed())
        {
#if defined(_TARGET_ARM_)
            // lvStkOffs is always valid for incoming stack-arguments, even if the argument
            // will become enregistered.
            // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
            noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
                         (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
#else
            // lvStkOffs is always valid for incoming stack-arguments, even if the argument
            // will become enregistered.
            noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
#endif
        }

        // We will just use the initReg since it is an available register
        // and we are probably done using it anyway...
        reg             = initReg;
        *pInitRegZeroed = false;

        // mov reg, [compiler->info.compTypeCtxtArg]
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
        regSet.verifyRegUsed(reg);
    }

#if CPU_LOAD_STORE_ARCH
    getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
                                compiler->lvaCachedGenericContextArgOffset());
#else  // CPU_LOAD_STORE_ARCH
    // mov [ebp-lvaCachedGenericContextArgOffset()], reg
    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
                               compiler->lvaCachedGenericContextArgOffset());
#endif // !CPU_LOAD_STORE_ARCH
}
6387 /*-----------------------------------------------------------------------------
6389 * Set the "GS" security cookie in the prolog.
// Write the GS security cookie into its frame slot in the prolog, either as
// an immediate constant or by loading it through the global cookie address.
void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
{
    assert(compiler->compGeneratingProlog);

    if (!compiler->getNeedsGSSecurityCookie())
    {
        return;
    }

    // Exactly one of the address / immediate-value forms must be available.
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
    {
#ifdef _TARGET_AMD64_
        // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
        getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
#else
        //  mov   dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
        instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
                                   compiler->lvaGSSecurityCookie, 0, initReg);
#endif
    }
    else
    {
        regNumber reg;
#ifdef _TARGET_XARCH_
        // Always use EAX on x86 and x64
        // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
        reg = REG_EAX;
#else
        // We will just use the initReg since it is an available register
        reg = initReg;
#endif

        *pInitRegZeroed = false;

#if CPU_LOAD_STORE_ARCH
        // Load the cookie through its address in two steps (load/store arch).
        instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
        regSet.verifyRegUsed(reg);
#else
        //  mov   reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
        //  mov   dword ptr [frame.GSSecurityCookie], reg
        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        regSet.verifyRegUsed(reg);
#endif
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
    }
}
6443 #ifdef PROFILING_SUPPORTED
6445 //-----------------------------------------------------------------------------------
6446 // genProfilingEnterCallback: Generate the profiling function enter callback.
6449 // initReg - register to use as scratch register
6450 // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
6451 // not zero after this call.
6457 // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
6458 // VM\i386\asmhelpers.asm for details):
6459 // 1. The calling sequence for calling the helper is:
6460 // push FunctionIDOrClientID
6461 // call ProfileEnterHelper
6462 // 2. The calling function has an EBP frame.
6463 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6464 // the following prolog is assumed:
6467 // 4. All registers are preserved.
6468 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
6470 void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
6472 assert(compiler->compGeneratingProlog);
6474 // Give profiler a chance to back out of hooking this method
6475 if (!compiler->compIsProfilerHookNeeded())
6480 #if defined(_TARGET_AMD64_)
6481 #if !defined(UNIX_AMD64_ABI)
6486 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6487 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6488 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6490 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
6491 // In case of vararg methods, arg regs are already homed.
6493 // Note: Here we don't need to worry about updating gc'info since enter
6494 // callback is generated as part of prolog which is non-gc interruptible.
6495 // Moreover GC cannot kick while executing inside profiler callback which is a
6496 // profiler requirement so it can examine arguments which could be obj refs.
6497 if (!compiler->info.compIsVarArgs)
6499 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6501 noway_assert(varDsc->lvIsParam);
6503 if (!varDsc->lvIsRegArg)
6508 var_types storeType = varDsc->lvaArgType();
6509 regNumber argReg = varDsc->lvArgReg;
6511 instruction store_ins = ins_Store(storeType);
6514 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
6516 store_ins = INS_mov;
6518 #endif // FEATURE_SIMD
6520 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
6524 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6525 // RCX = ProfilerMethHnd
6526 if (compiler->compProfilerMethHndIndirected)
6528 // Profiler hooks enabled during Ngen time.
6529 // Profiler handle needs to be accessed through an indirection of a pointer.
6530 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6534 // No need to record relocations, if we are generating ELT hooks under the influence
6535 // of COMPlus_JitELTHookEnabled=1
6536 if (compiler->opts.compJitELTHookEnabled)
6538 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6542 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6546 // RDX = caller's SP
6548 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6549 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6550 // of that offset to FramePointer to obtain caller's SP value.
6551 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6552 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6553 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6555 // Can't have a call until we have enough padding for rejit
6556 genPrologPadForReJit();
6558 // This will emit either
6559 // "call ip-relative 32-bit offset" or
6560 // "mov rax, helper addr; call rax"
6561 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
6563 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
6564 // generation logic that moves args around as required by first BB entry point conditions
6565 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
6566 // and genEnregisterIncomingStackArgs().
6568 // Now reload arg registers from home locations.
6570 // - we need to reload only known (i.e. fixed) reg args.
6571 // - if floating point type, also reload it into corresponding integer reg
6572 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6574 noway_assert(varDsc->lvIsParam);
6576 if (!varDsc->lvIsRegArg)
6581 var_types loadType = varDsc->lvaArgType();
6582 regNumber argReg = varDsc->lvArgReg;
6584 instruction load_ins = ins_Load(loadType);
6587 if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
6591 #endif // FEATURE_SIMD
6593 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
6596 if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
6598 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
6599 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6600 inst_RV_RV(ins, argReg, intArgReg, loadType);
6602 #endif // FEATURE_VARARG
6605 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6606 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6608 *pInitRegZeroed = false;
6611 #else // !defined(UNIX_AMD64_ABI)
6613 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6614 // R14 = ProfilerMethHnd
6615 if (compiler->compProfilerMethHndIndirected)
6617 // Profiler hooks enabled during Ngen time.
6618 // Profiler handle needs to be accessed through an indirection of a pointer.
6619 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
6620 (ssize_t)compiler->compProfilerMethHnd);
6624 // No need to record relocations, if we are generating ELT hooks under the influence
6625 // of COMPlus_JitELTHookEnabled=1
6626 if (compiler->opts.compJitELTHookEnabled)
6628 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6632 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6636 // R15 = caller's SP
6638 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6639 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6640 // of that offset to FramePointer to obtain caller's SP value.
6641 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6642 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6643 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
6645 // Can't have a call until we have enough padding for rejit
6646 genPrologPadForReJit();
6648 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6649 // We use R11 here. This will emit either
6650 // "call ip-relative 32-bit offset" or
6651 // "mov r11, helper addr; call r11"
6652 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6654 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6655 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6657 *pInitRegZeroed = false;
6660 #endif // !defined(UNIX_AMD64_ABI)
6662 #elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
6664 unsigned saveStackLvl2 = genStackLevel;
6666 #if defined(_TARGET_X86_)
6667 // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
6668 // for x86 stack unwinding
6670 #if defined(UNIX_X86_ABI)
6671 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6672 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6673 #endif // UNIX_X86_ABI
6675 // Push the profilerHandle
6676 if (compiler->compProfilerMethHndIndirected)
6678 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6682 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6685 #elif defined(_TARGET_ARM_)
6686 // On Arm arguments are prespilled on stack, which frees r0-r3.
6687 // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
6688 // The call target register could be any free register.
6689 regNumber argReg = REG_PROFILER_ENTER_ARG;
6690 regMaskTP argRegMask = genRegMask(argReg);
6691 assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
6693 if (compiler->compProfilerMethHndIndirected)
6695 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
6696 regSet.verifyRegUsed(argReg);
6700 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
6703 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6707 // Can't have a call until we have enough padding for rejit
6709 genPrologPadForReJit();
6711 // This will emit either
6712 // "call ip-relative 32-bit offset" or
6713 // "mov rax, helper addr; call rax"
6714 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
6715 0, // argSize. Again, we have to lie about it
6716 EA_UNKNOWN); // retSize
6718 #if defined(_TARGET_X86_)
6719 // Check that we have place for the push.
6720 assert(compiler->fgPtrArgCntMax >= 1);
6722 #if defined(UNIX_X86_ABI)
6723 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6724 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6725 #endif // UNIX_X86_ABI
6727 #elif defined(_TARGET_ARM_)
6728 if (initReg == argReg)
6730 *pInitRegZeroed = false;
6733 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6736 /* Restore the stack level */
6738 SetStackLevel(saveStackLvl2);
6741 NYI("Emit Profiler Enter callback");
6745 //-----------------------------------------------------------------------------------
6746 // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
6747 // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
6750 // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
6756 // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
6757 // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
6758 // 1. The calling sequence for calling the helper is:
6759 // push FunctionIDOrClientID
6760 // call ProfileLeaveHelper or ProfileTailcallHelper
6761 // 2. The calling function has an EBP frame.
6762 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6763 // the following prolog is assumed:
6766 // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
6767 // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
6768 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
6770 void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
// Emits the profiler Leave/Tailcall (ELT) callout. Only two helpers are legal here.
6772 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
6774 // Only hook if profiler says it's okay.
6775 if (!compiler->compIsProfilerHookNeeded())
// Record that this method makes a profiler callback (consumed elsewhere, e.g. GC info).
6780 compiler->info.compProfilerCallback = true;
6782 // Need to save on to the stack level, since the helper call will pop the argument
6783 unsigned saveStackLvl2 = genStackLevel;
// Target-specific emission of the Leave/Tailcall callout follows.
6785 #if defined(_TARGET_AMD64_)
6786 #if !defined(UNIX_AMD64_ABI)
6788 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6789 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6790 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6792 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
6793 // registers that profiler callback kills.
6794 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
6796 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
6797 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
6800 // At this point return value is computed and stored in RAX or XMM0.
6801 // On Amd64, Leave callback preserves the return register. We keep
6802 // RAX alive by not reporting as trashed by helper call. Also note
6803 // that GC cannot kick-in while executing inside profiler callback,
6804 // which is a requirement of profiler as well since it needs to examine
6805 // return value which could be an obj ref.
6807 // RCX = ProfilerMethHnd
6808 if (compiler->compProfilerMethHndIndirected)
6810 // Profiler hooks enabled during Ngen time.
6811 // Profiler handle needs to be accessed through an indirection of an address.
6812 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6816 // Don't record relocations, if we are generating ELT hooks under the influence
6817 // of COMPlus_JitELTHookEnabled=1
6818 if (compiler->opts.compJitELTHookEnabled)
6820 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL)
6824 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6828 // RDX = caller's SP
6829 // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
6830 // of the statements to execute unconditionally and clean-up rest.
6831 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6833 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
6834 // value of that offset to FramePointer to obtain caller's SP value.
6835 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6836 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6840 // If we are here means that it is a tentative frame layout during which we
6841 // cannot use caller's SP offset since it is an estimate. For now we require the
6842 // method to have at least a single arg so that we can use it to obtain caller's SP.
6844 LclVarDsc* varDsc = compiler->lvaTable;
6845 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6847 // lea rdx, [FramePointer + Arg0's offset]
6848 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6851 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
6852 // We use R8 here. This will emit either
6853 // "call ip-relative 32-bit offset" or
6854 // "mov r8, helper addr; call r8"
6855 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
6857 #else // !defined(UNIX_AMD64_ABI)
6859 // RDI = ProfilerMethHnd
6860 if (compiler->compProfilerMethHndIndirected)
6862 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6866 if (compiler->opts.compJitELTHookEnabled)
6868 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6872 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6876 // RSI = caller's SP
6877 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6879 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6880 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
// Tentative frame layout: same fallback as the Windows AMD64 path above — use Arg0's slot.
6884 LclVarDsc* varDsc = compiler->lvaTable;
6885 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6887 // lea rdx, [FramePointer + Arg0's offset]
6888 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6891 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6892 // We use R11 here. This will emit either
6893 // "call ip-relative 32-bit offset" or
6894 // "mov r11, helper addr; call r11"
6895 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6897 #endif // !defined(UNIX_AMD64_ABI)
6899 #elif defined(_TARGET_X86_)
6901 #if defined(UNIX_X86_ABI)
6902 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6903 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6905 AddNestedAlignment(0xC);
6906 #endif // UNIX_X86_ABI
6909 // Push the profilerHandle
6912 if (compiler->compProfilerMethHndIndirected)
6914 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6918 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6922 #if defined(UNIX_X86_ABI)
6923 int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
6925 int argSize = REGSIZE_BYTES;
6927 genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
6929 // Check that we have place for the push.
6930 assert(compiler->fgPtrArgCntMax >= 1);
6932 #if defined(UNIX_X86_ABI)
6933 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6934 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6935 SubtractStackLevel(0x10);
6936 SubtractNestedAlignment(0xC);
6937 #endif // UNIX_X86_ABI
6939 #elif defined(_TARGET_ARM_)
6941 // Push the profilerHandle
6944 // Contract between JIT and Profiler Leave callout on arm:
6945 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
6946 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
6947 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
6948 // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
6950 // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
// 'attr' records the emit attribute used when shuttling r0 to/from REG_PROFILER_RET_SCRATCH
// (presumably set to the GC-ness of r0 in elided code below — confirm against full source).
6953 emitAttr attr = EA_UNKNOWN;
6955 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
6956 (varTypeIsFloating(compiler->info.compRetType) ||
6957 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
6963 // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
6964 // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
6965 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
6968 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
6970 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
6973 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
6980 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
6981 regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
// r0 no longer holds a GC pointer; the scratch register was marked above instead.
6982 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
6986 if (compiler->compProfilerMethHndIndirected)
6988 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6989 regSet.verifyRegUsed(REG_ARG_0);
6993 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
// NOTE(review): the ARM path always emits CORINFO_HELP_PROF_FCN_LEAVE, ignoring the
// 'helper' parameter — confirm the tailcall callout is handled elsewhere on ARM.
6996 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
6998 EA_UNKNOWN); // retSize
7000 // Restore state that existed before profiler callback
7003 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
7004 regSet.verifyRegUsed(REG_ARG_0);
7005 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
7009 NYI("Emit Profiler Leave callback");
7012 /* Restore the stack level */
7013 SetStackLevel(saveStackLvl2);
7016 #endif // PROFILING_SUPPORTED
7018 /*****************************************************************************
7023 These instructions are just a reordering of the instructions used today.
7029 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7031 add esp, LOCALS_SIZE / pop dummyReg
7041 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
7042 Everything else is similar, though in a different order.
7044 The security object will no longer be at a fixed offset. However, the
7045 offset can still be determined by looking up the GC-info and determining
7046 how many callee-saved registers are pushed.
7053 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7055 add esp, LOCALS_SIZE / pop dummyReg
7059 (mov esp, ebp if there are no callee-saved registers)
7063 Double-aligned frame :
7064 --------------------
7066 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
7067 of callee-saved registers are pushed on the stack so that the locals
7068 themselves are qword-aligned. The instructions are the same as today,
7069 just in a different order.
7077 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
7079 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
7088 localloc (with ebp) frames :
7089 --------------------------
7091 The instructions are the same as today, just in a different order.
7092 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
7093 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
7100 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7102 lea esp, [ebp-calleeSavedRegsPushedSize]
7106 (mov esp, ebp if there are no callee-saved registers)
7110 *****************************************************************************/
7112 /*****************************************************************************
7114 * Generates appropriate NOP padding for a function prolog to support ReJIT.
7117 void CodeGen::genPrologPadForReJit()
// Must only run while the prolog is being generated.
7119 assert(compiler->compGeneratingProlog);
// Padding is an x86/x64-only concern, and only when the REJIT-NOPS jit flag is set.
7121 #ifdef _TARGET_XARCH_
7122 if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
7127 #if FEATURE_EH_FUNCLETS
7129 // No need to generate pad (nops) for funclets.
7130 // When compiling the main function (and not a funclet)
7131 // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
7132 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7137 #endif // FEATURE_EH_FUNCLETS
// Estimate of the prolog size emitted so far; presumably used to compute how many
// NOP bytes to emit (the emission code follows in lines not visible in this view).
7139 unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
7147 /*****************************************************************************
7149 * Reserve space for a function prolog.
7152 void CodeGen::genReserveProlog(BasicBlock* block)
7154 assert(block != nullptr);
7156 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
7158 /* Nothing is live on entry to the prolog */
// Create a placeholder instruction group for the prolog with empty GC var/reg sets;
// the real prolog instructions are generated later (see genFnProlog).
7160 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
7163 /*****************************************************************************
7165 * Reserve space for a function epilog.
7168 void CodeGen::genReserveEpilog(BasicBlock* block)
// Snapshot the current GC-ref / byref register sets; they may be augmented below
// to keep the return value reported live through the epilog.
7170 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
7171 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
7173 /* The return value is special-cased: make sure it goes live for the epilog */
7175 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
// Only needed for a full pointer reg map, and not for a GT_JMP epilog (no return value).
7177 if (genFullPtrRegMap && !jmpEpilog)
7179 if (varTypeIsGC(compiler->info.compRetNativeType))
7181 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
7183 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
// Add the return register to the matching set (case labels elided in this view —
// presumably TYP_REF selects gcref and TYP_BYREF selects byref; confirm against full source).
7185 switch (compiler->info.compRetNativeType)
7188 gcrefRegsArg |= RBM_INTRET;
7191 byrefRegsArg |= RBM_INTRET;
7199 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
7201 assert(block != nullptr);
7202 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
// 'last' marks the placeholder created for the final block in the method.
7203 bool last = (block->bbNext == nullptr);
7204 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
7207 #if FEATURE_EH_FUNCLETS
7209 /*****************************************************************************
7211 * Reserve space for a funclet prolog.
7214 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
7216 assert(block != nullptr);
7218 /* Currently, no registers are live on entry to the prolog, except maybe
7219 the exception object. There might be some live stack vars, but they
7220 cannot be accessed until after the frame pointer is re-established.
7221 In order to potentially prevent emitting a death before the prolog
7222 and a birth right after it, we just report it as live during the
7223 prolog, and rely on the prolog being non-interruptible. Trust
7224 genCodeForBBlist to correctly initialize all the sets.
7226 We might need to relax these asserts if the VM ever starts
7227 restoring any registers, then we could have live-in reg vars...
// At most the exception object register may be live-in; no byrefs are expected.
7230 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
7231 noway_assert(gcInfo.gcRegByrefSetCur == 0);
7233 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
// Unlike the main prolog, the current GC sets are passed through so liveness is
// reported across the (non-interruptible) funclet prolog.
7235 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7236 gcInfo.gcRegByrefSetCur, false);
7239 /*****************************************************************************
7241 * Reserve space for a funclet epilog.
7244 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
7246 assert(block != nullptr);
7248 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
// 'last' marks the placeholder created for the final block in the method.
7250 bool last = (block->bbNext == nullptr);
// Current GC liveness is carried through the placeholder, mirroring genReserveFuncletProlog.
7251 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7252 gcInfo.gcRegByrefSetCur, last);
7255 #endif // FEATURE_EH_FUNCLETS
7257 /*****************************************************************************
7258 * Finalize the frame size and offset assignments.
7260 * No changes can be made to the modified register set after this, since that can affect how many
7261 * callee-saved registers get saved.
7263 void CodeGen::genFinalizeFrame()
7265 JITDUMP("Finalizing stack frame\n");
7267 // Initializations need to happen based on the var locations at the start
7268 // of the first basic block, so load those up. In particular, the determination
7269 // of whether or not to use block init in the prolog is dependent on the variable
7270 // locations on entry to the function.
7271 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
7273 genCheckUseBlockInit();
7275 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7276 CLANG_FORMAT_COMMENT_ANCHOR;
7278 #if defined(_TARGET_X86_)
7280 if (compiler->compTailCallUsed)
7282 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
7283 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
7284 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
7285 // actually get saved.
7287 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
7289 #endif // _TARGET_X86_
7291 #if defined(_TARGET_ARMARCH_)
7292 // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
7293 // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
7294 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7296 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
7298 #endif // defined(_TARGET_ARMARCH_)
7300 #if defined(_TARGET_ARM_)
7301 // If there are any reserved registers, add them to the modified register set.
7302 if (regSet.rsMaskResvd != RBM_NONE)
7304 regSet.rsSetRegsModified(regSet.rsMaskResvd);
7306 #endif // _TARGET_ARM_
7311 printf("Modified regs: ");
7312 dspRegMask(regSet.rsGetModifiedRegsMask());
7317 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7318 if (compiler->opts.compDbgEnC)
7320 // We always save FP.
7321 noway_assert(isFramePointerUsed());
7322 #ifdef _TARGET_AMD64_
7323 // On x64 we always save exactly RBP, RSI and RDI for EnC.
7324 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
7325 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
7326 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
7327 #else // !_TARGET_AMD64_
7328 // On x86 we save all callee saved regs so the saved reg area size is consistent
7329 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7330 #endif // !_TARGET_AMD64_
7333 /* If we have any pinvoke calls, we might potentially trash everything */
7334 if (compiler->info.compCallUnmanaged)
7336 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
7337 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7340 #ifdef UNIX_AMD64_ABI
7341 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
7342 if (compiler->compIsProfilerHookNeeded())
7344 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
7348 /* Count how many callee-saved registers will actually be saved (pushed) */
7350 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
7351 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
7354 // EBP cannot be (directly) modified
7355 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
7358 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
7360 #ifdef _TARGET_ARMARCH_
7361 if (isFramePointerUsed())
7363 // For a FP based frame we have to push/pop the FP register
7365 maskCalleeRegsPushed |= RBM_FPBASE;
7367 // This assert checks that we are not using REG_FP
7368 // as both the frame pointer and as a codegen register
7370 assert(!regSet.rsRegsModified(RBM_FPBASE));
7373 // we always push LR. See genPushCalleeSavedRegisters
7375 maskCalleeRegsPushed |= RBM_LR;
7377 #if defined(_TARGET_ARM_)
7378 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
7379 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
7380 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
7382 if ((maskPushRegsFloat != RBM_NONE) ||
7383 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
7385 // Here we try to keep stack double-aligned before the vpush
7386 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
// Pick the lowest-numbered r4..r10 not already pushed as the extra alignment push.
7388 regNumber extraPushedReg = REG_R4;
7389 while (maskPushRegsInt & genRegMask(extraPushedReg))
7391 extraPushedReg = REG_NEXT(extraPushedReg);
7393 if (extraPushedReg < REG_R11)
7395 maskPushRegsInt |= genRegMask(extraPushedReg);
7396 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
7399 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
7402 // We currently only expect to push/pop consecutive FP registers
7403 // and these have to be double-sized registers as well.
7404 // Here we will ensure that maskPushRegsFloat obeys these requirements.
7406 if (maskPushRegsFloat != RBM_NONE)
// Grow a contiguous double-register mask starting at REG_F16 until it covers
// every float register we need to push, then pad the push mask up to it.
7408 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
7409 while (maskPushRegsFloat > contiguousMask)
7411 contiguousMask <<= 2;
7412 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
7414 if (maskPushRegsFloat != contiguousMask)
7416 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
7417 maskPushRegsFloat |= maskExtraRegs;
7418 regSet.rsSetRegsModified(maskExtraRegs);
7419 maskCalleeRegsPushed |= maskExtraRegs;
7422 #endif // _TARGET_ARM_
7423 #endif // _TARGET_ARMARCH_
7425 #if defined(_TARGET_XARCH_)
7426 // Compute the count of callee saved float regs saved on stack.
7427 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
7428 // regs are stack allocated and preserved in their stack locations.
7429 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
7430 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
7431 #endif // defined(_TARGET_XARCH_)
7433 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
7438 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
7439 dspRegMask(maskCalleeRegsPushed);
7444 /* Assign the final offsets to things living on the stack frame */
7446 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
7448 /* We want to make sure that the prolog size calculated here is accurate
7449 (that is instructions will not shrink because of conservative stack
7450 frame approximations). We do this by filling in the correct size
7451 here (where we have committed to the final numbers for the frame offsets)
7452 This will ensure that the prolog size is always correct
7454 getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize();
7457 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
7459 compiler->lvaTableDump();
7464 //------------------------------------------------------------------------
7465 // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
7468 // delta - the offset to add to the current stack pointer to establish the frame pointer
7469 // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
7471 void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
7473 assert(compiler->compGeneratingProlog);
7475 #if defined(_TARGET_XARCH_)
// "mov fp, sp" form — presumably taken when delta == 0 (the branch condition
// lines are not visible in this view; confirm against full source).
7479 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
// "lea fp, [sp + delta]" form for a non-zero delta.
7484 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7485 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
7489 if (reportUnwindData)
7491 compiler->unwindSetFrameReg(REG_FPBASE, delta);
7494 #elif defined(_TARGET_ARM_)
// ARM encodes the delta as an add-to-SP immediate, which constrains its range.
7496 assert(arm_Valid_Imm_For_Add_SP(delta));
7497 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7499 if (reportUnwindData)
7501 compiler->unwindPadding();
7505 NYI("establish frame pointer");
7509 /*****************************************************************************
7511 * Generates code for a function prolog.
7513 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
7515 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
7516 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
7517 * only instructions which result in control not going to the next instruction. Basically, any time execution would
7518 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
7519 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
7520 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
7522 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
7523 * debugger team to ensure that stepping still works.
7525 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
7529 #pragma warning(push)
7530 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
7532 void CodeGen::genFnProlog()
7534 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
7536 compiler->funSetCurrentFunc(0);
7541 printf("*************** In genFnProlog()\n");
7546 genInterruptibleUsed = true;
7549 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
7551 /* Ready to start on the prolog proper */
7553 getEmitter()->emitBegProlog();
7554 compiler->unwindBegProlog();
7556 // Do this so we can put the prolog instruction group ahead of
7557 // other instruction groups
7558 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
7561 if (compiler->opts.dspCode)
7563 printf("\n__prolog:\n");
7567 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
7569 // Create new scopes for the method-parameters for the prolog-block.
7575 if (compiler->compJitHaltMethod())
7577 /* put a nop first because the debugger and other tools are likely to
7578 put an int3 at the begining and we don't want to confuse them */
7581 instGen(INS_BREAKPOINT);
7583 #ifdef _TARGET_ARMARCH_
7584 // Avoid asserts in the unwind info because these instructions aren't accounted for.
7585 compiler->unwindPadding();
7586 #endif // _TARGET_ARMARCH_
7590 #if FEATURE_EH_FUNCLETS && defined(DEBUG)
7592 // We cannot force 0-initialization of the PSPSym
7593 // as it will overwrite the real value
7594 if (compiler->lvaPSPSym != BAD_VAR_NUM)
7596 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
7597 assert(!varDsc->lvMustInit);
7600 #endif // FEATURE_EH_FUNCLETS && DEBUG
7602 /*-------------------------------------------------------------------------
7604 * Record the stack frame ranges that will cover all of the tracked
7605 * and untracked pointer variables.
7606 * Also find which registers will need to be zero-initialized.
7608 * 'initRegs': - Generally, enregistered variables should not need to be
7609 * zero-inited. They only need to be zero-inited when they
7610 * have a possibly uninitialized read on some control
7611 * flow path. Apparently some of the IL_STUBs that we
7612 * generate have this property.
7615 int untrLclLo = +INT_MAX;
7616 int untrLclHi = -INT_MAX;
7617 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
7618 // Note that they may be tracked, but simply not allocated to a register.
7619 bool hasUntrLcl = false;
7621 int GCrefLo = +INT_MAX;
7622 int GCrefHi = -INT_MAX;
7623 bool hasGCRef = false;
7625 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
7626 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
7627 regMaskTP initDblRegs = RBM_NONE;
7632 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7634 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
7639 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
7641 noway_assert(varDsc->lvRefCnt() == 0);
7645 signed int loOffs = varDsc->lvStkOffs;
7646 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
7648 /* We need to know the offset range of tracked stack GC refs */
7649 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
7651 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
7653 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
7654 // taken care of by the parent struct.
7655 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
7659 if (loOffs < GCrefLo)
7663 if (hiOffs > GCrefHi)
7670 /* For lvMustInit vars, gather pertinent info */
7672 if (!varDsc->lvMustInit)
7677 if (varDsc->lvIsInReg())
7679 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
7680 if (!varDsc->IsFloatRegType())
7682 initRegs |= regMask;
7684 if (varTypeIsMultiReg(varDsc))
7686 if (varDsc->lvOtherReg != REG_STK)
7688 initRegs |= genRegMask(varDsc->lvOtherReg);
7692 /* Upper DWORD is on the stack, and needs to be inited */
7694 loOffs += sizeof(int);
7699 else if (varDsc->TypeGet() == TYP_DOUBLE)
7701 initDblRegs |= regMask;
7705 initFltRegs |= regMask;
7714 if (loOffs < untrLclLo)
7718 if (hiOffs > untrLclHi)
7725 /* Don't forget about spill temps that hold pointers */
7727 if (!TRACK_GC_TEMP_LIFETIMES)
7729 assert(regSet.tmpAllFree());
7730 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
7732 if (!varTypeIsGC(tempThis->tdTempType()))
7737 signed int loOffs = tempThis->tdTempOffs();
7738 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
7740 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
7741 // previous frame pointer. Thus, stkOffs can't be zero.
7742 CLANG_FORMAT_COMMENT_ANCHOR;
7744 #if !defined(_TARGET_AMD64_)
7745 // However, on amd64 there is no requirement to chain frame pointers.
7747 noway_assert(!isFramePointerUsed() || loOffs != 0);
7748 #endif // !defined(_TARGET_AMD64_)
7750 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
7754 if (loOffs < untrLclLo)
7758 if (hiOffs > untrLclHi)
7765 assert((genInitStkLclCnt > 0) == hasUntrLcl);
7770 if (genInitStkLclCnt > 0)
7772 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
7779 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
7780 // Ditto for all enregistered user arguments in a varargs method.
7781 // These registers will be available to use for the initReg. We just remove
7782 // all of these registers from the rsCalleeRegArgMaskLiveIn.
7784 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
7787 /* Choose the register to use for zero initialization */
7789 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
7790 bool initRegZeroed = false;
7791 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
7794 // We should not use the special PINVOKE registers as the initReg
7795 // since they are trashed by the jithelper call to setup the PINVOKE frame
7796 if (compiler->info.compCallUnmanaged)
7798 excludeMask |= RBM_PINVOKE_FRAME;
7800 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
7801 if (!compiler->opts.ShouldUsePInvokeHelpers())
7803 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
7805 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
7807 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
7809 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
7810 if (varDsc->lvRegister)
7812 excludeMask |= genRegMask(varDsc->lvRegNum);
7818 // If we have a variable sized frame (compLocallocUsed is true)
7819 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
7820 if (compiler->compLocallocUsed)
7822 excludeMask |= RBM_SAVED_LOCALLOC_SP;
7824 #endif // _TARGET_ARM_
7826 #if defined(_TARGET_XARCH_)
7827 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7829 // We currently must use REG_EAX on x86 here
7830 // because the loop's backwards branch depends upon the size of EAX encodings
7831 assert(initReg == REG_EAX);
7834 #endif // _TARGET_XARCH_
7836 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
7838 if (tempMask != RBM_NONE)
7840 // We will use one of the registers that we were planning to zero init anyway.
7841 // We pick the lowest register number.
7842 tempMask = genFindLowestBit(tempMask);
7843 initReg = genRegNumFromMask(tempMask);
7845 // Next we prefer to use one of the unused argument registers.
7846 // If they aren't available we use one of the caller-saved integer registers.
7849 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
7850 if (tempMask != RBM_NONE)
7852 // We pick the lowest register number
7853 tempMask = genFindLowestBit(tempMask);
7854 initReg = genRegNumFromMask(tempMask);
7859 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
7861 #if defined(_TARGET_AMD64_)
7862 // If we are a varargs call, in order to set up the arguments correctly this
7863 // must be done in a 2 step process. As per the x64 ABI:
7864 // a) The caller sets up the argument shadow space (just before the return
7865 // address, 4 pointer sized slots).
7866 // b) The callee is responsible to home the arguments on the shadow space
7867 // provided by the caller.
7868 // This way, the varargs iterator will be able to retrieve the
7869 // call arguments properly since both the arg regs and the stack allocated
7870 // args will be contiguous.
7871 if (compiler->info.compIsVarArgs)
7873 getEmitter()->spillIntArgRegsToShadowSlots();
7876 #endif // _TARGET_AMD64_
7879 /*-------------------------------------------------------------------------
7881 * Now start emitting the part of the prolog which sets up the frame
7884 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
7886 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
7887 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
7889 #endif // _TARGET_ARM_
7891 #ifdef _TARGET_XARCH_
7892 if (doubleAlignOrFramePointerUsed())
7894 inst_RV(INS_push, REG_FPBASE, TYP_REF);
7895 compiler->unwindPush(REG_FPBASE);
7896 psiAdjustStackLevel(REGSIZE_BYTES);
7898 #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
7899 genEstablishFramePointer(0, /*reportUnwindData*/ true);
7900 #endif // !_TARGET_AMD64_
7903 if (compiler->genDoubleAlign())
7905 noway_assert(isFramePointerUsed() == false);
7906 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
7908 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
7910 #endif // DOUBLE_ALIGN
7912 #endif // _TARGET_XARCH_
7914 #ifdef _TARGET_ARM64_
7915 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
7916 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7917 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
7918 #else // !_TARGET_ARM64_
7919 genPushCalleeSavedRegisters();
7920 #endif // !_TARGET_ARM64_
7923 bool needToEstablishFP = false;
7924 int afterLclFrameSPtoFPdelta = 0;
7925 if (doubleAlignOrFramePointerUsed())
7927 needToEstablishFP = true;
7929 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
7930 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
7931 // too big, we go ahead and do it here.
7933 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
7934 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
7935 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
7937 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
7938 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
7939 needToEstablishFP = false;
7942 #endif // _TARGET_ARM_
7944 //-------------------------------------------------------------------------
7946 // Subtract the local frame size from SP.
7948 //-------------------------------------------------------------------------
7949 CLANG_FORMAT_COMMENT_ANCHOR;
7951 #ifndef _TARGET_ARM64_
7952 regMaskTP maskStackAlloc = RBM_NONE;
7956 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
7957 #endif // _TARGET_ARM_
7959 if (maskStackAlloc == RBM_NONE)
7961 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7963 #endif // !_TARGET_ARM64_
7965 //-------------------------------------------------------------------------
7968 if (compiler->compLocallocUsed)
7970 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
7971 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
7972 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
7974 #endif // _TARGET_ARMARCH_
7976 #if defined(_TARGET_XARCH_)
7977 // Preserve callee saved float regs to stack.
7978 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
7979 #endif // defined(_TARGET_XARCH_)
7981 #ifdef _TARGET_AMD64_
7982 // Establish the AMD64 frame pointer after the OS-reported prolog.
7983 if (doubleAlignOrFramePointerUsed())
7985 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
7986 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
7988 #endif //_TARGET_AMD64_
7990 //-------------------------------------------------------------------------
7992 // This is the end of the OS-reported prolog for purposes of unwinding
7994 //-------------------------------------------------------------------------
7997 if (needToEstablishFP)
7999 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
8000 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
8002 #endif // _TARGET_ARM_
8004 if (compiler->info.compPublishStubParam)
8006 #if CPU_LOAD_STORE_ARCH
8007 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8008 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8010 // mov [lvaStubArgumentVar], EAX
8011 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8012 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8014 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
8016 // It's no longer live; clear it out so it can be used after this in the prolog
8017 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
8021 // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
8022 // when creating the stack frame. Don't think it's worth it, though.
8023 if (genNeedPrologStackProbe)
8026 // Can't have a call until we have enough padding for rejit
8028 genPrologPadForReJit();
8029 noway_assert(compiler->opts.compNeedStackProbes);
8030 genGenerateStackProbe();
8031 compiler->compStackProbePrologDone = true;
8033 #endif // STACK_PROBES
8036 // Zero out the frame as needed
8039 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
8041 #if FEATURE_EH_FUNCLETS
8043 genSetPSPSym(initReg, &initRegZeroed);
8045 #else // !FEATURE_EH_FUNCLETS
8047 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
8048 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
8050 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
8051 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
8053 // Zero out the slot for nesting level 0
8054 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
8058 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8059 initRegZeroed = true;
8062 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
8066 #endif // !FEATURE_EH_FUNCLETS
8068 genReportGenericContextArg(initReg, &initRegZeroed);
8070 // The local variable representing the security object must be on the stack frame
8071 // and must be 0 initialized.
8072 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
8073 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
8074 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
8076 #ifdef JIT32_GCENCODER
8077 // Initialize the LocalAllocSP slot if there is localloc in the function.
8078 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
8080 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
8082 #endif // JIT32_GCENCODER
8084 // Set up the GS security cookie
8086 genSetGSSecurityCookie(initReg, &initRegZeroed);
8088 #ifdef PROFILING_SUPPORTED
8090 // Insert a function entry callback for profiling, if requested.
8091 genProfilingEnterCallback(initReg, &initRegZeroed);
8093 #endif // PROFILING_SUPPORTED
8095 if (!genInterruptible)
8097 /*-------------------------------------------------------------------------
8099 * The 'real' prolog ends here for non-interruptible methods.
8100 * For fully-interruptible methods, we extend the prolog so that
8101 * we do not need to track GC inforation while shuffling the
8104 * Make sure there's enough padding for ReJIT.
8107 genPrologPadForReJit();
8108 getEmitter()->emitMarkPrologEnd();
8111 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
8112 // The unused bits of Vector3 arguments must be cleared
8113 // since native compiler doesn't initize the upper bits to zeros.
8115 // TODO-Cleanup: This logic can be implemented in
8116 // genFnPrologCalleeRegArgs() for argument registers and
8117 // genEnregisterIncomingStackArgs() for stack arguments.
8118 genClearStackVec3ArgUpperBits();
8119 #endif // UNIX_AMD64_ABI && FEATURE_SIMD
8121 /*-----------------------------------------------------------------------------
8122 * Take care of register arguments first
8127 // Update the arg initial register locations.
8128 compiler->lvaUpdateArgsWithInitialReg();
8130 FOREACH_REGISTER_FILE(regState)
8132 if (regState->rsCalleeRegArgMaskLiveIn)
8134 // If we need an extra register to shuffle around the incoming registers
8135 // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
8136 // if we don't need to use the xtraReg then this flag will stay false
8139 bool xtraRegClobbered = false;
8141 if (genRegMask(initReg) & RBM_ARG_REGS)
8147 xtraReg = REG_SCRATCH;
8148 initRegZeroed = false;
8151 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
8153 if (xtraRegClobbered)
8155 initRegZeroed = false;
8160 // Home the incoming arguments
8161 genEnregisterIncomingStackArgs();
8163 /* Initialize any must-init registers variables now */
8167 regMaskTP regMask = 0x1;
8169 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
8171 if (regMask & initRegs)
8173 // Check if we have already zeroed this register
8174 if ((reg == initReg) && initRegZeroed)
8180 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
8183 initRegZeroed = true;
8190 if (initFltRegs | initDblRegs)
8192 // If initReg is not in initRegs then we will use REG_SCRATCH
8193 if ((genRegMask(initReg) & initRegs) == 0)
8195 initReg = REG_SCRATCH;
8196 initRegZeroed = false;
8200 // This is needed only for Arm since it can use a zero initialized int register
8201 // to initialize vfp registers.
8204 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8205 initRegZeroed = true;
8207 #endif // _TARGET_ARM_
8209 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
8212 //-----------------------------------------------------------------------------
8215 // Increase the prolog size here only if fully interruptible.
8216 // And again make sure it's big enough for ReJIT
8219 if (genInterruptible)
8221 genPrologPadForReJit();
8222 getEmitter()->emitMarkPrologEnd();
8225 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8232 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
8236 noway_assert(GCrefLo == +INT_MAX);
8237 noway_assert(GCrefHi == -INT_MAX);
8241 if (compiler->opts.dspCode)
8248 // On non-x86 the VARARG cookie does not need any special treatment.
8250 // Load up the VARARG argument pointer register so it doesn't get clobbered.
8251 // only do this if we actually access any statically declared args
8252 // (our argument pointer register has a refcount > 0).
8253 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
8255 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > 0)
8257 varDsc = &compiler->lvaTable[argsStartVar];
8259 noway_assert(compiler->info.compArgsCount > 0);
8261 // MOV EAX, <VARARGS HANDLE>
8262 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
8263 regSet.verifyRegUsed(REG_EAX);
8266 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
8268 // EDX might actually be holding something here. So make sure to only use EAX for this code
8271 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
8272 noway_assert(!lastArg->lvRegister);
8273 signed offset = lastArg->lvStkOffs;
8274 assert(offset != BAD_STK_OFFS);
8275 noway_assert(lastArg->lvFramePointerBased);
8277 // LEA EAX, &<VARARGS HANDLE> + EAX
8278 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
8280 if (varDsc->lvIsInReg())
8282 if (varDsc->lvRegNum != REG_EAX)
8284 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
8285 regSet.verifyRegUsed(varDsc->lvRegNum);
8290 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
8294 #endif // _TARGET_X86_
8296 #if defined(DEBUG) && defined(_TARGET_XARCH_)
8297 if (compiler->opts.compStackCheckOnRet)
8299 noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC &&
8300 compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister &&
8301 compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame);
8302 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
8304 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)
8306 getEmitter()->emitEndProlog();
8307 compiler->unwindEndProlog();
8309 noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize());
8312 #pragma warning(pop)
8315 /*****************************************************************************
8317 * Generates code for a function epilog.
8319 * Please consult the "debugger team notification" comment in genFnProlog().
8322 #if defined(_TARGET_ARMARCH_)
// Generates the function epilog for ARM/ARM64 targets: tears down the stack
// frame, pops the callee-saved registers, and finishes with either a normal
// return or a tail-call jump (a GT_JMP "jmp method" or a fast tail call).
// 'block' is the BBJ_RETURN block whose epilog is being emitted.
8324 void CodeGen::genFnEpilog(BasicBlock* block)
8328 printf("*************** In genFnEpilog()\n");
// Mark that we are generating an epilog for the duration of this function.
8331 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
// Seed the current GC tracking sets (stack vars, GC-ref regs, byref regs)
// from the emitter's recorded state at epilog entry.
8333 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8334 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8335 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8338 if (compiler->opts.dspCode)
8339 printf("\n__epilog:\n");
// Debug dump of the live GC sets computed above.
8343 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8344 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8345 printf(", gcRegGCrefSetCur=");
8346 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8347 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8348 printf(", gcRegByrefSetCur=");
8349 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8350 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
// BBF_HAS_JMP marks a block that ends in a CEE_JMP-style exit rather than a
// plain return; such epilogs end with a jump instead of a ret.
8355 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8357 GenTree* lastNode = block->lastNode();
8359 // Method handle and address info used in case of jump epilog
8360 CORINFO_METHOD_HANDLE methHnd = nullptr;
8361 CORINFO_CONST_LOOKUP addrInfo;
8362 addrInfo.addr = nullptr;
8363 addrInfo.accessType = IAT_VALUE;
// For a GT_JMP epilog, resolve the target method's entry point now; the
// accessType (IAT_VALUE/IAT_PVALUE/IAT_RELPVALUE) decides how we jump below.
8365 if (jmpEpilog && lastNode->gtOper == GT_JMP)
8367 methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1;
8368 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8372 // We delay starting the unwind codes until we have an instruction which we know
8373 // needs an unwind code. In particular, for large stack frames in methods without
8374 // localloc, the sequence might look something like this:
8377 // pop {r4,r5,r6,r10,r11,pc}
8378 // In this case, the "movw" should not be part of the unwind codes, since it will
8379 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
8380 // also sets the current location as the beginning offset of the epilog, so every
8381 // instruction afterwards needs an unwind code. In the case above, if you call
8382 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
8384 bool unwindStarted = false;
8386 // Tear down the stack frame
// If localloc ran, SP is variable; restore it from the saved copy in
// REG_SAVED_LOCALLOC_SP (which the prolog stashed) before freeing the frame.
8388 if (compiler->compLocallocUsed)
8392 compiler->unwindBegEpilog();
8393 unwindStarted = true;
8397 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
8398 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
// NOTE(review): this expression appears inside an assertion/condition whose
// surrounding lines are not visible here — confirm against full source.
8402 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
// Release the local frame; may itself start the unwind codes (via &unwindStarted).
8405 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
8410 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
8411 compiler->unwindBegEpilog();
8412 unwindStarted = true;
8415 if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE)
8417 // IAT_RELPVALUE jump at the end is done using relative indirection, so,
8418 // additional helper register is required.
8419 // We use LR just before it is going to be restored from stack, i.e.
// Compute the final target: r12 = *[addr] + addr, using LR as a scratch copy
// of the base address (LR is about to be reloaded from the stack anyway).
8430 regNumber indCallReg = REG_R12;
8431 regNumber vptrReg1 = REG_LR;
8433 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8434 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg);
8435 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8436 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1);
8439 genPopCalleeSavedRegisters(jmpEpilog);
// If the prolog pre-spilled incoming argument registers ("push {r0-r3}" style),
// pop that space off SP now and record it in the unwind info.
8441 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8443 // We better not have used a pop PC to return otherwise this will be unreachable code
8444 noway_assert(!genUsedPopToReturn);
8446 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
8447 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
8448 compiler->unwindAllocStack(preSpillRegArgSize);
8453 // We better not have used a pop PC to return otherwise this will be unreachable code
8454 noway_assert(!genUsedPopToReturn);
8457 #else // _TARGET_ARM64_
// ARM64 path: no delayed-unwind-start dance; one helper restores callee-saved
// registers and frees the local frame together.
8458 compiler->unwindBegEpilog();
8460 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
8461 #endif // _TARGET_ARM64_
// --- Jump-epilog path: exit via a jump to another method instead of a return.
8465 hasTailCalls = true;
8467 noway_assert(block->bbJumpKind == BBJ_RETURN);
8468 noway_assert(block->bbTreeList != nullptr);
8470 /* figure out what jump we have */
8471 GenTree* jmpNode = lastNode;
8472 #if !FEATURE_FASTTAILCALL
8473 noway_assert(jmpNode->gtOper == GT_JMP);
8474 #else // FEATURE_FASTTAILCALL
8476 // If jmpNode is GT_JMP then gtNext must be null.
8477 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8478 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8480 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8481 noway_assert((jmpNode->gtOper == GT_JMP) ||
8482 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8484 // The next block is associated with this "if" stmt
8485 if (jmpNode->gtOper == GT_JMP)
8486 #endif // FEATURE_FASTTAILCALL
8488 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8489 // the same descriptor with some minor adjustments.
8490 assert(methHnd != nullptr);
8491 assert(addrInfo.addr != nullptr);
8493 #ifdef _TARGET_ARMARCH_
// Select a call/jump encoding based on how the target address is accessed.
8494 emitter::EmitCallType callType;
8496 regNumber indCallReg;
8497 switch (addrInfo.accessType)
// IAT_VALUE: direct address — use a BL-style immediate if it fits.
8500 if (validImmForBL((ssize_t)addrInfo.addr))
8502 // Simple direct call
8503 callType = emitter::EC_FUNC_TOKEN;
8504 addr = addrInfo.addr;
8505 indCallReg = REG_NA;
8509 // otherwise the target address doesn't fit in an immediate
8510 // so we have to burn a register...
8514 // Load the address into a register, load indirect and call through a register
8515 // We have to use R12 since we assume the argument registers are in use
8516 callType = emitter::EC_INDIR_R;
8517 indCallReg = REG_INDIRECT_CALL_TARGET_REG;
8519 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
// IAT_PVALUE adds one level of indirection: dereference the cell to get the target.
8520 if (addrInfo.accessType == IAT_PVALUE)
8522 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8523 regSet.verifyRegUsed(indCallReg);
8529 // Load the address into a register, load relative indirect and call through a register
8530 // We have to use R12 since we assume the argument registers are in use
8531 // LR is used as helper register right before it is restored from stack, thus,
8532 // all relative address calculations are performed before LR is restored.
8533 callType = emitter::EC_INDIR_R;
8534 indCallReg = REG_R12;
8537 regSet.verifyRegUsed(indCallReg);
8543 NO_WAY("Unsupported JMP indirection");
8546 /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
8547 * the same descriptor with some minor adjustments.
// Emit the tail jump using the call emitter (isJump semantics); GC sets are
// passed so liveness at the jump site is recorded correctly.
8551 getEmitter()->emitIns_Call(callType,
8553 INDEBUG_LDISASM_COMMA(nullptr)
8556 EA_UNKNOWN, // retSize
8557 #if defined(_TARGET_ARM64_)
8558 EA_UNKNOWN, // secondRetSize
8560 gcInfo.gcVarPtrSetCur,
8561 gcInfo.gcRegGCrefSetCur,
8562 gcInfo.gcRegByrefSetCur,
8563 BAD_IL_OFFSET, // IL offset
8570 CLANG_FORMAT_COMMENT_ANCHOR;
8571 #endif //_TARGET_ARMARCH_
8573 #if FEATURE_FASTTAILCALL
// Fast tail call: the target address was materialized in REG_FASTTAILCALL_TARGET
// by earlier codegen; branch through that register.
8577 // Call target = REG_FASTTAILCALL_TARGET
8578 // https://github.com/dotnet/coreclr/issues/4827
8579 // Do we need a special encoding for stack walker like rex.w prefix for x64?
8580 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
8582 #endif // FEATURE_FASTTAILCALL
// --- Normal-return path.
8587 if (!genUsedPopToReturn)
8589 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
8590 // so we need a "bx lr" instruction to return from the function.
8591 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
8592 compiler->unwindBranch16();
8594 #else // _TARGET_ARM64_
8595 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
8596 compiler->unwindReturn(REG_LR);
8597 #endif // _TARGET_ARM64_
8600 compiler->unwindEndEpilog();
8603 #elif defined(_TARGET_XARCH_)
8605 void CodeGen::genFnEpilog(BasicBlock* block)
8610 printf("*************** In genFnEpilog()\n");
8614 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8616 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8617 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8618 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8620 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
8623 genInterruptibleUsed = true;
8626 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8629 if (compiler->opts.dspCode)
8631 printf("\n__epilog:\n");
8636 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8637 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8638 printf(", gcRegGCrefSetCur=");
8639 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8640 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8641 printf(", gcRegByrefSetCur=");
8642 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8643 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8648 // Restore float registers that were saved to stack before SP is modified.
8649 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
8651 #ifdef JIT32_GCENCODER
8652 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
8653 // the above call to `genRestoreCalleeSavedFltRegs` because that function
8654 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
8655 // which is the only target that uses the JIT32 GC encoder
8656 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
8657 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
8658 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
8659 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
8660 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
8661 // we will need to rethink this.
8662 getEmitter()->emitStartEpilog();
8665 /* Compute the size in bytes we've pushed/popped */
8667 if (!doubleAlignOrFramePointerUsed())
8669 // We have an ESP frame */
8671 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
8673 /* Get rid of our local variables */
8675 if (compiler->compLclFrameSize)
8678 /* Add 'compiler->compLclFrameSize' to ESP */
8679 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
8681 if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
8683 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8684 regSet.verifyRegUsed(REG_ECX);
8687 #endif // _TARGET_X86
8689 /* Add 'compiler->compLclFrameSize' to ESP */
8690 /* Generate "add esp, <stack-size>" */
8691 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8695 genPopCalleeSavedRegisters();
8699 noway_assert(doubleAlignOrFramePointerUsed());
8701 /* Tear down the stack frame */
8703 bool needMovEspEbp = false;
8706 if (compiler->genDoubleAlign())
8709 // add esp, compLclFrameSize
8711 // We need not do anything (except the "mov esp, ebp") if
8712 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
8713 // also complicates the code manager. Hence, we ignore that case.
8715 noway_assert(compiler->compLclFrameSize != 0);
8716 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8718 needMovEspEbp = true;
8721 #endif // DOUBLE_ALIGN
8723 bool needLea = false;
8725 if (compiler->compLocallocUsed)
8727 // ESP may be variable if a localloc was actually executed. Reset it.
8728 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8732 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
8734 if (compiler->compLclFrameSize != 0)
8736 #ifdef _TARGET_AMD64_
8737 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
8738 // do an LEA to "pop off" the frame allocation.
8740 #else // !_TARGET_AMD64_
8741 // We will just generate "mov esp, ebp" and be done with it.
8742 needMovEspEbp = true;
8743 #endif // !_TARGET_AMD64_
8746 else if (compiler->compLclFrameSize == 0)
8748 // do nothing before popping the callee-saved registers
8751 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
8753 // "pop ecx" will make ESP point to the callee-saved registers
8754 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8755 regSet.verifyRegUsed(REG_ECX);
8757 #endif // _TARGET_X86
8760 // We need to make ESP point to the callee-saved registers
8768 #ifdef _TARGET_AMD64_
8769 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
8771 // Case 1: localloc not used.
8772 // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
8773 // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8774 // The amount to be subtracted from RBP to point at callee saved int regs.
8776 // Case 2: localloc used
8777 // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
8778 // Offset = Amount to be added to RBP to point at callee saved int regs.
8779 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
8781 // Offset should fit within a byte if localloc is not used.
8782 if (!compiler->compLocallocUsed)
8784 noway_assert(offset < UCHAR_MAX);
8787 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8788 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8789 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
8792 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
8797 // Pop the callee-saved registers (if any)
8800 genPopCalleeSavedRegisters();
8802 #ifdef _TARGET_AMD64_
8803 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
8804 #else // !_TARGET_AMD64_
8808 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
8810 #endif // !_TARGET_AMD64_
8813 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
8816 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
8818 /* Check if this a special return block i.e.
8819 * CEE_JMP instruction */
8823 noway_assert(block->bbJumpKind == BBJ_RETURN);
8824 noway_assert(block->bbTreeList);
8826 // figure out what jump we have
8827 GenTree* jmpNode = block->lastNode();
8828 #if !FEATURE_FASTTAILCALL
8830 noway_assert(jmpNode->gtOper == GT_JMP);
8833 // If jmpNode is GT_JMP then gtNext must be null.
8834 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8835 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8837 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8838 noway_assert((jmpNode->gtOper == GT_JMP) ||
8839 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8841 // The next block is associated with this "if" stmt
8842 if (jmpNode->gtOper == GT_JMP)
8845 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8846 // the same descriptor with some minor adjustments.
8847 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
8849 CORINFO_CONST_LOOKUP addrInfo;
8850 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8851 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
8853 NO_WAY("Unsupported JMP indirection");
8856 const emitter::EmitCallType callType =
8857 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
8859 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8860 // the same descriptor with some minor adjustments.
8863 getEmitter()->emitIns_Call(callType,
8865 INDEBUG_LDISASM_COMMA(nullptr)
8868 EA_UNKNOWN // retSize
8869 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
8870 gcInfo.gcVarPtrSetCur,
8871 gcInfo.gcRegGCrefSetCur,
8872 gcInfo.gcRegByrefSetCur,
8873 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
8878 #if FEATURE_FASTTAILCALL
8881 #ifdef _TARGET_AMD64_
8883 // Call target = RAX.
8884 // Stack walker requires that a register indirect tail call be rex.w prefixed.
8885 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
8887 assert(!"Fast tail call as epilog+jmp");
8889 #endif //_TARGET_AMD64_
8891 #endif // FEATURE_FASTTAILCALL
8895 unsigned stkArgSize = 0; // Zero on all platforms except x86
8897 #if defined(_TARGET_X86_)
8898 bool fCalleePop = true;
8900 // varargs has caller pop
8901 if (compiler->info.compIsVarArgs)
8905 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
8907 #endif // UNIX_X86_ABI
8911 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
8912 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
8914 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
8916 #endif // _TARGET_X86_
8918 /* Return, popping our arguments (if any) */
8919 instGen_Return(stkArgSize);
8924 #error Unsupported or unset target architecture
8927 #if FEATURE_EH_FUNCLETS
8931 /*****************************************************************************
8933 * Generates code for an EH funclet prolog.
8935 * Funclets have the following incoming arguments:
8937 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
8938 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
8939 * finally/fault: none
8941 * Funclets set the following registers on exit:
8943 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
8944 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
8945 * finally/fault: none
8947 * The ARM funclet prolog sequence is:
8949 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
8950 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
8951 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
8952 * ; calculated for the entire function.
8953 * sub sp, XXX ; Establish the rest of the frame.
8954 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
8955 * ; up to preserve stack alignment. If we push an odd number of registers, we also
8956 * ; generate this, to keep the stack aligned.
8958 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
8960 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
8963 * if (this is a filter funclet)
8965 * // r1 on entry to a filter funclet is CallerSP of the containing function:
8966 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
8967 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
8968 * // a funclet. Consider:
8972 * // throw new Exception();
8973 * // } catch(Exception) {
8974 * // throw new Exception(); // The exception thrown here ...
8976 * // } filter { // ... will be processed here, while the "catch" funclet frame is
8977 * // // still on the stack
8978 * // } filter-handler {
8981 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
8982 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
8983 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
8985 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
8986 * ; the dynamically containing funclet or function)
8987 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
8988 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
8992 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
8993 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
8995 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
8996 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
8999 * The epilog sequence is then:
9001 * add sp, XXX ; if necessary
9004 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
9005 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
9007 * The funclet frame is thus:
9010 * |-----------------------|
9013 * +=======================+ <---- Caller's SP
9014 * |Callee saved registers |
9015 * |-----------------------|
9016 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
9017 * | | // in function and funclet
9018 * |-----------------------|
9019 * | PSP slot | // Omitted in CoreRT ABI
9020 * |-----------------------|
9021 * ~ possible 4 byte pad ~
9023 * |-----------------------|
9024 * | Outgoing arg space |
9025 * |-----------------------| <---- Ambient SP
/*
 * genFuncletProlog (ARM): Generate the prolog for an EH funclet.
 *
 * Pushes the callee-saved integer registers (plus any extra registers used to
 * fold part of the stack allocation into the push), then the callee-saved
 * float registers, allocates the remainder of the funclet frame, and finally
 * initializes this funclet's PSP slot. Filters receive CallerSP of the
 * containing function in r1 and use it to re-establish the frame pointer;
 * non-filter funclets compute CallerSP from the already-valid frame pointer
 * instead. See the large comment block above for the full sequence.
 */
9032 void CodeGen::genFuncletProlog(BasicBlock* block)
9036 printf("*************** In genFuncletProlog()\n");
9039 assert(block != NULL);
9040 assert(block->bbFlags & BBF_FUNCLET_BEG);
// Mark that we are generating a prolog for the duration of this function.
9042 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9044 gcInfo.gcResetForBB();
9046 compiler->unwindBegProlog();
// Split the save set into float and integer register masks.
9048 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9049 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
// If part of the frame allocation can be done by pushing extra (dead)
// registers, fold those registers into the integer push mask.
9051 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
9052 maskPushRegsInt |= maskStackAlloc;
9054 assert(FitsIn<int>(maskPushRegsInt));
9055 inst_IV(INS_push, (int)maskPushRegsInt);
9056 compiler->unwindPushMaskInt(maskPushRegsInt);
9058 if (maskPushRegsFloat != RBM_NONE)
9060 genPushFltRegs(maskPushRegsFloat);
9061 compiler->unwindPushMaskFloat(maskPushRegsFloat);
9064 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
// Registers live on entry (per the funclet ABI described above): filters get
// r0 (exception object) and r1 (CallerSP of the containing function),
// catches get r0 only, finally/fault funclets get none.
9066 regMaskTP maskArgRegsLiveIn;
9069 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
9071 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9073 maskArgRegsLiveIn = RBM_NONE;
9077 maskArgRegsLiveIn = RBM_R0;
9080 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
9081 bool initRegZeroed = false;
// Only allocate the frame explicitly if it wasn't folded into the push above.
9083 if (maskStackAlloc == RBM_NONE)
9085 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9088 // This is the end of the OS-reported prolog for purposes of unwinding
9089 compiler->unwindEndProlog();
9093 // This is the first block of a filter
// r1 holds the Establisher Frame's CallerSP: load the main function's
// CallerSP out of that frame's PSP slot, copy it into this funclet's PSP
// slot, and re-derive the frame pointer from it.
9095 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
9096 genFuncletInfo.fiPSP_slot_CallerSP_offset);
9097 regSet.verifyRegUsed(REG_R1);
9098 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
9099 genFuncletInfo.fiPSP_slot_SP_offset);
9100 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
9101 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9105 // This is a non-filter funclet
// The frame pointer is already valid (re-established by the VM on entry), so
// compute CallerSP from it using r3 as scratch, and store it into this
// funclet's PSP slot.
9106 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
9107 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9108 regSet.verifyRegUsed(REG_R3);
9109 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
9110 genFuncletInfo.fiPSP_slot_SP_offset);
9114 /*****************************************************************************
9116 * Generates code for an EH funclet epilog.
/*
 * genFuncletEpilog (ARM): Generate the epilog for an EH funclet.
 *
 * Frees the funclet frame (unless the free is folded into the register pop),
 * pops the callee-saved float registers, then pops the integer registers,
 * substituting PC for the saved LR so the pop also performs the return.
 */
9119 void CodeGen::genFuncletEpilog()
9123 printf("*************** In genFuncletEpilog()\n");
9126 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9128 // Just as for the main function, we delay starting the unwind codes until we have
9129 // an instruction which we know needs an unwind code. This is to support code like
9133 // pop {r4,r5,r6,r10,r11,pc}
9134 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
9136 bool unwindStarted = false;
9138 /* The saved regs info saves the LR register. We need to pop the PC register to return */
9139 assert(genFuncletInfo.fiSaveRegs & RBM_LR);
9141 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9142 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
// If part of the frame free can be folded into the register pop (mirroring
// the prolog's push), add those registers to the integer pop mask.
9144 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
9145 maskPopRegsInt |= maskStackAlloc;
9147 if (maskStackAlloc == RBM_NONE)
9149 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
9154 // We'll definitely generate an unwindable instruction next
9155 compiler->unwindBegEpilog();
9156 unwindStarted = true;
// Pop PC instead of the saved LR so the final pop is also the return.
9159 maskPopRegsInt &= ~RBM_LR;
9160 maskPopRegsInt |= RBM_PC;
9162 if (maskPopRegsFloat != RBM_NONE)
9164 genPopFltRegs(maskPopRegsFloat);
9165 compiler->unwindPopMaskFloat(maskPopRegsFloat);
9168 assert(FitsIn<int>(maskPopRegsInt));
9169 inst_IV(INS_pop, (int)maskPopRegsInt);
9170 compiler->unwindPopMaskInt(maskPopRegsInt);
9172 compiler->unwindEndEpilog();
9175 /*****************************************************************************
9177 * Capture the information used to generate the funclet prologs and epilogs.
9178 * Note that all funclet prologs are identical, and all funclet epilogs are
9179 * identical (per type: filters are identical, and non-filters are identical).
9180 * Thus, we compute the data used for these just once.
9182 * See genFuncletProlog() for more information about the prolog/epilog sequences.
/*
 * genCaptureFuncletPrologEpilogInfo (ARM): Compute and cache, in
 * genFuncletInfo, the data shared by all funclet prologs/epilogs: the
 * register save mask, the SP delta for the frame allocation, and the PSP
 * slot offsets (both SP-relative and CallerSP-relative). This is computed
 * once, after the final frame layout is known.
 */
9185 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9187 if (compiler->ehAnyFunclets())
9189 assert(isFramePointerUsed());
9190 assert(compiler->lvaDoneFrameLayout ==
9191 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9193 // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
9194 // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
9195 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
9196 // (also assumed in genFnProlog()).
9197 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
9198 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9199 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
9201 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
9202 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
9203 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
9204 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
// Unaligned funclet frame: pre-spill area + saved regs + PSP slot + outgoing args.
9205 unsigned funcletFrameSize =
9206 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
// Round up to STACK_ALIGN; the pad (if any) sits between the PSP slot and the
// outgoing arg space (see the frame diagram above genFuncletProlog()).
9208 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
9209 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
// spDelta is what is allocated *after* the register push.
9210 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
9212 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
9213 int PSP_slot_CallerSP_offset =
9214 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
9216 /* Now save it for future use */
9218 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
9219 genFuncletInfo.fiSpDelta = spDelta;
9220 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
9221 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
9227 printf("Funclet prolog / epilog info\n");
9228 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9229 printf(" Save regs: ");
9230 dspRegMask(rsMaskSaveRegs);
9232 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9233 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
9234 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
9236 if (PSP_slot_CallerSP_offset !=
9237 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
9238 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
9239 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9243 assert(PSP_slot_CallerSP_offset < 0);
9244 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9246 assert(PSP_slot_CallerSP_offset ==
9247 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
9248 // function and funclet!
9253 #elif defined(_TARGET_AMD64_)
9255 /*****************************************************************************
9257 * Generates code for an EH funclet prolog.
9259 * Funclets have the following incoming arguments:
9261 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
9262 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
9263 * finally/fault: rcx = InitialSP
9265 * Funclets set the following registers on exit:
9267 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
9268 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9269 * finally/fault: none
9271 * The AMD64 funclet prolog sequence is:
9274 * push callee-saved regs
9275 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
9276 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
9277 * ; the entire function.
9278 * sub sp, XXX ; Establish the rest of the frame.
9279 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9280 * ; up to preserve stack alignment. If we push an odd number of registers, we also
9281 * ; generate this, to keep the stack aligned.
9283 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9285 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9287 * ; Also, re-establish the frame pointer from the PSP.
9289 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
9290 * ; PSP of the dynamically containing funclet or function)
9291 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
9292 * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
9293 * ; Function_InitialSP_to_FP_delta==0, we don't need this
9296 * The epilog sequence is then:
9299 * pop callee-saved regs ; if necessary
9303 * The funclet frame is thus:
9306 * |-----------------------|
9309 * +=======================+ <---- Caller's SP
9310 * | Return address |
9311 * |-----------------------|
9313 * |-----------------------|
9314 * |Callee saved registers |
9315 * |-----------------------|
9316 * ~ possible 8 byte pad ~
9318 * |-----------------------|
9319 * | PSP slot | // Omitted in CoreRT ABI
9320 * |-----------------------|
9321 * | Outgoing arg space | // this only exists if the function makes a call
9322 * |-----------------------| <---- Initial SP
9328 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
9329 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
9330 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
9331 * "FRAMEPTR OFFSETS" for details.
/*
 * genFuncletProlog (AMD64): Generate the prolog for an EH funclet.
 *
 * Pushes RBP and the callee-saved integer registers, allocates the funclet
 * frame, saves the callee-saved float registers into their stack slots, then
 * (unless there is no PSPSym, as in the CoreRT ABI) copies the PSP from the
 * parent frame into this funclet's PSP slot and re-establishes the parent
 * frame pointer. See the large comment block above for the full sequence.
 */
9334 void CodeGen::genFuncletProlog(BasicBlock* block)
9339 printf("*************** In genFuncletProlog()\n");
9343 assert(!regSet.rsRegsModified(RBM_FPBASE));
9344 assert(block != nullptr);
9345 assert(block->bbFlags & BBF_FUNCLET_BEG);
9346 assert(isFramePointerUsed());
9348 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9350 gcInfo.gcResetForBB();
9352 compiler->unwindBegProlog();
9354 // We need to push ebp, since it's callee-saved.
9355 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
9356 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
9357 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
9358 // is stored here (all temps are allocated in the parent frame).
9359 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
9360 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
9362 inst_RV(INS_push, REG_FPBASE, TYP_REF);
9363 compiler->unwindPush(REG_FPBASE);
9365 // Callee saved int registers are pushed to stack.
9366 genPushCalleeSavedRegisters();
// Registers live on entry (per the funclet ABI described above):
// finally/fault funclets get only ARG_0 (InitialSP); catch/filter funclets
// additionally get the exception object in ARG_2.
9368 regMaskTP maskArgRegsLiveIn;
9369 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9371 maskArgRegsLiveIn = RBM_ARG_0;
9375 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
9378 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
9379 bool initRegZeroed = false;
9381 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9383 // Callee saved float registers are copied to stack in their assigned stack slots
9384 // after allocating space for them as part of funclet frame.
9385 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9387 // This is the end of the OS-reported prolog for purposes of unwinding
9388 compiler->unwindEndProlog();
9390 // If there is no PSPSym (CoreRT ABI), we are done.
9391 if (compiler->lvaPSPSym == BAD_VAR_NUM)
// ARG_0 holds the Establisher Frame (InitialSP of the parent frame, per the
// comment above); load the parent's PSP from its PSP slot...
9396 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9398 regSet.verifyRegUsed(REG_FPBASE);
// ...store it into this funclet's own PSP slot...
9400 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
// ...and re-establish the parent frame pointer from it (the lea is skipped
// entirely when the delta is zero).
9402 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
9404 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
9405 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9408 // We've modified EBP, but not really. Say that we haven't...
9409 regSet.rsRemoveRegsModified(RBM_FPBASE);
9412 /*****************************************************************************
9414 * Generates code for an EH funclet epilog.
9416 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
/*
 * genFuncletEpilog (AMD64): Generate the epilog for an EH funclet.
 *
 * Restores the callee-saved float registers from their stack slots, releases
 * the funclet frame, pops the callee-saved integer registers, and finally
 * pops RBP. No unwind codes are emitted here: as noted above, AMD64 only
 * cares about unwind codes for the prolog.
 */
9419 void CodeGen::genFuncletEpilog()
9424 printf("*************** In genFuncletEpilog()\n");
9428 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9430 // Restore callee saved XMM regs from their stack slots before modifying SP
9431 // to position at callee saved int regs.
9432 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9433 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
9434 genPopCalleeSavedRegisters();
9435 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9439 /*****************************************************************************
9441 * Capture the information used to generate the funclet prologs and epilogs.
/*
 * genCaptureFuncletPrologEpilogInfo (AMD64): Compute and cache, in
 * genFuncletInfo, the data shared by all funclet prologs/epilogs: the
 * InitialSP-to-FP delta of the parent function, the funclet SP delta, and
 * the InitialSP-relative offset of the PSP slot. Computed once, after the
 * final frame layout is known.
 */
9444 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9446 if (!compiler->ehAnyFunclets())
9451 // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
9452 // because we're not going to allocate the same size frame as the parent.
9454 assert(isFramePointerUsed());
9455 assert(compiler->lvaDoneFrameLayout ==
9456 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9457 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved are finalized
9459 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
9460 // that's ok, because we're figuring out an offset in the parent frame.
9461 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
9462 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
9465 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9466 #ifndef UNIX_AMD64_ABI
9467 // No 4 slots for outgoing params on the stack for System V systems.
9468 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
9469 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
9470 // slots if there are any calls in the function.
9471 #endif // !UNIX_AMD64_ABI
// The PSP slot sits immediately above the outgoing arg space (see the frame
// diagram above genFuncletProlog()).
9472 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
9474 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
9476 // How much stack do we allocate in the funclet?
9477 // We need to 16-byte align the stack.
9479 unsigned totalFrameSize =
9480 REGSIZE_BYTES // return address
9481 + REGSIZE_BYTES // pushed EBP
9482 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
9484 // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement.
9485 // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary.
9486 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
9487 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
// The PSP slot is omitted entirely when there is no PSPSym (CoreRT ABI).
9489 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
9491 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
9492 + calleeFPRegsSavedSize // pushed callee-saved float regs
9493 // below calculated 'pad' will go here
9494 + PSPSymSize // PSPSym
9495 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9498 unsigned pad = AlignmentPad(totalFrameSize, 16);
9500 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
9501 + calleeFPRegsSavedSize // Callee saved xmm regs
9502 + pad + PSPSymSize // PSPSym
9503 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9510 printf("Funclet prolog / epilog info\n");
9511 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9512 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9513 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9516 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9518 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
9519 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
9525 #elif defined(_TARGET_ARM64_)
9527 // Look in CodeGenArm64.cpp
9529 #elif defined(_TARGET_X86_)
9531 /*****************************************************************************
9533 * Generates code for an EH funclet prolog.
9536 * Funclets have the following incoming arguments:
9538 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
9539 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
9540 * finally/fault: none
9542 * Funclets set the following registers on exit:
9544 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
9545 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9546 * finally/fault: none
9548 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
/*
 * genFuncletProlog (x86): Generate the prolog for an EH funclet.
 *
 * As noted above, the x86 funclet frame layout is still TBD: there is no
 * PSPSym and no callee-saved register save here; the prolog only subtracts
 * 12 bytes from SP as padding for 16-byte stack alignment.
 */
9552 void CodeGen::genFuncletProlog(BasicBlock* block)
9557 printf("*************** In genFuncletProlog()\n");
9561 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9563 gcInfo.gcResetForBB();
9565 compiler->unwindBegProlog();
9567 // This is the end of the OS-reported prolog for purposes of unwinding
9568 compiler->unwindEndProlog();
9570 // TODO We may need EBP restore sequence here if we introduce PSPSym
9572 // Add a padding for 16-byte alignment
9573 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
9576 /*****************************************************************************
9578 * Generates code for an EH funclet epilog.
/*
 * genFuncletEpilog (x86): Generate the epilog for an EH funclet.
 * Mirrors the x86 funclet prolog: only undoes the 12-byte alignment padding.
 */
9581 void CodeGen::genFuncletEpilog()
9586 printf("*************** In genFuncletEpilog()\n");
9590 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9592 // Revert a padding that was added for 16-byte alignment
9593 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
9598 /*****************************************************************************
9600 * Capture the information used to generate the funclet prologs and epilogs.
// genCaptureFuncletPrologEpilogInfo (x86): nothing to pre-compute yet — the
// x86 funclet prolog/epilog use no cached frame info (see the TBD note above).
9603 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9605 if (!compiler->ehAnyFunclets())
9613 /*****************************************************************************
9615 * Generates code for an EH funclet prolog.
// Fallback for targets without a funclet prolog implementation: not yet implemented.
9618 void CodeGen::genFuncletProlog(BasicBlock* block)
9620 NYI("Funclet prolog");
9623 /*****************************************************************************
9625 * Generates code for an EH funclet epilog.
// Fallback for targets without a funclet epilog implementation: not yet implemented.
9628 void CodeGen::genFuncletEpilog()
9630 NYI("Funclet epilog");
9633 /*****************************************************************************
9635 * Capture the information used to generate the funclet prologs and epilogs.
// Fallback for targets without funclet support: only trips the NYI when the
// method actually contains funclets.
9638 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9640 if (compiler->ehAnyFunclets())
9642 NYI("genCaptureFuncletPrologEpilogInfo()");
9648 /*-----------------------------------------------------------------------------
9650 * Set the main function PSPSym value in the frame.
9651 * Funclets use different code to load the PSP sym and save it in their frame.
9652 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
9653 * The PSPSym section of that document is copied here.
9655 ***********************************
9656 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
9657 * accesses locals from the main function body.
9659 * First, two definitions.
9661 * Caller-SP is the value of the stack pointer in a function's caller before the call
9662 * instruction is executed. That is, when function A calls function B, Caller-SP for B
9663 * is the value of the stack pointer immediately before the call instruction in A
9664 * (calling B) was executed. Note that this definition holds for both AMD64, which
9665 * pushes the return address when a call instruction is executed, and for ARM, which
9666 * doesn't. For AMD64, Caller-SP is the address above the call return address.
9668 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
9669 * the frame has been allocated. That is, before any "alloca"-type allocations.
9671 * The PSPSym is a pointer-sized local variable in the frame of the main function and
9672 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
9673 * for the main function. The stack offset of the PSPSym is reported to the VM in the
9674 * GC information header. The value reported in the GC information is the offset of the
9675 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
9676 * value is reported to the VM, differs between architectures. In particular, note that
9677 * most things in the GC information header are reported as offsets relative to Caller-SP,
9678 * but PSPSym on AMD64 is one (maybe the only) exception.)
9680 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
9681 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
9682 * the frame pointer is the same value in a funclet as it is in the main function body.
9684 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
9685 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
9686 * only true for first pass funclets (currently just filters) and it is passed as the second
9687 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
9688 * frame in the exception processing system. For the CLR, it points either to the main function
9689 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
9690 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
9692 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
9693 * don't know if the Establisher Frame is from the main function or a funclet, we design the
9694 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
9695 * offset from the Establisher Frame in each case. (This is also required because we only report
9696 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
9697 * function and all of its funclets). Then, the funclet uses this known offset to compute the
9698 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
9699 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
9700 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
9701 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
9702 * for every nested funclet invocation.
9704 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
9705 * restores all non-volatile registers to their values within the parent frame. This includes
9706 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
9707 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
9709 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
9710 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
9711 * ARM this is the first argument and passed in R0.
9713 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
9714 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
9715 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
9716 * is required in all funclets as well as the main function, whereas if the establisher frame was
9717 * correctly reported, the PSPSym could be omitted in some cases.)
9718 ***********************************
// genSetPSPSym: During prolog generation, compute the PSPSym value (Caller-SP on ARM/ARM64,
// Initial-SP on AMD64 — see the design discussion above) and store it into the PSPSym stack
// slot (compiler->lvaPSPSym) so funclets can later recover the parent frame. Does nothing if
// the method has no PSPSym.
//
// Arguments:
//    initReg        - scratch register the ARM/ARM64 paths may clobber to form the address
//    pInitRegZeroed - set to false when initReg is clobbered, so later prolog code knows
//                     it no longer holds zero
9720 void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
9722 assert(compiler->compGeneratingProlog);
9724 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9729 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
9731 #if defined(_TARGET_ARM_)
9733 // We either generate:
9735 // str r1, [reg + PSPSymOffset]
9738 // str r1, [reg + PSPSymOffset]
9739 // depending on the smallest encoding
9741 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9746 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
9748 // use the "add <reg>, sp, imm" form
9750 callerSPOffs = SPtoCallerSPdelta;
9751 regBase = REG_SPBASE;
9755 // use the "add <reg>, r11, imm" form
9757 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
9758 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
9760 callerSPOffs = FPtoCallerSPdelta;
9761 regBase = REG_FPBASE;
9764 // We will just use the initReg since it is an available register
9765 // and we are probably done using it anyway...
9766 regNumber regTmp = initReg;
9767 *pInitRegZeroed = false;
9769 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
9770 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9772 #elif defined(_TARGET_ARM64_)
9774 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9776 // We will just use the initReg since it is an available register
9777 // and we are probably done using it anyway...
9778 regNumber regTmp = initReg;
9779 *pInitRegZeroed = false;
9781 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
9782 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9784 #elif defined(_TARGET_AMD64_)
9786 // The PSP sym value is Initial-SP, not Caller-SP!
9787 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
9788 // has been established.
9791 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
9793 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
9797 NYI("Set function PSP sym");
9802 #endif // FEATURE_EH_FUNCLETS
9804 /*****************************************************************************
9806 * Generates code for all the function and funclet prologs and epilogs.
// genGeneratePrologsAndEpilogs: Drive generation of the main function prolog/epilog and (when
// funclets are enabled) all funclet prologs/epilogs, by walking the prolog/epilog insGroups
// that were reserved in the emitter during main code generation.
9809 void CodeGen::genGeneratePrologsAndEpilogs()
9814 printf("*************** Before prolog / epilog generation\n");
9815 getEmitter()->emitDispIGlist(false);
9819 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
9820 // This affects our code that determines which untracked locals need to be zero initialized.
9821 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
9823 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
9825 getEmitter()->emitStartPrologEpilogGeneration();
9827 gcInfo.gcResetForBB();
9830 // Generate all the prologs and epilogs.
9831 CLANG_FORMAT_COMMENT_ANCHOR;
9833 #if FEATURE_EH_FUNCLETS
9835 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
9836 // information computed during codegen, or during function prolog generation, like
9837 // frame offsets. It must run after main function prolog generation.
9839 genCaptureFuncletPrologEpilogInfo();
9841 #endif // FEATURE_EH_FUNCLETS
9843 // Walk the list of prologs and epilogs and generate them.
9844 // We maintain a list of prolog and epilog basic blocks in
9845 // the insGroup structure in the emitter. This list was created
9846 // during code generation by the genReserve*() functions.
9848 // TODO: it seems like better design would be to create a list of prologs/epilogs
9849 // in the code generator (not the emitter), and then walk that list. But we already
9850 // have the insGroup list, which serves well, so we don't need the extra allocations
9851 // for a prolog/epilog list in the code generator.
9853 getEmitter()->emitGeneratePrologEpilog();
9855 // Tell the emitter we're done with all prolog and epilog generation.
9857 getEmitter()->emitFinishPrologEpilogGeneration();
9862 printf("*************** After prolog / epilog generation\n");
9863 getEmitter()->emitDispIGlist(false);
9869 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9870 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9872 XX End Prolog / Epilog XX
9874 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9875 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// genGenerateStackProbe: Emit a stack probe — a "test" read at
// [SP - (CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK)] — so the stack guard page is touched
// and stack growth is triggered before unmanaged code runs below the current SP.
9879 void CodeGen::genGenerateStackProbe()
9881 noway_assert(compiler->opts.compNeedStackProbes);
9883 // If this assert fires, it means somebody has changed the value
9884 // CORINFO_STACKPROBE_DEPTH.
9885 // Why does the EE need such a deep probe? It should just need a couple
9886 // of bytes, to set up a frame in the unmanaged code..
9888 static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
9890 JITDUMP("Emitting stack probe:\n");
9891 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
9892 -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
9894 #endif // STACK_PROBES
9896 #if defined(_TARGET_XARCH_)
9897 // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9898 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9899 // Here offset = 16-byte aligned offset after pushing integer registers.
9902 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
9903 // non-funclet: this will be compLclFrameSize.
9904 // funclet frames: this will be FuncletInfo.fiSpDelta.
9905 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
9907 genVzeroupperIfNeeded(false);
9908 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9910 // Only callee saved floating point registers should be in regMask
9911 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9914 if (regMask == RBM_NONE)
9919 #ifdef _TARGET_AMD64_
9920 // Pad by one pointer slot when an odd number of callee-saved int regs was pushed, so the
9921 // XMM save area stays 16-byte aligned (movaps below requires aligned addresses).
9920 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9921 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9923 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
9924 assert((offset % 16) == 0);
9925 instruction copyIns = ins_Copy(TYP_FLOAT);
9926 #else // !_TARGET_AMD64_
9927 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
9928 instruction copyIns = INS_movupd;
9929 #endif // !_TARGET_AMD64_
9931 // Walk registers from the lowest number; each saved register takes the next slot working
9931 // down the stack (offset decreases by XMM_REGSIZE_BYTES per saved register).
9931 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9933 regMaskTP regBit = genRegMask(reg);
9934 if ((regBit & regMask) != 0)
9936 // ABI requires us to preserve lower 128-bits of YMM register.
9937 getEmitter()->emitIns_AR_R(copyIns,
9938 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
9940 reg, REG_SPBASE, offset);
9941 compiler->unwindSaveReg(reg, offset);
9943 offset -= XMM_REGSIZE_BYTES;
9948 // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9949 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9950 // Here offset = 16-byte aligned offset after pushing integer registers.
9953 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
9954 // non-funclet: this will be compLclFrameSize.
9955 // funclet frames: this will be FuncletInfo.fiSpDelta.
9956 void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
9958 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9960 // Only callee saved floating point registers should be in regMask
9961 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9964 if (regMask == RBM_NONE)
9966 genVzeroupperIfNeeded();
9970 #ifdef _TARGET_AMD64_
9971 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9972 instruction copyIns = ins_Copy(TYP_FLOAT);
9973 #else // !_TARGET_AMD64_
9974 unsigned firstFPRegPadding = 0;
9975 instruction copyIns = INS_movupd;
9976 #endif // !_TARGET_AMD64_
9980 if (compiler->compLocallocUsed)
9982 // localloc frame: use frame pointer relative offset
9983 assert(isFramePointerUsed());
9984 regBase = REG_FPBASE;
9985 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
9989 regBase = REG_SPBASE;
9990 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9993 #ifdef _TARGET_AMD64_
9994 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
9995 assert((offset % 16) == 0);
9996 #endif // _TARGET_AMD64_
9998 // Restore in the same lowest-number-first order used by genPreserveCalleeSavedFltRegs,
9998 // consuming one XMM-sized slot per register going down the stack.
9998 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
10000 regMaskTP regBit = genRegMask(reg);
10001 if ((regBit & regMask) != 0)
10003 // ABI requires us to restore lower 128-bits of YMM register.
10004 getEmitter()->emitIns_R_AR(copyIns,
10005 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
10007 reg, regBase, offset);
10008 regMask &= ~regBit;
10009 offset -= XMM_REGSIZE_BYTES;
10012 genVzeroupperIfNeeded();
10015 // Generate Vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers so that the
10016 // AVX/Legacy SSE transition penalties can be avoided. This function is used in genPreserveCalleeSavedFltRegs
10017 // (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
10018 // 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
10019 // code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
10020 // if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
10023 // check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
10024 // instruction, false to check if the function contains AVX instruction (either 128-bit or 256-bit).
// genVzeroupperIfNeeded: Emit a vzeroupper instruction when the method contains AVX code, to
// avoid AVX <-> legacy SSE transition penalties. With check256bitOnly (the default) only
// 256-bit AVX triggers the emission; otherwise any AVX instruction does.
10026 void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
10028 bool emitVzeroUpper = false;
10029 if (check256bitOnly)
10031 emitVzeroUpper = getEmitter()->Contains256bitAVX();
10035 emitVzeroUpper = getEmitter()->ContainsAVX();
10038 if (emitVzeroUpper)
10040 assert(compiler->canUseVexEncoding());
10041 instGen(INS_vzeroupper);
10045 #endif // defined(_TARGET_XARCH_)
10047 //-----------------------------------------------------------------------------------
10048 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
10051 // hClass - type handle
10054 // true if type is returned in multiple registers, false otherwise.
10056 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
10058 if (hClass == NO_CLASS_HANDLE)
10063 structPassingKind howToReturnStruct;
10064 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
10066 // If the struct could not be normalized to a single-register scalar type, it is returned
10066 // in multiple registers.
10066 return (varTypeIsStruct(returnType));
10069 //----------------------------------------------
10070 // Methods that support HFA's for ARM32/ARM64
10071 //----------------------------------------------
// IsHfa: A class is an HFA (homogeneous floating-point aggregate) iff its HFA element type
// is a floating-point type; GetHfaType returns TYP_UNDEF for non-HFA classes.
10073 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
10076 return varTypeIsFloating(GetHfaType(hClass));
// Tree overload: look up the tree's struct handle (if any) and defer to the handle-based query.
10082 bool Compiler::IsHfa(GenTree* tree)
10085 return IsHfa(gtGetStructHandleIfPresent(tree));
// Tree overload: look up the tree's struct handle (if any) and defer to the handle-based query.
10091 var_types Compiler::GetHfaType(GenTree* tree)
10094 return GetHfaType(gtGetStructHandleIfPresent(tree));
// Tree overload: look up the tree's struct handle (if any) and defer to the handle-based query.
10100 unsigned Compiler::GetHfaCount(GenTree* tree)
10102 return GetHfaCount(gtGetStructHandleIfPresent(tree));
// GetHfaType: Return the HFA element type for the given class via the EE's getHFAType query,
// or TYP_UNDEF when the handle is absent or the class is not an HFA.
10105 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
10107 var_types result = TYP_UNDEF;
10108 if (hClass != NO_CLASS_HANDLE)
10111 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
10112 if (corType != CORINFO_TYPE_UNDEF)
10114 result = JITtype2varType(corType);
10116 #endif // FEATURE_HFA
10121 //------------------------------------------------------------------------
10122 // GetHfaCount: Given a class handle for an HFA struct
10123 // return the number of registers needed to hold the HFA
10125 // Note that on ARM32 the single precision registers overlap with
10126 // the double precision registers and for that reason each
10127 // double register is considered to be two single registers.
10128 // Thus for ARM32 an HFA of 4 doubles this function will return 8.
10129 // On ARM64 given an HFA of 4 singles or 4 doubles this function will
10130 // will return 4 for both.
10132 // hClass: the class handle of a HFA struct
10134 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
10136 assert(IsHfa(hClass));
10137 #ifdef _TARGET_ARM_
10138 // A HFA of doubles is twice as large as an HFA of singles for ARM32
10139 // (i.e. uses twice the number of single precision registers)
10140 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
10141 #else // _TARGET_ARM64_
10142 var_types hfaType = GetHfaType(hClass);
10143 unsigned classSize = info.compCompHnd->getClassSize(hClass);
10144 // Note that the retail build issues a warning about a potential division by zero without the Max function
10145 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
10146 return classSize / elemSize;
10147 #endif // _TARGET_ARM64_
10150 #ifdef _TARGET_XARCH_
10152 //------------------------------------------------------------------------
10153 // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
10154 // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
10155 // X86/x64 has a special encoding for shift/rotate-by-constant-1.
10158 // ins: the base shift/rotate instruction
10159 // shiftByValue: the constant value by which we are shifting/rotating
10161 instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
10163 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
10166 // Which format should we use?
10168 instruction shiftByConstantIns;
10170 if (shiftByValue == 1)
10172 // Use the shift-by-one format.
10174 // These asserts pin the instruction-enum layout this mapping relies on: for every base
10174 // shift/rotate, the "-by-1" variant is at base+1 and the "-by-N" variant at base+2.
10174 assert(INS_rcl + 1 == INS_rcl_1);
10175 assert(INS_rcr + 1 == INS_rcr_1);
10176 assert(INS_rol + 1 == INS_rol_1);
10177 assert(INS_ror + 1 == INS_ror_1);
10178 assert(INS_shl + 1 == INS_shl_1);
10179 assert(INS_shr + 1 == INS_shr_1);
10180 assert(INS_sar + 1 == INS_sar_1);
10182 shiftByConstantIns = (instruction)(ins + 1);
10186 // Use the shift-by-NNN format.
10188 assert(INS_rcl + 2 == INS_rcl_N);
10189 assert(INS_rcr + 2 == INS_rcr_N);
10190 assert(INS_rol + 2 == INS_rol_N);
10191 assert(INS_ror + 2 == INS_ror_N);
10192 assert(INS_shl + 2 == INS_shl_N);
10193 assert(INS_shr + 2 == INS_shr_N);
10194 assert(INS_sar + 2 == INS_sar_N);
10196 shiftByConstantIns = (instruction)(ins + 2);
10199 return shiftByConstantIns;
10202 #endif // _TARGET_XARCH_
10204 //------------------------------------------------------------------------------------------------ //
10205 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
10208 // The number of the first argument with stack slot on the caller's frame.
10211 // On x64 Windows the caller always creates slots (homing space) in its frame for the
10212 // first 4 arguments of a callee (register passed args). So, the variable number
10213 // (lclNum) for the first argument with a stack slot is always 0.
10214 // For System V systems or armarch, there is no such calling convention requirement, and the code
10215 // needs to find the first stack passed argument from the caller. This is done by iterating over
10216 // all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
10218 unsigned CodeGen::getFirstArgWithStackSlot()
10220 #if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
10221 unsigned baseVarNum = 0;
10222 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
10223 // that's passed on the stack.
10224 LclVarDsc* varDsc = nullptr;
10225 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
10227 varDsc = &(compiler->lvaTable[i]);
10229 // We should have found a stack parameter (and broken out of this loop) before
10230 // we find any non-parameters.
10231 assert(varDsc->lvIsParam);
10233 // lvArgReg == REG_STK marks a parameter passed on the stack rather than in a register.
10233 if (varDsc->lvArgReg == REG_STK)
10239 assert(varDsc != nullptr);
10242 #elif defined(_TARGET_AMD64_)
10244 #else // _TARGET_X86
10245 // Not implemented for x86.
10246 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10247 return BAD_VAR_NUM;
10248 #endif // _TARGET_X86_
10251 //------------------------------------------------------------------------
10252 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
10254 void CodeGen::genSinglePush()
10256 AddStackLevel(REGSIZE_BYTES); // one pointer-sized word pushed
10259 //------------------------------------------------------------------------
10260 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
10262 void CodeGen::genSinglePop()
10264 SubtractStackLevel(REGSIZE_BYTES); // one pointer-sized word popped
10267 //------------------------------------------------------------------------
10268 // genPushRegs: Push the given registers.
10271 // regs - mask or registers to push
10272 // byrefRegs - OUT arg. Set to byref registers that were pushed.
10273 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
10276 // Mask of registers pushed.
10279 // This function does not check if the register is marked as used, etc.
10281 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
10283 *byrefRegs = RBM_NONE;
10284 *noRefRegs = RBM_NONE;
10286 if (regs == RBM_NONE)
10291 #if FEATURE_FIXED_OUT_ARGS
10293 NYI("Don't call genPushRegs with real regs!");
10296 #else // FEATURE_FIXED_OUT_ARGS
10298 // Pushing GC refs / byrefs as pointer-sized words is only valid if they occupy the same
10298 // number of stack slots as TYP_I_IMPL.
10298 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
10299 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
10301 regMaskTP pushedRegs = regs;
10303 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
10305 regMaskTP regBit = regMaskTP(1) << reg;
10307 if ((regBit & regs) == RBM_NONE)
10311 // Classify the register by the GC info it currently holds, and report it back to the
10311 // caller via the matching out-mask so genPopRegs can re-mark it after the pop.
10311 if (regBit & gcInfo.gcRegGCrefSetCur)
10315 else if (regBit & gcInfo.gcRegByrefSetCur)
10317 *byrefRegs |= regBit;
10320 else if (noRefRegs != NULL)
10322 *noRefRegs |= regBit;
10330 inst_RV(INS_push, reg, type);
10333 gcInfo.gcMarkRegSetNpt(regBit); // value now lives on the stack, not in the register
10343 //------------------------------------------------------------------------
10344 // genPopRegs: Pop the registers that were pushed by genPushRegs().
10347 // regs - mask of registers to pop
10348 // byrefRegs - The byref registers that were pushed by genPushRegs().
10349 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
10354 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
10356 if (regs == RBM_NONE)
10361 #if FEATURE_FIXED_OUT_ARGS
10363 NYI("Don't call genPopRegs with real regs!");
10365 #else // FEATURE_FIXED_OUT_ARGS
10367 noway_assert((regs & byrefRegs) == byrefRegs);
10368 noway_assert((regs & noRefRegs) == noRefRegs);
10369 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
10371 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
10372 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
10374 // Walk the registers in the reverse order as genPushRegs()
10375 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
10377 regMaskTP regBit = regMaskTP(1) << reg;
10379 if ((regBit & regs) == RBM_NONE)
10383 // Recover the register's GC classification from the masks genPushRegs reported.
10383 if (regBit & byrefRegs)
10387 else if (regBit & noRefRegs)
10396 inst_RV(INS_pop, reg, type);
10399 if (type != TYP_INT)
10400 gcInfo.gcMarkRegPtrVal(reg, type); // re-mark the register as holding a GC ref/byref
10408 /*****************************************************************************
10411 * This function should be called only after the sizes of the emitter blocks
10412 * have been finalized.
// genSetScopeInfo: Report local-variable scope (debug) information to the EE. First reports
// the prolog scopes (psiScopeList), then the scopes for the rest of the method (siScopeList),
// translating each into a Compiler::siVarLoc and handing it to the per-scope
// genSetScopeInfo(...) overload. Must run only after emitter block sizes are finalized (see
// the comment above), since it converts emitter locations into native code offsets.
10415 void CodeGen::genSetScopeInfo()
10417 if (!compiler->opts.compScopeInfo)
10425 printf("*************** In genSetScopeInfo()\n");
10429 if (compiler->info.compVarScopesCount == 0)
10431 compiler->eeSetLVcount(0);
10432 compiler->eeSetLVdone();
10436 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
10437 noway_assert(psiOpenScopeList.scNext == nullptr);
10440 unsigned scopeCnt = siScopeCnt + psiScopeCnt;
10442 compiler->eeSetLVcount(scopeCnt);
10445 genTrnslLocalVarCount = scopeCnt;
10448 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
10452 // Record the scopes found for the parameters over the prolog.
10453 // The prolog needs to be treated differently as a variable may not
10454 // have the same info in the prolog block as is given by compiler->lvaTable.
10455 // eg. A register parameter is actually on the stack, before it is loaded to reg.
10457 CodeGen::psiScope* scopeP;
10459 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
10461 noway_assert(scopeP != nullptr);
10462 noway_assert(scopeP->scStartLoc.Valid());
10463 noway_assert(scopeP->scEndLoc.Valid());
10465 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
10466 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
10468 unsigned varNum = scopeP->scSlotNum;
10469 noway_assert(startOffs <= endOffs);
10471 // The range may be 0 if the prolog is empty. For such a case,
10472 // report the liveness of arguments to span at least the first
10473 // instruction in the method. This will be incorrect (except on
10474 // entry to the method) if the very first instruction of the method
10475 // is part of a loop. However, this should happen
10476 // very rarely, and the incorrectness is worth being able to look
10477 // at the argument on entry to the method.
10478 if (startOffs == endOffs)
10480 noway_assert(startOffs == 0);
10484 Compiler::siVarLoc varLoc;
10486 if (scopeP->scRegister)
10488 varLoc.vlType = Compiler::VLT_REG;
10489 varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
10493 varLoc.vlType = Compiler::VLT_STK;
10494 varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
10495 varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
10498 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
10501 // Record the scopes for the rest of the method.
10502 // Check that the LocalVarInfo scopes look OK
10503 noway_assert(siOpenScopeList.scNext == nullptr);
10505 CodeGen::siScope* scopeL;
10507 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
10509 noway_assert(scopeL != nullptr);
10510 noway_assert(scopeL->scStartLoc.Valid());
10511 noway_assert(scopeL->scEndLoc.Valid());
10513 // Find the start and end IP
10515 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
10516 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
10518 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
10520 // For stack vars, find the base register, and offset
10523 signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
10525 if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
10527 baseReg = REG_SPBASE;
10528 offset += scopeL->scStackLevel;
10532 baseReg = REG_FPBASE;
10535 // Now fill in the varLoc
10537 Compiler::siVarLoc varLoc;
10539 // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
10540 // for part of its lifetime, or in different registers for different parts of its lifetime.
10541 // This should only matter for non-debug code, where we do variable enregistration.
10542 // We should store the ranges of variable enregistration in the scope table.
10543 if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
10545 var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
10551 #ifdef _TARGET_64BIT_
10553 #endif // _TARGET_64BIT_
10555 varLoc.vlType = Compiler::VLT_REG;
10556 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10559 #ifndef _TARGET_64BIT_
10561 #if !CPU_HAS_FP_SUPPORT
10565 if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
10567 varLoc.vlType = Compiler::VLT_REG_REG;
10568 varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10569 varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
10573 varLoc.vlType = Compiler::VLT_REG_STK;
10574 varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10575 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
10576 if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
10578 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10580 varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
10583 #endif // !_TARGET_64BIT_
10585 #ifdef _TARGET_64BIT_
10589 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10590 // so no XMM registers can get debug information.
10591 varLoc.vlType = Compiler::VLT_REG_FP;
10592 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10595 #else // !_TARGET_64BIT_
10597 #if CPU_HAS_FP_SUPPORT
10600 if (isFloatRegType(type))
10602 varLoc.vlType = Compiler::VLT_FPSTK;
10603 varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10606 #endif // CPU_HAS_FP_SUPPORT
10608 #endif // !_TARGET_64BIT_
10610 #ifdef FEATURE_SIMD
10615 varLoc.vlType = Compiler::VLT_REG_FP;
10617 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10618 // so no XMM registers can get debug information.
10620 // Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
10621 // in eeDispVar() --> getRegName() that regNumber is valid.
10622 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10624 #endif // FEATURE_SIMD
10627 noway_assert(!"Invalid type");
10632 assert(offset != BAD_STK_OFFS);
10633 LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
10634 switch (genActualType(varDsc->TypeGet()))
10641 case TYP_BLK: // Needed because of the TYP_BLK stress mode
10642 #ifdef FEATURE_SIMD
10648 #ifdef _TARGET_64BIT_
10651 #endif // _TARGET_64BIT_
10652 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10653 // In the AMD64 ABI we are supposed to pass a struct by reference when its
10654 // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
10655 // the IR to comply with the ABI and therefore changes the type of the lclVar
10656 // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
10657 // this is still a struct by setting the lvIsTemp flag.
10658 // The same is true for ARM64 and structs > 16 bytes.
10659 // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
10660 // Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
10661 // to accommodate for this situation.
10662 if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
10664 assert(varDsc->lvIsParam);
10665 varLoc.vlType = Compiler::VLT_STK_BYREF;
10668 #endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10670 varLoc.vlType = Compiler::VLT_STK;
10672 varLoc.vlStk.vlsBaseReg = baseReg;
10673 varLoc.vlStk.vlsOffset = offset;
10674 if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
10676 varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10680 #ifndef _TARGET_64BIT_
10683 varLoc.vlType = Compiler::VLT_STK2;
10684 varLoc.vlStk2.vls2BaseReg = baseReg;
10685 varLoc.vlStk2.vls2Offset = offset;
10686 if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
10688 varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10691 #endif // !_TARGET_64BIT_
10694 noway_assert(!"Invalid type");
10698 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
10699 scopeL->scAvailable, varLoc);
10702 compiler->eeSetLVdone();
10705 //------------------------------------------------------------------------
10706 // genSetScopeInfo: Record scope information for debug info
10710 // startOffs - the starting offset for this scope
10711 // length - the length of this scope
10712 // varNum - the lclVar for this scope info
10718 // Called for every scope info piece to record by the main genSetScopeInfo()
// Per-scope worker for the main genSetScopeInfo(): maps the JIT variable number to its IL
// variable number, fixes up x86 varargs stack arguments to be reported relative to the
// varargs cookie (VLT_FIXED_VA), records the translation info for JIT dumps, and reports the
// scope to the EE via eeSetLVinfo.
10720 void CodeGen::genSetScopeInfo(unsigned which,
10721 UNATIVE_OFFSET startOffs,
10722 UNATIVE_OFFSET length,
10726 Compiler::siVarLoc& varLoc)
10728 // We need to do some mapping while reporting back these variables.
10730 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
10731 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
10733 #ifdef _TARGET_X86_
10734 // Non-x86 platforms are allowed to access all arguments directly
10735 // so we don't need this code.
10737 // Is this a varargs function?
10739 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
10740 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
10742 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
10744 // All stack arguments (except the varargs handle) have to be
10745 // accessed via the varargs cookie. Discard generated info,
10746 // and just find its position relative to the varargs handle
10748 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
10749 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
10751 noway_assert(!compiler->opts.compDbgCode);
10755 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
10756 // arguments of vararg functions to avoid reporting them to GC.
10757 noway_assert(!compiler->lvaTable[varNum].lvRegister);
10758 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
10759 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
10761 noway_assert(cookieOffset < varOffset);
10762 unsigned offset = varOffset - cookieOffset;
10763 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10764 noway_assert(offset < stkArgSize);
10765 offset = stkArgSize - offset;
10767 varLoc.vlType = Compiler::VLT_FIXED_VA;
10768 varLoc.vlFixedVarArg.vlfvOffset = offset;
10771 #endif // _TARGET_X86_
10773 VarName name = nullptr;
10777 // Look up the (debug) name of this scope's variable by its LVnum.
10777 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
10779 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
10781 name = compiler->info.compVarScopes[scopeNum].vsdName;
10785 // Hang on to this compiler->info.
10787 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
10789 tlvi.tlviVarNum = ilVarNum;
10790 tlvi.tlviLVnum = LVnum;
10791 tlvi.tlviName = name;
10792 tlvi.tlviStartPC = startOffs;
10793 tlvi.tlviLength = length;
10794 tlvi.tlviAvailable = avail;
10795 tlvi.tlviVarLoc = varLoc;
10799 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
10802 /*****************************************************************************/
10805 /*****************************************************************************
10808 * Can be called only after lviSetLocalVarInfo() has been called
10812 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
10814 if (!compiler->opts.compScopeInfo)
10817 if (compiler->info.compVarScopesCount == 0)
10820 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10822 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10824 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
10825 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10826 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10828 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10835 /*****************************************************************************
10838 * Can be called only after lviSetLocalVarInfo() has been called
10842 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
10844 if (!compiler->opts.compScopeInfo)
10847 if (compiler->info.compVarScopesCount == 0)
10850 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10852 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10854 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
10855 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10856 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10858 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10865 /*****************************************************************************/
10866 #endif // defined(DEBUG)
10867 #endif // LATE_DISASM
10871 /*****************************************************************************
10872 * Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
/*****************************************************************************
 *  genIPmappingDisp: Display a single IPmappingDsc (IL offset <-> native
 *  location pair) for JIT dump purposes.
 *
 *  Arguments:
 *     mappingNum - ordinal to print before the entry; pass unsigned(-1)
 *                  to suppress the mapping number.
 *     ipMapping  - the mapping descriptor to display.
 */
void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
{
    if (mappingNum != unsigned(-1))
    {
        printf("%d: ", mappingNum);
    }

    IL_OFFSETX offsx = ipMapping->ipmdILoffsx;

    if (offsx == BAD_IL_OFFSET)
    {
        printf("???");
    }
    else
    {
        // Strip the flag bits (or pass through PROLOG/EPILOG/NO_MAPPING) and
        // show the raw IL offset, then decode the flag bits individually.
        Compiler::eeDispILOffs(jitGetILoffsAny(offsx));

        if (jitIsStackEmpty(offsx))
        {
            printf(" STACK_EMPTY");
        }

        if (jitIsCallInstruction(offsx))
        {
            printf(" CALL_INSTRUCTION");
        }
    }

    printf(" ");
    ipMapping->ipmdNativeLoc.Print();
    // We can only call this after code generation. Is there any way to tell when it's legal to call?
    // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));

    if (ipMapping->ipmdIsLabel)
    {
        printf(" label");
    }

    printf("\n");
}
10916 void CodeGen::genIPmappingListDisp()
10918 unsigned mappingNum = 0;
10919 Compiler::IPmappingDsc* ipMapping;
10921 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
10923 genIPmappingDisp(mappingNum, ipMapping);
10930 /*****************************************************************************
10932 * Append an IPmappingDsc struct to the list that we're maintaining
10933 * for the debugger.
10934 * Record the instr offset as being at the current code gen position.
/*****************************************************************************
 *  genIPmappingAdd: Append an IPmappingDsc struct to the list that we're
 *  maintaining for the debugger, recording the IL offset as mapping to the
 *  current code generation position.
 *
 *  Arguments:
 *     offsx   - the IL offset (plus flag bits), or a distinguished
 *               ICorDebugInfo value (PROLOG, EPILOG, NO_MAPPING).
 *     isLabel - true if this mapping is at a (potential) jump target.
 */
void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
{
    // Nothing to record unless we're generating debug info.
    if (!compiler->opts.compDbgInfo)
    {
        return;
    }

    assert(offsx != BAD_IL_OFFSET);

    switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
    {
        case ICorDebugInfo::PROLOG:
        case ICorDebugInfo::EPILOG:
            break;

        default:

            if (offsx != ICorDebugInfo::NO_MAPPING)
            {
                noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
            }

            // Ignore this one if it's the same IL offset as the last one we saw.
            // Note that we'll let through two identical IL offsets if the flag bits
            // differ, or two identical "special" mappings (e.g., PROLOG).
            if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
            {
                JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
                return;
            }
            break;
    }

    /* Create a mapping entry and append it to the list */

    Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
    addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
    addMapping->ipmdILoffsx = offsx;
    addMapping->ipmdIsLabel = isLabel;
    addMapping->ipmdNext    = nullptr;

    if (compiler->genIPmappingList != nullptr)
    {
        // Non-empty list: append after the current tail.
        assert(compiler->genIPmappingLast != nullptr);
        assert(compiler->genIPmappingLast->ipmdNext == nullptr);
        compiler->genIPmappingLast->ipmdNext = addMapping;
    }
    else
    {
        // Empty list: the new entry becomes the head.
        assert(compiler->genIPmappingLast == nullptr);
        compiler->genIPmappingList = addMapping;
    }

    compiler->genIPmappingLast = addMapping;

#ifdef DEBUG
    if (verbose)
    {
        printf("Added IP mapping: ");
        genIPmappingDisp(unsigned(-1), addMapping);
    }
#endif // DEBUG
}
11001 /*****************************************************************************
11003 * Prepend an IPmappingDsc struct to the list that we're maintaining
11004 * for the debugger.
11005 * Record the instr offset as being at the current code gen position.
11007 void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
11009 if (!compiler->opts.compDbgInfo)
11014 assert(offsx != BAD_IL_OFFSET);
11015 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
11017 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11019 case ICorDebugInfo::NO_MAPPING:
11020 case ICorDebugInfo::PROLOG:
11021 case ICorDebugInfo::EPILOG:
11025 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
11029 /* Create a mapping entry and prepend it to the list */
11031 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
11032 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
11033 addMapping->ipmdILoffsx = offsx;
11034 addMapping->ipmdIsLabel = true;
11035 addMapping->ipmdNext = nullptr;
11037 addMapping->ipmdNext = compiler->genIPmappingList;
11038 compiler->genIPmappingList = addMapping;
11040 if (compiler->genIPmappingLast == nullptr)
11042 compiler->genIPmappingLast = addMapping;
11048 printf("Added IP mapping to front: ");
11049 genIPmappingDisp(unsigned(-1), addMapping);
11054 /*****************************************************************************/
// Compile-time sanity checks: the distinguished ICorDebugInfo pseudo-offsets
// (NO_MAPPING, PROLOG, EPILOG) and BAD_IL_OFFSET must all be mutually distinct,
// and all must lie above MAX_IL_OFFSET so they can never collide with a real
// IL offset carried in an IL_OFFSETX.
C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));

C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
11065 //------------------------------------------------------------------------
11066 // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
11067 // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
11068 // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
11071 // offsx - the IL_OFFSETX value with the IL offset to extract.
11076 IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
11078 assert(offsx != BAD_IL_OFFSET);
11080 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11082 case ICorDebugInfo::NO_MAPPING:
11083 case ICorDebugInfo::PROLOG:
11084 case ICorDebugInfo::EPILOG:
11088 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11092 //------------------------------------------------------------------------
11093 // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
11094 // distinguished values. Asserts if passed BAD_IL_OFFSET.
11097 // offsx - the IL_OFFSETX value with the IL offset to extract.
11102 IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
11104 assert(offsx != BAD_IL_OFFSET);
11106 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11108 case ICorDebugInfo::NO_MAPPING:
11109 case ICorDebugInfo::PROLOG:
11110 case ICorDebugInfo::EPILOG:
11111 return IL_OFFSET(offsx);
11114 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11118 //------------------------------------------------------------------------
11119 // jitIsStackEmpty: Does the IL offset have the stack empty bit set?
11120 // Asserts if passed BAD_IL_OFFSET.
11123 // offsx - the IL_OFFSETX value to check
11126 // 'true' if the stack empty bit is set; 'false' otherwise.
11128 bool jitIsStackEmpty(IL_OFFSETX offsx)
11130 assert(offsx != BAD_IL_OFFSET);
11132 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11134 case ICorDebugInfo::NO_MAPPING:
11135 case ICorDebugInfo::PROLOG:
11136 case ICorDebugInfo::EPILOG:
11140 return (offsx & IL_OFFSETX_STKBIT) == 0;
11144 //------------------------------------------------------------------------
11145 // jitIsCallInstruction: Does the IL offset have the call instruction bit set?
11146 // Asserts if passed BAD_IL_OFFSET.
11149 // offsx - the IL_OFFSETX value to check
11152 // 'true' if the call instruction bit is set; 'false' otherwise.
11154 bool jitIsCallInstruction(IL_OFFSETX offsx)
11156 assert(offsx != BAD_IL_OFFSET);
11158 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11160 case ICorDebugInfo::NO_MAPPING:
11161 case ICorDebugInfo::PROLOG:
11162 case ICorDebugInfo::EPILOG:
11166 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
11170 /*****************************************************************************/
/*****************************************************************************
 *  genEnsureCodeEmitted: If 'offsx' was the last IL offset reported to the
 *  debugger and no native code has been emitted since, emit a NOP so the
 *  mapping points at a real instruction (debug code only).
 */
void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
{
    // Only relevant when generating debuggable code.
    if (!compiler->opts.compDbgCode)
    {
        return;
    }

    if (offsx == BAD_IL_OFFSET)
    {
        return;
    }

    /* If other IL offsets were reported since, skip */

    if (compiler->genIPmappingLast == nullptr)
    {
        return;
    }

    if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
    {
        return;
    }

    /* offsx was the last reported offset. Make sure that we generated native code */

    if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
    {
        instGen(INS_nop);
    }
}
11204 /*****************************************************************************
11206 * Shut down the IP-mapping logic, report the info to the EE.
/*****************************************************************************
 *  genIPmappingGen: Shut down the IP-mapping logic and report the collected
 *  IL-to-native mappings to the EE.
 *
 *  This runs in two passes over the mapping list: the first pass counts the
 *  distinct records (invalidating duplicates that share a native offset),
 *  and the second pass reports the surviving records via eeSetLIinfo.
 */
void CodeGen::genIPmappingGen()
{
    if (!compiler->opts.compDbgInfo)
    {
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In genIPmappingGen()\n");
    }
#endif

    if (compiler->genIPmappingList == nullptr)
    {
        // No mappings at all: report an empty table.
        compiler->eeSetLIcount(0);
        compiler->eeSetLIdone();
        return;
    }

    Compiler::IPmappingDsc* tmpMapping;
    Compiler::IPmappingDsc* prevMapping;
    unsigned                mappingCnt;
    UNATIVE_OFFSET          lastNativeOfs;

    /* First count the number of distinct mapping records */

    mappingCnt    = 0;
    lastNativeOfs = UNATIVE_OFFSET(~0);

    for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
         tmpMapping = tmpMapping->ipmdNext)
    {
        IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;

        // Managed RetVal - since new sequence points are emitted to identify IL calls,
        // make sure that those are not filtered and do not interfere with filtering of
        // other sequence points.
        if (jitIsCallInstruction(srcIP))
        {
            mappingCnt++;
            continue;
        }

        UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());

        if (nextNativeOfs != lastNativeOfs)
        {
            // A new native offset: this record is distinct, keep it.
            mappingCnt++;
            lastNativeOfs = nextNativeOfs;
            prevMapping   = tmpMapping;
            continue;
        }

        /* If there are mappings with the same native offset, then:
           o If one of them is NO_MAPPING, ignore it
           o If one of them is a label, report that and ignore the other one
           o Else report the higher IL offset
         */

        PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
        if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
        {
            // If the previous entry was NO_MAPPING, ignore it
            prevMapping->ipmdNativeLoc.Init();
            prevMapping = tmpMapping;
        }
        else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
        {
            // If the current entry is NO_MAPPING, ignore it
            // Leave prevMapping unchanged as tmpMapping is no longer valid
            tmpMapping->ipmdNativeLoc.Init();
        }
        else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
        {
            // counting for special cases: see below
            mappingCnt++;
            prevMapping = tmpMapping;
        }
        else
        {
            noway_assert(prevMapping != nullptr);
            noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
                         lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));

            /* The previous block had the same native offset. We have to
               discard one of the mappings. Simply reinitialize ipmdNativeLoc
               and prevMapping will be ignored later. */

            if (prevMapping->ipmdIsLabel)
            {
                // Leave prevMapping unchanged as tmpMapping is no longer valid
                tmpMapping->ipmdNativeLoc.Init();
            }
            else
            {
                prevMapping->ipmdNativeLoc.Init();
                prevMapping = tmpMapping;
            }
        }
    }

    /* Tell them how many mapping records we've got */

    compiler->eeSetLIcount(mappingCnt);

    /* Now tell them about the mappings */

    mappingCnt    = 0;
    lastNativeOfs = UNATIVE_OFFSET(~0);

    for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
    {
        // Do we have to skip this record ? (Invalidated in the first pass.)
        if (!tmpMapping->ipmdNativeLoc.Valid())
        {
            continue;
        }

        UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
        IL_OFFSETX     srcIP         = tmpMapping->ipmdILoffsx;

        if (jitIsCallInstruction(srcIP))
        {
            // Managed RetVal sequence point: always reported, with the call flag set.
            compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
        }
        else if (nextNativeOfs != lastNativeOfs)
        {
            compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
            lastNativeOfs = nextNativeOfs;
        }
        else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
        {
            // For the special case of an IL instruction with no body
            // followed by the epilog (say ret void immediately preceding
            // the method end), we put two entries in, so that we'll stop
            // at the (empty) ret statement if the user tries to put a
            // breakpoint there, and then have the option of seeing the
            // epilog or not based on SetUnmappedStopMask for the stepper.
            compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
        }
    }

#if 0
    // This check is disabled. It is always true that any time this check asserts, the debugger would have a
    // problem with IL source level debugging. However, for a C# file, it only matters if things are on
    // different source lines. As a result, we have all sorts of latent problems with how we emit debug
    // info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
    // assert back on.
    if (compiler->opts.compDbgCode)
    {
        // Assert that the first instruction of every basic block with more than one incoming edge has a
        // different sequence point from each incoming block.
        //
        // It turns out that the only thing we really have to assert is that the first statement in each basic
        // block has an IL offset and appears in eeBoundaries.
        for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
        {
            if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
            {
                noway_assert(block->bbTreeList->gtOper == GT_STMT);
                bool found = false;
                if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
                {
                    IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
                    for (unsigned i = 0; i < eeBoundariesCount; ++i)
                    {
                        if (eeBoundaries[i].ilOffset == ilOffs)
                        {
                            found = true;
                            break;
                        }
                    }
                }
                noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
            }
        }
    }
#endif // 0

    compiler->eeSetLIdone();
}
11394 /*============================================================================
11396 * These are empty stubs to help the late dis-assembler to compile
11397 * if the late disassembler is being built into a non-DEBUG build.
11399 *============================================================================
#if defined(LATE_DISASM)
#if !defined(DEBUG)

/*****************************************************************************
 *  Empty stubs so the late disassembler compiles in non-DEBUG builds:
 *  variable-name lookup data only exists under DEBUG, so these report
 *  "no name" unconditionally.
 */

/* virtual */
const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
{
    return NULL;
}

/* virtual */
const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
{
    return NULL;
}

/*****************************************************************************/
#endif // !defined(DEBUG)
#endif // defined(LATE_DISASM)
11420 /*****************************************************************************/
11422 //------------------------------------------------------------------------
11423 // indirForm: Make a temporary indir we can feed to pattern matching routines
11424 // in cases where we don't want to instantiate all the indirs that happen.
//------------------------------------------------------------------------
// indirForm: Make a temporary indir we can feed to pattern matching routines
//    in cases where we don't want to instantiate all the indirs that happen.
//
// Arguments:
//    type - the type of the indirection
//    base - the base address expression
//
// Return Value:
//    A stack-allocated GenTreeIndir (not linked into any tree), marked
//    contained and with no register assigned.
//
GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
{
    GenTreeIndir i(GT_IND, type, base, nullptr);
    i.gtRegNum = REG_NA;
    i.SetContained();
    return i;
}
11434 //------------------------------------------------------------------------
11435 // intForm: Make a temporary int we can feed to pattern matching routines
11436 // in cases where we don't want to instantiate.
//------------------------------------------------------------------------
// intForm: Make a temporary int constant we can feed to pattern matching
//    routines in cases where we don't want to instantiate.
//
// Arguments:
//    type  - the type of the constant
//    value - the constant value
//
// Return Value:
//    A stack-allocated GenTreeIntCon (not linked into any tree) with no
//    register assigned.
//
GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
{
    GenTreeIntCon i(type, value);
    i.gtRegNum = REG_NA;
    return i;
}
11445 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11446 //------------------------------------------------------------------------
11447 // genLongReturn: Generates code for long return statement for x86 and arm.
11449 // Note: treeNode's and op1's registers are already consumed.
11452 // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
//------------------------------------------------------------------------
// genLongReturn: Generates code for long return statement for x86 and arm.
//
// Note: treeNode's and op1's registers are already consumed.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
//
// Return Value:
//    None
//
void CodeGen::genLongReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    assert(treeNode->TypeGet() == TYP_LONG);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    assert(op1 != nullptr);
    assert(op1->OperGet() == GT_LONG); // The long value is decomposed into lo/hi halves.
    GenTree* loRetVal = op1->gtGetOp1();
    GenTree* hiRetVal = op1->gtGetOp2();
    assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));

    genConsumeReg(loRetVal);
    genConsumeReg(hiRetVal);

    // Move each half into its ABI return register if it isn't already there.
    if (loRetVal->gtRegNum != REG_LNGRET_LO)
    {
        inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
    }
    if (hiRetVal->gtRegNum != REG_LNGRET_HI)
    {
        inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
    }
}
11481 #endif // _TARGET_X86_ || _TARGET_ARM_
11483 //------------------------------------------------------------------------
11484 // genReturn: Generates code for return statement.
11485 // In case of struct return, delegates to the genStructReturn method.
11488 // treeNode - The GT_RETURN or GT_RETFILT tree node.
//------------------------------------------------------------------------
// genReturn: Generates code for return statement.
//            In case of struct return, delegates to the genStructReturn method.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node.
//
// Return Value:
//    None
//
void CodeGen::genReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
    // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
    // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
    assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));

#ifdef DEBUG
    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
    }
#endif // DEBUG

#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
    // 64-bit longs are returned in a register pair on 32-bit targets.
    if (targetType == TYP_LONG)
    {
        genLongReturn(treeNode);
    }
    else
#endif // _TARGET_X86_ || _TARGET_ARM_
    {
        if (isStructReturn(treeNode))
        {
            genStructReturn(treeNode);
        }
        else if (targetType != TYP_VOID)
        {
            assert(op1 != nullptr);
            noway_assert(op1->gtRegNum != REG_NA);

            // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
            // consumed a reg for the operand. This is because the variable
            // is dead after return. But we are issuing more instructions
            // like "profiler leave callback" after this consumption. So
            // if you are issuing more instructions after this point,
            // remember to keep the variable live up until the new method
            // exit point where it is actually dead.
            genConsumeReg(op1);

#if defined(_TARGET_ARM64_)
            genSimpleReturn(treeNode);
#else // !_TARGET_ARM64_
#if defined(_TARGET_X86_)
            if (varTypeIsFloating(treeNode))
            {
                genFloatReturn(treeNode);
            }
            else
#elif defined(_TARGET_ARM_)
            // Soft-FP / varargs: float results are returned in integer registers.
            if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
            {
                if (targetType == TYP_FLOAT)
                {
                    getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
                }
                else
                {
                    assert(targetType == TYP_DOUBLE);
                    getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
                                                op1->gtRegNum);
                }
            }
            else
#endif // _TARGET_ARM_
            {
                // Move the result into the ABI return register if needed.
                regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
                if (op1->gtRegNum != retReg)
                {
                    inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
                }
            }
#endif // !_TARGET_ARM64_
        }
    }

#ifdef PROFILING_SUPPORTED
    // !! Note !!
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
    //                  the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
    //                  in the handling of the GT_RETURN statement.
    //                  Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
    //                  for the return registers containing GC refs.
    //
    // There will be a single return block while generating profiler ELT callbacks.
    //
    // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    // In flowgraph and other places assert that the last node of a block marked as
    // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
    // maintain such an invariant irrespective of whether profiler hook needed or not.
    // Also, there is not much to be gained by materializing it as an explicit node.
    if (compiler->compCurBB == compiler->genReturnBB)
    {
        // !! NOTE !!
        // Since we are invalidating the assumption that we would slip into the epilog
        // right after the "return", we need to preserve the return reg's GC state
        // across the call until actual method return.
        ReturnTypeDesc retTypeDesc;
        unsigned       regCount = 0;
        if (compiler->compMethodReturnsMultiRegRetType())
        {
            if (varTypeIsLong(compiler->info.compRetNativeType))
            {
                retTypeDesc.InitializeLongReturnType(compiler);
            }
            else // we must have a struct return type
            {
                retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
            }
            regCount = retTypeDesc.GetReturnRegCount();
        }

        // Keep the return value's GC info live across the profiler leave callback.
        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
        }
        else if (compiler->compMethodReturnsMultiRegRetType())
        {
            for (unsigned i = 0; i < regCount; ++i)
            {
                if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                {
                    gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
                }
            }
        }

        genProfilingLeaveCallback();

        // The callback is done; the return registers no longer hold live GC refs.
        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
        }
        else if (compiler->compMethodReturnsMultiRegRetType())
        {
            for (unsigned i = 0; i < regCount; ++i)
            {
                if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                {
                    gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
                }
            }
        }
    }
#endif // PROFILING_SUPPORTED

#if defined(DEBUG) && defined(_TARGET_XARCH_)
    bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;

#if FEATURE_EH_FUNCLETS
    // Don't do stack pointer check at the return from a funclet; only for the main function.
    if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
    {
        doStackPointerCheck = false;
    }
#else  // !FEATURE_EH_FUNCLETS
    // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
    // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
    if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) || (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET))
    {
        doStackPointerCheck = false;
    }
#endif // !FEATURE_EH_FUNCLETS

    genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
}
11665 #if defined(DEBUG) && defined(_TARGET_XARCH_)
11667 //------------------------------------------------------------------------
11668 // genStackPointerCheck: Generate code to check the stack pointer against a saved value.
11669 // This is a debug check.
11672 // doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
11673 // lvaStackPointerVar - The local variable number that holds the value of the stack pointer
11674 // we are comparing against.
//------------------------------------------------------------------------
// genStackPointerCheck: Generate code to check the stack pointer against a saved value.
// This is a debug check.
//
// Arguments:
//    doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
//    lvaStackPointerVar  - The local variable number that holds the value of the stack pointer
//                          we are comparing against.
//
// Return Value:
//    None
//
void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar)
{
    if (doStackPointerCheck)
    {
        // The saved-SP local must be a valid, un-enregistered, frame-resident variable
        // (0xCCCCCCCC is the "uninitialized" sentinel).
        noway_assert(lvaStackPointerVar != 0xCCCCCCCC && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister &&
                     compiler->lvaTable[lvaStackPointerVar].lvOnFrame);
        // Compare the current SP against the saved value; on mismatch fall through
        // to a breakpoint, otherwise jump past it.
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);

        BasicBlock* sp_check = genCreateTempLabel();
        getEmitter()->emitIns_J(INS_je, sp_check);
        instGen(INS_BREAKPOINT);
        genDefineTempLabel(sp_check);
    }
}
11694 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)