1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Code Generator Common: XX
9 XX Methods common to all architectures and register allocation strategies XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16 // identical, and which should probably be moved here.
27 #ifndef JIT32_GCENCODER
28 #include "gcinfoencoder.h"
31 /*****************************************************************************/
33 const BYTE genTypeSizes[] = {
34 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
39 const BYTE genTypeAlignments[] = {
40 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
45 const BYTE genTypeStSzs[] = {
46 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
51 const BYTE genActualTypes[] = {
52 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
57 void CodeGenInterface::setFramePointerRequiredEH(bool value)
59 m_cgFramePointerRequired = value;
61 #ifndef JIT32_GCENCODER
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
73 printf("Method has EH, marking method as fully interruptible\n");
77 m_cgInterruptible = true;
79 #endif // JIT32_GCENCODER
82 /*****************************************************************************/
83 CodeGenInterface* getCodeGenerator(Compiler* comp)
85 return new (comp, CMK_Codegen) CodeGen(comp);
88 // CodeGen constructor
89 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
94 /*****************************************************************************/
96 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
98 #if defined(_TARGET_XARCH_)
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104 #endif // defined(_TARGET_XARCH_)
106 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
110 #if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
115 gcInfo.regSet = ®Set;
116 m_cgEmitter = new (compiler->getAllocator()) emitter();
117 m_cgEmitter->codeGen = this;
118 m_cgEmitter->gcInfo = &gcInfo;
121 setVerbose(compiler->verbose);
129 getDisAssembler().disInit(compiler);
133 genTempLiveChg = true;
134 genTrnslLocalVarCount = 0;
136 // Shouldn't be used before it is set in genFnProlog()
137 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
139 #if defined(_TARGET_XARCH_)
140 // Shouldn't be used before it is set in genFnProlog()
141 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
142 #endif // defined(_TARGET_XARCH_)
145 #ifdef _TARGET_AMD64_
146 // This will be set before final frame layout.
147 compiler->compVSQuirkStackPaddingNeeded = 0;
149 // Set to true if we perform the Quirk that fixes the PPP issue
150 compiler->compQuirkForPPPflag = false;
151 #endif // _TARGET_AMD64_
153 // Initialize the IP-mapping logic.
154 compiler->genIPmappingList = nullptr;
155 compiler->genIPmappingLast = nullptr;
156 compiler->genCallSite2ILOffsetMap = nullptr;
158 /* Assume that we not fully interruptible */
160 genInterruptible = false;
161 #ifdef _TARGET_ARMARCH_
162 hasTailCalls = false;
163 #endif // _TARGET_ARMARCH_
165 genInterruptibleUsed = false;
166 genCurDispOffset = (unsigned)-1;
170 void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
172 tree->gtRegNum = reg;
175 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
177 //---------------------------------------------------------------------
178 // genTotalFrameSize - return the "total" size of the stack frame, including local size
179 // and callee-saved register size. There are a few things "missing" depending on the
180 // platform. The function genCallerSPtoInitialSPdelta() includes those things.
182 // For ARM, this doesn't include the prespilled registers.
184 // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
185 // It also doesn't include the pushed return address.
190 int CodeGenInterface::genTotalFrameSize()
192 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
194 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
196 assert(totalFrameSize >= 0);
197 return totalFrameSize;
200 //---------------------------------------------------------------------
201 // genSPtoFPdelta - return the offset from SP to the frame pointer.
202 // This number is going to be positive, since SP must be at the lowest
205 // There must be a frame pointer to call this function!
207 int CodeGenInterface::genSPtoFPdelta()
209 assert(isFramePointerUsed());
213 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
219 //---------------------------------------------------------------------
220 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
221 // This number is going to be negative, since the Caller-SP is at a higher
222 // address than the frame pointer.
224 // There must be a frame pointer to call this function!
226 int CodeGenInterface::genCallerSPtoFPdelta()
228 assert(isFramePointerUsed());
229 int callerSPtoFPdelta = 0;
231 #if defined(_TARGET_ARM_)
232 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
233 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
234 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
235 #elif defined(_TARGET_X86_)
236 // Thanks to ebp chaining, the difference between ebp-based addresses
237 // and caller-SP-relative addresses is just the 2 pointers:
240 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
242 #error "Unknown _TARGET_"
245 assert(callerSPtoFPdelta <= 0);
246 return callerSPtoFPdelta;
249 //---------------------------------------------------------------------
250 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
252 // This number will be negative.
254 int CodeGenInterface::genCallerSPtoInitialSPdelta()
256 int callerSPtoSPdelta = 0;
258 #if defined(_TARGET_ARM_)
259 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
260 callerSPtoSPdelta -= genTotalFrameSize();
261 #elif defined(_TARGET_X86_)
262 callerSPtoSPdelta -= genTotalFrameSize();
263 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
265 // compCalleeRegsPushed does not account for the frame pointer
266 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
267 if (isFramePointerUsed())
269 callerSPtoSPdelta -= REGSIZE_BYTES;
272 #error "Unknown _TARGET_"
275 assert(callerSPtoSPdelta <= 0);
276 return callerSPtoSPdelta;
279 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
281 /*****************************************************************************
282 * Should we round simple operations (assignments, arithmetic operations, etc.)
287 bool CodeGen::genShouldRoundFP()
289 RoundLevel roundLevel = getRoundFloatLevel();
294 case ROUND_CMP_CONST:
299 assert(roundLevel == ROUND_ALWAYS);
304 /*****************************************************************************
306 * Initialize some global variables.
309 void CodeGen::genPrepForCompiler()
311 treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
313 /* Figure out which non-register variables hold pointers */
315 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
317 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
318 // in a register (i.e. they live on the stack for all or part of their lifetime).
319 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
323 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
325 if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
327 if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
329 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
333 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
334 genLastLiveMask = RBM_NONE;
336 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
340 /*****************************************************************************
341 * To report exception handling information to the VM, we need the size of the exception
342 * handling regions. To compute that, we need to emit labels for the beginning block of
343 * an EH region, and the block that immediately follows a region. Go through the EH
344 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
346 * The beginning blocks of the EH regions already should have this flag set.
348 * No blocks should be added or removed after this.
350 * This code is closely couple with genReportEH() in the sense that any block
351 * that this procedure has determined it needs to have a label has to be selected
352 * using the same logic both here and in genReportEH(), so basically any time there is
353 * a change in the way we handle EH reporting, we have to keep the logic of these two
357 void CodeGen::genPrepForEHCodegen()
359 assert(!compiler->fgSafeBasicBlockCreation);
364 bool anyFinallys = false;
366 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
367 HBtab < HBtabEnd; HBtab++)
369 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
370 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
372 if (HBtab->ebdTryLast->bbNext != nullptr)
374 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
377 if (HBtab->ebdHndLast->bbNext != nullptr)
379 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
382 if (HBtab->HasFilter())
384 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
385 // The block after the last block of the filter is
386 // the handler begin block, which we already asserted
387 // has BBF_HAS_LABEL set.
390 #if FEATURE_EH_CALLFINALLY_THUNKS
391 if (HBtab->HasFinallyHandler())
395 #endif // FEATURE_EH_CALLFINALLY_THUNKS
398 #if FEATURE_EH_CALLFINALLY_THUNKS
401 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
403 if (block->bbJumpKind == BBJ_CALLFINALLY)
405 BasicBlock* bbToLabel = block->bbNext;
406 if (block->isBBCallAlwaysPair())
408 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
410 if (bbToLabel != nullptr)
412 bbToLabel->bbFlags |= BBF_HAS_LABEL;
414 } // block is BBJ_CALLFINALLY
416 } // if (anyFinallys)
417 #endif // FEATURE_EH_CALLFINALLY_THUNKS
420 void CodeGenInterface::genUpdateLife(GenTree* tree)
422 treeLifeUpdater->UpdateLife(tree);
425 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
427 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
430 // Return the register mask for the given register variable
432 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
434 regMaskTP regMask = RBM_NONE;
436 assert(varDsc->lvIsInReg());
438 if (varTypeIsFloating(varDsc->TypeGet()))
440 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
444 regMask = genRegMask(varDsc->lvRegNum);
449 // Return the register mask for the given lclVar or regVar tree node
451 regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
453 assert(tree->gtOper == GT_LCL_VAR);
455 regMaskTP regMask = RBM_NONE;
456 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
457 if (varDsc->lvPromoted)
459 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
461 noway_assert(compiler->lvaTable[i].lvIsStructField);
462 if (compiler->lvaTable[i].lvIsInReg())
464 regMask |= genGetRegMask(&compiler->lvaTable[i]);
468 else if (varDsc->lvIsInReg())
470 regMask = genGetRegMask(varDsc);
475 // The given lclVar is either going live (being born) or dying.
476 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
477 // Update regSet.rsMaskVars accordingly.
479 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
481 regMaskTP regMask = genGetRegMask(varDsc);
484 if (compiler->verbose)
486 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
487 varDsc->PrintVarReg();
488 printf(" is becoming %s ", (isDying) ? "dead" : "live");
489 Compiler::printTreeID(tree);
496 // We'd like to be able to assert the following, however if we are walking
497 // through a qmark/colon tree, we may encounter multiple last-use nodes.
498 // assert((regSet.rsMaskVars & regMask) == regMask);
499 regSet.RemoveMaskVars(regMask);
503 assert((regSet.rsMaskVars & regMask) == 0);
504 regSet.AddMaskVars(regMask);
508 //----------------------------------------------------------------------
509 // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
510 // Not all JIT Helper calls follow the standard ABI on the target architecture.
512 // TODO-CQ: Currently this list is incomplete (not all helpers calls are
513 // enumerated) and not 100% accurate (some killsets are bigger than
514 // what they really are).
515 // There's some work to be done in several places in the JIT to
516 // accurately track the registers that are getting killed by
518 // a) LSRA needs several changes to accomodate more precise killsets
519 // for every helper call it sees (both explicitly [easy] and
520 // implicitly [hard])
521 // b) Currently for AMD64, when we generate code for a helper call
522 // we're independently over-pessimizing the killsets of the call
523 // (independently from LSRA) and this needs changes
524 // both in CodeGenAmd64.cpp and emitx86.cpp.
526 // The best solution for this problem would be to try to centralize
527 // the killset information in a single place but then make the
528 // corresponding changes so every code generation phase is in sync
531 // The interim solution is to only add known helper calls that don't
532 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
535 // helper - The helper being inquired about
538 // Mask of register kills -- registers whose values are no longer guaranteed to be the same.
540 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
544 case CORINFO_HELP_ASSIGN_BYREF:
545 #if defined(_TARGET_AMD64_)
546 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
547 #elif defined(_TARGET_ARMARCH_)
548 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
549 #elif defined(_TARGET_X86_)
550 return RBM_ESI | RBM_EDI | RBM_ECX;
552 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
553 return RBM_CALLEE_TRASH;
556 #if defined(_TARGET_ARMARCH_)
557 case CORINFO_HELP_ASSIGN_REF:
558 case CORINFO_HELP_CHECKED_ASSIGN_REF:
559 return RBM_CALLEE_TRASH_WRITEBARRIER;
562 case CORINFO_HELP_PROF_FCN_ENTER:
563 #ifdef RBM_PROFILER_ENTER_TRASH
564 return RBM_PROFILER_ENTER_TRASH;
566 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
569 case CORINFO_HELP_PROF_FCN_LEAVE:
570 #ifdef RBM_PROFILER_LEAVE_TRASH
571 return RBM_PROFILER_LEAVE_TRASH;
573 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
576 case CORINFO_HELP_PROF_FCN_TAILCALL:
577 #ifdef RBM_PROFILER_TAILCALL_TRASH
578 return RBM_PROFILER_TAILCALL_TRASH;
580 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
584 case CORINFO_HELP_ASSIGN_REF_EAX:
585 case CORINFO_HELP_ASSIGN_REF_ECX:
586 case CORINFO_HELP_ASSIGN_REF_EBX:
587 case CORINFO_HELP_ASSIGN_REF_EBP:
588 case CORINFO_HELP_ASSIGN_REF_ESI:
589 case CORINFO_HELP_ASSIGN_REF_EDI:
591 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
592 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
593 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
594 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
595 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
596 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
599 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
600 case CORINFO_HELP_ASSIGN_REF:
601 case CORINFO_HELP_CHECKED_ASSIGN_REF:
602 return RBM_EAX | RBM_EDX;
603 #endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
606 case CORINFO_HELP_STOP_FOR_GC:
607 return RBM_STOP_FOR_GC_TRASH;
609 case CORINFO_HELP_INIT_PINVOKE_FRAME:
610 return RBM_INIT_PINVOKE_FRAME_TRASH;
613 return RBM_CALLEE_TRASH;
617 //----------------------------------------------------------------------
618 // compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
619 // contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
620 // what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
621 // GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
622 // compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
623 // address registers as killed for liveness purposes, since their values change. However, they still are
624 // valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
626 // Note: This list may not be complete and defaults to the default RBM_CALLEE_TRASH_NOGC registers.
629 // helper - The helper being inquired about
632 // Mask of GC register kills
634 regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
636 assert(emitter::emitNoGChelper(helper));
640 case CORINFO_HELP_ASSIGN_BYREF:
641 #if defined(_TARGET_X86_)
642 // This helper only trashes ECX.
644 #elif defined(_TARGET_AMD64_)
645 // This uses and defs RDI and RSI.
646 return RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI);
647 #elif defined(_TARGET_ARMARCH_)
648 return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
650 assert(!"unknown arch");
653 #if defined(_TARGET_XARCH_)
654 case CORINFO_HELP_PROF_FCN_ENTER:
655 return RBM_PROFILER_ENTER_TRASH;
657 case CORINFO_HELP_PROF_FCN_LEAVE:
658 return RBM_PROFILER_LEAVE_TRASH;
660 case CORINFO_HELP_PROF_FCN_TAILCALL:
661 return RBM_PROFILER_TAILCALL_TRASH;
662 #endif // defined(_TARGET_XARCH_)
664 #if defined(_TARGET_ARMARCH_)
665 case CORINFO_HELP_ASSIGN_REF:
666 case CORINFO_HELP_CHECKED_ASSIGN_REF:
667 return RBM_CALLEE_GCTRASH_WRITEBARRIER;
668 case CORINFO_HELP_PROF_FCN_LEAVE:
669 // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH on ARMARCH.
670 return RBM_CALLEE_TRASH_NOGC & ~RBM_PROFILER_RET_SCRATCH;
673 #if defined(_TARGET_X86_)
674 case CORINFO_HELP_INIT_PINVOKE_FRAME:
675 return RBM_INIT_PINVOKE_FRAME_TRASH;
676 #endif // defined(_TARGET_X86_)
679 return RBM_CALLEE_TRASH_NOGC;
683 template <bool ForCodeGen>
684 void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
691 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
692 dumpConvertedVarSet(this, compCurLife);
693 printf(" -> %s ", VarSetOps::ToString(this, newLife));
694 dumpConvertedVarSet(this, newLife);
699 /* We should only be called when the live set has actually changed */
701 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
705 VarSetOps::Assign(this, compCurLife, newLife);
709 /* Figure out which variables are becoming live/dead at this point */
711 // deadSet = compCurLife - newLife
712 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
714 // bornSet = newLife - compCurLife
715 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
717 /* Can't simultaneously become live and dead at the same time */
719 // (deadSet UNION bornSet) != EMPTY
720 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
721 // (deadSet INTERSECTION bornSet) == EMPTY
722 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
724 VarSetOps::Assign(this, compCurLife, newLife);
726 // Handle the dying vars first, then the newly live vars.
727 // This is because, in the RyuJIT backend case, they may occupy registers that
728 // will be occupied by another var that is newly live.
729 VarSetOps::Iter deadIter(this, deadSet);
730 unsigned deadVarIndex = 0;
731 while (deadIter.NextElem(&deadVarIndex))
733 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
734 varDsc = lvaTable + varNum;
735 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
736 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
738 if (varDsc->lvIsInReg())
740 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
742 regMaskTP regMask = varDsc->lvRegMask();
745 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
749 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
751 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
753 // This isn't in a register, so update the gcVarPtrSetCur.
754 else if (isGCRef || isByRef)
756 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
757 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
761 VarSetOps::Iter bornIter(this, bornSet);
762 unsigned bornVarIndex = 0;
763 while (bornIter.NextElem(&bornVarIndex))
765 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
766 varDsc = lvaTable + varNum;
767 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
768 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
770 if (varDsc->lvIsInReg())
773 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
775 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
778 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
779 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
780 regMaskTP regMask = varDsc->lvRegMask();
783 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
787 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
790 // This isn't in a register, so update the gcVarPtrSetCur
791 else if (lvaIsGCTracked(varDsc))
793 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
794 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
801 // Need an explicit instantiation.
802 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
804 /*****************************************************************************
808 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
810 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
813 /*****************************************************************************
817 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
819 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
823 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
828 //----------------------------------------------------------------------
829 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
832 // tree - spilled GenTree node
835 // TempDsc corresponding to tree
836 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
838 // tree must be in spilled state.
839 assert((tree->gtFlags & GTF_SPILLED) != 0);
841 // Get the tree's SpillDsc.
842 RegSet::SpillDsc* prevDsc;
843 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
844 assert(spillDsc != nullptr);
846 // Get the temp desc.
847 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
851 #ifdef _TARGET_XARCH_
853 #ifdef _TARGET_AMD64_
854 // Returns relocation type hint for an addr.
855 // Note that there are no reloc hints on x86.
858 // addr - data address
861 // relocation type hint
863 unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
865 return compiler->eeGetRelocTypeHint((void*)addr);
867 #endif //_TARGET_AMD64_
869 // Return true if an absolute indirect data address can be encoded as IP-relative.
870 // offset. Note that this method should be used only when the caller knows that
871 // the address is an icon value that VM has given and there is no GenTree node
872 // representing it. Otherwise, one should always use FitsInAddrBase().
875 // addr - an absolute indirect data address
878 // true if indir data addr could be encoded as IP-relative offset.
880 bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
882 #ifdef _TARGET_AMD64_
883 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
885 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
890 // Return true if an indirect code address can be encoded as IP-relative offset.
891 // Note that this method should be used only when the caller knows that the
892 // address is an icon value that VM has given and there is no GenTree node
893 // representing it. Otherwise, one should always use FitsInAddrBase().
896 // addr - an absolute indirect code address
899 // true if indir code addr could be encoded as IP-relative offset.
901 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
903 #ifdef _TARGET_AMD64_
904 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
906 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
911 // Return true if an indirect code address can be encoded as 32-bit displacement
912 // relative to zero. Note that this method should be used only when the caller
913 // knows that the address is an icon value that VM has given and there is no
914 // GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
917 // addr - absolute indirect code address
920 // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
922 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
924 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
927 // Return true if an absolute indirect code address needs a relocation recorded with VM.
930 // addr - an absolute indirect code address
933 // true if indir code addr needs a relocation recorded with VM
935 bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
937 // If generating relocatable ngen code, then all code addr should go through relocation
938 if (compiler->opts.compReloc)
943 #ifdef _TARGET_AMD64_
944 // See if the code indir addr can be encoded as 32-bit displacement relative to zero.
945 // We don't need a relocation in that case.
946 if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr))
951 // Else we need a relocation.
954 // On x86 there is no need to record or ask for relocations during jitting,
955 // because all addrs fit within 32-bits.
957 #endif //_TARGET_X86_
960 // Return true if a direct code address needs to be marked as relocatable.
963 // addr - absolute direct code address
966 // true if direct code addr needs a relocation recorded with VM
968 bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
970 // If generating relocatable ngen code, then all code addr should go through relocation
971 if (compiler->opts.compReloc)
976 #ifdef _TARGET_AMD64_
977 // By default all direct code addresses go through relocation so that VM will setup
978 // a jump stub if addr cannot be encoded as pc-relative offset.
981 // On x86 there is no need for recording relocations during jitting,
982 // because all addrs fit within 32-bits.
984 #endif //_TARGET_X86_
986 #endif //_TARGET_XARCH_
988 /*****************************************************************************
990 * The following can be used to create basic blocks that serve as labels for
991 * the emitter. Use with caution - these are not real basic blocks!
996 BasicBlock* CodeGen::genCreateTempLabel()
999 // These blocks don't affect FP
1000 compiler->fgSafeBasicBlockCreation = true;
1003 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1006 compiler->fgSafeBasicBlockCreation = false;
1009 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
1011 // Use coldness of current block, as this label will
1012 // be contained in it.
1013 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
1017 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1019 block->bbTgtStkDepth = genStackLevel / sizeof(int);
1026 void CodeGen::genDefineTempLabel(BasicBlock* label)
1029 if (compiler->opts.dspCode)
1031 printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum);
1035 label->bbEmitCookie =
1036 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1039 /*****************************************************************************
1041 * Adjust the stack pointer by the given value; assumes that this follows
1042 * a call so only callee-saved registers (and registers that may hold a
1043 * return value) are used at this point.
1046 void CodeGen::genAdjustSP(target_ssize_t delta)
1048 #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
1049 if (delta == sizeof(int))
1050 inst_RV(INS_pop, REG_ECX, TYP_INT);
1053 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1056 //------------------------------------------------------------------------
1057 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1060 // block - The BasicBlock for which we are about to generate code.
1063 // Must be called just prior to generating code for 'block'.
1066 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1067 // and if 'block' is a throw helper block with a non-zero stack level.
1069 void CodeGen::genAdjustStackLevel(BasicBlock* block)
1071 #if !FEATURE_FIXED_OUT_ARGS
1072 // Check for inserted throw blocks and adjust genStackLevel.
1073 CLANG_FORMAT_COMMENT_ANCHOR;
1075 #if defined(UNIX_X86_ABI)
1076 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1078 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1079 // at this point if a jump to this block is made in the middle of pushing arugments.
1081 // Here we restore SP to prevent potential stack alignment issues.
1082 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1086 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1088 noway_assert(block->bbFlags & BBF_JMP_TARGET);
1090 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1092 if (genStackLevel != 0)
1095 getEmitter()->emitMarkStackLvl(genStackLevel);
1096 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1098 #else // _TARGET_X86_
1099 NYI("Need emitMarkStackLvl()");
1100 #endif // _TARGET_X86_
1103 #endif // !FEATURE_FIXED_OUT_ARGS
1106 #ifdef _TARGET_ARMARCH_
1108 // alignmentWB is out param
1109 unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1111 unsigned alignment = 0;
1112 unsigned opSize = 0;
1114 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1116 opSize = InferStructOpSizeAlign(op, &alignment);
1120 alignment = genTypeAlignments[op->TypeGet()];
1121 opSize = genTypeSizes[op->TypeGet()];
1124 assert(opSize != 0);
1125 assert(alignment != 0);
1127 (*alignmentWB) = alignment;
1131 // alignmentWB is out param
1132 unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1134 unsigned alignment = 0;
1135 unsigned opSize = 0;
1137 while (op->gtOper == GT_COMMA)
1139 op = op->gtOp.gtOp2;
1142 if (op->gtOper == GT_OBJ)
1144 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1145 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1146 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1148 else if (op->gtOper == GT_LCL_VAR)
1150 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1151 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1152 assert(varDsc->lvType == TYP_STRUCT);
1153 opSize = varDsc->lvSize();
1154 #ifndef _TARGET_64BIT_
1155 if (varDsc->lvStructDoubleAlign)
1157 alignment = TARGET_POINTER_SIZE * 2;
1160 #endif // !_TARGET_64BIT_
1162 alignment = TARGET_POINTER_SIZE;
1165 else if (op->OperIsCopyBlkOp())
1167 GenTree* op2 = op->gtOp.gtOp2;
1169 if (op2->OperGet() == GT_CNS_INT)
1171 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1173 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1174 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1176 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1180 opSize = (unsigned)op2->gtIntCon.gtIconVal;
1181 GenTree* op1 = op->gtOp.gtOp1;
1182 assert(op1->OperGet() == GT_LIST);
1183 GenTree* dstAddr = op1->gtOp.gtOp1;
1184 if (dstAddr->OperGet() == GT_ADDR)
1186 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1190 assert(!"Unhandle dstAddr node");
1191 alignment = TARGET_POINTER_SIZE;
1197 noway_assert(!"Variable sized COPYBLK register arg!");
1199 alignment = TARGET_POINTER_SIZE;
1202 else if (op->gtOper == GT_MKREFANY)
1204 opSize = TARGET_POINTER_SIZE * 2;
1205 alignment = TARGET_POINTER_SIZE;
1207 else if (op->IsArgPlaceHolderNode())
1209 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1210 assert(clsHnd != 0);
1211 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1212 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1216 assert(!"Unhandled gtOper");
1217 opSize = TARGET_POINTER_SIZE;
1218 alignment = TARGET_POINTER_SIZE;
1221 assert(opSize != 0);
1222 assert(alignment != 0);
1224 (*alignmentWB) = alignment;
1228 #endif // _TARGET_ARMARCH_
1230 /*****************************************************************************
1232 * Take an address expression and try to find the best set of components to
1233 * form an address mode; returns non-zero if this is successful.
1235 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1236 * Refactor this code so that the underlying analysis can be used in
1237 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1238 * option to not generate the code.
1240 * 'fold' specifies if it is OK to fold the array index which hangs off
1243 * If successful, the parameters will be set to the following values:
1245 * *rv1Ptr ... base operand
1246 * *rv2Ptr ... optional operand
1247 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1248 * #if SCALED_ADDR_MODES
1249 * *mulPtr ... optional multiplier (2/4/8) for rv2
1250 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1252 * *cnsPtr ... integer constant [optional]
1254 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1255 * identifies the components that might be used to
1256 * form an address mode later on.
1259 bool CodeGen::genCreateAddrMode(GenTree* addr,
1264 #if SCALED_ADDR_MODES
1266 #endif // SCALED_ADDR_MODES
1270 The following indirections are valid address modes on x86/x64:
1272 [ icon] * not handled here
1276 [reg1 + reg2 + icon]
1283 [reg1 + 2 * reg2 + icon]
1284 [reg1 + 4 * reg2 + icon]
1285 [reg1 + 8 * reg2 + icon]
1287 The following indirections are valid address modes on arm64:
1292 [reg1 + reg2 * natural-scale]
1296 /* All indirect address modes require the address to be an addition */
1298 if (addr->gtOper != GT_ADD)
1303 // Can't use indirect addressing mode as we need to check for overflow.
1304 // Also, can't use 'lea' as it doesn't set the flags.
1306 if (addr->gtOverflow())
1311 GenTree* rv1 = nullptr;
1312 GenTree* rv2 = nullptr;
1318 #if SCALED_ADDR_MODES
1320 #endif // SCALED_ADDR_MODES
1324 /* What order are the sub-operands to be evaluated */
1326 if (addr->gtFlags & GTF_REVERSE_OPS)
1328 op1 = addr->gtOp.gtOp2;
1329 op2 = addr->gtOp.gtOp1;
1333 op1 = addr->gtOp.gtOp1;
1334 op2 = addr->gtOp.gtOp2;
1337 bool rev = false; // Is op2 first in the evaluation order?
1340 A complex address mode can combine the following operands:
1342 op1 ... base address
1343 op2 ... optional scaled index
1344 #if SCALED_ADDR_MODES
1345 mul ... optional multiplier (2/4/8) for op2
1347 cns ... optional displacement
1349 Here we try to find such a set of operands and arrange for these
1350 to sit in registers.
1354 #if SCALED_ADDR_MODES
1356 #endif // SCALED_ADDR_MODES
1359 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1360 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1361 here if we find a scaled index.
1363 CLANG_FORMAT_COMMENT_ANCHOR;
1365 #if SCALED_ADDR_MODES
1367 #endif // SCALED_ADDR_MODES
1369 /* Special case: keep constants as 'op2' */
1371 if (op1->IsCnsIntOrI())
1373 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1379 /* Check for an addition of a constant */
1381 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1383 /* We're adding a constant */
1385 cns += op2->gtIntConCommon.IconValue();
1387 #if defined(_TARGET_ARMARCH_)
1391 /* Inspect the operand the constant is being added to */
1393 switch (op1->gtOper)
1397 if (op1->gtOverflow())
1402 op2 = op1->gtOp.gtOp2;
1403 op1 = op1->gtOp.gtOp1;
1407 #if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1408 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1410 if (op1->gtOverflow())
1412 return false; // Need overflow check
1419 mul = op1->GetScaledIndex();
1422 /* We can use "[mul*rv2 + icon]" */
1425 rv2 = op1->gtOp.gtOp1;
1430 #endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1437 /* The best we can do is "[rv1 + icon]" */
1445 // op2 is not a constant. So keep on trying.
1447 /* Neither op1 nor op2 are sitting in a register right now */
1449 switch (op1->gtOper)
1451 #if !defined(_TARGET_ARMARCH_)
1452 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1455 if (op1->gtOverflow())
1460 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
1462 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
1463 op1 = op1->gtOp.gtOp1;
1470 #if SCALED_ADDR_MODES
1474 if (op1->gtOverflow())
1483 mul = op1->GetScaledIndex();
1486 /* 'op1' is a scaled value */
1489 rv2 = op1->gtOp.gtOp1;
1492 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1494 if (jitIsScaleIndexMul(argScale * mul))
1496 mul = mul * argScale;
1497 rv2 = rv2->gtOp.gtOp1;
1505 noway_assert(rev == false);
1512 #endif // SCALED_ADDR_MODES
1513 #endif // !_TARGET_ARMARCH
1517 op1 = op1->gtOp.gtOp1;
1522 op1 = op1->gtOp.gtOp2;
1530 switch (op2->gtOper)
1532 #if !defined(_TARGET_ARMARCH_)
1533 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1536 if (op2->gtOverflow())
1541 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
1543 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1544 op2 = op2->gtOp.gtOp1;
1551 #if SCALED_ADDR_MODES
1555 if (op2->gtOverflow())
1564 mul = op2->GetScaledIndex();
1567 // 'op2' is a scaled value...is it's argument also scaled?
1569 rv2 = op2->gtOp.gtOp1;
1570 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1572 if (jitIsScaleIndexMul(argScale * mul))
1574 mul = mul * argScale;
1575 rv2 = rv2->gtOp.gtOp1;
1589 #endif // SCALED_ADDR_MODES
1590 #endif // !_TARGET_ARMARCH
1594 op2 = op2->gtOp.gtOp1;
1599 op2 = op2->gtOp.gtOp2;
1606 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
1610 #ifdef _TARGET_ARM64_
1618 /* Make sure a GC address doesn't end up in 'rv2' */
1620 if (varTypeIsGC(rv2->TypeGet()))
1622 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
1631 /* Special case: constant array index (that is range-checked) */
1638 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
1640 /* For valuetype arrays where we can't use the scaled address
1641 mode, rv2 will point to the scaled index. So we have to do
1644 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1652 /* May be a simple array. rv2 will points to the actual index */
1658 /* Get hold of the array index and see if it's a constant */
1659 if (index->IsIntCnsFitsInI32())
1661 /* Get hold of the index value */
1662 ssize_t ixv = index->AsIntConCommon()->IconValue();
1664 #if SCALED_ADDR_MODES
1665 /* Scale the index if necessary */
1672 if (FitsIn<INT32>(cns + ixv))
1674 /* Add the scaled index to the offset value */
1678 #if SCALED_ADDR_MODES
1679 /* There is no scaled operand any more */
1688 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
1689 noway_assert(rv1 || mul != 1);
1691 noway_assert(FitsIn<INT32>(cns));
1693 if (rv1 == nullptr && rv2 == nullptr)
1698 /* Success - return the various components to the caller */
1703 #if SCALED_ADDR_MODES
1711 /*****************************************************************************
1712 * The condition to use for (the jmp/set for) the given type of operation
1714 * In case of amd64, this routine should be used when there is no gentree available
1715 * and one needs to generate jumps based on integer comparisons. When gentree is
1716 * available always use its overloaded version.
1721 emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
1723 const static BYTE genJCCinsSigned[] = {
1724 #if defined(_TARGET_XARCH_)
1731 EJ_je, // GT_TEST_EQ
1732 EJ_jne, // GT_TEST_NE
1733 #elif defined(_TARGET_ARMARCH_)
1740 #if defined(_TARGET_ARM64_)
1741 EJ_eq, // GT_TEST_EQ
1742 EJ_ne, // GT_TEST_NE
1747 const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
1749 #if defined(_TARGET_XARCH_)
1756 EJ_je, // GT_TEST_EQ
1757 EJ_jne, // GT_TEST_NE
1758 #elif defined(_TARGET_ARMARCH_)
1765 #if defined(_TARGET_ARM64_)
1766 EJ_eq, // GT_TEST_EQ
1767 EJ_ne, // GT_TEST_NE
1772 const static BYTE genJCCinsLogical[] = /* logical operation */
1774 #if defined(_TARGET_XARCH_)
1775 EJ_je, // GT_EQ (Z == 1)
1776 EJ_jne, // GT_NE (Z == 0)
1777 EJ_js, // GT_LT (S == 1)
1779 EJ_jns, // GT_GE (S == 0)
1781 EJ_NONE, // GT_TEST_EQ
1782 EJ_NONE, // GT_TEST_NE
1783 #elif defined(_TARGET_ARMARCH_)
1784 EJ_eq, // GT_EQ (Z == 1)
1785 EJ_ne, // GT_NE (Z == 0)
1786 EJ_mi, // GT_LT (N == 1)
1788 EJ_pl, // GT_GE (N == 0)
1790 #if defined(_TARGET_ARM64_)
1791 EJ_eq, // GT_TEST_EQ
1792 EJ_ne, // GT_TEST_NE
1797 #if defined(_TARGET_XARCH_)
1798 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
1799 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
1800 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
1801 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
1802 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
1803 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
1804 assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1805 assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1807 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
1808 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
1809 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
1810 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
1811 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
1812 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
1813 assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1814 assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1816 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
1817 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
1818 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
1819 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
1820 #elif defined(_TARGET_ARMARCH_)
1821 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
1822 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
1823 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
1824 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
1825 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
1826 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
1828 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
1829 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
1830 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
1831 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
1832 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
1833 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
1835 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
1836 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
1837 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
1838 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
1840 assert(!"unknown arch");
1842 assert(GenTree::OperIsCompare(cmp));
1844 emitJumpKind result = EJ_COUNT;
1846 if (compareKind == CK_UNSIGNED)
1848 result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
1850 else if (compareKind == CK_SIGNED)
1852 result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
1854 else if (compareKind == CK_LOGICAL)
1856 result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
1858 assert(result != EJ_COUNT);
1862 #ifdef _TARGET_ARMARCH_
1863 //------------------------------------------------------------------------
1864 // genEmitGSCookieCheck: Generate code to check that the GS cookie
1865 // wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
1867 void CodeGen::genEmitGSCookieCheck(bool pushReg)
1869 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
1871 // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
1872 // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
1873 if (!pushReg && (compiler->info.compRetType == TYP_REF))
1874 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
1876 // We need two temporary registers, to load the GS cookie values and compare them. We can't use
1877 // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
1878 // callee-trash registers, which should not contain anything interesting at this point.
1879 // We don't have any IR node representing this check, so LSRA can't communicate registers
1882 regNumber regGSConst = REG_GSCOOKIE_TMP_0;
1883 regNumber regGSValue = REG_GSCOOKIE_TMP_1;
1885 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
1887 // load the GS cookie constant into a reg
1889 genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
1893 // Ngen case - GS cookie constant needs to be accessed through an indirection.
1894 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
1895 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
1897 // Load this method's GS value from the stack frame
1898 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
1899 // Compare with the GC cookie constant
1900 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
1902 BasicBlock* gsCheckBlk = genCreateTempLabel();
1903 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1904 inst_JMP(jmpEqual, gsCheckBlk);
1905 // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
1906 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
1907 genDefineTempLabel(gsCheckBlk);
1909 #endif // _TARGET_ARMARCH_
1911 /*****************************************************************************
1913 * Generate an exit sequence for a return from a method (note: when compiling
1914 * for speed there might be multiple exit points).
1917 void CodeGen::genExitCode(BasicBlock* block)
1919 /* Just wrote the first instruction of the epilog - inform debugger
1920 Note that this may result in a duplicate IPmapping entry, and
1923 // For non-optimized debuggable code, there is only one epilog.
1924 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
1926 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
1927 if (compiler->getNeedsGSSecurityCookie())
1929 genEmitGSCookieCheck(jmpEpilog);
1934 // The GS cookie check created a temp label that has no live
1935 // incoming GC registers, we need to fix that
1940 /* Figure out which register parameters hold pointers */
1942 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1945 noway_assert(varDsc->lvIsParam);
1947 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
1950 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1951 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1955 genReserveEpilog(block);
1958 //------------------------------------------------------------------------
1959 // genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1962 // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1963 // Otherwise, we generate the 'throw' inline.
1966 // jumpKind - jump kind to generate;
1967 // codeKind - the special throw-helper kind;
1968 // failBlk - optional fail target block, if it is already known;
1970 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk)
1972 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1973 #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
1974 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1975 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
1976 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1980 // For code with throw helper blocks, find and use the helper block for
1981 // raising the exception. The block may be shared by other trees too.
1983 BasicBlock* excpRaisingBlock;
1985 if (failBlk != nullptr)
1987 // We already know which block to jump to. Use that.
1988 excpRaisingBlock = failBlk;
1991 Compiler::AddCodeDsc* add =
1992 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1993 assert(excpRaisingBlock == add->acdDstBlk);
1994 #if !FEATURE_FIXED_OUT_ARGS
1995 assert(add->acdStkLvlInit || isFramePointerUsed());
1996 #endif // !FEATURE_FIXED_OUT_ARGS
2001 // Find the helper-block which raises the exception.
2002 Compiler::AddCodeDsc* add =
2003 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2004 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2005 excpRaisingBlock = add->acdDstBlk;
2006 #if !FEATURE_FIXED_OUT_ARGS
2007 assert(add->acdStkLvlInit || isFramePointerUsed());
2008 #endif // !FEATURE_FIXED_OUT_ARGS
2011 noway_assert(excpRaisingBlock != nullptr);
2013 // Jump to the exception-throwing block on error.
2014 inst_JMP(jumpKind, excpRaisingBlock);
2018 // The code to throw the exception will be generated inline, and
2019 // we will jump around it in the normal non-exception case.
2021 BasicBlock* tgtBlk = nullptr;
2022 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2023 if (reverseJumpKind != jumpKind)
2025 tgtBlk = genCreateTempLabel();
2026 inst_JMP(reverseJumpKind, tgtBlk);
2029 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
2031 // Define the spot for the normal non-exception case to jump to.
2032 if (tgtBlk != nullptr)
2034 assert(reverseJumpKind != jumpKind);
2035 genDefineTempLabel(tgtBlk);
2040 /*****************************************************************************
2042 * The last operation done was generating code for "tree" and that would
2043 * have set the flags. Check if the operation caused an overflow.
2047 void CodeGen::genCheckOverflow(GenTree* tree)
2049 // Overflow-check should be asked for this tree
2050 noway_assert(tree->gtOverflow());
2052 const var_types type = tree->TypeGet();
2054 // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
2055 noway_assert(!varTypeIsSmall(type));
2057 emitJumpKind jumpKind;
2059 #ifdef _TARGET_ARM64_
2060 if (tree->OperGet() == GT_MUL)
2067 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
2069 #if defined(_TARGET_XARCH_)
2071 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2073 #elif defined(_TARGET_ARMARCH_)
2075 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2077 if (jumpKind == EJ_lo)
2079 if (tree->OperGet() != GT_SUB)
2085 #endif // defined(_TARGET_ARMARCH_)
2088 // Jump to the block which will throw the expection
2090 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2093 #if FEATURE_EH_FUNCLETS
2095 /*****************************************************************************
2097 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
2098 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
2103 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2105 if (block->bbFlags & BBF_FUNCLET_BEG)
2107 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2108 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2110 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2114 // We shouldn't see FUNC_ROOT
2115 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2116 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
2121 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2122 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2124 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2126 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2128 assert(!block->hasHndIndex());
2132 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2133 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2138 #if defined(_TARGET_ARM_)
2139 void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2141 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2142 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2143 // calls the funclet during non-exceptional control flow.
2144 if (block->bbFlags & BBF_FINALLY_TARGET)
2146 assert(block->bbFlags & BBF_JMP_TARGET);
2149 if (compiler->verbose)
2151 printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n", block->bbNum);
2154 // Create a label that we'll use for computing the start of an EH region, if this block is
2155 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
2156 // determining the EH regions, then this NOP would end up outside of the region, if this
2157 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
2158 // would be executed, which we would prefer not to do.
2160 block->bbUnwindNopEmitCookie =
2161 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
2168 #endif // FEATURE_EH_FUNCLETS
2170 /*****************************************************************************
2172 * Generate code for the function.
2175 void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
2180 printf("*************** In genGenerateCode()\n");
2181 compiler->fgDispBasicBlocks(compiler->verboseTrees);
2186 unsigned prologSize;
2187 unsigned epilogSize;
2192 genInterruptibleUsed = true;
2195 genNeedPrologStackProbe = false;
2198 compiler->fgDebugCheckBBlist();
2201 /* This is the real thing */
2203 genPrepForCompiler();
2205 /* Prepare the emitter */
2206 getEmitter()->Init();
2208 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
2212 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
2214 compiler->opts.disAsm = true;
2217 if (compiler->opts.disAsm)
2219 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2221 printf("; Emitting ");
2223 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2225 printf("SMALL_CODE");
2227 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2229 printf("FAST_CODE");
2233 printf("BLENDED_CODE");
2238 if (compiler->info.genCPU == CPU_X86)
2240 printf("generic X86 CPU");
2242 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
2244 printf("Pentium 4");
2246 else if (compiler->info.genCPU == CPU_X64)
2248 if (compiler->canUseVexEncoding())
2250 printf("X64 CPU with AVX");
2254 printf("X64 CPU with SSE2");
2257 else if (compiler->info.genCPU == CPU_ARM)
2259 printf("generic ARM CPU");
2261 else if (compiler->info.genCPU == CPU_ARM64)
2263 printf("generic ARM64 CPU");
2267 printf("unknown architecture");
2270 #if defined(_TARGET_WINDOWS_)
2271 printf(" - Windows");
2272 #elif defined(_TARGET_UNIX_)
2278 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2280 printf("; Tier-0 compilation\n");
2282 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
2284 printf("; Tier-1 compilation\n");
2287 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
2289 printf("; optimized code\n");
2291 else if (compiler->opts.compDbgCode)
2293 printf("; debuggable code\n");
2295 else if (compiler->opts.MinOpts())
2297 printf("; compiler->opts.MinOpts() is true\n");
2301 printf("; unknown optimization flags\n");
2305 if (compiler->genDoubleAlign())
2306 printf("; double-aligned frame\n");
2309 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
2311 if (genInterruptible)
2313 printf("; fully interruptible\n");
2317 printf("; partially interruptible\n");
2320 if (compiler->fgHaveProfileData())
2322 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
2323 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
2326 if (compiler->fgProfileData_ILSizeMismatch)
2328 printf("; discarded IBC profile data due to mismatch in ILSize\n");
2333 // We compute the final frame layout before code generation. This is because LSRA
2334 // has already computed exactly the maximum concurrent number of spill temps of each type that are
2335 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
2336 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
2337 // memory from the VM.
2341 unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
2343 getEmitter()->emitBegFN(isFramePointerUsed()
2346 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
2347 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
2352 /* Now generate code for the function */
2356 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
2357 // generation hasn't touched it (it shouldn't!).
2360 compiler->lvaTableDump();
2364 /* We can now generate the function prolog and epilog */
2366 genGeneratePrologsAndEpilogs();
2368 /* Bind jump distances */
2370 getEmitter()->emitJumpDistBind();
2372 /* The code is now complete and final; it should not change after this. */
2374 /* Compute the size of the code sections that we are going to ask the VM
2375 to allocate. Note that this might not be precisely the size of the
2376 code we emit, though it's fatal if we emit more code than the size we
2378 (Note: an example of a case where we emit less code would be useful.)
2381 getEmitter()->emitComputeCodeSizes();
2385 // Code to test or stress our ability to run a fallback compile.
2386 // We trigger the fallback here, before asking the VM for any memory,
2387 // because if not, we will leak mem, as the current codebase can't free
2388 // the mem after the emitter asks the VM for it. As this is only a stress
2389 // mode, we only want the functionality, and don't care about the relative
2390 // ugliness of having the failure here.
2391 if (!compiler->jitFallbackCompile)
2393 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
2394 // especially that caused by enabling JIT stress.
2395 if (!JitConfig.JitNoForceFallback())
2397 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
2399 NO_WAY_NOASSERT("Stress failure");
2406 /* We've finished collecting all the unwind information for the function. Now reserve
2407 space for it from the VM.
2410 compiler->unwindReserve();
2414 size_t dataSize = getEmitter()->emitDataSize();
2416 #endif // DISPLAY_SIZES
2420 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
2422 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2423 trackedStackPtrsContig = false;
2424 #elif defined(_TARGET_ARM_)
2425 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
2426 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
2428 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
2432 /* We're done generating code for this function */
2433 compiler->compCodeGenDone = true;
2436 compiler->EndPhase(PHASE_GENERATE_CODE);
2438 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
2439 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
2440 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
2442 compiler->EndPhase(PHASE_EMIT_CODE);
2445 if (compiler->opts.disAsm)
2447 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
2448 compiler->info.compFullName);
2449 printf("; ============================================================\n");
2450 printf(""); // in our logic this causes a flush
2455 printf("*************** After end code gen, before unwindEmit()\n");
2456 getEmitter()->emitDispIGlist(true);
2460 #if EMIT_TRACK_STACK_DEPTH
2461 // Check our max stack level. Needed for fgAddCodeRef().
2462 // We need to relax the assert as our estimation won't include code-gen
2463 // stack changes (which we know don't affect fgAddCodeRef()).
2464 // NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a
2465 // count of DWORD-sized arguments, NOT argument size in bytes.
2467 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
2468 compiler->compHndBBtabCount + // Return address for locally-called finallys
2469 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
2470 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
2471 #if defined(UNIX_X86_ABI)
2472 // Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth.
2473 assert(maxNestedAlignment % sizeof(int) == 0);
2474 maxAllowedStackDepth += maxNestedAlignment / sizeof(int);
2476 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
2478 #endif // EMIT_TRACK_STACK_DEPTH
2480 *nativeSizeOfCode = codeSize;
2481 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2483 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2485 // Make sure that the x86 alignment and cache prefetch optimization rules
2488 // Don't start a method in the last 7 bytes of a 16-byte alignment area
2489 // unless we are generating SMALL_CODE
2490 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
2492 /* Now that the code is issued, we can finalize and emit the unwind data */
2494 compiler->unwindEmit(*codePtr, coldCodePtr);
2496 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
2500 /* Finalize the Local Var info in terms of generated code */
2505 unsigned finalHotCodeSize;
2506 unsigned finalColdCodeSize;
2507 if (compiler->fgFirstColdBlock != nullptr)
2509 // We did some hot/cold splitting. The hot section is always padded out to the
2510 // size we thought it would be, but the cold section is not.
2511 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2512 assert(compiler->info.compTotalHotCodeSize > 0);
2513 assert(compiler->info.compTotalColdCodeSize > 0);
2514 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2515 finalColdCodeSize = codeSize - finalHotCodeSize;
2519 // No hot/cold splitting
2520 assert(codeSize <= compiler->info.compTotalHotCodeSize);
2521 assert(compiler->info.compTotalHotCodeSize > 0);
2522 assert(compiler->info.compTotalColdCodeSize == 0);
2523 finalHotCodeSize = codeSize;
2524 finalColdCodeSize = 0;
2526 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2527 #endif // LATE_DISASM
2529 /* Report any exception handlers to the VM */
2533 #ifdef JIT32_GCENCODER
2538 // Create and store the GC info for this method.
2539 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
2542 FILE* dmpf = jitstdout;
2544 compiler->opts.dmpHex = false;
2545 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
2548 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2553 compiler->opts.dmpHex = true;
2556 if (compiler->opts.dmpHex)
2558 size_t consSize = getEmitter()->emitDataSize();
2559 size_t infoSize = compiler->compInfoBlkSize;
2561 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2562 fprintf(dmpf, "\n");
2566 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2570 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2572 #ifdef JIT32_GCENCODER
2574 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2575 #endif // JIT32_GCENCODER
2577 fprintf(dmpf, "\n");
2581 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
2585 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2587 #ifdef JIT32_GCENCODER
2589 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2590 #endif // JIT32_GCENCODER
2595 if (dmpf != jitstdout)
2602 /* Tell the emitter that we're done with this function */
2604 getEmitter()->emitEndFN();
2606 /* Shut down the spill logic */
2608 regSet.rsSpillDone();
2610 /* Shut down the temp logic */
2616 grossVMsize += compiler->info.compILCodeSize;
2617 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2618 grossNCsize += codeSize + dataSize;
2620 #endif // DISPLAY_SIZES
2622 compiler->EndPhase(PHASE_EMIT_GCEH);
2625 /*****************************************************************************
2627 * Report EH clauses to the VM
2630 void CodeGen::genReportEH()
2632 if (compiler->compHndBBtabCount == 0)
2638 if (compiler->opts.dspEHTable)
2640 printf("*************** EH table for %s\n", compiler->info.compFullName);
2648 bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
2650 unsigned EHCount = compiler->compHndBBtabCount;
2652 #if FEATURE_EH_FUNCLETS
2653 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
2655 unsigned duplicateClauseCount = 0;
2656 unsigned enclosingTryIndex;
2658 // Duplicate clauses are not used by CoreRT ABI
2661 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2663 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2664 // ignoring 'mutual protect' trys
2665 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2666 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2668 ++duplicateClauseCount;
2671 EHCount += duplicateClauseCount;
2674 #if FEATURE_EH_CALLFINALLY_THUNKS
2675 unsigned clonedFinallyCount = 0;
2677 // Duplicate clauses are not used by CoreRT ABI
2680 // We don't keep track of how many cloned finally there are. So, go through and count.
2681 // We do a quick pass first through the EH table to see if there are any try/finally
2682 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2684 bool anyFinallys = false;
2685 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2686 HBtab < HBtabEnd; HBtab++)
2688 if (HBtab->HasFinallyHandler())
2696 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2698 if (block->bbJumpKind == BBJ_CALLFINALLY)
2700 ++clonedFinallyCount;
2704 EHCount += clonedFinallyCount;
2707 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2709 #endif // FEATURE_EH_FUNCLETS
2712 if (compiler->opts.dspEHTable)
2714 #if FEATURE_EH_FUNCLETS
2715 #if FEATURE_EH_CALLFINALLY_THUNKS
2716 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2717 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2718 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2719 #else // !FEATURE_EH_CALLFINALLY_THUNKS
2720 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2721 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2722 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2723 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
2724 #else // !FEATURE_EH_FUNCLETS
2725 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2726 assert(compiler->compHndBBtabCount == EHCount);
2727 #endif // !FEATURE_EH_FUNCLETS
2731 // Tell the VM how many EH clauses to expect.
2732 compiler->eeSetEHcount(EHCount);
2734 XTnum = 0; // This is the index we pass to the VM
2736 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2737 HBtab < HBtabEnd; HBtab++)
2739 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2741 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2742 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2744 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2745 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2746 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2747 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2749 if (HBtab->HasFilter())
2751 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2755 hndTyp = HBtab->ebdTyp;
2758 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2760 if (isCoreRTABI && (XTnum > 0))
2762 // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2763 // try block as the previous one. The runtime cannot reliably infer this information from
2764 // native code offsets because of different try blocks can have same offsets. Alternative
2765 // solution to this problem would be inserting extra nops to ensure that different try
2766 // blocks have different offsets.
2767 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2769 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2770 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2771 // IL as "try { try {} catch {} catch {} } finally {}".
2772 assert(HBtab->HasCatchHandler());
2773 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2777 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2778 // the fields aren't accurate.
2780 CORINFO_EH_CLAUSE clause;
2781 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2782 clause.Flags = flags;
2783 clause.TryOffset = tryBeg;
2784 clause.TryLength = tryEnd;
2785 clause.HandlerOffset = hndBeg;
2786 clause.HandlerLength = hndEnd;
2788 assert(XTnum < EHCount);
2790 // Tell the VM about this EH clause.
2791 compiler->eeSetEHinfo(XTnum, &clause);
2796 #if FEATURE_EH_FUNCLETS
2797 // Now output duplicated clauses.
2799 // If a funclet has been created by moving a handler out of a try region that it was originally nested
2800 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2801 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2802 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2803 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2804 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2805 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2808 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2809 // try or handler region):
2827 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2828 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2829 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2830 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2831 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2832 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2833 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2834 // representing try (1) protecting the new funclets catch (3) and (4).
2835 // The code will be generated as follows:
2837 // ABCFH // "main" code
2842 // The EH regions are:
2847 // D -> G // "duplicate" clause
2848 // E -> G // "duplicate" clause
2850 // Note that we actually need to generate one of these additional "duplicate" clauses for every
2851 // region the funclet is nested in. Take this example:
2883 // When we pull out funclets, we get the following generated code:
2885 // ABCDEHJMO // "main" function
2893 // And the EH regions we report to the VM are (in order; main clauses
2894 // first in most-to-least nested order, funclets ("duplicated clauses")
2895 // last, in most-to-least nested) are:
2903 // F -> I // funclet clause #1 for F
2904 // F -> K // funclet clause #2 for F
2905 // F -> L // funclet clause #3 for F
2906 // F -> N // funclet clause #4 for F
2907 // G -> I // funclet clause #1 for G
2908 // G -> K // funclet clause #2 for G
2909 // G -> L // funclet clause #3 for G
2910 // G -> N // funclet clause #4 for G
2911 // I -> K // funclet clause #1 for I
2912 // I -> L // funclet clause #2 for I
2913 // I -> N // funclet clause #3 for I
2914 // K -> N // funclet clause #1 for K
2915 // L -> N // funclet clause #1 for L
2917 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2918 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2919 // to add a clause "F -> G" because F is NOT protected by G, but we still have
2920 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2922 // The overall ordering of the clauses is still the same most-to-least nesting
2923 // after front-to-back start offset. Because we place the funclets at the end
2924 // these new clauses should also go at the end by this ordering.
2927 if (duplicateClauseCount > 0)
2929 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2931 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2933 unsigned enclosingTryIndex;
2935 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2937 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2938 // ignoring 'mutual protect' trys
2939 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2940 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2942 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2943 // that will have the enclosing try protecting the funclet.
2945 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2946 // greater EH table index
2948 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2950 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2951 // filter region, only the filter handler region. This is because exceptions in filters never
2952 // escape; the VM swallows them.
2954 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2955 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2957 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2958 BasicBlock* bbHndLast = encTab->ebdHndLast;
2960 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2962 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2963 hndBeg = compiler->ehCodeOffset(bbHndBeg);
2965 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2966 : compiler->ehCodeOffset(bbTryLast->bbNext);
2967 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2968 : compiler->ehCodeOffset(bbHndLast->bbNext);
2970 if (encTab->HasFilter())
2972 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2976 hndTyp = encTab->ebdTyp;
2979 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2981 // Tell the VM this is an extra clause caused by moving funclets out of line.
2982 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2984 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2985 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2986 // instruction immediately after the 'try' body. So, it really could be more accurately named
2989 CORINFO_EH_CLAUSE clause;
2990 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2991 clause.Flags = flags;
2992 clause.TryOffset = tryBeg;
2993 clause.TryLength = tryEnd;
2994 clause.HandlerOffset = hndBeg;
2995 clause.HandlerLength = hndEnd;
2997 assert(XTnum < EHCount);
2999 // Tell the VM about this EH clause (a duplicated clause).
3000 compiler->eeSetEHinfo(XTnum, &clause);
3003 ++reportedDuplicateClauseCount;
3006 if (duplicateClauseCount == reportedDuplicateClauseCount)
3008 break; // we've reported all of them; no need to continue looking
3012 } // for each 'true' enclosing 'try'
3013 } // for each EH table entry
3015 assert(duplicateClauseCount == reportedDuplicateClauseCount);
3016 } // if (duplicateClauseCount > 0)
3018 #if FEATURE_EH_CALLFINALLY_THUNKS
3019 if (clonedFinallyCount > 0)
3021 unsigned reportedClonedFinallyCount = 0;
3022 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3024 if (block->bbJumpKind == BBJ_CALLFINALLY)
3026 UNATIVE_OFFSET hndBeg, hndEnd;
3028 hndBeg = compiler->ehCodeOffset(block);
3030 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3031 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3032 BasicBlock* bbLabel = block->bbNext;
3033 if (block->isBBCallAlwaysPair())
3035 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3037 if (bbLabel == nullptr)
3039 hndEnd = compiler->info.compNativeCodeSize;
3043 assert(bbLabel->bbEmitCookie != nullptr);
3044 hndEnd = compiler->ehCodeOffset(bbLabel);
3047 CORINFO_EH_CLAUSE clause;
3048 clause.ClassToken = 0; // unused
3049 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
3050 clause.TryOffset = hndBeg;
3051 clause.TryLength = hndBeg;
3052 clause.HandlerOffset = hndBeg;
3053 clause.HandlerLength = hndEnd;
3055 assert(XTnum < EHCount);
3057 // Tell the VM about this EH clause (a cloned finally clause).
3058 compiler->eeSetEHinfo(XTnum, &clause);
3061 ++reportedClonedFinallyCount;
3064 if (clonedFinallyCount == reportedClonedFinallyCount)
3066 break; // we're done; no need to keep looking
3069 } // block is BBJ_CALLFINALLY
3072 assert(clonedFinallyCount == reportedClonedFinallyCount);
3073 } // if (clonedFinallyCount > 0)
3074 #endif // FEATURE_EH_CALLFINALLY_THUNKS
3076 #endif // FEATURE_EH_FUNCLETS
3078 assert(XTnum == EHCount);
3081 //----------------------------------------------------------------------
3082 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3083 // helper should be used.
3086 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
3089 // true if an optimized write barrier helper should be used, false otherwise.
3090 // Note: only x86 implements register-specific source optimized write
3091 // barriers currently.
3093 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
3095 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3097 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3106 //----------------------------------------------------------------------
3107 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3108 // helper should be used.
3110 // This has the same functionality as the version of
3111 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
3112 // determining what the required write barrier form is, if possible.
3115 // tgt - target tree of write (e.g., GT_STOREIND)
3116 // assignVal - tree with value to write
3119 // true if an optimized write barrier helper should be used, false otherwise.
3120 // Note: only x86 implements register-specific source optimized write
3121 // barriers currently.
3123 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
3125 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3127 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3128 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3137 //----------------------------------------------------------------------
3138 // genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
3139 // barrier, and the write barrier form required, determine the helper to call.
3142 // tgt - target tree of write (e.g., GT_STOREIND)
3143 // wbf - already computed write barrier form to use
3146 // Write barrier helper to use.
3148 // Note: do not call this function to get an optimized write barrier helper (e.g.,
3151 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3153 noway_assert(tgt->gtOper == GT_STOREIND);
3155 CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
3158 if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
3160 helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
3164 if (tgt->gtOper != GT_CLS_VAR)
3166 if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
3168 if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
3170 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3172 else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
3174 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3178 assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
3179 ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
3180 (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
3181 ((helper == CORINFO_HELP_ASSIGN_REF) &&
3182 (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
3187 //----------------------------------------------------------------------
3188 // genGCWriteBarrier: Generate a write barrier for a node.
3191 // tgt - target tree of write (e.g., GT_STOREIND)
3192 // wbf - already computed write barrier form to use
3194 void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3196 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
3198 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
3199 // We classify the "tgt" trees as follows:
3200 // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
3201 // IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, (GT_LCL_VAR)) }
3202 // then let "v" be the GT_LCL_VAR.
3203 // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
3204 // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
3205 // * Otherwise, classify as CWBKind_OtherByRefLocal.
3206 // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
3207 // Otherwise, classify as CWBKind_Unclassified.
3209 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
3210 if (tgt->gtOper == GT_IND)
3212 GenTree* lcl = NULL;
3214 GenTree* indArg = tgt->gtOp.gtOp1;
3215 if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
3217 indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
3219 if (indArg->gtOper == GT_LCL_VAR)
3223 else if (indArg->gtOper == GT_ADD)
3225 if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3227 lcl = indArg->gtOp.gtOp1;
3229 else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR)
3231 lcl = indArg->gtOp.gtOp2;
3236 wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
3237 unsigned lclNum = lcl->AsLclVar()->GetLclNum();
3238 if (lclNum == compiler->info.compRetBuffArg)
3240 wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
3244 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
3245 if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
3247 wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
3253 // We should have eliminated the barrier for this case.
3254 assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
3258 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
3262 // Enable this to sample the unclassified trees.
3263 static int unclassifiedBarrierSite = 0;
3264 if (wbKind == CWBKind_Unclassified)
3266 unclassifiedBarrierSite++;
3267 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
3272 inst_IV(INS_push, wbKind);
3273 genEmitHelperCall(helper,
3275 EA_PTRSIZE); // retSize
3276 SubtractStackLevel(4);
3280 genEmitHelperCall(helper,
3282 EA_PTRSIZE); // retSize
3285 #else // !FEATURE_COUNT_GC_WRITE_BARRIERS
3286 genEmitHelperCall(helper,
3288 EA_PTRSIZE); // retSize
3289 #endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
3293 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3294 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3296 XX Prolog / Epilog XX
3298 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3299 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3302 /*****************************************************************************
3304 * Generates code for moving incoming register arguments to their
3305 * assigned location, in the function prolog.
3309 #pragma warning(push)
3310 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3312 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
3317 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
3321 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
3322 unsigned argNum; // current argNum, always in [0..argMax-1]
3323 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
3324 unsigned regArgNum; // index into the regArgTab[] table
3325 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
3326 bool doingFloat = regState->rsIsFloat;
3328 // We should be generating the prolog block when we are called
3329 assert(compiler->compGeneratingProlog);
3331 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
3332 noway_assert(regArgMaskLive != 0);
3334 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
3335 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
3337 // The regArgTab can always have unused entries,
3338 // for example if an architecture always increments the arg register number but uses either
3339 // an integer register or a floating point register to hold the next argument
3340 // then with a mix of float and integer args you could have:
3342 // sampleMethod(int i, float x, int j, float y, int k, float z);
3343 // r0, r2 and r4 as valid integer arguments with argMax as 5
3344 // and f1, f3 and f5 and valid floating point arguments with argMax as 6
3345 // The first one is doingFloat==false and the second one is doingFloat==true
3347 // If a fixed return buffer (in r8) was also present then the first one would become:
3348 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
3351 argMax = regState->rsCalleeRegArgCount;
3352 fixedRetBufIndex = (unsigned)-1; // Invalid value
3354 // If necessary we will select a correct xtraReg for circular floating point args later.
3358 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
3360 else // we are doing the integer registers
3362 noway_assert(argMax <= MAX_REG_ARG);
3363 if (hasFixedRetBuffReg())
3365 fixedRetBufIndex = theFixedRetBuffArgNum();
3366 // We have an additional integer register argument when hasFixedRetBuffReg() is true
3367 argMax = fixedRetBufIndex + 1;
3368 assert(argMax == (MAX_REG_ARG + 1));
3373 // Construct a table with the register arguments, for detecting circular and
3374 // non-circular dependencies between the register arguments. A dependency is when
3375 // an argument register Rn needs to be moved to register Rm that is also an argument
3376 // register. The table is constructed in the order the arguments are passed in
3377 // registers: the first register argument is in regArgTab[0], the second in
3378 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
3379 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
3380 // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
3381 // we have increased the allocated size of the regArgTab[] by one.
3385 unsigned varNum; // index into compiler->lvaTable[] for this register argument
3386 #if defined(UNIX_AMD64_ABI)
3387 var_types type; // the Jit type of this regArgTab entry
3388 #endif // defined(UNIX_AMD64_ABI)
3389 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
3390 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
3391 // argument register number 'x'. Only used when circular = true.
3392 char slot; // 0 means the register is not used for a register argument
3393 // 1 means the first part of a register argument
3394 // 2, 3 or 4 means the second,third or fourth part of a multireg argument
3395 bool stackArg; // true if the argument gets homed to the stack
3396 bool processed; // true after we've processed the argument (and it is in its final location)
3397 bool circular; // true if this register participates in a circular dependency loop.
3399 #ifdef UNIX_AMD64_ABI
3401 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
3402 // the type of the lclVar in ways that are not ascertainable from lvType.
3403 // So, for that case we retain the type of the register in the regArgTab.
3405 var_types getRegType(Compiler* compiler)
3407 return type; // UNIX_AMD64 implementation
3410 #else // !UNIX_AMD64_ABI
3412 // In other cases, we simply use the type of the lclVar to determine the type of the register.
3413 var_types getRegType(Compiler* compiler)
3415 const LclVarDsc& varDsc = compiler->lvaTable[varNum];
3416 // Check if this is an HFA register arg and return the HFA type
3417 if (varDsc.lvIsHfaRegArg())
3419 #if defined(_TARGET_WINDOWS_)
3420 // Cannot have hfa types on windows arm targets
3421 // in vararg methods.
3422 assert(!compiler->info.compIsVarArgs);
3423 #endif // defined(_TARGET_WINDOWS_)
3424 return varDsc.GetHfaType();
3426 return compiler->mangleVarArgsType(varDsc.lvType);
3429 #endif // !UNIX_AMD64_ABI
3430 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
3435 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
3437 varDsc = compiler->lvaTable + varNum;
3439 // Is this variable a register arg?
3440 if (!varDsc->lvIsParam)
3445 if (!varDsc->lvIsRegArg)
3450 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
3451 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
3452 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
3453 // use the the original TYP_STRUCT argument.
3455 if (varDsc->lvPromoted || varDsc->lvIsStructField)
3457 LclVarDsc* parentVarDsc = varDsc;
3458 if (varDsc->lvIsStructField)
3460 assert(!varDsc->lvPromoted);
3461 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
3464 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
3466 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
3468 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
3470 // For register arguments that are independent promoted structs we put the promoted field varNum in the
3472 if (varDsc->lvPromoted)
3479 // For register arguments that are not independent promoted structs we put the parent struct varNum in
3481 if (varDsc->lvIsStructField)
3488 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
3489 // Change regType to the HFA type when we have a HFA argument
3490 if (varDsc->lvIsHfaRegArg())
3492 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3493 if (compiler->info.compIsVarArgs)
3495 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
3497 #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3498 regType = varDsc->GetHfaType();
3501 #if defined(UNIX_AMD64_ABI)
3502 if (!varTypeIsStruct(regType))
3503 #endif // defined(UNIX_AMD64_ABI)
3505 // A struct might be passed partially in XMM register for System V calls.
3506 // So a single arg might use both register files.
3507 if (isFloatRegType(regType) != doingFloat)
3515 #if defined(UNIX_AMD64_ABI)
3516 if (varTypeIsStruct(varDsc))
3518 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
3519 assert(typeHnd != nullptr);
3520 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3521 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3522 if (!structDesc.passedInRegisters)
3524 // The var is not passed in registers.
3528 unsigned firstRegSlot = 0;
3529 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3531 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3536 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3537 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3538 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3539 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3542 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
3543 // registers or on stack, the upper most 4-bytes will be zero.
3545 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
3546 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
3549 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3550 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3551 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
3552 // there is no need to clear upper 4-bytes of Vector3 type args.
3554 // RyuJIT x64 Unix: arguments are treated as passed by value and read/writen as if TYP_SIMD16.
3555 // Vector3 return values are returned two return registers and Caller assembles them into a
3556 // single xmm reg. Hence RyuJIT explicitly generates code to clears upper 4-bytes of Vector3
3557 // type args in prolog and Vector3 type return value of a call
3559 if (varDsc->lvType == TYP_SIMD12)
3561 regType = TYP_DOUBLE;
3566 regType = compiler->GetEightByteType(structDesc, slotCounter);
3569 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3571 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3572 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3574 // Store the reg for the first slot.
3577 firstRegSlot = regArgNum;
3580 // Bingo - add it to our table
3581 noway_assert(regArgNum < argMax);
3582 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3583 // not be multiple vars representing this argument
3585 regArgTab[regArgNum].varNum = varNum;
3586 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3587 regArgTab[regArgNum].type = regType;
3594 continue; // Nothing to do for this regState set.
3597 regArgNum = firstRegSlot;
3600 #endif // defined(UNIX_AMD64_ABI)
3602 // Bingo - add it to our table
3603 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
3605 noway_assert(regArgNum < argMax);
3606 // We better not have added it already (there better not be multiple vars representing this argument
3608 noway_assert(regArgTab[regArgNum].slot == 0);
3610 #if defined(UNIX_AMD64_ABI)
3611 // Set the register type.
3612 regArgTab[regArgNum].type = regType;
3613 #endif // defined(UNIX_AMD64_ABI)
3615 regArgTab[regArgNum].varNum = varNum;
3616 regArgTab[regArgNum].slot = 1;
3620 #if FEATURE_MULTIREG_ARGS
3621 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
3623 if (varDsc->lvIsHfaRegArg())
3625 // We have an HFA argument, set slots to the number of registers used
3626 slots = varDsc->lvHfaSlots();
3630 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3631 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3632 // We have a non-HFA multireg argument, set slots to two
3636 // Note that regArgNum+1 represents an argument index not an actual argument register.
3637 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
3639 // This is the setup for the rest of a multireg struct arg
3641 for (int i = 1; i < slots; i++)
3643 noway_assert((regArgNum + i) < argMax);
3645 // We better not have added it already (there better not be multiple vars representing this argument
3647 noway_assert(regArgTab[regArgNum + i].slot == 0);
3649 regArgTab[regArgNum + i].varNum = varNum;
3650 regArgTab[regArgNum + i].slot = (char)(i + 1);
3653 #endif // FEATURE_MULTIREG_ARGS
3657 int lclSize = compiler->lvaLclSize(varNum);
3659 if (lclSize > REGSIZE_BYTES)
3661 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3662 slots = lclSize / REGSIZE_BYTES;
3663 if (regArgNum + slots > maxRegArgNum)
3665 slots = maxRegArgNum - regArgNum;
3668 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
3669 assert(slots < INT8_MAX);
3670 for (char i = 1; i < slots; i++)
3672 regArgTab[regArgNum + i].varNum = varNum;
3673 regArgTab[regArgNum + i].slot = i + 1;
3675 #endif // _TARGET_ARM_
3677 for (int i = 0; i < slots; i++)
3679 regType = regArgTab[regArgNum + i].getRegType(compiler);
3680 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3682 #if !defined(UNIX_AMD64_ABI)
3683 assert((i > 0) || (regNum == varDsc->lvArgReg));
3684 #endif // defined(UNIX_AMD64_ABI)
3686 // Is the arg dead on entry to the method ?
3688 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3690 if (varDsc->lvTrackedNonStruct())
3692 // We may now see some tracked locals with zero refs.
3693 // See Lowering::DoPhase. Tolerate these.
3694 if (varDsc->lvRefCnt() > 0)
3696 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3702 noway_assert(varDsc->lvType == TYP_STRUCT);
3703 #else // !_TARGET_X86_
3704 // For LSRA, it may not be in regArgMaskLive if it has a zero
3705 // refcnt. This is in contrast with the non-LSRA case in which all
3706 // non-tracked args are assumed live on entry.
3707 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3708 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs) ||
3709 (varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
3710 #endif // !_TARGET_X86_
3712 // Mark it as processed and be done with it
3713 regArgTab[regArgNum + i].processed = true;
3718 // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3719 // could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
3720 // they've already been spilled.
3722 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3723 #endif // _TARGET_ARM_
3725 #if !defined(UNIX_AMD64_ABI)
3726 noway_assert(xtraReg != (varDsc->lvArgReg + i));
3728 noway_assert(regArgMaskLive & genRegMask(regNum));
3731 regArgTab[regArgNum + i].processed = false;
3733 /* mark stack arguments since we will take care of those first */
3734 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3736 /* If it goes on the stack or in a register that doesn't hold
3737 * an argument anymore -> CANNOT form a circular dependency */
3739 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3741 /* will trash another argument -> possible dependency
3742 * We may need several passes after the table is constructed
3743 * to decide on that */
3745 /* Maybe the argument stays in the register (IDEAL) */
3747 if ((i == 0) && (varDsc->lvRegNum == regNum))
3752 #if !defined(_TARGET_64BIT_)
3753 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
3757 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
3762 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3763 (REG_NEXT(varDsc->lvRegNum) == regNum))
3767 #endif // !defined(_TARGET_64BIT_)
3768 regArgTab[regArgNum + i].circular = true;
3773 regArgTab[regArgNum + i].circular = false;
3775 /* mark the argument register as free */
3776 regArgMaskLive &= ~genRegMask(regNum);
3781 /* Find the circular dependencies for the argument registers, if any.
3782 * A circular dependency is a set of registers R1, R2, ..., Rn
3783 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
3788 /* Possible circular dependencies still exist; the previous pass was not enough
3789 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3795 for (argNum = 0; argNum < argMax; argNum++)
3797 // If we already marked the argument as non-circular then continue
3799 if (!regArgTab[argNum].circular)
3804 if (regArgTab[argNum].slot == 0) // Not a register argument
3809 varNum = regArgTab[argNum].varNum;
3810 noway_assert(varNum < compiler->lvaCount);
3811 varDsc = compiler->lvaTable + varNum;
3812 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3814 /* cannot possibly have stack arguments */
3815 noway_assert(varDsc->lvIsInReg());
3816 noway_assert(!regArgTab[argNum].stackArg);
3818 var_types regType = regArgTab[argNum].getRegType(compiler);
3819 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3821 regNumber destRegNum = REG_NA;
3822 if (regArgTab[argNum].slot == 1)
3824 destRegNum = varDsc->lvRegNum;
3826 #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
3829 assert(regArgTab[argNum].slot == 2);
3831 assert(regArgTab[argNum - 1].slot == 1);
3832 assert(regArgTab[argNum - 1].varNum == varNum);
3833 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
3834 regArgMaskLive &= ~genRegMask(regNum);
3835 regArgTab[argNum].circular = false;
3839 #elif !defined(_TARGET_64BIT_)
3840 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3842 destRegNum = varDsc->lvOtherReg;
3846 assert(regArgTab[argNum].slot == 2);
3847 assert(varDsc->TypeGet() == TYP_DOUBLE);
3848 destRegNum = REG_NEXT(varDsc->lvRegNum);
3850 #endif // !defined(_TARGET_64BIT_)
3851 noway_assert(destRegNum != REG_NA);
3852 if (genRegMask(destRegNum) & regArgMaskLive)
3854 /* we are trashing a live argument register - record it */
3855 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3856 noway_assert(destRegArgNum < argMax);
3857 regArgTab[destRegArgNum].trashBy = argNum;
3861 /* argument goes to a free register */
3862 regArgTab[argNum].circular = false;
3865 /* mark the argument register as free */
3866 regArgMaskLive &= ~genRegMask(regNum);
3872 /* At this point, everything that has the "circular" flag
3873 * set to "true" forms a circular dependency */
3874 CLANG_FORMAT_COMMENT_ANCHOR;
3881 printf("Circular dependencies found while home-ing the incoming arguments.\n");
3886 // LSRA allocates registers to incoming parameters in order and will not overwrite
3887 // a register still holding a live parameter.
3889 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3890 "Homing of float argument registers with circular dependencies not implemented.");
3892 /* Now move the arguments to their locations.
3893 * First consider ones that go on the stack since they may
3894 * free some registers. */
3896 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3897 for (argNum = 0; argNum < argMax; argNum++)
3901 #if defined(UNIX_AMD64_ABI)
3902 // If this is the wrong register file, just continue.
3903 if (regArgTab[argNum].type == TYP_UNDEF)
3905 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3906 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3907 // The next register file processing will process it.
3910 #endif // defined(UNIX_AMD64_ABI)
3912 // If the arg is dead on entry to the method, skip it
3914 if (regArgTab[argNum].processed)
3919 if (regArgTab[argNum].slot == 0) // Not a register argument
3924 varNum = regArgTab[argNum].varNum;
3925 noway_assert(varNum < compiler->lvaCount);
3926 varDsc = compiler->lvaTable + varNum;
3928 #ifndef _TARGET_64BIT_
3929 // If not a stack arg go to the next one
3930 if (varDsc->lvType == TYP_LONG)
3932 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
3936 else if (varDsc->lvOtherReg != REG_STK)
3942 #endif // !_TARGET_64BIT_
3944 // If not a stack arg go to the next one
3945 if (!regArgTab[argNum].stackArg)
3951 #if defined(_TARGET_ARM_)
3952 if (varDsc->lvType == TYP_DOUBLE)
3954 if (regArgTab[argNum].slot == 2)
3956 // We handled the entire double when processing the first half (slot == 1)
3962 noway_assert(regArgTab[argNum].circular == false);
3964 noway_assert(varDsc->lvIsParam);
3965 noway_assert(varDsc->lvIsRegArg);
3966 noway_assert(varDsc->lvIsInReg() == false ||
3967 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
3969 var_types storeType = TYP_UNDEF;
3970 unsigned slotSize = TARGET_POINTER_SIZE;
3972 if (varTypeIsStruct(varDsc))
3974 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3975 #if FEATURE_MULTIREG_ARGS
3976 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3977 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3978 #endif // FEATURE_MULTIREG_ARGS
3979 #ifdef UNIX_AMD64_ABI
3980 storeType = regArgTab[argNum].type;
3981 #endif // !UNIX_AMD64_ABI
3982 if (varDsc->lvIsHfaRegArg())
3985 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3986 storeType = TYP_FLOAT;
3987 slotSize = (unsigned)emitActualTypeSize(storeType);
3988 #else // _TARGET_ARM64_
3989 storeType = genActualType(varDsc->GetHfaType());
3990 slotSize = (unsigned)emitActualTypeSize(storeType);
3991 #endif // _TARGET_ARM64_
3994 else // Not a struct type
3996 storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
3998 size = emitActualTypeSize(storeType);
4000 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4001 #endif //_TARGET_X86_
4003 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4005 // Stack argument - if the ref count is 0 don't care about it
4007 if (!varDsc->lvOnFrame)
4009 noway_assert(varDsc->lvRefCnt() == 0);
4013 // Since slot is typically 1, baseOffset is typically 0
4014 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
4016 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4018 #ifndef UNIX_AMD64_ABI
4019 // Check if we are writing past the end of the struct
4020 if (varTypeIsStruct(varDsc))
4022 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4024 #endif // !UNIX_AMD64_ABI
4026 if (regArgTab[argNum].slot == 1)
4028 psiMoveToStack(varNum);
4032 /* mark the argument as processed */
4034 regArgTab[argNum].processed = true;
4035 regArgMaskLive &= ~genRegMask(srcRegNum);
4037 #if defined(_TARGET_ARM_)
4038 if (storeType == TYP_DOUBLE)
4040 regArgTab[argNum + 1].processed = true;
4041 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4046 /* Process any circular dependencies */
4049 unsigned begReg, destReg, srcReg;
4050 unsigned varNumDest, varNumSrc;
4051 LclVarDsc* varDscDest;
4052 LclVarDsc* varDscSrc;
4053 instruction insCopy = INS_mov;
4057 #if defined(FEATURE_HFA) || defined(UNIX_AMD64_ABI)
4058 insCopy = ins_Copy(TYP_DOUBLE);
4059 // Compute xtraReg here when we have a float argument
4060 assert(xtraReg == REG_NA);
4062 regMaskTP fpAvailMask;
4064 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4065 #if defined(FEATURE_HFA)
4066 fpAvailMask &= RBM_ALLDOUBLE;
4068 #if !defined(UNIX_AMD64_ABI)
4069 #error Error. Wrong architecture.
4070 #endif // !defined(UNIX_AMD64_ABI)
4071 #endif // defined(FEATURE_HFA)
4073 if (fpAvailMask == RBM_NONE)
4075 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4076 #if defined(FEATURE_HFA)
4077 fpAvailMask &= RBM_ALLDOUBLE;
4079 #if !defined(UNIX_AMD64_ABI)
4080 #error Error. Wrong architecture.
4081 #endif // !defined(UNIX_AMD64_ABI)
4082 #endif // defined(FEATURE_HFA)
4085 assert(fpAvailMask != RBM_NONE);
4087 // We pick the lowest avail register number
4088 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
4089 xtraReg = genRegNumFromMask(tempMask);
4090 #elif defined(_TARGET_X86_)
4091 // This case shouldn't occur on x86 since NYI gets converted to an assert
4092 NYI("Homing circular FP registers via xtraReg");
4096 for (argNum = 0; argNum < argMax; argNum++)
4098 // If not a circular dependency then continue
4099 if (!regArgTab[argNum].circular)
4104 // If already processed the dependency then continue
4106 if (regArgTab[argNum].processed)
4111 if (regArgTab[argNum].slot == 0) // Not a register argument
4116 destReg = begReg = argNum;
4117 srcReg = regArgTab[argNum].trashBy;
4119 varNumDest = regArgTab[destReg].varNum;
4120 noway_assert(varNumDest < compiler->lvaCount);
4121 varDscDest = compiler->lvaTable + varNumDest;
4122 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
4124 noway_assert(srcReg < argMax);
4125 varNumSrc = regArgTab[srcReg].varNum;
4126 noway_assert(varNumSrc < compiler->lvaCount);
4127 varDscSrc = compiler->lvaTable + varNumSrc;
4128 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4130 emitAttr size = EA_PTRSIZE;
4132 #ifdef _TARGET_XARCH_
4134 // The following code relies upon the target architecture having an
4135 // 'xchg' instruction which directly swaps the values held in two registers.
4136 // On the ARM architecture we do not have such an instruction.
4138 if (destReg == regArgTab[srcReg].trashBy)
4140 /* only 2 registers form the circular dependency - use "xchg" */
4142 varNum = regArgTab[argNum].varNum;
4143 noway_assert(varNum < compiler->lvaCount);
4144 varDsc = compiler->lvaTable + varNum;
4145 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4147 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
4149 /* Set "size" to indicate GC if one and only one of
4150 * the operands is a pointer
4151 * RATIONALE: If both are pointers, nothing changes in
4152 * the GC pointer tracking. If only one is a pointer we
4153 * have to "swap" the registers in the GC reg pointer mask
4156 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
4161 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
4163 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
4164 regSet.verifyRegUsed(varDscSrc->lvRegNum);
4165 regSet.verifyRegUsed(varDscSrc->lvArgReg);
4167 /* mark both arguments as processed */
4168 regArgTab[destReg].processed = true;
4169 regArgTab[srcReg].processed = true;
4171 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
4172 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
4174 psiMoveToReg(varNumSrc);
4175 psiMoveToReg(varNumDest);
4178 #endif // _TARGET_XARCH_
4180 var_types destMemType = varDscDest->TypeGet();
4183 bool cycleAllDouble = true; // assume the best
4185 unsigned iter = begReg;
4188 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
4190 cycleAllDouble = false;
4193 iter = regArgTab[iter].trashBy;
4194 } while (iter != begReg);
4196 // We may treat doubles as floats for ARM because we could have partial circular
4197 // dependencies of a float with a lo/hi part of the double. We mark the
4198 // trashBy values for each slot of the double, so let the circular dependency
4199 // logic work its way out for floats rather than doubles. If a cycle has all
4200 // doubles, then optimize so that instead of two vmov.f32's to move a double,
4201 // we can use one vmov.f64.
4203 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4205 destMemType = TYP_FLOAT;
4207 #endif // _TARGET_ARM_
4209 if (destMemType == TYP_REF)
4213 else if (destMemType == TYP_BYREF)
4217 else if (destMemType == TYP_DOUBLE)
4221 else if (destMemType == TYP_FLOAT)
4226 /* move the dest reg (begReg) in the extra reg */
4228 assert(xtraReg != REG_NA);
4230 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4232 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4234 regSet.verifyRegUsed(xtraReg);
4236 *pXtraRegClobbered = true;
4238 psiMoveToReg(varNumDest, xtraReg);
4240 /* start moving everything to its right place */
4242 while (srcReg != begReg)
4246 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4247 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4249 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4251 regSet.verifyRegUsed(destRegNum);
4253 /* mark 'src' as processed */
4254 noway_assert(srcReg < argMax);
4255 regArgTab[srcReg].processed = true;
4257 if (size == EA_8BYTE)
4258 regArgTab[srcReg + 1].processed = true;
4260 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4262 /* move to the next pair */
4264 srcReg = regArgTab[srcReg].trashBy;
4266 varDscDest = varDscSrc;
4267 destMemType = varDscDest->TypeGet();
4269 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4271 destMemType = TYP_FLOAT;
4274 varNumSrc = regArgTab[srcReg].varNum;
4275 noway_assert(varNumSrc < compiler->lvaCount);
4276 varDscSrc = compiler->lvaTable + varNumSrc;
4277 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4279 if (destMemType == TYP_REF)
4283 else if (destMemType == TYP_DOUBLE)
4293 /* take care of the beginning register */
4295 noway_assert(srcReg == begReg);
4297 /* move the dest reg (begReg) in the extra reg */
4299 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4301 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
4303 regSet.verifyRegUsed(destRegNum);
4305 psiMoveToReg(varNumSrc);
4307 /* mark the beginning register as processed */
4309 regArgTab[srcReg].processed = true;
4311 if (size == EA_8BYTE)
4312 regArgTab[srcReg + 1].processed = true;
4314 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4319 /* Finally take care of the remaining arguments that must be enregistered */
4320 while (regArgMaskLive)
4322 regMaskTP regArgMaskLiveSave = regArgMaskLive;
4324 for (argNum = 0; argNum < argMax; argNum++)
4326 /* If already processed go to the next one */
4327 if (regArgTab[argNum].processed)
4332 if (regArgTab[argNum].slot == 0)
4333 { // Not a register argument
4337 varNum = regArgTab[argNum].varNum;
4338 noway_assert(varNum < compiler->lvaCount);
4339 varDsc = compiler->lvaTable + varNum;
4340 var_types regType = regArgTab[argNum].getRegType(compiler);
4341 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4343 #if defined(UNIX_AMD64_ABI)
4344 if (regType == TYP_UNDEF)
4346 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4347 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4348 // The next register file processing will process it.
4349 regArgMaskLive &= ~genRegMask(regNum);
4352 #endif // defined(UNIX_AMD64_ABI)
4354 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4355 #ifndef _TARGET_64BIT_
4356 #ifndef _TARGET_ARM_
4357 // Right now we think that incoming arguments are not pointer sized. When we eventually
4358 // understand the calling convention, this still won't be true. But maybe we'll have a better
4359 // idea of how to ignore it.
4361 // On Arm, a long can be passed in register
4362 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
4364 #endif //_TARGET_64BIT_
4366 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
4368 /* Register argument - hopefully it stays in the same register */
4369 regNumber destRegNum = REG_NA;
4370 var_types destMemType = varDsc->TypeGet();
4372 if (regArgTab[argNum].slot == 1)
4374 destRegNum = varDsc->lvRegNum;
4377 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
4379 // The second half of the double has already been processed! Treat this as a single.
4380 destMemType = TYP_FLOAT;
4382 #endif // _TARGET_ARM_
4384 #ifndef _TARGET_64BIT_
4385 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
4387 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
4388 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
4390 destRegNum = regNum;
4394 destRegNum = varDsc->lvOtherReg;
4397 assert(destRegNum != REG_STK);
4401 assert(regArgTab[argNum].slot == 2);
4402 assert(destMemType == TYP_DOUBLE);
4404 // For doubles, we move the entire double using the argNum representing
4405 // the first half of the double. There are two things we won't do:
4406 // (1) move the double when the 1st half of the destination is free but the
4407 // 2nd half is occupied, and (2) move the double when the 2nd half of the
4408 // destination is free but the 1st half is occupied. Here we consider the
4409 // case where the first half can't be moved initially because its target is
4410 // still busy, but the second half can be moved. We wait until the entire
4411 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
4412 // and F2 single moving to F16. When we process F0, its target F2 is busy,
4413 // so we skip it on the first pass. When we process F1, its target F3 is
4414 // available. However, we want to move F0/F1 all at once, so we skip it here.
4415 // We process F2, which frees up F2. The next pass through, we process F0 and
4416 // F2/F3 are empty, so we move it. Note that if half of a double is involved
4417 // in a circularity with a single, then we will have already moved that half
4418 // above, so we go ahead and move the remaining half as a single.
4419 // Because there are no circularities left, we are guaranteed to terminate.
4422 assert(regArgTab[argNum - 1].slot == 1);
4424 if (!regArgTab[argNum - 1].processed)
4426 // The first half of the double hasn't been processed; try to be processed at the same time
4430 // The first half of the double has been processed but the second half hasn't!
4431 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
4432 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
4433 // dependency logic above will move them as singles, leaving just F3 to move. Treat
4434 // it as a single to finish the shuffling.
4436 destMemType = TYP_FLOAT;
4437 destRegNum = REG_NEXT(varDsc->lvRegNum);
4439 #endif // !_TARGET_64BIT_
4440 #if (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4443 assert(regArgTab[argNum].slot == 2);
4445 assert(regArgTab[argNum - 1].slot == 1);
4446 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4447 destRegNum = varDsc->lvRegNum;
4448 noway_assert(regNum != destRegNum);
4451 #endif // (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4452 noway_assert(destRegNum != REG_NA);
4453 if (destRegNum != regNum)
4455 /* Cannot trash a currently live register argument.
4456 * Skip this one until its target will be free
4457 * which is guaranteed to happen since we have no circular dependencies. */
4459 regMaskTP destMask = genRegMask(destRegNum);
4461 // Don't process the double until both halves of the destination are clear.
4462 if (genActualType(destMemType) == TYP_DOUBLE)
4464 assert((destMask & RBM_DBL_REGS) != 0);
4465 destMask |= genRegMask(REG_NEXT(destRegNum));
4469 if (destMask & regArgMaskLive)
4474 /* Move it to the new register */
4476 emitAttr size = emitActualTypeSize(destMemType);
4478 #if defined(_TARGET_ARM64_)
4479 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4481 // For a SIMD type that is passed in two integer registers,
4482 // Limit the copy below to the first 8 bytes from the first integer register.
4483 // Handle the remaining 8 bytes from the second slot in the code further below
4484 assert(EA_SIZE(size) >= 8);
4489 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
4491 psiMoveToReg(varNum);
4494 /* mark the argument as processed */
4496 assert(!regArgTab[argNum].processed);
4497 regArgTab[argNum].processed = true;
4498 regArgMaskLive &= ~genRegMask(regNum);
4499 #if FEATURE_MULTIREG_ARGS
4500 int argRegCount = 1;
4502 if (genActualType(destMemType) == TYP_DOUBLE)
4507 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4508 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4511 int nextArgNum = argNum + 1;
4512 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4513 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4514 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4515 // and moves the 0th element of the src reg into the 1st element of the dest reg.
4516 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
4517 // Set destRegNum to regNum so that we skip the setting of the register below,
4518 // but mark argNum as processed and clear regNum from the live mask.
4519 destRegNum = regNum;
4521 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4522 #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4523 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4525 // For a SIMD type that is passed in two integer registers,
4526 // Code above copies the first integer argument register into the lower 8 bytes
4527 // of the target register. Here we must handle the second 8 bytes of the slot pair by
4528 // inserting the second integer register into the upper 8 bytes of the target
4529 // SIMD floating point register.
4531 int nextArgNum = argNum + 1;
4532 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4533 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4534 noway_assert(genIsValidIntReg(nextRegNum));
4535 noway_assert(genIsValidFloatReg(destRegNum));
4536 getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
4538 #endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4540 // Mark the rest of the argument registers corresponding to this multi-reg type as
4541 // being processed and no longer live.
4542 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4544 int nextArgNum = argNum + regSlot;
4545 assert(!regArgTab[nextArgNum].processed);
4546 regArgTab[nextArgNum].processed = true;
4547 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4548 regArgMaskLive &= ~genRegMask(nextRegNum);
4550 #endif // FEATURE_MULTIREG_ARGS
4553 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4557 #pragma warning(pop)
4560 /*****************************************************************************
4561 * If any incoming stack arguments live in registers, load them.
4563 void CodeGen::genEnregisterIncomingStackArgs()
4568 printf("*************** In genEnregisterIncomingStackArgs()\n");
4572 assert(compiler->compGeneratingProlog);
4574 unsigned varNum = 0;
4576 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4578 /* Is this variable a parameter? */
4580 if (!varDsc->lvIsParam)
4585 /* If it's a register argument then it's already been taken care of.
4586 But, on Arm when under a profiler, we would have prespilled a register argument
4587 and hence here we need to load it from its prespilled location.
4589 bool isPrespilledForProfiling = false;
4590 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4591 isPrespilledForProfiling =
4592 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4595 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4600 /* Has the parameter been assigned to a register? */
4602 if (!varDsc->lvIsInReg())
4607 var_types type = genActualType(varDsc->TypeGet());
4609 /* Is the variable dead on entry */
4611 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4616 /* Load the incoming parameter into the register */
4618 /* Figure out the home offset of the incoming argument */
4620 regNumber regNum = varDsc->lvArgInitReg;
4621 assert(regNum != REG_STK);
4623 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
4624 regSet.verifyRegUsed(regNum);
4626 psiMoveToReg(varNum);
4630 /*-------------------------------------------------------------------------
4632 * We have to decide whether we're going to use block initialization
4633 * in the prolog before we assign final stack offsets. This is because
4634 * when using block initialization we may need additional callee-saved
4635 * registers which need to be saved on the frame, thus increasing the
4638 * We'll count the number of locals we have to initialize,
4639 * and if there are lots of them we'll use block initialization.
4640 * Thus, the local variable table must have accurate register location
4641 * information for enregistered locals for their register state on entry
4644 * At the same time we set lvMustInit for locals (enregistered or on stack)
4645 * that must be initialized (e.g. initialize memory (comInitMem),
4646 * untracked pointers or disable DFA)
4648 void CodeGen::genCheckUseBlockInit()
4650 assert(!compiler->compGeneratingProlog);
4652 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
4653 // larger than int count for more than 1).
4654 unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
4655 // determine whether to use block init.
4660 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4662 if (varDsc->lvIsParam)
4667 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4669 noway_assert(varDsc->lvRefCnt() == 0);
4673 if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
4678 #if FEATURE_FIXED_OUT_ARGS
4679 if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
4683 if (varNum == compiler->lvaOutgoingArgSpaceVar)
4689 #if FEATURE_EH_FUNCLETS
4690 // There's no need to force 0-initialization of the PSPSym, it will be
4691 // initialized with a real value in the prolog
4692 if (varNum == compiler->lvaPSPSym)
4698 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4700 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4701 // initialized by the parent struct. No need to set the lvMustInit bit in the
4706 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
4709 if (varDsc->lvTracked)
4711 /* For uninitialized use of tracked variables, the liveness
4712 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
4714 if (varDsc->lvMustInit ||
4715 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4717 /* This var must be initialized */
4719 varDsc->lvMustInit = 1;
4721 /* See if the variable is on the stack will be initialized
4722 * using rep stos - compute the total size to be zero-ed */
4724 if (varDsc->lvOnFrame)
4726 if (!varDsc->lvRegister)
4728 if (!varDsc->lvIsInReg())
4730 // Var is on the stack at entry.
4732 roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4737 // Var is partially enregistered
4738 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
4739 initStkLclCnt += genTypeStSz(TYP_INT);
4745 /* With compInitMem, all untracked vars will have to be init'ed */
4746 /* VSW 102460 - Do not force initialization of compiler generated temps,
4747 unless they are untracked GC type or structs that contain GC pointers */
4748 CLANG_FORMAT_COMMENT_ANCHOR;
4751 // TODO-1stClassStructs
4752 // This is here to duplicate previous behavior, where TYP_SIMD8 locals
4753 // were not being re-typed correctly.
4754 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
4755 #else // !FEATURE_SIMD
4756 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
4757 #endif // !FEATURE_SIMD
4758 varDsc->lvOnFrame &&
4759 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
4761 varDsc->lvMustInit = true;
4763 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4769 /* Ignore if not a pointer variable or value class with a GC field */
4771 if (!compiler->lvaTypeIsGC(varNum))
4776 /* If we don't know lifetimes of variables, must be conservative */
4777 if (!compiler->backendRequiresLocalVarLifetimes())
4779 varDsc->lvMustInit = true;
4780 noway_assert(!varDsc->lvRegister);
4784 if (!varDsc->lvTracked)
4786 varDsc->lvMustInit = true;
4790 /* Is this a 'must-init' stack pointer local? */
4792 if (varDsc->lvMustInit && varDsc->lvOnFrame)
4794 initStkLclCnt += varDsc->lvStructGcCount;
4797 if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
4803 /* Don't forget about spill temps that hold pointers */
4805 if (!TRACK_GC_TEMP_LIFETIMES)
4807 assert(regSet.tmpAllFree());
4808 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4810 if (varTypeIsGC(tempThis->tdTempType()))
4817 // After debugging this further it was found that this logic is incorrect:
4818 // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
4819 // and this also double counts variables (we saw this in the debugger) around line 4829.
4820 // Even though this doesn't pose a problem with correctness it will improperly decide to
4821 // zero init the stack using a block operation instead of a 'case by case' basis.
4822 genInitStkLclCnt = initStkLclCnt;
4824 /* If we have more than 4 untracked locals, use block initialization */
4825 /* TODO-Review: If we have large structs, bias toward not using block initialization since
4826 we waste all the other slots. Really need to compute the correct
4827 and compare that against zeroing the slots individually */
4829 genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
4831 if (genUseBlockInit)
4833 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4835 // If there is a secret stub param, don't count it, as it will no longer
4836 // be live when we do block init.
4837 if (compiler->info.compPublishStubParam)
4839 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4842 #ifdef _TARGET_XARCH_
4843 // If we're going to use "REP STOS", remember that we will trash EDI
4844 // For fastcall we will have to save ECX, EAX
4845 // so reserve two extra callee saved
4846 // This is better than pushing eax, ecx, because we in the later
4847 // we will mess up already computed offsets on the stack (for ESP frames)
4848 regSet.rsSetRegsModified(RBM_EDI);
4850 #ifdef UNIX_AMD64_ABI
4851 // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
4852 // In such case use R12 and R13 registers.
4853 if (maskCalleeRegArgMask & RBM_RCX)
4855 regSet.rsSetRegsModified(RBM_R12);
4858 if (maskCalleeRegArgMask & RBM_RDI)
4860 regSet.rsSetRegsModified(RBM_R13);
4862 #else // !UNIX_AMD64_ABI
4863 if (maskCalleeRegArgMask & RBM_ECX)
4865 regSet.rsSetRegsModified(RBM_ESI);
4867 #endif // !UNIX_AMD64_ABI
4869 if (maskCalleeRegArgMask & RBM_EAX)
4871 regSet.rsSetRegsModified(RBM_EBX);
4874 #endif // _TARGET_XARCH_
4877 // On the Arm if we are using a block init to initialize, then we
4878 // must force spill R4/R5/R6 so that we can use them during
4879 // zero-initialization process.
4881 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
4882 if (forceSpillRegCount > 0)
4883 regSet.rsSetRegsModified(RBM_R4);
4884 if (forceSpillRegCount > 1)
4885 regSet.rsSetRegsModified(RBM_R5);
4886 if (forceSpillRegCount > 2)
4887 regSet.rsSetRegsModified(RBM_R6);
4888 #endif // _TARGET_ARM_
4892 /*-----------------------------------------------------------------------------
4894 * Push any callee-saved registers we have used
4897 #if defined(_TARGET_ARM64_)
4898 void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
4900 void CodeGen::genPushCalleeSavedRegisters()
4903 assert(compiler->compGeneratingProlog);
4905 #if defined(_TARGET_XARCH_)
4906 // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
4907 // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
4909 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
4910 #else // !defined(_TARGET_XARCH_)
4911 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
4915 if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
4917 noway_assert(!"Used register RBM_FPBASE as a scratch register!");
4921 #ifdef _TARGET_XARCH_
4922 // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
4923 if (isFramePointerUsed())
4925 rsPushRegs &= ~RBM_FPBASE;
4929 #ifdef _TARGET_ARMARCH_
4930 // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
4931 if (isFramePointerUsed())
4932 rsPushRegs |= RBM_FPBASE;
4935 // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
4936 // changes in GC suspension architecture.
4938 // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
4939 // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
4940 // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
4941 // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
4942 // be saved on the stack and the GC suspension would time out.
4944 // So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
4945 // the following to make GC suspension work in the above scenario:
4946 // - Make return address hijacking work even when lr is not saved on the stack.
4947 // - Generate fully interruptible code for loops that contains calls
4948 // - Generate fully interruptible code for leaf methods
4950 // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
4953 rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
4955 regSet.rsMaskCalleeSaved = rsPushRegs;
4956 #endif // _TARGET_ARMARCH_
4959 if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
4961 printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
4962 compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
4963 dspRegMask(rsPushRegs);
4965 assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
4969 #if defined(_TARGET_ARM_)
4970 regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
4971 regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
4973 maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
4975 assert(FitsIn<int>(maskPushRegsInt));
4976 inst_IV(INS_push, (int)maskPushRegsInt);
4977 compiler->unwindPushMaskInt(maskPushRegsInt);
4979 if (maskPushRegsFloat != 0)
4981 genPushFltRegs(maskPushRegsFloat);
4982 compiler->unwindPushMaskFloat(maskPushRegsFloat);
4984 #elif defined(_TARGET_ARM64_)
4985 // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
4986 // options. Case numbers in comments here refer to this document.
4988 // For most frames, generate, e.g.:
4989 // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
4990 // // ensures stack stays aligned.
4991 // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
4992 // // at top of frame (highest addresses).
4993 // stp r21, r22, [sp, 0x70]
4996 // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
4997 // at the top of the frame.
4998 // 2. If we save FP, then the first store is FP, LR.
4999 // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
5000 // preserve their lower 8 bytes, by calling convention.
5001 // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
5003 // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
5005 int totalFrameSize = genTotalFrameSize();
5007 int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
5009 regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5010 regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
5012 int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
5013 // generate based on various sizes.
5014 int calleeSaveSPDelta = 0;
5015 int calleeSaveSPDeltaUnaligned = 0;
5017 if (isFramePointerUsed())
5019 // We need to save both FP and LR.
5021 assert((maskSaveRegsInt & RBM_FP) != 0);
5022 assert((maskSaveRegsInt & RBM_LR) != 0);
5024 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
5029 // stp fp,lr,[sp,#-framesz]!
5031 // The (totalFrameSize < 512) condition ensures that both the predecrement
5032 // and the postincrement of SP can occur with STP.
5034 // After saving callee-saved registers, we establish the frame pointer with:
5036 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5040 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
5041 INS_OPTS_PRE_INDEX);
5042 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5044 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5045 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5047 else if (totalFrameSize <= 512)
5052 // sub sp,sp,#framesz
5053 // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
5055 // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
5056 // signed offset encoding.
5058 // After saving callee-saved registers, we establish the frame pointer with:
5060 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5064 assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
5066 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5067 compiler->unwindAllocStack(totalFrameSize);
5069 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5070 compiler->lvaOutgoingArgSpaceSize);
5071 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5073 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5074 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5080 // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
5081 // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
5082 // stored. For example:
5083 // stp r19,r20,[sp,#-96]!
5084 // stp d8,d9,[sp,#16]
5085 // ... save varargs incoming integer registers ...
5086 // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
5087 // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
5088 // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
5092 // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
5093 // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
5094 // above them. If that is preferable, we could implement it.
5095 // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
5097 // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
5098 // padding from above).
5099 // Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
5102 // sub sp,sp,#remainingFrameSz
5103 // or, for large frames:
5104 // mov rX, #remainingFrameSz // maybe multiple instructions
5108 // stp fp,lr,[sp,#outsz]
5111 // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
5112 // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
5113 // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
5114 // following sequences:
5116 // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
5118 // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
5121 // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
5125 // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
5126 // // always guaranteed to be 8 byte aligned).
5127 // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
5129 // sub sp,sp,#outsz - #8
5131 // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
5132 // mov rX, #outsz - #8 // maybe multiple instructions
5138 calleeSaveSPDeltaUnaligned =
5139 totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
5140 assert(calleeSaveSPDeltaUnaligned >= 0);
5141 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5142 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5144 offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5145 assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
5146 // store the callee-saved registers.
5148 // We'll take care of these later, but callee-saved regs code shouldn't see them.
5149 maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
5154 // No frame pointer (no chaining).
5155 assert((maskSaveRegsInt & RBM_FP) == 0);
5156 assert((maskSaveRegsInt & RBM_LR) != 0);
5158 // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
5159 // if we only have one callee-saved register plus LR to save.
5161 NYI("Frame without frame pointer");
5165 assert(frameType != 0);
5167 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
5169 offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
5171 // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
5172 // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
5173 // need to add codes at all.
5175 if (compiler->info.compIsVarArgs)
5177 // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
5178 assert((offset % 16) == 0);
5179 for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
5181 regNumber reg2 = REG_NEXT(reg1);
5182 // stp REG, REG + 1, [SP, #offset]
5183 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
5184 compiler->unwindNop();
5185 offset += 2 * REGSIZE_BYTES;
5191 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
5192 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5194 else if (frameType == 2)
5196 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5197 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5199 else if (frameType == 3)
5201 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5202 assert(remainingFrameSz > 0);
5203 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
5204 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5206 if (compiler->lvaOutgoingArgSpaceSize >= 504)
5208 // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
5209 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5210 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5211 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5212 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5213 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5214 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
5216 genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
5217 offset += spAdjustment2;
5219 // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
5222 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5223 assert(spAdjustment3 > 0);
5224 assert((spAdjustment3 % 16) == 0);
5226 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
5227 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5229 genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
5230 offset += spAdjustment3;
5234 genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
5236 offset += remainingFrameSz;
5238 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5239 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5243 assert(offset == totalFrameSize);
5245 #elif defined(_TARGET_XARCH_)
5246 // Push backwards so we match the order we will pop them in the epilog
5247 // and all the other code that expects it to be in this order.
5248 for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
5250 regMaskTP regBit = genRegMask(reg);
5252 if ((regBit & rsPushRegs) != 0)
5254 inst_RV(INS_push, reg, TYP_REF);
5255 compiler->unwindPush(reg);
5257 if (!doubleAlignOrFramePointerUsed())
5259 psiAdjustStackLevel(REGSIZE_BYTES);
5262 rsPushRegs &= ~regBit;
5267 assert(!"Unknown TARGET");
5271 #if defined(_TARGET_ARM_)
5273 void CodeGen::genPushFltRegs(regMaskTP regMask)
5275 assert(regMask != 0); // Don't call uness we have some registers to push
5276 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5278 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5279 int slots = genCountBits(regMask);
5280 // regMask should be contiguously set
5281 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5282 assert((tmpMask & (tmpMask - 1)) == 0);
5283 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
5285 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
5286 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5287 noway_assert((slots % 2) == 0);
5289 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
5292 void CodeGen::genPopFltRegs(regMaskTP regMask)
5294 assert(regMask != 0); // Don't call uness we have some registers to pop
5295 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5297 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5298 int slots = genCountBits(regMask);
5299 // regMask should be contiguously set
5300 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5301 assert((tmpMask & (tmpMask - 1)) == 0);
5303 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
5304 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5305 noway_assert((slots % 2) == 0);
5307 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
5310 /*-----------------------------------------------------------------------------
5312 * If we have a jmp call, then the argument registers cannot be used in the
5313 * epilog. So return the current call's argument registers as the argument
5314 * registers for the jmp call.
5316 regMaskTP CodeGen::genJmpCallArgMask()
5318 assert(compiler->compGeneratingEpilog);
5320 regMaskTP argMask = RBM_NONE;
5321 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
5323 const LclVarDsc& desc = compiler->lvaTable[varNum];
5324 if (desc.lvIsRegArg)
5326 argMask |= genRegMask(desc.lvArgReg);
5332 /*-----------------------------------------------------------------------------
5334 * Free the local stack frame: add to SP.
5335 * If epilog unwind hasn't been started, and we generate code, we start unwind
5336 * and set *pUnwindStarted = true.
5339 void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
5341 assert(compiler->compGeneratingEpilog);
5346 // Add 'frameSize' to SP.
5348 // Unfortunately, we can't just use:
5350 // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
5352 // because we need to generate proper unwind codes for each instruction generated,
5353 // and large frame sizes might generate a temp register load which might
5354 // need an unwind code. We don't want to generate a "NOP" code for this
5355 // temp register load; we want the unwind codes to start after that.
5357 if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
5359 if (!*pUnwindStarted)
5361 compiler->unwindBegEpilog();
5362 *pUnwindStarted = true;
5365 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
5369 regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
5372 // Do not use argument registers as scratch registers in the jmp epilog.
5373 grabMask &= ~genJmpCallArgMask();
5375 regNumber tmpReg = REG_TMP_0;
5376 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
5377 if (*pUnwindStarted)
5379 compiler->unwindPadding();
5382 // We're going to generate an unwindable instruction, so check again if
5383 // we need to start the unwind codes.
5385 if (!*pUnwindStarted)
5387 compiler->unwindBegEpilog();
5388 *pUnwindStarted = true;
5391 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
5394 compiler->unwindAllocStack(frameSize);
5397 /*-----------------------------------------------------------------------------
5399 * Move of relocatable displacement value to register
5401 void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
5403 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
5404 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
5406 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5408 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
5412 /*-----------------------------------------------------------------------------
5414 * Move of relocatable data-label to register
5416 void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
5418 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
5419 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
5421 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5423 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
5427 /*-----------------------------------------------------------------------------
5429 * Move of relocatable immediate to register
5431 void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg)
5433 _ASSERTE(EA_IS_RELOC(size));
5435 getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr);
5436 getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr);
5438 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5440 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
5444 /*-----------------------------------------------------------------------------
5446 * Returns register mask to push/pop to allocate a small stack frame,
5447 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
5448 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
5450 regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
5452 assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
5454 // We can't do this optimization with callee saved floating point registers because
5455 // the stack would be allocated in a wrong spot.
5456 if (maskCalleeSavedFloat != RBM_NONE)
5459 // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
5460 // that extra sub sp,XXX/add sp,XXX.
5461 // R0 and R1 may be used by return value. Keep things simple and just skip the optimization
5462 // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
5463 // significant negative side-effects (more memory bus traffic).
5468 case 2 * REGSIZE_BYTES:
5469 return RBM_R2 | RBM_R3;
5475 #endif // _TARGET_ARM_
5477 /*****************************************************************************
5479 * initFltRegs -- The mask of float regs to be zeroed.
5480 * initDblRegs -- The mask of double regs to be zeroed.
5481 * initReg -- A zero initialized integer reg to copy from.
5483 * Does best effort to move between VFP/xmm regs if one is already
5484 * initialized to 0. (Arm Only) Else copies from the integer register which
5487 void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
5489 assert(compiler->compGeneratingProlog);
5491 // The first float/double reg that is initialized to 0. So they can be used to
5492 // initialize the remaining registers.
5493 regNumber fltInitReg = REG_NA;
5494 regNumber dblInitReg = REG_NA;
5496 // Iterate through float/double registers and initialize them to 0 or
5497 // copy from already initialized register of the same type.
5498 regMaskTP regMask = genRegMask(REG_FP_FIRST);
5499 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
5501 if (regMask & initFltRegs)
5503 // Do we have a float register already set to 0?
5504 if (fltInitReg != REG_NA)
5507 inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
5512 // Do we have a double register initialized to 0?
5513 if (dblInitReg != REG_NA)
5515 // Copy from double.
5516 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
5521 inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
5523 #elif defined(_TARGET_XARCH_)
5524 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
5525 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5527 #elif defined(_TARGET_ARM64_)
5528 // We will just zero out the entire vector register. This sets it to a double/float zero value
5529 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5531 #error Unsupported or unset target architecture
5536 else if (regMask & initDblRegs)
5538 // Do we have a double register already set to 0?
5539 if (dblInitReg != REG_NA)
5541 // Copy from double.
5542 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
5547 // Do we have a float register initialized to 0?
5548 if (fltInitReg != REG_NA)
5551 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
5556 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
5558 #elif defined(_TARGET_XARCH_)
5559 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
5560 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5562 #elif defined(_TARGET_ARM64_)
5563 // We will just zero out the entire vector register. This sets it to a double/float zero value
5564 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5566 #error Unsupported or unset target architecture
5574 /*-----------------------------------------------------------------------------
5576 * Restore any callee-saved registers we have used
5579 #if defined(_TARGET_ARM_)
5581 bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
5583 assert(compiler->compGeneratingEpilog);
5585 if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
5591 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5593 assert(compiler->compGeneratingEpilog);
5595 regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5596 regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
5597 regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
5599 // First, pop float registers
5601 if (maskPopRegsFloat != RBM_NONE)
5603 genPopFltRegs(maskPopRegsFloat);
5604 compiler->unwindPopMaskFloat(maskPopRegsFloat);
5607 // Next, pop integer registers
5611 regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
5612 maskPopRegsInt |= maskStackAlloc;
5615 if (isFramePointerUsed())
5617 assert(!regSet.rsRegsModified(RBM_FPBASE));
5618 maskPopRegsInt |= RBM_FPBASE;
5621 if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
5623 maskPopRegsInt |= RBM_PC;
5624 // Record the fact that we use a pop to the PC to perform the return
5625 genUsedPopToReturn = true;
5629 maskPopRegsInt |= RBM_LR;
5630 // Record the fact that we did not use a pop to the PC to perform the return
5631 genUsedPopToReturn = false;
5634 assert(FitsIn<int>(maskPopRegsInt));
5635 inst_IV(INS_pop, (int)maskPopRegsInt);
5636 compiler->unwindPopMaskInt(maskPopRegsInt);
5639 #elif defined(_TARGET_ARM64_)
5641 void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
5643 assert(compiler->compGeneratingEpilog);
5645 regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5647 if (isFramePointerUsed())
5649 rsRestoreRegs |= RBM_FPBASE;
5652 rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
5654 regMaskTP regsToRestoreMask = rsRestoreRegs;
5656 int totalFrameSize = genTotalFrameSize();
5658 int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
5660 int frameType = 0; // An indicator of what type of frame we are popping.
5661 int calleeSaveSPDelta = 0;
5662 int calleeSaveSPDeltaUnaligned = 0;
5664 if (isFramePointerUsed())
5666 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
5669 if (compiler->compLocallocUsed)
5671 // Restore sp from fp
5673 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
5674 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5677 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5679 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5681 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5683 else if (totalFrameSize <= 512)
5686 if (compiler->compLocallocUsed)
5688 // Restore sp from fp
5689 // sub sp, fp, #outsz
5690 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5691 compiler->lvaOutgoingArgSpaceSize);
5692 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5695 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5697 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5699 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5705 calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
5706 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
5707 assert(calleeSaveSPDeltaUnaligned >= 0);
5708 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5709 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5711 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
5713 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5714 assert(remainingFrameSz > 0);
5716 if (compiler->lvaOutgoingArgSpaceSize >= 504)
5718 // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
5719 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5720 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5721 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5722 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5723 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5724 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
5726 if (compiler->compLocallocUsed)
5728 // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
5730 // sub sp, fp, #alignmentAdjustment2
5731 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
5732 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5737 // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
5739 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5740 assert(spAdjustment3 > 0);
5741 assert((spAdjustment3 % 16) == 0);
5742 genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
5747 // add sp,sp,#remainingFrameSz
5748 genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr);
5752 if (compiler->compLocallocUsed)
5754 // Restore sp from fp
5755 // sub sp, fp, #outsz
5756 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5757 compiler->lvaOutgoingArgSpaceSize);
5758 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5762 // ldp fp,lr,[sp,#outsz]
5763 // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
5766 genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false,
5770 // Unlike frameType=1 or frameType=2 that restore SP at the end,
5771 // frameType=3 already adjusted SP above to delete local frame.
5772 // There is at most one alignment slot between SP and where we store the callee-saved registers.
5773 calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5774 assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
5779 // No frame pointer (no chaining).
5780 NYI("Frame without frame pointer");
5781 calleeSaveSPOffset = 0;
5784 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
5789 // ldp fp,lr,[sp],#framesz
5791 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
5792 INS_OPTS_POST_INDEX);
5793 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5795 else if (frameType == 2)
5798 // ldr fp,lr,[sp,#outsz]
5799 // add sp,sp,#framesz
5801 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5802 compiler->lvaOutgoingArgSpaceSize);
5803 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5805 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5806 compiler->unwindAllocStack(totalFrameSize);
5808 else if (frameType == 3)
5810 // Nothing to do after restoring callee-saved registers.
5818 #elif defined(_TARGET_XARCH_)
5820 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5822 assert(compiler->compGeneratingEpilog);
5824 unsigned popCount = 0;
5825 if (regSet.rsRegsModified(RBM_EBX))
5828 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5830 if (regSet.rsRegsModified(RBM_FPBASE))
5832 // EBP cannot be directly modified for EBP frame and double-aligned frames
5833 assert(!doubleAlignOrFramePointerUsed());
5836 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5839 #ifndef UNIX_AMD64_ABI
5840 // For System V AMD64 calling convention ESI and EDI are volatile registers.
5841 if (regSet.rsRegsModified(RBM_ESI))
5844 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5846 if (regSet.rsRegsModified(RBM_EDI))
5849 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5851 #endif // !defined(UNIX_AMD64_ABI)
5853 #ifdef _TARGET_AMD64_
5854 if (regSet.rsRegsModified(RBM_R12))
5857 inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
5859 if (regSet.rsRegsModified(RBM_R13))
5862 inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
5864 if (regSet.rsRegsModified(RBM_R14))
5867 inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
5869 if (regSet.rsRegsModified(RBM_R15))
5872 inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
5874 #endif // _TARGET_AMD64_
5876 // Amd64/x86 doesn't support push/pop of xmm registers.
5877 // These will get saved to stack separately after allocating
5878 // space on stack in prolog sequence. PopCount is essentially
5879 // tracking the count of integer registers pushed.
5881 noway_assert(compiler->compCalleeRegsPushed == popCount);
5884 #elif defined(_TARGET_X86_)
5886 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5888 assert(compiler->compGeneratingEpilog);
5890 unsigned popCount = 0;
5892 /* NOTE: The EBP-less frame code below depends on the fact that
5893 all of the pops are generated right at the start and
5894 each takes one byte of machine code.
5897 if (regSet.rsRegsModified(RBM_FPBASE))
5899 // EBP cannot be directly modified for EBP frame and double-aligned frames
5900 noway_assert(!doubleAlignOrFramePointerUsed());
5902 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5905 if (regSet.rsRegsModified(RBM_EBX))
5908 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5910 if (regSet.rsRegsModified(RBM_ESI))
5913 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5915 if (regSet.rsRegsModified(RBM_EDI))
5918 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5920 noway_assert(compiler->compCalleeRegsPushed == popCount);
5925 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
5926 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
5927 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
5929 #ifdef _TARGET_ARM64_
5931 #else // !_TARGET_ARM64_
5932 if (*pInitRegZeroed == false)
5934 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
5935 *pInitRegZeroed = true;
5938 #endif // !_TARGET_ARM64_
5941 /*-----------------------------------------------------------------------------
5943 * Do we have any untracked pointer locals at all,
5944 * or do we need to initialize memory for locspace?
5946 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
5947 * initializing memory (not inclusive).
5948 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
5949 * initializing memory.
5950 * initReg - A scratch register (that gets set to zero on some platforms).
5951 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
5953 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
5955 assert(compiler->compGeneratingProlog);
5957 if (genUseBlockInit)
5959 assert(untrLclHi > untrLclLo);
5960 #ifdef _TARGET_ARMARCH_
5962 Generate the following code:
5964 For cnt less than 10
5969 stm <rZero1,rZero2>,[rAddr!]
5970 <optional> stm <rZero1,rZero2>,[rAddr!]
5971 <optional> stm <rZero1,rZero2>,[rAddr!]
5972 <optional> stm <rZero1,rZero2>,[rAddr!]
5973 <optional> str rZero1,[rAddr]
5975 For rCnt greater than or equal to 10
5983 stm <rZero1,rZero2>,[rAddr!]
5987 <optional> str rZero1,[rAddr] // When cnt is odd
5989 NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
5993 regNumber rCnt = REG_NA; // Invalid
5996 regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
5997 availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
5999 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
6000 // a large constant.
6002 #if defined(_TARGET_ARM_)
6004 if (compiler->compLocallocUsed)
6006 availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
6009 regNumber rZero1; // We're going to use initReg for rZero1
6012 // We pick the next lowest register number for rZero2
6013 noway_assert(availMask != RBM_NONE);
6014 regMask = genFindLowestBit(availMask);
6015 rZero2 = genRegNumFromMask(regMask);
6016 availMask &= ~regMask;
6017 assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6018 0); // rZero2 is not a live incoming argument reg
6020 // We pick the next lowest register number for rAddr
6021 noway_assert(availMask != RBM_NONE);
6022 regMask = genFindLowestBit(availMask);
6023 rAddr = genRegNumFromMask(regMask);
6024 availMask &= ~regMask;
6026 #else // !define(_TARGET_ARM_)
6028 regNumber rZero1 = REG_ZR;
6030 *pInitRegZeroed = false;
6032 #endif // !defined(_TARGET_ARM_)
6034 bool useLoop = false;
6035 unsigned uCntBytes = untrLclHi - untrLclLo;
6036 assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
6037 unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
6039 // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
6040 // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
6041 // In both of these cases the stm/stp instruction will write two zeros to memory
6042 // and we will use a single str instruction at the end whenever we have an odd count.
6043 if (uCntSlots >= 10)
6048 // We pick the next lowest register number for rCnt
6049 noway_assert(availMask != RBM_NONE);
6050 regMask = genFindLowestBit(availMask);
6051 rCnt = genRegNumFromMask(regMask);
6052 availMask &= ~regMask;
6055 assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6056 0); // rAddr is not a live incoming argument reg
6057 #if defined(_TARGET_ARM_)
6058 if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
6059 #else // !_TARGET_ARM_
6060 if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
6061 #endif // !_TARGET_ARM_
6063 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
6067 // Load immediate into the InitReg register
6068 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
6069 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
6070 *pInitRegZeroed = false;
6075 noway_assert(uCntSlots >= 2);
6076 assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6077 0); // rCnt is not a live incoming argument reg
6078 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
6081 #if defined(_TARGET_ARM_)
6082 rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
6083 instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
6084 target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
6085 #endif // _TARGET_ARM_
6089 while (uCntBytes >= REGSIZE_BYTES * 2)
6092 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
6093 #else // !_TARGET_ARM_
6094 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6095 INS_OPTS_POST_INDEX);
6096 #endif // !_TARGET_ARM_
6097 uCntBytes -= REGSIZE_BYTES * 2;
6100 else // useLoop is true
6103 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
6104 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
6105 #else // !_TARGET_ARM_
6106 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6107 INS_OPTS_POST_INDEX); // zero stack slots
6108 getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
6109 #endif // !_TARGET_ARM_
6110 getEmitter()->emitIns_J(INS_bhi, NULL, -3);
6111 uCntBytes %= REGSIZE_BYTES * 2;
6114 if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
6117 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
6118 #else // _TARGET_ARM_
6119 if ((uCntBytes - REGSIZE_BYTES) == 0)
6121 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
6125 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
6127 #endif // !_TARGET_ARM_
6128 uCntBytes -= REGSIZE_BYTES;
6130 #ifdef _TARGET_ARM64_
6133 assert(uCntBytes == sizeof(int));
6134 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
6135 uCntBytes -= sizeof(int);
6137 #endif // _TARGET_ARM64_
6138 noway_assert(uCntBytes == 0);
6140 #elif defined(_TARGET_XARCH_)
6142 Generate the following code:
6144 lea edi, [ebp/esp-OFFS]
6150 noway_assert(regSet.rsRegsModified(RBM_EDI));
6152 #ifdef UNIX_AMD64_ABI
6153 // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
6154 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6156 noway_assert(regSet.rsRegsModified(RBM_R12));
6157 inst_RV_RV(INS_mov, REG_R12, REG_RCX);
6158 regSet.verifyRegUsed(REG_R12);
6161 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6163 noway_assert(regSet.rsRegsModified(RBM_R13));
6164 inst_RV_RV(INS_mov, REG_R13, REG_RDI);
6165 regSet.verifyRegUsed(REG_R13);
6167 #else // !UNIX_AMD64_ABI
6168 // For register arguments we may have to save ECX
6169 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6171 noway_assert(regSet.rsRegsModified(RBM_ESI));
6172 inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
6173 regSet.verifyRegUsed(REG_ESI);
6175 #endif // !UNIX_AMD64_ABI
6177 noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
6179 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
6180 regSet.verifyRegUsed(REG_EDI);
6182 inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
6183 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
6184 instGen(INS_r_stosd);
6186 #ifdef UNIX_AMD64_ABI
6187 // Move back the argument registers
6188 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6190 inst_RV_RV(INS_mov, REG_RCX, REG_R12);
6193 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6195 inst_RV_RV(INS_mov, REG_RDI, REG_R13);
6197 #else // !UNIX_AMD64_ABI
6198 // Move back the argument registers
6199 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6201 inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
6203 #endif // !UNIX_AMD64_ABI
6206 #error Unsupported or unset target architecture
6209 else if (genInitStkLclCnt > 0)
6211 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6212 0); // initReg is not a live incoming argument reg
6214 /* Initialize any lvMustInit vars on the stack */
6219 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
6221 if (!varDsc->lvMustInit)
6226 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
6227 // partially-enregistered vars in the case where we don't use a block init.
6228 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
6230 // lvMustInit can only be set for GC types or TYP_STRUCT types
6231 // or when compInitMem is true
6232 // or when in debug code
6234 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
6235 compiler->info.compInitMem || compiler->opts.compDbgCode);
6237 if (!varDsc->lvOnFrame)
6242 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
6243 (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
6245 // We only initialize the GC variables in the TYP_STRUCT
6246 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
6247 const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
6249 for (unsigned i = 0; i < slots; i++)
6251 if (gcPtrs[i] != TYPE_GC_NONE)
6253 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
6254 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
6260 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
6262 // zero out the whole thing rounded up to a single stack slot size
6263 unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
6265 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
6267 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
6270 #ifdef _TARGET_64BIT_
6271 assert(i == lclSize || (i + sizeof(int) == lclSize));
6274 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
6277 #endif // _TARGET_64BIT_
6278 assert(i == lclSize);
6282 if (!TRACK_GC_TEMP_LIFETIMES)
6284 assert(regSet.tmpAllFree());
6285 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
6287 if (!varTypeIsGC(tempThis->tdTempType()))
6292 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
6294 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
6300 /*-----------------------------------------------------------------------------
6302 * Save the generic context argument.
6304 * We need to do this within the "prolog" in case anyone tries to inspect
6305 * the param-type-arg/this (which can be done after the prolog) using
6306 * ICodeManager::GetParamTypeArg().
6309 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
6311 assert(compiler->compGeneratingProlog);
6313 bool reportArg = compiler->lvaReportParamTypeArg();
6315 // We should report either generic context arg or "this" when used so.
6318 #ifndef JIT32_GCENCODER
6319 if (!compiler->lvaKeepAliveAndReportThis())
6326 // For JIT32_GCENCODER, we won't be here if reportArg is false.
6327 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
6329 noway_assert(contextArg != BAD_VAR_NUM);
6330 LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
6332 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
6333 // moved to its final home location. So we need to use it from the
6334 // incoming location.
6338 bool isPrespilledForProfiling = false;
6339 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
6340 isPrespilledForProfiling =
6341 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
6344 // Load from the argument register only if it is not prespilled.
6345 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
6347 reg = varDsc->lvArgReg;
6351 if (isFramePointerUsed())
6353 #if defined(_TARGET_ARM_)
6354 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6355 // will become enregistered.
6356 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
6357 noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
6358 (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
6360 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6361 // will become enregistered.
6362 noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
6366 // We will just use the initReg since it is an available register
6367 // and we are probably done using it anyway...
6369 *pInitRegZeroed = false;
6371 // mov reg, [compiler->info.compTypeCtxtArg]
6372 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
6373 regSet.verifyRegUsed(reg);
6376 #if CPU_LOAD_STORE_ARCH
6377 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6378 compiler->lvaCachedGenericContextArgOffset());
6379 #else // CPU_LOAD_STORE_ARCH
6380 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
6381 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6382 compiler->lvaCachedGenericContextArgOffset());
6383 #endif // !CPU_LOAD_STORE_ARCH
6386 /*-----------------------------------------------------------------------------
6388 * Set the "GS" security cookie in the prolog.
6391 void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
6393 assert(compiler->compGeneratingProlog);
6395 if (!compiler->getNeedsGSSecurityCookie())
6400 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
6402 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
6404 #ifdef _TARGET_AMD64_
6405 // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
6406 getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
6407 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
6409 // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
6410 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
6411 compiler->lvaGSSecurityCookie, 0, initReg);
6417 #ifdef _TARGET_XARCH_
6418 // Always use EAX on x86 and x64
6419 // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
6422 // We will just use the initReg since it is an available register
6426 *pInitRegZeroed = false;
6428 #if CPU_LOAD_STORE_ARCH
6429 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6430 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
6431 regSet.verifyRegUsed(reg);
6433 // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
6434 // mov dword ptr [frame.GSSecurityCookie], reg
6435 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6436 regSet.verifyRegUsed(reg);
6438 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
6442 #ifdef PROFILING_SUPPORTED
6444 //-----------------------------------------------------------------------------------
6445 // genProfilingEnterCallback: Generate the profiling function enter callback.
6448 // initReg - register to use as scratch register
6449 // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
6450 // not zero after this call.
6456 // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
6457 // VM\i386\asmhelpers.asm for details):
6458 // 1. The calling sequence for calling the helper is:
6459 // push FunctionIDOrClientID
6460 // call ProfileEnterHelper
6461 // 2. The calling function has an EBP frame.
6462 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6463 // the following prolog is assumed:
6466 // 4. All registers are preserved.
6467 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
6469 void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
6471 assert(compiler->compGeneratingProlog);
6473 // Give profiler a chance to back out of hooking this method
6474 if (!compiler->compIsProfilerHookNeeded())
6479 #if defined(_TARGET_AMD64_)
6480 #if !defined(UNIX_AMD64_ABI)
6485 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6486 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6487 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6489 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
6490 // In case of vararg methods, arg regs are already homed.
6492 // Note: Here we don't need to worry about updating gc'info since enter
6493 // callback is generated as part of prolog which is non-gc interruptible.
6494 // Moreover GC cannot kick while executing inside profiler callback which is a
6495 // profiler requirement so it can examine arguments which could be obj refs.
6496 if (!compiler->info.compIsVarArgs)
6498 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6500 noway_assert(varDsc->lvIsParam);
6502 if (!varDsc->lvIsRegArg)
6507 var_types storeType = varDsc->lvaArgType();
6508 regNumber argReg = varDsc->lvArgReg;
6510 instruction store_ins = ins_Store(storeType);
6513 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
6515 store_ins = INS_mov;
6517 #endif // FEATURE_SIMD
6519 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
6523 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6524 // RCX = ProfilerMethHnd
6525 if (compiler->compProfilerMethHndIndirected)
6527 // Profiler hooks enabled during Ngen time.
6528 // Profiler handle needs to be accessed through an indirection of a pointer.
6529 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6533 // No need to record relocations, if we are generating ELT hooks under the influence
6534 // of COMPlus_JitELTHookEnabled=1
6535 if (compiler->opts.compJitELTHookEnabled)
6537 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6541 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6545 // RDX = caller's SP
6547 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6548 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6549 // of that offset to FramePointer to obtain caller's SP value.
6550 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6551 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6552 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6554 // Can't have a call until we have enough padding for rejit
6555 genPrologPadForReJit();
6557 // This will emit either
6558 // "call ip-relative 32-bit offset" or
6559 // "mov rax, helper addr; call rax"
6560 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
6562 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
6563 // generation logic that moves args around as required by first BB entry point conditions
6564 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
6565 // and genEnregisterIncomingStackArgs().
6567 // Now reload arg registers from home locations.
6569 // - we need to reload only known (i.e. fixed) reg args.
6570 // - if floating point type, also reload it into corresponding integer reg
6571 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6573 noway_assert(varDsc->lvIsParam);
6575 if (!varDsc->lvIsRegArg)
6580 var_types loadType = varDsc->lvaArgType();
6581 regNumber argReg = varDsc->lvArgReg;
6583 instruction load_ins = ins_Load(loadType);
6586 if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
6590 #endif // FEATURE_SIMD
6592 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
6595 if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
6597 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
6598 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6599 inst_RV_RV(ins, argReg, intArgReg, loadType);
6601 #endif // FEATURE_VARARG
6604 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6605 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6607 *pInitRegZeroed = false;
6610 #else // !defined(UNIX_AMD64_ABI)
6612 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6613 // R14 = ProfilerMethHnd
6614 if (compiler->compProfilerMethHndIndirected)
6616 // Profiler hooks enabled during Ngen time.
6617 // Profiler handle needs to be accessed through an indirection of a pointer.
6618 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
6619 (ssize_t)compiler->compProfilerMethHnd);
6623 // No need to record relocations, if we are generating ELT hooks under the influence
6624 // of COMPlus_JitELTHookEnabled=1
6625 if (compiler->opts.compJitELTHookEnabled)
6627 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6631 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6635 // R15 = caller's SP
6637 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6638 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6639 // of that offset to FramePointer to obtain caller's SP value.
6640 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6641 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6642 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
6644 // Can't have a call until we have enough padding for rejit
6645 genPrologPadForReJit();
6647 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6648 // We use R11 here. This will emit either
6649 // "call ip-relative 32-bit offset" or
6650 // "mov r11, helper addr; call r11"
6651 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6653 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6654 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6656 *pInitRegZeroed = false;
6659 #endif // !defined(UNIX_AMD64_ABI)
6661 #elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
6663 unsigned saveStackLvl2 = genStackLevel;
6665 #if defined(_TARGET_X86_)
6666 // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
6667 // for x86 stack unwinding
6669 #if defined(UNIX_X86_ABI)
6670 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6671 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6672 #endif // UNIX_X86_ABI
6674 // Push the profilerHandle
6675 if (compiler->compProfilerMethHndIndirected)
6677 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6681 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6684 #elif defined(_TARGET_ARM_)
6685 // On Arm arguments are prespilled on stack, which frees r0-r3.
6686 // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
6687 // The call target register could be any free register.
6688 regNumber argReg = REG_PROFILER_ENTER_ARG;
6689 regMaskTP argRegMask = genRegMask(argReg);
6690 assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
6692 if (compiler->compProfilerMethHndIndirected)
6694 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
6695 regSet.verifyRegUsed(argReg);
6699 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
6702 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6706 // Can't have a call until we have enough padding for rejit
6708 genPrologPadForReJit();
6710 // This will emit either
6711 // "call ip-relative 32-bit offset" or
6712 // "mov rax, helper addr; call rax"
6713 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
6714 0, // argSize. Again, we have to lie about it
6715 EA_UNKNOWN); // retSize
6717 #if defined(_TARGET_X86_)
6718 // Check that we have place for the push.
6719 assert(compiler->fgPtrArgCntMax >= 1);
6721 #if defined(UNIX_X86_ABI)
6722 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6723 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6724 #endif // UNIX_X86_ABI
6726 #elif defined(_TARGET_ARM_)
6727 if (initReg == argReg)
6729 *pInitRegZeroed = false;
6732 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6735 /* Restore the stack level */
6737 SetStackLevel(saveStackLvl2);
6740 NYI("Emit Profiler Enter callback");
6744 //-----------------------------------------------------------------------------------
6745 // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
6746 // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
6749 // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
6755 // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
6756 // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
6757 // 1. The calling sequence for calling the helper is:
6758 // push FunctionIDOrClientID
6759 // call ProfileLeaveHelper or ProfileTailcallHelper
6760 // 2. The calling function has an EBP frame.
6761 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6762 // the following prolog is assumed:
6765 // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
6766 // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
6767 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
6769 void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
6771 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
6773 // Only hook if profiler says it's okay.
6774 if (!compiler->compIsProfilerHookNeeded())
6779 compiler->info.compProfilerCallback = true;
6781 // Need to save on to the stack level, since the helper call will pop the argument
6782 unsigned saveStackLvl2 = genStackLevel;
6784 #if defined(_TARGET_AMD64_)
6785 #if !defined(UNIX_AMD64_ABI)
6787 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6788 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6789 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6791 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
6792 // registers that profiler callback kills.
6793 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
6795 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
6796 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
6799 // At this point return value is computed and stored in RAX or XMM0.
6800 // On Amd64, Leave callback preserves the return register. We keep
6801 // RAX alive by not reporting as trashed by helper call. Also note
6802 // that GC cannot kick-in while executing inside profiler callback,
6803 // which is a requirement of profiler as well since it needs to examine
6804 // return value which could be an obj ref.
6806 // RCX = ProfilerMethHnd
6807 if (compiler->compProfilerMethHndIndirected)
6809 // Profiler hooks enabled during Ngen time.
6810 // Profiler handle needs to be accessed through an indirection of an address.
6811 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6815 // Don't record relocations, if we are generating ELT hooks under the influence
6816 // of COMPlus_JitELTHookEnabled=1
6817 if (compiler->opts.compJitELTHookEnabled)
6819 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6823 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6827 // RDX = caller's SP
6828 // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
6829 // of the stmnts to execute unconditionally and clean-up rest.
6830 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6832 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
6833 // value of that offset to FramePointer to obtain caller's SP value.
6834 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6835 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6839 // If we are here means that it is a tentative frame layout during which we
6840 // cannot use caller's SP offset since it is an estimate. For now we require the
6841 // method to have at least a single arg so that we can use it to obtain caller's
6843 LclVarDsc* varDsc = compiler->lvaTable;
6844 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6846 // lea rdx, [FramePointer + Arg0's offset]
6847 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6850 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
6851 // We use R8 here. This will emit either
6852 // "call ip-relative 32-bit offset" or
6853 // "mov r8, helper addr; call r8"
6854 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
6856 #else // !defined(UNIX_AMD64_ABI)
6858 // RDI = ProfilerMethHnd
6859 if (compiler->compProfilerMethHndIndirected)
6861 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6865 if (compiler->opts.compJitELTHookEnabled)
6867 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6871 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6875 // RSI = caller's SP
6876 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6878 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6879 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6883 LclVarDsc* varDsc = compiler->lvaTable;
6884 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6886 // lea rdx, [FramePointer + Arg0's offset]
6887 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6890 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6891 // We use R11 here. This will emit either
6892 // "call ip-relative 32-bit offset" or
6893 // "mov r11, helper addr; call r11"
6894 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6896 #endif // !defined(UNIX_AMD64_ABI)
6898 #elif defined(_TARGET_X86_)
6900 #if defined(UNIX_X86_ABI)
6901 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6902 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6904 AddNestedAlignment(0xC);
6905 #endif // UNIX_X86_ABI
6908 // Push the profilerHandle
6911 if (compiler->compProfilerMethHndIndirected)
6913 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6917 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6921 #if defined(UNIX_X86_ABI)
6922 int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
6924 int argSize = REGSIZE_BYTES;
6926 genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
6928 // Check that we have place for the push.
6929 assert(compiler->fgPtrArgCntMax >= 1);
6931 #if defined(UNIX_X86_ABI)
6932 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6933 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6934 SubtractStackLevel(0x10);
6935 SubtractNestedAlignment(0xC);
6936 #endif // UNIX_X86_ABI
6938 #elif defined(_TARGET_ARM_)
6940 // Push the profilerHandle
6943 // Contract between JIT and Profiler Leave callout on arm:
6944 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
6945 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
6946 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
6947 // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
6949 // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
6952 emitAttr attr = EA_UNKNOWN;
6954 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
6955 (varTypeIsFloating(compiler->info.compRetType) ||
6956 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
6962 // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
6963 // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
6964 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
6967 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
6969 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
6972 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
6979 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
6980 regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
6981 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
6985 if (compiler->compProfilerMethHndIndirected)
6987 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6988 regSet.verifyRegUsed(REG_ARG_0);
6992 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6995 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
6997 EA_UNKNOWN); // retSize
6999 // Restore state that existed before profiler callback
7002 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
7003 regSet.verifyRegUsed(REG_ARG_0);
7004 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
7008 NYI("Emit Profiler Leave callback");
7011 /* Restore the stack level */
7012 SetStackLevel(saveStackLvl2);
7015 #endif // PROFILING_SUPPORTED
7017 /*****************************************************************************
7022 These instructions are just a reordering of the instructions used today.
7028 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7030 add esp, LOCALS_SIZE / pop dummyReg
7040 The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
7041 Everything else is similar, though in a different order.
7043 The security object will no longer be at a fixed offset. However, the
7044 offset can still be determined by looking up the GC-info and determining
7045 how many callee-saved registers are pushed.
7052 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7054 add esp, LOCALS_SIZE / pop dummyReg
7058 (mov esp, ebp if there are no callee-saved registers)
7062 Double-aligned frame :
7063 --------------------
7065 LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
7066 of callee-saved registers are pushed on the stack so that the locals
7067 themselves are qword-aligned. The instructions are the same as today,
7068 just in a different order.
7076 sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
7078 add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
7087 localloc (with ebp) frames :
7088 --------------------------
7090 The instructions are the same as today, just in a different order.
7091 Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
7092 which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
7099 sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7101 lea esp, [ebp-calleeSavedRegsPushedSize]
7105 (mov esp, ebp if there are no callee-saved registers)
7109 *****************************************************************************/
7111 /*****************************************************************************
7113 * Generates appropriate NOP padding for a function prolog to support ReJIT.
7116 void CodeGen::genPrologPadForReJit()
7118 assert(compiler->compGeneratingProlog);
7120 #ifdef _TARGET_XARCH_
7121 if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
7126 #if FEATURE_EH_FUNCLETS
7128 // No need to generate pad (nops) for funclets.
7129 // When compiling the main function (and not a funclet)
7130 // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
7131 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7136 #endif // FEATURE_EH_FUNCLETS
7138 unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
7146 /*****************************************************************************
7148 * Reserve space for a function prolog.
7151 void CodeGen::genReserveProlog(BasicBlock* block)
7153 assert(block != nullptr);
7155 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
7157 /* Nothing is live on entry to the prolog */
7159 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
7162 /*****************************************************************************
7164 * Reserve space for a function epilog.
7167 void CodeGen::genReserveEpilog(BasicBlock* block)
7169 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
7170 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
7172 /* The return value is special-cased: make sure it goes live for the epilog */
7174 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
7176 if (genFullPtrRegMap && !jmpEpilog)
7178 if (varTypeIsGC(compiler->info.compRetNativeType))
7180 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
7182 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
7184 switch (compiler->info.compRetNativeType)
7187 gcrefRegsArg |= RBM_INTRET;
7190 byrefRegsArg |= RBM_INTRET;
7198 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
7200 assert(block != nullptr);
7201 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
7202 bool last = (block->bbNext == nullptr);
7203 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
7206 #if FEATURE_EH_FUNCLETS
7208 /*****************************************************************************
7210 * Reserve space for a funclet prolog.
7213 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
7215 assert(block != nullptr);
7217 /* Currently, no registers are live on entry to the prolog, except maybe
7218 the exception object. There might be some live stack vars, but they
7219 cannot be accessed until after the frame pointer is re-established.
7220 In order to potentially prevent emitting a death before the prolog
7221 and a birth right after it, we just report it as live during the
7222 prolog, and rely on the prolog being non-interruptible. Trust
7223 genCodeForBBlist to correctly initialize all the sets.
7225 We might need to relax these asserts if the VM ever starts
7226 restoring any registers, then we could have live-in reg vars...
7229 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
7230 noway_assert(gcInfo.gcRegByrefSetCur == 0);
7232 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
7234 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7235 gcInfo.gcRegByrefSetCur, false);
7238 /*****************************************************************************
7240 * Reserve space for a funclet epilog.
7243 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
7245 assert(block != nullptr);
7247 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
7249 bool last = (block->bbNext == nullptr);
7250 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7251 gcInfo.gcRegByrefSetCur, last);
7254 #endif // FEATURE_EH_FUNCLETS
7256 /*****************************************************************************
7257 * Finalize the frame size and offset assignments.
7259 * No changes can be made to the modified register set after this, since that can affect how many
7260 * callee-saved registers get saved.
7262 void CodeGen::genFinalizeFrame()
7264 JITDUMP("Finalizing stack frame\n");
7266 // Initializations need to happen based on the var locations at the start
7267 // of the first basic block, so load those up. In particular, the determination
7268 // of whether or not to use block init in the prolog is dependent on the variable
7269 // locations on entry to the function.
7270 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
7272 genCheckUseBlockInit();
7274 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7275 CLANG_FORMAT_COMMENT_ANCHOR;
7277 #if defined(_TARGET_X86_)
7279 if (compiler->compTailCallUsed)
7281 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
7282 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
7283 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
7284 // actually get saved.
7286 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
7288 #endif // _TARGET_X86_
7290 #if defined(_TARGET_ARMARCH_)
7291 // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
7292 // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
7293 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7295 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
7297 #endif // defined(_TARGET_ARMARCH_)
7299 #if defined(_TARGET_ARM_)
7300 // If there are any reserved registers, add them to the
7301 if (regSet.rsMaskResvd != RBM_NONE)
7303 regSet.rsSetRegsModified(regSet.rsMaskResvd);
7305 #endif // _TARGET_ARM_
7310 printf("Modified regs: ");
7311 dspRegMask(regSet.rsGetModifiedRegsMask());
7316 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7317 if (compiler->opts.compDbgEnC)
7319 // We always save FP.
7320 noway_assert(isFramePointerUsed());
7321 #ifdef _TARGET_AMD64_
7322 // On x64 we always save exactly RBP, RSI and RDI for EnC.
7323 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
7324 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
7325 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
7326 #else // !_TARGET_AMD64_
7327 // On x86 we save all callee saved regs so the saved reg area size is consistent
7328 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7329 #endif // !_TARGET_AMD64_
7332 /* If we have any pinvoke calls, we might potentially trash everything */
7333 if (compiler->info.compCallUnmanaged)
7335 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
7336 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7339 #ifdef UNIX_AMD64_ABI
7340 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
7341 if (compiler->compIsProfilerHookNeeded())
7343 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
7347 /* Count how many callee-saved registers will actually be saved (pushed) */
7349 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
7350 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
7353 // EBP cannot be (directly) modified
7354 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
7357 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
7359 #ifdef _TARGET_ARMARCH_
7360 if (isFramePointerUsed())
7362 // For a FP based frame we have to push/pop the FP register
7364 maskCalleeRegsPushed |= RBM_FPBASE;
7366 // This assert check that we are not using REG_FP
7367 // as both the frame pointer and as a codegen register
7369 assert(!regSet.rsRegsModified(RBM_FPBASE));
7372 // we always push LR. See genPushCalleeSavedRegisters
7374 maskCalleeRegsPushed |= RBM_LR;
7376 #if defined(_TARGET_ARM_)
7377 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
7378 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
7379 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
7381 if ((maskPushRegsFloat != RBM_NONE) ||
7382 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
7384 // Here we try to keep stack double-aligned before the vpush
7385 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
7387 regNumber extraPushedReg = REG_R4;
7388 while (maskPushRegsInt & genRegMask(extraPushedReg))
7390 extraPushedReg = REG_NEXT(extraPushedReg);
7392 if (extraPushedReg < REG_R11)
7394 maskPushRegsInt |= genRegMask(extraPushedReg);
7395 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
7398 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
7401 // We currently only expect to push/pop consecutive FP registers
7402 // and these have to be double-sized registers as well.
7403 // Here we will insure that maskPushRegsFloat obeys these requirements.
7405 if (maskPushRegsFloat != RBM_NONE)
7407 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
7408 while (maskPushRegsFloat > contiguousMask)
7410 contiguousMask <<= 2;
7411 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
7413 if (maskPushRegsFloat != contiguousMask)
7415 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
7416 maskPushRegsFloat |= maskExtraRegs;
7417 regSet.rsSetRegsModified(maskExtraRegs);
7418 maskCalleeRegsPushed |= maskExtraRegs;
7421 #endif // _TARGET_ARM_
7422 #endif // _TARGET_ARMARCH_
7424 #if defined(_TARGET_XARCH_)
7425 // Compute the count of callee saved float regs saved on stack.
7426 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
7427 // regs are stack allocated and preserved in their stack locations.
7428 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
7429 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
7430 #endif // defined(_TARGET_XARCH_)
7432 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
7437 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
7438 dspRegMask(maskCalleeRegsPushed);
7443 /* Assign the final offsets to things living on the stack frame */
7445 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
7447 /* We want to make sure that the prolog size calculated here is accurate
7448 (that is instructions will not shrink because of conservative stack
7449 frame approximations). We do this by filling in the correct size
7450 here (where we have committed to the final numbers for the frame offsets)
7451 This will ensure that the prolog size is always correct
7453 getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize();
7456 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
7458 compiler->lvaTableDump();
7463 //------------------------------------------------------------------------
7464 // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
7467 // delta - the offset to add to the current stack pointer to establish the frame pointer
7468 // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
7470 void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
7472 assert(compiler->compGeneratingProlog);
7474 #if defined(_TARGET_XARCH_)
7478 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
7483 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7484 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
7488 if (reportUnwindData)
7490 compiler->unwindSetFrameReg(REG_FPBASE, delta);
7493 #elif defined(_TARGET_ARM_)
7495 assert(arm_Valid_Imm_For_Add_SP(delta));
7496 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7498 if (reportUnwindData)
7500 compiler->unwindPadding();
7504 NYI("establish frame pointer");
7508 /*****************************************************************************
7510 * Generates code for a function prolog.
7512 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
7514 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
7515 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
7516 * only instructions which result in control not going to the next instruction. Basically, any time execution would
7517 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
7518 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
7519 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
7521 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
7522 * debugger team to ensure that stepping still works.
7524 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
7528 #pragma warning(push)
7529 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
7531 void CodeGen::genFnProlog()
7533 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
7535 compiler->funSetCurrentFunc(0);
7540 printf("*************** In genFnProlog()\n");
7545 genInterruptibleUsed = true;
7548 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
7550 /* Ready to start on the prolog proper */
7552 getEmitter()->emitBegProlog();
7553 compiler->unwindBegProlog();
7555 // Do this so we can put the prolog instruction group ahead of
7556 // other instruction groups
7557 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
7560 if (compiler->opts.dspCode)
7562 printf("\n__prolog:\n");
7566 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
7568 // Create new scopes for the method-parameters for the prolog-block.
7574 if (compiler->compJitHaltMethod())
7576 /* put a nop first because the debugger and other tools are likely to
7577 put an int3 at the begining and we don't want to confuse them */
7580 instGen(INS_BREAKPOINT);
7582 #ifdef _TARGET_ARMARCH_
7583 // Avoid asserts in the unwind info because these instructions aren't accounted for.
7584 compiler->unwindPadding();
7585 #endif // _TARGET_ARMARCH_
7589 #if FEATURE_EH_FUNCLETS && defined(DEBUG)
7591 // We cannot force 0-initialization of the PSPSym
7592 // as it will overwrite the real value
7593 if (compiler->lvaPSPSym != BAD_VAR_NUM)
7595 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
7596 assert(!varDsc->lvMustInit);
7599 #endif // FEATURE_EH_FUNCLETS && DEBUG
7601 /*-------------------------------------------------------------------------
7603 * Record the stack frame ranges that will cover all of the tracked
7604 * and untracked pointer variables.
7605 * Also find which registers will need to be zero-initialized.
7607 * 'initRegs': - Generally, enregistered variables should not need to be
7608 * zero-inited. They only need to be zero-inited when they
7609 * have a possibly uninitialized read on some control
7610 * flow path. Apparently some of the IL_STUBs that we
7611 * generate have this property.
7614 int untrLclLo = +INT_MAX;
7615 int untrLclHi = -INT_MAX;
7616 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
7617 // Note that they may be tracked, but simply not allocated to a register.
7618 bool hasUntrLcl = false;
7620 int GCrefLo = +INT_MAX;
7621 int GCrefHi = -INT_MAX;
7622 bool hasGCRef = false;
7624 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
7625 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
7626 regMaskTP initDblRegs = RBM_NONE;
7631 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7633 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
7638 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
7640 noway_assert(varDsc->lvRefCnt() == 0);
7644 signed int loOffs = varDsc->lvStkOffs;
7645 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
7647 /* We need to know the offset range of tracked stack GC refs */
7648 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
7650 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
7652 // For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
7653 // taken care of by the parent struct.
7654 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
7658 if (loOffs < GCrefLo)
7662 if (hiOffs > GCrefHi)
7669 /* For lvMustInit vars, gather pertinent info */
7671 if (!varDsc->lvMustInit)
7676 if (varDsc->lvIsInReg())
7678 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
7679 if (!varDsc->IsFloatRegType())
7681 initRegs |= regMask;
7683 if (varTypeIsMultiReg(varDsc))
7685 if (varDsc->lvOtherReg != REG_STK)
7687 initRegs |= genRegMask(varDsc->lvOtherReg);
7691 /* Upper DWORD is on the stack, and needs to be inited */
7693 loOffs += sizeof(int);
7698 else if (varDsc->TypeGet() == TYP_DOUBLE)
7700 initDblRegs |= regMask;
7704 initFltRegs |= regMask;
7713 if (loOffs < untrLclLo)
7717 if (hiOffs > untrLclHi)
7724 /* Don't forget about spill temps that hold pointers */
7726 if (!TRACK_GC_TEMP_LIFETIMES)
7728 assert(regSet.tmpAllFree());
7729 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
7731 if (!varTypeIsGC(tempThis->tdTempType()))
7736 signed int loOffs = tempThis->tdTempOffs();
7737 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
7739 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
7740 // previous frame pointer. Thus, stkOffs can't be zero.
7741 CLANG_FORMAT_COMMENT_ANCHOR;
7743 #if !defined(_TARGET_AMD64_)
7744 // However, on amd64 there is no requirement to chain frame pointers.
7746 noway_assert(!isFramePointerUsed() || loOffs != 0);
7747 #endif // !defined(_TARGET_AMD64_)
7749 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
7753 if (loOffs < untrLclLo)
7757 if (hiOffs > untrLclHi)
7764 assert((genInitStkLclCnt > 0) == hasUntrLcl);
7769 if (genInitStkLclCnt > 0)
7771 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
7778 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
7779 // Ditto for all enregistered user arguments in a varargs method.
7780 // These registers will be available to use for the initReg. We just remove
7781 // all of these registers from the rsCalleeRegArgMaskLiveIn.
7783 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
7786 /* Choose the register to use for zero initialization */
7788 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
7789 bool initRegZeroed = false;
7790 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
7793 // We should not use the special PINVOKE registers as the initReg
7794 // since they are trashed by the jithelper call to setup the PINVOKE frame
7795 if (compiler->info.compCallUnmanaged)
7797 excludeMask |= RBM_PINVOKE_FRAME;
7799 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
7800 if (!compiler->opts.ShouldUsePInvokeHelpers())
7802 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
7804 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
7806 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
7808 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
7809 if (varDsc->lvRegister)
7811 excludeMask |= genRegMask(varDsc->lvRegNum);
7817 // If we have a variable sized frame (compLocallocUsed is true)
7818 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
7819 if (compiler->compLocallocUsed)
7821 excludeMask |= RBM_SAVED_LOCALLOC_SP;
7823 #endif // _TARGET_ARM_
7825 #if defined(_TARGET_XARCH_)
7826 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7828 // We currently must use REG_EAX on x86 here
7829 // because the loop's backwards branch depends upon the size of EAX encodings
7830 assert(initReg == REG_EAX);
7833 #endif // _TARGET_XARCH_
7835 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
7837 if (tempMask != RBM_NONE)
7839 // We will use one of the registers that we were planning to zero init anyway.
7840 // We pick the lowest register number.
7841 tempMask = genFindLowestBit(tempMask);
7842 initReg = genRegNumFromMask(tempMask);
7844 // Next we prefer to use one of the unused argument registers.
7845 // If they aren't available we use one of the caller-saved integer registers.
7848 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
7849 if (tempMask != RBM_NONE)
7851 // We pick the lowest register number
7852 tempMask = genFindLowestBit(tempMask);
7853 initReg = genRegNumFromMask(tempMask);
7858 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
7860 #if defined(_TARGET_AMD64_)
7861 // If we are a varargs call, in order to set up the arguments correctly this
7862 // must be done in a 2 step process. As per the x64 ABI:
7863 // a) The caller sets up the argument shadow space (just before the return
7864 // address, 4 pointer sized slots).
7865 // b) The callee is responsible to home the arguments on the shadow space
7866 // provided by the caller.
7867 // This way, the varargs iterator will be able to retrieve the
7868 // call arguments properly since both the arg regs and the stack allocated
7869 // args will be contiguous.
7870 if (compiler->info.compIsVarArgs)
7872 getEmitter()->spillIntArgRegsToShadowSlots();
7875 #endif // _TARGET_AMD64_
7878 /*-------------------------------------------------------------------------
7880 * Now start emitting the part of the prolog which sets up the frame
7883 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
7885 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
7886 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
7888 #endif // _TARGET_ARM_
7890 #ifdef _TARGET_XARCH_
7891 if (doubleAlignOrFramePointerUsed())
7893 inst_RV(INS_push, REG_FPBASE, TYP_REF);
7894 compiler->unwindPush(REG_FPBASE);
7895 psiAdjustStackLevel(REGSIZE_BYTES);
7897 #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
7898 genEstablishFramePointer(0, /*reportUnwindData*/ true);
7899 #endif // !_TARGET_AMD64_
7902 if (compiler->genDoubleAlign())
7904 noway_assert(isFramePointerUsed() == false);
7905 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
7907 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
7909 #endif // DOUBLE_ALIGN
7911 #endif // _TARGET_XARCH_
7913 #ifdef _TARGET_ARM64_
7914 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
7915 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7916 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
7917 #else // !_TARGET_ARM64_
7918 genPushCalleeSavedRegisters();
7919 #endif // !_TARGET_ARM64_
7922 bool needToEstablishFP = false;
7923 int afterLclFrameSPtoFPdelta = 0;
7924 if (doubleAlignOrFramePointerUsed())
7926 needToEstablishFP = true;
7928 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
7929 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
7930 // too big, we go ahead and do it here.
7932 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
7933 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
7934 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
7936 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
7937 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
7938 needToEstablishFP = false;
7941 #endif // _TARGET_ARM_
7943 //-------------------------------------------------------------------------
7945 // Subtract the local frame size from SP.
7947 //-------------------------------------------------------------------------
7948 CLANG_FORMAT_COMMENT_ANCHOR;
7950 #ifndef _TARGET_ARM64_
7951 regMaskTP maskStackAlloc = RBM_NONE;
7955 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
7956 #endif // _TARGET_ARM_
7958 if (maskStackAlloc == RBM_NONE)
7960 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7962 #endif // !_TARGET_ARM64_
7964 //-------------------------------------------------------------------------
7967 if (compiler->compLocallocUsed)
7969 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
7970 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
7971 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
7973 #endif // _TARGET_ARMARCH_
7975 #if defined(_TARGET_XARCH_)
7976 // Preserve callee saved float regs to stack.
7977 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
7978 #endif // defined(_TARGET_XARCH_)
7980 #ifdef _TARGET_AMD64_
7981 // Establish the AMD64 frame pointer after the OS-reported prolog.
7982 if (doubleAlignOrFramePointerUsed())
7984 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
7985 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
7987 #endif //_TARGET_AMD64_
7989 //-------------------------------------------------------------------------
7991 // This is the end of the OS-reported prolog for purposes of unwinding
7993 //-------------------------------------------------------------------------
7996 if (needToEstablishFP)
7998 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
7999 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
8001 #endif // _TARGET_ARM_
8003 if (compiler->info.compPublishStubParam)
8005 #if CPU_LOAD_STORE_ARCH
8006 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8007 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8009 // mov [lvaStubArgumentVar], EAX
8010 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8011 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8013 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
8015 // It's no longer live; clear it out so it can be used after this in the prolog
8016 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
8020 // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
8021 // when creating the stack frame. Don't think it's worth it, though.
8022 if (genNeedPrologStackProbe)
8025 // Can't have a call until we have enough padding for rejit
8027 genPrologPadForReJit();
8028 noway_assert(compiler->opts.compNeedStackProbes);
8029 genGenerateStackProbe();
8030 compiler->compStackProbePrologDone = true;
8032 #endif // STACK_PROBES
8035 // Zero out the frame as needed
8038 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
8040 #if FEATURE_EH_FUNCLETS
8042 genSetPSPSym(initReg, &initRegZeroed);
8044 #else // !FEATURE_EH_FUNCLETS
8046 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
8047 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
8049 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
8050 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
8052 // Zero out the slot for nesting level 0
8053 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
8057 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8058 initRegZeroed = true;
8061 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
8065 #endif // !FEATURE_EH_FUNCLETS
8067 genReportGenericContextArg(initReg, &initRegZeroed);
8069 // The local variable representing the security object must be on the stack frame
8070 // and must be 0 initialized.
8071 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
8072 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
8073 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
8075 #ifdef JIT32_GCENCODER
8076 // Initialize the LocalAllocSP slot if there is localloc in the function.
8077 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
8079 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
8081 #endif // JIT32_GCENCODER
8083 // Set up the GS security cookie
8085 genSetGSSecurityCookie(initReg, &initRegZeroed);
8087 #ifdef PROFILING_SUPPORTED
8089 // Insert a function entry callback for profiling, if requested.
8090 genProfilingEnterCallback(initReg, &initRegZeroed);
8092 #endif // PROFILING_SUPPORTED
8094 if (!genInterruptible)
8096 /*-------------------------------------------------------------------------
8098 * The 'real' prolog ends here for non-interruptible methods.
8099 * For fully-interruptible methods, we extend the prolog so that
8100 * we do not need to track GC inforation while shuffling the
8103 * Make sure there's enough padding for ReJIT.
8106 genPrologPadForReJit();
8107 getEmitter()->emitMarkPrologEnd();
8110 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
8111 // The unused bits of Vector3 arguments must be cleared
8112 // since native compiler doesn't initize the upper bits to zeros.
8114 // TODO-Cleanup: This logic can be implemented in
8115 // genFnPrologCalleeRegArgs() for argument registers and
8116 // genEnregisterIncomingStackArgs() for stack arguments.
8117 genClearStackVec3ArgUpperBits();
8118 #endif // UNIX_AMD64_ABI && FEATURE_SIMD
8120 /*-----------------------------------------------------------------------------
8121 * Take care of register arguments first
8126 // Update the arg initial register locations.
8127 compiler->lvaUpdateArgsWithInitialReg();
8129 FOREACH_REGISTER_FILE(regState)
8131 if (regState->rsCalleeRegArgMaskLiveIn)
8133 // If we need an extra register to shuffle around the incoming registers
8134 // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
8135 // if we don't need to use the xtraReg then this flag will stay false
8138 bool xtraRegClobbered = false;
8140 if (genRegMask(initReg) & RBM_ARG_REGS)
8146 xtraReg = REG_SCRATCH;
8147 initRegZeroed = false;
8150 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
8152 if (xtraRegClobbered)
8154 initRegZeroed = false;
8159 // Home the incoming arguments
8160 genEnregisterIncomingStackArgs();
8162 /* Initialize any must-init registers variables now */
8166 regMaskTP regMask = 0x1;
8168 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
8170 if (regMask & initRegs)
8172 // Check if we have already zeroed this register
8173 if ((reg == initReg) && initRegZeroed)
8179 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
8182 initRegZeroed = true;
8189 if (initFltRegs | initDblRegs)
8191 // If initReg is not in initRegs then we will use REG_SCRATCH
8192 if ((genRegMask(initReg) & initRegs) == 0)
8194 initReg = REG_SCRATCH;
8195 initRegZeroed = false;
8199 // This is needed only for Arm since it can use a zero initialized int register
8200 // to initialize vfp registers.
8203 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8204 initRegZeroed = true;
8206 #endif // _TARGET_ARM_
8208 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
8211 //-----------------------------------------------------------------------------
8214 // Increase the prolog size here only if fully interruptible.
8215 // And again make sure it's big enough for ReJIT
8218 if (genInterruptible)
8220 genPrologPadForReJit();
8221 getEmitter()->emitMarkPrologEnd();
8224 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8231 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
8235 noway_assert(GCrefLo == +INT_MAX);
8236 noway_assert(GCrefHi == -INT_MAX);
8240 if (compiler->opts.dspCode)
8247 // On non-x86 the VARARG cookie does not need any special treatment.
8249 // Load up the VARARG argument pointer register so it doesn't get clobbered.
8250 // only do this if we actually access any statically declared args
8251 // (our argument pointer register has a refcount > 0).
8252 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
8254 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > 0)
8256 varDsc = &compiler->lvaTable[argsStartVar];
8258 noway_assert(compiler->info.compArgsCount > 0);
8260 // MOV EAX, <VARARGS HANDLE>
8261 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
8262 regSet.verifyRegUsed(REG_EAX);
8265 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
8267 // EDX might actually be holding something here. So make sure to only use EAX for this code
8270 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
8271 noway_assert(!lastArg->lvRegister);
8272 signed offset = lastArg->lvStkOffs;
8273 assert(offset != BAD_STK_OFFS);
8274 noway_assert(lastArg->lvFramePointerBased);
8276 // LEA EAX, &<VARARGS HANDLE> + EAX
8277 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
8279 if (varDsc->lvIsInReg())
8281 if (varDsc->lvRegNum != REG_EAX)
8283 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
8284 regSet.verifyRegUsed(varDsc->lvRegNum);
8289 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
8293 #endif // _TARGET_X86_
8295 #if defined(DEBUG) && defined(_TARGET_XARCH_)
8296 if (compiler->opts.compStackCheckOnRet)
8298 noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC &&
8299 compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister &&
8300 compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame);
8301 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
8303 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)
8305 getEmitter()->emitEndProlog();
8306 compiler->unwindEndProlog();
8308 noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize());
8311 #pragma warning(pop)
8314 /*****************************************************************************
8316 * Generates code for a function epilog.
8318 * Please consult the "debugger team notification" comment in genFnProlog().
8321 #if defined(_TARGET_ARMARCH_)
8323 void CodeGen::genFnEpilog(BasicBlock* block)
8327 printf("*************** In genFnEpilog()\n");
8330 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8332 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8333 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8334 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8337 if (compiler->opts.dspCode)
8338 printf("\n__epilog:\n");
8342 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8343 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8344 printf(", gcRegGCrefSetCur=");
8345 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8346 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8347 printf(", gcRegByrefSetCur=");
8348 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8349 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8354 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8356 GenTree* lastNode = block->lastNode();
8358 // Method handle and address info used in case of jump epilog
8359 CORINFO_METHOD_HANDLE methHnd = nullptr;
8360 CORINFO_CONST_LOOKUP addrInfo;
8361 addrInfo.addr = nullptr;
8362 addrInfo.accessType = IAT_VALUE;
8364 if (jmpEpilog && lastNode->gtOper == GT_JMP)
8366 methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1;
8367 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8371 // We delay starting the unwind codes until we have an instruction which we know
8372 // needs an unwind code. In particular, for large stack frames in methods without
8373 // localloc, the sequence might look something like this:
8376 // pop {r4,r5,r6,r10,r11,pc}
8377 // In this case, the "movw" should not be part of the unwind codes, since it will
8378 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
8379 // also sets the current location as the beginning offset of the epilog, so every
8380 // instruction afterwards needs an unwind code. In the case above, if you call
8381 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
8383 bool unwindStarted = false;
8385 // Tear down the stack frame
8387 if (compiler->compLocallocUsed)
8391 compiler->unwindBegEpilog();
8392 unwindStarted = true;
8396 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
8397 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
8401 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
8404 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
8409 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
8410 compiler->unwindBegEpilog();
8411 unwindStarted = true;
8414 if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE)
8416 // IAT_RELPVALUE jump at the end is done using relative indirection, so,
8417 // additional helper register is required.
8418 // We use LR just before it is going to be restored from stack, i.e.
8429 regNumber indCallReg = REG_R12;
8430 regNumber vptrReg1 = REG_LR;
8432 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8433 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg);
8434 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8435 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1);
8438 genPopCalleeSavedRegisters(jmpEpilog);
8440 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8442 // We better not have used a pop PC to return otherwise this will be unreachable code
8443 noway_assert(!genUsedPopToReturn);
8445 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
8446 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
8447 compiler->unwindAllocStack(preSpillRegArgSize);
8452 // We better not have used a pop PC to return otherwise this will be unreachable code
8453 noway_assert(!genUsedPopToReturn);
8456 #else // _TARGET_ARM64_
8457 compiler->unwindBegEpilog();
8459 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
8460 #endif // _TARGET_ARM64_
8464 hasTailCalls = true;
8466 noway_assert(block->bbJumpKind == BBJ_RETURN);
8467 noway_assert(block->bbTreeList != nullptr);
8469 /* figure out what jump we have */
8470 GenTree* jmpNode = lastNode;
8471 #if !FEATURE_FASTTAILCALL
8472 noway_assert(jmpNode->gtOper == GT_JMP);
8473 #else // FEATURE_FASTTAILCALL
8475 // If jmpNode is GT_JMP then gtNext must be null.
8476 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8477 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8479 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8480 noway_assert((jmpNode->gtOper == GT_JMP) ||
8481 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8483 // The next block is associated with this "if" stmt
8484 if (jmpNode->gtOper == GT_JMP)
8485 #endif // FEATURE_FASTTAILCALL
8487 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8488 // the same descriptor with some minor adjustments.
8489 assert(methHnd != nullptr);
8490 assert(addrInfo.addr != nullptr);
8492 #ifdef _TARGET_ARMARCH_
8493 emitter::EmitCallType callType;
8495 regNumber indCallReg;
8496 switch (addrInfo.accessType)
8499 if (validImmForBL((ssize_t)addrInfo.addr))
8501 // Simple direct call
8502 callType = emitter::EC_FUNC_TOKEN;
8503 addr = addrInfo.addr;
8504 indCallReg = REG_NA;
8508 // otherwise the target address doesn't fit in an immediate
8509 // so we have to burn a register...
8513 // Load the address into a register, load indirect and call through a register
8514 // We have to use R12 since we assume the argument registers are in use
8515 callType = emitter::EC_INDIR_R;
8516 indCallReg = REG_INDIRECT_CALL_TARGET_REG;
8518 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8519 if (addrInfo.accessType == IAT_PVALUE)
8521 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8522 regSet.verifyRegUsed(indCallReg);
8528 // Load the address into a register, load relative indirect and call through a register
8529 // We have to use R12 since we assume the argument registers are in use
8530 // LR is used as helper register right before it is restored from stack, thus,
8531 // all relative address calculations are performed before LR is restored.
8532 callType = emitter::EC_INDIR_R;
8533 indCallReg = REG_R12;
8536 regSet.verifyRegUsed(indCallReg);
8542 NO_WAY("Unsupported JMP indirection");
8545 /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
8546 * the same descriptor with some minor adjustments.
8550 getEmitter()->emitIns_Call(callType,
8552 INDEBUG_LDISASM_COMMA(nullptr)
8555 EA_UNKNOWN, // retSize
8556 #if defined(_TARGET_ARM64_)
8557 EA_UNKNOWN, // secondRetSize
8559 gcInfo.gcVarPtrSetCur,
8560 gcInfo.gcRegGCrefSetCur,
8561 gcInfo.gcRegByrefSetCur,
8562 BAD_IL_OFFSET, // IL offset
8569 CLANG_FORMAT_COMMENT_ANCHOR;
8570 #endif //_TARGET_ARMARCH_
8572 #if FEATURE_FASTTAILCALL
8576 // Call target = REG_FASTTAILCALL_TARGET
8577 // https://github.com/dotnet/coreclr/issues/4827
8578 // Do we need a special encoding for stack walker like rex.w prefix for x64?
8579 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
8581 #endif // FEATURE_FASTTAILCALL
8586 if (!genUsedPopToReturn)
8588 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
8589 // so we need a "bx lr" instruction to return from the function.
8590 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
8591 compiler->unwindBranch16();
8593 #else // _TARGET_ARM64_
8594 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
8595 compiler->unwindReturn(REG_LR);
8596 #endif // _TARGET_ARM64_
8599 compiler->unwindEndEpilog();
8602 #elif defined(_TARGET_XARCH_)
8604 void CodeGen::genFnEpilog(BasicBlock* block)
8609 printf("*************** In genFnEpilog()\n");
8613 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8615 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8616 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8617 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8619 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
8622 genInterruptibleUsed = true;
8625 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8628 if (compiler->opts.dspCode)
8630 printf("\n__epilog:\n");
8635 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8636 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8637 printf(", gcRegGCrefSetCur=");
8638 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8639 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8640 printf(", gcRegByrefSetCur=");
8641 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8642 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8647 // Restore float registers that were saved to stack before SP is modified.
8648 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
8650 #ifdef JIT32_GCENCODER
8651 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
8652 // the above call to `genRestoreCalleeSavedFltRegs` because that function
8653 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
8654 // which is the only target that uses the JIT32 GC encoder
8655 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
8656 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
8657 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
8658 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
8659 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
8660 // we will need to rethink this.
8661 getEmitter()->emitStartEpilog();
8664 /* Compute the size in bytes we've pushed/popped */
8666 if (!doubleAlignOrFramePointerUsed())
8668 // We have an ESP frame */
8670 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
8672 /* Get rid of our local variables */
8674 if (compiler->compLclFrameSize)
8677 /* Add 'compiler->compLclFrameSize' to ESP */
8678 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
8680 if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
8682 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8683 regSet.verifyRegUsed(REG_ECX);
8686 #endif // _TARGET_X86
8688 /* Add 'compiler->compLclFrameSize' to ESP */
8689 /* Generate "add esp, <stack-size>" */
8690 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8694 genPopCalleeSavedRegisters();
8698 noway_assert(doubleAlignOrFramePointerUsed());
8700 /* Tear down the stack frame */
8702 bool needMovEspEbp = false;
8705 if (compiler->genDoubleAlign())
8708 // add esp, compLclFrameSize
8710 // We need not do anything (except the "mov esp, ebp") if
8711 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
8712 // also complicates the code manager. Hence, we ignore that case.
8714 noway_assert(compiler->compLclFrameSize != 0);
8715 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8717 needMovEspEbp = true;
8720 #endif // DOUBLE_ALIGN
8722 bool needLea = false;
8724 if (compiler->compLocallocUsed)
8726 // ESP may be variable if a localloc was actually executed. Reset it.
8727 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8731 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
8733 if (compiler->compLclFrameSize != 0)
8735 #ifdef _TARGET_AMD64_
8736 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
8737 // do an LEA to "pop off" the frame allocation.
8739 #else // !_TARGET_AMD64_
8740 // We will just generate "mov esp, ebp" and be done with it.
8741 needMovEspEbp = true;
8742 #endif // !_TARGET_AMD64_
8745 else if (compiler->compLclFrameSize == 0)
8747 // do nothing before popping the callee-saved registers
8750 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
8752 // "pop ecx" will make ESP point to the callee-saved registers
8753 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8754 regSet.verifyRegUsed(REG_ECX);
8756 #endif // _TARGET_X86
8759 // We need to make ESP point to the callee-saved registers
8767 #ifdef _TARGET_AMD64_
8768 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
8770 // Case 1: localloc not used.
8771 // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
8772 // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8773 // The amount to be subtracted from RBP to point at callee saved int regs.
8775 // Case 2: localloc used
8776 // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
8777 // Offset = Amount to be added to RBP to point at callee saved int regs.
8778 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
8780 // Offset should fit within a byte if localloc is not used.
8781 if (!compiler->compLocallocUsed)
8783 noway_assert(offset < UCHAR_MAX);
8786 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8787 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8788 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
8791 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
8796 // Pop the callee-saved registers (if any)
8799 genPopCalleeSavedRegisters();
8801 #ifdef _TARGET_AMD64_
8802 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
8803 #else // !_TARGET_AMD64_
8807 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
8809 #endif // !_TARGET_AMD64_
8812 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
8815 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
8817 /* Check if this a special return block i.e.
8818 * CEE_JMP instruction */
8822 noway_assert(block->bbJumpKind == BBJ_RETURN);
8823 noway_assert(block->bbTreeList);
8825 // figure out what jump we have
8826 GenTree* jmpNode = block->lastNode();
8827 #if !FEATURE_FASTTAILCALL
8829 noway_assert(jmpNode->gtOper == GT_JMP);
8832 // If jmpNode is GT_JMP then gtNext must be null.
8833 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8834 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8836 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8837 noway_assert((jmpNode->gtOper == GT_JMP) ||
8838 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8840 // The next block is associated with this "if" stmt
8841 if (jmpNode->gtOper == GT_JMP)
8844 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8845 // the same descriptor with some minor adjustments.
8846 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
8848 CORINFO_CONST_LOOKUP addrInfo;
8849 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8850 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
8852 NO_WAY("Unsupported JMP indirection");
8855 const emitter::EmitCallType callType =
8856 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
8858 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8859 // the same descriptor with some minor adjustments.
8862 getEmitter()->emitIns_Call(callType,
8864 INDEBUG_LDISASM_COMMA(nullptr)
8867 EA_UNKNOWN // retSize
8868 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
8869 gcInfo.gcVarPtrSetCur,
8870 gcInfo.gcRegGCrefSetCur,
8871 gcInfo.gcRegByrefSetCur,
8872 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
8877 #if FEATURE_FASTTAILCALL
8880 #ifdef _TARGET_AMD64_
8882 // Call target = RAX.
8883 // Stack walker requires that a register indirect tail call be rex.w prefixed.
8884 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
8886 assert(!"Fast tail call as epilog+jmp");
8888 #endif //_TARGET_AMD64_
8890 #endif // FEATURE_FASTTAILCALL
8894 unsigned stkArgSize = 0; // Zero on all platforms except x86
8896 #if defined(_TARGET_X86_)
8897 bool fCalleePop = true;
8899 // varargs has caller pop
8900 if (compiler->info.compIsVarArgs)
8904 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
8906 #endif // UNIX_X86_ABI
8910 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
8911 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
8913 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
8915 #endif // _TARGET_X86_
8917 /* Return, popping our arguments (if any) */
8918 instGen_Return(stkArgSize);
8923 #error Unsupported or unset target architecture
8926 #if FEATURE_EH_FUNCLETS
8930 /*****************************************************************************
8932 * Generates code for an EH funclet prolog.
8934 * Funclets have the following incoming arguments:
8936 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
8937 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
8938 * finally/fault: none
8940 * Funclets set the following registers on exit:
8942 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
8943 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
8944 * finally/fault: none
8946 * The ARM funclet prolog sequence is:
8948 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
8949 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
8950 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
8951 * ; calculated for the entire function.
8952 * sub sp, XXX ; Establish the rest of the frame.
8953 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
8954 * ; up to preserve stack alignment. If we push an odd number of registers, we also
8955 * ; generate this, to keep the stack aligned.
8957 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
8959 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
8962 * if (this is a filter funclet)
8964 * // r1 on entry to a filter funclet is CallerSP of the containing function:
8965 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
8966 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
8967 * // a funclet. Consider:
8971 * // throw new Exception();
8972 * // } catch(Exception) {
8973 * // throw new Exception(); // The exception thrown here ...
8975 * // } filter { // ... will be processed here, while the "catch" funclet frame is
8976 * // // still on the stack
8977 * // } filter-handler {
8980 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
8981 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
8982 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
8984 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
8985 * ; the dynamically containing funclet or function)
8986 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
8987 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
8991 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
8992 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
8994 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
8995 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
8998 * The epilog sequence is then:
9000 * add sp, XXX ; if necessary
9003 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
9004 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
9006 * The funclet frame is thus:
9009 * |-----------------------|
9012 * +=======================+ <---- Caller's SP
9013 * |Callee saved registers |
9014 * |-----------------------|
9015 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
9016 * | | // in function and funclet
9017 * |-----------------------|
9018 * | PSP slot | // Omitted in CoreRT ABI
9019 * |-----------------------|
9020 * ~ possible 4 byte pad ~
9022 * |-----------------------|
9023 * | Outgoing arg space |
9024 * |-----------------------| <---- Ambient SP
9031 void CodeGen::genFuncletProlog(BasicBlock* block)
9035 printf("*************** In genFuncletProlog()\n");
9038 assert(block != NULL);
9039 assert(block->bbFlags & BBF_FUNCLET_BEG);
9041 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9043 gcInfo.gcResetForBB();
9045 compiler->unwindBegProlog();
9047 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9048 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
9050 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
9051 maskPushRegsInt |= maskStackAlloc;
9053 assert(FitsIn<int>(maskPushRegsInt));
9054 inst_IV(INS_push, (int)maskPushRegsInt);
9055 compiler->unwindPushMaskInt(maskPushRegsInt);
9057 if (maskPushRegsFloat != RBM_NONE)
9059 genPushFltRegs(maskPushRegsFloat);
9060 compiler->unwindPushMaskFloat(maskPushRegsFloat);
9063 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
9065 regMaskTP maskArgRegsLiveIn;
9068 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
9070 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9072 maskArgRegsLiveIn = RBM_NONE;
9076 maskArgRegsLiveIn = RBM_R0;
9079 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
9080 bool initRegZeroed = false;
9082 if (maskStackAlloc == RBM_NONE)
9084 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9087 // This is the end of the OS-reported prolog for purposes of unwinding
9088 compiler->unwindEndProlog();
9092 // This is the first block of a filter
9094 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
9095 genFuncletInfo.fiPSP_slot_CallerSP_offset);
9096 regSet.verifyRegUsed(REG_R1);
9097 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
9098 genFuncletInfo.fiPSP_slot_SP_offset);
9099 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
9100 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9104 // This is a non-filter funclet
9105 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
9106 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9107 regSet.verifyRegUsed(REG_R3);
9108 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
9109 genFuncletInfo.fiPSP_slot_SP_offset);
9113 /*****************************************************************************
9115 * Generates code for an EH funclet epilog.
9118 void CodeGen::genFuncletEpilog()
9122 printf("*************** In genFuncletEpilog()\n");
9125 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9127 // Just as for the main function, we delay starting the unwind codes until we have
9128 // an instruction which we know needs an unwind code. This is to support code like
9132 // pop {r4,r5,r6,r10,r11,pc}
9133 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
9135 bool unwindStarted = false;
9137 /* The saved regs info saves the LR register. We need to pop the PC register to return */
9138 assert(genFuncletInfo.fiSaveRegs & RBM_LR);
9140 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9141 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
9143 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
9144 maskPopRegsInt |= maskStackAlloc;
9146 if (maskStackAlloc == RBM_NONE)
9148 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
9153 // We'll definitely generate an unwindable instruction next
9154 compiler->unwindBegEpilog();
9155 unwindStarted = true;
9158 maskPopRegsInt &= ~RBM_LR;
9159 maskPopRegsInt |= RBM_PC;
9161 if (maskPopRegsFloat != RBM_NONE)
9163 genPopFltRegs(maskPopRegsFloat);
9164 compiler->unwindPopMaskFloat(maskPopRegsFloat);
9167 assert(FitsIn<int>(maskPopRegsInt));
9168 inst_IV(INS_pop, (int)maskPopRegsInt);
9169 compiler->unwindPopMaskInt(maskPopRegsInt);
9171 compiler->unwindEndEpilog();
9174 /*****************************************************************************
9176 * Capture the information used to generate the funclet prologs and epilogs.
9177 * Note that all funclet prologs are identical, and all funclet epilogs are
9178 * identical (per type: filters are identical, and non-filters are identical).
9179 * Thus, we compute the data used for these just once.
9181 * See genFuncletProlog() for more information about the prolog/epilog sequences.
9184 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9186 if (compiler->ehAnyFunclets())
9188 assert(isFramePointerUsed());
9189 assert(compiler->lvaDoneFrameLayout ==
9190 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9192 // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
9193 // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
9194 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
9195 // (also assumed in genFnProlog()).
9196 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
9197 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9198 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
9200 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
9201 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
9202 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
9203 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9204 unsigned funcletFrameSize =
9205 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
9207 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
9208 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
9209 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
9211 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
9212 int PSP_slot_CallerSP_offset =
9213 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
9215 /* Now save it for future use */
9217 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
9218 genFuncletInfo.fiSpDelta = spDelta;
9219 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
9220 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
9226 printf("Funclet prolog / epilog info\n");
9227 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9228 printf(" Save regs: ");
9229 dspRegMask(rsMaskSaveRegs);
9231 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9232 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
9233 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
9235 if (PSP_slot_CallerSP_offset !=
9236 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
9237 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
9238 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9242 assert(PSP_slot_CallerSP_offset < 0);
9243 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9245 assert(PSP_slot_CallerSP_offset ==
9246 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
9247 // function and funclet!
9252 #elif defined(_TARGET_AMD64_)
9254 /*****************************************************************************
9256 * Generates code for an EH funclet prolog.
9258 * Funclets have the following incoming arguments:
9260 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
9261 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
9262 * finally/fault: rcx = InitialSP
9264 * Funclets set the following registers on exit:
9266 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
9267 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9268 * finally/fault: none
9270 * The AMD64 funclet prolog sequence is:
9273 * push callee-saved regs
9274 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
9275 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
9276 * ; the entire function.
9277 * sub sp, XXX ; Establish the rest of the frame.
9278 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9279 * ; up to preserve stack alignment. If we push an odd number of registers, we also
9280 * ; generate this, to keep the stack aligned.
9282 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9284 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9286 * ; Also, re-establish the frame pointer from the PSP.
9288 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
9289 * ; PSP of the dynamically containing funclet or function)
9290 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
9291 * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
9292 * ; Function_InitialSP_to_FP_delta==0, we don't need this
9295 * The epilog sequence is then:
9298 * pop callee-saved regs ; if necessary
9302 * The funclet frame is thus:
9305 * |-----------------------|
9308 * +=======================+ <---- Caller's SP
9309 * | Return address |
9310 * |-----------------------|
9312 * |-----------------------|
9313 * |Callee saved registers |
9314 * |-----------------------|
9315 * ~ possible 8 byte pad ~
9317 * |-----------------------|
9318 * | PSP slot | // Omitted in CoreRT ABI
9319 * |-----------------------|
9320 * | Outgoing arg space | // this only exists if the function makes a call
9321 * |-----------------------| <---- Initial SP
9327 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
9328 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
9329 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
9330 * "FRAMEPTR OFFSETS" for details.
9333 void CodeGen::genFuncletProlog(BasicBlock* block)
9338 printf("*************** In genFuncletProlog()\n");
9342 assert(!regSet.rsRegsModified(RBM_FPBASE));
9343 assert(block != nullptr);
9344 assert(block->bbFlags & BBF_FUNCLET_BEG);
9345 assert(isFramePointerUsed());
9347 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9349 gcInfo.gcResetForBB();
9351 compiler->unwindBegProlog();
9353 // We need to push ebp, since it's callee-saved.
9354 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
9355 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
9356 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
9357 // is stored here (all temps are allocated in the parent frame).
9358 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
9359 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
9361 inst_RV(INS_push, REG_FPBASE, TYP_REF);
9362 compiler->unwindPush(REG_FPBASE);
9364 // Callee saved int registers are pushed to stack.
9365 genPushCalleeSavedRegisters();
9367 regMaskTP maskArgRegsLiveIn;
9368 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9370 maskArgRegsLiveIn = RBM_ARG_0;
9374 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
9377 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
9378 bool initRegZeroed = false;
9380 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9382 // Callee saved float registers are copied to stack in their assigned stack slots
9383 // after allocating space for them as part of funclet frame.
9384 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9386 // This is the end of the OS-reported prolog for purposes of unwinding
9387 compiler->unwindEndProlog();
9389 // If there is no PSPSym (CoreRT ABI), we are done.
9390 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9395 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9397 regSet.verifyRegUsed(REG_FPBASE);
9399 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9401 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
9403 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
9404 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9407 // We've modified EBP, but not really. Say that we haven't...
9408 regSet.rsRemoveRegsModified(RBM_FPBASE);
9411 /*****************************************************************************
9413 * Generates code for an EH funclet epilog.
9415 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
9418 void CodeGen::genFuncletEpilog()
9423 printf("*************** In genFuncletEpilog()\n");
9427 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9429 // Restore callee saved XMM regs from their stack slots before modifying SP
9430 // to position at callee saved int regs.
9431 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9432 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
9433 genPopCalleeSavedRegisters();
9434 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9438 /*****************************************************************************
9440 * Capture the information used to generate the funclet prologs and epilogs.
9443 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9445 if (!compiler->ehAnyFunclets())
9450 // Note that compLclFrameSize can't be used (for can we call functions that depend on it),
9451 // because we're not going to allocate the same size frame as the parent.
9453 assert(isFramePointerUsed());
9454 assert(compiler->lvaDoneFrameLayout ==
9455 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9456 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
9458 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
9459 // that's ok, because we're figuring out an offset in the parent frame.
9460 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
9461 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
9464 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9465 #ifndef UNIX_AMD64_ABI
9466 // No 4 slots for outgoing params on the stack for System V systems.
9467 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
9468 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
9469 // slots if there are any calls in the function.
9470 #endif // UNIX_AMD64_ABI
9471 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
9473 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
9475 // How much stack do we allocate in the funclet?
9476 // We need to 16-byte align the stack.
9478 unsigned totalFrameSize =
9479 REGSIZE_BYTES // return address
9480 + REGSIZE_BYTES // pushed EBP
9481 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
9483 // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement.
9484 // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary.
9485 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
9486 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
9488 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
9490 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
9491 + calleeFPRegsSavedSize // pushed callee-saved float regs
9492 // below calculated 'pad' will go here
9493 + PSPSymSize // PSPSym
9494 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9497 unsigned pad = AlignmentPad(totalFrameSize, 16);
9499 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
9500 + calleeFPRegsSavedSize // Callee saved xmm regs
9501 + pad + PSPSymSize // PSPSym
9502 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9509 printf("Funclet prolog / epilog info\n");
9510 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9511 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9512 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9515 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9517 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
9518 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
9524 #elif defined(_TARGET_ARM64_)
9526 // Look in CodeGenArm64.cpp
9528 #elif defined(_TARGET_X86_)
9530 /*****************************************************************************
9532 * Generates code for an EH funclet prolog.
9535 * Funclets have the following incoming arguments:
9537 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
9538 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
9539 * finally/fault: none
9541 * Funclets set the following registers on exit:
9543 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
9544 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9545 * finally/fault: none
9547 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
9551 void CodeGen::genFuncletProlog(BasicBlock* block)
9556 printf("*************** In genFuncletProlog()\n");
9560 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9562 gcInfo.gcResetForBB();
9564 compiler->unwindBegProlog();
9566 // This is the end of the OS-reported prolog for purposes of unwinding
9567 compiler->unwindEndProlog();
9569 // TODO We may need EBP restore sequence here if we introduce PSPSym
9571 // Add a padding for 16-byte alignment
9572 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
9575 /*****************************************************************************
9577 * Generates code for an EH funclet epilog.
9580 void CodeGen::genFuncletEpilog()
9585 printf("*************** In genFuncletEpilog()\n");
9589 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9591 // Revert a padding that was added for 16-byte alignment
9592 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
9597 /*****************************************************************************
9599 * Capture the information used to generate the funclet prologs and epilogs.
9602 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9604 if (!compiler->ehAnyFunclets())
9612 /*****************************************************************************
9614 * Generates code for an EH funclet prolog.
9617 void CodeGen::genFuncletProlog(BasicBlock* block)
9619 NYI("Funclet prolog");
9622 /*****************************************************************************
9624 * Generates code for an EH funclet epilog.
9627 void CodeGen::genFuncletEpilog()
9629 NYI("Funclet epilog");
9632 /*****************************************************************************
9634 * Capture the information used to generate the funclet prologs and epilogs.
9637 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9639 if (compiler->ehAnyFunclets())
9641 NYI("genCaptureFuncletPrologEpilogInfo()");
9647 /*-----------------------------------------------------------------------------
9649 * Set the main function PSPSym value in the frame.
9650 * Funclets use different code to load the PSP sym and save it in their frame.
9651 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
9652 * The PSPSym section of that document is copied here.
9654 ***********************************
9655 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
9656 * accesses locals from the main function body.
9658 * First, two definitions.
9660 * Caller-SP is the value of the stack pointer in a function's caller before the call
9661 * instruction is executed. That is, when function A calls function B, Caller-SP for B
9662 * is the value of the stack pointer immediately before the call instruction in A
9663 * (calling B) was executed. Note that this definition holds for both AMD64, which
9664 * pushes the return value when a call instruction is executed, and for ARM, which
9665 * doesn't. For AMD64, Caller-SP is the address above the call return address.
9667 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
9668 * the frame has been allocated. That is, before any "alloca"-type allocations.
9670 * The PSPSym is a pointer-sized local variable in the frame of the main function and
9671 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
9672 * for the main function. The stack offset of the PSPSym is reported to the VM in the
9673 * GC information header. The value reported in the GC information is the offset of the
9674 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
9675 * value is reported to the VM, differs between architectures. In particular, note that
9676 * most things in the GC information header are reported as offsets relative to Caller-SP,
9677 * but PSPSym on AMD64 is one (maybe the only) exception.)
9679 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
9680 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
9681 * the frame pointer is the same value in a funclet as it is in the main function body.
9683 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
9684 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
9685 * only true for first pass funclets (currently just filters) and it is passed as the second
9686 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
9687 * frame in the exception processing system. For the CLR, it points either to the main function
9688 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
9689 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
9691 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
9692 * don't know if the Establisher Frame is from the main function or a funclet, we design the
9693 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
9694 * offset from the Establisher Frame in each case. (This is also required because we only report
9695 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
9696 * function and all of its funclets). Then, the funclet uses this known offset to compute the
9697 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
9698 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
9699 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
9700 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
9701 * for every nested funclet invocation.
9703 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
9704 * restores all non-volatile registers to their values within the parent frame. This includes
9705 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
9706 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
9708 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
9709 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
9710 * ARM this is the first argument and passed in R0.
9712 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
9713 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
9714 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
9715 * is required in all funclets as well as the main function, whereas if the establisher frame was
9716 * correctly reported, the PSPSym could be omitted in some cases.)
9717 ***********************************
9719 void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
9721 assert(compiler->compGeneratingProlog);
9723 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9728 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
9730 #if defined(_TARGET_ARM_)
9732 // We either generate:
9734 // str r1, [reg + PSPSymOffset]
9737 // str r1, [reg + PSPSymOffset]
9738 // depending on the smallest encoding
9740 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9745 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
9747 // use the "add <reg>, sp, imm" form
9749 callerSPOffs = SPtoCallerSPdelta;
9750 regBase = REG_SPBASE;
9754 // use the "add <reg>, r11, imm" form
9756 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
9757 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
9759 callerSPOffs = FPtoCallerSPdelta;
9760 regBase = REG_FPBASE;
9763 // We will just use the initReg since it is an available register
9764 // and we are probably done using it anyway...
9765 regNumber regTmp = initReg;
9766 *pInitRegZeroed = false;
9768 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
9769 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9771 #elif defined(_TARGET_ARM64_)
9773 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9775 // We will just use the initReg since it is an available register
9776 // and we are probably done using it anyway...
9777 regNumber regTmp = initReg;
9778 *pInitRegZeroed = false;
9780 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
9781 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9783 #elif defined(_TARGET_AMD64_)
9785 // The PSP sym value is Initial-SP, not Caller-SP!
9786 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
9787 // has been established.
9790 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
9792 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
9796 NYI("Set function PSP sym");
9801 #endif // FEATURE_EH_FUNCLETS
9803 /*****************************************************************************
9805 * Generates code for all the function and funclet prologs and epilogs.
9808 void CodeGen::genGeneratePrologsAndEpilogs()
9813 printf("*************** Before prolog / epilog generation\n");
9814 getEmitter()->emitDispIGlist(false);
9818 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
9819 // This affects our code that determines which untracked locals need to be zero initialized.
9820 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
9822 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
9824 getEmitter()->emitStartPrologEpilogGeneration();
9826 gcInfo.gcResetForBB();
9829 // Generate all the prologs and epilogs.
9830 CLANG_FORMAT_COMMENT_ANCHOR;
9832 #if FEATURE_EH_FUNCLETS
9834 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
9835 // information computed during codegen, or during function prolog generation, like
9836 // frame offsets. It must run after main function prolog generation.
9838 genCaptureFuncletPrologEpilogInfo();
9840 #endif // FEATURE_EH_FUNCLETS
9842 // Walk the list of prologs and epilogs and generate them.
9843 // We maintain a list of prolog and epilog basic blocks in
9844 // the insGroup structure in the emitter. This list was created
9845 // during code generation by the genReserve*() functions.
9847 // TODO: it seems like better design would be to create a list of prologs/epilogs
9848 // in the code generator (not the emitter), and then walk that list. But we already
9849 // have the insGroup list, which serves well, so we don't need the extra allocations
9850 // for a prolog/epilog list in the code generator.
9852 getEmitter()->emitGeneratePrologEpilog();
9854 // Tell the emitter we're done with all prolog and epilog generation.
9856 getEmitter()->emitFinishPrologEpilogGeneration();
9861 printf("*************** After prolog / epilog generation\n");
9862 getEmitter()->emitDispIGlist(false);
9868 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9869 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9871 XX End Prolog / Epilog XX
9873 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9874 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9878 void CodeGen::genGenerateStackProbe()
9880 noway_assert(compiler->opts.compNeedStackProbes);
9882 // If this assert fires, it means somebody has changed the value
9883 // CORINFO_STACKPROBE_DEPTH.
9884 // Why does the EE need such a deep probe? It should just need a couple
9885 // of bytes, to set up a frame in the unmanaged code..
9887 static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
9889 JITDUMP("Emitting stack probe:\n");
9890 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
9891 -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
9893 #endif // STACK_PROBES
9895 #if defined(_TARGET_XARCH_)
9896 // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9897 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9898 // Here offset = 16-byte aligned offset after pushing integer registers.
9901 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
9902 // non-funclet: this will be compLclFrameSize.
9903 // funclet frames: this will be FuncletInfo.fiSpDelta.
9904 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
9906 genVzeroupperIfNeeded(false);
9907 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9909 // Only callee saved floating point registers should be in regMask
9910 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9913 if (regMask == RBM_NONE)
9918 #ifdef _TARGET_AMD64_
9919 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9920 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9922 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
9923 assert((offset % 16) == 0);
9924 instruction copyIns = ins_Copy(TYP_FLOAT);
9925 #else // !_TARGET_AMD64_
9926 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
9927 instruction copyIns = INS_movupd;
9928 #endif // !_TARGET_AMD64_
9930 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9932 regMaskTP regBit = genRegMask(reg);
9933 if ((regBit & regMask) != 0)
9935 // ABI requires us to preserve lower 128-bits of YMM register.
9936 getEmitter()->emitIns_AR_R(copyIns,
9937 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
9939 reg, REG_SPBASE, offset);
9940 compiler->unwindSaveReg(reg, offset);
9942 offset -= XMM_REGSIZE_BYTES;
9947 // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9948 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9949 // Here offset = 16-byte aligned offset after pushing integer registers.
9952 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
9953 // non-funclet: this will be compLclFrameSize.
9954 // funclet frames: this will be FuncletInfo.fiSpDelta.
9955 void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
9957 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9959 // Only callee saved floating point registers should be in regMask
9960 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9963 if (regMask == RBM_NONE)
9965 genVzeroupperIfNeeded();
9969 #ifdef _TARGET_AMD64_
9970 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9971 instruction copyIns = ins_Copy(TYP_FLOAT);
9972 #else // !_TARGET_AMD64_
9973 unsigned firstFPRegPadding = 0;
9974 instruction copyIns = INS_movupd;
9975 #endif // !_TARGET_AMD64_
9979 if (compiler->compLocallocUsed)
9981 // localloc frame: use frame pointer relative offset
9982 assert(isFramePointerUsed());
9983 regBase = REG_FPBASE;
9984 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
9988 regBase = REG_SPBASE;
9989 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9992 #ifdef _TARGET_AMD64_
9993 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
9994 assert((offset % 16) == 0);
9995 #endif // _TARGET_AMD64_
9997 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9999 regMaskTP regBit = genRegMask(reg);
10000 if ((regBit & regMask) != 0)
10002 // ABI requires us to restore lower 128-bits of YMM register.
10003 getEmitter()->emitIns_R_AR(copyIns,
10004 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
10006 reg, regBase, offset);
10007 regMask &= ~regBit;
10008 offset -= XMM_REGSIZE_BYTES;
10011 genVzeroupperIfNeeded();
10014 // Generate Vzeroupper instruction as needed to zero out upper 128b-bit of all YMM registers so that the
10015 // AVX/Legacy SSE transition penalties can be avoided. This function is been used in genPreserveCalleeSavedFltRegs
10016 // (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
10017 // 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
10018 // code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
10019 // if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
10022 // check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
10023 // instruction, false to check if the function contains AVX instruciton (either 128-bit or 256-bit).
10025 void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
10027 bool emitVzeroUpper = false;
10028 if (check256bitOnly)
10030 emitVzeroUpper = getEmitter()->Contains256bitAVX();
10034 emitVzeroUpper = getEmitter()->ContainsAVX();
10037 if (emitVzeroUpper)
10039 assert(compiler->canUseVexEncoding());
10040 instGen(INS_vzeroupper);
10044 #endif // defined(_TARGET_XARCH_)
10046 //-----------------------------------------------------------------------------------
10047 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
10050 // hClass - type handle
10053 // true if type is returned in multiple registers, false otherwise.
10055 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
10057 if (hClass == NO_CLASS_HANDLE)
10062 structPassingKind howToReturnStruct;
10063 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
10065 return (varTypeIsStruct(returnType));
10068 //----------------------------------------------
10069 // Methods that support HFA's for ARM32/ARM64
10070 //----------------------------------------------
10072 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
10075 return varTypeIsFloating(GetHfaType(hClass));
10081 bool Compiler::IsHfa(GenTree* tree)
10084 return IsHfa(gtGetStructHandleIfPresent(tree));
10090 var_types Compiler::GetHfaType(GenTree* tree)
10093 return GetHfaType(gtGetStructHandleIfPresent(tree));
10099 unsigned Compiler::GetHfaCount(GenTree* tree)
10101 return GetHfaCount(gtGetStructHandleIfPresent(tree));
10104 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
10106 var_types result = TYP_UNDEF;
10107 if (hClass != NO_CLASS_HANDLE)
10110 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
10111 if (corType != CORINFO_TYPE_UNDEF)
10113 result = JITtype2varType(corType);
10115 #endif // FEATURE_HFA
10120 //------------------------------------------------------------------------
10121 // GetHfaCount: Given a class handle for an HFA struct
10122 // return the number of registers needed to hold the HFA
10124 // Note that on ARM32 the single precision registers overlap with
10125 // the double precision registers and for that reason each
10126 // double register is considered to be two single registers.
10127 // Thus for ARM32 an HFA of 4 doubles this function will return 8.
10128 // On ARM64 given an HFA of 4 singles or 4 doubles this function will
10129 // will return 4 for both.
10131 // hClass: the class handle of a HFA struct
10133 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
10135 assert(IsHfa(hClass));
10136 #ifdef _TARGET_ARM_
10137 // A HFA of doubles is twice as large as an HFA of singles for ARM32
10138 // (i.e. uses twice the number of single precison registers)
10139 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
10140 #else // _TARGET_ARM64_
10141 var_types hfaType = GetHfaType(hClass);
10142 unsigned classSize = info.compCompHnd->getClassSize(hClass);
10143 // Note that the retail build issues a warning about a potential divsion by zero without the Max function
10144 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
10145 return classSize / elemSize;
10146 #endif // _TARGET_ARM64_
10149 #ifdef _TARGET_XARCH_
10151 //------------------------------------------------------------------------
10152 // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
10153 // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
10154 // X86/x64 has a special encoding for shift/rotate-by-constant-1.
10157 // ins: the base shift/rotate instruction
10158 // shiftByValue: the constant value by which we are shifting/rotating
10160 instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
10162 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
10165 // Which format should we use?
10167 instruction shiftByConstantIns;
10169 if (shiftByValue == 1)
10171 // Use the shift-by-one format.
10173 assert(INS_rcl + 1 == INS_rcl_1);
10174 assert(INS_rcr + 1 == INS_rcr_1);
10175 assert(INS_rol + 1 == INS_rol_1);
10176 assert(INS_ror + 1 == INS_ror_1);
10177 assert(INS_shl + 1 == INS_shl_1);
10178 assert(INS_shr + 1 == INS_shr_1);
10179 assert(INS_sar + 1 == INS_sar_1);
10181 shiftByConstantIns = (instruction)(ins + 1);
10185 // Use the shift-by-NNN format.
10187 assert(INS_rcl + 2 == INS_rcl_N);
10188 assert(INS_rcr + 2 == INS_rcr_N);
10189 assert(INS_rol + 2 == INS_rol_N);
10190 assert(INS_ror + 2 == INS_ror_N);
10191 assert(INS_shl + 2 == INS_shl_N);
10192 assert(INS_shr + 2 == INS_shr_N);
10193 assert(INS_sar + 2 == INS_sar_N);
10195 shiftByConstantIns = (instruction)(ins + 2);
10198 return shiftByConstantIns;
10201 #endif // _TARGET_XARCH_
10203 //------------------------------------------------------------------------------------------------ //
10204 // getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
10207 // The number of the first argument with stack slot on the caller's frame.
10210 // On x64 Windows the caller always creates slots (homing space) in its frame for the
10211 // first 4 arguments of a callee (register passed args). So, the the variable number
10212 // (lclNum) for the first argument with a stack slot is always 0.
10213 // For System V systems or armarch, there is no such calling convention requirement, and the code
10214 // needs to find the first stack passed argument from the caller. This is done by iterating over
10215 // all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
10217 unsigned CodeGen::getFirstArgWithStackSlot()
10219 #if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
10220 unsigned baseVarNum = 0;
10221 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
10222 // that's passed on the stack.
10223 LclVarDsc* varDsc = nullptr;
10224 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
10226 varDsc = &(compiler->lvaTable[i]);
10228 // We should have found a stack parameter (and broken out of this loop) before
10229 // we find any non-parameters.
10230 assert(varDsc->lvIsParam);
10232 if (varDsc->lvArgReg == REG_STK)
10238 assert(varDsc != nullptr);
10241 #elif defined(_TARGET_AMD64_)
10243 #else // _TARGET_X86
10244 // Not implemented for x86.
10245 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10246 return BAD_VAR_NUM;
10247 #endif // _TARGET_X86_
10250 //------------------------------------------------------------------------
10251 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
10253 void CodeGen::genSinglePush()
10255 AddStackLevel(REGSIZE_BYTES);
10258 //------------------------------------------------------------------------
10259 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
10261 void CodeGen::genSinglePop()
10263 SubtractStackLevel(REGSIZE_BYTES);
10266 //------------------------------------------------------------------------
10267 // genPushRegs: Push the given registers.
10270 // regs - mask or registers to push
10271 // byrefRegs - OUT arg. Set to byref registers that were pushed.
10272 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
10275 // Mask of registers pushed.
10278 // This function does not check if the register is marked as used, etc.
10280 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
10282 *byrefRegs = RBM_NONE;
10283 *noRefRegs = RBM_NONE;
10285 if (regs == RBM_NONE)
10290 #if FEATURE_FIXED_OUT_ARGS
10292 NYI("Don't call genPushRegs with real regs!");
10295 #else // FEATURE_FIXED_OUT_ARGS
10297 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
10298 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
10300 regMaskTP pushedRegs = regs;
10302 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
10304 regMaskTP regBit = regMaskTP(1) << reg;
10306 if ((regBit & regs) == RBM_NONE)
10310 if (regBit & gcInfo.gcRegGCrefSetCur)
10314 else if (regBit & gcInfo.gcRegByrefSetCur)
10316 *byrefRegs |= regBit;
10319 else if (noRefRegs != NULL)
10321 *noRefRegs |= regBit;
10329 inst_RV(INS_push, reg, type);
10332 gcInfo.gcMarkRegSetNpt(regBit);
10339 #endif // FEATURE_FIXED_OUT_ARGS
10342 //------------------------------------------------------------------------
10343 // genPopRegs: Pop the registers that were pushed by genPushRegs().
10346 // regs - mask of registers to pop
10347 // byrefRegs - The byref registers that were pushed by genPushRegs().
10348 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
10353 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
10355 if (regs == RBM_NONE)
10360 #if FEATURE_FIXED_OUT_ARGS
10362 NYI("Don't call genPopRegs with real regs!");
10364 #else // FEATURE_FIXED_OUT_ARGS
10366 noway_assert((regs & byrefRegs) == byrefRegs);
10367 noway_assert((regs & noRefRegs) == noRefRegs);
10368 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
10370 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
10371 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
10373 // Walk the registers in the reverse order as genPushRegs()
10374 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
10376 regMaskTP regBit = regMaskTP(1) << reg;
10378 if ((regBit & regs) == RBM_NONE)
10382 if (regBit & byrefRegs)
10386 else if (regBit & noRefRegs)
10395 inst_RV(INS_pop, reg, type);
10398 if (type != TYP_INT)
10399 gcInfo.gcMarkRegPtrVal(reg, type);
10404 #endif // FEATURE_FIXED_OUT_ARGS
10407 /*****************************************************************************
10410 * This function should be called only after the sizes of the emitter blocks
10411 * have been finalized.
10414 void CodeGen::genSetScopeInfo()
10416 if (!compiler->opts.compScopeInfo)
10424 printf("*************** In genSetScopeInfo()\n");
10428 if (compiler->info.compVarScopesCount == 0)
10430 compiler->eeSetLVcount(0);
10431 compiler->eeSetLVdone();
10435 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
10436 noway_assert(psiOpenScopeList.scNext == nullptr);
10439 unsigned scopeCnt = siScopeCnt + psiScopeCnt;
10441 compiler->eeSetLVcount(scopeCnt);
10444 genTrnslLocalVarCount = scopeCnt;
10447 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
10451 // Record the scopes found for the parameters over the prolog.
10452 // The prolog needs to be treated differently as a variable may not
10453 // have the same info in the prolog block as is given by compiler->lvaTable.
10454 // eg. A register parameter is actually on the stack, before it is loaded to reg.
10456 CodeGen::psiScope* scopeP;
10458 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
10460 noway_assert(scopeP != nullptr);
10461 noway_assert(scopeP->scStartLoc.Valid());
10462 noway_assert(scopeP->scEndLoc.Valid());
10464 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
10465 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
10467 unsigned varNum = scopeP->scSlotNum;
10468 noway_assert(startOffs <= endOffs);
10470 // The range may be 0 if the prolog is empty. For such a case,
10471 // report the liveness of arguments to span at least the first
10472 // instruction in the method. This will be incorrect (except on
10473 // entry to the method) if the very first instruction of the method
10474 // is part of a loop. However, this should happen
10475 // very rarely, and the incorrectness is worth being able to look
10476 // at the argument on entry to the method.
10477 if (startOffs == endOffs)
10479 noway_assert(startOffs == 0);
10483 Compiler::siVarLoc varLoc;
10485 if (scopeP->scRegister)
10487 varLoc.vlType = Compiler::VLT_REG;
10488 varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
10492 varLoc.vlType = Compiler::VLT_STK;
10493 varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
10494 varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
10497 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
10500 // Record the scopes for the rest of the method.
10501 // Check that the LocalVarInfo scopes look OK
10502 noway_assert(siOpenScopeList.scNext == nullptr);
10504 CodeGen::siScope* scopeL;
10506 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
10508 noway_assert(scopeL != nullptr);
10509 noway_assert(scopeL->scStartLoc.Valid());
10510 noway_assert(scopeL->scEndLoc.Valid());
10512 // Find the start and end IP
10514 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
10515 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
10517 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
10519 // For stack vars, find the base register, and offset
10522 signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
10524 if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
10526 baseReg = REG_SPBASE;
10527 offset += scopeL->scStackLevel;
10531 baseReg = REG_FPBASE;
10534 // Now fill in the varLoc
10536 Compiler::siVarLoc varLoc;
10538 // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
10539 // for part of its lifetime, or in different registers for different parts of its lifetime.
10540 // This should only matter for non-debug code, where we do variable enregistration.
10541 // We should store the ranges of variable enregistration in the scope table.
10542 if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
10544 var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
10550 #ifdef _TARGET_64BIT_
10552 #endif // _TARGET_64BIT_
10554 varLoc.vlType = Compiler::VLT_REG;
10555 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10558 #ifndef _TARGET_64BIT_
10560 #if !CPU_HAS_FP_SUPPORT
10564 if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
10566 varLoc.vlType = Compiler::VLT_REG_REG;
10567 varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10568 varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
10572 varLoc.vlType = Compiler::VLT_REG_STK;
10573 varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10574 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
10575 if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
10577 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10579 varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
10582 #endif // !_TARGET_64BIT_
10584 #ifdef _TARGET_64BIT_
10588 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10589 // so no XMM registers can get debug information.
10590 varLoc.vlType = Compiler::VLT_REG_FP;
10591 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10594 #else // !_TARGET_64BIT_
10596 #if CPU_HAS_FP_SUPPORT
10599 if (isFloatRegType(type))
10601 varLoc.vlType = Compiler::VLT_FPSTK;
10602 varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10605 #endif // CPU_HAS_FP_SUPPORT
10607 #endif // !_TARGET_64BIT_
10609 #ifdef FEATURE_SIMD
10614 varLoc.vlType = Compiler::VLT_REG_FP;
10616 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10617 // so no XMM registers can get debug information.
10619 // Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
10620 // in eeDispVar() --> getRegName() that regNumber is valid.
10621 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10623 #endif // FEATURE_SIMD
10626 noway_assert(!"Invalid type");
10631 assert(offset != BAD_STK_OFFS);
10632 LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
10633 switch (genActualType(varDsc->TypeGet()))
10640 case TYP_BLK: // Needed because of the TYP_BLK stress mode
10641 #ifdef FEATURE_SIMD
10647 #ifdef _TARGET_64BIT_
10650 #endif // _TARGET_64BIT_
10651 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10652 // In the AMD64 ABI we are supposed to pass a struct by reference when its
10653 // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
10654 // the IR to comply with the ABI and therefore changes the type of the lclVar
10655 // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
10656 // this is still a struct by setting the lvIsTemp flag.
10657 // The same is true for ARM64 and structs > 16 bytes.
10658 // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
10659 // Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
10660 // to accomodate for this situation.
10661 if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
10663 assert(varDsc->lvIsParam);
10664 varLoc.vlType = Compiler::VLT_STK_BYREF;
10667 #endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10669 varLoc.vlType = Compiler::VLT_STK;
10671 varLoc.vlStk.vlsBaseReg = baseReg;
10672 varLoc.vlStk.vlsOffset = offset;
10673 if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
10675 varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10679 #ifndef _TARGET_64BIT_
10682 varLoc.vlType = Compiler::VLT_STK2;
10683 varLoc.vlStk2.vls2BaseReg = baseReg;
10684 varLoc.vlStk2.vls2Offset = offset;
10685 if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
10687 varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10690 #endif // !_TARGET_64BIT_
10693 noway_assert(!"Invalid type");
10697 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
10698 scopeL->scAvailable, varLoc);
10701 compiler->eeSetLVdone();
10704 //------------------------------------------------------------------------
10705 // genSetScopeInfo: Record scope information for debug info
10709 // startOffs - the starting offset for this scope
10710 // length - the length of this scope
10711 // varNum - the lclVar for this scope info
10717 // Called for every scope info piece to record by the main genSetScopeInfo()
10719 void CodeGen::genSetScopeInfo(unsigned which,
10720 UNATIVE_OFFSET startOffs,
10721 UNATIVE_OFFSET length,
10725 Compiler::siVarLoc& varLoc)
10727 // We need to do some mapping while reporting back these variables.
10729 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
10730 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
10732 #ifdef _TARGET_X86_
10733 // Non-x86 platforms are allowed to access all arguments directly
10734 // so we don't need this code.
10736 // Is this a varargs function?
10738 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
10739 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
10741 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
10743 // All stack arguments (except the varargs handle) have to be
10744 // accessed via the varargs cookie. Discard generated info,
10745 // and just find its position relative to the varargs handle
10747 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
10748 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
10750 noway_assert(!compiler->opts.compDbgCode);
10754 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
10755 // arguments of vararg functions to avoid reporting them to GC.
10756 noway_assert(!compiler->lvaTable[varNum].lvRegister);
10757 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
10758 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
10760 noway_assert(cookieOffset < varOffset);
10761 unsigned offset = varOffset - cookieOffset;
10762 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10763 noway_assert(offset < stkArgSize);
10764 offset = stkArgSize - offset;
10766 varLoc.vlType = Compiler::VLT_FIXED_VA;
10767 varLoc.vlFixedVarArg.vlfvOffset = offset;
10770 #endif // _TARGET_X86_
10772 VarName name = nullptr;
10776 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
10778 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
10780 name = compiler->info.compVarScopes[scopeNum].vsdName;
10784 // Hang on to this compiler->info.
10786 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
10788 tlvi.tlviVarNum = ilVarNum;
10789 tlvi.tlviLVnum = LVnum;
10790 tlvi.tlviName = name;
10791 tlvi.tlviStartPC = startOffs;
10792 tlvi.tlviLength = length;
10793 tlvi.tlviAvailable = avail;
10794 tlvi.tlviVarLoc = varLoc;
10798 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
10801 /*****************************************************************************/
10804 /*****************************************************************************
10807 * Can be called only after lviSetLocalVarInfo() has been called
10811 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
10813 if (!compiler->opts.compScopeInfo)
10816 if (compiler->info.compVarScopesCount == 0)
10819 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10821 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10823 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
10824 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10825 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10827 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10834 /*****************************************************************************
10837 * Can be called only after lviSetLocalVarInfo() has been called
10841 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
10843 if (!compiler->opts.compScopeInfo)
10846 if (compiler->info.compVarScopesCount == 0)
10849 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10851 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10853 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
10854 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10855 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10857 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10864 /*****************************************************************************/
10865 #endif // defined(DEBUG)
10866 #endif // LATE_DISASM
10870 /*****************************************************************************
10871 * Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
10874 void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
10876 if (mappingNum != unsigned(-1))
10878 printf("%d: ", mappingNum);
10881 IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
10883 if (offsx == BAD_IL_OFFSET)
10889 Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
10891 if (jitIsStackEmpty(offsx))
10893 printf(" STACK_EMPTY");
10896 if (jitIsCallInstruction(offsx))
10898 printf(" CALL_INSTRUCTION");
10903 ipMapping->ipmdNativeLoc.Print();
10904 // We can only call this after code generation. Is there any way to tell when it's legal to call?
10905 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
10907 if (ipMapping->ipmdIsLabel)
10915 void CodeGen::genIPmappingListDisp()
10917 unsigned mappingNum = 0;
10918 Compiler::IPmappingDsc* ipMapping;
10920 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
10922 genIPmappingDisp(mappingNum, ipMapping);
10929 /*****************************************************************************
10931 * Append an IPmappingDsc struct to the list that we're maintaining
10932 * for the debugger.
10933 * Record the instr offset as being at the current code gen position.
10936 void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
10938 if (!compiler->opts.compDbgInfo)
10943 assert(offsx != BAD_IL_OFFSET);
10945 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10947 case ICorDebugInfo::PROLOG:
10948 case ICorDebugInfo::EPILOG:
10953 if (offsx != ICorDebugInfo::NO_MAPPING)
10955 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
10958 // Ignore this one if it's the same IL offset as the last one we saw.
10959 // Note that we'll let through two identical IL offsets if the flag bits
10960 // differ, or two identical "special" mappings (e.g., PROLOG).
10961 if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
10963 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
10969 /* Create a mapping entry and append it to the list */
10971 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
10972 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
10973 addMapping->ipmdILoffsx = offsx;
10974 addMapping->ipmdIsLabel = isLabel;
10975 addMapping->ipmdNext = nullptr;
10977 if (compiler->genIPmappingList != nullptr)
10979 assert(compiler->genIPmappingLast != nullptr);
10980 assert(compiler->genIPmappingLast->ipmdNext == nullptr);
10981 compiler->genIPmappingLast->ipmdNext = addMapping;
10985 assert(compiler->genIPmappingLast == nullptr);
10986 compiler->genIPmappingList = addMapping;
10989 compiler->genIPmappingLast = addMapping;
10994 printf("Added IP mapping: ");
10995 genIPmappingDisp(unsigned(-1), addMapping);
11000 /*****************************************************************************
11002 * Prepend an IPmappingDsc struct to the list that we're maintaining
11003 * for the debugger.
11004 * Record the instr offset as being at the current code gen position.
11006 void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
11008 if (!compiler->opts.compDbgInfo)
11013 assert(offsx != BAD_IL_OFFSET);
11014 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
11016 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11018 case ICorDebugInfo::NO_MAPPING:
11019 case ICorDebugInfo::PROLOG:
11020 case ICorDebugInfo::EPILOG:
11024 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
11028 /* Create a mapping entry and prepend it to the list */
11030 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
11031 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
11032 addMapping->ipmdILoffsx = offsx;
11033 addMapping->ipmdIsLabel = true;
11034 addMapping->ipmdNext = nullptr;
11036 addMapping->ipmdNext = compiler->genIPmappingList;
11037 compiler->genIPmappingList = addMapping;
11039 if (compiler->genIPmappingLast == nullptr)
11041 compiler->genIPmappingLast = addMapping;
11047 printf("Added IP mapping to front: ");
11048 genIPmappingDisp(unsigned(-1), addMapping);
11053 /*****************************************************************************/
11055 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
11056 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
11057 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
11059 C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
11060 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
11061 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
11062 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
11064 //------------------------------------------------------------------------
11065 // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
11066 // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
11067 // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
11070 // offsx - the IL_OFFSETX value with the IL offset to extract.
11075 IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
11077 assert(offsx != BAD_IL_OFFSET);
11079 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11081 case ICorDebugInfo::NO_MAPPING:
11082 case ICorDebugInfo::PROLOG:
11083 case ICorDebugInfo::EPILOG:
11087 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11091 //------------------------------------------------------------------------
11092 // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
11093 // distinguished values. Asserts if passed BAD_IL_OFFSET.
11096 // offsx - the IL_OFFSETX value with the IL offset to extract.
11101 IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
11103 assert(offsx != BAD_IL_OFFSET);
11105 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11107 case ICorDebugInfo::NO_MAPPING:
11108 case ICorDebugInfo::PROLOG:
11109 case ICorDebugInfo::EPILOG:
11110 return IL_OFFSET(offsx);
11113 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11117 //------------------------------------------------------------------------
11118 // jitIsStackEmpty: Does the IL offset have the stack empty bit set?
11119 // Asserts if passed BAD_IL_OFFSET.
11122 // offsx - the IL_OFFSETX value to check
11125 // 'true' if the stack empty bit is set; 'false' otherwise.
11127 bool jitIsStackEmpty(IL_OFFSETX offsx)
11129 assert(offsx != BAD_IL_OFFSET);
11131 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11133 case ICorDebugInfo::NO_MAPPING:
11134 case ICorDebugInfo::PROLOG:
11135 case ICorDebugInfo::EPILOG:
11139 return (offsx & IL_OFFSETX_STKBIT) == 0;
11143 //------------------------------------------------------------------------
11144 // jitIsCallInstruction: Does the IL offset have the call instruction bit set?
11145 // Asserts if passed BAD_IL_OFFSET.
11148 // offsx - the IL_OFFSETX value to check
11151 // 'true' if the call instruction bit is set; 'false' otherwise.
11153 bool jitIsCallInstruction(IL_OFFSETX offsx)
11155 assert(offsx != BAD_IL_OFFSET);
11157 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11159 case ICorDebugInfo::NO_MAPPING:
11160 case ICorDebugInfo::PROLOG:
11161 case ICorDebugInfo::EPILOG:
11165 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
11169 /*****************************************************************************/
11171 void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
11173 if (!compiler->opts.compDbgCode)
11178 if (offsx == BAD_IL_OFFSET)
11183 /* If other IL were offsets reported, skip */
11185 if (compiler->genIPmappingLast == nullptr)
11190 if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
11195 /* offsx was the last reported offset. Make sure that we generated native code */
11197 if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
11203 /*****************************************************************************
11205 * Shut down the IP-mapping logic, report the info to the EE.
11208 void CodeGen::genIPmappingGen()
11210 if (!compiler->opts.compDbgInfo)
11218 printf("*************** In genIPmappingGen()\n");
11222 if (compiler->genIPmappingList == nullptr)
11224 compiler->eeSetLIcount(0);
11225 compiler->eeSetLIdone();
11229 Compiler::IPmappingDsc* tmpMapping;
11230 Compiler::IPmappingDsc* prevMapping;
11231 unsigned mappingCnt;
11232 UNATIVE_OFFSET lastNativeOfs;
11234 /* First count the number of distinct mapping records */
11237 lastNativeOfs = UNATIVE_OFFSET(~0);
11239 for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
11240 tmpMapping = tmpMapping->ipmdNext)
11242 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11244 // Managed RetVal - since new sequence points are emitted to identify IL calls,
11245 // make sure that those are not filtered and do not interfere with filtering of
11246 // other sequence points.
11247 if (jitIsCallInstruction(srcIP))
11253 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11255 if (nextNativeOfs != lastNativeOfs)
11258 lastNativeOfs = nextNativeOfs;
11259 prevMapping = tmpMapping;
11263 /* If there are mappings with the same native offset, then:
11264 o If one of them is NO_MAPPING, ignore it
11265 o If one of them is a label, report that and ignore the other one
11266 o Else report the higher IL offset
11269 PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
11270 if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11272 // If the previous entry was NO_MAPPING, ignore it
11273 prevMapping->ipmdNativeLoc.Init();
11274 prevMapping = tmpMapping;
11276 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11278 // If the current entry is NO_MAPPING, ignore it
11279 // Leave prevMapping unchanged as tmpMapping is no longer valid
11280 tmpMapping->ipmdNativeLoc.Init();
11282 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11284 // counting for special cases: see below
11286 prevMapping = tmpMapping;
11290 noway_assert(prevMapping != nullptr);
11291 noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
11292 lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
11294 /* The previous block had the same native offset. We have to
11295 discard one of the mappings. Simply reinitialize ipmdNativeLoc
11296 and prevMapping will be ignored later. */
11298 if (prevMapping->ipmdIsLabel)
11300 // Leave prevMapping unchanged as tmpMapping is no longer valid
11301 tmpMapping->ipmdNativeLoc.Init();
11305 prevMapping->ipmdNativeLoc.Init();
11306 prevMapping = tmpMapping;
11311 /* Tell them how many mapping records we've got */
11313 compiler->eeSetLIcount(mappingCnt);
11315 /* Now tell them about the mappings */
11318 lastNativeOfs = UNATIVE_OFFSET(~0);
11320 for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
11322 // Do we have to skip this record ?
11323 if (!tmpMapping->ipmdNativeLoc.Valid())
11328 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11329 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11331 if (jitIsCallInstruction(srcIP))
11333 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
11335 else if (nextNativeOfs != lastNativeOfs)
11337 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11338 lastNativeOfs = nextNativeOfs;
11340 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11342 // For the special case of an IL instruction with no body
11343 // followed by the epilog (say ret void immediately preceding
11344 // the method end), we put two entries in, so that we'll stop
11345 // at the (empty) ret statement if the user tries to put a
11346 // breakpoint there, and then have the option of seeing the
11347 // epilog or not based on SetUnmappedStopMask for the stepper.
11348 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11354 //This check is disabled. It is always true that any time this check asserts, the debugger would have a
11355 //problem with IL source level debugging. However, for a C# file, it only matters if things are on
11356 //different source lines. As a result, we have all sorts of latent problems with how we emit debug
11357 //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
11359 if (compiler->opts.compDbgCode)
11361 //Assert that the first instruction of every basic block with more than one incoming edge has a
11362 //different sequence point from each incoming block.
11364 //It turns out that the only thing we really have to assert is that the first statement in each basic
11365 //block has an IL offset and appears in eeBoundaries.
11366 for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
11368 if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
11370 noway_assert(block->bbTreeList->gtOper == GT_STMT);
11371 bool found = false;
11372 if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
11374 IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
11375 for (unsigned i = 0; i < eeBoundariesCount; ++i)
11377 if (eeBoundaries[i].ilOffset == ilOffs)
11384 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
11390 compiler->eeSetLIdone();
11393 /*============================================================================
11395 * These are empty stubs to help the late dis-assembler to compile
11396 * if the late disassembler is being built into a non-DEBUG build.
11398 *============================================================================
11401 #if defined(LATE_DISASM)
11402 #if !defined(DEBUG)
11405 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
11411 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
11416 /*****************************************************************************/
11417 #endif // !defined(DEBUG)
11418 #endif // defined(LATE_DISASM)
11419 /*****************************************************************************/
11421 //------------------------------------------------------------------------
11422 // indirForm: Make a temporary indir we can feed to pattern matching routines
11423 // in cases where we don't want to instantiate all the indirs that happen.
11425 GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
11427 GenTreeIndir i(GT_IND, type, base, nullptr);
11428 i.gtRegNum = REG_NA;
11433 //------------------------------------------------------------------------
11434 // intForm: Make a temporary int we can feed to pattern matching routines
11435 // in cases where we don't want to instantiate.
11437 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
11439 GenTreeIntCon i(type, value);
11440 i.gtRegNum = REG_NA;
11444 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11445 //------------------------------------------------------------------------
11446 // genLongReturn: Generates code for long return statement for x86 and arm.
11448 // Note: treeNode's and op1's registers are already consumed.
11451 // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
11456 void CodeGen::genLongReturn(GenTree* treeNode)
11458 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
11459 assert(treeNode->TypeGet() == TYP_LONG);
11460 GenTree* op1 = treeNode->gtGetOp1();
11461 var_types targetType = treeNode->TypeGet();
11463 assert(op1 != nullptr);
11464 assert(op1->OperGet() == GT_LONG);
11465 GenTree* loRetVal = op1->gtGetOp1();
11466 GenTree* hiRetVal = op1->gtGetOp2();
11467 assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
11469 genConsumeReg(loRetVal);
11470 genConsumeReg(hiRetVal);
11471 if (loRetVal->gtRegNum != REG_LNGRET_LO)
11473 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
11475 if (hiRetVal->gtRegNum != REG_LNGRET_HI)
11477 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
11480 #endif // _TARGET_X86_ || _TARGET_ARM_
11482 //------------------------------------------------------------------------
11483 // genReturn: Generates code for return statement.
11484 // In case of struct return, delegates to the genStructReturn method.
11487 // treeNode - The GT_RETURN or GT_RETFILT tree node.
11492 void CodeGen::genReturn(GenTree* treeNode)
11494 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
11495 GenTree* op1 = treeNode->gtGetOp1();
11496 var_types targetType = treeNode->TypeGet();
11498 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
11499 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
11500 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
11501 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
11504 if (targetType == TYP_VOID)
11506 assert(op1 == nullptr);
11510 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11511 if (targetType == TYP_LONG)
11513 genLongReturn(treeNode);
11516 #endif // _TARGET_X86_ || _TARGET_ARM_
11518 if (isStructReturn(treeNode))
11520 genStructReturn(treeNode);
11522 else if (targetType != TYP_VOID)
11524 assert(op1 != nullptr);
11525 noway_assert(op1->gtRegNum != REG_NA);
11527 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
11528 // consumed a reg for the operand. This is because the variable
11529 // is dead after return. But we are issuing more instructions
11530 // like "profiler leave callback" after this consumption. So
11531 // if you are issuing more instructions after this point,
11532 // remember to keep the variable live up until the new method
11533 // exit point where it is actually dead.
11534 genConsumeReg(op1);
11536 #if defined(_TARGET_ARM64_)
11537 genSimpleReturn(treeNode);
11538 #else // !_TARGET_ARM64_
11539 #if defined(_TARGET_X86_)
11540 if (varTypeIsFloating(treeNode))
11542 genFloatReturn(treeNode);
11545 #elif defined(_TARGET_ARM_)
11546 if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
11548 if (targetType == TYP_FLOAT)
11550 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
11554 assert(targetType == TYP_DOUBLE);
11555 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
11560 #endif // _TARGET_ARM_
11562 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
11563 if (op1->gtRegNum != retReg)
11565 inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
11568 #endif // !_TARGET_ARM64_
11572 #ifdef PROFILING_SUPPORTED
11574 // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
11575 // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
11576 // in the handling of the GT_RETURN statement.
11577 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
11578 // for the return registers containing GC refs.
11580 // There will be a single return block while generating profiler ELT callbacks.
11582 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
11583 // In flowgraph and other places assert that the last node of a block marked as
11584 // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
11585 // maintain such an invariant irrespective of whether profiler hook needed or not.
11586 // Also, there is not much to be gained by materializing it as an explicit node.
11587 if (compiler->compCurBB == compiler->genReturnBB)
11590 // Since we are invalidating the assumption that we would slip into the epilog
11591 // right after the "return", we need to preserve the return reg's GC state
11592 // across the call until actual method return.
11593 ReturnTypeDesc retTypeDesc;
11594 unsigned regCount = 0;
11595 if (compiler->compMethodReturnsMultiRegRetType())
11597 if (varTypeIsLong(compiler->info.compRetNativeType))
11599 retTypeDesc.InitializeLongReturnType(compiler);
11601 else // we must have a struct return type
11603 retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
11605 regCount = retTypeDesc.GetReturnRegCount();
11608 if (varTypeIsGC(compiler->info.compRetType))
11610 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
11612 else if (compiler->compMethodReturnsMultiRegRetType())
11614 for (unsigned i = 0; i < regCount; ++i)
11616 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11618 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
11623 genProfilingLeaveCallback();
11625 if (varTypeIsGC(compiler->info.compRetType))
11627 gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
11629 else if (compiler->compMethodReturnsMultiRegRetType())
11631 for (unsigned i = 0; i < regCount; ++i)
11633 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11635 gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
11640 #endif // PROFILING_SUPPORTED
11642 #if defined(DEBUG) && defined(_TARGET_XARCH_)
11643 bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
11645 #if FEATURE_EH_FUNCLETS
11646 // Don't do stack pointer check at the return from a funclet; only for the main function.
11647 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
11649 doStackPointerCheck = false;
11651 #else // !FEATURE_EH_FUNCLETS
11652 // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
11653 // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
11654 if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) || (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET))
11656 doStackPointerCheck = false;
11658 #endif // !FEATURE_EH_FUNCLETS
11660 genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
11661 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)
11664 #if defined(DEBUG) && defined(_TARGET_XARCH_)
11666 //------------------------------------------------------------------------
11667 // genStackPointerCheck: Generate code to check the stack pointer against a saved value.
11668 // This is a debug check.
11671 // doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
11672 // lvaStackPointerVar - The local variable number that holds the value of the stack pointer
11673 // we are comparing against.
11678 void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar)
11680 if (doStackPointerCheck)
11682 noway_assert(lvaStackPointerVar != 0xCCCCCCCC && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister &&
11683 compiler->lvaTable[lvaStackPointerVar].lvOnFrame);
11684 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);
11686 BasicBlock* sp_check = genCreateTempLabel();
11687 getEmitter()->emitIns_J(INS_je, sp_check);
11688 instGen(INS_BREAKPOINT);
11689 genDefineTempLabel(sp_check);
11693 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)