1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Code Generator Common: XX
9 XX Methods common to all architectures and register allocation strategies XX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16 // identical, and which should probably be moved here.
27 #ifndef JIT32_GCENCODER
28 #include "gcinfoencoder.h"
31 /*****************************************************************************/
33 const BYTE genTypeSizes[] = {
34 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
39 const BYTE genTypeAlignments[] = {
40 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
45 const BYTE genTypeStSzs[] = {
46 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
51 const BYTE genActualTypes[] = {
52 #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
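// The genType* tables above are built with the x-macro pattern: each table re-defines
// DEF_TP to select a single column (size, alignment, stack-slot count, or "actual" type)
// and then pulls in the shared type table (typelist.h). Illustrative example (not an
// actual typelist.h line): an entry describing TYP_INT with sz == 4, st == 1, and al == 4
// would contribute 4 to genTypeSizes, 1 to genTypeStSzs, and 4 to genTypeAlignments.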
57 void CodeGenInterface::setFramePointerRequiredEH(bool value)
59 m_cgFramePointerRequired = value;
61 #ifndef JIT32_GCENCODER
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
73 printf("Method has EH, marking method as fully interruptible\n");
77 m_cgInterruptible = true;
79 #endif // JIT32_GCENCODER
82 /*****************************************************************************/
83 CodeGenInterface* getCodeGenerator(Compiler* comp)
85 return new (comp, CMK_Codegen) CodeGen(comp);
88 // CodeGen constructor
89 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
94 /*****************************************************************************/
96 CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
98 #if defined(_TARGET_XARCH_)
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104 #endif // defined(_TARGET_XARCH_)
106 #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
110 #if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
115 gcInfo.regSet = ®Set;
116 m_cgEmitter = new (compiler->getAllocator()) emitter();
117 m_cgEmitter->codeGen = this;
118 m_cgEmitter->gcInfo = &gcInfo;
121 setVerbose(compiler->verbose);
129 getDisAssembler().disInit(compiler);
133 genTempLiveChg = true;
134 genTrnslLocalVarCount = 0;
136 // Shouldn't be used before it is set in genFnProlog()
137 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
139 #if defined(_TARGET_XARCH_)
140 // Shouldn't be used before it is set in genFnProlog()
141 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
142 #endif // defined(_TARGET_XARCH_)
145 #ifdef _TARGET_AMD64_
146 // This will be set before final frame layout.
147 compiler->compVSQuirkStackPaddingNeeded = 0;
149 // Set to true if we perform the Quirk that fixes the PPP issue
150 compiler->compQuirkForPPPflag = false;
151 #endif // _TARGET_AMD64_
153 // Initialize the IP-mapping logic.
154 compiler->genIPmappingList = nullptr;
155 compiler->genIPmappingLast = nullptr;
156 compiler->genCallSite2ILOffsetMap = nullptr;
158 /* Assume that we are not fully interruptible */
160 genInterruptible = false;
161 #ifdef _TARGET_ARMARCH_
162 hasTailCalls = false;
163 #endif // _TARGET_ARMARCH_
165 genInterruptibleUsed = false;
166 genCurDispOffset = (unsigned)-1;
169 #ifdef _TARGET_ARM64_
170 genSaveFpLrWithAllCalleeSavedRegisters = false;
171 #endif // _TARGET_ARM64_
174 void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
176 tree->gtRegNum = reg;
179 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
181 //---------------------------------------------------------------------
182 // genTotalFrameSize - return the "total" size of the stack frame, including local size
183 // and callee-saved register size. There are a few things "missing" depending on the
184 // platform. The function genCallerSPtoInitialSPdelta() includes those things.
186 // For ARM, this doesn't include the prespilled registers.
188 // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
189 // It also doesn't include the pushed return address.
194 int CodeGenInterface::genTotalFrameSize() const
196 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
198 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
200 assert(totalFrameSize >= 0);
201 return totalFrameSize;
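// Illustrative example (hypothetical numbers): on x86, where REGSIZE_BYTES == 4, a method
// that pushes 3 callee-saved registers and has compLclFrameSize == 0x20 reports
// genTotalFrameSize() == 3 * 4 + 0x20 == 0x2C. Per the comments above, this excludes the
// pushed return address and (when a frame pointer is used) the pushed EBP.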
204 //---------------------------------------------------------------------
205 // genSPtoFPdelta - return the offset from SP to the frame pointer.
206 // This number is going to be positive, since SP must be at the lowest
209 // There must be a frame pointer to call this function!
211 int CodeGenInterface::genSPtoFPdelta() const
213 assert(isFramePointerUsed());
217 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
223 //---------------------------------------------------------------------
224 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
225 // This number is going to be negative, since the Caller-SP is at a higher
226 // address than the frame pointer.
228 // There must be a frame pointer to call this function!
230 int CodeGenInterface::genCallerSPtoFPdelta() const
232 assert(isFramePointerUsed());
233 int callerSPtoFPdelta = 0;
235 #if defined(_TARGET_ARM_)
236 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
237 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
238 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
239 #elif defined(_TARGET_X86_)
240 // Thanks to ebp chaining, the difference between ebp-based addresses
241 // and caller-SP-relative addresses is just the 2 pointers:
244 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
246 #error "Unknown _TARGET_"
249 assert(callerSPtoFPdelta <= 0);
250 return callerSPtoFPdelta;
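// Illustrative example (hypothetical numbers): on x86 the "2 pointers" are the return
// address and the saved EBP, so callerSPtoFPdelta == -2 * 4 == -8. On ARM, a method that
// prespills r0-r3 (4 registers) gets -4 * 4 - 2 * 4 == -24.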
253 //---------------------------------------------------------------------
254 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
256 // This number will be negative.
258 int CodeGenInterface::genCallerSPtoInitialSPdelta() const
260 int callerSPtoSPdelta = 0;
262 #if defined(_TARGET_ARM_)
263 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
264 callerSPtoSPdelta -= genTotalFrameSize();
265 #elif defined(_TARGET_X86_)
266 callerSPtoSPdelta -= genTotalFrameSize();
267 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
269 // compCalleeRegsPushed does not account for the frame pointer
270 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
271 if (isFramePointerUsed())
273 callerSPtoSPdelta -= REGSIZE_BYTES;
276 #error "Unknown _TARGET_"
279 assert(callerSPtoSPdelta <= 0);
280 return callerSPtoSPdelta;
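// Illustrative example (hypothetical numbers, continuing the x86 example above): with
// genTotalFrameSize() == 0x2C and a frame pointer in use, callerSPtoSPdelta ==
// -0x2C - 4 (return address) - 4 (saved EBP) == -0x34.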
283 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
285 /*****************************************************************************
286 * Should we round simple operations (assignments, arithmetic operations, etc.)
291 bool CodeGen::genShouldRoundFP()
293 RoundLevel roundLevel = getRoundFloatLevel();
298 case ROUND_CMP_CONST:
303 assert(roundLevel == ROUND_ALWAYS);
308 /*****************************************************************************
310 * Initialize some global variables.
313 void CodeGen::genPrepForCompiler()
315 treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
317 /* Figure out which non-register variables hold pointers */
319 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
321 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
322 // in a register (i.e. they live on the stack for all or part of their lifetime).
323 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
327 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
329 if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
331 if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
333 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
337 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
338 genLastLiveMask = RBM_NONE;
340 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
344 /*****************************************************************************
345 * To report exception handling information to the VM, we need the size of the exception
346 * handling regions. To compute that, we need to emit labels for the beginning block of
347 * an EH region, and the block that immediately follows a region. Go through the EH
348 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
350 * The beginning blocks of the EH regions already should have this flag set.
352 * No blocks should be added or removed after this.
354 * This code is closely coupled with genReportEH() in the sense that any block
355 * that this procedure has determined it needs to have a label has to be selected
356 * using the same logic both here and in genReportEH(), so basically any time there is
357 * a change in the way we handle EH reporting, we have to keep the logic of these two
361 void CodeGen::genPrepForEHCodegen()
363 assert(!compiler->fgSafeBasicBlockCreation);
368 bool anyFinallys = false;
370 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
371 HBtab < HBtabEnd; HBtab++)
373 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
374 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
376 if (HBtab->ebdTryLast->bbNext != nullptr)
378 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
381 if (HBtab->ebdHndLast->bbNext != nullptr)
383 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
386 if (HBtab->HasFilter())
388 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
389 // The block after the last block of the filter is
390 // the handler begin block, which we already asserted
391 // has BBF_HAS_LABEL set.
394 #if FEATURE_EH_CALLFINALLY_THUNKS
395 if (HBtab->HasFinallyHandler())
399 #endif // FEATURE_EH_CALLFINALLY_THUNKS
402 #if FEATURE_EH_CALLFINALLY_THUNKS
405 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
407 if (block->bbJumpKind == BBJ_CALLFINALLY)
409 BasicBlock* bbToLabel = block->bbNext;
410 if (block->isBBCallAlwaysPair())
412 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
414 if (bbToLabel != nullptr)
416 bbToLabel->bbFlags |= BBF_HAS_LABEL;
418 } // block is BBJ_CALLFINALLY
420 } // if (anyFinallys)
421 #endif // FEATURE_EH_CALLFINALLY_THUNKS
424 void CodeGenInterface::genUpdateLife(GenTree* tree)
426 treeLifeUpdater->UpdateLife(tree);
429 void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
431 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
434 // Return the register mask for the given register variable
436 regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
438 regMaskTP regMask = RBM_NONE;
440 assert(varDsc->lvIsInReg());
442 if (varTypeIsFloating(varDsc->TypeGet()))
444 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
448 regMask = genRegMask(varDsc->lvRegNum);
453 // Return the register mask for the given lclVar or regVar tree node
455 regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
457 assert(tree->gtOper == GT_LCL_VAR);
459 regMaskTP regMask = RBM_NONE;
460 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
461 if (varDsc->lvPromoted)
463 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
465 noway_assert(compiler->lvaTable[i].lvIsStructField);
466 if (compiler->lvaTable[i].lvIsInReg())
468 regMask |= genGetRegMask(&compiler->lvaTable[i]);
472 else if (varDsc->lvIsInReg())
474 regMask = genGetRegMask(varDsc);
479 // The given lclVar is either going live (being born) or dying.
480 // It might be both going live and dying (that is, it is a dead store) under MinOpts.
481 // Update regSet.rsMaskVars accordingly.
483 void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
485 regMaskTP regMask = genGetRegMask(varDsc);
488 if (compiler->verbose)
490 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
491 varDsc->PrintVarReg();
492 printf(" is becoming %s ", (isDying) ? "dead" : "live");
493 Compiler::printTreeID(tree);
500 // We'd like to be able to assert the following; however, if we are walking
501 // through a qmark/colon tree, we may encounter multiple last-use nodes.
502 // assert((regSet.rsMaskVars & regMask) == regMask);
503 regSet.RemoveMaskVars(regMask);
507 assert((regSet.rsMaskVars & regMask) == 0);
508 regSet.AddMaskVars(regMask);
512 //----------------------------------------------------------------------
513 // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
514 // Not all JIT Helper calls follow the standard ABI on the target architecture.
516 // TODO-CQ: Currently this list is incomplete (not all helper calls are
517 // enumerated) and not 100% accurate (some killsets are bigger than
518 // what they really are).
519 // There's some work to be done in several places in the JIT to
520 // accurately track the registers that are getting killed by
522 // a) LSRA needs several changes to accommodate more precise killsets
523 // for every helper call it sees (both explicitly [easy] and
524 // implicitly [hard])
525 // b) Currently for AMD64, when we generate code for a helper call
526 // we over-pessimize the killsets of the call (independently
527 // from LSRA) and this needs changes
528 // both in CodeGenAmd64.cpp and emitx86.cpp.
530 // The best solution for this problem would be to try to centralize
531 // the killset information in a single place but then make the
532 // corresponding changes so every code generation phase is in sync
535 // The interim solution is to only add known helper calls that don't
536 // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
539 // helper - The helper being inquired about
542 // Mask of register kills -- registers whose values are no longer guaranteed to be the same.
544 regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
548 case CORINFO_HELP_ASSIGN_BYREF:
549 #if defined(_TARGET_AMD64_)
550 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
551 #elif defined(_TARGET_ARMARCH_)
552 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
553 #elif defined(_TARGET_X86_)
554 return RBM_ESI | RBM_EDI | RBM_ECX;
556 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
557 return RBM_CALLEE_TRASH;
560 #if defined(_TARGET_ARMARCH_)
561 case CORINFO_HELP_ASSIGN_REF:
562 case CORINFO_HELP_CHECKED_ASSIGN_REF:
563 return RBM_CALLEE_TRASH_WRITEBARRIER;
566 case CORINFO_HELP_PROF_FCN_ENTER:
567 #ifdef RBM_PROFILER_ENTER_TRASH
568 return RBM_PROFILER_ENTER_TRASH;
570 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
573 case CORINFO_HELP_PROF_FCN_LEAVE:
574 #ifdef RBM_PROFILER_LEAVE_TRASH
575 return RBM_PROFILER_LEAVE_TRASH;
577 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
580 case CORINFO_HELP_PROF_FCN_TAILCALL:
581 #ifdef RBM_PROFILER_TAILCALL_TRASH
582 return RBM_PROFILER_TAILCALL_TRASH;
584 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
588 case CORINFO_HELP_ASSIGN_REF_EAX:
589 case CORINFO_HELP_ASSIGN_REF_ECX:
590 case CORINFO_HELP_ASSIGN_REF_EBX:
591 case CORINFO_HELP_ASSIGN_REF_EBP:
592 case CORINFO_HELP_ASSIGN_REF_ESI:
593 case CORINFO_HELP_ASSIGN_REF_EDI:
595 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
596 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
597 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
598 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
599 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
600 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
603 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
604 case CORINFO_HELP_ASSIGN_REF:
605 case CORINFO_HELP_CHECKED_ASSIGN_REF:
606 return RBM_EAX | RBM_EDX;
607 #endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
610 case CORINFO_HELP_STOP_FOR_GC:
611 return RBM_STOP_FOR_GC_TRASH;
613 case CORINFO_HELP_INIT_PINVOKE_FRAME:
614 return RBM_INIT_PINVOKE_FRAME_TRASH;
617 return RBM_CALLEE_TRASH;
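// Illustrative (hypothetical) usage: a phase that models the registers clobbered by a
// byref write barrier helper could query
//     regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
// and treat every register in killMask as no longer holding a useful value after the call.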
621 template <bool ForCodeGen>
622 void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
629 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
630 dumpConvertedVarSet(this, compCurLife);
631 printf(" -> %s ", VarSetOps::ToString(this, newLife));
632 dumpConvertedVarSet(this, newLife);
637 /* We should only be called when the live set has actually changed */
639 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
643 VarSetOps::Assign(this, compCurLife, newLife);
647 /* Figure out which variables are becoming live/dead at this point */
649 // deadSet = compCurLife - newLife
650 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
652 // bornSet = newLife - compCurLife
653 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
655 /* Can't become live and dead at the same time */
657 // (deadSet UNION bornSet) != EMPTY
658 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
659 // (deadSet INTERSECTION bornSet) == EMPTY
660 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
662 VarSetOps::Assign(this, compCurLife, newLife);
664 // Handle the dying vars first, then the newly live vars.
665 // This is because, in the RyuJIT backend case, they may occupy registers that
666 // will be occupied by another var that is newly live.
667 VarSetOps::Iter deadIter(this, deadSet);
668 unsigned deadVarIndex = 0;
669 while (deadIter.NextElem(&deadVarIndex))
671 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
672 varDsc = lvaTable + varNum;
673 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
674 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
676 if (varDsc->lvIsInReg())
678 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
680 regMaskTP regMask = varDsc->lvRegMask();
683 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
687 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
689 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
691 // This isn't in a register, so update the gcVarPtrSetCur.
692 else if (isGCRef || isByRef)
694 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
695 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
699 VarSetOps::Iter bornIter(this, bornSet);
700 unsigned bornVarIndex = 0;
701 while (bornIter.NextElem(&bornVarIndex))
703 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
704 varDsc = lvaTable + varNum;
705 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
706 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
708 if (varDsc->lvIsInReg())
711 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
713 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
716 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
717 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
718 regMaskTP regMask = varDsc->lvRegMask();
721 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
725 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
728 // This isn't in a register, so update the gcVarPtrSetCur
729 else if (lvaIsGCTracked(varDsc))
731 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
732 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
735 #ifdef USING_SCOPE_INFO
737 #endif // USING_SCOPE_INFO
740 // Need an explicit instantiation.
741 template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
743 /*****************************************************************************
747 void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
749 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
752 /*****************************************************************************
756 void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
758 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
762 regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
767 //----------------------------------------------------------------------
768 // getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
771 // tree - spilled GenTree node
774 // TempDsc corresponding to tree
775 TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
777 // tree must be in spilled state.
778 assert((tree->gtFlags & GTF_SPILLED) != 0);
780 // Get the tree's SpillDsc.
781 RegSet::SpillDsc* prevDsc;
782 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
783 assert(spillDsc != nullptr);
785 // Get the temp desc.
786 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
790 #ifdef _TARGET_XARCH_
792 #ifdef _TARGET_AMD64_
793 // Returns relocation type hint for an addr.
794 // Note that there are no reloc hints on x86.
797 // addr - data address
800 // relocation type hint
802 unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
804 return compiler->eeGetRelocTypeHint((void*)addr);
806 #endif //_TARGET_AMD64_
808 // Return true if an absolute indirect data address can be encoded as an IP-relative
809 // offset. Note that this method should be used only when the caller knows that
810 // the address is an icon value that the VM has given and there is no GenTree node
811 // representing it. Otherwise, one should always use FitsInAddrBase().
814 // addr - an absolute indirect data address
817 // true if indir data addr could be encoded as IP-relative offset.
819 bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
821 #ifdef _TARGET_AMD64_
822 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
824 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
829 // Return true if an indirect code address can be encoded as IP-relative offset.
830 // Note that this method should be used only when the caller knows that the
831 // address is an icon value that the VM has given and there is no GenTree node
832 // representing it. Otherwise, one should always use FitsInAddrBase().
835 // addr - an absolute indirect code address
838 // true if indir code addr could be encoded as IP-relative offset.
840 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
842 #ifdef _TARGET_AMD64_
843 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
845 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
850 // Return true if an indirect code address can be encoded as 32-bit displacement
851 // relative to zero. Note that this method should be used only when the caller
852 // knows that the address is an icon value that VM has given and there is no
853 // GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
856 // addr - absolute indirect code address
859 // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
861 bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
863 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
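// Illustrative example: an address such as 0x7FF00000 fits in a signed 32-bit displacement,
// so this returns true; 0x123456789 does not fit, so it returns false and cannot be encoded
// as a displacement relative to zero.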
866 // Return true if an absolute indirect code address needs a relocation recorded with VM.
869 // addr - an absolute indirect code address
872 // true if indir code addr needs a relocation recorded with VM
874 bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
876 // If generating relocatable ngen code, then all code addr should go through relocation
877 if (compiler->opts.compReloc)
882 #ifdef _TARGET_AMD64_
883 // See if the code indir addr can be encoded as 32-bit displacement relative to zero.
884 // We don't need a relocation in that case.
885 if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr))
890 // Else we need a relocation.
893 // On x86 there is no need to record or ask for relocations during jitting,
894 // because all addrs fit within 32-bits.
896 #endif //_TARGET_X86_
899 // Return true if a direct code address needs to be marked as relocatable.
902 // addr - absolute direct code address
905 // true if direct code addr needs a relocation recorded with VM
907 bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
909 // If generating relocatable ngen code, then all code addr should go through relocation
910 if (compiler->opts.compReloc)
915 #ifdef _TARGET_AMD64_
916 // By default all direct code addresses go through relocation so that the VM will set up
917 // a jump stub if the addr cannot be encoded as a pc-relative offset.
920 // On x86 there is no need for recording relocations during jitting,
921 // because all addrs fit within 32-bits.
923 #endif //_TARGET_X86_
925 #endif //_TARGET_XARCH_
927 /*****************************************************************************
929 * The following can be used to create basic blocks that serve as labels for
930 * the emitter. Use with caution - these are not real basic blocks!
935 BasicBlock* CodeGen::genCreateTempLabel()
938 // These blocks don't affect FP
939 compiler->fgSafeBasicBlockCreation = true;
942 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
945 compiler->fgSafeBasicBlockCreation = false;
948 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
950 // Use coldness of current block, as this label will
951 // be contained in it.
952 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
956 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
958 block->bbTgtStkDepth = genStackLevel / sizeof(int);
965 void CodeGen::genDefineTempLabel(BasicBlock* label)
968 if (compiler->opts.dspCode)
970 printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum);
974 label->bbEmitCookie =
975 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
978 /*****************************************************************************
980 * Adjust the stack pointer by the given value; assumes that this follows
981 * a call so only callee-saved registers (and registers that may hold a
982 * return value) are used at this point.
985 void CodeGen::genAdjustSP(target_ssize_t delta)
987 #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
988 if (delta == sizeof(int))
989 inst_RV(INS_pop, REG_ECX, TYP_INT);
992 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
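// The "pop ecx" special case above is a size optimization: on x86, "pop ecx" is a one-byte
// instruction while "add esp, 4" takes three bytes, and per the comment above only
// callee-saved registers (and possible return-value registers) are in use here, so
// clobbering ECX is safe.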
995 //------------------------------------------------------------------------
996 // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
999 // block - The BasicBlock for which we are about to generate code.
1002 // Must be called just prior to generating code for 'block'.
1005 // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1006 // and if 'block' is a throw helper block with a non-zero stack level.
1008 void CodeGen::genAdjustStackLevel(BasicBlock* block)
1010 #if !FEATURE_FIXED_OUT_ARGS
1011 // Check for inserted throw blocks and adjust genStackLevel.
1012 CLANG_FORMAT_COMMENT_ANCHOR;
1014 #if defined(UNIX_X86_ABI)
1015 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1017 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1018 // at this point if a jump to this block is made in the middle of pushing arguments.
1020 // Here we restore SP to prevent potential stack alignment issues.
1021 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1025 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1027 noway_assert(block->bbFlags & BBF_JMP_TARGET);
1029 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1031 if (genStackLevel != 0)
1034 getEmitter()->emitMarkStackLvl(genStackLevel);
1035 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1037 #else // _TARGET_X86_
1038 NYI("Need emitMarkStackLvl()");
1039 #endif // _TARGET_X86_
1042 #endif // !FEATURE_FIXED_OUT_ARGS
1045 #ifdef _TARGET_ARMARCH_
1047 // alignmentWB is out param
1048 unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1050 unsigned alignment = 0;
1051 unsigned opSize = 0;
1053 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1055 opSize = InferStructOpSizeAlign(op, &alignment);
1059 alignment = genTypeAlignments[op->TypeGet()];
1060 opSize = genTypeSizes[op->TypeGet()];
1063 assert(opSize != 0);
1064 assert(alignment != 0);
1066 (*alignmentWB) = alignment;
1070 // alignmentWB is out param
1071 unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1073 unsigned alignment = 0;
1074 unsigned opSize = 0;
1076 while (op->gtOper == GT_COMMA)
1078 op = op->gtOp.gtOp2;
1081 if (op->gtOper == GT_OBJ)
1083 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1084 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1085 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1087 else if (op->gtOper == GT_LCL_VAR)
1089 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1090 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1091 assert(varDsc->lvType == TYP_STRUCT);
1092 opSize = varDsc->lvSize();
1093 #ifndef _TARGET_64BIT_
1094 if (varDsc->lvStructDoubleAlign)
1096 alignment = TARGET_POINTER_SIZE * 2;
1099 #endif // !_TARGET_64BIT_
1101 alignment = TARGET_POINTER_SIZE;
1104 else if (op->OperIsCopyBlkOp())
1106 GenTree* op2 = op->gtOp.gtOp2;
1108 if (op2->OperGet() == GT_CNS_INT)
1110 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1112 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1113 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1115 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1119 opSize = (unsigned)op2->gtIntCon.gtIconVal;
1120 GenTree* op1 = op->gtOp.gtOp1;
1121 assert(op1->OperGet() == GT_LIST);
1122 GenTree* dstAddr = op1->gtOp.gtOp1;
1123 if (dstAddr->OperGet() == GT_ADDR)
1125 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1129 assert(!"Unhandled dstAddr node");
1130 alignment = TARGET_POINTER_SIZE;
1136 noway_assert(!"Variable sized COPYBLK register arg!");
1138 alignment = TARGET_POINTER_SIZE;
1141 else if (op->gtOper == GT_MKREFANY)
1143 opSize = TARGET_POINTER_SIZE * 2;
1144 alignment = TARGET_POINTER_SIZE;
1146 else if (op->IsArgPlaceHolderNode())
1148 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1149 assert(clsHnd != 0);
1150 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1151 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1155 assert(!"Unhandled gtOper");
1156 opSize = TARGET_POINTER_SIZE;
1157 alignment = TARGET_POINTER_SIZE;
1160 assert(opSize != 0);
1161 assert(alignment != 0);
1163 (*alignmentWB) = alignment;
1167 #endif // _TARGET_ARMARCH_
1169 /*****************************************************************************
1171 * Take an address expression and try to find the best set of components to
1172 * form an address mode; returns non-zero if this is successful.
1174 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1175 * Refactor this code so that the underlying analysis can be used in
1176 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1177 * option to not generate the code.
1179 * 'fold' specifies if it is OK to fold the array index which hangs off
1182 * If successful, the parameters will be set to the following values:
1184 * *rv1Ptr ... base operand
1185 * *rv2Ptr ... optional operand
1186 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1187 * #if SCALED_ADDR_MODES
1188 * *mulPtr ... optional multiplier (2/4/8) for rv2
1189 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1191 * *cnsPtr ... integer constant [optional]
1193 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1194 * identifies the components that might be used to
1195 * form an address mode later on.
1198 bool CodeGen::genCreateAddrMode(GenTree* addr,
1203 #if SCALED_ADDR_MODES
1205 #endif // SCALED_ADDR_MODES
1209 The following indirections are valid address modes on x86/x64:
1211 [ icon] * not handled here
1215 [reg1 + reg2 + icon]
1222 [reg1 + 2 * reg2 + icon]
1223 [reg1 + 4 * reg2 + icon]
1224 [reg1 + 8 * reg2 + icon]
1226 The following indirections are valid address modes on arm64:
1231 [reg1 + reg2 * natural-scale]
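       For example (illustrative, not an exhaustive mapping): an int[] element access a[i]
       typically becomes [reg1 + 4*reg2 + icon], with reg1 holding the array base, reg2 the
       index, and icon the offset of the first element.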
1235 /* All indirect address modes require the address to be an addition */
1237 if (addr->gtOper != GT_ADD)
1242 // Can't use indirect addressing mode as we need to check for overflow.
1243 // Also, can't use 'lea' as it doesn't set the flags.
1245 if (addr->gtOverflow())
1250 GenTree* rv1 = nullptr;
1251 GenTree* rv2 = nullptr;
1257 #if SCALED_ADDR_MODES
1259 #endif // SCALED_ADDR_MODES
1263 /* What order are the sub-operands to be evaluated */
1265 if (addr->gtFlags & GTF_REVERSE_OPS)
1267 op1 = addr->gtOp.gtOp2;
1268 op2 = addr->gtOp.gtOp1;
1272 op1 = addr->gtOp.gtOp1;
1273 op2 = addr->gtOp.gtOp2;
1276 bool rev = false; // Is op2 first in the evaluation order?
1279 A complex address mode can combine the following operands:
1281 op1 ... base address
1282 op2 ... optional scaled index
1283 #if SCALED_ADDR_MODES
1284 mul ... optional multiplier (2/4/8) for op2
1286 cns ... optional displacement
1288 Here we try to find such a set of operands and arrange for these
1289 to sit in registers.
1293 #if SCALED_ADDR_MODES
1295 #endif // SCALED_ADDR_MODES
1298 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1299 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1300 here if we find a scaled index.
1302 CLANG_FORMAT_COMMENT_ANCHOR;
1304 #if SCALED_ADDR_MODES
1306 #endif // SCALED_ADDR_MODES
1308 /* Special case: keep constants as 'op2' */
1310 if (op1->IsCnsIntOrI())
1312 // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1318 /* Check for an addition of a constant */
1320 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1322 /* We're adding a constant */
1324 cns += op2->gtIntConCommon.IconValue();
1326 #if defined(_TARGET_ARMARCH_)
1330 /* Inspect the operand the constant is being added to */
1332 switch (op1->gtOper)
1336 if (op1->gtOverflow())
1341 op2 = op1->gtOp.gtOp2;
1342 op1 = op1->gtOp.gtOp1;
1346 #if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1347 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1349 if (op1->gtOverflow())
1351 return false; // Need overflow check
1358 mul = op1->GetScaledIndex();
1361 /* We can use "[mul*rv2 + icon]" */
1364 rv2 = op1->gtOp.gtOp1;
1369 #endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1376 /* The best we can do is "[rv1 + icon]" */
1384 // op2 is not a constant. So keep on trying.
1386 /* Neither op1 nor op2 are sitting in a register right now */
1388 switch (op1->gtOper)
1390 #if !defined(_TARGET_ARMARCH_)
1391 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1394 if (op1->gtOverflow())
1399 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
1401 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
1402 op1 = op1->gtOp.gtOp1;
1409 #if SCALED_ADDR_MODES
1413 if (op1->gtOverflow())
1422 mul = op1->GetScaledIndex();
1425 /* 'op1' is a scaled value */
1428 rv2 = op1->gtOp.gtOp1;
1431 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1433 if (jitIsScaleIndexMul(argScale * mul))
1435 mul = mul * argScale;
1436 rv2 = rv2->gtOp.gtOp1;
1444 noway_assert(rev == false);
1451 #endif // SCALED_ADDR_MODES
1452 #endif // !_TARGET_ARMARCH
1456 op1 = op1->gtOp.gtOp1;
1461 op1 = op1->gtOp.gtOp2;
1469 switch (op2->gtOper)
1471 #if !defined(_TARGET_ARMARCH_)
1472 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1475 if (op2->gtOverflow())
1480 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
1482 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1483 op2 = op2->gtOp.gtOp1;
1490 #if SCALED_ADDR_MODES
1494 if (op2->gtOverflow())
1503 mul = op2->GetScaledIndex();
1506 // 'op2' is a scaled value... is its argument also scaled?
1508 rv2 = op2->gtOp.gtOp1;
1509 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1511 if (jitIsScaleIndexMul(argScale * mul))
1513 mul = mul * argScale;
1514 rv2 = rv2->gtOp.gtOp1;
1528 #endif // SCALED_ADDR_MODES
1529 #endif // !_TARGET_ARMARCH
1533 op2 = op2->gtOp.gtOp1;
1538 op2 = op2->gtOp.gtOp2;
1545 /* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
1549 #ifdef _TARGET_ARM64_
1557 /* Make sure a GC address doesn't end up in 'rv2' */
1559 if (varTypeIsGC(rv2->TypeGet()))
1561 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
1570 /* Special case: constant array index (that is range-checked) */
1577 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
1579 /* For valuetype arrays where we can't use the scaled address
1580 mode, rv2 will point to the scaled index. So we have to do
1583 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1591 /* May be a simple array. rv2 will point to the actual index */
1597 /* Get hold of the array index and see if it's a constant */
1598 if (index->IsIntCnsFitsInI32())
1600 /* Get hold of the index value */
1601 ssize_t ixv = index->AsIntConCommon()->IconValue();
1603 #if SCALED_ADDR_MODES
1604 /* Scale the index if necessary */
1611 if (FitsIn<INT32>(cns + ixv))
1613 /* Add the scaled index to the offset value */
1617 #if SCALED_ADDR_MODES
1618 /* There is no scaled operand any more */
1627 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
1628 noway_assert(rv1 || mul != 1);
1630 noway_assert(FitsIn<INT32>(cns));
1632 if (rv1 == nullptr && rv2 == nullptr)
1637 /* Success - return the various components to the caller */
1642 #if SCALED_ADDR_MODES
1650 #ifdef _TARGET_ARMARCH_
1651 //------------------------------------------------------------------------
1652 // genEmitGSCookieCheck: Generate code to check that the GS cookie
1653 // wasn't trashed by a buffer overrun. Common code for ARM32 and ARM64.
1655 void CodeGen::genEmitGSCookieCheck(bool pushReg)
1657 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
1659 // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
1660 // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
1661 if (!pushReg && (compiler->info.compRetType == TYP_REF))
1662 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
1664 // We need two temporary registers, to load the GS cookie values and compare them. We can't use
1665 // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
1666 // callee-trash registers, which should not contain anything interesting at this point.
1667 // We don't have any IR node representing this check, so LSRA can't communicate registers
1670 regNumber regGSConst = REG_GSCOOKIE_TMP_0;
1671 regNumber regGSValue = REG_GSCOOKIE_TMP_1;
1673 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
1675 // load the GS cookie constant into a reg
1677 genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
1681 // Ngen case - GS cookie constant needs to be accessed through an indirection.
1682 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
1683 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
1685 // Load this method's GS value from the stack frame
1686 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
1687 // Compare with the GS cookie constant
1688 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
1690 BasicBlock* gsCheckBlk = genCreateTempLabel();
1691 inst_JMP(EJ_eq, gsCheckBlk);
1692 // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
1693 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
1694 genDefineTempLabel(gsCheckBlk);
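// In summary, the sequence emitted above is roughly (illustrative pseudo-assembly):
//
//     <load expected cookie into regGSConst, directly or via one indirection>
//     ldr  regGSValue, [this method's GS cookie stack slot]
//     cmp  regGSConst, regGSValue
//     beq  gsCheckBlk
//     bl   CORINFO_HELP_FAIL_FAST
//   gsCheckBlk: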
1696 #endif // _TARGET_ARMARCH_
1698 /*****************************************************************************
1700 * Generate an exit sequence for a return from a method (note: when compiling
1701 * for speed there might be multiple exit points).
1704 void CodeGen::genExitCode(BasicBlock* block)
1706 /* Just wrote the first instruction of the epilog - inform debugger
1707 Note that this may result in a duplicate IPmapping entry, and
1710 // For non-optimized debuggable code, there is only one epilog.
1711 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
1713 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
1714 if (compiler->getNeedsGSSecurityCookie())
1716 genEmitGSCookieCheck(jmpEpilog);
1721 // The GS cookie check created a temp label that has no live
1722 // incoming GC registers; we need to fix that
1727 /* Figure out which register parameters hold pointers */
1729 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1732 noway_assert(varDsc->lvIsParam);
1734 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
1737 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1738 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1742 genReserveEpilog(block);
1745 //------------------------------------------------------------------------
1746 // genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1749 // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1750 // Otherwise, we generate the 'throw' inline.
1753 // jumpKind - jump kind to generate;
1754 // codeKind - the special throw-helper kind;
1755 // failBlk - optional fail target block, if it is already known;
1757 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk)
1759 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1760 #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
1761 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1762 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
1763 #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1767 // For code with throw helper blocks, find and use the helper block for
1768 // raising the exception. The block may be shared by other trees too.
1770 BasicBlock* excpRaisingBlock;
1772 if (failBlk != nullptr)
1774 // We already know which block to jump to. Use that.
1775 excpRaisingBlock = failBlk;
1778 Compiler::AddCodeDsc* add =
1779 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1780 assert(excpRaisingBlock == add->acdDstBlk);
1781 #if !FEATURE_FIXED_OUT_ARGS
1782 assert(add->acdStkLvlInit || isFramePointerUsed());
1783 #endif // !FEATURE_FIXED_OUT_ARGS
1788 // Find the helper-block which raises the exception.
1789 Compiler::AddCodeDsc* add =
1790 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1791 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
1792 excpRaisingBlock = add->acdDstBlk;
1793 #if !FEATURE_FIXED_OUT_ARGS
1794 assert(add->acdStkLvlInit || isFramePointerUsed());
1795 #endif // !FEATURE_FIXED_OUT_ARGS
1798 noway_assert(excpRaisingBlock != nullptr);
1800 // Jump to the exception-throwing block on error.
1801 inst_JMP(jumpKind, excpRaisingBlock);
1805 // The code to throw the exception will be generated inline, and
1806 // we will jump around it in the normal non-exception case.
1808 BasicBlock* tgtBlk = nullptr;
1809 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
1810 if (reverseJumpKind != jumpKind)
1812 tgtBlk = genCreateTempLabel();
1813 inst_JMP(reverseJumpKind, tgtBlk);
1816 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
1818 // Define the spot for the normal non-exception case to jump to.
1819 if (tgtBlk != nullptr)
1821 assert(reverseJumpKind != jumpKind);
1822 genDefineTempLabel(tgtBlk);
1827 /*****************************************************************************
1829 * The last operation done was generating code for "tree" and that would
1830 * have set the flags. Check if the operation caused an overflow.
1834 void CodeGen::genCheckOverflow(GenTree* tree)
1836 // An overflow check should have been requested for this tree
1837 noway_assert(tree->gtOverflow());
1839 const var_types type = tree->TypeGet();
1841 // Overflow checks can only occur for the non-small types (i.e. TYP_INT, TYP_LONG)
1842 noway_assert(!varTypeIsSmall(type));
1844 emitJumpKind jumpKind;
1846 #ifdef _TARGET_ARM64_
1847 if (tree->OperGet() == GT_MUL)
1854 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
1856 #if defined(_TARGET_XARCH_)
1858 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
1860 #elif defined(_TARGET_ARMARCH_)
1862 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
1864 if (jumpKind == EJ_lo)
1866 if (tree->OperGet() != GT_SUB)
1872 #endif // defined(_TARGET_ARMARCH_)
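// For example, on xarch an unsigned add whose result wraps sets CF (hence EJ_jb), while a
// signed add that overflows sets OF (hence EJ_jo). On ARM, carry-clear ("lo") indicates
// unsigned overflow only for subtraction (a borrow), which is why the jump kind is adjusted
// above for operations other than GT_SUB.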
1875 // Jump to the block which will throw the exception
1877 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
1880 #if FEATURE_EH_FUNCLETS
1882 /*****************************************************************************
1884 * Update the current funclet as needed by calling genUpdateCurrentFunclet().
1885 * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
1890 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
1892 if (block->bbFlags & BBF_FUNCLET_BEG)
1894 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
1895 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
1897 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
1901 // We shouldn't see FUNC_ROOT
1902 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
1903 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
1908 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
1909 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
1911 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
1913 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
1915 assert(!block->hasHndIndex());
1919 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
1920 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
1925 #if defined(_TARGET_ARM_)
1926 void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
1928 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
1929 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
1930 // calls the funclet during non-exceptional control flow.
1931 if (block->bbFlags & BBF_FINALLY_TARGET)
1933 assert(block->bbFlags & BBF_JMP_TARGET);
1936 if (compiler->verbose)
1938 printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n", block->bbNum);
1941 // Create a label that we'll use for computing the start of an EH region, if this block is
1942 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
1943 // determining the EH regions, then this NOP would end up outside of the region, if this
1944 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
1945 // would be executed, which we would prefer not to do.
1947 block->bbUnwindNopEmitCookie =
1948 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1955 #endif // FEATURE_EH_FUNCLETS
1957 /*****************************************************************************
1959 * Generate code for the function.
1962 void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
1967 printf("*************** In genGenerateCode()\n");
1968 compiler->fgDispBasicBlocks(compiler->verboseTrees);
1973 unsigned prologSize;
1974 unsigned epilogSize;
1979 genInterruptibleUsed = true;
1981 compiler->fgDebugCheckBBlist();
1984 /* This is the real thing */
1986 genPrepForCompiler();
1988 /* Prepare the emitter */
1989 getEmitter()->Init();
1991 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
1995 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
1997 compiler->opts.disAsm = true;
2000 if (compiler->opts.disAsm)
2002 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2004 printf("; Emitting ");
2006 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2008 printf("SMALL_CODE");
2010 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2012 printf("FAST_CODE");
2016 printf("BLENDED_CODE");
2021 if (compiler->info.genCPU == CPU_X86)
2023 printf("generic X86 CPU");
2025 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
2027 printf("Pentium 4");
2029 else if (compiler->info.genCPU == CPU_X64)
2031 if (compiler->canUseVexEncoding())
2033 printf("X64 CPU with AVX");
2037 printf("X64 CPU with SSE2");
2040 else if (compiler->info.genCPU == CPU_ARM)
2042 printf("generic ARM CPU");
2044 else if (compiler->info.genCPU == CPU_ARM64)
2046 printf("generic ARM64 CPU");
2050 printf("unknown architecture");
2053 #if defined(_TARGET_WINDOWS_)
2054 printf(" - Windows");
2055 #elif defined(_TARGET_UNIX_)
2061 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2063 printf("; Tier-0 compilation\n");
2065 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
2067 printf("; Tier-1 compilation\n");
2070 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
2072 printf("; optimized code\n");
2074 else if (compiler->opts.compDbgCode)
2076 printf("; debuggable code\n");
2078 else if (compiler->opts.MinOpts())
2080 printf("; compiler->opts.MinOpts() is true\n");
2084 printf("; unknown optimization flags\n");
2088 if (compiler->genDoubleAlign())
2089 printf("; double-aligned frame\n");
2092 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
2094 if (genInterruptible)
2096 printf("; fully interruptible\n");
2100 printf("; partially interruptible\n");
2103 if (compiler->fgHaveProfileData())
2105 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
2106 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
2109 if (compiler->fgProfileData_ILSizeMismatch)
2111 printf("; discarded IBC profile data due to mismatch in ILSize\n");
2116 // We compute the final frame layout before code generation. This is because LSRA
2117 // has already computed exactly the maximum concurrent number of spill temps of each type that are
2118 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
2119 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
2120 // memory from the VM.
2124 unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
2126 getEmitter()->emitBegFN(isFramePointerUsed()
2129 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
2130 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
2135 /* Now generate code for the function */
2139 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
2140 // generation hasn't touched it (it shouldn't!).
2143 compiler->lvaTableDump();
2147 /* We can now generate the function prolog and epilog */
2149 genGeneratePrologsAndEpilogs();
2151 /* Bind jump distances */
2153 getEmitter()->emitJumpDistBind();
2155 /* The code is now complete and final; it should not change after this. */
2157 /* Compute the size of the code sections that we are going to ask the VM
2158 to allocate. Note that this might not be precisely the size of the
2159 code we emit, though it's fatal if we emit more code than the size we
2161 (Note: an example of a case where we emit less code would be useful.)
2164 getEmitter()->emitComputeCodeSizes();
2168 // Code to test or stress our ability to run a fallback compile.
2169 // We trigger the fallback here, before asking the VM for any memory,
2170 // because if not, we will leak mem, as the current codebase can't free
2171 // the mem after the emitter asks the VM for it. As this is only a stress
2172 // mode, we only want the functionality, and don't care about the relative
2173 // ugliness of having the failure here.
2174 if (!compiler->jitFallbackCompile)
2176 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
2177 // especially that caused by enabling JIT stress.
2178 if (!JitConfig.JitNoForceFallback())
2180 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
2182 NO_WAY_NOASSERT("Stress failure");
2189 /* We've finished collecting all the unwind information for the function. Now reserve
2190 space for it from the VM.
2193 compiler->unwindReserve();
2197 size_t dataSize = getEmitter()->emitDataSize();
2199 #endif // DISPLAY_SIZES
2203 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
2205 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2206 trackedStackPtrsContig = false;
2207 #elif defined(_TARGET_ARM_)
2208 // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
2209 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
2211 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
2215 /* We're done generating code for this function */
2216 compiler->compCodeGenDone = true;
2219 compiler->EndPhase(PHASE_GENERATE_CODE);
2221 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
2222 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
2223 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
2225 compiler->EndPhase(PHASE_EMIT_CODE);
2228 if (compiler->opts.disAsm)
2230 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
2231 compiler->info.compFullName);
2232 printf("; ============================================================\n");
2233 printf(""); // in our logic this causes a flush
2238 printf("*************** After end code gen, before unwindEmit()\n");
2239 getEmitter()->emitDispIGlist(true);
2243 #if EMIT_TRACK_STACK_DEPTH
2244 // Check our max stack level. Needed for fgAddCodeRef().
2245 // We need to relax the assert as our estimation won't include code-gen
2246 // stack changes (which we know don't affect fgAddCodeRef()).
2247 // NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a
2248 // count of DWORD-sized arguments, NOT argument size in bytes.
2250 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
2251 compiler->compHndBBtabCount + // Return address for locally-called finallys
2252 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
2253 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
2254 #if defined(UNIX_X86_ABI)
2255 // Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth.
2256 assert(maxNestedAlignment % sizeof(int) == 0);
2257 maxAllowedStackDepth += maxNestedAlignment / sizeof(int);
2259 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
2261 #endif // EMIT_TRACK_STACK_DEPTH
2263 *nativeSizeOfCode = codeSize;
2264 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2266 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2268 // Make sure that the x86 alignment and cache prefetch optimization rules were obeyed.
2271 // Don't start a method in the last 7 bytes of a 16-byte alignment area
2272 // unless we are generating SMALL_CODE
2273 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
2275 /* Now that the code is issued, we can finalize and emit the unwind data */
2277 compiler->unwindEmit(*codePtr, coldCodePtr);
2279 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
2283 /* Finalize the Local Var info in terms of generated code */
2288 unsigned finalHotCodeSize;
2289 unsigned finalColdCodeSize;
2290 if (compiler->fgFirstColdBlock != nullptr)
2292 // We did some hot/cold splitting. The hot section is always padded out to the
2293 // size we thought it would be, but the cold section is not.
2294 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2295 assert(compiler->info.compTotalHotCodeSize > 0);
2296 assert(compiler->info.compTotalColdCodeSize > 0);
2297 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2298 finalColdCodeSize = codeSize - finalHotCodeSize;
2302 // No hot/cold splitting
2303 assert(codeSize <= compiler->info.compTotalHotCodeSize);
2304 assert(compiler->info.compTotalHotCodeSize > 0);
2305 assert(compiler->info.compTotalColdCodeSize == 0);
2306 finalHotCodeSize = codeSize;
2307 finalColdCodeSize = 0;
2309 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2310 #endif // LATE_DISASM
2312 /* Report any exception handlers to the VM */
2316 #ifdef JIT32_GCENCODER
2321 // Create and store the GC info for this method.
2322 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
2325 FILE* dmpf = jitstdout;
2327 compiler->opts.dmpHex = false;
2328 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>"))
2331 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2336 compiler->opts.dmpHex = true;
2339 if (compiler->opts.dmpHex)
2341 size_t consSize = getEmitter()->emitDataSize();
2343 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2344 fprintf(dmpf, "\n");
2348 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2352 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2354 #ifdef JIT32_GCENCODER
2355 size_t infoSize = compiler->compInfoBlkSize;
2357 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2358 #endif // JIT32_GCENCODER
2360 fprintf(dmpf, "\n");
2364 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
2368 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2370 #ifdef JIT32_GCENCODER
2372 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2373 #endif // JIT32_GCENCODER
2378 if (dmpf != jitstdout)
2385 /* Tell the emitter that we're done with this function */
2387 getEmitter()->emitEndFN();
2389 /* Shut down the spill logic */
2391 regSet.rsSpillDone();
2393 /* Shut down the temp logic */
2399 grossVMsize += compiler->info.compILCodeSize;
2400 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2401 grossNCsize += codeSize + dataSize;
2403 #endif // DISPLAY_SIZES
2405 compiler->EndPhase(PHASE_EMIT_GCEH);
2408 /*****************************************************************************
2410 * Report EH clauses to the VM
2413 void CodeGen::genReportEH()
2415 if (compiler->compHndBBtabCount == 0)
2421 if (compiler->opts.dspEHTable)
2423 printf("*************** EH table for %s\n", compiler->info.compFullName);
2431 bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
2433 unsigned EHCount = compiler->compHndBBtabCount;
2435 #if FEATURE_EH_FUNCLETS
2436 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the VM.
2438 unsigned duplicateClauseCount = 0;
2439 unsigned enclosingTryIndex;
2441 // Duplicate clauses are not used by CoreRT ABI
2444 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2446 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2447 // ignoring 'mutual protect' trys
2448 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2449 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2451 ++duplicateClauseCount;
2454 EHCount += duplicateClauseCount;
2457 #if FEATURE_EH_CALLFINALLY_THUNKS
2458 unsigned clonedFinallyCount = 0;
2460 // Duplicate clauses are not used by CoreRT ABI
2464 // We don't keep track of how many cloned finallys there are. So, go through and count.
2464 // We do a quick pass first through the EH table to see if there are any try/finally
2465 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2467 bool anyFinallys = false;
2468 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2469 HBtab < HBtabEnd; HBtab++)
2471 if (HBtab->HasFinallyHandler())
2479 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2481 if (block->bbJumpKind == BBJ_CALLFINALLY)
2483 ++clonedFinallyCount;
2487 EHCount += clonedFinallyCount;
2490 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2492 #endif // FEATURE_EH_FUNCLETS
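// Illustrative arithmetic for EHCount (hypothetical counts): a method whose EH table has 3 entries,
// whose funclets require 2 duplicated clauses and whose try/finallys produce 1 cloned finally would
// report EHCount = 3 + 2 + 1 = 6 clauses to the VM.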
2495 if (compiler->opts.dspEHTable)
2497 #if FEATURE_EH_FUNCLETS
2498 #if FEATURE_EH_CALLFINALLY_THUNKS
2499 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2500 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2501 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2502 #else // !FEATURE_EH_CALLFINALLY_THUNKS
2503 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2504 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2505 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2506 #endif // !FEATURE_EH_CALLFINALLY_THUNKS
2507 #else // !FEATURE_EH_FUNCLETS
2508 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2509 assert(compiler->compHndBBtabCount == EHCount);
2510 #endif // !FEATURE_EH_FUNCLETS
2514 // Tell the VM how many EH clauses to expect.
2515 compiler->eeSetEHcount(EHCount);
2517 XTnum = 0; // This is the index we pass to the VM
2519 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2520 HBtab < HBtabEnd; HBtab++)
2522 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2524 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2525 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2527 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2528 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2529 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2530 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2532 if (HBtab->HasFilter())
2534 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2538 hndTyp = HBtab->ebdTyp;
2541 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2543 if (isCoreRTABI && (XTnum > 0))
2545 // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2546 // try block as the previous one. The runtime cannot reliably infer this information from
2547 // native code offsets because different try blocks can have the same offsets. An alternative
2548 // solution to this problem would be to insert extra nops to ensure that different try
2549 // blocks have different offsets.
2550 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2552 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2553 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2554 // IL as "try { try {} catch {} catch {} } finally {}".
2555 assert(HBtab->HasCatchHandler());
2556 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2560 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2561 // the fields aren't accurate.
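// (For instance, "TryLength" and "HandlerLength" actually receive the try/handler end offsets;
// see the note on this in the duplicated-clause path below.)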
2563 CORINFO_EH_CLAUSE clause;
2564 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2565 clause.Flags = flags;
2566 clause.TryOffset = tryBeg;
2567 clause.TryLength = tryEnd;
2568 clause.HandlerOffset = hndBeg;
2569 clause.HandlerLength = hndEnd;
2571 assert(XTnum < EHCount);
2573 // Tell the VM about this EH clause.
2574 compiler->eeSetEHinfo(XTnum, &clause);
2579 #if FEATURE_EH_FUNCLETS
2580 // Now output duplicated clauses.
2582 // If a funclet has been created by moving a handler out of a try region that it was originally nested
2583 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2584 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2585 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2586 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2587 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2588 // there is now a "duplicate" clause with the same handler region as another, but a different 'try' region.
2591 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2592 // try or handler region):
2610 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2611 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2612 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2613 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2614 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2615 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2616 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2617 // representing try (1) protecting the new funclets catch (3) and (4).
2618 // The code will be generated as follows:
2620 // ABCFH // "main" code
2625 // The EH regions are:
2630 // D -> G // "duplicate" clause
2631 // E -> G // "duplicate" clause
2633 // Note that we actually need to generate one of these additional "duplicate" clauses for every
2634 // region the funclet is nested in. Take this example:
2666 // When we pull out funclets, we get the following generated code:
2668 // ABCDEHJMO // "main" function
2676 // And the EH regions we report to the VM (in order; main clauses
2677 // first in most-to-least nested order, funclets ("duplicated clauses")
2678 // last, in most-to-least nested) are:
2686 // F -> I // funclet clause #1 for F
2687 // F -> K // funclet clause #2 for F
2688 // F -> L // funclet clause #3 for F
2689 // F -> N // funclet clause #4 for F
2690 // G -> I // funclet clause #1 for G
2691 // G -> K // funclet clause #2 for G
2692 // G -> L // funclet clause #3 for G
2693 // G -> N // funclet clause #4 for G
2694 // I -> K // funclet clause #1 for I
2695 // I -> L // funclet clause #2 for I
2696 // I -> N // funclet clause #3 for I
2697 // K -> N // funclet clause #1 for K
2698 // L -> N // funclet clause #1 for L
2700 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2701 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2702 // to add a clause "F -> G" because F is NOT protected by G, but we still have
2703 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2705 // The overall ordering of the clauses is still the same most-to-least nesting
2706 // after front-to-back start offset. Because we place the funclets at the end
2707 // these new clauses should also go at the end by this ordering.
2710 if (duplicateClauseCount > 0)
2712 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2714 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2716 unsigned enclosingTryIndex;
2718 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2720 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2721 // ignoring 'mutual protect' trys
2722 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2723 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2725 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2726 // that will have the enclosing try protecting the funclet.
2728 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2729 // greater EH table index
2731 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2733 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2734 // filter region, only the filter handler region. This is because exceptions in filters never
2735 // escape; the VM swallows them.
2737 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2738 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2740 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2741 BasicBlock* bbHndLast = encTab->ebdHndLast;
2743 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2745 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2746 hndBeg = compiler->ehCodeOffset(bbHndBeg);
2748 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2749 : compiler->ehCodeOffset(bbTryLast->bbNext);
2750 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2751 : compiler->ehCodeOffset(bbHndLast->bbNext);
2753 if (encTab->HasFilter())
2755 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2759 hndTyp = encTab->ebdTyp;
2762 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2764 // Tell the VM this is an extra clause caused by moving funclets out of line.
2765 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2767 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2768 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2769 // instruction immediately after the 'try' body. So, it really could be more accurately named "TryEndOffset".
2772 CORINFO_EH_CLAUSE clause;
2773 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2774 clause.Flags = flags;
2775 clause.TryOffset = tryBeg;
2776 clause.TryLength = tryEnd;
2777 clause.HandlerOffset = hndBeg;
2778 clause.HandlerLength = hndEnd;
2780 assert(XTnum < EHCount);
2782 // Tell the VM about this EH clause (a duplicated clause).
2783 compiler->eeSetEHinfo(XTnum, &clause);
2786 ++reportedDuplicateClauseCount;
2789 if (duplicateClauseCount == reportedDuplicateClauseCount)
2791 break; // we've reported all of them; no need to continue looking
2795 } // for each 'true' enclosing 'try'
2796 } // for each EH table entry
2798 assert(duplicateClauseCount == reportedDuplicateClauseCount);
2799 } // if (duplicateClauseCount > 0)
2801 #if FEATURE_EH_CALLFINALLY_THUNKS
2802 if (clonedFinallyCount > 0)
2804 unsigned reportedClonedFinallyCount = 0;
2805 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2807 if (block->bbJumpKind == BBJ_CALLFINALLY)
2809 UNATIVE_OFFSET hndBeg, hndEnd;
2811 hndBeg = compiler->ehCodeOffset(block);
2813 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
2814 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
2815 BasicBlock* bbLabel = block->bbNext;
2816 if (block->isBBCallAlwaysPair())
2818 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
2820 if (bbLabel == nullptr)
2822 hndEnd = compiler->info.compNativeCodeSize;
2826 assert(bbLabel->bbEmitCookie != nullptr);
2827 hndEnd = compiler->ehCodeOffset(bbLabel);
2830 CORINFO_EH_CLAUSE clause;
2831 clause.ClassToken = 0; // unused
2832 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
2833 clause.TryOffset = hndBeg;
2834 clause.TryLength = hndBeg;
2835 clause.HandlerOffset = hndBeg;
2836 clause.HandlerLength = hndEnd;
2838 assert(XTnum < EHCount);
2840 // Tell the VM about this EH clause (a cloned finally clause).
2841 compiler->eeSetEHinfo(XTnum, &clause);
2844 ++reportedClonedFinallyCount;
2847 if (clonedFinallyCount == reportedClonedFinallyCount)
2849 break; // we're done; no need to keep looking
2852 } // block is BBJ_CALLFINALLY
2855 assert(clonedFinallyCount == reportedClonedFinallyCount);
2856 } // if (clonedFinallyCount > 0)
2857 #endif // FEATURE_EH_CALLFINALLY_THUNKS
2859 #endif // FEATURE_EH_FUNCLETS
2861 assert(XTnum == EHCount);
2864 //----------------------------------------------------------------------
2865 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2866 // helper should be used.
2869 // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
2872 // true if an optimized write barrier helper should be used, false otherwise.
2873 // Note: only x86 implements register-specific source optimized write
2874 // barriers currently.
2876 bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
2878 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2880 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2889 //----------------------------------------------------------------------
2890 // genUseOptimizedWriteBarriers: Determine if an optimized write barrier
2891 // helper should be used.
2893 // This has the same functionality as the version of
2894 // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
2895 // determining what the required write barrier form is, if possible.
2898 // tgt - target tree of write (e.g., GT_STOREIND)
2899 // assignVal - tree with value to write
2902 // true if an optimized write barrier helper should be used, false otherwise.
2903 // Note: only x86 implements register-specific source optimized write
2904 // barriers currently.
2906 bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
2908 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2910 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
2911 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
2920 //----------------------------------------------------------------------
2921 // genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
2922 // barrier, and the write barrier form required, determine the helper to call.
2925 // tgt - target tree of write (e.g., GT_STOREIND)
2926 // wbf - already computed write barrier form to use
2929 // Write barrier helper to use.
2931 // Note: do not call this function to get an optimized write barrier helper (e.g., one of the x86 register-specific helpers).
2934 CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
2936 noway_assert(tgt->gtOper == GT_STOREIND);
2938 CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
2941 if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2943 helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
2947 if (tgt->gtOper != GT_CLS_VAR)
2949 if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
2951 if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
2953 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
2955 else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
2957 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
2961 assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
2962 ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
2963 (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
2964 ((helper == CORINFO_HELP_ASSIGN_REF) &&
2965 (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
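// Quick reference for the selection above (summary only; the code above is authoritative):
//   WBF_NoBarrier_CheckNotHeapInDebug                                       -> CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP
//   checked forms (GTF_IND_TGTANYWHERE or TYP_I_IMPL address, not unchecked) -> CORINFO_HELP_CHECKED_ASSIGN_REF
//   everything else                                                          -> CORINFO_HELP_ASSIGN_REF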
2970 //----------------------------------------------------------------------
2971 // genGCWriteBarrier: Generate a write barrier for a node.
2974 // tgt - target tree of write (e.g., GT_STOREIND)
2975 // wbf - already computed write barrier form to use
2977 void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
2979 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
2981 #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
2982 // We classify the "tgt" trees as follows:
2983 // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
2984 // IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, (GT_LCL_VAR)) }
2985 // then let "v" be the GT_LCL_VAR.
2986 // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
2987 // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
2988 // * Otherwise, classify as CWBKind_OtherByRefLocal.
2989 // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
2990 // Otherwise, classify as CWBKind_Unclassified.
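// Illustrative example (hypothetical local number): for a store through an incoming by-ref
// parameter shaped like
//     IND(ADDR(IND(LCL_VAR V02)))
// "v" is V02; if V02 is the return buffer argument we classify as CWBKind_RetBuf, if it is
// another TYP_BYREF parameter as CWBKind_ByRefArg, and otherwise as CWBKind_OtherByRefLocal.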
2992 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
2993 if (tgt->gtOper == GT_IND)
2995 GenTree* lcl = NULL;
2997 GenTree* indArg = tgt->gtOp.gtOp1;
2998 if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
3000 indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
3002 if (indArg->gtOper == GT_LCL_VAR)
3006 else if (indArg->gtOper == GT_ADD)
3008 if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3010 lcl = indArg->gtOp.gtOp1;
3012 else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR)
3014 lcl = indArg->gtOp.gtOp2;
3019 wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
3020 unsigned lclNum = lcl->AsLclVar()->GetLclNum();
3021 if (lclNum == compiler->info.compRetBuffArg)
3023 wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
3027 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
3028 if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
3030 wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
3036 // We should have eliminated the barrier for this case.
3037 assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
3041 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
3045 // Enable this to sample the unclassified trees.
3046 static int unclassifiedBarrierSite = 0;
3047 if (wbKind == CWBKind_Unclassified)
3049 unclassifiedBarrierSite++;
3050 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
3055 inst_IV(INS_push, wbKind);
3056 genEmitHelperCall(helper,
3058 EA_PTRSIZE); // retSize
3059 SubtractStackLevel(4);
3063 genEmitHelperCall(helper,
3065 EA_PTRSIZE); // retSize
3068 #else // !FEATURE_COUNT_GC_WRITE_BARRIERS
3069 genEmitHelperCall(helper,
3071 EA_PTRSIZE); // retSize
3072 #endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
3076 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3077 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3079 XX Prolog / Epilog XX
3081 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3082 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3085 /*****************************************************************************
3087 * Generates code for moving incoming register arguments to their
3088 * assigned location, in the function prolog.
3092 #pragma warning(push)
3093 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3095 void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
3100 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
3104 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
3105 unsigned argNum; // current argNum, always in [0..argMax-1]
3106 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
3107 unsigned regArgNum; // index into the regArgTab[] table
3108 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
3109 bool doingFloat = regState->rsIsFloat;
3111 // We should be generating the prolog block when we are called
3112 assert(compiler->compGeneratingProlog);
3114 // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
3115 noway_assert(regArgMaskLive != 0);
3117 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
3118 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
3120 // The regArgTab can always have unused entries,
3121 // for example if an architecture always increments the arg register number but uses either
3122 // an integer register or a floating point register to hold the next argument
3123 // then with a mix of float and integer args you could have:
3125 // sampleMethod(int i, float x, int j, float y, int k, float z);
3126 // r0, r2 and r4 as valid integer arguments with argMax as 5
3127 // and f1, f3 and f5 as valid floating point arguments with argMax as 6
3128 // The first one is doingFloat==false and the second one is doingFloat==true
3130 // If a fixed return buffer (in r8) was also present then the first one would become:
3131 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
3134 argMax = regState->rsCalleeRegArgCount;
3135 fixedRetBufIndex = (unsigned)-1; // Invalid value
3137 // If necessary we will select a correct xtraReg for circular floating point args later.
3141 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
3143 else // we are doing the integer registers
3145 noway_assert(argMax <= MAX_REG_ARG);
3146 if (hasFixedRetBuffReg())
3148 fixedRetBufIndex = theFixedRetBuffArgNum();
3149 // We have an additional integer register argument when hasFixedRetBuffReg() is true
3150 argMax = fixedRetBufIndex + 1;
3151 assert(argMax == (MAX_REG_ARG + 1));
3156 // Construct a table with the register arguments, for detecting circular and
3157 // non-circular dependencies between the register arguments. A dependency is when
3158 // an argument register Rn needs to be moved to register Rm that is also an argument
3159 // register. The table is constructed in the order the arguments are passed in
3160 // registers: the first register argument is in regArgTab[0], the second in
3161 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
3162 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
3163 // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
3164 // we have increased the allocated size of the regArgTab[] by one.
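// Illustrative example (hypothetical assignments): if the argument arriving in r0 must be homed
// to r1 while the argument arriving in r1 must be homed to r0, then regArgTab[0] and regArgTab[1]
// form a two-entry cycle (each entry's trashBy refers to the other). Such cycles are broken
// further below, either with an 'xchg' on xarch or by copying one member through xtraReg.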
3168 unsigned varNum; // index into compiler->lvaTable[] for this register argument
3169 #if defined(UNIX_AMD64_ABI)
3170 var_types type; // the Jit type of this regArgTab entry
3171 #endif // defined(UNIX_AMD64_ABI)
3172 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
3173 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
3174 // argument register number 'x'. Only used when circular = true.
3175 char slot; // 0 means the register is not used for a register argument
3176 // 1 means the first part of a register argument
3177 // 2, 3 or 4 means the second, third or fourth part of a multireg argument
3178 bool stackArg; // true if the argument gets homed to the stack
3179 bool processed; // true after we've processed the argument (and it is in its final location)
3180 bool circular; // true if this register participates in a circular dependency loop.
3182 #ifdef UNIX_AMD64_ABI
3184 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
3185 // the type of the lclVar in ways that are not ascertainable from lvType.
3186 // So, for that case we retain the type of the register in the regArgTab.
3188 var_types getRegType(Compiler* compiler)
3190 return type; // UNIX_AMD64 implementation
3193 #else // !UNIX_AMD64_ABI
3195 // In other cases, we simply use the type of the lclVar to determine the type of the register.
3196 var_types getRegType(Compiler* compiler)
3198 const LclVarDsc& varDsc = compiler->lvaTable[varNum];
3199 // Check if this is an HFA register arg and return the HFA type
3200 if (varDsc.lvIsHfaRegArg())
3202 #if defined(_TARGET_WINDOWS_)
3203 // Cannot have hfa types on windows arm targets
3204 // in vararg methods.
3205 assert(!compiler->info.compIsVarArgs);
3206 #endif // defined(_TARGET_WINDOWS_)
3207 return varDsc.GetHfaType();
3209 return compiler->mangleVarArgsType(varDsc.lvType);
3212 #endif // !UNIX_AMD64_ABI
3213 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
3218 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
3220 varDsc = compiler->lvaTable + varNum;
3222 // Is this variable a register arg?
3223 if (!varDsc->lvIsParam)
3228 if (!varDsc->lvIsRegArg)
3233 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
3234 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
3235 // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field; otherwise
3236 // we use the original TYP_STRUCT argument.
3238 if (varDsc->lvPromoted || varDsc->lvIsStructField)
3240 LclVarDsc* parentVarDsc = varDsc;
3241 if (varDsc->lvIsStructField)
3243 assert(!varDsc->lvPromoted);
3244 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
3247 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
3249 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
3251 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
3253 // For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[].
3255 if (varDsc->lvPromoted)
3262 // For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[].
3264 if (varDsc->lvIsStructField)
3271 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
3272 // Change regType to the HFA type when we have a HFA argument
3273 if (varDsc->lvIsHfaRegArg())
3275 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3276 if (compiler->info.compIsVarArgs)
3278 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
3280 #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3281 regType = varDsc->GetHfaType();
3284 #if defined(UNIX_AMD64_ABI)
3285 if (!varTypeIsStruct(regType))
3286 #endif // defined(UNIX_AMD64_ABI)
3288 // A struct might be passed partially in XMM register for System V calls.
3289 // So a single arg might use both register files.
3290 if (isFloatRegType(regType) != doingFloat)
3298 #if defined(UNIX_AMD64_ABI)
3299 if (varTypeIsStruct(varDsc))
3301 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
3302 assert(typeHnd != nullptr);
3303 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3304 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3305 if (!structDesc.passedInRegisters)
3307 // The var is not passed in registers.
3311 unsigned firstRegSlot = 0;
3312 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3314 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3319 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3320 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3321 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3322 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3325 // The RyuJit backend makes another implicit assumption: for Vector3 type args, whether passed in
3326 // registers or on the stack, the upper-most 4 bytes will be zero.
3328 // For P/Invoke return and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
3329 // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence assumption 2 is invalid.
3332 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3333 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3334 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
3335 // there is no need to clear upper 4-bytes of Vector3 type args.
3337 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3338 // Vector3 return values are returned in two return registers and the caller assembles them into a
3339 // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
3340 // type args in the prolog and of the Vector3 type return value of a call.
3342 if (varDsc->lvType == TYP_SIMD12)
3344 regType = TYP_DOUBLE;
3349 regType = compiler->GetEightByteType(structDesc, slotCounter);
3352 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3354 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3355 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3357 // Store the reg for the first slot.
3360 firstRegSlot = regArgNum;
3363 // Bingo - add it to our table
3364 noway_assert(regArgNum < argMax);
3365 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3366 // not be multiple vars representing this argument)
3368 regArgTab[regArgNum].varNum = varNum;
3369 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3370 regArgTab[regArgNum].type = regType;
3377 continue; // Nothing to do for this regState set.
3380 regArgNum = firstRegSlot;
3383 #endif // defined(UNIX_AMD64_ABI)
3385 // Bingo - add it to our table
3386 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
3388 noway_assert(regArgNum < argMax);
3389 // We better not have added it already (there better not be multiple vars representing this argument)
3391 noway_assert(regArgTab[regArgNum].slot == 0);
3393 #if defined(UNIX_AMD64_ABI)
3394 // Set the register type.
3395 regArgTab[regArgNum].type = regType;
3396 #endif // defined(UNIX_AMD64_ABI)
3398 regArgTab[regArgNum].varNum = varNum;
3399 regArgTab[regArgNum].slot = 1;
3403 #if FEATURE_MULTIREG_ARGS
3404 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
3406 if (varDsc->lvIsHfaRegArg())
3408 // We have an HFA argument, set slots to the number of registers used
3409 slots = varDsc->lvHfaSlots();
3413 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3414 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3415 // We have a non-HFA multireg argument, set slots to two
3419 // Note that regArgNum+1 represents an argument index not an actual argument register.
3420 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
3422 // This is the setup for the rest of a multireg struct arg
3424 for (int i = 1; i < slots; i++)
3426 noway_assert((regArgNum + i) < argMax);
3428 // We better not have added it already (there better not be multiple vars representing this argument)
3430 noway_assert(regArgTab[regArgNum + i].slot == 0);
3432 regArgTab[regArgNum + i].varNum = varNum;
3433 regArgTab[regArgNum + i].slot = (char)(i + 1);
3436 #endif // FEATURE_MULTIREG_ARGS
3440 int lclSize = compiler->lvaLclSize(varNum);
3442 if (lclSize > REGSIZE_BYTES)
3444 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3445 slots = lclSize / REGSIZE_BYTES;
3446 if (regArgNum + slots > maxRegArgNum)
3448 slots = maxRegArgNum - regArgNum;
3451 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
3452 assert(slots < INT8_MAX);
3453 for (char i = 1; i < slots; i++)
3455 regArgTab[regArgNum + i].varNum = varNum;
3456 regArgTab[regArgNum + i].slot = i + 1;
3458 #endif // _TARGET_ARM_
3460 for (int i = 0; i < slots; i++)
3462 regType = regArgTab[regArgNum + i].getRegType(compiler);
3463 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3465 #if !defined(UNIX_AMD64_ABI)
3466 assert((i > 0) || (regNum == varDsc->lvArgReg));
3467 #endif // defined(UNIX_AMD64_ABI)
3469 // Is the arg dead on entry to the method ?
3471 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3473 if (varDsc->lvTrackedNonStruct())
3475 // We may now see some tracked locals with zero refs.
3476 // See Lowering::DoPhase. Tolerate these.
3477 if (varDsc->lvRefCnt() > 0)
3479 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3485 noway_assert(varDsc->lvType == TYP_STRUCT);
3486 #else // !_TARGET_X86_
3487 // For LSRA, it may not be in regArgMaskLive if it has a zero
3488 // refcnt. This is in contrast with the non-LSRA case in which all
3489 // non-tracked args are assumed live on entry.
3490 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3491 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs) ||
3492 (varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
3493 #endif // !_TARGET_X86_
3495 // Mark it as processed and be done with it
3496 regArgTab[regArgNum + i].processed = true;
3501 // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3502 // could be equal to lvArgReg. The pre-spilled registers are not considered live either, since
3503 // they've already been spilled.
3505 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3506 #endif // _TARGET_ARM_
3508 #if !defined(UNIX_AMD64_ABI)
3509 noway_assert(xtraReg != (varDsc->lvArgReg + i));
3511 noway_assert(regArgMaskLive & genRegMask(regNum));
3514 regArgTab[regArgNum + i].processed = false;
3516 /* mark stack arguments since we will take care of those first */
3517 regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3519 /* If it goes on the stack or in a register that doesn't hold
3520 * an argument anymore -> CANNOT form a circular dependency */
3522 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3524 /* will trash another argument -> possible dependency
3525 * We may need several passes after the table is constructed
3526 * to decide on that */
3528 /* Maybe the argument stays in the register (IDEAL) */
3530 if ((i == 0) && (varDsc->lvRegNum == regNum))
3535 #if !defined(_TARGET_64BIT_)
3536 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
3540 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
3545 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3546 (REG_NEXT(varDsc->lvRegNum) == regNum))
3550 #endif // !defined(_TARGET_64BIT_)
3551 regArgTab[regArgNum + i].circular = true;
3556 regArgTab[regArgNum + i].circular = false;
3558 /* mark the argument register as free */
3559 regArgMaskLive &= ~genRegMask(regNum);
3564 /* Find the circular dependencies for the argument registers, if any.
3565 * A circular dependency is a set of registers R1, R2, ..., Rn
3566 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
3571 /* Possible circular dependencies still exist; the previous pass was not enough
3572 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
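// A minimal standalone sketch of the sieve idea (not JIT code; the helper names are made up):
// repeatedly home any register whose destination register is no longer live; whatever is left
// once no more progress can be made belongs to a genuine cycle.
//
//     bool change;
//     do
//     {
//         change = false;
//         for (unsigned i = 0; i < argMax; i++)
//         {
//             if (!homed[i] && destinationIsFree(destOf[i]))
//             {
//                 moveReg(i, destOf[i]); // homes argument i and frees its source register
//                 homed[i] = true;
//                 change   = true;
//             }
//         }
//     } while (change);
//     // anything still not homed participates in a circular dependency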
3578 for (argNum = 0; argNum < argMax; argNum++)
3580 // If we already marked the argument as non-circular then continue
3582 if (!regArgTab[argNum].circular)
3587 if (regArgTab[argNum].slot == 0) // Not a register argument
3592 varNum = regArgTab[argNum].varNum;
3593 noway_assert(varNum < compiler->lvaCount);
3594 varDsc = compiler->lvaTable + varNum;
3595 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3597 /* cannot possibly have stack arguments */
3598 noway_assert(varDsc->lvIsInReg());
3599 noway_assert(!regArgTab[argNum].stackArg);
3601 var_types regType = regArgTab[argNum].getRegType(compiler);
3602 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3604 regNumber destRegNum = REG_NA;
3605 if (regArgTab[argNum].slot == 1)
3607 destRegNum = varDsc->lvRegNum;
3609 #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
3612 assert(regArgTab[argNum].slot == 2);
3614 assert(regArgTab[argNum - 1].slot == 1);
3615 assert(regArgTab[argNum - 1].varNum == varNum);
3616 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
3617 regArgMaskLive &= ~genRegMask(regNum);
3618 regArgTab[argNum].circular = false;
3622 #elif !defined(_TARGET_64BIT_)
3623 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3625 destRegNum = varDsc->lvOtherReg;
3629 assert(regArgTab[argNum].slot == 2);
3630 assert(varDsc->TypeGet() == TYP_DOUBLE);
3631 destRegNum = REG_NEXT(varDsc->lvRegNum);
3633 #endif // !defined(_TARGET_64BIT_)
3634 noway_assert(destRegNum != REG_NA);
3635 if (genRegMask(destRegNum) & regArgMaskLive)
3637 /* we are trashing a live argument register - record it */
3638 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3639 noway_assert(destRegArgNum < argMax);
3640 regArgTab[destRegArgNum].trashBy = argNum;
3644 /* argument goes to a free register */
3645 regArgTab[argNum].circular = false;
3648 /* mark the argument register as free */
3649 regArgMaskLive &= ~genRegMask(regNum);
3655 /* At this point, everything that has the "circular" flag
3656 * set to "true" forms a circular dependency */
3657 CLANG_FORMAT_COMMENT_ANCHOR;
3664 printf("Circular dependencies found while home-ing the incoming arguments.\n");
3669 // LSRA allocates registers to incoming parameters in order and will not overwrite
3670 // a register still holding a live parameter.
3672 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3673 "Homing of float argument registers with circular dependencies not implemented.");
3675 /* Now move the arguments to their locations.
3676 * First consider ones that go on the stack since they may
3677 * free some registers. */
3679 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3680 for (argNum = 0; argNum < argMax; argNum++)
3684 #if defined(UNIX_AMD64_ABI)
3685 // If this is the wrong register file, just continue.
3686 if (regArgTab[argNum].type == TYP_UNDEF)
3688 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3689 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3690 // The next register file processing will process it.
3693 #endif // defined(UNIX_AMD64_ABI)
3695 // If the arg is dead on entry to the method, skip it
3697 if (regArgTab[argNum].processed)
3702 if (regArgTab[argNum].slot == 0) // Not a register argument
3707 varNum = regArgTab[argNum].varNum;
3708 noway_assert(varNum < compiler->lvaCount);
3709 varDsc = compiler->lvaTable + varNum;
3711 #ifndef _TARGET_64BIT_
3712 // If not a stack arg go to the next one
3713 if (varDsc->lvType == TYP_LONG)
3715 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
3719 else if (varDsc->lvOtherReg != REG_STK)
3725 #endif // !_TARGET_64BIT_
3727 // If not a stack arg go to the next one
3728 if (!regArgTab[argNum].stackArg)
3734 #if defined(_TARGET_ARM_)
3735 if (varDsc->lvType == TYP_DOUBLE)
3737 if (regArgTab[argNum].slot == 2)
3739 // We handled the entire double when processing the first half (slot == 1)
3745 noway_assert(regArgTab[argNum].circular == false);
3747 noway_assert(varDsc->lvIsParam);
3748 noway_assert(varDsc->lvIsRegArg);
3749 noway_assert(varDsc->lvIsInReg() == false ||
3750 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
3752 var_types storeType = TYP_UNDEF;
3753 unsigned slotSize = TARGET_POINTER_SIZE;
3755 if (varTypeIsStruct(varDsc))
3757 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3758 #if FEATURE_MULTIREG_ARGS
3759 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3760 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3761 #endif // FEATURE_MULTIREG_ARGS
3762 #ifdef UNIX_AMD64_ABI
3763 storeType = regArgTab[argNum].type;
3764 #endif // UNIX_AMD64_ABI
3765 if (varDsc->lvIsHfaRegArg())
3768 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3769 storeType = TYP_FLOAT;
3770 slotSize = (unsigned)emitActualTypeSize(storeType);
3771 #else // _TARGET_ARM64_
3772 storeType = genActualType(varDsc->GetHfaType());
3773 slotSize = (unsigned)emitActualTypeSize(storeType);
3774 #endif // _TARGET_ARM64_
3777 else // Not a struct type
3779 storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
3781 size = emitActualTypeSize(storeType);
3783 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
3784 #endif //_TARGET_X86_
3786 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
3788 // Stack argument - if the ref count is 0 don't care about it
3790 if (!varDsc->lvOnFrame)
3792 noway_assert(varDsc->lvRefCnt() == 0);
3796 // Since slot is typically 1, baseOffset is typically 0
3797 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
3799 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
3801 #ifndef UNIX_AMD64_ABI
3802 // Check if we are writing past the end of the struct
3803 if (varTypeIsStruct(varDsc))
3805 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
3807 #endif // !UNIX_AMD64_ABI
3808 #ifdef USING_SCOPE_INFO
3809 if (regArgTab[argNum].slot == 1)
3811 psiMoveToStack(varNum);
3813 #endif // USING_SCOPE_INFO
3816 /* mark the argument as processed */
3818 regArgTab[argNum].processed = true;
3819 regArgMaskLive &= ~genRegMask(srcRegNum);
3821 #if defined(_TARGET_ARM_)
3822 if (storeType == TYP_DOUBLE)
3824 regArgTab[argNum + 1].processed = true;
3825 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
3830 /* Process any circular dependencies */
3833 unsigned begReg, destReg, srcReg;
3834 unsigned varNumDest, varNumSrc;
3835 LclVarDsc* varDscDest;
3836 LclVarDsc* varDscSrc;
3837 instruction insCopy = INS_mov;
3841 #if defined(FEATURE_HFA) || defined(UNIX_AMD64_ABI)
3842 insCopy = ins_Copy(TYP_DOUBLE);
3843 // Compute xtraReg here when we have a float argument
3844 assert(xtraReg == REG_NA);
3846 regMaskTP fpAvailMask;
3848 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
3849 #if defined(FEATURE_HFA)
3850 fpAvailMask &= RBM_ALLDOUBLE;
3852 #if !defined(UNIX_AMD64_ABI)
3853 #error Error. Wrong architecture.
3854 #endif // !defined(UNIX_AMD64_ABI)
3855 #endif // defined(FEATURE_HFA)
3857 if (fpAvailMask == RBM_NONE)
3859 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
3860 #if defined(FEATURE_HFA)
3861 fpAvailMask &= RBM_ALLDOUBLE;
3863 #if !defined(UNIX_AMD64_ABI)
3864 #error Error. Wrong architecture.
3865 #endif // !defined(UNIX_AMD64_ABI)
3866 #endif // defined(FEATURE_HFA)
3869 assert(fpAvailMask != RBM_NONE);
3871 // We pick the lowest avail register number
3872 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
3873 xtraReg = genRegNumFromMask(tempMask);
3874 #elif defined(_TARGET_X86_)
3875 // This case shouldn't occur on x86 since NYI gets converted to an assert
3876 NYI("Homing circular FP registers via xtraReg");
3880 for (argNum = 0; argNum < argMax; argNum++)
3882 // If not a circular dependency then continue
3883 if (!regArgTab[argNum].circular)
3888 // If already processed the dependency then continue
3890 if (regArgTab[argNum].processed)
3895 if (regArgTab[argNum].slot == 0) // Not a register argument
3900 destReg = begReg = argNum;
3901 srcReg = regArgTab[argNum].trashBy;
3903 varNumDest = regArgTab[destReg].varNum;
3904 noway_assert(varNumDest < compiler->lvaCount);
3905 varDscDest = compiler->lvaTable + varNumDest;
3906 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
3908 noway_assert(srcReg < argMax);
3909 varNumSrc = regArgTab[srcReg].varNum;
3910 noway_assert(varNumSrc < compiler->lvaCount);
3911 varDscSrc = compiler->lvaTable + varNumSrc;
3912 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
3914 emitAttr size = EA_PTRSIZE;
3916 #ifdef _TARGET_XARCH_
3918 // The following code relies upon the target architecture having an
3919 // 'xchg' instruction which directly swaps the values held in two registers.
3920 // On the ARM architecture we do not have such an instruction.
3922 if (destReg == regArgTab[srcReg].trashBy)
3924 /* only 2 registers form the circular dependency - use "xchg" */
3926 varNum = regArgTab[argNum].varNum;
3927 noway_assert(varNum < compiler->lvaCount);
3928 varDsc = compiler->lvaTable + varNum;
3929 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3931 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
3933 /* Set "size" to indicate GC if one and only one of
3934 * the operands is a pointer
3935 * RATIONALE: If both are pointers, nothing changes in
3936 * the GC pointer tracking. If only one is a pointer we
3937 * have to "swap" the registers in the GC reg pointer mask
3940 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
3945 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
3947 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
3948 regSet.verifyRegUsed(varDscSrc->lvRegNum);
3949 regSet.verifyRegUsed(varDscSrc->lvArgReg);
3951 /* mark both arguments as processed */
3952 regArgTab[destReg].processed = true;
3953 regArgTab[srcReg].processed = true;
3955 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
3956 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
3957 #ifdef USING_SCOPE_INFO
3958 psiMoveToReg(varNumSrc);
3959 psiMoveToReg(varNumDest);
3960 #endif // USING_SCOPE_INFO
3963 #endif // _TARGET_XARCH_
3965 var_types destMemType = varDscDest->TypeGet();
3968 bool cycleAllDouble = true; // assume the best
3970 unsigned iter = begReg;
3973 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
3975 cycleAllDouble = false;
3978 iter = regArgTab[iter].trashBy;
3979 } while (iter != begReg);
3981 // We may treat doubles as floats for ARM because we could have partial circular
3982 // dependencies of a float with a lo/hi part of the double. We mark the
3983 // trashBy values for each slot of the double, so let the circular dependency
3984 // logic work its way out for floats rather than doubles. If a cycle has all
3985 // doubles, then optimize so that instead of two vmov.f32's to move a double,
3986 // we can use one vmov.f64.
3988 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
3990 destMemType = TYP_FLOAT;
3992 #endif // _TARGET_ARM_
3994 if (destMemType == TYP_REF)
3998 else if (destMemType == TYP_BYREF)
4002 else if (destMemType == TYP_DOUBLE)
4006 else if (destMemType == TYP_FLOAT)
4011 /* move the dest reg (begReg) in the extra reg */
4013 assert(xtraReg != REG_NA);
4015 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4017 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4019 regSet.verifyRegUsed(xtraReg);
4021 *pXtraRegClobbered = true;
4022 #ifdef USING_SCOPE_INFO
4023 psiMoveToReg(varNumDest, xtraReg);
4024 #endif // USING_SCOPE_INFO
4025 /* start moving everything to its right place */
4027 while (srcReg != begReg)
4031 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4032 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4034 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4036 regSet.verifyRegUsed(destRegNum);
4038 /* mark 'src' as processed */
4039 noway_assert(srcReg < argMax);
4040 regArgTab[srcReg].processed = true;
4042 if (size == EA_8BYTE)
4043 regArgTab[srcReg + 1].processed = true;
4045 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4047 /* move to the next pair */
4049 srcReg = regArgTab[srcReg].trashBy;
4051 varDscDest = varDscSrc;
4052 destMemType = varDscDest->TypeGet();
4054 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4056 destMemType = TYP_FLOAT;
4059 varNumSrc = regArgTab[srcReg].varNum;
4060 noway_assert(varNumSrc < compiler->lvaCount);
4061 varDscSrc = compiler->lvaTable + varNumSrc;
4062 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4064 if (destMemType == TYP_REF)
4068 else if (destMemType == TYP_DOUBLE)
4078 /* take care of the beginning register */
4080 noway_assert(srcReg == begReg);
4082 /* move the dest reg (begReg) in the extra reg */
4084 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4086 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
4088 regSet.verifyRegUsed(destRegNum);
4089 #ifdef USING_SCOPE_INFO
4090 psiMoveToReg(varNumSrc);
4091 #endif // USING_SCOPE_INFO
4092 /* mark the beginning register as processed */
4094 regArgTab[srcReg].processed = true;
4096 if (size == EA_8BYTE)
4097 regArgTab[srcReg + 1].processed = true;
4099 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4104 /* Finally take care of the remaining arguments that must be enregistered */
4105 while (regArgMaskLive)
4107 regMaskTP regArgMaskLiveSave = regArgMaskLive;
4109 for (argNum = 0; argNum < argMax; argNum++)
4111 /* If already processed go to the next one */
4112 if (regArgTab[argNum].processed)
4117 if (regArgTab[argNum].slot == 0)
4118 { // Not a register argument
4122 varNum = regArgTab[argNum].varNum;
4123 noway_assert(varNum < compiler->lvaCount);
4124 varDsc = compiler->lvaTable + varNum;
4125 var_types regType = regArgTab[argNum].getRegType(compiler);
4126 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4128 #if defined(UNIX_AMD64_ABI)
4129 if (regType == TYP_UNDEF)
4131 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4132 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4133 // The next register file processing will process it.
4134 regArgMaskLive &= ~genRegMask(regNum);
4137 #endif // defined(UNIX_AMD64_ABI)
4139 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4140 #ifndef _TARGET_64BIT_
4141 #ifndef _TARGET_ARM_
4142 // Right now we think that incoming arguments are not pointer sized. When we eventually
4143 // understand the calling convention, this still won't be true. But maybe we'll have a better
4144 // idea of how to ignore it.
4146 // On Arm, a long can be passed in register
4147 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
4149 #endif //_TARGET_64BIT_
4151 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
4153 /* Register argument - hopefully it stays in the same register */
4154 regNumber destRegNum = REG_NA;
4155 var_types destMemType = varDsc->TypeGet();
4157 if (regArgTab[argNum].slot == 1)
4159 destRegNum = varDsc->lvRegNum;
4162 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
4164 // The second half of the double has already been processed! Treat this as a single.
4165 destMemType = TYP_FLOAT;
4167 #endif // _TARGET_ARM_
4169 #ifndef _TARGET_64BIT_
4170 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
4172 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
4173 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
4175 destRegNum = regNum;
4179 destRegNum = varDsc->lvOtherReg;
4182 assert(destRegNum != REG_STK);
4186 assert(regArgTab[argNum].slot == 2);
4187 assert(destMemType == TYP_DOUBLE);
4189 // For doubles, we move the entire double using the argNum representing
4190 // the first half of the double. There are two things we won't do:
4191 // (1) move the double when the 1st half of the destination is free but the
4192 // 2nd half is occupied, and (2) move the double when the 2nd half of the
4193 // destination is free but the 1st half is occupied. Here we consider the
4194 // case where the first half can't be moved initially because its target is
4195 // still busy, but the second half can be moved. We wait until the entire
4196 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
4197 // and F2 single moving to F16. When we process F0, its target F2 is busy,
4198 // so we skip it on the first pass. When we process F1, its target F3 is
4199 // available. However, we want to move F0/F1 all at once, so we skip it here.
4200 // We process F2, which frees up F2. The next pass through, we process F0 and
4201 // F2/F3 are empty, so we move it. Note that if half of a double is involved
4202 // in a circularity with a single, then we will have already moved that half
4203 // above, so we go ahead and move the remaining half as a single.
4204 // Because there are no circularities left, we are guaranteed to terminate.
4207 assert(regArgTab[argNum - 1].slot == 1);
4209 if (!regArgTab[argNum - 1].processed)
4211 // The first half of the double hasn't been processed; skip this half so the whole double can be moved together later
4215 // The first half of the double has been processed but the second half hasn't!
4216 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
4217 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
4218 // dependency logic above will move them as singles, leaving just F3 to move. Treat
4219 // it as a single to finish the shuffling.
4221 destMemType = TYP_FLOAT;
4222 destRegNum = REG_NEXT(varDsc->lvRegNum);
4224 #endif // !_TARGET_64BIT_
4225 #if (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4228 assert(regArgTab[argNum].slot == 2);
4230 assert(regArgTab[argNum - 1].slot == 1);
4231 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4232 destRegNum = varDsc->lvRegNum;
4233 noway_assert(regNum != destRegNum);
4236 #endif // (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4237 noway_assert(destRegNum != REG_NA);
4238 if (destRegNum != regNum)
4240 /* Cannot trash a currently live register argument.
4241 * Skip this one until its target is free,
4242 * which is guaranteed to happen since we have no circular dependencies. */
4244 regMaskTP destMask = genRegMask(destRegNum);
4246 // Don't process the double until both halves of the destination are clear.
4247 if (genActualType(destMemType) == TYP_DOUBLE)
4249 assert((destMask & RBM_DBL_REGS) != 0);
4250 destMask |= genRegMask(REG_NEXT(destRegNum));
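// For example (illustrative, assuming destRegNum is F2): destMask then covers both
// F2 and F3, so the double is skipped until neither half of its destination is still
// a live incoming argument, matching the "move the entire double at once" policy above.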
4254 if (destMask & regArgMaskLive)
4259 /* Move it to the new register */
4261 emitAttr size = emitActualTypeSize(destMemType);
4263 #if defined(_TARGET_ARM64_)
4264 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4266 // For a SIMD type that is passed in two integer registers,
4267 // limit the copy below to the first 8 bytes from the first integer register.
4268 // The remaining 8 bytes from the second slot are handled in the code further below.
4269 assert(EA_SIZE(size) >= 8);
4274 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
4275 #ifdef USING_SCOPE_INFO
4276 psiMoveToReg(varNum);
4277 #endif // USING_SCOPE_INFO
4280 /* mark the argument as processed */
4282 assert(!regArgTab[argNum].processed);
4283 regArgTab[argNum].processed = true;
4284 regArgMaskLive &= ~genRegMask(regNum);
4285 #if FEATURE_MULTIREG_ARGS
4286 int argRegCount = 1;
4288 if (genActualType(destMemType) == TYP_DOUBLE)
4293 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4294 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4297 int nextArgNum = argNum + 1;
4298 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4299 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4300 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4301 // and moves the 0th element of the src reg into the 1st element of the dest reg.
4302 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
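// Illustrative example (xmm0/xmm1 assumed for exposition): if the first 8 bytes of the
// struct arrived in destRegNum = xmm0 and the second 8 bytes in nextRegNum = xmm1, then
// "shufpd xmm0, xmm1, 0" keeps element 0 of xmm0 and writes element 0 of xmm1 into
// element 1 of xmm0, reassembling the full 16-byte value in the destination register.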
4303 // Set destRegNum to regNum so that we skip the setting of the register below,
4304 // but mark argNum as processed and clear regNum from the live mask.
4305 destRegNum = regNum;
4307 #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4308 #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4309 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4311 // For a SIMD type that is passed in two integer registers,
4312 // Code above copies the first integer argument register into the lower 8 bytes
4313 // of the target register. Here we must handle the second 8 bytes of the slot pair by
4314 // inserting the second integer register into the upper 8 bytes of the target
4315 // SIMD floating point register.
4317 int nextArgNum = argNum + 1;
4318 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4319 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4320 noway_assert(genIsValidIntReg(nextRegNum));
4321 noway_assert(genIsValidFloatReg(destRegNum));
4322 getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
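// Illustrative example (register names assumed): this emits an element move of the form
// "mov vD.d[1], xN", inserting the second integer argument register into the upper
// 8 bytes (lane 1) of the destination SIMD register, whose lower 8 bytes were filled by
// the copy emitted earlier.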
4324 #endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4326 // Mark the rest of the argument registers corresponding to this multi-reg type as
4327 // being processed and no longer live.
4328 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4330 int nextArgNum = argNum + regSlot;
4331 assert(!regArgTab[nextArgNum].processed);
4332 regArgTab[nextArgNum].processed = true;
4333 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4334 regArgMaskLive &= ~genRegMask(nextRegNum);
4336 #endif // FEATURE_MULTIREG_ARGS
4339 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4343 #pragma warning(pop)
4346 /*****************************************************************************
4347 * If any incoming stack arguments live in registers, load them. */
4349 void CodeGen::genEnregisterIncomingStackArgs()
4354 printf("*************** In genEnregisterIncomingStackArgs()\n");
4358 assert(compiler->compGeneratingProlog);
4360 unsigned varNum = 0;
4362 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4364 /* Is this variable a parameter? */
4366 if (!varDsc->lvIsParam)
4371 /* If it's a register argument then it's already been taken care of.
4372 But, on Arm when under a profiler, we would have prespilled a register argument
4373 and hence here we need to load it from its prespilled location. */
4375 bool isPrespilledForProfiling = false;
4376 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4377 isPrespilledForProfiling =
4378 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4381 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4386 /* Has the parameter been assigned to a register? */
4388 if (!varDsc->lvIsInReg())
4393 var_types type = genActualType(varDsc->TypeGet());
4395 /* Is the variable dead on entry */
4397 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4402 /* Load the incoming parameter into the register */
4404 /* Figure out the home offset of the incoming argument */
4406 regNumber regNum = varDsc->lvArgInitReg;
4407 assert(regNum != REG_STK);
4409 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
4410 regSet.verifyRegUsed(regNum);
4411 #ifdef USING_SCOPE_INFO
4412 psiMoveToReg(varNum);
4413 #endif // USING_SCOPE_INFO
4417 /*-------------------------------------------------------------------------
4419 * We have to decide whether we're going to use block initialization
4420 * in the prolog before we assign final stack offsets. This is because
4421 * when using block initialization we may need additional callee-saved
4422 * registers which need to be saved on the frame, thus increasing the frame size.
4425 * We'll count the number of locals we have to initialize,
4426 * and if there are lots of them we'll use block initialization.
4427 * Thus, the local variable table must have accurate register location
4428 * information for enregistered locals for their register state on entry to the method.
4431 * At the same time we set lvMustInit for locals (enregistered or on stack)
4432 * that must be initialized (e.g. initialize memory (comInitMem),
4433 * untracked pointers or disable DFA) */
4435 void CodeGen::genCheckUseBlockInit()
4437 assert(!compiler->compGeneratingProlog);
4439 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
4440 // larger than int count for more than 1).
4441 unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
4442 // determine whether to use block init.
4447 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4449 if (varDsc->lvIsParam)
4454 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4456 noway_assert(varDsc->lvRefCnt() == 0);
4460 if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
4465 #if FEATURE_FIXED_OUT_ARGS
4466 if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
4470 if (varNum == compiler->lvaOutgoingArgSpaceVar)
4476 #if FEATURE_EH_FUNCLETS
4477 // There's no need to force 0-initialization of the PSPSym, it will be
4478 // initialized with a real value in the prolog
4479 if (varNum == compiler->lvaPSPSym)
4485 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4487 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4488 // initialized by the parent struct. No need to set the lvMustInit bit in the field locals.
4493 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
4496 if (varDsc->lvTracked)
4498 /* For uninitialized use of tracked variables, the liveness
4499 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness() */
4501 if (varDsc->lvMustInit ||
4502 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4504 /* This var must be initialized */
4506 varDsc->lvMustInit = 1;
4508 /* See if the variable that is on the stack will be initialized
4509 * using rep stos - compute the total size to be zeroed */
4511 if (varDsc->lvOnFrame)
4513 if (!varDsc->lvRegister)
4515 if (!varDsc->lvIsInReg())
4517 // Var is on the stack at entry.
4519 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
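// Worked example (illustrative sizes): a 12-byte local on a 64-bit target rounds up to
// 16 bytes, contributing 16 / sizeof(int) == 4 int-sized slots to initStkLclCnt.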
4524 // Var is partially enregistered
4525 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
4526 initStkLclCnt += genTypeStSz(TYP_INT);
4532 /* With compInitMem, all untracked vars will have to be init'ed */
4533 /* VSW 102460 - Do not force initialization of compiler generated temps,
4534 unless they are untracked GC type or structs that contain GC pointers */
4535 CLANG_FORMAT_COMMENT_ANCHOR;
4538 // TODO-1stClassStructs
4539 // This is here to duplicate previous behavior, where TYP_SIMD8 locals
4540 // were not being re-typed correctly.
4541 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
4542 #else // !FEATURE_SIMD
4543 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
4544 #endif // !FEATURE_SIMD
4545 varDsc->lvOnFrame &&
4546 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
4548 varDsc->lvMustInit = true;
4550 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4556 /* Ignore if not a pointer variable or value class with a GC field */
4558 if (!compiler->lvaTypeIsGC(varNum))
4563 /* If we don't know lifetimes of variables, must be conservative */
4564 if (!compiler->backendRequiresLocalVarLifetimes())
4566 varDsc->lvMustInit = true;
4567 noway_assert(!varDsc->lvRegister);
4571 if (!varDsc->lvTracked)
4573 varDsc->lvMustInit = true;
4577 /* Is this a 'must-init' stack pointer local? */
4579 if (varDsc->lvMustInit && varDsc->lvOnFrame)
4581 initStkLclCnt += varDsc->lvStructGcCount;
4584 if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
4590 /* Don't forget about spill temps that hold pointers */
4592 if (!TRACK_GC_TEMP_LIFETIMES)
4594 assert(regSet.tmpAllFree());
4595 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4597 if (varTypeIsGC(tempThis->tdTempType()))
4604 // After debugging this further it was found that this logic is incorrect:
4605 // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
4606 // and this also double counts variables (we saw this in the debugger) around line 4829.
4607 // Even though this doesn't pose a problem with correctness, it will improperly decide to
4608 // zero init the stack using a block operation instead of on a 'case by case' basis.
4609 genInitStkLclCnt = initStkLclCnt;
4611 /* If we have more than 4 untracked locals, use block initialization */
4612 /* TODO-Review: If we have large structs, bias toward not using block initialization since
4613 we waste all the other slots. Really need to compute the correct cost
4614 and compare that against zeroing the slots individually */
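// Worked example of the heuristic below (illustrative counts): with genInitStkLclCnt == 7
// and largeGcStructs == 1, 7 > (1 + 4) holds, so the prolog zeroes the frame with a block
// operation; with only 4 slots to initialize it would fall back to per-slot stores.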
4616 genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
4618 if (genUseBlockInit)
4620 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4622 // If there is a secret stub param, don't count it, as it will no longer
4623 // be live when we do block init.
4624 if (compiler->info.compPublishStubParam)
4626 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4629 #ifdef _TARGET_XARCH_
4630 // If we're going to use "REP STOS", remember that we will trash EDI
4631 // For fastcall we will have to save ECX, EAX
4632 // so reserve two extra callee saved
4633 // This is better than pushing eax, ecx, because in the latter case
4634 // we would mess up already computed offsets on the stack (for ESP frames)
4635 regSet.rsSetRegsModified(RBM_EDI);
4637 #ifdef UNIX_AMD64_ABI
4638 // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
4639 // In such case use R12 and R13 registers.
4640 if (maskCalleeRegArgMask & RBM_RCX)
4642 regSet.rsSetRegsModified(RBM_R12);
4645 if (maskCalleeRegArgMask & RBM_RDI)
4647 regSet.rsSetRegsModified(RBM_R13);
4649 #else // !UNIX_AMD64_ABI
4650 if (maskCalleeRegArgMask & RBM_ECX)
4652 regSet.rsSetRegsModified(RBM_ESI);
4654 #endif // !UNIX_AMD64_ABI
4656 if (maskCalleeRegArgMask & RBM_EAX)
4658 regSet.rsSetRegsModified(RBM_EBX);
4661 #endif // _TARGET_XARCH_
4664 // On Arm, if we are using block init to initialize, then we
4665 // must force spill R4/R5/R6 so that we can use them during the
4666 // zero-initialization process.
4668 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
4669 if (forceSpillRegCount > 0)
4670 regSet.rsSetRegsModified(RBM_R4);
4671 if (forceSpillRegCount > 1)
4672 regSet.rsSetRegsModified(RBM_R5);
4673 if (forceSpillRegCount > 2)
4674 regSet.rsSetRegsModified(RBM_R6);
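// Illustrative example (assumed counts): if three integer argument registers are live on
// entry and none are pre-spilled, forceSpillRegCount is 3 - 1 = 2, so R4 and R5 are marked
// modified above (and thus saved in the prolog), freeing them as scratch registers for the
// zero-initialization loop.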
4675 #endif // _TARGET_ARM_
4679 /*-----------------------------------------------------------------------------
4681 * Push any callee-saved registers we have used */
4684 #if defined(_TARGET_ARM64_)
4685 void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
4687 void CodeGen::genPushCalleeSavedRegisters()
4690 assert(compiler->compGeneratingProlog);
4692 #if defined(_TARGET_XARCH_)
4693 // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
4694 // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not here.
4696 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
4697 #else // !defined(_TARGET_XARCH_)
4698 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
4702 if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
4704 noway_assert(!"Used register RBM_FPBASE as a scratch register!");
4708 #ifdef _TARGET_XARCH_
4709 // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
4710 if (isFramePointerUsed())
4712 rsPushRegs &= ~RBM_FPBASE;
4716 #ifdef _TARGET_ARMARCH_
4717 // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
4718 if (isFramePointerUsed())
4719 rsPushRegs |= RBM_FPBASE;
4722 // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
4723 // changes in GC suspension architecture.
4725 // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
4726 // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
4727 // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
4728 // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
4729 // be saved on the stack and the GC suspension would time out.
4731 // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
4732 // the following to make GC suspension work in the above scenario:
4733 // - Make return address hijacking work even when lr is not saved on the stack.
4734 // - Generate fully interruptible code for loops that contain calls
4735 // - Generate fully interruptible code for leaf methods
4737 // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity is not worth it.
4740 rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
4742 regSet.rsMaskCalleeSaved = rsPushRegs;
4743 #endif // _TARGET_ARMARCH_
4746 if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
4748 printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
4749 compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
4750 dspRegMask(rsPushRegs);
4752 assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
4756 #if defined(_TARGET_ARM_)
4757 regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
4758 regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
4760 maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
4762 assert(FitsIn<int>(maskPushRegsInt));
4763 inst_IV(INS_push, (int)maskPushRegsInt);
4764 compiler->unwindPushMaskInt(maskPushRegsInt);
4766 if (maskPushRegsFloat != 0)
4768 genPushFltRegs(maskPushRegsFloat);
4769 compiler->unwindPushMaskFloat(maskPushRegsFloat);
4771 #elif defined(_TARGET_ARM64_)
4772 // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
4773 // options. Case numbers in comments here refer to this document. See also Compiler::lvaAssignFrameOffsets()
4774 // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture)
4775 // for pictures of the funclet frame layouts.
4777 // For most frames, generate, e.g.:
4778 // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair.
4779 // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
4780 // // at top of frame (highest addresses).
4781 // stp r21, r22, [sp, 0x70]
4784 // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
4785 // at the top of the frame.
4786 // 2. If we save FP, then the first store is FP, LR.
4787 // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
4788 // preserve their lower 8 bytes, by calling convention.
4789 // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
4790 // consecutive, and at the top of the frame.
4791 // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
4793 // For functions with GS and localloc, we change the frame so the frame pointer and LR are saved at the top
4794 // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same
4795 // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP.
4796 // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage.
4798 // The frames look like the following (simplified to only include components that matter for establishing the
4799 // frames). See also Compiler::lvaAssignFrameOffsets().
4801 // Frames with FP, LR saved at bottom of frame (above outgoing argument space):
4804 // |-----------------------|
4805 // | incoming arguments |
4806 // +=======================+ <---- Caller's SP
4807 // | Varargs regs space | // Only for varargs functions; 64 bytes
4808 // |-----------------------|
4809 // |Callee saved registers | // not including FP/LR; multiple of 8 bytes
4810 // |-----------------------|
4811 // | PSP slot | // 8 bytes (omitted in CoreRT ABI)
4812 // |-----------------------|
4813 // | locals, temps, etc. |
4814 // |-----------------------|
4815 // | possible GS cookie |
4816 // |-----------------------|
4817 // | Saved LR | // 8 bytes
4818 // |-----------------------|
4819 // | Saved FP | // 8 bytes
4820 // |-----------------------|
4821 // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0)
4822 // |-----------------------| <---- Ambient SP
4824 // ~ | Stack grows ~
4828 // Frames with FP, LR saved at top of frame (below saved varargs incoming arguments):
4831 // |-----------------------|
4832 // | incoming arguments |
4833 // +=======================+ <---- Caller's SP
4834 // | Varargs regs space | // Only for varargs functions; 64 bytes
4835 // |-----------------------|
4836 // | Saved LR | // 8 bytes
4837 // |-----------------------|
4838 // | Saved FP | // 8 bytes
4839 // |-----------------------|
4840 // |Callee saved registers | // not including FP/LR; multiple of 8 bytes
4841 // |-----------------------|
4842 // | PSP slot | // 8 bytes (omitted in CoreRT ABI)
4843 // |-----------------------|
4844 // | locals, temps, etc. |
4845 // |-----------------------|
4846 // | possible GS cookie |
4847 // |-----------------------|
4848 // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0)
4849 // |-----------------------| <---- Ambient SP
4851 // ~ | Stack grows ~
4856 int totalFrameSize = genTotalFrameSize();
4858 int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
4860 regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
4861 regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
4866 printf("Save float regs: ");
4867 dspRegMask(maskSaveRegsFloat);
4869 printf("Save int regs: ");
4870 dspRegMask(maskSaveRegsInt);
4875 // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we
4876 // generate based on various sizes.
4879 // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the
4880 // first save instruction as a "predecrement" amount, if possible.
4881 int calleeSaveSPDelta = 0;
4883 if (isFramePointerUsed())
4885 // We need to save both FP and LR.
4887 assert((maskSaveRegsInt & RBM_FP) != 0);
4888 assert((maskSaveRegsInt & RBM_LR) != 0);
4890 // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
4891 // (FP and LR) are protected from buffer overrun by the GS cookie. If FP/LR are at the lowest addresses,
4892 // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
4893 // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
4894 // saved FP/LR. In that case, we save FP/LR along with the rest of the callee-saved registers, above the GS cookie.
4897 // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
4898 // create a frame pointer chain.
4900 // Do we need another frame pointer register to get good code quality in the case of having the frame pointer
4901 // point high in the frame, so we can take advantage of arm64's preference for positive offsets? C++ native
4902 // code dedicates callee-saved x19 to this, so generates "mov x19, sp"
4904 // in the prolog, then uses x19 for local var accesses. Given that this case is so rare, we currently do
4905 // not do this. That means that negative offsets from FP might need to use the reserved register to form
4906 // the local variable offset for an addressing mode.
4908 if (((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize <= 504)) &&
4909 !genSaveFpLrWithAllCalleeSavedRegisters)
4914 // stp fp,lr,[sp,#-framesz]!
4916 // The (totalFrameSize <= 504) condition ensures that both the pre-index STP instruction
4917 // used in the prolog, and the post-index LDP instruction used in the epilog, can be generated.
4918 // Note that STP and the unwind codes can handle -512, but LDP with a positive post-index value
4919 // can only handle up to 504, and we want our prolog and epilog to match.
4921 // After saving callee-saved registers, we establish the frame pointer with "mov fp, sp".
4923 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
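// Illustrative frame type 1 sequence (sizes and registers assumed): with totalFrameSize
// of 0x40, compLclFrameSize of 0x20, and r19/r20 the only other callee-saved registers:
//      stp fp,lr,[sp,#-0x40]!
//      stp r19,r20,[sp,#0x30]
//      mov fp,sp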
4925 JITDUMP("Frame type 1. #outsz=0; #framesz=%d; LclFrameSize=%d\n", totalFrameSize,
4926 compiler->compLclFrameSize);
4930 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
4931 INS_OPTS_PRE_INDEX);
4932 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
4934 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
4935 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
4937 else if (totalFrameSize <= 512)
4941 // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP
4942 // with signed offset encoding. The maximum positive STP offset is 504, but when storing a pair of
4943 // 8 byte registers, the largest actual offset we use would be 512 - 8 * 2 = 496. And STR with positive
4944 // offset has a range 0 to 32760.
4946 // After saving callee-saved registers, we establish the frame pointer with "mov fp, sp".
4948 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
4950 if (genSaveFpLrWithAllCalleeSavedRegisters)
4952 JITDUMP("Frame type 4 (save FP/LR at top). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
4953 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
4957 // The frame will be allocated below, when the callee-saved registers are saved. This might mean a
4958 // separate SUB instruction or the SP adjustment might be folded in to the first STP if there is
4959 // no outgoing argument space AND no local frame space, that is, if the only thing the frame does
4960 // is save callee-saved registers (and possibly varargs argument registers).
4961 calleeSaveSPDelta = totalFrameSize;
4963 offset = (int)compiler->compLclFrameSize;
4967 JITDUMP("Frame type 2 (save FP/LR at bottom). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
4968 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
4973 // sub sp,sp,#framesz
4974 // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
4976 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
4977 compiler->unwindAllocStack(totalFrameSize);
4979 assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
4981 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
4982 compiler->lvaOutgoingArgSpaceSize);
4983 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
4985 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
4986 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
4993 // First, the callee-saved registers will be saved, and the callee-saved register code must use
4994 // pre-index to subtract from SP as the first instruction. It must also leave space for varargs
4995 // registers to be stored. For example:
4996 // stp r19,r20,[sp,#-96]!
4997 // stp d8,d9,[sp,#16]
4998 // ... save varargs incoming integer registers ...
4999 // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
5000 // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate
5001 // alignment). So, if there is an odd number of callee-saved registers, we use (for example, with just
5002 // one saved register):
5005 // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
5006 // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
5007 // above them. If that is preferable, we could implement it.
5009 // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
5012 // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
5013 // padding from above). Note that #remainingFrameSz must not be zero, since we still need to save FP/LR.
5016 // sub sp,sp,#remainingFrameSz
5017 // or, for large frames:
5018 // mov rX, #remainingFrameSz // maybe multiple instructions
5022 // stp fp,lr,[sp,#outsz]
5025 // However, we need to handle the case where #outsz is larger than the constant signed offset encoding
5026 // can handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
5027 // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of
5028 // the following sequences:
5030 // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
5032 // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
5035 // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
5039 // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
5040 // // always guaranteed to be 8 byte aligned).
5041 // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
5043 // sub sp,sp,#outsz - #8
5045 // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
5046 // mov rX, #outsz - #8 // maybe multiple instructions
5050 // Note that even if we align the SP alterations, that does not imply that we are creating empty alignment
5051 // slots. In fact, we are not; any empty alignment slots were calculated in
5052 // Compiler::lvaAssignFrameOffsets() and its callees.
5054 int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize;
5055 if (genSaveFpLrWithAllCalleeSavedRegisters)
5057 JITDUMP("Frame type 5 (save FP/LR at top). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
5058 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
5060 // This case is much simpler, because we allocate space for the callee-saved register area, including
5061 // FP/LR. Note the SP adjustment might be SUB or be folded into the first store as a predecrement.
5062 // Then, we use a single SUB to establish the rest of the frame. We need to be careful about where
5063 // to establish the frame pointer, as there is a limit of 2040 bytes offset from SP to FP in the
5064 // unwind codes when FP is established.
5069 JITDUMP("Frame type 3 (save FP/LR at bottom). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
5070 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
5074 calleeSaveSPDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
5076 // We'll take care of these later, but callee-saved regs code shouldn't see them.
5077 maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
5080 assert(calleeSaveSPDeltaUnaligned >= 0);
5081 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5082 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5084 offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5086 JITDUMP(" calleeSaveSPDelta=%d, offset=%d\n", calleeSaveSPDelta, offset);
5088 // At most one alignment slot between SP and where we store the callee-saved registers.
5089 assert((offset == 0) || (offset == REGSIZE_BYTES));
5094 // No frame pointer (no chaining).
5095 assert((maskSaveRegsInt & RBM_FP) == 0);
5096 assert((maskSaveRegsInt & RBM_LR) != 0);
5098 // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using
5099 // 'stp' if we only have one callee-saved register plus LR to save.
5101 NYI("Frame without frame pointer");
5105 assert(frameType != 0);
5107 JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta);
5108 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
5110 offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
5112 // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
5113 // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
5114 // need to add codes at all.
5116 if (compiler->info.compIsVarArgs)
5118 JITDUMP(" compIsVarArgs=true\n");
5120 // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
5121 assert((offset % 16) == 0);
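// Illustrative expansion of the loop below (assuming the usual arm64 x0-x7 argument
// registers): it emits
//      stp x0,x1,[sp,#offset]
//      stp x2,x3,[sp,#offset+16]
//      stp x4,x5,[sp,#offset+32]
//      stp x6,x7,[sp,#offset+48]
// homing all eight integer argument registers into the varargs save area.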
5122 for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
5124 regNumber reg2 = REG_NEXT(reg1);
5125 // stp REG, REG + 1, [SP, #offset]
5126 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
5127 compiler->unwindNop();
5128 offset += 2 * REGSIZE_BYTES;
5132 // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.)
5133 bool establishFramePointer = true;
5135 // If we do establish the frame pointer, what is the amount we add to SP to do so?
5136 unsigned offsetSpToSavedFp = 0;
5140 assert(!genSaveFpLrWithAllCalleeSavedRegisters);
5141 assert(offsetSpToSavedFp == 0);
5143 else if (frameType == 2)
5145 assert(!genSaveFpLrWithAllCalleeSavedRegisters);
5147 offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
5149 else if (frameType == 3)
5151 assert(!genSaveFpLrWithAllCalleeSavedRegisters);
5153 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5154 assert(remainingFrameSz > 0);
5155 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
5156 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5158 if (compiler->lvaOutgoingArgSpaceSize > 504)
5160 // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
5161 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5162 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5163 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5164 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5165 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5166 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
5168 JITDUMP(" spAdjustment2=%d\n", spAdjustment2);
5170 genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
5171 offset += spAdjustment2;
5173 // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub"
5174 // included some of it)
5176 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5177 assert(spAdjustment3 > 0);
5178 assert((spAdjustment3 % 16) == 0);
5180 JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2);
5181 genEstablishFramePointer(alignmentAdjustment2, /* reportUnwindData */ true);
5183 // We just established the frame pointer chain; don't do it again.
5184 establishFramePointer = false;
5186 JITDUMP(" spAdjustment3=%d\n", spAdjustment3);
5188 // We've already established the frame pointer, so no need to report the stack pointer change to unwind info.
5190 genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed, /* reportUnwindData */ false);
5191 offset += spAdjustment3;
5195 genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
5197 offset += remainingFrameSz;
5199 offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
5202 else if (frameType == 4)
5204 assert(genSaveFpLrWithAllCalleeSavedRegisters);
5205 offsetSpToSavedFp = calleeSaveSPDelta - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
5206 2 * REGSIZE_BYTES; // -2 for FP, LR
5208 else if (frameType == 5)
5210 assert(genSaveFpLrWithAllCalleeSavedRegisters);
5212 offsetSpToSavedFp = calleeSaveSPDelta - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
5213 2 * REGSIZE_BYTES; // -2 for FP, LR
5214 JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp);
5215 genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true);
5217 // We just established the frame pointer chain; don't do it again.
5218 establishFramePointer = false;
5220 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5221 assert(remainingFrameSz > 0);
5222 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
5223 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5225 JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz);
5227 // We've already established the frame pointer, so no need to report the stack pointer change to unwind info.
5228 genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ false);
5229 offset += remainingFrameSz;
5236 if (establishFramePointer)
5238 JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp);
5239 genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true);
5242 assert(offset == totalFrameSize);
5244 #elif defined(_TARGET_XARCH_)
5245 // Push backwards so we match the order we will pop them in the epilog
5246 // and all the other code that expects it to be in this order.
5247 for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
5249 regMaskTP regBit = genRegMask(reg);
5251 if ((regBit & rsPushRegs) != 0)
5253 inst_RV(INS_push, reg, TYP_REF);
5254 compiler->unwindPush(reg);
5255 #ifdef USING_SCOPE_INFO
5256 if (!doubleAlignOrFramePointerUsed())
5258 psiAdjustStackLevel(REGSIZE_BYTES);
5260 #endif // USING_SCOPE_INFO
5261 rsPushRegs &= ~regBit;
5266 assert(!"Unknown TARGET");
5270 #if defined(_TARGET_ARM_)
5272 void CodeGen::genPushFltRegs(regMaskTP regMask)
5274 assert(regMask != 0); // Don't call unless we have some registers to push
5275 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5277 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5278 int slots = genCountBits(regMask);
5279 // regMask should be contiguously set
5280 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5281 assert((tmpMask & (tmpMask - 1)) == 0);
5282 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
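// Illustrative check (mask assumed): when saving d8-d11 (F16..F23), regMask >> lowReg is
// 0xFF and tmpMask is 0x100, a single bit, which is exactly what the power-of-two assert
// above verifies; the 8 float slots then become a single "vpush" of 4 doubles below.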
5284 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
5285 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5286 noway_assert((slots % 2) == 0);
5288 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
5291 void CodeGen::genPopFltRegs(regMaskTP regMask)
5293 assert(regMask != 0); // Don't call unless we have some registers to pop
5294 assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5296 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5297 int slots = genCountBits(regMask);
5298 // regMask should be contiguously set
5299 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5300 assert((tmpMask & (tmpMask - 1)) == 0);
5302 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
5303 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5304 noway_assert((slots % 2) == 0);
5306 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
5309 /*-----------------------------------------------------------------------------
5311 * If we have a jmp call, then the argument registers cannot be used in the
5312 * epilog. So return the current call's argument registers as the argument
5313 * registers for the jmp call. */
5315 regMaskTP CodeGen::genJmpCallArgMask()
5317 assert(compiler->compGeneratingEpilog);
5319 regMaskTP argMask = RBM_NONE;
5320 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
5322 const LclVarDsc& desc = compiler->lvaTable[varNum];
5323 if (desc.lvIsRegArg)
5325 argMask |= genRegMask(desc.lvArgReg);
5331 /*-----------------------------------------------------------------------------
5333 * Free the local stack frame: add to SP.
5334 * If epilog unwind hasn't been started, and we generate code, we start unwind
5335 * and set *pUnwindStarted = true. */
5338 void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
5340 assert(compiler->compGeneratingEpilog);
5345 // Add 'frameSize' to SP.
5347 // Unfortunately, we can't just use:
5349 // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
5351 // because we need to generate proper unwind codes for each instruction generated,
5352 // and large frame sizes might generate a temp register load which might
5353 // need an unwind code. We don't want to generate a "NOP" code for this
5354 // temp register load; we want the unwind codes to start after that.
5356 if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
5358 if (!*pUnwindStarted)
5360 compiler->unwindBegEpilog();
5361 *pUnwindStarted = true;
5364 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
5368 regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
5371 // Do not use argument registers as scratch registers in the jmp epilog.
5372 grabMask &= ~genJmpCallArgMask();
5374 regNumber tmpReg = REG_TMP_0;
5375 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
5376 if (*pUnwindStarted)
5378 compiler->unwindPadding();
5381 // We're going to generate an unwindable instruction, so check again if
5382 // we need to start the unwind codes.
5384 if (!*pUnwindStarted)
5386 compiler->unwindBegEpilog();
5387 *pUnwindStarted = true;
5390 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
5393 compiler->unwindAllocStack(frameSize);
5396 /*-----------------------------------------------------------------------------
5398 * Move of relocatable displacement value to register */
5400 void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
5402 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
5403 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
5405 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5407 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
5411 /*-----------------------------------------------------------------------------
5415 * Move of relocatable data-label to register */
5415 void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
5417 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
5418 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
5420 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5422 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
5426 /*-----------------------------------------------------------------------------
5428 * Move of relocatable immediate to register */
5430 void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg)
5432 _ASSERTE(EA_IS_RELOC(size));
5434 getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr);
5435 getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr);
5437 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5439 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
5443 /*-----------------------------------------------------------------------------
5445 * Returns register mask to push/pop to allocate a small stack frame,
5446 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
5447 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop. */
5449 regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
5451 assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
5453 // We can't do this optimization with callee saved floating point registers because
5454 // the stack would be allocated in a wrong spot.
5455 if (maskCalleeSavedFloat != RBM_NONE)
5458 // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
5459 // than an extra sub sp,XXX / add sp,XXX.
5460 // R0 and R1 may be used for the return value. Keep things simple and just skip the optimization
5461 // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
5462 // significant negative side-effects (more memory bus traffic).
5467 case 2 * REGSIZE_BYTES:
5468 return RBM_R2 | RBM_R3;
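// Illustrative effect (assumed case): for a frame of 2 * REGSIZE_BYTES the prolog push
// (and matching epilog pop) simply includes r2/r3, so no separate "sub sp" / "add sp"
// pair is needed to allocate and free those 8 bytes.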
5474 #endif // _TARGET_ARM_
5476 /*****************************************************************************
5478 * initFltRegs -- The mask of float regs to be zeroed.
5479 * initDblRegs -- The mask of double regs to be zeroed.
5480 * initReg -- A zero initialized integer reg to copy from.
5482 * Does best effort to move between VFP/xmm regs if one is already
5483 * initialized to 0. (Arm Only) Else copies from the integer register, which is slower. */
5486 void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
5488 assert(compiler->compGeneratingProlog);
5490 // The first float/double reg that is initialized to 0, so that it can be used to
5491 // initialize the remaining registers.
5492 regNumber fltInitReg = REG_NA;
5493 regNumber dblInitReg = REG_NA;
5495 // Iterate through float/double registers and initialize them to 0 or
5496 // copy from already initialized register of the same type.
5497 regMaskTP regMask = genRegMask(REG_FP_FIRST);
5498 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
5500 if (regMask & initFltRegs)
5502 // Do we have a float register already set to 0?
5503 if (fltInitReg != REG_NA)
5506 inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
5511 // Do we have a double register initialized to 0?
5512 if (dblInitReg != REG_NA)
5514 // Copy from double.
5515 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
5520 inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
5522 #elif defined(_TARGET_XARCH_)
5523 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
5524 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5526 #elif defined(_TARGET_ARM64_)
5527 // We will just zero out the entire vector register. This sets it to a double/float zero value
5528 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5530 #error Unsupported or unset target architecture
5535 else if (regMask & initDblRegs)
5537 // Do we have a double register already set to 0?
5538 if (dblInitReg != REG_NA)
5540 // Copy from double.
5541 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
5546 // Do we have a float register initialized to 0?
5547 if (fltInitReg != REG_NA)
5550 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
5555 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
5557 #elif defined(_TARGET_XARCH_)
5558 // XORPS is the fastest and smallest way to initialize a XMM register to zero.
5559 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5561 #elif defined(_TARGET_ARM64_)
5562 // We will just zero out the entire vector register. This sets it to a double/float zero value
5563 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5565 #error Unsupported or unset target architecture
5573 /*-----------------------------------------------------------------------------
5575 * Restore any callee-saved registers we have used */
5578 #if defined(_TARGET_ARM_)
5580 bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
5582 assert(compiler->compGeneratingEpilog);
5584 if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
5590 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5592 assert(compiler->compGeneratingEpilog);
5594 regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5595 regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
5596 regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
5598 // First, pop float registers
5600 if (maskPopRegsFloat != RBM_NONE)
5602 genPopFltRegs(maskPopRegsFloat);
5603 compiler->unwindPopMaskFloat(maskPopRegsFloat);
5606 // Next, pop integer registers
5610 regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
5611 maskPopRegsInt |= maskStackAlloc;
5614 if (isFramePointerUsed())
5616 assert(!regSet.rsRegsModified(RBM_FPBASE));
5617 maskPopRegsInt |= RBM_FPBASE;
5620 if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
5622 maskPopRegsInt |= RBM_PC;
5623 // Record the fact that we use a pop to the PC to perform the return
5624 genUsedPopToReturn = true;
5628 maskPopRegsInt |= RBM_LR;
5629 // Record the fact that we did not use a pop to the PC to perform the return
5630 genUsedPopToReturn = false;
5633 assert(FitsIn<int>(maskPopRegsInt));
5634 inst_IV(INS_pop, (int)maskPopRegsInt);
5635 compiler->unwindPopMaskInt(maskPopRegsInt);
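// Illustrative epilog (register set assumed): when genCanUsePopToReturn is true, a single
// "pop {r4,r5,r11,pc}" both restores the callee-saved registers and returns; otherwise the
// pop targets lr instead of pc and the return (or jmp-call tail) is emitted separately.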
5638 #elif defined(_TARGET_ARM64_)
5640 void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
5642 assert(compiler->compGeneratingEpilog);
5644 regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5646 if (isFramePointerUsed())
5648 rsRestoreRegs |= RBM_FPBASE;
5651 rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
5653 regMaskTP regsToRestoreMask = rsRestoreRegs;
5655 int totalFrameSize = genTotalFrameSize();
5657 int calleeSaveSPOffset = 0; // This will be the starting place for restoring the callee-saved registers, in
5658 // decreasing order.
5659 int frameType = 0; // An indicator of what type of frame we are popping.
5660 int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored.
5662 if (isFramePointerUsed())
5664 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize <= 504) &&
5665 !genSaveFpLrWithAllCalleeSavedRegisters)
5667 JITDUMP("Frame type 1. #outsz=0; #framesz=%d; localloc? %s\n", totalFrameSize,
5668 dspBool(compiler->compLocallocUsed));
5671 if (compiler->compLocallocUsed)
5673 // Restore sp from fp
5675 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
5676 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5679 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5681 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom of the stack.
5683 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5685 else if (totalFrameSize <= 512)
5687 if (compiler->compLocallocUsed)
5689 // Restore sp from fp
5690 // sub sp, fp, #outsz // Uses #outsz if FP/LR stored at bottom
5691 int SPtoFPdelta = genSPtoFPdelta();
5692 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, SPtoFPdelta);
5693 compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta);
5696 if (genSaveFpLrWithAllCalleeSavedRegisters)
5698 JITDUMP("Frame type 4 (save FP/LR at top). #outsz=%d; #framesz=%d; localloc? %s\n",
5699 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize,
5700 dspBool(compiler->compLocallocUsed));
5704 calleeSaveSPOffset = compiler->compLclFrameSize;
5706 // Remove the frame after we're done restoring the callee-saved registers.
5707 calleeSaveSPDelta = totalFrameSize;
5711 JITDUMP("Frame type 2 (save FP/LR at bottom). #outsz=%d; #framesz=%d; localloc? %s\n",
5712 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize,
5713 dspBool(compiler->compLocallocUsed));
5717 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5719 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom of the stack.
5721 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5724 else if (!genSaveFpLrWithAllCalleeSavedRegisters)
5726 JITDUMP("Frame type 3 (save FP/LR at bottom). #outsz=%d; #framesz=%d; localloc? %s\n",
5727 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));
5731 int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
5732 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
5733 assert(calleeSaveSPDeltaUnaligned >= 0);
5734 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5735 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5737 JITDUMP(" calleeSaveSPDelta=%d\n", calleeSaveSPDelta);
5739 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
5741 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5742 assert(remainingFrameSz > 0);
5744 if (compiler->lvaOutgoingArgSpaceSize > 504)
5746 // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
5747 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5748 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5749 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5750 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5751 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5752 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
5754 // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp
5756 // sub sp, fp, #alignmentAdjustment2
5757 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
5758 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5762 // add sp,sp,#remainingFrameSz
5764 JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2);
5765 genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr);
5769 if (compiler->compLocallocUsed)
5771 // Restore sp from fp; here that's #outsz from SP
5772 // sub sp, fp, #outsz
5773 int SPtoFPdelta = genSPtoFPdelta();
5774 assert(SPtoFPdelta == (int)compiler->lvaOutgoingArgSpaceSize);
5775 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, SPtoFPdelta);
5776 compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta);
5780 // ldp fp,lr,[sp,#outsz]
5781 // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if it is large
5784 JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz);
5786 genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false,
5790 // Unlike frameType=1 or frameType=2 that restore SP at the end,
5791 // frameType=3 already adjusted SP above to delete local frame.
5792 // There is at most one alignment slot between SP and where we store the callee-saved registers.
5793 calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5794 assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
5798 JITDUMP("Frame type 5 (save FP/LR at top). #outsz=%d; #framesz=%d; localloc? %s\n",
5799 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));
5803 int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize;
5804 assert(calleeSaveSPDeltaUnaligned >= 0);
5805 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5806 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5808 calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5809 assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
5811 // Restore sp from fp:
5812 // sub sp, fp, #sp-to-fp-delta
5813 // This is the same whether there is localloc or not. Note that we don't need to do anything to remove the
5814 // "remainingFrameSz" to reverse the SUB of that amount in the prolog.
5816 int offsetSpToSavedFp = calleeSaveSPDelta -
5817 (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
5818 2 * REGSIZE_BYTES; // -2 for FP, LR
5819 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, offsetSpToSavedFp);
5820 compiler->unwindSetFrameReg(REG_FPBASE, offsetSpToSavedFp);
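        // Illustrative (hypothetical) example of the computation above: for a non-varargs method with
        // calleeSaveSPDelta = 96, offsetSpToSavedFp = 96 - 16 = 80, so "sub sp, fp, #80" re-establishes SP at the
        // base of the callee-saved register area (FP/LR occupy the top two slots of that area for this frame type).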
5825 // No frame pointer (no chaining).
5826 NYI("Frame without frame pointer");
5827 calleeSaveSPOffset = 0;
5830 JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta);
5831 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
5836 // ldp fp,lr,[sp],#framesz
5838 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
5839 INS_OPTS_POST_INDEX);
5840 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
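        // For reference, the frame type 1 epilog thus collapses frame teardown into a single post-indexed load:
        //      ldp fp,lr,[sp],#framesz     ; restore FP/LR and deallocate the whole frame in one instruction
        // (the return instruction itself is emitted separately).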
5842 else if (frameType == 2)
        //      ldp fp,lr,[sp,#outsz]
5846 // add sp,sp,#framesz
5848 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5849 compiler->lvaOutgoingArgSpaceSize);
5850 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5852 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5853 compiler->unwindAllocStack(totalFrameSize);
5855 else if (frameType == 3)
5857 // Nothing to do after restoring callee-saved registers.
5859 else if (frameType == 4)
5861 // Nothing to do after restoring callee-saved registers.
5863 else if (frameType == 5)
5865 // Nothing to do after restoring callee-saved registers.
5873 #elif defined(_TARGET_XARCH_)
5875 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5877 assert(compiler->compGeneratingEpilog);
5879 unsigned popCount = 0;
5880 if (regSet.rsRegsModified(RBM_EBX))
5883 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5885 if (regSet.rsRegsModified(RBM_FPBASE))
5887 // EBP cannot be directly modified for EBP frame and double-aligned frames
5888 assert(!doubleAlignOrFramePointerUsed());
5891 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5894 #ifndef UNIX_AMD64_ABI
5895 // For System V AMD64 calling convention ESI and EDI are volatile registers.
5896 if (regSet.rsRegsModified(RBM_ESI))
5899 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5901 if (regSet.rsRegsModified(RBM_EDI))
5904 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5906 #endif // !defined(UNIX_AMD64_ABI)
5908 #ifdef _TARGET_AMD64_
5909 if (regSet.rsRegsModified(RBM_R12))
5912 inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
5914 if (regSet.rsRegsModified(RBM_R13))
5917 inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
5919 if (regSet.rsRegsModified(RBM_R14))
5922 inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
5924 if (regSet.rsRegsModified(RBM_R15))
5927 inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
5929 #endif // _TARGET_AMD64_
5931 // Amd64/x86 doesn't support push/pop of xmm registers.
5932 // These will get saved to stack separately after allocating
5933 // space on stack in prolog sequence. PopCount is essentially
5934 // tracking the count of integer registers pushed.
5936 noway_assert(compiler->compCalleeRegsPushed == popCount);
5939 #elif defined(_TARGET_X86_)
5941 void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5943 assert(compiler->compGeneratingEpilog);
5945 unsigned popCount = 0;
5947 /* NOTE: The EBP-less frame code below depends on the fact that
5948 all of the pops are generated right at the start and
             each takes one byte of machine code.
    */
5952 if (regSet.rsRegsModified(RBM_FPBASE))
5954 // EBP cannot be directly modified for EBP frame and double-aligned frames
5955 noway_assert(!doubleAlignOrFramePointerUsed());
5957 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5960 if (regSet.rsRegsModified(RBM_EBX))
5963 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5965 if (regSet.rsRegsModified(RBM_ESI))
5968 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5970 if (regSet.rsRegsModified(RBM_EDI))
5973 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5975 noway_assert(compiler->compCalleeRegsPushed == popCount);
5980 // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
5981 // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
5982 regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
#ifdef _TARGET_ARM64_
    return REG_ZR;
#else  // !_TARGET_ARM64_
5987 if (*pInitRegZeroed == false)
5989 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
        *pInitRegZeroed = true;
    return initReg;
#endif // !_TARGET_ARM64_
5996 /*-----------------------------------------------------------------------------
5998 * Do we have any untracked pointer locals at all,
5999 * or do we need to initialize memory for locspace?
6001 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
6002 * initializing memory (not inclusive).
6003 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
6004 * initializing memory.
6005 * initReg - A scratch register (that gets set to zero on some platforms).
6006 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
6008 void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
6010 assert(compiler->compGeneratingProlog);
6012 if (genUseBlockInit)
6014 assert(untrLclHi > untrLclLo);
6015 #ifdef _TARGET_ARMARCH_
    /*
        Generate the following code:

        For cnt less than 10

            mov     rZero1, 0
            mov     rZero2, 0
            stm     <rZero1,rZero2>,[rAddr!]
<optional>  stm     <rZero1,rZero2>,[rAddr!]
<optional>  stm     <rZero1,rZero2>,[rAddr!]
<optional>  stm     <rZero1,rZero2>,[rAddr!]
<optional>  str     rZero1,[rAddr]

        For rCnt greater than or equal to 10

            mov     rZero1, 0
            mov     rZero2, 0
            mov     rCnt, <cnt/2>
        loop:
            stm     <rZero1,rZero2>,[rAddr!]
            subs    rCnt, rCnt, 1
            bhi     loop
<optional>  str     rZero1,[rAddr]   // When cnt is odd

        NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
    */
6048 regNumber rCnt = REG_NA; // Invalid
6051 regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
    availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
                                                        // currently live
6054 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
6055 // a large constant.
6057 #if defined(_TARGET_ARM_)
6059 if (compiler->compLocallocUsed)
6061 availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
6064 regNumber rZero1; // We're going to use initReg for rZero1
6067 // We pick the next lowest register number for rZero2
6068 noway_assert(availMask != RBM_NONE);
6069 regMask = genFindLowestBit(availMask);
6070 rZero2 = genRegNumFromMask(regMask);
6071 availMask &= ~regMask;
6072 assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rZero2 is not a live incoming
6075 // We pick the next lowest register number for rAddr
6076 noway_assert(availMask != RBM_NONE);
6077 regMask = genFindLowestBit(availMask);
6078 rAddr = genRegNumFromMask(regMask);
6079 availMask &= ~regMask;
#else  // !defined(_TARGET_ARM_)
        rAddr           = initReg;
        *pInitRegZeroed = false;
6086 #endif // !defined(_TARGET_ARM_)
6088 bool useLoop = false;
6089 unsigned uCntBytes = untrLclHi - untrLclLo;
6090 assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
6091 unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
6093 // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
6094 // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
6095 // In both of these cases the stm/stp instruction will write two zeros to memory
6096 // and we will use a single str instruction at the end whenever we have an odd count.
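        // Illustrative (hypothetical) example: zeroing 88 bytes on ARM64 gives uCntSlots = 11, so a loop is used
        // with rCnt = 5 "stp xzr, xzr" iterations (80 bytes) followed by one trailing "str xzr" for the odd slot.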
        if (uCntSlots >= 10)
            useLoop = true;

        if (useLoop)
6102 // We pick the next lowest register number for rCnt
6103 noway_assert(availMask != RBM_NONE);
6104 regMask = genFindLowestBit(availMask);
6105 rCnt = genRegNumFromMask(regMask);
6106 availMask &= ~regMask;
6109 // rAddr is not a live incoming argument reg
6110 assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0);
6112 #if defined(_TARGET_ARM_)
6113 if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
6114 #else // !_TARGET_ARM_
6115 if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
6116 #endif // !_TARGET_ARM_
6118 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
6122 // Load immediate into the InitReg register
6123 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
6124 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
6125 *pInitRegZeroed = false;
6130 noway_assert(uCntSlots >= 2);
6131 assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming
6133 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
6136 #if defined(_TARGET_ARM_)
6137 rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
6138 instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
6139 target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
6140 #endif // _TARGET_ARM_
6144 while (uCntBytes >= REGSIZE_BYTES * 2)
#ifdef _TARGET_ARM_
                getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
6148 #else // !_TARGET_ARM_
6149 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6150 INS_OPTS_POST_INDEX);
6151 #endif // !_TARGET_ARM_
6152 uCntBytes -= REGSIZE_BYTES * 2;
6155 else // useLoop is true
#ifdef _TARGET_ARM_
            getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
6159 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
6160 #else // !_TARGET_ARM_
6161 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6162 INS_OPTS_POST_INDEX); // zero stack slots
6163 getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
6164 #endif // !_TARGET_ARM_
6165 getEmitter()->emitIns_J(INS_bhi, NULL, -3);
6166 uCntBytes %= REGSIZE_BYTES * 2;
6169 if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
#ifdef _TARGET_ARM_
            getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
6173 #else // _TARGET_ARM_
6174 if ((uCntBytes - REGSIZE_BYTES) == 0)
6176 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
            else
            {
                getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
            }
6182 #endif // !_TARGET_ARM_
6183 uCntBytes -= REGSIZE_BYTES;
6185 #ifdef _TARGET_ARM64_
        if (uCntBytes > 0)
        {
            assert(uCntBytes == sizeof(int));
            getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
            uCntBytes -= sizeof(int);
        }
6192 #endif // _TARGET_ARM64_
6193 noway_assert(uCntBytes == 0);
6195 #elif defined(_TARGET_XARCH_)
        /*
            Generate the following code:

                lea     edi, [ebp/esp-OFFS]
                mov     ecx, <size>
                xor     eax, eax
                rep     stosd
        */
6205 noway_assert(regSet.rsRegsModified(RBM_EDI));
6207 #ifdef UNIX_AMD64_ABI
        // For register arguments we may have to save RCX and RDI on Amd64 System V OSes
6209 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6211 noway_assert(regSet.rsRegsModified(RBM_R12));
6212 inst_RV_RV(INS_mov, REG_R12, REG_RCX);
6213 regSet.verifyRegUsed(REG_R12);
6216 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6218 noway_assert(regSet.rsRegsModified(RBM_R13));
6219 inst_RV_RV(INS_mov, REG_R13, REG_RDI);
6220 regSet.verifyRegUsed(REG_R13);
6222 #else // !UNIX_AMD64_ABI
6223 // For register arguments we may have to save ECX
6224 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6226 noway_assert(regSet.rsRegsModified(RBM_ESI));
6227 inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
6228 regSet.verifyRegUsed(REG_ESI);
6230 #endif // !UNIX_AMD64_ABI
6232 noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
6234 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
6235 regSet.verifyRegUsed(REG_EDI);
6237 inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
6238 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
6239 instGen(INS_r_stosd);
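        // Illustrative (hypothetical) example: if untrLclHi - untrLclLo == 40, ECX is set to 10 and "rep stosd"
        // writes ten zero DWORDs starting at EDI (which was pointed at untrLclLo above), i.e. 40 bytes of zeros.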
6241 #ifdef UNIX_AMD64_ABI
6242 // Move back the argument registers
6243 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6245 inst_RV_RV(INS_mov, REG_RCX, REG_R12);
6248 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6250 inst_RV_RV(INS_mov, REG_RDI, REG_R13);
6252 #else // !UNIX_AMD64_ABI
6253 // Move back the argument registers
6254 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6256 inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
6258 #endif // !UNIX_AMD64_ABI
6261 #error Unsupported or unset target architecture
6264 else if (genInitStkLclCnt > 0)
6266 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // initReg is not a live incoming
6269 /* Initialize any lvMustInit vars on the stack */
        unsigned   varNum;
        LclVarDsc* varDsc;

        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
6276 if (!varDsc->lvMustInit)
6281 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
6282 // partially-enregistered vars in the case where we don't use a block init.
6283 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
6285 // lvMustInit can only be set for GC types or TYP_STRUCT types
6286 // or when compInitMem is true
6287 // or when in debug code
6289 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
6290 compiler->info.compInitMem || compiler->opts.compDbgCode);
6292 if (!varDsc->lvOnFrame)
6297 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
6298 (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
6300 // We only initialize the GC variables in the TYP_STRUCT
6301 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
6302 const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
6304 for (unsigned i = 0; i < slots; i++)
6306 if (gcPtrs[i] != TYPE_GC_NONE)
6308 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
6309 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
6315 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
6317 // zero out the whole thing rounded up to a single stack slot size
6318 unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
                unsigned i;
                for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
6322 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
6325 #ifdef _TARGET_64BIT_
                assert(i == lclSize || (i + sizeof(int) == lclSize));
                if (i != lclSize)
                {
                    getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
                    i += sizeof(int);
                }
#endif // _TARGET_64BIT_
                assert(i == lclSize);
6337 if (!TRACK_GC_TEMP_LIFETIMES)
6339 assert(regSet.tmpAllFree());
6340 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
6342 if (!varTypeIsGC(tempThis->tdTempType()))
6347 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
6349 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
6355 /*-----------------------------------------------------------------------------
6357 * Save the generic context argument.
6359 * We need to do this within the "prolog" in case anyone tries to inspect
6360 * the param-type-arg/this (which can be done after the prolog) using
6361 * ICodeManager::GetParamTypeArg().
6364 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
6366 assert(compiler->compGeneratingProlog);
6368 bool reportArg = compiler->lvaReportParamTypeArg();
6370 // We should report either generic context arg or "this" when used so.
6373 #ifndef JIT32_GCENCODER
6374 if (!compiler->lvaKeepAliveAndReportThis())
6381 // For JIT32_GCENCODER, we won't be here if reportArg is false.
6382 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
6384 noway_assert(contextArg != BAD_VAR_NUM);
6385 LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
6387 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
6388 // moved to its final home location. So we need to use it from the
6389 // incoming location.
6393 bool isPrespilledForProfiling = false;
6394 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
6395 isPrespilledForProfiling =
        compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
#endif
6399 // Load from the argument register only if it is not prespilled.
6400 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
6402 reg = varDsc->lvArgReg;
6406 if (isFramePointerUsed())
6408 #if defined(_TARGET_ARM_)
6409 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6410 // will become enregistered.
6411 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
6412 noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
6413 (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
6415 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6416 // will become enregistered.
6417 noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
6421 // We will just use the initReg since it is an available register
6422 // and we are probably done using it anyway...
        reg             = initReg;
        *pInitRegZeroed = false;
6426 // mov reg, [compiler->info.compTypeCtxtArg]
6427 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
6428 regSet.verifyRegUsed(reg);
6431 #if defined(_TARGET_ARM64_)
6432 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6433 compiler->lvaCachedGenericContextArgOffset(), rsGetRsvdReg());
6434 #elif defined(_TARGET_ARM_)
6435 // ARM's emitIns_R_R_I automatically uses the reserved register if necessary.
6436 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6437 compiler->lvaCachedGenericContextArgOffset());
6438 #else // !ARM64 !ARM
6439 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
6440 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6441 compiler->lvaCachedGenericContextArgOffset());
6442 #endif // !ARM64 !ARM
6445 /*-----------------------------------------------------------------------------
6447 * Set the "GS" security cookie in the prolog.
6450 void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
6452 assert(compiler->compGeneratingProlog);
    if (!compiler->getNeedsGSSecurityCookie())
    {
        return;
    }
6459 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
6461 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
6463 #ifdef _TARGET_AMD64_
6464 // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
6465 getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
6466 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
6468 // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
6469 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
6470 compiler->lvaGSSecurityCookie, 0, initReg);
6476 #ifdef _TARGET_XARCH_
6477 // Always use EAX on x86 and x64
6478 // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
    regNumber reg = REG_EAX;
#else
    // We will just use the initReg since it is an available register
    regNumber reg = initReg;
#endif

    *pInitRegZeroed = false;
6487 #if CPU_LOAD_STORE_ARCH
6488 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6489 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
6490 regSet.verifyRegUsed(reg);
6492 // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
6493 // mov dword ptr [frame.GSSecurityCookie], reg
6494 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6495 regSet.verifyRegUsed(reg);
6497 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
6501 #ifdef PROFILING_SUPPORTED
6503 //-----------------------------------------------------------------------------------
6504 // genProfilingEnterCallback: Generate the profiling function enter callback.
6507 // initReg - register to use as scratch register
6508 // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
6509 // not zero after this call.
6515 // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
6516 // VM\i386\asmhelpers.asm for details):
6517 // 1. The calling sequence for calling the helper is:
6518 // push FunctionIDOrClientID
6519 // call ProfileEnterHelper
6520 // 2. The calling function has an EBP frame.
6521 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
//    the following prolog is assumed:
//        push ebp
//        mov ebp, esp
6525 // 4. All registers are preserved.
6526 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
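// Taken together, the x86 enter probe therefore reduces to (illustrative):
//        push <FunctionIDOrClientID>
//        call ProfileEnterHelper
// and, because the helper pops its own argument, the stack level is unchanged after the call.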
6528 void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
6530 assert(compiler->compGeneratingProlog);
6532 // Give profiler a chance to back out of hooking this method
    if (!compiler->compIsProfilerHookNeeded())
    {
        return;
    }
6538 #if defined(_TARGET_AMD64_)
6539 #if !defined(UNIX_AMD64_ABI)
6544 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6545 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6546 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6548 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
6549 // In case of vararg methods, arg regs are already homed.
    // Note: Here we don't need to worry about updating GC info since the enter
    // callback is generated as part of the prolog, which is non-GC-interruptible.
    // Moreover, the GC cannot kick in while executing inside a profiler callback, which is a
    // profiler requirement so that it can examine arguments that could be object refs.
6555 if (!compiler->info.compIsVarArgs)
6557 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6559 noway_assert(varDsc->lvIsParam);
6561 if (!varDsc->lvIsRegArg)
6566 var_types storeType = varDsc->lvaArgType();
6567 regNumber argReg = varDsc->lvArgReg;
6569 instruction store_ins = ins_Store(storeType);
6572 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
6574 store_ins = INS_mov;
6576 #endif // FEATURE_SIMD
6578 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
6582 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6583 // RCX = ProfilerMethHnd
6584 if (compiler->compProfilerMethHndIndirected)
6586 // Profiler hooks enabled during Ngen time.
6587 // Profiler handle needs to be accessed through an indirection of a pointer.
6588 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6592 // No need to record relocations, if we are generating ELT hooks under the influence
6593 // of COMPlus_JitELTHookEnabled=1
6594 if (compiler->opts.compJitELTHookEnabled)
6596 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6600 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6604 // RDX = caller's SP
6606 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6607 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6608 // of that offset to FramePointer to obtain caller's SP value.
6609 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6610 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6611 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
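    // Illustrative (hypothetical) example: if lvaToCallerSPRelativeOffset returns -0x50, the line above emits
    //        lea rdx, [rbp+0x50]
    // i.e. it adds the absolute value of the (negative) offset to the frame pointer to recover the caller's SP.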
6613 // Can't have a call until we have enough padding for rejit
6614 genPrologPadForReJit();
6616 // This will emit either
6617 // "call ip-relative 32-bit offset" or
6618 // "mov rax, helper addr; call rax"
6619 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
6621 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
6622 // generation logic that moves args around as required by first BB entry point conditions
6623 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
6624 // and genEnregisterIncomingStackArgs().
6626 // Now reload arg registers from home locations.
6628 // - we need to reload only known (i.e. fixed) reg args.
6629 // - if floating point type, also reload it into corresponding integer reg
6630 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6632 noway_assert(varDsc->lvIsParam);
6634 if (!varDsc->lvIsRegArg)
6639 var_types loadType = varDsc->lvaArgType();
6640 regNumber argReg = varDsc->lvArgReg;
6642 instruction load_ins = ins_Load(loadType);
        if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
        {
            load_ins = INS_mov;
        }
6649 #endif // FEATURE_SIMD
6651 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
#if FEATURE_VARARG
        if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
6656 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
6657 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6658 inst_RV_RV(ins, argReg, intArgReg, loadType);
6660 #endif // FEATURE_VARARG
6663 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6664 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6666 *pInitRegZeroed = false;
6669 #else // !defined(UNIX_AMD64_ABI)
6671 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6672 // R14 = ProfilerMethHnd
6673 if (compiler->compProfilerMethHndIndirected)
6675 // Profiler hooks enabled during Ngen time.
6676 // Profiler handle needs to be accessed through an indirection of a pointer.
6677 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
6678 (ssize_t)compiler->compProfilerMethHnd);
6682 // No need to record relocations, if we are generating ELT hooks under the influence
6683 // of COMPlus_JitELTHookEnabled=1
6684 if (compiler->opts.compJitELTHookEnabled)
6686 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6690 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6694 // R15 = caller's SP
6696 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6697 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6698 // of that offset to FramePointer to obtain caller's SP value.
6699 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6700 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6701 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
6703 // Can't have a call until we have enough padding for rejit
6704 genPrologPadForReJit();
6706 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6707 // We use R11 here. This will emit either
6708 // "call ip-relative 32-bit offset" or
6709 // "mov r11, helper addr; call r11"
6710 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6712 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6713 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6715 *pInitRegZeroed = false;
6718 #endif // !defined(UNIX_AMD64_ABI)
6720 #elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
6722 unsigned saveStackLvl2 = genStackLevel;
6724 #if defined(_TARGET_X86_)
6725 // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
6726 // for x86 stack unwinding
6728 #if defined(UNIX_X86_ABI)
6729 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6730 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6731 #endif // UNIX_X86_ABI
6733 // Push the profilerHandle
6734 if (compiler->compProfilerMethHndIndirected)
6736 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6740 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6743 #elif defined(_TARGET_ARM_)
6744 // On Arm arguments are prespilled on stack, which frees r0-r3.
6745 // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
6746 // The call target register could be any free register.
6747 regNumber argReg = REG_PROFILER_ENTER_ARG;
6748 regMaskTP argRegMask = genRegMask(argReg);
6749 assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
6751 if (compiler->compProfilerMethHndIndirected)
6753 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
6754 regSet.verifyRegUsed(argReg);
6758 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
6761 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6765 // Can't have a call until we have enough padding for rejit
6767 genPrologPadForReJit();
6769 // This will emit either
6770 // "call ip-relative 32-bit offset" or
6771 // "mov rax, helper addr; call rax"
6772 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
6773 0, // argSize. Again, we have to lie about it
6774 EA_UNKNOWN); // retSize
6776 #if defined(_TARGET_X86_)
    // Check that we have a place for the push.
6778 assert(compiler->fgPtrArgCntMax >= 1);
6780 #if defined(UNIX_X86_ABI)
6781 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6782 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6783 #endif // UNIX_X86_ABI
6785 #elif defined(_TARGET_ARM_)
6786 if (initReg == argReg)
6788 *pInitRegZeroed = false;
6791 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6794 /* Restore the stack level */
6796 SetStackLevel(saveStackLvl2);
6799 NYI("Emit Profiler Enter callback");
6803 //-----------------------------------------------------------------------------------
6804 // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
6805 // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
6808 // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
6814 // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
6815 // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
6816 // 1. The calling sequence for calling the helper is:
6817 // push FunctionIDOrClientID
6818 // call ProfileLeaveHelper or ProfileTailcallHelper
6819 // 2. The calling function has an EBP frame.
6820 // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
//    the following prolog is assumed:
//        push ebp
//        mov ebp, esp
6824 // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
6825 // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
6826 // 5. The helper pops the FunctionIDOrClientID argument from the stack.
6828 void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
6830 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
6832 // Only hook if profiler says it's okay.
    if (!compiler->compIsProfilerHookNeeded())
    {
        return;
    }
6838 compiler->info.compProfilerCallback = true;
6840 // Need to save on to the stack level, since the helper call will pop the argument
6841 unsigned saveStackLvl2 = genStackLevel;
6843 #if defined(_TARGET_AMD64_)
6844 #if !defined(UNIX_AMD64_ABI)
6846 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6847 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6848 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6850 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
6851 // registers that profiler callback kills.
6852 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
6854 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
6855 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
6858 // At this point return value is computed and stored in RAX or XMM0.
6859 // On Amd64, Leave callback preserves the return register. We keep
6860 // RAX alive by not reporting as trashed by helper call. Also note
6861 // that GC cannot kick-in while executing inside profiler callback,
6862 // which is a requirement of profiler as well since it needs to examine
6863 // return value which could be an obj ref.
6865 // RCX = ProfilerMethHnd
6866 if (compiler->compProfilerMethHndIndirected)
6868 // Profiler hooks enabled during Ngen time.
6869 // Profiler handle needs to be accessed through an indirection of an address.
6870 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6874 // Don't record relocations, if we are generating ELT hooks under the influence
6875 // of COMPlus_JitELTHookEnabled=1
6876 if (compiler->opts.compJitELTHookEnabled)
6878 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6882 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6886 // RDX = caller's SP
    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
    // of the statements to execute unconditionally and clean up the rest.
6889 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6891 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
6892 // value of that offset to FramePointer to obtain caller's SP value.
6893 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6894 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
        // If we are here, it means that this is a tentative frame layout during which we
        // cannot use the caller's SP offset since it is an estimate. For now we require the
        // method to have at least a single arg so that we can use it to obtain the caller's SP.
6902 LclVarDsc* varDsc = compiler->lvaTable;
6903 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6905 // lea rdx, [FramePointer + Arg0's offset]
6906 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6909 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
6910 // We use R8 here. This will emit either
6911 // "call ip-relative 32-bit offset" or
6912 // "mov r8, helper addr; call r8"
6913 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
6915 #else // !defined(UNIX_AMD64_ABI)
6917 // RDI = ProfilerMethHnd
6918 if (compiler->compProfilerMethHndIndirected)
6920 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6924 if (compiler->opts.compJitELTHookEnabled)
6926 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6930 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6934 // RSI = caller's SP
6935 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6937 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6938 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6942 LclVarDsc* varDsc = compiler->lvaTable;
6943 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
        // lea rsi, [FramePointer + Arg0's offset]
6946 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6949 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6950 // We use R11 here. This will emit either
6951 // "call ip-relative 32-bit offset" or
6952 // "mov r11, helper addr; call r11"
6953 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6955 #endif // !defined(UNIX_AMD64_ABI)
6957 #elif defined(_TARGET_X86_)
6959 #if defined(UNIX_X86_ABI)
6960 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6961 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6963 AddNestedAlignment(0xC);
6964 #endif // UNIX_X86_ABI
6967 // Push the profilerHandle
6970 if (compiler->compProfilerMethHndIndirected)
6972 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6976 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6980 #if defined(UNIX_X86_ABI)
6981 int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
#else  // !UNIX_X86_ABI
    int argSize = REGSIZE_BYTES;
#endif // !UNIX_X86_ABI
6985 genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
    // Check that we have a place for the push.
6988 assert(compiler->fgPtrArgCntMax >= 1);
6990 #if defined(UNIX_X86_ABI)
6991 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6992 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6993 SubtractStackLevel(0x10);
6994 SubtractNestedAlignment(0xC);
6995 #endif // UNIX_X86_ABI
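    // Sketch of the UNIX_X86 stack arithmetic above (relying on the caller-pop convention noted earlier):
    // "sub esp, 0xC" plus the 4-byte profilerHandle push adjusts ESP by 16 bytes, which helps keep the outgoing
    // call 16-byte aligned; since the helper does not pop its argument here, the single "add esp, 0x10" removes
    // both the 12-byte pad and the argument.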
6997 #elif defined(_TARGET_ARM_)
6999 // Push the profilerHandle
7002 // Contract between JIT and Profiler Leave callout on arm:
7003 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
7004 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
7005 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
7006 // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
    // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting the
    // Leave profiler callout:
7011 emitAttr attr = EA_UNKNOWN;
7013 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
7014 (varTypeIsFloating(compiler->info.compRetType) ||
7015 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
        // Has a return value and r0 is in use. For emitting the Leave profiler callout we would need r0 for passing
        // the profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per the contract.
7023 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
7026 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
7028 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
7031 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
7038 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
7039 regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
7040 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
7044 if (compiler->compProfilerMethHndIndirected)
7046 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7047 regSet.verifyRegUsed(REG_ARG_0);
7051 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
7054 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
7056 EA_UNKNOWN); // retSize
7058 // Restore state that existed before profiler callback
7061 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
7062 regSet.verifyRegUsed(REG_ARG_0);
7063 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
7067 NYI("Emit Profiler Leave callback");
7070 /* Restore the stack level */
7071 SetStackLevel(saveStackLvl2);
7074 #endif // PROFILING_SUPPORTED
/*****************************************************************************

Esp frames :
----------

These instructions are just a reordering of the instructions used today.

<push callee-saved registers>
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
...
add esp, LOCALS_SIZE / pop dummyReg
<pop callee-saved registers>
ret

Ebp frames :
----------

The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
Everything else is similar, though in a different order.

The security object will no longer be at a fixed offset. However, the
offset can still be determined by looking up the GC-info and determining
how many callee-saved registers are pushed.

push ebp
mov ebp, esp
<push other callee-saved registers>
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
...
add esp, LOCALS_SIZE / pop dummyReg
<pop other callee-saved registers>
(mov esp, ebp if there are no callee-saved registers)
pop ebp
ret

Double-aligned frame :
--------------------

LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
of callee-saved registers are pushed on the stack so that the locals
themselves are qword-aligned. The instructions are the same as today,
just in a different order.

push ebp
mov ebp, esp
<double-align esp; push other callee-saved registers>
sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
...
add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
<pop other callee-saved registers>
mov esp, ebp
pop ebp
ret

localloc (with ebp) frames :
--------------------------

The instructions are the same as today, just in a different order.
Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".

push ebp
mov ebp, esp
<push other callee-saved registers>
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
...
lea esp, [ebp-calleeSavedRegsPushedSize]
<pop other callee-saved registers>
(mov esp, ebp if there are no callee-saved registers)
pop ebp
ret

*****************************************************************************/
7170 /*****************************************************************************
7172 * Generates appropriate NOP padding for a function prolog to support ReJIT.
7175 void CodeGen::genPrologPadForReJit()
7177 assert(compiler->compGeneratingProlog);
7179 #ifdef _TARGET_XARCH_
    if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
    {
        return;
    }
7185 #if FEATURE_EH_FUNCLETS
7187 // No need to generate pad (nops) for funclets.
7188 // When compiling the main function (and not a funclet)
7189 // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
    if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
    {
        return;
    }
7195 #endif // FEATURE_EH_FUNCLETS
    unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
    if (size < 5)
    {
        instNop(5 - size);
    }
#endif
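    // The 5-byte minimum padding target used above presumably corresponds to the size of an x86/x64 rel32
    // call/jmp, which is what the ReJIT mechanism (enabled via JIT_FLAG_PROF_REJIT_NOPS) needs room to patch
    // over the start of the prolog.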
7205 /*****************************************************************************
7207 * Reserve space for a function prolog.
7210 void CodeGen::genReserveProlog(BasicBlock* block)
7212 assert(block != nullptr);
7214 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
7216 /* Nothing is live on entry to the prolog */
7218 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
7221 /*****************************************************************************
7223 * Reserve space for a function epilog.
7226 void CodeGen::genReserveEpilog(BasicBlock* block)
7228 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
7229 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
7231 /* The return value is special-cased: make sure it goes live for the epilog */
7233 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
7235 if (genFullPtrRegMap && !jmpEpilog)
7237 if (varTypeIsGC(compiler->info.compRetNativeType))
7239 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
7241 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
            switch (compiler->info.compRetNativeType)
            {
                case TYP_REF:
                    gcrefRegsArg |= RBM_INTRET;
                    break;
                case TYP_BYREF:
                    byrefRegsArg |= RBM_INTRET;
                    break;
                default:
                    break;
            }
7257 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
7259 assert(block != nullptr);
7260 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
7261 bool last = (block->bbNext == nullptr);
7262 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
7265 #if FEATURE_EH_FUNCLETS
7267 /*****************************************************************************
7269 * Reserve space for a funclet prolog.
7272 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
7274 assert(block != nullptr);
7276 /* Currently, no registers are live on entry to the prolog, except maybe
7277 the exception object. There might be some live stack vars, but they
7278 cannot be accessed until after the frame pointer is re-established.
7279 In order to potentially prevent emitting a death before the prolog
7280 and a birth right after it, we just report it as live during the
7281 prolog, and rely on the prolog being non-interruptible. Trust
7282 genCodeForBBlist to correctly initialize all the sets.
7284 We might need to relax these asserts if the VM ever starts
7285 restoring any registers, then we could have live-in reg vars...
7288 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
7289 noway_assert(gcInfo.gcRegByrefSetCur == 0);
7291 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
7293 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7294 gcInfo.gcRegByrefSetCur, false);
7297 /*****************************************************************************
7299 * Reserve space for a funclet epilog.
7302 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
7304 assert(block != nullptr);
7306 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
7308 bool last = (block->bbNext == nullptr);
7309 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7310 gcInfo.gcRegByrefSetCur, last);
7313 #endif // FEATURE_EH_FUNCLETS
7315 /*****************************************************************************
7316 * Finalize the frame size and offset assignments.
7318 * No changes can be made to the modified register set after this, since that can affect how many
7319 * callee-saved registers get saved.
7321 void CodeGen::genFinalizeFrame()
7323 JITDUMP("Finalizing stack frame\n");
7325 // Initializations need to happen based on the var locations at the start
7326 // of the first basic block, so load those up. In particular, the determination
7327 // of whether or not to use block init in the prolog is dependent on the variable
7328 // locations on entry to the function.
7329 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
7331 genCheckUseBlockInit();
7333 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7334 CLANG_FORMAT_COMMENT_ANCHOR;
7336 #if defined(_TARGET_X86_)
7338 if (compiler->compTailCallUsed)
7340 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
7341 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
7342 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
7343 // actually get saved.
7345 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
7347 #endif // _TARGET_X86_
7349 #if defined(_TARGET_ARMARCH_)
7350 // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
7351 // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
7352 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7354 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
7356 #endif // defined(_TARGET_ARMARCH_)
7358 #if defined(_TARGET_ARM_)
    // If there are any reserved registers, add them to the modified set.
7360 if (regSet.rsMaskResvd != RBM_NONE)
7362 regSet.rsSetRegsModified(regSet.rsMaskResvd);
7364 #endif // _TARGET_ARM_
7369 printf("Modified regs: ");
7370 dspRegMask(regSet.rsGetModifiedRegsMask());
7375 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7376 if (compiler->opts.compDbgEnC)
7378 // We always save FP.
7379 noway_assert(isFramePointerUsed());
7380 #ifdef _TARGET_AMD64_
7381 // On x64 we always save exactly RBP, RSI and RDI for EnC.
7382 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
7383 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
7384 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
7385 #else // !_TARGET_AMD64_
7386 // On x86 we save all callee saved regs so the saved reg area size is consistent
7387 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7388 #endif // !_TARGET_AMD64_
7391 /* If we have any pinvoke calls, we might potentially trash everything */
7392 if (compiler->info.compCallUnmanaged)
7394 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
7395 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7398 #ifdef UNIX_AMD64_ABI
7399 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
7400 if (compiler->compIsProfilerHookNeeded())
7402 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
7406 /* Count how many callee-saved registers will actually be saved (pushed) */
7408 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
7409 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
7412 // EBP cannot be (directly) modified
7413 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
7416 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
7418 #ifdef _TARGET_ARMARCH_
7419 if (isFramePointerUsed())
7421 // For a FP based frame we have to push/pop the FP register
7423 maskCalleeRegsPushed |= RBM_FPBASE;
        // This assert checks that we are not using REG_FP
        // as both the frame pointer and as a codegen register
7428 assert(!regSet.rsRegsModified(RBM_FPBASE));
7431 // we always push LR. See genPushCalleeSavedRegisters
7433 maskCalleeRegsPushed |= RBM_LR;
7435 #if defined(_TARGET_ARM_)
7436 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
7437 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
7438 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
7440 if ((maskPushRegsFloat != RBM_NONE) ||
7441 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
7443 // Here we try to keep stack double-aligned before the vpush
7444 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
7446 regNumber extraPushedReg = REG_R4;
7447 while (maskPushRegsInt & genRegMask(extraPushedReg))
7449 extraPushedReg = REG_NEXT(extraPushedReg);
7451 if (extraPushedReg < REG_R11)
7453 maskPushRegsInt |= genRegMask(extraPushedReg);
7454 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
7457 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
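        // Illustrative (hypothetical) example of the padding above: if the prespill set is {r0-r3} and the integer
        // pushes are {r4, r5, r6, r11, lr}, the combined count is 9 (odd), so one extra register (the lowest of
        // r4-r10 not already pushed, here r7) is added to keep the stack 8-byte aligned before the vpush.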
7460 // We currently only expect to push/pop consecutive FP registers
7461 // and these have to be double-sized registers as well.
    // Here we will ensure that maskPushRegsFloat obeys these requirements.
7464 if (maskPushRegsFloat != RBM_NONE)
7466 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
7467 while (maskPushRegsFloat > contiguousMask)
7469 contiguousMask <<= 2;
7470 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
7472 if (maskPushRegsFloat != contiguousMask)
7474 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
7475 maskPushRegsFloat |= maskExtraRegs;
7476 regSet.rsSetRegsModified(maskExtraRegs);
7477 maskCalleeRegsPushed |= maskExtraRegs;
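            // Illustrative (hypothetical) example of the fill-in above: if only d8 and d10 were modified,
            // contiguousMask grows d8 -> d8-d9 -> d8-d10, so d9 is added to the push set to keep the saved
            // double registers contiguous starting at d8.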
7480 #endif // _TARGET_ARM_
7481 #endif // _TARGET_ARMARCH_
7483 #if defined(_TARGET_XARCH_)
7484 // Compute the count of callee saved float regs saved on stack.
7485 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
7486 // regs are stack allocated and preserved in their stack locations.
7487 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
7488 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
7489 #endif // defined(_TARGET_XARCH_)
7491 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
7496 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
7497 dspRegMask(maskCalleeRegsPushed);
7502 /* Assign the final offsets to things living on the stack frame */
7504 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
7506 /* We want to make sure that the prolog size calculated here is accurate
7507 (that is instructions will not shrink because of conservative stack
7508 frame approximations). We do this by filling in the correct size
7509 here (where we have committed to the final numbers for the frame offsets)
7510 This will ensure that the prolog size is always correct
7512 getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize();
7515 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
7517 compiler->lvaTableDump();
7522 //------------------------------------------------------------------------
7523 // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
7526 // delta - the offset to add to the current stack pointer to establish the frame pointer
7527 // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
7529 void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
7531 assert(compiler->compGeneratingProlog);
7533 #if defined(_TARGET_XARCH_)
7537 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
7538 #ifdef USING_SCOPE_INFO
7540 #endif // USING_SCOPE_INFO
7544 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7545 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
7549 if (reportUnwindData)
7551 compiler->unwindSetFrameReg(REG_FPBASE, delta);
7554 #elif defined(_TARGET_ARM_)
7556 assert(arm_Valid_Imm_For_Add_SP(delta));
7557 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7559 if (reportUnwindData)
7561 compiler->unwindPadding();
7564 #elif defined(_TARGET_ARM64_)
7568 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
7572 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7575 if (reportUnwindData)
7577 compiler->unwindSetFrameReg(REG_FPBASE, delta);
7581 NYI("establish frame pointer");
7585 /*****************************************************************************
7587 * Generates code for a function prolog.
7589 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
7591 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
7592 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
7593 * only instructions which result in control not going to the next instruction. Basically, any time execution would
7594 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
7595 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
7596 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
7598 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
7599 * debugger team to ensure that stepping still works.
7601 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
7605 #pragma warning(push)
7606 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
7608 void CodeGen::genFnProlog()
7610 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
7612 compiler->funSetCurrentFunc(0);
7617 printf("*************** In genFnProlog()\n");
7622 genInterruptibleUsed = true;
7625 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
7627 /* Ready to start on the prolog proper */
7629 getEmitter()->emitBegProlog();
7630 compiler->unwindBegProlog();
7632 // Do this so we can put the prolog instruction group ahead of
7633 // other instruction groups
7634 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
7637 if (compiler->opts.dspCode)
7639 printf("\n__prolog:\n");
7642 #ifdef USING_SCOPE_INFO
7643 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
7645 // Create new scopes for the method-parameters for the prolog-block.
7648 #endif // USING_SCOPE_INFO
7652 if (compiler->compJitHaltMethod())
7654 /* put a nop first because the debugger and other tools are likely to
7655 put an int3 at the beginning and we don't want to confuse them */
7658 instGen(INS_BREAKPOINT);
7660 #ifdef _TARGET_ARMARCH_
7661 // Avoid asserts in the unwind info because these instructions aren't accounted for.
7662 compiler->unwindPadding();
7663 #endif // _TARGET_ARMARCH_
7667 #if FEATURE_EH_FUNCLETS && defined(DEBUG)
7669 // We cannot force 0-initialization of the PSPSym
7670 // as it will overwrite the real value
7671 if (compiler->lvaPSPSym != BAD_VAR_NUM)
7673 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
7674 assert(!varDsc->lvMustInit);
7677 #endif // FEATURE_EH_FUNCLETS && DEBUG
7679 /*-------------------------------------------------------------------------
7681 * Record the stack frame ranges that will cover all of the tracked
7682 * and untracked pointer variables.
7683 * Also find which registers will need to be zero-initialized.
7685 * 'initRegs': - Generally, enregistered variables should not need to be
7686 * zero-inited. They only need to be zero-inited when they
7687 * have a possibly uninitialized read on some control
7688 * flow path. Apparently some of the IL_STUBs that we
7689 * generate have this property.
7692 int untrLclLo = +INT_MAX;
7693 int untrLclHi = -INT_MAX;
7694 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
7695 // Note that they may be tracked, but simply not allocated to a register.
7696 bool hasUntrLcl = false;
7698 int GCrefLo = +INT_MAX;
7699 int GCrefHi = -INT_MAX;
7700 bool hasGCRef = false;
7702 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
7703 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
7704 regMaskTP initDblRegs = RBM_NONE;
7709 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7711 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
7716 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
7718 noway_assert(varDsc->lvRefCnt() == 0);
7722 signed int loOffs = varDsc->lvStkOffs;
7723 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
7725 /* We need to know the offset range of tracked stack GC refs */
7726 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
7728 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
7730 // Fields with PROMOTION_TYPE_DEPENDENT promotion should have been
7731 // taken care of by the parent struct.
7732 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
7736 if (loOffs < GCrefLo)
7740 if (hiOffs > GCrefHi)
7747 /* For lvMustInit vars, gather pertinent info */
7749 if (!varDsc->lvMustInit)
7754 if (varDsc->lvIsInReg())
7756 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
7757 if (!varDsc->IsFloatRegType())
7759 initRegs |= regMask;
7761 if (varTypeIsMultiReg(varDsc))
7763 if (varDsc->lvOtherReg != REG_STK)
7765 initRegs |= genRegMask(varDsc->lvOtherReg);
7769 /* Upper DWORD is on the stack, and needs to be inited */
7771 loOffs += sizeof(int);
7776 else if (varDsc->TypeGet() == TYP_DOUBLE)
7778 initDblRegs |= regMask;
7782 initFltRegs |= regMask;
7791 if (loOffs < untrLclLo)
7795 if (hiOffs > untrLclHi)
7802 /* Don't forget about spill temps that hold pointers */
7804 if (!TRACK_GC_TEMP_LIFETIMES)
7806 assert(regSet.tmpAllFree());
7807 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
7809 if (!varTypeIsGC(tempThis->tdTempType()))
7814 signed int loOffs = tempThis->tdTempOffs();
7815 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
7817 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
7818 // previous frame pointer. Thus, stkOffs can't be zero.
7819 CLANG_FORMAT_COMMENT_ANCHOR;
7821 #if !defined(_TARGET_AMD64_)
7822 // However, on amd64 there is no requirement to chain frame pointers.
7824 noway_assert(!isFramePointerUsed() || loOffs != 0);
7825 #endif // !defined(_TARGET_AMD64_)
7827 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
7831 if (loOffs < untrLclLo)
7835 if (hiOffs > untrLclHi)
7842 assert((genInitStkLclCnt > 0) == hasUntrLcl);
7847 if (genInitStkLclCnt > 0)
7849 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
7856 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
7857 // Ditto for all enregistered user arguments in a varargs method.
7858 // These registers will be available to use for the initReg. We just remove
7859 // all of these registers from the rsCalleeRegArgMaskLiveIn.
7861 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
7864 /* Choose the register to use for zero initialization */
7866 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
7867 bool initRegZeroed = false;
7868 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
7871 // We should not use the special PINVOKE registers as the initReg
7872 // since they are trashed by the JIT helper call to set up the PINVOKE frame
7873 if (compiler->info.compCallUnmanaged)
7875 excludeMask |= RBM_PINVOKE_FRAME;
7877 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
7878 if (!compiler->opts.ShouldUsePInvokeHelpers())
7880 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
7882 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
7884 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
7886 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
7887 if (varDsc->lvRegister)
7889 excludeMask |= genRegMask(varDsc->lvRegNum);
7895 // If we have a variable sized frame (compLocallocUsed is true)
7896 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
7897 if (compiler->compLocallocUsed)
7899 excludeMask |= RBM_SAVED_LOCALLOC_SP;
7901 #endif // _TARGET_ARM_
7903 #if defined(_TARGET_XARCH_)
7904 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7906 // We currently must use REG_EAX on x86 here
7907 // because the loop's backwards branch depends upon the size of EAX encodings
7908 assert(initReg == REG_EAX);
7911 #endif // _TARGET_XARCH_
7913 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
7915 if (tempMask != RBM_NONE)
7917 // We will use one of the registers that we were planning to zero init anyway.
7918 // We pick the lowest register number.
7919 tempMask = genFindLowestBit(tempMask);
7920 initReg = genRegNumFromMask(tempMask);
7922 // Next we prefer to use one of the unused argument registers.
7923 // If they aren't available we use one of the caller-saved integer registers.
7926 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
7927 if (tempMask != RBM_NONE)
7929 // We pick the lowest register number
7930 tempMask = genFindLowestBit(tempMask);
7931 initReg = genRegNumFromMask(tempMask);
7936 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
7938 #if defined(_TARGET_AMD64_)
7939 // If this is a varargs method, then in order to set up the arguments correctly
7940 // a two-step process is used. As per the x64 ABI:
7941 // a) The caller sets up the argument shadow space (just before the return
7942 // address, 4 pointer sized slots).
7943 // b) The callee is responsible to home the arguments on the shadow space
7944 // provided by the caller.
7945 // This way, the varargs iterator will be able to retrieve the
7946 // call arguments properly since both the arg regs and the stack allocated
7947 // args will be contiguous.
7948 if (compiler->info.compIsVarArgs)
7950 getEmitter()->spillIntArgRegsToShadowSlots();
7953 #endif // _TARGET_AMD64_
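// Illustrative sketch (not from the original source): for a varargs method the homed shadow
// slots conceptually end up looking like this on entry, with the slots sitting just above the
// return address per the Windows x64 ABI:
//
//      mov     [rsp+0x08], rcx      ; home 1st integer argument register
//      mov     [rsp+0x10], rdx      ; home 2nd integer argument register
//      mov     [rsp+0x18], r8       ; home 3rd integer argument register
//      mov     [rsp+0x20], r9       ; home 4th integer argument register
//
// after which the register args and any stack args form one contiguous run of slots.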
7956 /*-------------------------------------------------------------------------
7958 * Now start emitting the part of the prolog which sets up the frame
7961 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
7963 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
7964 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
7966 #endif // _TARGET_ARM_
7968 #ifdef _TARGET_XARCH_
7969 if (doubleAlignOrFramePointerUsed())
7971 inst_RV(INS_push, REG_FPBASE, TYP_REF);
7972 compiler->unwindPush(REG_FPBASE);
7973 #ifdef USING_SCOPE_INFO
7974 psiAdjustStackLevel(REGSIZE_BYTES);
7975 #endif // USING_SCOPE_INFO
7976 #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
7977 genEstablishFramePointer(0, /*reportUnwindData*/ true);
7978 #endif // !_TARGET_AMD64_
7981 if (compiler->genDoubleAlign())
7983 noway_assert(isFramePointerUsed() == false);
7984 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
7986 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
7988 #endif // DOUBLE_ALIGN
7990 #endif // _TARGET_XARCH_
7992 #ifdef _TARGET_ARM64_
7993 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame. Note that
7994 // for arm64, genAllocLclFrame only probes the frame; it does not actually allocate it (it does not change SP).
7995 // For arm64, we are probing the frame before the callee-saved registers are saved. The 'initReg' might have
7996 // been calculated to be one of the callee-saved registers (say, if all the integer argument registers are
7997 // in use, and perhaps with other conditions being satisfied). Using such a register as a temporary would only be
7998 // safe after the callee-saved registers have been saved, which has not happened yet at this point. So instead of
7999 // letting genAllocLclFrame use initReg as a temporary register, always use REG_SCRATCH. We don't care if it trashes it, so ignore the initRegZeroed output argument.
8000 bool ignoreInitRegZeroed = false;
8001 genAllocLclFrame(compiler->compLclFrameSize, REG_SCRATCH, &ignoreInitRegZeroed,
8002 intRegState.rsCalleeRegArgMaskLiveIn);
8003 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
8004 #else // !_TARGET_ARM64_
8005 genPushCalleeSavedRegisters();
8006 #endif // !_TARGET_ARM64_
8009 bool needToEstablishFP = false;
8010 int afterLclFrameSPtoFPdelta = 0;
8011 if (doubleAlignOrFramePointerUsed())
8013 needToEstablishFP = true;
8015 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
8016 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
8017 // too big, we go ahead and do it here.
8019 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
8020 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
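// Illustrative example (assumed values, not from the original source): if the prolog pushed six
// callee-saved registers, say {r4,r5,r6,r7,r11,lr}, then SPtoFPdelta = (6 - 2) * 4 = 16, i.e. the
// saved r11 lies 16 bytes above the SP produced by the push; adding compLclFrameSize gives the
// frame pointer's offset from the final SP once the local frame has been allocated.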
8021 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
8023 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
8024 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
8025 needToEstablishFP = false;
8028 #endif // _TARGET_ARM_
8030 //-------------------------------------------------------------------------
8032 // Subtract the local frame size from SP.
8034 //-------------------------------------------------------------------------
8035 CLANG_FORMAT_COMMENT_ANCHOR;
8037 #ifndef _TARGET_ARM64_
8038 regMaskTP maskStackAlloc = RBM_NONE;
8042 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
8043 #endif // _TARGET_ARM_
8045 if (maskStackAlloc == RBM_NONE)
8047 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
8049 #endif // !_TARGET_ARM64_
8051 //-------------------------------------------------------------------------
8054 if (compiler->compLocallocUsed)
8056 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
8057 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
8058 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
8060 #endif // _TARGET_ARMARCH_
8062 #if defined(_TARGET_XARCH_)
8063 // Preserve callee saved float regs to stack.
8064 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
8065 #endif // defined(_TARGET_XARCH_)
8067 #ifdef _TARGET_AMD64_
8068 // Establish the AMD64 frame pointer after the OS-reported prolog.
8069 if (doubleAlignOrFramePointerUsed())
8071 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
8072 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
8074 #endif //_TARGET_AMD64_
8076 //-------------------------------------------------------------------------
8078 // This is the end of the OS-reported prolog for purposes of unwinding
8080 //-------------------------------------------------------------------------
8083 if (needToEstablishFP)
8085 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
8086 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
8088 #endif // _TARGET_ARM_
8090 if (compiler->info.compPublishStubParam)
8092 #if CPU_LOAD_STORE_ARCH
8093 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM,
8094 compiler->lvaStubArgumentVar, 0);
8096 // mov [lvaStubArgumentVar], EAX
8097 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8098 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8100 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
8102 // It's no longer live; clear it out so it can be used after this in the prolog
8103 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
8107 // Zero out the frame as needed
8110 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
8112 #if FEATURE_EH_FUNCLETS
8114 genSetPSPSym(initReg, &initRegZeroed);
8116 #else // !FEATURE_EH_FUNCLETS
8118 // When compInitMem is true, genZeroInitFrame will zero out the shadow SP slots
8119 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
8121 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
8122 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
8124 // Zero out the slot for nesting level 0
8125 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
8129 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8130 initRegZeroed = true;
8133 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
8137 #endif // !FEATURE_EH_FUNCLETS
8139 genReportGenericContextArg(initReg, &initRegZeroed);
8141 // The local variable representing the security object must be on the stack frame
8142 // and must be 0 initialized.
8143 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
8144 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
8145 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
8147 #ifdef JIT32_GCENCODER
8148 // Initialize the LocalAllocSP slot if there is localloc in the function.
8149 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
8151 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
8153 #endif // JIT32_GCENCODER
8155 // Set up the GS security cookie
8157 genSetGSSecurityCookie(initReg, &initRegZeroed);
8159 #ifdef PROFILING_SUPPORTED
8161 // Insert a function entry callback for profiling, if requested.
8162 genProfilingEnterCallback(initReg, &initRegZeroed);
8164 #endif // PROFILING_SUPPORTED
8166 if (!genInterruptible)
8168 /*-------------------------------------------------------------------------
8170 * The 'real' prolog ends here for non-interruptible methods.
8171 * For fully-interruptible methods, we extend the prolog so that
8172 * we do not need to track GC information while shuffling the
8175 * Make sure there's enough padding for ReJIT.
8178 genPrologPadForReJit();
8179 getEmitter()->emitMarkPrologEnd();
8182 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
8183 // The unused bits of Vector3 arguments must be cleared
8184 // since the native compiler doesn't initialize the upper bits to zero.
8186 // TODO-Cleanup: This logic can be implemented in
8187 // genFnPrologCalleeRegArgs() for argument registers and
8188 // genEnregisterIncomingStackArgs() for stack arguments.
8189 genClearStackVec3ArgUpperBits();
8190 #endif // UNIX_AMD64_ABI && FEATURE_SIMD
8192 /*-----------------------------------------------------------------------------
8193 * Take care of register arguments first
8198 // Update the arg initial register locations.
8199 compiler->lvaUpdateArgsWithInitialReg();
8201 FOREACH_REGISTER_FILE(regState)
8203 if (regState->rsCalleeRegArgMaskLiveIn)
8205 // If we need an extra register to shuffle around the incoming registers,
8206 // we will use xtraReg (initReg) and set the xtraRegClobbered flag;
8207 // if we don't need to use the xtraReg then this flag will stay false.
8210 bool xtraRegClobbered = false;
8212 if (genRegMask(initReg) & RBM_ARG_REGS)
8218 xtraReg = REG_SCRATCH;
8219 initRegZeroed = false;
8222 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
8224 if (xtraRegClobbered)
8226 initRegZeroed = false;
8231 // Home the incoming arguments
8232 genEnregisterIncomingStackArgs();
8234 /* Initialize any must-init registers variables now */
8238 regMaskTP regMask = 0x1;
8240 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
8242 if (regMask & initRegs)
8244 // Check if we have already zeroed this register
8245 if ((reg == initReg) && initRegZeroed)
8251 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
8254 initRegZeroed = true;
8261 if (initFltRegs | initDblRegs)
8263 // If initReg is not in initRegs then we will use REG_SCRATCH
8264 if ((genRegMask(initReg) & initRegs) == 0)
8266 initReg = REG_SCRATCH;
8267 initRegZeroed = false;
8271 // This is needed only for Arm since it can use a zero initialized int register
8272 // to initialize vfp registers.
8275 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8276 initRegZeroed = true;
8278 #endif // _TARGET_ARM_
8280 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
8283 //-----------------------------------------------------------------------------
8286 // Increase the prolog size here only if fully interruptible.
8287 // And again make sure it's big enough for ReJIT
8290 if (genInterruptible)
8292 genPrologPadForReJit();
8293 getEmitter()->emitMarkPrologEnd();
8295 #ifdef USING_SCOPE_INFO
8296 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8300 #endif // USING_SCOPE_INFO
8303 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
8307 noway_assert(GCrefLo == +INT_MAX);
8308 noway_assert(GCrefHi == -INT_MAX);
8312 if (compiler->opts.dspCode)
8319 // On non-x86 the VARARG cookie does not need any special treatment.
8321 // Load up the VARARG argument pointer register so it doesn't get clobbered.
8322 // Only do this if we actually access any statically declared args
8323 // (i.e., our argument pointer register has a refcount > 0).
8324 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
8326 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > 0)
8328 varDsc = &compiler->lvaTable[argsStartVar];
8330 noway_assert(compiler->info.compArgsCount > 0);
8332 // MOV EAX, <VARARGS HANDLE>
8333 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
8334 regSet.verifyRegUsed(REG_EAX);
8337 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
8339 // EDX might actually be holding something here. So make sure to only use EAX for this code
8342 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
8343 noway_assert(!lastArg->lvRegister);
8344 signed offset = lastArg->lvStkOffs;
8345 assert(offset != BAD_STK_OFFS);
8346 noway_assert(lastArg->lvFramePointerBased);
8348 // LEA EAX, &<VARARGS HANDLE> + EAX
8349 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
8351 if (varDsc->lvIsInReg())
8353 if (varDsc->lvRegNum != REG_EAX)
8355 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
8356 regSet.verifyRegUsed(varDsc->lvRegNum);
8361 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
8365 #endif // _TARGET_X86_
8367 #if defined(DEBUG) && defined(_TARGET_XARCH_)
8368 if (compiler->opts.compStackCheckOnRet)
8370 noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC &&
8371 compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister &&
8372 compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame);
8373 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
8375 #endif // defined(DEBUG) && defined(_TARGET_XARCH_)
8377 getEmitter()->emitEndProlog();
8378 compiler->unwindEndProlog();
8380 noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize());
8383 #pragma warning(pop)
8386 /*****************************************************************************
8388 * Generates code for a function epilog.
8390 * Please consult the "debugger team notification" comment in genFnProlog().
8393 #if defined(_TARGET_ARMARCH_)
8395 void CodeGen::genFnEpilog(BasicBlock* block)
8399 printf("*************** In genFnEpilog()\n");
8402 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8404 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8405 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8406 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8409 if (compiler->opts.dspCode)
8410 printf("\n__epilog:\n");
8414 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8415 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8416 printf(", gcRegGCrefSetCur=");
8417 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8418 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8419 printf(", gcRegByrefSetCur=");
8420 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8421 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8426 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8428 GenTree* lastNode = block->lastNode();
8430 // Method handle and address info used in case of jump epilog
8431 CORINFO_METHOD_HANDLE methHnd = nullptr;
8432 CORINFO_CONST_LOOKUP addrInfo;
8433 addrInfo.addr = nullptr;
8434 addrInfo.accessType = IAT_VALUE;
8436 if (jmpEpilog && lastNode->gtOper == GT_JMP)
8438 methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1;
8439 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8443 // We delay starting the unwind codes until we have an instruction which we know
8444 // needs an unwind code. In particular, for large stack frames in methods without
8445 // localloc, the sequence might look something like this:
8446 //      movw r3, #<large frame size>
8447 //      add sp, r3
8448 //      pop {r4,r5,r6,r10,r11,pc}
8449 // In this case, the "movw" should not be part of the unwind codes, since it will
8450 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
8451 // also sets the current location as the beginning offset of the epilog, so every
8452 // instruction afterwards needs an unwind code. In the case above, if you call
8453 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
8455 bool unwindStarted = false;
8457 // Tear down the stack frame
8459 if (compiler->compLocallocUsed)
8463 compiler->unwindBegEpilog();
8464 unwindStarted = true;
8468 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
8469 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
8473 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
8476 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
8481 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
8482 compiler->unwindBegEpilog();
8483 unwindStarted = true;
8486 if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE)
8488 // IAT_RELPVALUE jump at the end is done using relative indirection, so
8489 // an additional helper register is required.
8490 // We use LR just before it is going to be restored from stack, i.e.
8501 regNumber indCallReg = REG_R12;
8502 regNumber vptrReg1 = REG_LR;
8504 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8505 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg);
8506 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8507 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1);
8510 genPopCalleeSavedRegisters(jmpEpilog);
8512 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8514 // We'd better not have used a 'pop {..., pc}' to return; otherwise this will be unreachable code
8515 noway_assert(!genUsedPopToReturn);
8517 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
8518 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
8519 compiler->unwindAllocStack(preSpillRegArgSize);
8524 // We'd better not have used a 'pop {..., pc}' to return; otherwise this will be unreachable code
8525 noway_assert(!genUsedPopToReturn);
8528 #else // _TARGET_ARM64_
8529 compiler->unwindBegEpilog();
8531 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
8532 #endif // _TARGET_ARM64_
8536 hasTailCalls = true;
8538 noway_assert(block->bbJumpKind == BBJ_RETURN);
8539 noway_assert(block->bbTreeList != nullptr);
8541 /* figure out what jump we have */
8542 GenTree* jmpNode = lastNode;
8543 #if !FEATURE_FASTTAILCALL
8544 noway_assert(jmpNode->gtOper == GT_JMP);
8545 #else // FEATURE_FASTTAILCALL
8547 // If jmpNode is GT_JMP then gtNext must be null.
8548 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8549 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8551 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8552 noway_assert((jmpNode->gtOper == GT_JMP) ||
8553 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8555 // The next block is associated with this "if" stmt
8556 if (jmpNode->gtOper == GT_JMP)
8557 #endif // FEATURE_FASTTAILCALL
8559 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8560 // the same descriptor with some minor adjustments.
8561 assert(methHnd != nullptr);
8562 assert(addrInfo.addr != nullptr);
8564 #ifdef _TARGET_ARMARCH_
8565 emitter::EmitCallType callType;
8567 regNumber indCallReg;
8568 switch (addrInfo.accessType)
8571 if (validImmForBL((ssize_t)addrInfo.addr))
8573 // Simple direct call
8574 callType = emitter::EC_FUNC_TOKEN;
8575 addr = addrInfo.addr;
8576 indCallReg = REG_NA;
8580 // otherwise the target address doesn't fit in an immediate
8581 // so we have to burn a register...
8585 // Load the address into a register, load indirect and call through a register
8586 // We have to use R12 since we assume the argument registers are in use
8587 callType = emitter::EC_INDIR_R;
8588 indCallReg = REG_INDIRECT_CALL_TARGET_REG;
8590 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8591 if (addrInfo.accessType == IAT_PVALUE)
8593 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8594 regSet.verifyRegUsed(indCallReg);
8600 // Load the address into a register, load relative indirect and call through a register
8601 // We have to use R12 since we assume the argument registers are in use
8602 // LR is used as helper register right before it is restored from stack, thus,
8603 // all relative address calculations are performed before LR is restored.
8604 callType = emitter::EC_INDIR_R;
8605 indCallReg = REG_R12;
8608 regSet.verifyRegUsed(indCallReg);
8614 NO_WAY("Unsupported JMP indirection");
8617 /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
8618 * the same descriptor with some minor adjustments.
8622 getEmitter()->emitIns_Call(callType,
8624 INDEBUG_LDISASM_COMMA(nullptr)
8627 EA_UNKNOWN, // retSize
8628 #if defined(_TARGET_ARM64_)
8629 EA_UNKNOWN, // secondRetSize
8631 gcInfo.gcVarPtrSetCur,
8632 gcInfo.gcRegGCrefSetCur,
8633 gcInfo.gcRegByrefSetCur,
8634 BAD_IL_OFFSET, // IL offset
8641 CLANG_FORMAT_COMMENT_ANCHOR;
8642 #endif //_TARGET_ARMARCH_
8644 #if FEATURE_FASTTAILCALL
8648 // Call target = REG_FASTTAILCALL_TARGET
8649 // https://github.com/dotnet/coreclr/issues/4827
8650 // Do we need a special encoding for stack walker like rex.w prefix for x64?
8651 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
8653 #endif // FEATURE_FASTTAILCALL
8658 if (!genUsedPopToReturn)
8660 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
8661 // so we need a "bx lr" instruction to return from the function.
8662 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
8663 compiler->unwindBranch16();
8665 #else // _TARGET_ARM64_
8666 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
8667 compiler->unwindReturn(REG_LR);
8668 #endif // _TARGET_ARM64_
8671 compiler->unwindEndEpilog();
8674 #elif defined(_TARGET_XARCH_)
8676 void CodeGen::genFnEpilog(BasicBlock* block)
8681 printf("*************** In genFnEpilog()\n");
8685 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8687 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8688 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8689 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8691 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
8694 genInterruptibleUsed = true;
8697 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8700 if (compiler->opts.dspCode)
8702 printf("\n__epilog:\n");
8707 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8708 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8709 printf(", gcRegGCrefSetCur=");
8710 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8711 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8712 printf(", gcRegByrefSetCur=");
8713 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8714 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8719 // Restore float registers that were saved to stack before SP is modified.
8720 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
8722 #ifdef JIT32_GCENCODER
8723 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
8724 // the above call to `genRestoreCalleeSavedFltRegs` because that function
8725 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
8726 // which is the only target that uses the JIT32 GC encoder
8727 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
8728 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
8729 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
8730 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
8731 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
8732 // we will need to rethink this.
8733 getEmitter()->emitStartEpilog();
8736 /* Compute the size in bytes we've pushed/popped */
8738 if (!doubleAlignOrFramePointerUsed())
8740 // We have an ESP frame
8742 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
8744 /* Get rid of our local variables */
8746 if (compiler->compLclFrameSize)
8749 /* Add 'compiler->compLclFrameSize' to ESP */
8750 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
8752 if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
8754 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8755 regSet.verifyRegUsed(REG_ECX);
8758 #endif // _TARGET_X86
8760 /* Add 'compiler->compLclFrameSize' to ESP */
8761 /* Generate "add esp, <stack-size>" */
8762 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8766 genPopCalleeSavedRegisters();
8770 noway_assert(doubleAlignOrFramePointerUsed());
8772 /* Tear down the stack frame */
8774 bool needMovEspEbp = false;
8777 if (compiler->genDoubleAlign())
8780 // add esp, compLclFrameSize
8782 // We need not do anything (except the "mov esp, ebp") if
8783 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
8784 // also complicates the code manager. Hence, we ignore that case.
8786 noway_assert(compiler->compLclFrameSize != 0);
8787 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8789 needMovEspEbp = true;
8792 #endif // DOUBLE_ALIGN
8794 bool needLea = false;
8796 if (compiler->compLocallocUsed)
8798 // ESP may be variable if a localloc was actually executed. Reset it.
8799 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8803 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
8805 if (compiler->compLclFrameSize != 0)
8807 #ifdef _TARGET_AMD64_
8808 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
8809 // do an LEA to "pop off" the frame allocation.
8811 #else // !_TARGET_AMD64_
8812 // We will just generate "mov esp, ebp" and be done with it.
8813 needMovEspEbp = true;
8814 #endif // !_TARGET_AMD64_
8817 else if (compiler->compLclFrameSize == 0)
8819 // do nothing before popping the callee-saved registers
8822 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
8824 // "pop ecx" will make ESP point to the callee-saved registers
8825 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8826 regSet.verifyRegUsed(REG_ECX);
8828 #endif // _TARGET_X86
8831 // We need to make ESP point to the callee-saved registers
8839 #ifdef _TARGET_AMD64_
8840 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
8842 // Case 1: localloc not used.
8843 // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
8844 // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8845 // The amount to be subtracted from RBP to point at callee saved int regs.
8847 // Case 2: localloc used
8848 // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
8849 // Offset = Amount to be added to RBP to point at callee saved int regs.
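// Illustrative example (assumed values, not from the original source): in Case 1 with
// compiler->compCalleeRegsPushed == 4, offset = 4 * REGSIZE_BYTES = 32, so the "lea" below emits
// "lea rsp, [rbp-32]", leaving SP pointing at the callee-saved int regs, ready for the pops.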
8850 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
8852 // Offset should fit within a byte if localloc is not used.
8853 if (!compiler->compLocallocUsed)
8855 noway_assert(offset < UCHAR_MAX);
8858 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8859 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8860 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
8863 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
8868 // Pop the callee-saved registers (if any)
8871 genPopCalleeSavedRegisters();
8873 #ifdef _TARGET_AMD64_
8874 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
8875 #else // !_TARGET_AMD64_
8879 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
8881 #endif // !_TARGET_AMD64_
8884 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
8887 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
8889 /* Check if this is a special return block, i.e.
8890 * a CEE_JMP instruction */
8894 noway_assert(block->bbJumpKind == BBJ_RETURN);
8895 noway_assert(block->bbTreeList);
8897 // figure out what jump we have
8898 GenTree* jmpNode = block->lastNode();
8899 #if !FEATURE_FASTTAILCALL
8901 noway_assert(jmpNode->gtOper == GT_JMP);
8904 // If jmpNode is GT_JMP then gtNext must be null.
8905 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8906 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8908 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8909 noway_assert((jmpNode->gtOper == GT_JMP) ||
8910 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8912 // The next block is associated with this "if" stmt
8913 if (jmpNode->gtOper == GT_JMP)
8916 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8917 // the same descriptor with some minor adjustments.
8918 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
8920 CORINFO_CONST_LOOKUP addrInfo;
8921 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8922 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
8924 NO_WAY("Unsupported JMP indirection");
8927 const emitter::EmitCallType callType =
8928 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
8930 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8931 // the same descriptor with some minor adjustments.
8934 getEmitter()->emitIns_Call(callType,
8936 INDEBUG_LDISASM_COMMA(nullptr)
8939 EA_UNKNOWN // retSize
8940 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
8941 gcInfo.gcVarPtrSetCur,
8942 gcInfo.gcRegGCrefSetCur,
8943 gcInfo.gcRegByrefSetCur,
8944 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
8949 #if FEATURE_FASTTAILCALL
8952 #ifdef _TARGET_AMD64_
8954 // Call target = RAX.
8955 // Stack walker requires that a register indirect tail call be rex.w prefixed.
8956 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
8958 assert(!"Fast tail call as epilog+jmp");
8960 #endif //_TARGET_AMD64_
8962 #endif // FEATURE_FASTTAILCALL
8966 unsigned stkArgSize = 0; // Zero on all platforms except x86
8968 #if defined(_TARGET_X86_)
8969 bool fCalleePop = true;
8971 // varargs has caller pop
8972 if (compiler->info.compIsVarArgs)
8976 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
8978 #endif // UNIX_X86_ABI
8982 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
8983 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
8985 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has a 2-byte operand
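// Illustrative example (assumed values, not from the original source): a callee-pop method with
// compArgSize == 16 whose first two register-sized arguments came in registers gives
// stkArgSize = 16 - 2 * REGSIZE_BYTES = 8, so the epilog below ends with "ret 8".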
8987 #endif // _TARGET_X86_
8989 /* Return, popping our arguments (if any) */
8990 instGen_Return(stkArgSize);
8995 #error Unsupported or unset target architecture
8998 #if FEATURE_EH_FUNCLETS
9002 /*****************************************************************************
9004 * Generates code for an EH funclet prolog.
9006 * Funclets have the following incoming arguments:
9008 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
9009 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
9010 * finally/fault: none
9012 * Funclets set the following registers on exit:
9014 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
9015 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9016 * finally/fault: none
9018 * The ARM funclet prolog sequence is:
9020 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
9021 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
9022 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
9023 * ; calculated for the entire function.
9024 * sub sp, XXX ; Establish the rest of the frame.
9025 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9026 * ; up to preserve stack alignment. If we push an odd number of registers, we also
9027 * ; generate this, to keep the stack aligned.
9029 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9031 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9034 * if (this is a filter funclet)
9036 * // r1 on entry to a filter funclet is CallerSP of the containing function:
9037 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
9038 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
9039 * // a funclet. Consider:
9043 * // throw new Exception();
9044 * // } catch(Exception) {
9045 * // throw new Exception(); // The exception thrown here ...
9047 * // } filter { // ... will be processed here, while the "catch" funclet frame is
9048 * // // still on the stack
9049 * // } filter-handler {
9052 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
9053 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
9054 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
9056 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
9057 * ; the dynamically containing funclet or function)
9058 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
9059 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
9063 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
9064 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
9066 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
9067 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
9070 * The epilog sequence is then:
9072 * add sp, XXX ; if necessary
9075 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
9076 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
9078 * The funclet frame is thus:
9081 * |-----------------------|
9084 * +=======================+ <---- Caller's SP
9085 * |Callee saved registers |
9086 * |-----------------------|
9087 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
9088 * | | // in function and funclet
9089 * |-----------------------|
9090 * | PSP slot | // Omitted in CoreRT ABI
9091 * |-----------------------|
9092 * ~ possible 4 byte pad ~
9094 * |-----------------------|
9095 * | Outgoing arg space |
9096 * |-----------------------| <---- Ambient SP
9103 void CodeGen::genFuncletProlog(BasicBlock* block)
9107 printf("*************** In genFuncletProlog()\n");
9110 assert(block != NULL);
9111 assert(block->bbFlags & BBF_FUNCLET_BEG);
9113 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9115 gcInfo.gcResetForBB();
9117 compiler->unwindBegProlog();
9119 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9120 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
9122 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
9123 maskPushRegsInt |= maskStackAlloc;
9125 assert(FitsIn<int>(maskPushRegsInt));
9126 inst_IV(INS_push, (int)maskPushRegsInt);
9127 compiler->unwindPushMaskInt(maskPushRegsInt);
9129 if (maskPushRegsFloat != RBM_NONE)
9131 genPushFltRegs(maskPushRegsFloat);
9132 compiler->unwindPushMaskFloat(maskPushRegsFloat);
9135 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
9137 regMaskTP maskArgRegsLiveIn;
9140 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
9142 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9144 maskArgRegsLiveIn = RBM_NONE;
9148 maskArgRegsLiveIn = RBM_R0;
9151 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
9152 bool initRegZeroed = false;
9154 if (maskStackAlloc == RBM_NONE)
9156 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9159 // This is the end of the OS-reported prolog for purposes of unwinding
9160 compiler->unwindEndProlog();
9162 // If there is no PSPSym (CoreRT ABI), we are done.
9163 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9170 // This is the first block of a filter
9172 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
9173 genFuncletInfo.fiPSP_slot_CallerSP_offset);
9174 regSet.verifyRegUsed(REG_R1);
9175 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
9176 genFuncletInfo.fiPSP_slot_SP_offset);
9177 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
9178 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9182 // This is a non-filter funclet
9183 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
9184 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9185 regSet.verifyRegUsed(REG_R3);
9186 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
9187 genFuncletInfo.fiPSP_slot_SP_offset);
9191 /*****************************************************************************
9193 * Generates code for an EH funclet epilog.
9196 void CodeGen::genFuncletEpilog()
9200 printf("*************** In genFuncletEpilog()\n");
9203 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9205 // Just as for the main function, we delay starting the unwind codes until we have
9206 // an instruction which we know needs an unwind code. This is to support code like
9208 //      movw r3, #<large frame size>
9209 //      add sp, r3
9210 //      pop {r4,r5,r6,r10,r11,pc}
9211 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
9213 bool unwindStarted = false;
9215 /* The saved regs info includes the LR register; we pop into the PC register to return */
9216 assert(genFuncletInfo.fiSaveRegs & RBM_LR);
9218 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9219 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
9221 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
9222 maskPopRegsInt |= maskStackAlloc;
9224 if (maskStackAlloc == RBM_NONE)
9226 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
9231 // We'll definitely generate an unwindable instruction next
9232 compiler->unwindBegEpilog();
9233 unwindStarted = true;
9236 maskPopRegsInt &= ~RBM_LR;
9237 maskPopRegsInt |= RBM_PC;
9239 if (maskPopRegsFloat != RBM_NONE)
9241 genPopFltRegs(maskPopRegsFloat);
9242 compiler->unwindPopMaskFloat(maskPopRegsFloat);
9245 assert(FitsIn<int>(maskPopRegsInt));
9246 inst_IV(INS_pop, (int)maskPopRegsInt);
9247 compiler->unwindPopMaskInt(maskPopRegsInt);
9249 compiler->unwindEndEpilog();
9252 /*****************************************************************************
9254 * Capture the information used to generate the funclet prologs and epilogs.
9255 * Note that all funclet prologs are identical, and all funclet epilogs are
9256 * identical (per type: filters are identical, and non-filters are identical).
9257 * Thus, we compute the data used for these just once.
9259 * See genFuncletProlog() for more information about the prolog/epilog sequences.
9262 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9264 if (compiler->ehAnyFunclets())
9266 assert(isFramePointerUsed());
9267 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
9270 // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
9271 // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
9272 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
9273 // (also assumed in genFnProlog()).
9274 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
9275 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9276 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
9278 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
9279 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
9280 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
9281 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9282 unsigned funcletFrameSize =
9283 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
9285 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
9286 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
9287 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
9289 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
9290 int PSP_slot_CallerSP_offset =
9291 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
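// Illustrative example (assumed values, not from the original source): with no pre-spill
// registers, saved regs {r4,r5,r11,lr} (16 bytes) and lvaOutgoingArgSpaceSize == 8:
//   funcletFrameSize         = 0 + 16 + 4 + 8 = 28
//   funcletFrameSizeAligned  = roundUp(28, 8) = 32, so funcletFrameAlignmentPad = 4
//   spDelta                  = 32 - 16 = 16
//   PSP_slot_SP_offset       = 8 + 4 = 12
//   PSP_slot_CallerSP_offset = -(28 - 8) = -20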
9293 /* Now save it for future use */
9295 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
9296 genFuncletInfo.fiSpDelta = spDelta;
9297 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
9298 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
9304 printf("Funclet prolog / epilog info\n");
9305 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9306 printf(" Save regs: ");
9307 dspRegMask(rsMaskSaveRegs);
9309 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9310 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
9311 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
9313 if (PSP_slot_CallerSP_offset != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym))
9315 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
9316 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9321 assert(PSP_slot_CallerSP_offset < 0);
9322 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9324 assert(PSP_slot_CallerSP_offset ==
9325 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
9326 // function and funclet!
9331 #elif defined(_TARGET_AMD64_)
9333 /*****************************************************************************
9335 * Generates code for an EH funclet prolog.
9337 * Funclets have the following incoming arguments:
9339 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
9340 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
9341 * finally/fault: rcx = InitialSP
9343 * Funclets set the following registers on exit:
9345 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
9346 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9347 * finally/fault: none
9349 * The AMD64 funclet prolog sequence is:
9352 * push callee-saved regs
9353 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
9354 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
9355 * ; the entire function.
9356 * sub sp, XXX ; Establish the rest of the frame.
9357 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9358 * ; up to preserve stack alignment. If we push an odd number of registers, we also
9359 * ; generate this, to keep the stack aligned.
9361 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9363 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9365 * ; Also, re-establish the frame pointer from the PSP.
9367 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
9368 * ; PSP of the dynamically containing funclet or function)
9369 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
9370 * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
9371 * ; Function_InitialSP_to_FP_delta==0, we don't need this
9374 * The epilog sequence is then:
9377 * pop callee-saved regs ; if necessary
9381 * The funclet frame is thus:
9384 * |-----------------------|
9387 * +=======================+ <---- Caller's SP
9388 * | Return address |
9389 * |-----------------------|
9391 * |-----------------------|
9392 * |Callee saved registers |
9393 * |-----------------------|
9394 * ~ possible 8 byte pad ~
9396 * |-----------------------|
9397 * | PSP slot | // Omitted in CoreRT ABI
9398 * |-----------------------|
9399 * | Outgoing arg space | // this only exists if the function makes a call
9400 * |-----------------------| <---- Initial SP
9406 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
9407 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
9408 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
9409 * "FRAMEPTR OFFSETS" for details.
9412 void CodeGen::genFuncletProlog(BasicBlock* block)
9417 printf("*************** In genFuncletProlog()\n");
9421 assert(!regSet.rsRegsModified(RBM_FPBASE));
9422 assert(block != nullptr);
9423 assert(block->bbFlags & BBF_FUNCLET_BEG);
9424 assert(isFramePointerUsed());
9426 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9428 gcInfo.gcResetForBB();
9430 compiler->unwindBegProlog();
9432 // We need to push ebp, since it's callee-saved.
9433 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
9434 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
9435 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
9436 // is stored here (all temps are allocated in the parent frame).
9437 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
9438 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
9440 inst_RV(INS_push, REG_FPBASE, TYP_REF);
9441 compiler->unwindPush(REG_FPBASE);
9443 // Callee saved int registers are pushed to stack.
9444 genPushCalleeSavedRegisters();
9446 regMaskTP maskArgRegsLiveIn;
9447 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9449 maskArgRegsLiveIn = RBM_ARG_0;
9453 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
9456 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
9457 bool initRegZeroed = false;
9459 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9461 // Callee saved float registers are copied to stack in their assigned stack slots
9462 // after allocating space for them as part of the funclet frame.
9463 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9465 // This is the end of the OS-reported prolog for purposes of unwinding
9466 compiler->unwindEndProlog();
9468 // If there is no PSPSym (CoreRT ABI), we are done.
9469 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9474 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9476 regSet.verifyRegUsed(REG_FPBASE);
9478 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9480 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
9482 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
9483 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9486 // We've modified EBP, but not really. Say that we haven't...
9487 regSet.rsRemoveRegsModified(RBM_FPBASE);
9490 /*****************************************************************************
9492 * Generates code for an EH funclet epilog.
9494 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
9497 void CodeGen::genFuncletEpilog()
9502 printf("*************** In genFuncletEpilog()\n");
9506 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9508 // Restore callee saved XMM regs from their stack slots before modifying SP
9509 // to position at callee saved int regs.
9510 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9511 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
9512 genPopCalleeSavedRegisters();
9513 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9517 /*****************************************************************************
9519 * Capture the information used to generate the funclet prologs and epilogs.
9522 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9524 if (!compiler->ehAnyFunclets())
9529 // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
9530 // because we're not going to allocate the same size frame as the parent.
9532 assert(isFramePointerUsed());
9533 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9535 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The set of float registers to be preserved has been finalized
9537 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
9538 // that's ok, because we're figuring out an offset in the parent frame.
9539 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
9540 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
9543 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9544 #ifndef UNIX_AMD64_ABI
9545 // No 4 slots for outgoing params on the stack for System V systems.
9546 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
9547 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
9548 // slots if there are any calls in the function.
9549 #endif // !UNIX_AMD64_ABI
9550 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
9552 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
9554 // How much stack do we allocate in the funclet?
9555 // We need to 16-byte align the stack.
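//
// As a rough sketch (not an exact layout diagram), the funclet frame being sized here is,
// from higher to lower addresses:
//     |  return address                     |
//     |  saved RBP                          |
//     |  callee-saved int registers         |
//     |  padding for XMM alignment (if any) |
//     |  callee-saved XMM registers         |
//     |  16-byte alignment padding          |
//     |  PSPSym (if used)                   |
//     |  outgoing argument space            |  <- RSP after the funclet prolog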
9557 unsigned totalFrameSize =
9558 REGSIZE_BYTES // return address
9559 + REGSIZE_BYTES // pushed EBP
9560 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
9562 // The entire 128 bits of each XMM register are saved to the stack due to ABI encoding requirements.
9563 // Copying an entire XMM register to/from memory performs best when SP is aligned on an XMM_REGSIZE_BYTES boundary.
9564 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
9565 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
9567 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
9569 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
9570 + calleeFPRegsSavedSize // pushed callee-saved float regs
9571 // below calculated 'pad' will go here
9572 + PSPSymSize // PSPSym
9573 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9576 unsigned pad = AlignmentPad(totalFrameSize, 16);
9578 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
9579 + calleeFPRegsSavedSize // Callee saved xmm regs
9580 + pad + PSPSymSize // PSPSym
9581 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9588 printf("Funclet prolog / epilog info\n");
9589 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9590 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9591 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9594 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9596 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
9597 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and funclet
9603 #elif defined(_TARGET_ARM64_)
9605 // Look in CodeGenArm64.cpp
9607 #elif defined(_TARGET_X86_)
9609 /*****************************************************************************
9611 * Generates code for an EH funclet prolog.
9614 * Funclets have the following incoming arguments:
9616 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
9617 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
9618 * finally/fault: none
9620 * Funclets set the following registers on exit:
9622 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
9623 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9624 * finally/fault: none
9626 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
9630 void CodeGen::genFuncletProlog(BasicBlock* block)
9635 printf("*************** In genFuncletProlog()\n");
9639 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9641 gcInfo.gcResetForBB();
9643 compiler->unwindBegProlog();
9645 // This is the end of the OS-reported prolog for purposes of unwinding
9646 compiler->unwindEndProlog();
9648 // TODO: We may need an EBP restore sequence here if we introduce a PSPSym
9650 // Add padding for 16-byte alignment
9651 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
9654 /*****************************************************************************
9656 * Generates code for an EH funclet epilog.
9659 void CodeGen::genFuncletEpilog()
9664 printf("*************** In genFuncletEpilog()\n");
9668 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9670 // Revert the padding that was added for 16-byte alignment
9671 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
9676 /*****************************************************************************
9678 * Capture the information used to generate the funclet prologs and epilogs.
9681 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9683 if (!compiler->ehAnyFunclets())
9691 /*****************************************************************************
9693 * Generates code for an EH funclet prolog.
9696 void CodeGen::genFuncletProlog(BasicBlock* block)
9698 NYI("Funclet prolog");
9701 /*****************************************************************************
9703 * Generates code for an EH funclet epilog.
9706 void CodeGen::genFuncletEpilog()
9708 NYI("Funclet epilog");
9711 /*****************************************************************************
9713 * Capture the information used to generate the funclet prologs and epilogs.
9716 void CodeGen::genCaptureFuncletPrologEpilogInfo()
9718 if (compiler->ehAnyFunclets())
9720 NYI("genCaptureFuncletPrologEpilogInfo()");
9726 /*-----------------------------------------------------------------------------
9728 * Set the main function PSPSym value in the frame.
9729 * Funclets use different code to load the PSP sym and save it in their frame.
9730 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
9731 * The PSPSym section of that document is copied here.
9733 ***********************************
9734 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
9735 * accesses locals from the main function body.
9737 * First, two definitions.
9739 * Caller-SP is the value of the stack pointer in a function's caller before the call
9740 * instruction is executed. That is, when function A calls function B, Caller-SP for B
9741 * is the value of the stack pointer immediately before the call instruction in A
9742 * (calling B) was executed. Note that this definition holds for both AMD64, which
9743 * pushes the return address when a call instruction is executed, and for ARM, which
9744 * doesn't. For AMD64, Caller-SP is the address above the call return address.
9746 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
9747 * the frame has been allocated. That is, before any "alloca"-type allocations.
9749 * The PSPSym is a pointer-sized local variable in the frame of the main function and
9750 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
9751 * for the main function. The stack offset of the PSPSym is reported to the VM in the
9752 * GC information header. The value reported in the GC information is the offset of the
9753 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
9754 * value is reported to the VM, differ between architectures. In particular, note that
9755 * most things in the GC information header are reported as offsets relative to Caller-SP,
9756 * but PSPSym on AMD64 is one (maybe the only) exception.)
9758 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
9759 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
9760 * the frame pointer is the same value in a funclet as it is in the main function body.
9762 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
9763 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
9764 * only true for first pass funclets (currently just filters) and it is passed as the second
9765 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
9766 * frame in the exception processing system. For the CLR, it points either to the main function
9767 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
9768 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
9770 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
9771 * don't know if the Establisher Frame is from the main function or a funclet, we design the
9772 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
9773 * offset from the Establisher Frame in each case. (This is also required because we only report
9774 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
9775 * function and all of its funclets). Then, the funclet uses this known offset to compute the
9776 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
9777 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
9778 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
9779 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
9780 * for every nested funclet invocation.
9782 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
9783 * restores all non-volatile registers to their values within the parent frame. This includes
9784 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
9785 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
9787 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
9788 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
9789 * ARM this is the first argument and passed in R0.
9791 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
9792 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
9793 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
9794 * is required in all funclets as well as the main function, whereas if the establisher frame was
9795 * correctly reported, the PSPSym could be omitted in some cases.)
9796 ***********************************
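*
* As a concrete (hypothetical) example of the "identical, small, constant offset" point above:
* if fiPSP_slot_InitialSP_offset is 0x20 on AMD64, then the main function stores Initial-SP at
* [Initial-SP + 0x20], and every funclet both reads the PSPSym from [EstablisherFrame + 0x20]
* and copies it to [funclet RSP + 0x20], so the single offset reported in the GC information is
* valid for the main function and all of its funclets.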
9798 void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
9800 assert(compiler->compGeneratingProlog);
9802 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9807 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
9809 #if defined(_TARGET_ARM_)
9811 // We either generate:
9813 //     add r1, sp, imm  ;  str r1, [reg + PSPSymOffset]
9816 // or: add r1, r11, imm ;  str r1, [reg + PSPSymOffset]
9817 // depending on the smallest encoding
9819 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9824 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
9826 // use the "add <reg>, sp, imm" form
9828 callerSPOffs = SPtoCallerSPdelta;
9829 regBase = REG_SPBASE;
9833 // use the "add <reg>, r11, imm" form
9835 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
9836 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
9838 callerSPOffs = FPtoCallerSPdelta;
9839 regBase = REG_FPBASE;
9842 // We will just use the initReg since it is an available register
9843 // and we are probably done using it anyway...
9844 regNumber regTmp = initReg;
9845 *pInitRegZeroed = false;
9847 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
9848 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9850 #elif defined(_TARGET_ARM64_)
9852 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9854 // We will just use the initReg since it is an available register
9855 // and we are probably done using it anyway...
9856 regNumber regTmp = initReg;
9857 *pInitRegZeroed = false;
9859 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
9860 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9862 #elif defined(_TARGET_AMD64_)
9864 // The PSP sym value is Initial-SP, not Caller-SP!
9865 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
9866 // has been established.
9869 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
9871 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
9875 NYI("Set function PSP sym");
9880 #endif // FEATURE_EH_FUNCLETS
9882 /*****************************************************************************
9884 * Generates code for all the function and funclet prologs and epilogs.
9887 void CodeGen::genGeneratePrologsAndEpilogs()
9892 printf("*************** Before prolog / epilog generation\n");
9893 getEmitter()->emitDispIGlist(false);
9897 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
9898 // This affects our code that determines which untracked locals need to be zero initialized.
9899 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
9901 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
9903 getEmitter()->emitStartPrologEpilogGeneration();
9905 gcInfo.gcResetForBB();
9908 // Generate all the prologs and epilogs.
9909 CLANG_FORMAT_COMMENT_ANCHOR;
9911 #if FEATURE_EH_FUNCLETS
9913 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
9914 // information computed during codegen, or during function prolog generation, like
9915 // frame offsets. It must run after main function prolog generation.
9917 genCaptureFuncletPrologEpilogInfo();
9919 #endif // FEATURE_EH_FUNCLETS
9921 // Walk the list of prologs and epilogs and generate them.
9922 // We maintain a list of prolog and epilog basic blocks in
9923 // the insGroup structure in the emitter. This list was created
9924 // during code generation by the genReserve*() functions.
9926 // TODO: it seems like a better design would be to create a list of prologs/epilogs
9927 // in the code generator (not the emitter), and then walk that list. But we already
9928 // have the insGroup list, which serves well, so we don't need the extra allocations
9929 // for a prolog/epilog list in the code generator.
9931 getEmitter()->emitGeneratePrologEpilog();
9933 // Tell the emitter we're done with all prolog and epilog generation.
9935 getEmitter()->emitFinishPrologEpilogGeneration();
9940 printf("*************** After prolog / epilog generation\n");
9941 getEmitter()->emitDispIGlist(false);
9947 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9948 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9950 XX End Prolog / Epilog XX
9952 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9953 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9956 #if defined(_TARGET_XARCH_)
9957 // Save the callee-saved floating-point registers (compCalleeFPRegsSavedMask), with the smallest register number saved at [RSP+offset], working
9958 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9959 // Here offset = 16-byte aligned offset after pushing integer registers.
9962 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
9963 // non-funclet: this will be compLclFrameSize.
9964 // funclet frames: this will be FuncletInfo.fiSpDelta.
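// As a rough example (hypothetical offsets), if XMM6 and XMM7 are the callee-saved float regs
// and the starting offset works out to 0x40, the emitted AMD64 sequence is approximately:
//     movaps  [rsp+40h], xmm6
//     movaps  [rsp+30h], xmm7
// i.e. the smallest register number lands at the highest offset, working down the stack.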
9965 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
9967 genVzeroupperIfNeeded(false);
9968 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9970 // Only callee saved floating point registers should be in regMask
9971 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9974 if (regMask == RBM_NONE)
9979 #ifdef _TARGET_AMD64_
9980 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9981 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9983 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
9984 assert((offset % 16) == 0);
9985 instruction copyIns = ins_Copy(TYP_FLOAT);
9986 #else // !_TARGET_AMD64_
9987 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
9988 instruction copyIns = INS_movupd;
9989 #endif // !_TARGET_AMD64_
9991 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9993 regMaskTP regBit = genRegMask(reg);
9994 if ((regBit & regMask) != 0)
9996 // The ABI requires us to preserve the lower 128 bits of the YMM registers.
9997 getEmitter()->emitIns_AR_R(copyIns,
9998 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be EA_16BYTE
10000 reg, REG_SPBASE, offset);
10001 compiler->unwindSaveReg(reg, offset);
10002 regMask &= ~regBit;
10003 offset -= XMM_REGSIZE_BYTES;
10008 // Restore the callee-saved floating-point registers (compCalleeFPRegsSavedMask), with the smallest register number saved at [RSP+offset], working
10009 // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
10010 // Here offset = 16-byte aligned offset after pushing integer registers.
10013 // lclFrameSize - Fixed frame size excluding callee pushed int regs.
10014 // non-funclet: this will be compLclFrameSize.
10015 // funclet frames: this will be FuncletInfo.fiSpDelta.
10016 void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
10018 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
10020 // Only callee saved floating point registers should be in regMask
10021 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
10023 // fast path return
10024 if (regMask == RBM_NONE)
10026 genVzeroupperIfNeeded();
10030 #ifdef _TARGET_AMD64_
10031 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
10032 instruction copyIns = ins_Copy(TYP_FLOAT);
10033 #else // !_TARGET_AMD64_
10034 unsigned firstFPRegPadding = 0;
10035 instruction copyIns = INS_movupd;
10036 #endif // !_TARGET_AMD64_
10040 if (compiler->compLocallocUsed)
10042 // localloc frame: use frame pointer relative offset
10043 assert(isFramePointerUsed());
10044 regBase = REG_FPBASE;
10045 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
10049 regBase = REG_SPBASE;
10050 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
10053 #ifdef _TARGET_AMD64_
10054 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
10055 assert((offset % 16) == 0);
10056 #endif // _TARGET_AMD64_
10058 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
10060 regMaskTP regBit = genRegMask(reg);
10061 if ((regBit & regMask) != 0)
10063 // The ABI requires us to restore the lower 128 bits of the YMM registers.
10064 getEmitter()->emitIns_R_AR(copyIns,
10065 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be EA_16BYTE
10067 reg, regBase, offset);
10068 regMask &= ~regBit;
10069 offset -= XMM_REGSIZE_BYTES;
10072 genVzeroupperIfNeeded();
10075 // Generate a vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers, so that
10076 // AVX/legacy-SSE transition penalties can be avoided. This function is used in genPreserveCalleeSavedFltRegs
10077 // (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
10078 // 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
10079 // code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
10080 // if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
10083 // check256bitOnly - true to check whether the function contains a 256-bit AVX instruction and generate a vzeroupper
10084 // instruction, false to check whether the function contains any AVX instruction (either 128-bit or 256-bit).
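//
// For reference, the two call sites above use this as follows: genPreserveCalleeSavedFltRegs
// passes check256bitOnly=false, so the prolog gets a vzeroupper if the method contains any AVX
// code, while genRestoreCalleeSavedFltRegs uses the default (true), so the epilog gets one only
// when 256-bit AVX code is present.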
10086 void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
10088 bool emitVzeroUpper = false;
10089 if (check256bitOnly)
10091 emitVzeroUpper = getEmitter()->Contains256bitAVX();
10095 emitVzeroUpper = getEmitter()->ContainsAVX();
10098 if (emitVzeroUpper)
10100 assert(compiler->canUseVexEncoding());
10101 instGen(INS_vzeroupper);
10105 #endif // defined(_TARGET_XARCH_)
10107 //-----------------------------------------------------------------------------------
10108 // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
10111 // hClass - type handle
10114 // true if type is returned in multiple registers, false otherwise.
10116 bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
10118 if (hClass == NO_CLASS_HANDLE)
10123 structPassingKind howToReturnStruct;
10124 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
10126 return (varTypeIsStruct(returnType));
10129 //----------------------------------------------
10130 // Methods that support HFA's for ARM32/ARM64
10131 //----------------------------------------------
10133 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
10136 return varTypeIsFloating(GetHfaType(hClass));
10142 bool Compiler::IsHfa(GenTree* tree)
10145 return IsHfa(gtGetStructHandleIfPresent(tree));
10151 var_types Compiler::GetHfaType(GenTree* tree)
10154 return GetHfaType(gtGetStructHandleIfPresent(tree));
10160 unsigned Compiler::GetHfaCount(GenTree* tree)
10162 return GetHfaCount(gtGetStructHandleIfPresent(tree));
10165 var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
10167 var_types result = TYP_UNDEF;
10168 if (hClass != NO_CLASS_HANDLE)
10171 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
10172 if (corType != CORINFO_TYPE_UNDEF)
10174 result = JITtype2varType(corType);
10176 #endif // FEATURE_HFA
10181 //------------------------------------------------------------------------
10182 // GetHfaCount: Given a class handle for an HFA struct
10183 // return the number of registers needed to hold the HFA
10185 // Note that on ARM32 the single precision registers overlap with
10186 // the double precision registers and for that reason each
10187 // double register is considered to be two single registers.
10188 // Thus, for an ARM32 HFA of 4 doubles, this function will return 8.
10189 // On ARM64, given an HFA of 4 singles or 4 doubles, this function
10190 // will return 4 for both.
10192 // hClass: the class handle of a HFA struct
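//
// For example, an HFA of 4 doubles is 32 bytes, so this returns:
//     ARM32: 32 / REGSIZE_BYTES (4) = 8   (single-precision registers)
//     ARM64: 32 / elemSize (8)      = 4   (double-precision registers)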
10194 unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
10196 assert(IsHfa(hClass));
10197 #ifdef _TARGET_ARM_
10198 // An HFA of doubles is twice as large as an HFA of singles for ARM32
10199 // (i.e. it uses twice the number of single-precision registers)
10200 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
10201 #else // _TARGET_ARM64_
10202 var_types hfaType = GetHfaType(hClass);
10203 unsigned classSize = info.compCompHnd->getClassSize(hClass);
10204 // Note that the retail build issues a warning about a potential division by zero without the Max function
10205 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
10206 return classSize / elemSize;
10207 #endif // _TARGET_ARM64_
10210 #ifdef _TARGET_XARCH_
10212 //------------------------------------------------------------------------
10213 // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
10214 // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
10215 // X86/x64 has a special encoding for shift/rotate-by-constant-1.
10218 // ins: the base shift/rotate instruction
10219 // shiftByValue: the constant value by which we are shifting/rotating
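//
// For example (relying on the instruction-table ordering asserted below):
//     genMapShiftInsToShiftByConstantIns(INS_shl, 1) yields INS_shl_1
//     genMapShiftInsToShiftByConstantIns(INS_shl, 5) yields INS_shl_N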
10221 instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
10223 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
10226 // Which format should we use?
10228 instruction shiftByConstantIns;
10230 if (shiftByValue == 1)
10232 // Use the shift-by-one format.
10234 assert(INS_rcl + 1 == INS_rcl_1);
10235 assert(INS_rcr + 1 == INS_rcr_1);
10236 assert(INS_rol + 1 == INS_rol_1);
10237 assert(INS_ror + 1 == INS_ror_1);
10238 assert(INS_shl + 1 == INS_shl_1);
10239 assert(INS_shr + 1 == INS_shr_1);
10240 assert(INS_sar + 1 == INS_sar_1);
10242 shiftByConstantIns = (instruction)(ins + 1);
10246 // Use the shift-by-NNN format.
10248 assert(INS_rcl + 2 == INS_rcl_N);
10249 assert(INS_rcr + 2 == INS_rcr_N);
10250 assert(INS_rol + 2 == INS_rol_N);
10251 assert(INS_ror + 2 == INS_ror_N);
10252 assert(INS_shl + 2 == INS_shl_N);
10253 assert(INS_shr + 2 == INS_shr_N);
10254 assert(INS_sar + 2 == INS_sar_N);
10256 shiftByConstantIns = (instruction)(ins + 2);
10259 return shiftByConstantIns;
10262 #endif // _TARGET_XARCH_
10264 //------------------------------------------------------------------------------------------------ //
10265 // getFirstArgWithStackSlot - returns the first argument with a stack slot on the caller's frame.
10268 // The variable number (lclNum) of the first argument with a stack slot on the caller's frame.
10271 // On x64 Windows the caller always creates slots (homing space) in its frame for the
10272 // first 4 arguments of a callee (register-passed args). So, the variable number
10273 // (lclNum) for the first argument with a stack slot is always 0.
10274 // For System V systems or armarch, there is no such calling convention requirement, and the code
10275 // needs to find the first stack passed argument from the caller. This is done by iterating over
10276 // all the lvParam variables and finding the first whose lvArgReg equals REG_STK.
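//
// As a rough example: a System V AMD64 method taking eight integer arguments passes arguments
// 0-5 in registers and arguments 6 and 7 on the stack, so this returns 6; on Windows x64 the
// same method returns 0, since the caller allocates a home slot for every argument.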
10278 unsigned CodeGen::getFirstArgWithStackSlot()
10280 #if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
10281 unsigned baseVarNum = 0;
10282 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
10283 // that's passed on the stack.
10284 LclVarDsc* varDsc = nullptr;
10285 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
10287 varDsc = &(compiler->lvaTable[i]);
10289 // We should have found a stack parameter (and broken out of this loop) before
10290 // we find any non-parameters.
10291 assert(varDsc->lvIsParam);
10293 if (varDsc->lvArgReg == REG_STK)
10299 assert(varDsc != nullptr);
10302 #elif defined(_TARGET_AMD64_)
10304 #else // _TARGET_X86_
10305 // Not implemented for x86.
10306 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10307 return BAD_VAR_NUM;
10308 #endif // _TARGET_X86_
10311 //------------------------------------------------------------------------
10312 // genSinglePush: Report a change in stack level caused by a single word-sized push instruction
10314 void CodeGen::genSinglePush()
10316 AddStackLevel(REGSIZE_BYTES);
10319 //------------------------------------------------------------------------
10320 // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
10322 void CodeGen::genSinglePop()
10324 SubtractStackLevel(REGSIZE_BYTES);
10327 //------------------------------------------------------------------------
10328 // genPushRegs: Push the given registers.
10331 // regs - mask of registers to push
10332 // byrefRegs - OUT arg. Set to byref registers that were pushed.
10333 // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
10336 // Mask of registers pushed.
10339 // This function does not check if the register is marked as used, etc.
10341 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
10343 *byrefRegs = RBM_NONE;
10344 *noRefRegs = RBM_NONE;
10346 if (regs == RBM_NONE)
10351 #if FEATURE_FIXED_OUT_ARGS
10353 NYI("Don't call genPushRegs with real regs!");
10356 #else // FEATURE_FIXED_OUT_ARGS
10358 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
10359 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
10361 regMaskTP pushedRegs = regs;
10363 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
10365 regMaskTP regBit = regMaskTP(1) << reg;
10367 if ((regBit & regs) == RBM_NONE)
10371 if (regBit & gcInfo.gcRegGCrefSetCur)
10375 else if (regBit & gcInfo.gcRegByrefSetCur)
10377 *byrefRegs |= regBit;
10380 else if (noRefRegs != NULL)
10382 *noRefRegs |= regBit;
10390 inst_RV(INS_push, reg, type);
10393 gcInfo.gcMarkRegSetNpt(regBit);
10400 #endif // FEATURE_FIXED_OUT_ARGS
10403 //------------------------------------------------------------------------
10404 // genPopRegs: Pop the registers that were pushed by genPushRegs().
10407 // regs - mask of registers to pop
10408 // byrefRegs - The byref registers that were pushed by genPushRegs().
10409 // noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
10414 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
10416 if (regs == RBM_NONE)
10421 #if FEATURE_FIXED_OUT_ARGS
10423 NYI("Don't call genPopRegs with real regs!");
10425 #else // FEATURE_FIXED_OUT_ARGS
10427 noway_assert((regs & byrefRegs) == byrefRegs);
10428 noway_assert((regs & noRefRegs) == noRefRegs);
10429 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
10431 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
10432 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
10434 // Walk the registers in the reverse order from genPushRegs()
10435 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
10437 regMaskTP regBit = regMaskTP(1) << reg;
10439 if ((regBit & regs) == RBM_NONE)
10443 if (regBit & byrefRegs)
10447 else if (regBit & noRefRegs)
10456 inst_RV(INS_pop, reg, type);
10459 if (type != TYP_INT)
10460 gcInfo.gcMarkRegPtrVal(reg, type);
10465 #endif // FEATURE_FIXED_OUT_ARGS
10468 /*****************************************************************************
10471 * This function should be called only after the sizes of the emitter blocks
10472 * have been finalized.
10475 void CodeGen::genSetScopeInfo()
10477 if (!compiler->opts.compScopeInfo)
10485 printf("*************** In genSetScopeInfo()\n");
10489 if (compiler->info.compVarScopesCount == 0)
10491 compiler->eeSetLVcount(0);
10492 compiler->eeSetLVdone();
10496 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
10498 unsigned varsHomeCount = 0;
10499 #ifdef USING_SCOPE_INFO
10500 varsHomeCount = siScopeCnt + psiScopeCnt;
10501 #endif // USING_SCOPE_INFO
10502 compiler->eeSetLVcount(varsHomeCount);
10505 genTrnslLocalVarCount = varsHomeCount;
10508 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[varsHomeCount];
10512 #ifdef USING_SCOPE_INFO
10513 genSetScopeInfoUsingsiScope();
10514 #endif // USING_SCOPE_INFO
10516 compiler->eeSetLVdone();
10519 #ifdef USING_SCOPE_INFO
10520 void CodeGen::genSetScopeInfoUsingsiScope()
10522 noway_assert(psiOpenScopeList.scNext == nullptr);
10524 // Record the scopes found for the parameters over the prolog.
10525 // The prolog needs to be treated differently as a variable may not
10526 // have the same info in the prolog block as is given by compiler->lvaTable.
10527 // eg. A register parameter is actually on the stack, before it is loaded to reg.
10529 CodeGen::psiScope* scopeP;
10532 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
10534 noway_assert(scopeP != nullptr);
10535 noway_assert(scopeP->scStartLoc.Valid());
10536 noway_assert(scopeP->scEndLoc.Valid());
10538 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
10539 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
10541 unsigned varNum = scopeP->scSlotNum;
10542 noway_assert(startOffs <= endOffs);
10544 // The range may be 0 if the prolog is empty. For such a case,
10545 // report the liveness of arguments to span at least the first
10546 // instruction in the method. This will be incorrect (except on
10547 // entry to the method) if the very first instruction of the method
10548 // is part of a loop. However, this should happen
10549 // very rarely, and the incorrectness is worth being able to look
10550 // at the argument on entry to the method.
10551 if (startOffs == endOffs)
10553 noway_assert(startOffs == 0);
10557 siVarLoc varLoc = scopeP->getSiVarLoc();
10559 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, &varLoc);
10562 // Record the scopes for the rest of the method.
10563 // Check that the LocalVarInfo scopes look OK
10564 noway_assert(siOpenScopeList.scNext == nullptr);
10566 CodeGen::siScope* scopeL;
10568 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
10570 noway_assert(scopeL != nullptr);
10571 noway_assert(scopeL->scStartLoc.Valid());
10572 noway_assert(scopeL->scEndLoc.Valid());
10574 // Find the start and end IP
10576 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
10577 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
10579 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
10581 LclVarDsc* varDsc = compiler->lvaGetDesc(scopeL->scVarNum);
10582 siVarLoc varLoc = getSiVarLoc(varDsc, scopeL);
10584 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
10585 scopeL->scAvailable, &varLoc);
10588 #endif // USING_SCOPE_INFO
10590 //------------------------------------------------------------------------
10591 // genSetScopeInfo: Record scope information for debug info
10595 // startOffs - the starting offset for this scope
10596 // length - the length of this scope
10597 // varNum - the lclVar for this scope info
10599 // avail - a bool indicating if it has a home
10600 // varLoc - the position (reg or stack) of the variable
10603 // Called for every scope info piece to record by the main genSetScopeInfo()
10605 void CodeGen::genSetScopeInfo(unsigned which,
10606 UNATIVE_OFFSET startOffs,
10607 UNATIVE_OFFSET length,
10613 // We need to do some mapping while reporting back these variables.
10615 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
10616 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
10618 #ifdef _TARGET_X86_
10619 // Non-x86 platforms are allowed to access all arguments directly
10620 // so we don't need this code.
10622 // Is this a varargs function?
10624 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
10625 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
10627 noway_assert(varLoc->vlType == VLT_STK || varLoc->vlType == VLT_STK2);
10629 // All stack arguments (except the varargs handle) have to be
10630 // accessed via the varargs cookie. Discard generated info,
10631 // and just find its position relative to the varargs handle
10633 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
10634 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
10636 noway_assert(!compiler->opts.compDbgCode);
10640 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
10641 // arguments of vararg functions to avoid reporting them to GC.
10642 noway_assert(!compiler->lvaTable[varNum].lvRegister);
10643 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
10644 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
10646 noway_assert(cookieOffset < varOffset);
10647 unsigned offset = varOffset - cookieOffset;
10648 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10649 noway_assert(offset < stkArgSize);
10650 offset = stkArgSize - offset;
10652 varLoc->vlType = VLT_FIXED_VA;
10653 varLoc->vlFixedVarArg.vlfvOffset = offset;
10656 #endif // _TARGET_X86_
10658 VarName name = nullptr;
10662 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
10664 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
10666 name = compiler->info.compVarScopes[scopeNum].vsdName;
10670 // Hang on to this info.
10672 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
10674 tlvi.tlviVarNum = ilVarNum;
10675 tlvi.tlviLVnum = LVnum;
10676 tlvi.tlviName = name;
10677 tlvi.tlviStartPC = startOffs;
10678 tlvi.tlviLength = length;
10679 tlvi.tlviAvailable = avail;
10680 tlvi.tlviVarLoc = *varLoc;
10684 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
10687 /*****************************************************************************/
10690 /*****************************************************************************
10693 * Can be called only after lviSetLocalVarInfo() has been called
10697 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
10699 if (!compiler->opts.compScopeInfo)
10702 if (compiler->info.compVarScopesCount == 0)
10705 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10707 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10709 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
10710 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10711 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10713 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10720 /*****************************************************************************
10723 * Can be called only after lviSetLocalVarInfo() has been called
10727 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
10729 if (!compiler->opts.compScopeInfo)
10732 if (compiler->info.compVarScopesCount == 0)
10735 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10737 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10739 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
10740 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10741 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10743 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10750 /*****************************************************************************/
10751 #endif // defined(DEBUG)
10752 #endif // LATE_DISASM
10756 /*****************************************************************************
10757 * Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
10760 void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
10762 if (mappingNum != unsigned(-1))
10764 printf("%d: ", mappingNum);
10767 IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
10769 if (offsx == BAD_IL_OFFSET)
10775 Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
10777 if (jitIsStackEmpty(offsx))
10779 printf(" STACK_EMPTY");
10782 if (jitIsCallInstruction(offsx))
10784 printf(" CALL_INSTRUCTION");
10789 ipMapping->ipmdNativeLoc.Print();
10790 // We can only call this after code generation. Is there any way to tell when it's legal to call?
10791 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
10793 if (ipMapping->ipmdIsLabel)
10801 void CodeGen::genIPmappingListDisp()
10803 unsigned mappingNum = 0;
10804 Compiler::IPmappingDsc* ipMapping;
10806 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
10808 genIPmappingDisp(mappingNum, ipMapping);
10815 /*****************************************************************************
10817 * Append an IPmappingDsc struct to the list that we're maintaining
10818 * for the debugger.
10819 * Record the instr offset as being at the current code gen position.
10822 void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
10824 if (!compiler->opts.compDbgInfo)
10829 assert(offsx != BAD_IL_OFFSET);
10831 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10833 case ICorDebugInfo::PROLOG:
10834 case ICorDebugInfo::EPILOG:
10839 if (offsx != (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
10841 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
10844 // Ignore this one if it's the same IL offset as the last one we saw.
10845 // Note that we'll let through two identical IL offsets if the flag bits
10846 // differ, or two identical "special" mappings (e.g., PROLOG).
10847 if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
10849 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
10855 /* Create a mapping entry and append it to the list */
10857 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
10858 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
10859 addMapping->ipmdILoffsx = offsx;
10860 addMapping->ipmdIsLabel = isLabel;
10861 addMapping->ipmdNext = nullptr;
10863 if (compiler->genIPmappingList != nullptr)
10865 assert(compiler->genIPmappingLast != nullptr);
10866 assert(compiler->genIPmappingLast->ipmdNext == nullptr);
10867 compiler->genIPmappingLast->ipmdNext = addMapping;
10871 assert(compiler->genIPmappingLast == nullptr);
10872 compiler->genIPmappingList = addMapping;
10875 compiler->genIPmappingLast = addMapping;
10880 printf("Added IP mapping: ");
10881 genIPmappingDisp(unsigned(-1), addMapping);
10886 /*****************************************************************************
10888 * Prepend an IPmappingDsc struct to the list that we're maintaining
10889 * for the debugger.
10890 * Record the instr offset as being at the current code gen position.
10892 void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
10894 if (!compiler->opts.compDbgInfo)
10899 assert(offsx != BAD_IL_OFFSET);
10900 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
10902 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10904 case ICorDebugInfo::NO_MAPPING:
10905 case ICorDebugInfo::PROLOG:
10906 case ICorDebugInfo::EPILOG:
10910 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
10914 /* Create a mapping entry and prepend it to the list */
10916 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
10917 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
10918 addMapping->ipmdILoffsx = offsx;
10919 addMapping->ipmdIsLabel = true;
10920 addMapping->ipmdNext = nullptr;
10922 addMapping->ipmdNext = compiler->genIPmappingList;
10923 compiler->genIPmappingList = addMapping;
10925 if (compiler->genIPmappingLast == nullptr)
10927 compiler->genIPmappingLast = addMapping;
10933 printf("Added IP mapping to front: ");
10934 genIPmappingDisp(unsigned(-1), addMapping);
10939 /*****************************************************************************/
10941 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
10942 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
10943 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
10945 C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
10946 C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
10947 C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
10948 C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
10950 //------------------------------------------------------------------------
10951 // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
10952 // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
10953 // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
10956 // offsx - the IL_OFFSETX value with the IL offset to extract.
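//
// For example (IL_OFFSETX_BITS being the mask of the flag bits tested by jitIsStackEmpty and
// jitIsCallInstruction below): jitGetILoffs(0x24 | IL_OFFSETX_STKBIT) returns 0x24.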
10961 IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
10963 assert(offsx != BAD_IL_OFFSET);
10965 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10967 case ICorDebugInfo::NO_MAPPING:
10968 case ICorDebugInfo::PROLOG:
10969 case ICorDebugInfo::EPILOG:
10973 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
10977 //------------------------------------------------------------------------
10978 // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
10979 // distinguished values. Asserts if passed BAD_IL_OFFSET.
10982 // offsx - the IL_OFFSETX value with the IL offset to extract.
10987 IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
10989 assert(offsx != BAD_IL_OFFSET);
10991 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10993 case ICorDebugInfo::NO_MAPPING:
10994 case ICorDebugInfo::PROLOG:
10995 case ICorDebugInfo::EPILOG:
10996 return IL_OFFSET(offsx);
10999 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11003 //------------------------------------------------------------------------
11004 // jitIsStackEmpty: Does the IL offset have the stack empty bit set?
11005 // Asserts if passed BAD_IL_OFFSET.
11008 // offsx - the IL_OFFSETX value to check
11011 // 'true' if the stack empty bit is set; 'false' otherwise.
11013 bool jitIsStackEmpty(IL_OFFSETX offsx)
11015 assert(offsx != BAD_IL_OFFSET);
11017 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11019 case ICorDebugInfo::NO_MAPPING:
11020 case ICorDebugInfo::PROLOG:
11021 case ICorDebugInfo::EPILOG:
11025 return (offsx & IL_OFFSETX_STKBIT) == 0;
11029 //------------------------------------------------------------------------
11030 // jitIsCallInstruction: Does the IL offset have the call instruction bit set?
11031 // Asserts if passed BAD_IL_OFFSET.
11034 // offsx - the IL_OFFSETX value to check
11037 // 'true' if the call instruction bit is set; 'false' otherwise.
11039 bool jitIsCallInstruction(IL_OFFSETX offsx)
11041 assert(offsx != BAD_IL_OFFSET);
11043 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11045 case ICorDebugInfo::NO_MAPPING:
11046 case ICorDebugInfo::PROLOG:
11047 case ICorDebugInfo::EPILOG:
11051 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
11055 /*****************************************************************************/
11057 void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
11059 if (!compiler->opts.compDbgCode)
11064 if (offsx == BAD_IL_OFFSET)
11069 /* If other IL offsets were reported, skip */
11071 if (compiler->genIPmappingLast == nullptr)
11076 if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
11081 /* offsx was the last reported offset. Make sure that we generated native code */
11083 if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
11089 /*****************************************************************************
11091 * Shut down the IP-mapping logic, report the info to the EE.
11094 void CodeGen::genIPmappingGen()
11096 if (!compiler->opts.compDbgInfo)
11104 printf("*************** In genIPmappingGen()\n");
11108 if (compiler->genIPmappingList == nullptr)
11110 compiler->eeSetLIcount(0);
11111 compiler->eeSetLIdone();
11115 Compiler::IPmappingDsc* tmpMapping;
11116 Compiler::IPmappingDsc* prevMapping;
11117 unsigned mappingCnt;
11118 UNATIVE_OFFSET lastNativeOfs;
11120 /* First count the number of distinct mapping records */
11123 lastNativeOfs = UNATIVE_OFFSET(~0);
11125 for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
11126 tmpMapping = tmpMapping->ipmdNext)
11128 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11130 // Managed RetVal - since new sequence points are emitted to identify IL calls,
11131 // make sure that those are not filtered and do not interfere with filtering of
11132 // other sequence points.
11133 if (jitIsCallInstruction(srcIP))
11139 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11141 if (nextNativeOfs != lastNativeOfs)
11144 lastNativeOfs = nextNativeOfs;
11145 prevMapping = tmpMapping;
11149 /* If there are mappings with the same native offset, then:
11150 o If one of them is NO_MAPPING, ignore it
11151 o If one of them is a label, report that and ignore the other one
11152 o Else report the higher IL offset
11155 PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
11156 if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11158 // If the previous entry was NO_MAPPING, ignore it
11159 prevMapping->ipmdNativeLoc.Init();
11160 prevMapping = tmpMapping;
11162 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11164 // If the current entry is NO_MAPPING, ignore it
11165 // Leave prevMapping unchanged as tmpMapping is no longer valid
11166 tmpMapping->ipmdNativeLoc.Init();
11168 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11170 // counting for special cases: see below
11172 prevMapping = tmpMapping;
11176 noway_assert(prevMapping != nullptr);
11177 noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
11178 lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
11180 /* The previous block had the same native offset. We have to
11181 discard one of the mappings. Simply reinitialize ipmdNativeLoc
11182 and prevMapping will be ignored later. */
11184 if (prevMapping->ipmdIsLabel)
11186 // Leave prevMapping unchanged as tmpMapping is no longer valid
11187 tmpMapping->ipmdNativeLoc.Init();
11191 prevMapping->ipmdNativeLoc.Init();
11192 prevMapping = tmpMapping;
11197 /* Tell them how many mapping records we've got */
11199 compiler->eeSetLIcount(mappingCnt);
11201 /* Now tell them about the mappings */
11204 lastNativeOfs = UNATIVE_OFFSET(~0);
11206 for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
11208 // Do we have to skip this record ?
11209 if (!tmpMapping->ipmdNativeLoc.Valid())
11214 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11215 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11217 if (jitIsCallInstruction(srcIP))
11219 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
11221 else if (nextNativeOfs != lastNativeOfs)
11223 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11224 lastNativeOfs = nextNativeOfs;
11226 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11228 // For the special case of an IL instruction with no body
11229 // followed by the epilog (say ret void immediately preceding
11230 // the method end), we put two entries in, so that we'll stop
11231 // at the (empty) ret statement if the user tries to put a
11232 // breakpoint there, and then have the option of seeing the
11233 // epilog or not based on SetUnmappedStopMask for the stepper.
11234 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11240 // This check is disabled. It is always true that any time this check asserts, the debugger would have a
11241 // problem with IL source level debugging. However, for a C# file, it only matters if things are on
11242 // different source lines. As a result, we have all sorts of latent problems with how we emit debug
11243 // info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this assert back on.
11245 if (compiler->opts.compDbgCode)
11247 // Assert that the first instruction of every basic block with more than one incoming edge has a
11248 // different sequence point from each incoming block.
11250 // It turns out that the only thing we really have to assert is that the first statement in each basic
11251 // block has an IL offset and appears in eeBoundaries.
11252 for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
11254 if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
11256 noway_assert(block->bbTreeList->gtOper == GT_STMT);
11257 bool found = false;
11258 if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
11260 IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
11261 for (unsigned i = 0; i < eeBoundariesCount; ++i)
11263 if (eeBoundaries[i].ilOffset == ilOffs)
11270 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
11276 compiler->eeSetLIdone();
11279 /*============================================================================
11281 * These are empty stubs that help the late disassembler compile
11282 * when it is being built into a non-DEBUG build.
11284 *============================================================================
11287 #if defined(LATE_DISASM)
11288 #if !defined(DEBUG)
11291 const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
11297 const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
11302 /*****************************************************************************/
11303 #endif // !defined(DEBUG)
11304 #endif // defined(LATE_DISASM)
11305 /*****************************************************************************/
11307 //------------------------------------------------------------------------
11308 // indirForm: Make a temporary indir we can feed to pattern matching routines
11309 // in cases where we don't want to instantiate all the indirs that happen.
11311 GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
11313 GenTreeIndir i(GT_IND, type, base, nullptr);
11314 i.gtRegNum = REG_NA;
11319 //------------------------------------------------------------------------
11320 // storeIndirForm: Make a temporary store indir we can feed to pattern matching routines
11321 // in cases where we don't want to instantiate all the indirs that happen.
11323 GenTreeStoreInd CodeGen::storeIndirForm(var_types type, GenTree* base, GenTree* data)
11325 GenTreeStoreInd i(type, base, data);
11326 i.gtRegNum = REG_NA;
11330 //------------------------------------------------------------------------
11331 // intForm: Make a temporary int we can feed to pattern matching routines
11332 // in cases where we don't want to instantiate.
11334 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
11336 GenTreeIntCon i(type, value);
11337 i.gtRegNum = REG_NA;
11341 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11342 //------------------------------------------------------------------------
11343 // genLongReturn: Generates code for long return statement for x86 and arm.
11345 // Note: treeNode's and op1's registers are already consumed.
11348 // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
11353 void CodeGen::genLongReturn(GenTree* treeNode)
11355 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
11356 assert(treeNode->TypeGet() == TYP_LONG);
11357 GenTree* op1 = treeNode->gtGetOp1();
11358 var_types targetType = treeNode->TypeGet();
11360 assert(op1 != nullptr);
11361 assert(op1->OperGet() == GT_LONG);
11362 GenTree* loRetVal = op1->gtGetOp1();
11363 GenTree* hiRetVal = op1->gtGetOp2();
11364 assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
11366 genConsumeReg(loRetVal);
11367 genConsumeReg(hiRetVal);
11368 if (loRetVal->gtRegNum != REG_LNGRET_LO)
11370 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
11372 if (hiRetVal->gtRegNum != REG_LNGRET_HI)
11374 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
11377 #endif // _TARGET_X86_ || _TARGET_ARM_
11379 //------------------------------------------------------------------------
11380 // genReturn: Generates code for return statement.
11381 // In case of struct return, delegates to the genStructReturn method.
11384 // treeNode - The GT_RETURN or GT_RETFILT tree node.
void CodeGen::genReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
    // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
    // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
    assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));

#ifdef DEBUG
    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
    }
#endif // DEBUG
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
    if (targetType == TYP_LONG)
    {
        genLongReturn(treeNode);
    }
    else
#endif // _TARGET_X86_ || _TARGET_ARM_
    {
        if (isStructReturn(treeNode))
        {
            genStructReturn(treeNode);
        }
        else if (targetType != TYP_VOID)
        {
            assert(op1 != nullptr);
            noway_assert(op1->gtRegNum != REG_NA);

            // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
            // consumed a reg for the operand. This is because the variable
            // is dead after return. But we are issuing more instructions
            // like "profiler leave callback" after this consumption. So
            // if you are issuing more instructions after this point,
            // remember to keep the variable live up until the new method
            // exit point where it is actually dead.
            genConsumeReg(op1);

#if defined(_TARGET_ARM64_)
            genSimpleReturn(treeNode);
#else // !_TARGET_ARM64_
#if defined(_TARGET_X86_)
            if (varTypeIsFloating(treeNode))
            {
                genFloatReturn(treeNode);
            }
            else
#elif defined(_TARGET_ARM_)
            if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
            {
                if (targetType == TYP_FLOAT)
                {
                    getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
                }
                else
                {
                    assert(targetType == TYP_DOUBLE);
                    getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
                                                op1->gtRegNum);
                }
            }
            else
#endif // _TARGET_ARM_
            {
                regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
                if (op1->gtRegNum != retReg)
                {
                    inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
                }
            }
#endif // !_TARGET_ARM64_
        }
    }
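
    // At this point the return value (if any) has been moved into the ABI return register(s).
    // Instructions emitted after this (e.g. the profiler leave callback below) run after op1
    // has been consumed, so the GC state of those registers must be maintained explicitly,
    // as described in the NOTE above.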
#ifdef PROFILING_SUPPORTED
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
    //                  that RAX and RDX are kept alive. Make the necessary changes in lowerxarch.cpp
    //                  in the handling of the GT_RETURN statement.
    //                  Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
    //                  for the return registers containing GC refs.
    //
    // There will be a single return block while generating profiler ELT callbacks.
    //
    // Reason for not materializing the Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    //      The flowgraph and other places assert that the last node of a block marked as
    //      BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
    //      maintain such an invariant irrespective of whether a profiler hook is needed or not.
    //      Also, there is not much to be gained by materializing it as an explicit node.
    if (compiler->compCurBB == compiler->genReturnBB)
    {
        // Since we are invalidating the assumption that we would slip into the epilog
        // right after the "return", we need to preserve the return reg's GC state
        // across the call until actual method return.
        ReturnTypeDesc retTypeDesc;
        unsigned       regCount = 0;
        if (compiler->compMethodReturnsMultiRegRetType())
        {
            if (varTypeIsLong(compiler->info.compRetNativeType))
            {
                retTypeDesc.InitializeLongReturnType(compiler);
            }
            else // we must have a struct return type
            {
                retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
            }
            regCount = retTypeDesc.GetReturnRegCount();
        }

        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
        }
        else if (compiler->compMethodReturnsMultiRegRetType())
        {
            for (unsigned i = 0; i < regCount; ++i)
            {
                if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                {
                    gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
                }
            }
        }

        genProfilingLeaveCallback();

        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
        }
        else if (compiler->compMethodReturnsMultiRegRetType())
        {
            for (unsigned i = 0; i < regCount; ++i)
            {
                if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                {
                    gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
                }
            }
        }
    }
#endif // PROFILING_SUPPORTED
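
    // Debug-only check (xarch): when compStackCheckOnRet is enabled, compare the current stack
    // pointer against the value previously saved in the lvaReturnSpCheck local, to catch SP
    // corruption at the return point.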
#if defined(DEBUG) && defined(_TARGET_XARCH_)
    bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;

#if FEATURE_EH_FUNCLETS
    // Don't do the stack pointer check at the return from a funclet; only for the main function.
    if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
    {
        doStackPointerCheck = false;
    }
#else  // !FEATURE_EH_FUNCLETS
    // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
    // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
    if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) || (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET))
    {
        doStackPointerCheck = false;
    }
#endif // !FEATURE_EH_FUNCLETS

    genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
}
#if defined(DEBUG) && defined(_TARGET_XARCH_)

//------------------------------------------------------------------------
// genStackPointerCheck: Generate code to check the stack pointer against a saved value.
// This is a debug check.
//
// Arguments:
//    doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
//    lvaStackPointerVar  - The local variable number that holds the value of the stack
//                          pointer we are comparing against.
//
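// Notes:
//    The emitted sequence is roughly (x86/x64 illustration):
//        cmp  [lvaStackPointerVar], rsp
//        je   L_spOk
//        int3                ; stack pointer does not match the saved value
//     L_spOk:
//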
void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar)
{
    if (doStackPointerCheck)
    {
        noway_assert(lvaStackPointerVar != 0xCCCCCCCC && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister &&
                     compiler->lvaTable[lvaStackPointerVar].lvOnFrame);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);

        BasicBlock* sp_check = genCreateTempLabel();
        getEmitter()->emitIns_J(INS_je, sp_check);
        instGen(INS_BREAKPOINT);
        genDefineTempLabel(sp_check);
    }
}
#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
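
//------------------------------------------------------------------------
// getCurrentStackLevel: Return the current stack level as tracked by codegen
// (the value of genStackLevel).
//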
unsigned CodeGenInterface::getCurrentStackLevel() const
{
    return genStackLevel;
}