1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XX Does the register allocation and puts the remaining lclVars on the stack XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
22 #if FEATURE_FP_REGALLOC
// raConfigRegisterFP: read the JitRegisterFP configuration value and map it
// onto the enumConfigRegisterFP enum. Only the low two bits of the config
// DWORD are meaningful; any higher bits are masked off.
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
25 DWORD val = JitConfig.JitRegisterFP();
27 return (enumConfigRegisterFP)(val & 0x3);
29 #endif // FEATURE_FP_REGALLOC
// raConfigRestrictMaskFP: return the mask of floating-point registers the
// allocator may use, as selected by the JitRegisterFP configuration.
// When FEATURE_FP_REGALLOC is disabled the result stays RBM_NONE.
31 regMaskTP Compiler::raConfigRestrictMaskFP()
33 regMaskTP result = RBM_NONE;
35 #if FEATURE_FP_REGALLOC
36 switch (raConfigRegisterFP())
// No FP registers permitted; result remains RBM_NONE.
38 case CONFIG_REGISTER_FP_NONE:
// Restrict to caller-saved (callee-trash) FP registers only.
41 case CONFIG_REGISTER_FP_CALLEE_TRASH:
42 result = RBM_FLT_CALLEE_TRASH;
// Restrict to callee-saved FP registers only.
44 case CONFIG_REGISTER_FP_CALLEE_SAVED:
45 result = RBM_FLT_CALLEE_SAVED;
// Full FP register file available.
47 case CONFIG_REGISTER_FP_FULL:
48 result = RBM_ALLFLOAT;
// getCanDoubleAlign: return the double-align policy for the current method.
// Under the STRESS_DBL_ALN stress mode we force MUST_DOUBLE_ALIGN; otherwise
// the JitDoubleAlign config value (or DEFAULT_DOUBLE_ALIGN in non-debug
// builds — the surrounding #if structure is not fully visible here) decides.
57 DWORD Compiler::getCanDoubleAlign()
60 if (compStressCompile(STRESS_DBL_ALN, 20))
61 return MUST_DOUBLE_ALIGN;
63 return JitConfig.JitDoubleAlign();
65 return DEFAULT_DOUBLE_ALIGN;
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
73 // refCntStk - sum of ref counts for all stack based variables
74 // refCntEBP - sum of ref counts for EBP enregistered variables
75 // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables
76 // refCntStkParam - sum of ref counts for all stack based parameters
77 // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 // with double fields).
81 // Returns true if this method estimates that a double-aligned frame would be beneficial
84 // The impact of a double-aligned frame is computed as follows:
85 // - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 // - We pay a byte of code for each non-parameter stack reference.
87 // - We save the misalignment penalty and possible cache-line crossing penalty.
88 // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 // - We pay 7 extra bytes for:
91 // LEA ESP,[EBP-offset]
92 // AND ESP,-8 to double align ESP
93 // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
95 // If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 // Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
97 // ref count for double-aligned values.
99 bool Compiler::shouldDoubleAlign(
100 unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
102 bool doDoubleAlign = false;
// Extra code bytes for the frame setup (LEA ESP,[EBP-offset] / AND ESP,-8),
// per the cost model in the header comment above.
103 const unsigned DBL_ALIGN_SETUP_SIZE = 7;
// Net code-size cost of double-aligning: each stack/EBP variable reference
// costs a byte, each stack parameter reference saves one.
105 unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
106 unsigned misaligned_weight = 4;
// SMALL_CODE ignores the misalignment penalty; FAST_CODE weights it 4x (=16).
108 if (compCodeOpt() == Compiler::SMALL_CODE)
109 misaligned_weight = 0;
111 if (compCodeOpt() == Compiler::FAST_CODE)
112 misaligned_weight *= 4;
114 JITDUMP("\nDouble alignment:\n");
115 JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116 JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117 JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
// Benefit test 1: if the code-size cost exceeds the estimated misalignment
// savings, don't double-align.
119 if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
121 JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
// Benefit test 2: if EBP enregistration is worth more than twice the
// double-access weight, keep EBP available instead.
123 else if (refCntWtdEBP > refCntWtdStkDbl * 2)
125 // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126 // not double aligned.
127 // Here are the numbers that make this not double-aligned.
128 //     refCntWtdStkDbl = 0x164
129 //     refCntWtdEBP    = 0x1a4
130 // We think we do need to change the heuristic to be in favor of double-align.
132 JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
136 // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137 JITDUMP("    Predicting to create a double-aligned frame\n");
138 doDoubleAlign = true;
140 return doDoubleAlign;
142 #endif // DOUBLE_ALIGN
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
// raInit: one-time initialization of the legacy register-allocator state for
// this method — clears prediction state and builds the rpPredictMap table
// that maps each rpPredictReg value to its candidate register mask.
146 void Compiler::raInit()
148 #if FEATURE_STACK_FP_X87
149 /* We have not assigned any FP variables to registers yet */
151 VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
// Tag the two RegState instances so shared code can tell them apart.
153 codeGen->intRegState.rsIsFloat   = false;
154 codeGen->floatRegState.rsIsFloat = true;
156 rpReverseEBPenreg = false;
// Later passes become pessimistic; the last few passes are the
// "pessimize" passes (exact pass-count logic is partly elided here).
159 rpPassesPessimize = rpPassesMax - 3;
160 if (opts.compDbgCode)
164 rpStkPredict            = (unsigned)-1;
165 rpFrameType             = FT_NOT_SET;
167 rpMustCreateEBPCalled   = false;
168 rpRegAllocDone          = false;
169 rpMaskPInvokeEpilogIntf = RBM_NONE;
// Build the prediction-kind -> register-mask map.
171 rpPredictMap[PREDICT_NONE] = RBM_NONE;
172 rpPredictMap[PREDICT_ADDR] = RBM_NONE;
174 #if FEATURE_FP_REGALLOC
175 rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
176 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
178 rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
179 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
// Expand one PREDICT_REG_<name> -> RBM_<name> entry per register via the
// REGDEF table in register.h.
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
185 #if defined(_TARGET_ARM_)
187 rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188 rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
// SP must never be picked by prediction.
189 rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;
191 #elif defined(_TARGET_AMD64_)
193 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195 rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
197 #elif defined(_TARGET_X86_)
199 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201 rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
202 rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203 rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
// No best prediction has been recorded yet.
207 rpBestRecordedPrediction = NULL;
210 /*****************************************************************************
212 * The following table(s) determines the order in which registers are considered
213 * for variables to live in
// raGetRegVarOrder: return the static preference-ordered register table used
// when assigning variables to registers, choosing the float table for
// floating-point types (when FEATURE_FP_REGALLOC) and the integer table
// otherwise. If wbVarOrderSize is non-NULL, the table length is written
// through it.
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
218 #if FEATURE_FP_REGALLOC
219 if (varTypeIsFloating(regType))
// Float register order table (REG_VAR_ORDER_FLT from the target header).
221 static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
222 const unsigned         raRegVarOrderFltSize = _countof(raRegVarOrderFlt);
224 if (wbVarOrderSize != NULL)
225 *wbVarOrderSize = raRegVarOrderFltSize;
227 return &raRegVarOrderFlt[0];
// Integer register order table (REG_VAR_ORDER from the target header).
232 static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
233 const unsigned         raRegVarOrderSize = _countof(raRegVarOrder);
235 if (wbVarOrderSize != NULL)
236 *wbVarOrderSize = raRegVarOrderSize;
238 return &raRegVarOrder[0];
244 /*****************************************************************************
246 * Dump out the variable interference graph
// raDumpVarIntf: debug dump of the variable-to-variable interference graph
// (lvaVarIntf), one row per tracked local.
250 void Compiler::raDumpVarIntf()
255 printf("Var. interference graph for %s\n", info.compFullName);
257 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
259 /* Ignore the variable if it's not tracked */
261 if (!varDsc->lvTracked)
264 /* Get hold of the index and the interference mask for the variable */
265 unsigned varIndex = varDsc->lvVarIndex;
267 printf("  V%02u,T%02u and ", lclNum, varIndex);
// Print every tracked variable this one interferes with.
271 for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
273 if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274 printf("T%02u ", refIndex);
285 /*****************************************************************************
287 * Dump out the register interference graph
// raDumpRegIntf: debug dump of the register-to-variable interference graph
// (raLclRegIntf), listing for each tracked local the registers it interferes
// with — FP registers for float-typed locals, integer registers otherwise.
290 void Compiler::raDumpRegIntf()
292 printf("Reg. interference graph for %s\n", info.compFullName);
297 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
301 /* Ignore the variable if it's not tracked */
303 if (!varDsc->lvTracked)
306 /* Get hold of the index and the interference mask for the variable */
308 varNum = varDsc->lvVarIndex;
310 printf("  V%02u,T%02u and ", lclNum, varNum);
312 if (varDsc->IsFloatRegType())
// On non-x87 targets walk the FP register file.
314 #if !FEATURE_STACK_FP_X87
315 for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
317 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318 printf("%3s ", getRegName(regNum, true));
// Integer-typed local: walk the general-purpose register file.
326 for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
328 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329 printf("%3s ", getRegName(regNum));
342 /*****************************************************************************
344 * We'll adjust the ref counts based on interference
// raAdjustVarIntf: intentionally a no-op. The original ref-count adjustment
// based on interference was found to be incorrect and has been disabled;
// the empty stub is kept so callers need not change.
348 void Compiler::raAdjustVarIntf()
350 // This method was not correct and has been disabled.
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
// genReturnRegForTree: return the register mask used to return a value of
// this tree's type from a call. HFA struct returns get a contiguous run of
// FP return registers; everything else comes from a static per-type table.
359 inline regMaskTP Compiler::genReturnRegForTree(GenTree* tree)
361 var_types type = tree->TypeGet();
363 if (varTypeIsStruct(type) && IsHfa(tree))
// One FP register per HFA slot, starting at REG_FLOATRET:
// ((1 << n) - 1) is a mask of n consecutive bits.
365 int retSlots = GetHfaCount(tree);
366 return ((1 << retSlots) - 1) << REG_FLOATRET;
// Per-type return-register table, indexed by var_types. RBM_ILLEGAL marks
// types that can never be returned this way (asserted below).
369 const static regMaskTP returnMap[TYP_COUNT] = {
370 RBM_ILLEGAL,   // TYP_UNDEF,
371 RBM_NONE,      // TYP_VOID,
372 RBM_INTRET,    // TYP_BOOL,
373 RBM_INTRET,    // TYP_BYTE,
374 RBM_INTRET,    // TYP_UBYTE,
375 RBM_INTRET,    // TYP_SHORT,
376 RBM_INTRET,    // TYP_USHORT,
377 RBM_INTRET,    // TYP_INT,
378 RBM_INTRET,    // TYP_UINT,
379 RBM_LNGRET,    // TYP_LONG,
380 RBM_LNGRET,    // TYP_ULONG,
381 RBM_FLOATRET,  // TYP_FLOAT,
382 RBM_DOUBLERET, // TYP_DOUBLE,
383 RBM_INTRET,    // TYP_REF,
384 RBM_INTRET,    // TYP_BYREF,
385 RBM_ILLEGAL,   // TYP_STRUCT,
386 RBM_ILLEGAL,   // TYP_BLK,
387 RBM_ILLEGAL,   // TYP_LCLBLK,
388 RBM_ILLEGAL,   // TYP_UNKNOWN,
// Sanity-check that the table stays in sync with the var_types enum order.
391 assert((unsigned)type < _countof(returnMap));
392 assert(returnMap[TYP_LONG] == RBM_LNGRET);
393 assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
394 assert(returnMap[TYP_REF] == RBM_INTRET);
395 assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
397 regMaskTP result = returnMap[type];
398 assert(result != RBM_ILLEGAL);
402 /*****************************************************************************/
404 /****************************************************************************/
// dispLifeSet: debug helper — print "Vxx " for every local that is tracked,
// present in 'mask', and live in 'life'.
408 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
413 for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
415 if (!varDsc->lvTracked)
// Skip variables outside the caller-supplied filter mask.
418 if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
421 if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
422 printf("V%02u ", lclNum);
428 /*****************************************************************************/
430 /*****************************************************************************
432 * Debugging helpers - display variables liveness info.
// dispFPvarsInBBlist: debug helper — walk the block list from 'beg' to 'end'
// printing each block's live-in/live-out sets filtered by 'mask'
// (FP variables), plus the inner-FP-var count for visited blocks.
435 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
439 printf("BB%02u: ", beg->bbNum);
442 dispLifeSet(comp, mask, beg->bbLiveIn);
446 dispLifeSet(comp, mask, beg->bbLiveOut);
// bbFPinVars is only meaningful once the block has been visited.
449 if (beg->bbFlags & BBF_VISITED)
450 printf(" inner=%u", beg->bbFPinVars);
457 } while (beg != end);
460 #if FEATURE_STACK_FP_X87
// raDispFPlifeInfo: debug dump of FP variable liveness for every block —
// prints the live-in set, then replays each statement's tree list updating
// the live set node by node (via fgUpdateLiveSet) and dumping it alongside
// the tree, and finally prints the live-out set.
461 void Compiler::raDispFPlifeInfo()
465 for (block = fgFirstBB; block; block = block->bbNext)
469 printf("BB%02u: in  = [ ", block->bbNum);
470 dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
// Start from the block's live-in set and walk forward.
473 VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
474 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
478 noway_assert(stmt->gtOper == GT_STMT);
480 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
482 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
484 dispLifeSet(this, optAllFloatVars, life);
486 gtDispTree(tree, 0, NULL, true);
492 printf("BB%02u: out = [ ", block->bbNum);
493 dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
497 #endif // FEATURE_STACK_FP_X87
498 /*****************************************************************************/
500 /*****************************************************************************/
502 /*****************************************************************************/
// raSetRegVarOrder: build a custom register-preference order in
// customVarOrder. Registers are emitted in four tiers, each tier walking the
// normal order table: (1) preferred registers, (2) byteable registers when
// byteability is preferred (x86), (3) all remaining non-avoid registers, and
// (4) the "avoid" registers last. Every register appears exactly once;
// *customVarOrderSize is updated to the emitted count.
504 void Compiler::raSetRegVarOrder(
505 var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
507 unsigned         normalVarOrderSize;
508 const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
510 unsigned  listIndex = 0;
// Seed usedReg with the avoid set so tiers 1-3 skip those registers;
// they are appended at the very end.
511 regMaskTP usedReg   = avoidReg;
513 noway_assert(*customVarOrderSize >= normalVarOrderSize);
517 /* First place the preferred registers at the start of customVarOrder */
522 for (index = 0; index < normalVarOrderSize; index++)
524 regNum = normalVarOrder[index];
525 regBit = genRegMask(regNum);
527 if (usedReg & regBit)
530 if (prefReg & regBit)
533 noway_assert(listIndex < normalVarOrderSize);
534 customVarOrder[listIndex++] = regNum;
541 #if CPU_HAS_BYTE_REGS
542 /* Then if byteable registers are preferred place them */
544 if (prefReg & RBM_BYTE_REG_FLAG)
546 for (index = 0; index < normalVarOrderSize; index++)
548 regNum = normalVarOrder[index];
549 regBit = genRegMask(regNum);
551 if (usedReg & regBit)
554 if (RBM_BYTE_REGS & regBit)
557 noway_assert(listIndex < normalVarOrderSize);
558 customVarOrder[listIndex++] = regNum;
563 #endif // CPU_HAS_BYTE_REGS
566 /* Now place all the non-preferred registers */
568 for (index = 0; index < normalVarOrderSize; index++)
570 regNumber regNum = normalVarOrder[index];
571 regMaskTP regBit = genRegMask(regNum);
573 if (usedReg & regBit)
577 noway_assert(listIndex < normalVarOrderSize);
578 customVarOrder[listIndex++] = regNum;
583 /* Now place the "avoid" registers */
585 for (index = 0; index < normalVarOrderSize; index++)
587 regNumber regNum = normalVarOrder[index];
588 regMaskTP regBit = genRegMask(regNum);
590 if (avoidReg & regBit)
592 noway_assert(listIndex < normalVarOrderSize);
593 customVarOrder[listIndex++] = regNum;
// Report the final count; it must equal the full normal-order length.
601 *customVarOrderSize = listIndex;
602 noway_assert(listIndex == normalVarOrderSize);
605 /*****************************************************************************
607 * Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
// raSetupArgMasks: compute regState->rsCalleeRegArgMaskLiveIn (registers
// holding incoming register arguments that are live on entry) and
// raAvoidArgRegMask (incoming arg registers that locals should prefer to
// avoid, because the arg does not interfere with its own register).
610 void Compiler::raSetupArgMasks(RegState* regState)
612 /* Determine the registers holding incoming register arguments */
613 /* and setup raAvoidArgRegMask to the set of registers that we  */
614 /* may want to avoid when enregistering the locals.            */
616 regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
617 raAvoidArgRegMask                  = RBM_NONE;
619 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
621 for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
623 noway_assert(argDsc->lvIsParam);
625 // Is it a register argument ?
626 if (!argDsc->lvIsRegArg)
// only process args that apply to the current register file
// (varargs and soft-FP pass FP-typed args in integer registers)
630 if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
635 // Is it dead on entry ??
636 // In certain cases such as when compJmpOpUsed is true,
637 // or when we have a generic type context arg that we must report
638 // then the arguments have to be kept alive throughout the prolog.
639 // So we have to consider it as live on entry.
641 bool keepArgAlive = compJmpOpUsed;
642 if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
643 ((lvaTable + info.compTypeCtxtArg) == argDsc))
// Skip args that are provably dead at the start of the first block.
648 if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
653 // The code to set the regState for each arg is outlined for shared use
655 regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
657 // Do we need to try to avoid this incoming arg registers?
659 // If it's not tracked, don't do the stuff below.
660 if (!argDsc->lvTracked)
663 // If the incoming arg is used after a call it is live across
664 // a call and will have to be allocated to a caller saved
665 // register anyway (a very common case).
667 // In this case it is pointless to ask that the higher ref count
668 // locals to avoid using the incoming arg register
670 unsigned argVarIndex = argDsc->lvVarIndex;
672 /* Does the incoming register and the arg variable interfere? */
674 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
676 // No they do not interfere,
677 //  so we add inArgReg to raAvoidArgRegMask
679 raAvoidArgRegMask |= genRegMask(inArgReg);
682 if (argDsc->lvType == TYP_DOUBLE)
684 // Avoid the double register argument pair for register allocation.
// A TYP_DOUBLE arg occupies two consecutive registers; also avoid the
// second register of the pair when it doesn't interfere.
685 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
687 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
694 #endif // LEGACY_BACKEND
696 // The code to set the regState for each arg is outlined for shared use
697 // by linear scan. (It is not shared for System V AMD64 platform.)
696 // The code to set the regState for each arg is outlined for shared use
697 // by linear scan. (It is not shared for System V AMD64 platform.)
// raUpdateRegStateForArg: record the incoming argument register(s) for
// 'argDsc' in regState->rsCalleeRegArgMaskLiveIn, validating that the
// register is legal for the arg's register file. Multi-register cases
// (ARM doubles/longs, HFAs, multi-slot structs) add every register they
// occupy. Returns the (first) incoming argument register.
698 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
700 regNumber inArgReg  = argDsc->lvArgReg;
701 regMaskTP inArgMask = genRegMask(inArgReg);
703 if (regState->rsIsFloat)
705 noway_assert(inArgMask & RBM_FLTARG_REGS);
707 else //  regState is for the integer registers
709 // This might be the fixed return buffer register argument (on ARM64)
710 // We check and allow inArgReg to be theFixedRetBuffReg
711 if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
713 // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
714 noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
715 // We should have recorded the variable number for the return buffer arg
716 noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
718 else // we have a regular arg
720 noway_assert(inArgMask & RBM_ARG_REGS);
724 regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
// ARM: doubles and longs occupy a pair of registers, so mark the second
// register of the pair live as well (this region ends at the
// "#endif // _TARGET_ARM_" below).
727 if (argDsc->lvType == TYP_DOUBLE)
// Under varargs or soft-FP a double arrives in an even integer register
// pair (R0:R1 or R2:R3); otherwise in a real FP double register.
729 if (info.compIsVarArgs || opts.compUseSoftFP)
731 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
732 assert(!regState->rsIsFloat);
736 assert(regState->rsIsFloat);
737 assert(emitter::isDoubleReg(inArgReg));
739 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
741 else if (argDsc->lvType == TYP_LONG)
743 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
744 assert(!regState->rsIsFloat);
745 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
747 #endif // _TARGET_ARM_
749 #if FEATURE_MULTIREG_ARGS
750 if (varTypeIsStruct(argDsc->lvType))
751 // HFA structs take one FP register per element slot.
752 if (argDsc->lvIsHfaRegArg())
754 assert(regState->rsIsFloat);
755 unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
756 for (unsigned i = 1; i < cSlots; i++)
758 assert(inArgReg + i <= LAST_FP_ARGREG);
759 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
// Non-HFA struct: one integer register per pointer-sized slot, stopping
// at the last integer argument register.
764 unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
765 for (unsigned i = 1; i < cSlots; i++)
767 regNumber nextArgReg = (regNumber)(inArgReg + i);
768 if (nextArgReg > REG_ARG_LAST)
772 assert(regState->rsIsFloat == false);
773 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
777 #endif // FEATURE_MULTIREG_ARGS
782 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
784 /*****************************************************************************
786 * Assign variables to live in registers, etc.
// raAssignVars: top-level driver of the legacy allocator — runs FP stack
// allocation (x87 targets), then the register-use predictor, and finally
// retypes unused promoted struct locals to TYP_INT so GC tracking and
// lvMustInit logic ignore them.
789 void Compiler::raAssignVars()
793 printf("*************** In raAssignVars()\n");
795 /* We need to keep track of which registers we ever touch */
797 codeGen->regSet.rsClearRegsModified();
799 #if FEATURE_STACK_FP_X87
800 // FP register allocation
801 raEnregisterVarsStackFP();
802 raGenerateFPRefCounts();
805 /* Predict registers used by code generation */
806 rpPredictRegUse(); // New reg predictor/allocator
808 // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
809 // so that the gc tracking logic and lvMustInit logic will ignore them.
814 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
// Filter down to unused, promoted, non-parameter struct locals.
816 if (varDsc->lvType != TYP_STRUCT)
819 if (!varDsc->lvPromoted)
822 if (varDsc->lvIsParam)
825 if (varDsc->lvRefCnt > 0)
831 printf("Mark unused struct local V%02u\n", lclNum);
834 lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
836 if (promotionType == PROMOTION_TYPE_DEPENDENT)
838 // This should only happen when all its field locals are unused as well.
// Detach the (also unused) field locals from the struct.
840 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
843 noway_assert(lvaTable[varNum].lvRefCnt == 0);
844 lvaTable[varNum].lvIsStructField = false;
849 noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
852 varDsc->lvUnusedStruct = 1;
855 // Change such struct locals to ints
857 varDsc->lvType = TYP_INT; // Bash to a non-gc type.
858 noway_assert(!varDsc->lvTracked);
859 noway_assert(!varDsc->lvRegister);
860 varDsc->lvOnFrame  = false; // Force it not to be onstack.
861 varDsc->lvMustInit = false; // Force not to init it.
862 varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
866 /*****************************************************************************/
867 /*****************************************************************************/
869 /*****************************************************************************
871 * Given a regNumber return the correct predictReg enum value
// rpGetPredictForReg: map a regNumber onto its PREDICT_REG_<name> enum
// value; the two enums are laid out in parallel starting at
// PREDICT_REG_FIRST, so a simple offset suffices.
874 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
876 return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
879 /*****************************************************************************
881 * Given a varIndex return the correct predictReg enum value
// rpGetPredictForVarIndex: map a tracked-variable index onto its
// PREDICT_REG_VAR_Txx enum value (inverse of rpGetVarIndexForPredict).
884 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
886 return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
889 /*****************************************************************************
891 * Given a rpPredictReg return the correct varNumber value
// rpGetVarIndexForPredict: recover the tracked-variable index from a
// PREDICT_REG_VAR_Txx value (inverse of rpGetPredictForVarIndex).
// Only valid when rpHasVarIndexForPredict(predict) is true.
894 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
896 return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
899 /*****************************************************************************
901 * Given a rpPredictReg return true if it specifies a Txx register
// rpHasVarIndexForPredict: true iff 'predict' is one of the
// PREDICT_REG_VAR_Txx values, i.e. it encodes a tracked-variable index
// that rpGetVarIndexForPredict can extract.
904 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
906 if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
912 /*****************************************************************************
914 * Given a regmask return the correct predictReg enum value
// rpGetPredictForMask: map a register mask onto a rpPredictReg value.
// A single-bit mask maps to the corresponding PREDICT_REG_<name>; the known
// two-register pair masks map to their PREDICT_PAIR_* values; anything else
// is a caller error (asserts / NO_WAY).
917 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
919 rpPredictReg result = PREDICT_NONE;
920 if (regmask != 0) /* Check that regmask is non-zero */
922 if (((regmask - 1) & regmask) == 0) /* Check if regmask has exactly one bit set */
925 assert(FitsIn<DWORD>(regmask));
// Fix mis-encoded "&reg" (was the mojibake character '®'): pass the
// address of the local bit-index variable to BitScanForward.
926 BitScanForward(&reg, (DWORD)regmask);
927 return rpGetPredictForReg((regNumber)reg);
930 #if defined(_TARGET_ARM_)
931 /* It has multiple bits set */
932 else if (regmask == (RBM_R0 | RBM_R1))
934 result = PREDICT_PAIR_R0R1;
936 else if (regmask == (RBM_R2 | RBM_R3))
938 result = PREDICT_PAIR_R2R3;
940 #elif defined(_TARGET_X86_)
941 /* It has multiple bits set */
942 else if (regmask == (RBM_EAX | RBM_EDX))
944 result = PREDICT_PAIR_EAXEDX;
946 else if (regmask == (RBM_ECX | RBM_EBX))
948 result = PREDICT_PAIR_ECXEBX;
951 else /* It doesn't match anything */
953 result = PREDICT_NONE;
954 assert(!"unreachable");
955 NO_WAY("bad regpair");
961 /*****************************************************************************
963 * Record a variable to register(s) interference
// rpRecordRegIntf: record interference between every register in 'regMask'
// and every variable in 'life' by unioning 'life' into raLclRegIntf for each
// such register. Returns whether any new interference was added (the return
// path is outside the visible lines). 'msg' labels verbose-dump output.
966 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
969 bool addedIntf = false;
973 for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
975 regMaskTP regBit = genRegMask(regNum);
977 if (regMask & regBit)
// Variables in 'life' not yet recorded as interfering with regNum.
979 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
980 if (!VarSetOps::IsEmpty(this, newIntf))
985 VarSetOps::Iter newIntfIter(this, newIntf);
987 while (newIntfIter.NextElem(&varNum))
989 unsigned   lclNum = lvaTrackedToVarNum[varNum];
990 LclVarDsc* varDsc = &lvaTable[varNum];
991 #if FEATURE_FP_REGALLOC
992 // Only print the useful interferences
993 // i.e. floating point LclVar interference with floating point registers
994 //      or integer LclVar interference with general purpose registers
995 if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
998 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
999 getRegName(regNum), msg);
1005 VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1017 /*****************************************************************************
1019 * Record a new variable to variable(s) interference
// rpRecordVarIntf: record interference between tracked variable 'varNum' and
// every variable in 'intfVar' (via fgMarkIntf). Sets rpAddedVarIntf when new
// interference is added; 'msg' labels verbose-dump output. The return value
// (whether anything new was added) is produced outside the visible lines.
1022 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
1024 noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1025 noway_assert(!VarSetOps::IsEmpty(this, intfVar));
// Build a singleton set for varNum and mark it against intfVar.
1027 VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1028 VarSetOps::AddElemD(this, oneVar, varNum);
1030 bool newIntf = fgMarkIntf(intfVar, oneVar);
1033 rpAddedVarIntf = true;
1036 if (verbose && newIntf)
1038 for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1040 if (VarSetOps::IsMember(this, intfVar, oneNum))
1042 unsigned lclNum = lvaTrackedToVarNum[varNum];
1043 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1044 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1054 /*****************************************************************************
1056 * Determine preferred register mask for a given predictReg value
// rpPredictRegMask: return the register mask of candidates for 'predictReg'
// applied to a value of type 'type'. PREDICT_REG_VAR_Txx values are treated
// as a generic PREDICT_REG; the map result is then narrowed to the float or
// integer register file, and for TYP_DOUBLE widened to cover register pairs.
1059 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
1061 if (rpHasVarIndexForPredict(predictReg))
1062 predictReg = PREDICT_REG;
1064 noway_assert((unsigned)predictReg < _countof(rpPredictMap));
1065 noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1067 regMaskTP regAvailForType = rpPredictMap[predictReg];
1068 if (varTypeIsFloating(type))
1070 regAvailForType &= RBM_ALLFLOAT;
1074 regAvailForType &= RBM_ALLINT;
1077 if (type == TYP_DOUBLE)
1079 if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1081 // Fix 388433 ARM JitStress WP7
// A double needs a consecutive register pair; include the partner
// register of each candidate double register, or yield RBM_NONE when
// no double-capable register is available.
1082 if ((regAvailForType & RBM_DBL_REGS) != 0)
1084 regAvailForType |= (regAvailForType << 1);
1088 regAvailForType = RBM_NONE;
1093 return regAvailForType;
1096 /*****************************************************************************
1098 * Predict register choice for a type.
1100 * Adds the predicted registers to rsModifiedRegsMask.
// rpPredictRegPick: pick the register (or register pair) predicted to hold a
// value of 'type' given the prediction kind and the set of locked registers.
// Falls back to predefined spill-choice registers when nothing is available
// (counting the spill temps it will force), records interference against
// last-use / in-place-use variables, and adds the chosen registers to the
// modified-registers set. The selected mask ends up in regBits (the returns
// are outside the visible lines).
1102 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1104 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1108 // Add any reserved register to the lockedRegs
1109 lockedRegs |= codeGen->regSet.rsMaskResvd;
1111 /* Clear out the lockedRegs from preferReg */
1112 preferReg &= ~lockedRegs;
// Also avoid the register currently targeted by an in-flight assignment
// (rpAsgVarNum), to keep from clobbering its destination.
1114 if (rpAsgVarNum != -1)
1116 noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
1118 /* Don't pick the register used by rpAsgVarNum either */
1119 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1120 noway_assert(tgtVar->lvRegNum != REG_STK);
1122 preferReg &= ~genRegMask(tgtVar->lvRegNum);
1136 #ifdef _TARGET_AMD64_
1138 #endif // _TARGET_AMD64_
// --- Integer (single-register) case ---
1140 // expand preferReg to all non-locked registers if no bits set
1141 preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1143 if (preferReg == 0) // no bits set?
1145 // Add one predefined spill choice register if no bits set.
1146 // (The jit will introduce one spill temp)
1147 preferReg |= RBM_SPILL_CHOICE;
1148 rpPredictSpillCnt++;
1152 printf("Predict one spill temp\n");
1158 /* Iterate the registers in the order specified by rpRegTmpOrder */
1160 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1162 regNum  = rpRegTmpOrder[index];
1163 regBits = genRegMask(regNum);
1165 if ((preferReg & regBits) == regBits)
1171 /* Otherwise we have allocated all registers, so do nothing */
// --- Long/pair (two-register) case, 32-bit targets only ---
1174 #ifndef _TARGET_AMD64_
1177 if ((preferReg == 0) ||                   // no bits set?
1178 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1180 // expand preferReg to all non-locked registers
1181 preferReg = RBM_ALLINT & ~lockedRegs;
1184 if (preferReg == 0) // no bits set?
1186 // Add EAX:EDX to the registers
1187 // (The jit will introduce two spill temps)
1188 preferReg = RBM_PAIR_TMP;
1189 rpPredictSpillCnt += 2;
1192 printf("Predict two spill temps\n");
1195 else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
// Complete the pair with whichever half of the temp pair is missing.
1197 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1199 // Add EAX to the registers
1200 // (The jit will introduce one spill temp)
1201 preferReg |= RBM_PAIR_TMP_LO;
1205 // Add EDX to the registers
1206 // (The jit will introduce one spill temp)
1207 preferReg |= RBM_PAIR_TMP_HI;
1209 rpPredictSpillCnt++;
1212 printf("Predict one spill temp\n");
1217 regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1218 if (regPair != REG_PAIR_NONE)
1220 regBits = genRegPairMask(regPair);
1224 /* Otherwise we have allocated all registers, so do nothing */
1226 #endif // _TARGET_AMD64_
// --- Floating-point case ---
1235 #if FEATURE_FP_REGALLOC
// Honor the COMPlus_JitRegisterFP restriction, but always allow the
// callee-trash FP set so a spill-choice register exists.
1236 regMaskTP restrictMask;
1237 restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
1238 assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1240 // expand preferReg to all available non-locked registers if no bits set
1241 preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1242 regMaskTP preferDouble;
1243 preferDouble = preferReg & (preferReg >> 1);
1245 if ((preferReg == 0) // no bits set?
1247 || ((type == TYP_DOUBLE) &&
1248 ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1252 // Add one predefined spill choice register if no bits set.
1253 // (The jit will introduce one spill temp)
1254 preferReg |= RBM_SPILL_CHOICE_FLT;
1255 rpPredictSpillCnt++;
1259 printf("Predict one spill temp (float)\n");
1263 assert(preferReg != 0);
1265 /* Iterate the registers in the order specified by raRegFltTmpOrder */
1267 for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1269 regNum  = raRegFltTmpOrder[index];
1270 regBits = genRegMask(regNum);
1272 if (varTypeIsFloating(type))
1275 if (type == TYP_DOUBLE)
1277 if ((regBits & RBM_DBL_REGS) == 0)
1279 continue; // We must restrict the set to the double registers
1283 // TYP_DOUBLE use two consecutive registers
1284 regBits |= genRegMask(REG_NEXT(regNum));
1288 // See if COMPlus_JitRegisterFP is restricting this FP register
1290 if ((restrictMask & regBits) != regBits)
1294 if ((preferReg & regBits) == regBits)
1299 /* Otherwise we have allocated all registers, so do nothing */
1302 #else // !FEATURE_FP_REGALLOC
1309 noway_assert(!"unexpected type in reg use prediction");
1312 /* Abnormal return */
1313 noway_assert(!"Ran out of registers in rpPredictRegPick");
// --- Common exit path (reached via goto from the cases above) ---
1318 *  If during the first prediction we need to allocate
1319 *  one of the registers that we used for coloring locals
1320 *  then flag this by setting rpPredictAssignAgain.
1321 *  We will have to go back and repredict the registers
1323 if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1324 rpPredictAssignAgain = true;
1326 // Add a register interference to each of the last use variables
1327 if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1329 VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1330 VarSetOps::Assign(this, lastUse, rpLastUseVars);
1331 VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1332 VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1333 // While we still have any lastUse or inPlaceUse bits
1334 VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1336 VARSET_TP varAsSet(VarSetOps::MakeEmpty(this));
1337 VarSetOps::Iter iter(this, useUnion);
1338 unsigned        varNum = 0;
1339 while (iter.NextElem(&varNum))
1341 // We'll need this for one of the calls...
1342 VarSetOps::ClearD(this, varAsSet);
1343 VarSetOps::AddElemD(this, varAsSet, varNum);
1345 // If this varBit and lastUse?
1346 if (VarSetOps::IsMember(this, lastUse, varNum))
1348 // Record a register to variable interference
1349 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1352 // If this varBit and inPlaceUse?
1353 if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1355 // Record a register to variable interference
1356 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
// Remember that codegen will touch the chosen register(s).
1360 codeGen->regSet.rsSetRegsModified(regBits);
1365 /*****************************************************************************
1367 * Predict integer register use for generating an address mode for a tree,
1368 * by setting tree->gtUsedRegs to all registers used by this tree and its
1370 * tree - is the child of a GT_IND node
1371 * type - the type of the GT_IND node (floating point/integer)
1372 * lockedRegs - are the registers which are currently held by
1373 * a previously evaluated node.
1374 * rsvdRegs - registers which should not be allocated because they will
1375 * be needed to evaluate a node in the future
1376 * - Also if rsvdRegs has the RBM_LASTUSE bit set then
1377 * the rpLastUseVars set should be saved and restored
1378 * so that we don't add any new variables to rpLastUseVars
1379 * lenCSE - is non-NULL only when we have a lenCSE expression
1381 * Return the scratch registers to be held by this tree. (one or two registers
1382 * to form an address expression)
1385 regMaskTP Compiler::rpPredictAddressMode(
1386 GenTree* tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTree* lenCSE)
1391 genTreeOps oper = tree->OperGet();
1398 bool hasTwoAddConst = false;
1399 bool restoreLastUseVars = false;
1400 VARSET_TP oldLastUseVars(VarSetOps::MakeEmpty(this));
1402 /* do we need to save and restore the rpLastUseVars set ? */
1403 if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1405 restoreLastUseVars = true;
1406 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1408 rsvdRegs &= ~RBM_LASTUSE;
1410 /* if not an add, then just force it to a register */
1414 if (oper == GT_ARR_ELEM)
1416 regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1425 op1 = tree->gtOp.gtOp1;
1426 op2 = tree->gtOp.gtOp2;
1427 rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1429 /* look for (x + y) + icon address mode */
1431 if (op2->OperGet() == GT_CNS_INT)
1433 cns = op2->gtIntCon.gtIconVal;
1435 /* if not an add, then just force op1 into a register */
1436 if (op1->OperGet() != GT_ADD)
1439 hasTwoAddConst = true;
1441 /* Record the 'rev' flag, reverse evaluation order */
1442 rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1444 op2 = op1->gtOp.gtOp2;
1445 op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1448 /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1451 if (op2->OperGet() == GT_LSH)
1453 if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1455 sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1456 opTemp = op2->gtOp.gtOp1;
1470 if (opTemp->OperGet() == GT_NOP)
1472 opTemp = opTemp->gtOp.gtOp1;
1475 // Is this a const operand?
1476 if (opTemp->OperGet() == GT_CNS_INT)
1478 // Compute the new cns value that Codegen will end up using
1479 cns += (opTemp->gtIntCon.gtIconVal << sh);
1485 /* Check for LSH in op1 slot */
1487 if (op1->OperGet() != GT_LSH)
1490 opTemp = op1->gtOp.gtOp2;
1492 if (opTemp->OperGet() != GT_CNS_INT)
1495 sh = opTemp->gtIntCon.gtIconVal;
1497 /* Check for LSH of 0, special case */
1501 #if defined(_TARGET_XARCH_)
1503 /* Check for LSH of 1 2 or 3 */
1507 #elif defined(_TARGET_ARM_)
1509 /* Check for LSH of 1 to 30 */
1519 /* Matched a leftShift by 'sh' subtree, move op1 down */
1520 op1 = op1->gtOp.gtOp1;
1524 /* Now we have to evaluate op1 and op2 into registers */
1526 /* Evaluate op1 and op2 in the correct order */
// Reverse evaluation order: op2 first, then op1 (GTF_REVERSE_OPS case)
1529 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1530 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
// Normal evaluation order: op1 first, then op2
1534 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1535 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1538 /* If op1 and op2 must be spilled and reloaded then
1539 * op1 and op2 might be reloaded into the same register
1540 * This can only happen when all the registers are lockedRegs
1542 if ((op1Mask == op2Mask) && (op1Mask != 0))
1544 /* We'll need to grab a different register for op2 */
1545 op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1549 // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1550 // [op2 + op1<<sh + cns]
1551 // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1553 if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1555 op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1559 // On the ARM we will need at least one scratch register for trees that have this form:
1560 // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
1561 // or for a float/double or long when we have both op1 and op2
1562 // or when we have a 'cns' that is too large for the ld/st instruction
1564 if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1566 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1570 // If we create a CSE that immediately dies then we may need to add an additional register interference
1571 // so we don't color the CSE into R3
1573 if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1575 opTemp = op2->gtOp.gtOp2;
1576 if (opTemp->OperGet() == GT_LCL_VAR)
1578 unsigned varNum = opTemp->gtLclVar.gtLclNum;
1579 LclVarDsc* varDsc = &lvaTable[varNum];
// Only a tracked variable that is not currently live is a "dead CSE" candidate
1581 if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1583 rpRecordRegIntf(RBM_TMP_0,
1584 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
// The scratch registers for the address expression are the union of both operand masks
1590 regMask = (op1Mask | op2Mask);
1591 tree->gtUsedRegs = (regMaskSmall)regMask;
1596 /* now we have to evaluate op1 into a register */
1598 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1603 // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1606 if (!codeGen->validDispForLdSt(cns, type))
1608 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1612 regMask = (op1Mask | op2Mask);
1613 tree->gtUsedRegs = (regMaskSmall)regMask;
1618 #if !CPU_LOAD_STORE_ARCH
1619 if (oper == GT_CNS_INT)
1621 /* Indirect of a constant does not require a register */
1627 /* now we have to evaluate tree into a register */
1628 regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1632 regMaskTP regUse = tree->gtUsedRegs;
1634 if (!VarSetOps::IsEmpty(this, compCurLife))
1636 // Add interference between the current set of live variables and
1637 // the set of temporary registers needed to evaluate the sub tree
1640 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1644 /* Do we need to restore the oldLastUseVars value */
1645 if (restoreLastUseVars)
1648 * If we used a GT_ASG targeted register then we need to add
1649 * a variable interference between any new last use variables
1650 * and the GT_ASG targeted register
1652 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1654 rpRecordVarIntf(rpAsgVarNum,
1655 VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1657 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1663 /*****************************************************************************
// rpPredictRefAssign: record that local 'lclNum' is the target of a GC-ref
// assignment, and (when NOGC_WRITE_BARRIERS is enabled) make it interfere with
// the register clobbered by the write-barrier helper so it is never colored there.
//
// Arguments:
//    lclNum - the local variable number being assigned a ref value
1668 void Compiler::rpPredictRefAssign(unsigned lclNum)
1670 LclVarDsc* varDsc = lvaTable + lclNum;
// Flag this local as a ref-assignment target
1672 varDsc->lvRefAssign = 1;
1674 #if NOGC_WRITE_BARRIERS
// Debug-only trace, emitted the first time we record this interference
1678 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1679 printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1680 varDsc->lvVarIndex);
1684 /* Make sure that write barrier pointer variables never land in EDX */
1685 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1686 #endif // NOGC_WRITE_BARRIERS
1689 /*****************************************************************************
1691 * Predict the internal temp physical register usage for a block assignment tree,
1692 * by setting tree->gtUsedRegs.
1693 * Records the internal temp physical register usage for this tree.
1694 * Returns a mask of interfering registers for this tree.
1696 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1697 * to the set of scratch registers needed when evaluating the tree.
1698 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1699 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1700 * predict additional internal temp physical registers to spill into.
1702 * tree - is the block assignment node (an initBlk or copyBlk operation)
1703 * predictReg - what type of register does the tree need
1704 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1705 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1706 * rsvdRegs - registers which should not be allocated because they will
1707 * be needed to evaluate a node in the future
1708 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1709 * the rpLastUseVars set should be saved and restored
1710 * so that we don't add any new variables to rpLastUseVars.
1712 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTree* tree,
1713 rpPredictReg predictReg,
1714 regMaskTP lockedRegs,
1717 regMaskTP regMask = RBM_NONE;
1718 regMaskTP interferingRegs = RBM_NONE;
// Classify the block assignment: does it contain GC pointers, is the
// destination on the stack, and can we inline it or must we call a helper?
1720 bool hasGCpointer = false;
1721 bool dstIsOnStack = false;
1722 bool useMemHelper = false;
1723 bool useBarriers = false;
1724 GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
1725 GenTree* dstAddr = dst->Addr();
1726 GenTree* srcAddrOrFill = tree->gtGetOp2IfPresent();
1728 size_t blkSize = dst->gtBlkSize;
1730 hasGCpointer = (dst->HasGCPtr());
1732 bool isCopyBlk = tree->OperIsCopyBlkOp();
1733 bool isCopyObj = isCopyBlk && hasGCpointer;
1734 bool isInitBlk = tree->OperIsInitBlkOp();
// For a copy, op2 is an indirection; strip it down to the source address
1738 assert(srcAddrOrFill->OperIsIndir());
1739 srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1743 // For initBlk, we don't need to worry about the GC pointers.
1744 hasGCpointer = false;
1751 dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
// A non-constant fill value forces the memset helper path
1756 if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1758 useMemHelper = true;
1764 useMemHelper = true;
// GC pointers going to the heap require write barriers
1767 if (hasGCpointer && !dstIsOnStack)
1774 // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths.
1776 if (!useMemHelper && !useBarriers)
1778 bool useLoop = false;
1779 unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1781 // A mask to use to force the predictor to choose low registers (to reduce code size)
1782 regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1784 // Allow the src and dst to be used in place, unless we use a loop, in which
1785 // case we will need scratch registers as we will be writing to them.
1786 rpPredictReg srcAndDstPredict = PREDICT_REG;
1788 // Will we be using a loop to implement this INITBLK/COPYBLK?
1789 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1792 avoidReg = RBM_NONE;
1793 srcAndDstPredict = PREDICT_SCRATCH_REG;
// Predict src/dst address operands in the order dictated by GTF_REVERSE_OPS
1796 if (tree->gtFlags & GTF_REVERSE_OPS)
1798 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1799 dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1800 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1804 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1805 srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1806 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1809 // We need at least one scratch register for a copyBlk
1812 // Pick a low register to reduce the code size
1813 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1820 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1821 // Pick another low register to reduce the code size
1822 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1825 // We need a loop index register
1826 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
// Inline (non-helper) path is done: report the combined usage and interference
1829 tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1831 return interferingRegs;
1834 // What order should the Dest, Val/Src, and Size be calculated
1836 regMaskTP regsPtr[3];
1838 #if defined(_TARGET_XARCH_)
// x86/x64 uses rep movs/stos style codegen with fixed registers (EDI/ESI/ECX)
1839 fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1841 // We're going to use these, might as well make them available now
1843 codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1845 codeGen->regSet.rsSetRegsModified(RBM_ESI);
1847 #elif defined(_TARGET_ARM_)
1851 // For all other cases that involve non-constants, we just call memcpy/memset
1853 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
// The helper call trashes the caller-saved registers
1854 interferingRegs |= RBM_CALLEE_TRASH;
1857 printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1862 assert(useBarriers);
1865 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1867 // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1868 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1871 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1874 #else // !_TARGET_X86_ && !_TARGET_ARM_
1875 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1876 #endif // !_TARGET_X86_ && !_TARGET_ARM_
// Predict the three ordered operands (dest, val/src, size) into their
// required registers, recording interference with currently-live variables
1877 regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1878 regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1879 opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1880 regMask |= regsPtr[0];
1881 opsPtr[0]->gtUsedRegs |= regsPtr[0];
1882 rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1884 regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1885 opsPtr2RsvdRegs | RBM_LASTUSE);
1886 regMask |= regsPtr[1];
1887 opsPtr[1]->gtUsedRegs |= regsPtr[1];
1888 rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1890 regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1891 if (opsPtr[2] == nullptr)
1893 // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1894 // Note that it is quite possible that no register is required, but this preserves
1896 regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1897 rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1901 regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1902 opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1904 regMask |= opsPtr2UsedRegs;
1906 tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1907 return interferingRegs;
1910 /*****************************************************************************
1912 * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1913 * Returns a regMask with the internal temp physical register usage for this tree.
1915 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1916 * to the set of scratch registers needed when evaluating the tree.
1917 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1918 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1919 * predict additional internal temp physical registers to spill into.
1921 * tree - the tree whose register usage is being predicted
1922 * predictReg - what type of register does the tree need
1923 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1924 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1925 * rsvdRegs - registers which should not be allocated because they will
1926 * be needed to evaluate a node in the future
1927 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1928 * the rpLastUseVars set should be saved and restored
1929 * so that we don't add any new variables to rpLastUseVars.
1932 #pragma warning(disable : 4701)
1935 #pragma warning(push)
1936 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1938 regMaskTP Compiler::rpPredictTreeRegUse(GenTree* tree,
1939 rpPredictReg predictReg,
1940 regMaskTP lockedRegs,
1943 regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
1946 rpPredictReg op1PredictReg;
1947 rpPredictReg op2PredictReg;
1948 LclVarDsc* varDsc = NULL;
1949 VARSET_TP oldLastUseVars(VarSetOps::UninitVal());
1951 VARSET_TP varBits(VarSetOps::UninitVal());
1952 VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1954 bool restoreLastUseVars = false;
1955 regMaskTP interferingRegs = RBM_NONE;
1958 // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1960 noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1961 noway_assert(RBM_ILLEGAL);
1962 noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1963 /* impossible values, to make sure that we set them */
1964 tree->gtUsedRegs = RBM_ILLEGAL;
1967 /* Figure out what kind of a node we have */
1969 genTreeOps oper = tree->OperGet();
1970 var_types type = tree->TypeGet();
1971 unsigned kind = tree->OperKind();
1973 // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1974 genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1975 if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1976 predictReg = PREDICT_NONE;
1977 else if (rpHasVarIndexForPredict(predictReg))
1979 // The only place where predictReg is set to a var is in the PURE
1980 // assignment case where varIndex is the var being assigned to.
1981 // We need to check whether the variable is used between here and
1982 // its redefinition.
1983 unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1984 unsigned lclNum = lvaTrackedToVarNum[varIndex];
1986 for (GenTree* nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1988 if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1990 // Is this the pure assignment?
1991 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1993 predictReg = PREDICT_SCRATCH_REG;
2002 if (rsvdRegs & RBM_LASTUSE)
2004 restoreLastUseVars = true;
2005 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2006 rsvdRegs &= ~RBM_LASTUSE;
2009 /* Is this a constant or leaf node? */
2011 if (kind & (GTK_CONST | GTK_LEAF))
2013 bool lastUse = false;
2014 regMaskTP enregMask = RBM_NONE;
2020 // Codegen for floating point constants on the ARM is currently
2021 // movw/movt rT1, <lo32 bits>
2022 // movw/movt rT2, <hi32 bits>
2023 // vmov.i2d dT0, rT1,rT2
2025 // For TYP_FLOAT one integer register is required
2027 // These integer register(s) immediately die
2028 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2029 if (type == TYP_DOUBLE)
2031 // For TYP_DOUBLE a second integer register is required
2033 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2036 // We also need a floating point register that we keep
2038 if (predictReg == PREDICT_NONE)
2039 predictReg = PREDICT_SCRATCH_REG;
2041 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2042 tree->gtUsedRegs = regMask | tmpMask;
2049 if (rpHasVarIndexForPredict(predictReg))
2051 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2052 rpAsgVarNum = tgtIndex;
2054 // We don't need any register as we plan on writing to the rpAsgVarNum register
2055 predictReg = PREDICT_NONE;
2057 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2058 tgtVar->lvDependReg = true;
2060 if (type == TYP_LONG)
2062 assert(oper == GT_CNS_LNG);
2064 if (tgtVar->lvOtherReg == REG_STK)
2066 // Well we do need one register for a partially enregistered
2068 predictReg = PREDICT_SCRATCH_REG;
2074 #if !CPU_LOAD_STORE_ARCH
2075 /* If the constant is a handle then it will need to have a relocation
2076 applied to it. It will need to be loaded into a register.
2077 But never throw away an existing hint.
2079 if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2082 if (predictReg == PREDICT_NONE)
2083 predictReg = PREDICT_SCRATCH_REG;
2092 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2093 (genTypeSize(type) < sizeof(int)))
2095 predictReg = PREDICT_SCRATCH_REG;
2098 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2100 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2102 // These integer register(s) immediately die
2103 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2104 // Two integer registers are required for a TYP_DOUBLE
2105 if (type == TYP_DOUBLE)
2106 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2108 // We need a temp register in some cases of loads/stores to a class var
2109 if (predictReg == PREDICT_NONE)
2111 predictReg = PREDICT_SCRATCH_REG;
2114 if (rpHasVarIndexForPredict(predictReg))
2116 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2117 rpAsgVarNum = tgtIndex;
2119 // We don't need any register as we plan on writing to the rpAsgVarNum register
2120 predictReg = PREDICT_NONE;
2122 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2123 tgtVar->lvDependReg = true;
2125 if (type == TYP_LONG)
2127 if (tgtVar->lvOtherReg == REG_STK)
2129 // Well we do need one register for a partially enregistered
2131 predictReg = PREDICT_SCRATCH_REG;
2139 // Check for a misalignment on a Floating Point field
2141 if (varTypeIsFloating(type))
2143 if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2145 // These integer register(s) immediately die
2146 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2147 // Two integer registers are required for a TYP_DOUBLE
2148 if (type == TYP_DOUBLE)
2149 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2158 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2160 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2161 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2162 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2164 #if FEATURE_STACK_FP_X87
2165 // If it's a floating point var, there's nothing to do
2166 if (varTypeIsFloating(type))
2168 tree->gtUsedRegs = RBM_NONE;
2174 // If the variable is already a register variable, no need to go further.
2175 if (oper == GT_REG_VAR)
2178 /* Apply the type of predictReg to the LCL_VAR */
2180 if (predictReg == PREDICT_REG)
2183 if (varDsc->lvRegNum == REG_STK)
2188 else if (predictReg == PREDICT_SCRATCH_REG)
2190 noway_assert(predictReg == PREDICT_SCRATCH_REG);
2192 /* Is this the last use of a local var? */
2195 if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2196 goto PREDICT_REG_COMMON;
2199 else if (rpHasVarIndexForPredict(predictReg))
2201 /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2203 unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2204 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2205 VarSetOps::MakeSingleton(this, tgtIndex1);
2207 noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2208 noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2209 #ifndef _TARGET_AMD64_
2210 // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2211 // so this assert is meaningless
2212 noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2213 #endif // !_TARGET_AMD64_
2215 if (varDsc->lvTracked)
2218 srcIndex = varDsc->lvVarIndex;
2220 // If this register has it's last use here then we will prefer
2221 // to color to the same register as tgtVar.
2225 * Add an entry in the lvaVarPref graph to indicate
2226 * that it would be worthwhile to color these two variables
2227 * into the same physical register.
2228 * This will help us avoid having an extra copy instruction
2230 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2231 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2234 // Add a variable interference from srcIndex to each of the last use variables
2235 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2237 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2240 rpAsgVarNum = tgtIndex1;
2242 /* We will rely on the target enregistered variable from the GT_ASG */
2249 if (genIsValidFloatReg(varDsc->lvRegNum))
2251 enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2255 enregMask = genRegMask(varDsc->lvRegNum);
2259 if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2261 // We need to compute the intermediate value using a TYP_DOUBLE
2262 // but we storing the result in a TYP_SINGLE enregistered variable
2269 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2270 if (enregMask & (rsvdRegs | lockedRegs))
2274 #ifndef _TARGET_64BIT_
2275 if (type == TYP_LONG)
2277 if (varDsc->lvOtherReg != REG_STK)
2279 tmpMask = genRegMask(varDsc->lvOtherReg);
2280 enregMask |= tmpMask;
2282 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2283 if (tmpMask & (rsvdRegs | lockedRegs))
2286 else // lvOtherReg == REG_STK
2291 #endif // _TARGET_64BIT_
2294 varDsc->lvDependReg = true;
2298 /* Does not need a register */
2299 predictReg = PREDICT_NONE;
2300 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2301 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2303 else // (grabCount > 0)
2305 #ifndef _TARGET_64BIT_
2306 /* For TYP_LONG and we only need one register then change the type to TYP_INT */
2307 if ((type == TYP_LONG) && (grabCount == 1))
2309 /* We will need to pick one register */
2311 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2312 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2314 noway_assert((type == TYP_DOUBLE) ||
2315 (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2316 #else // !_TARGET_64BIT_
2317 noway_assert(grabCount == 1);
2318 #endif // !_TARGET_64BIT_
2321 else if (type == TYP_STRUCT)
2324 // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2325 // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2326 // As a workaround we just bash it to PREDICT_NONE here
2328 if (predictReg != PREDICT_NONE)
2329 predictReg = PREDICT_NONE;
2331 // Currently predictReg is saying that we will not need any scratch registers
2332 noway_assert(predictReg == PREDICT_NONE);
2334 /* We may need to sign or zero extend a small type when pushing a struct */
2335 if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2337 for (unsigned varNum = varDsc->lvFieldLclStart;
2338 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2340 LclVarDsc* fldVar = lvaTable + varNum;
2342 if (fldVar->lvStackAligned())
2344 // When we are stack aligned Codegen will just use
2345 // a push instruction and thus doesn't need any register
2346 // since we can push both a register or a stack frame location
2350 if (varTypeIsByte(fldVar->TypeGet()))
2352 // We will need to reserve one byteable register,
2355 predictReg = PREDICT_SCRATCH_REG;
2356 #if CPU_HAS_BYTE_REGS
2357 // It is best to enregister this fldVar in a byteable register
2359 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2362 else if (varTypeIsShort(fldVar->TypeGet()))
2364 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2365 // If fldVar is not enregistered then we will need a scratch register
2367 if (!isEnregistered)
2369 // We will need either an int register or a byte register
2370 // If we are not requesting a byte register we will request an int register
2372 if (type != TYP_BYTE)
2374 predictReg = PREDICT_SCRATCH_REG;
2382 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2385 if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2387 varDsc->addPrefReg(preferReg, this);
2391 break; /* end of case GT_LCL_VAR */
2394 tree->gtUsedRegs = RBM_NONE;
2397 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2398 // Mark the registers required to emit a tailcall profiler callback
2399 if (compIsProfilerHookNeeded())
2401 tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2408 } /* end of switch (oper) */
2410 /* If we don't need to evaluate to register, regmask is the empty set */
2411 /* Otherwise we grab a temp for the local variable */
2413 if (predictReg == PREDICT_NONE)
2417 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2419 if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2421 /* We need to sign or zero extend a small type when pushing a struct */
2422 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2424 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2425 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2427 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2430 LclVarDsc* fldVar = lvaTable + varNum;
2431 if (fldVar->lvTracked)
2433 VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2434 rpRecordRegIntf(regMask, fldBit DEBUGARG(
2435 "need scratch register when pushing a small field of a struct"));
2441 /* Update the set of lastUse variables that we encountered so far */
2444 VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2445 VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2448 * Add interference from any previously locked temps into this last use variable.
2452 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2455 * Add interference from any reserved temps into this last use variable.
2459 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2462 * For partially enregistered longs add an interference with the
2463 * register return by rpPredictRegPick
2465 if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2467 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2471 tree->gtUsedRegs = (regMaskSmall)regMask;
2475 /* Is it a 'simple' unary/binary operator? */
2477 if (kind & GTK_SMPOP)
2479 GenTree* op1 = tree->gtOp.gtOp1;
2480 GenTree* op2 = tree->gtGetOp2IfPresent();
2483 regMaskTP regsPtr[3];
2485 VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2491 /* Is the value being assigned into a LCL_VAR? */
2492 if (op1->gtOper == GT_LCL_VAR)
2494 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2496 /* Are we assigning a LCL_VAR the result of a call? */
2497 if (op2->gtOper == GT_CALL)
2499 /* Set a preferred register for the LCL_VAR */
2500 if (isRegPairType(varDsc->TypeGet()))
2501 varDsc->addPrefReg(RBM_LNGRET, this);
2502 else if (!varTypeIsFloating(varDsc->TypeGet()))
2503 varDsc->addPrefReg(RBM_INTRET, this);
2504 #ifdef _TARGET_AMD64_
2506 varDsc->addPrefReg(RBM_FLOATRET, this);
2509 * When assigning the result of a call we don't
2510 * bother trying to target the right side of the
2511 * assignment, since we have a fixed calling convention.
2514 else if (varDsc->lvTracked)
2516 // We interfere with uses in place
2517 if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2519 rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2522 // Did we predict that this local will be fully enregistered?
2523 // and the assignment type is the same as the expression type?
2524 // and it is dead on the right side of the assignment?
2525 // and we current have no other rpAsgVarNum active?
2527 if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2528 (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2531 // Yes, we should try to target the right side (op2) of this
2532 // assignment into the (enregistered) tracked variable.
2535 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2536 op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2538 // Remember that this is a new use in place
2540 // We've added "new UseInPlace"; remove from the global set.
2541 VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2543 // Note that later when we walk down to the leaf node for op2
2544 // if we decide to actually use the register for the 'varDsc'
2545 // to enregister the operand, then we will set rpAsgVarNum to
2546 // varDsc->lvVarIndex, by extracting this value using
2547 // rpGetVarIndexForPredict()
2549 // Also we reset rpAsgVarNum back to -1 after we have finished
2550 // predicting the current GT_ASG node
2556 else if (tree->OperIsBlkOp())
2558 interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2575 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2576 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2578 /* Is the value being assigned into an enregistered LCL_VAR? */
2579 /* For debug code we only allow a simple op2 to be assigned */
2580 if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2582 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2583 /* Did we predict that this local will be enregistered? */
2584 if (varDsc->lvRegNum != REG_STK)
2586 /* Yes, we can use "reg <op>= addr" */
2588 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2589 op2PredictReg = PREDICT_NONE;
2596 #if CPU_LOAD_STORE_ARCH
2599 op1PredictReg = PREDICT_REG;
2600 op2PredictReg = PREDICT_REG;
2606 * Otherwise, initialize the normal forcing of operands:
2609 op1PredictReg = PREDICT_ADDR;
2610 op2PredictReg = PREDICT_REG;
2615 #if !CPU_LOAD_STORE_ARCH
2616 if (op2PredictReg != PREDICT_NONE)
2618 /* Is the value being assigned a simple one? */
2619 if (rpCanAsgOperWithoutReg(op2, false))
2620 op2PredictReg = PREDICT_NONE;
2624 bool simpleAssignment;
2625 simpleAssignment = false;
2627 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2629 // Add a variable interference from the assign target
2630 // to each of the last use variables
2631 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2633 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2635 if (varDsc->lvTracked)
2637 unsigned varIndex = varDsc->lvVarIndex;
2639 rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2643 /* Record whether this tree is a simple assignment to a local */
2645 simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2648 bool requireByteReg;
2649 requireByteReg = false;
2651 #if CPU_HAS_BYTE_REGS
2652 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2654 if (varTypeIsByte(type) &&
2655 ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2658 // Byte-assignments typically need a byte register
2659 requireByteReg = true;
2661 if (op1->gtOper == GT_LCL_VAR)
2663 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2665 // Did we predict that this local will be enregistered?
2666 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2668 // We don't require a byte register when op1 is an enregistered local */
2669 requireByteReg = false;
2672 // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2673 if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2675 // We should try to put op1 in a byte register
2676 varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2682 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2684 bool isWriteBarrierAsgNode;
2685 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2687 GCInfo::WriteBarrierForm wbf;
2688 if (isWriteBarrierAsgNode)
2689 wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2691 wbf = GCInfo::WBF_NoBarrier;
2694 regMaskTP wbaLockedRegs;
2695 wbaLockedRegs = lockedRegs;
2696 if (isWriteBarrierAsgNode)
2698 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2700 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2703 wbaLockedRegs |= RBM_WRITE_BARRIER;
2704 op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2705 assert(REG_WRITE_BARRIER == REG_EDX);
2706 op1PredictReg = PREDICT_REG_EDX;
2711 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2713 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2716 op1PredictReg = PREDICT_REG_ECX;
2717 op2PredictReg = PREDICT_REG_EDX;
2718 #elif defined(_TARGET_ARM_)
2719 op1PredictReg = PREDICT_REG_R0;
2720 op2PredictReg = PREDICT_REG_R1;
2722 // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2723 if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2725 op1 = op1->gtOp.gtOp1;
2727 #else // !_TARGET_X86_ && !_TARGET_ARM_
2728 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2734 /* Are we supposed to evaluate RHS first? */
2736 if (tree->gtFlags & GTF_REVERSE_OPS)
2738 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2740 #if CPU_HAS_BYTE_REGS
2741 // Should we ensure that op2 gets evaluated into a byte register?
2742 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2744 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2745 // and we can't select one that is already reserved (i.e. lockedRegs)
2747 op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2748 op2->gtUsedRegs |= op2Mask;
2750 // No longer a simple assignment because we're using extra registers and might
2751 // have interference between op1 and op2. See DevDiv #136681
2752 simpleAssignment = false;
2756 * For a simple assignment we don't want the op2Mask to be
2757 * marked as interfering with the LCL_VAR, since it is likely
2758 * that we will want to enregister the LCL_VAR in exactly
2759 * the register that is used to compute op2
2761 tmpMask = lockedRegs;
2763 if (!simpleAssignment)
2766 regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2768 // Did we relax the register prediction for op1 and op2 above ?
2769 // - because we are depending upon op1 being enregistered
2771 if ((op1PredictReg == PREDICT_NONE) &&
2772 ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2774 /* We must be assigning into an enregistered LCL_VAR */
2775 noway_assert(op1->gtOper == GT_LCL_VAR);
2776 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2777 noway_assert(varDsc->lvRegNum != REG_STK);
2779 /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2780 varDsc->lvDependReg = true;
2785 // For the case of simpleAssignments op2 should always be evaluated first
2786 noway_assert(!simpleAssignment);
2788 regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2789 if (isWriteBarrierAsgNode)
2791 wbaLockedRegs |= op1->gtUsedRegs;
2793 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2795 #if CPU_HAS_BYTE_REGS
2796 // Should we ensure that op2 gets evaluated into a byte register?
2797 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2799 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2800 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2803 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2804 op2->gtUsedRegs |= op2Mask;
2809 if (rpHasVarIndexForPredict(op2PredictReg))
2814 if (isWriteBarrierAsgNode)
2816 #if NOGC_WRITE_BARRIERS
2818 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2822 /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2823 passed to the write-barrier call in REG_WRITE_BARRIER */
2827 if (op1->gtOper == GT_IND)
2834 /* Special handling of indirect assigns for write barrier */
2836 bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2839 /* Check address mode for enregisterable locals */
2843 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2845 rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2847 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2849 rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2854 if (op2->gtOper == GT_LCL_VAR)
2856 rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2859 // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2860 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2862 rpRecordRegIntf(RBM_WRITE_BARRIER,
2863 rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2865 tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2870 #endif // NOGC_WRITE_BARRIERS
2872 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2877 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2880 // For the ARM target we have an optimized JIT Helper
2881 // that only trashes a subset of the callee saved registers
2884 // NOTE: Adding it to the gtUsedRegs will cause the interference to
2885 // be added appropriately
2887 // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
2888 // instead of gtUsedRegs, because the latter will be modified later, but we need
2889 // to remember to add the interference.
2891 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2893 op1->gtUsedRegs |= RBM_R0;
2894 op2->gtUsedRegs |= RBM_R1;
2895 #else // _TARGET_ARM_
2899 printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2901 // We have to call a normal JIT helper to perform the Write Barrier Assignment
2902 // It will trash the callee saved registers
2904 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2905 #endif // _TARGET_ARM_
2907 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2910 if (simpleAssignment)
2913 * Consider a simple assignment to a local:
2917 * Since the "=" node is visited after the variable
2918 * is marked live (assuming it's live after the
2919 * assignment), we don't want to use the register
2920 * use mask of the "=" node but rather that of the
2923 tree->gtUsedRegs = op1->gtUsedRegs;
2927 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2929 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2935 /* assigning shift operators */
2937 noway_assert(type != TYP_LONG);
2939 #if CPU_LOAD_STORE_ARCH
2940 predictReg = PREDICT_ADDR;
2942 predictReg = PREDICT_NONE;
2945 /* shift count is handled same as ordinary shift */
2946 goto HANDLE_SHIFT_COUNT;
2949 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2951 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2953 // We need a scratch register for the LEA instruction
2954 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2957 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2962 /* Cannot cast to VOID */
2963 noway_assert(type != TYP_VOID);
2965 /* cast to long is special */
2966 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2968 noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2969 #if CPU_LONG_USES_REGPAIR
2970 rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2972 if (rpHasVarIndexForPredict(predictReg))
2974 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2975 rpAsgVarNum = tgtIndex;
2977 // We don't need any register as we plan on writing to the rpAsgVarNum register
2978 predictReg = PREDICT_NONE;
2980 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2981 tgtVar->lvDependReg = true;
2983 if (tgtVar->lvOtherReg != REG_STK)
2985 predictRegHi = PREDICT_NONE;
2990 if (predictReg == PREDICT_NONE)
2992 predictReg = PREDICT_SCRATCH_REG;
2995 // If we are widening an int into a long using a targeted register pair we
2996 // should retarget so that the low part get loaded into the appropriate register
2997 else if (predictReg == PREDICT_PAIR_R0R1)
2999 predictReg = PREDICT_REG_R0;
3000 predictRegHi = PREDICT_REG_R1;
3002 else if (predictReg == PREDICT_PAIR_R2R3)
3004 predictReg = PREDICT_REG_R2;
3005 predictRegHi = PREDICT_REG_R3;
3009 // If we are widening an int into a long using a targeted register pair we
3010 // should retarget so that the low part get loaded into the appropriate register
3011 else if (predictReg == PREDICT_PAIR_EAXEDX)
3013 predictReg = PREDICT_REG_EAX;
3014 predictRegHi = PREDICT_REG_EDX;
3016 else if (predictReg == PREDICT_PAIR_ECXEBX)
3018 predictReg = PREDICT_REG_ECX;
3019 predictRegHi = PREDICT_REG_EBX;
3023 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3025 #if CPU_LONG_USES_REGPAIR
3026 if (predictRegHi != PREDICT_NONE)
3028 // Now get one more reg for the upper part
3029 regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3032 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3036 /* cast from long is special - it frees a register */
3037 if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
3038 && op1->gtType == TYP_LONG)
3040 if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3041 predictReg = PREDICT_REG;
3043 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3045 // If we have 2 or more regs, free one of them
3046 if (!genMaxOneBit(regMask))
3048 /* Clear the 2nd lowest bit in regMask */
3049 /* First set tmpMask to the lowest bit in regMask */
3050 tmpMask = genFindLowestBit(regMask);
3051 /* Next find the second lowest bit in regMask */
3052 tmpMask = genFindLowestBit(regMask & ~tmpMask);
3053 /* Clear this bit from regmask */
3054 regMask &= ~tmpMask;
3056 tree->gtUsedRegs = op1->gtUsedRegs;
3060 #if CPU_HAS_BYTE_REGS
3061 /* cast from signed-byte is special - it uses byteable registers */
3062 if (type == TYP_INT)
3064 var_types smallType;
3066 if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3067 smallType = tree->gtCast.CastOp()->TypeGet();
3069 smallType = tree->gtCast.gtCastType;
3071 if (smallType == TYP_BYTE)
3073 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3075 if ((regMask & RBM_BYTE_REGS) == 0)
3076 regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3078 tree->gtUsedRegs = (regMaskSmall)regMask;
3084 #if FEATURE_STACK_FP_X87
3085 /* cast to float/double is special */
3086 if (varTypeIsFloating(type))
3088 switch (op1->TypeGet())
3090 /* uses fild, so don't need to be loaded to reg */
3093 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3094 tree->gtUsedRegs = op1->gtUsedRegs;
3102 /* Casting from integral type to floating type is special */
3103 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3105 if (opts.compCanUseSSE2)
3107 // predict for SSE2 based casting
3108 if (predictReg <= PREDICT_REG)
3109 predictReg = PREDICT_SCRATCH_REG;
3110 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3112 // Get one more int reg to hold cast result
3113 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3114 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3120 #if FEATURE_FP_REGALLOC
3121 // Are we casting between int to float or float to int
3122 // Fix 388428 ARM JitStress WP7
3123 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3125 // op1 needs to go into a register
3126 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3129 if (varTypeIsFloating(op1->TypeGet()))
3131 // We also need a fp scratch register for the convert operation
3132 regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3133 PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3136 // We also need a register to hold the result
3137 regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3138 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3143 /* otherwise must load op1 into a register */
3148 #ifdef _TARGET_XARCH_
3149 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3151 // This is a special case to handle the following
3152 // optimization: conv.i4(round.d(d)) -> round.i(d)
3153 // if flowgraph 3186
3155 if (predictReg <= PREDICT_REG)
3156 predictReg = PREDICT_SCRATCH_REG;
3158 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3160 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3162 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3170 if (tree->TypeGet() == TYP_LONG)
3172 // On ARM this consumes an extra register for the '0' value
3173 if (predictReg <= PREDICT_REG)
3174 predictReg = PREDICT_SCRATCH_REG;
3176 regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3178 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3180 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3183 #endif // _TARGET_ARM_
3188 // these unary operators will write new values
3189 // and thus will need a scratch register
3191 /* generic unary operators */
3193 if (predictReg <= PREDICT_REG)
3194 predictReg = PREDICT_SCRATCH_REG;
3199 // these unary operators do not write new values
3200 // and thus won't need a scratch register
3201 CLANG_FORMAT_COMMENT_ANCHOR;
3206 tree->gtUsedRegs = 0;
3211 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3212 tree->gtUsedRegs = op1->gtUsedRegs;
3216 case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3218 bool intoReg = true;
3219 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3221 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3223 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3226 if (predictReg == PREDICT_ADDR)
3230 else if (predictReg == PREDICT_NONE)
3232 if (type != TYP_LONG)
3238 predictReg = PREDICT_REG;
3242 /* forcing to register? */
3243 if (intoReg && (type != TYP_LONG))
3245 rsvdRegs |= RBM_LASTUSE;
3251 /* check for address mode */
3252 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3255 #if CPU_LOAD_STORE_ARCH
3256 // We may need a scratch register for loading a long
3257 if (type == TYP_LONG)
3259 /* This scratch register immediately dies */
3260 tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3262 #endif // CPU_LOAD_STORE_ARCH
3265 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3267 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3269 /* These integer register(s) immediately die */
3270 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3271 // Two integer registers are required for a TYP_DOUBLE
3272 if (type == TYP_DOUBLE)
3274 rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3278 /* forcing to register? */
3281 regMaskTP lockedMask = lockedRegs | rsvdRegs;
3284 // We will compute a new regMask that holds the register(s)
3285 // that we will load the indirection into.
3287 CLANG_FORMAT_COMMENT_ANCHOR;
3289 #ifndef _TARGET_64BIT_
3290 if (type == TYP_LONG)
3292 // We need to use multiple load instructions here:
3293 // For the first register we can not choose
3294 // any registers that are being used in place or
3295 // any register in the current regMask
3297 regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3299 // For the second register we can choose a register that was
3300 // used in place or any register in the old now overwritten regMask
3301 // but not the same register that we picked above in 'regMask'
3303 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3304 regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3309 // We will use one load instruction here:
3310 // The load target register can be a register that was used in place
3311 // or one of the registers from the original regMask.
3313 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3314 regMask = rpPredictRegPick(type, predictReg, lockedMask);
3317 else if (predictReg != PREDICT_ADDR)
3319 /* Unless the caller specified PREDICT_ADDR */
3320 /* we don't return the temp registers used */
3321 /* to form the address */
3326 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3338 /* Floating point comparison uses EAX for flags */
3339 if (varTypeIsFloating(op1->TypeGet()))
3345 if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3347 // Some comparisons are converted to ?:
3348 noway_assert(!fgMorphRelopToQmark(op1));
3350 if (predictReg <= PREDICT_REG)
3351 predictReg = PREDICT_SCRATCH_REG;
3353 // The set instructions need a byte register
3354 regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3359 #ifdef _TARGET_XARCH_
3361 // Optimize the compare with a constant cases for xarch
3362 if (op1->gtOper == GT_CNS_INT)
3364 if (op2->gtOper == GT_CNS_INT)
3366 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3367 rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3368 tree->gtUsedRegs = op2->gtUsedRegs;
3371 else if (op2->gtOper == GT_CNS_INT)
3373 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3374 tree->gtUsedRegs = op1->gtUsedRegs;
3377 else if (op2->gtOper == GT_CNS_LNG)
3379 regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3381 // We also need one extra register to read values from
3382 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3383 #endif // _TARGET_X86_
3384 tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3387 #endif // _TARGET_XARCH_
3390 unsigned op1TypeSize;
3391 unsigned op2TypeSize;
3393 op1TypeSize = genTypeSize(op1->TypeGet());
3394 op2TypeSize = genTypeSize(op2->TypeGet());
3396 op1PredictReg = PREDICT_REG;
3397 op2PredictReg = PREDICT_REG;
3399 if (tree->gtFlags & GTF_REVERSE_OPS)
3401 #ifdef _TARGET_XARCH_
3402 if (op1TypeSize == sizeof(int))
3403 op1PredictReg = PREDICT_NONE;
3406 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3407 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3411 #ifdef _TARGET_XARCH_
3412 // For full DWORD compares we can have
3414 // op1 is an address mode and op2 is a register
3416 // op1 is a register and op2 is an address mode
3418 if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3420 if (op2->gtOper == GT_LCL_VAR)
3422 unsigned lclNum = op2->gtLclVar.gtLclNum;
3423 varDsc = lvaTable + lclNum;
3424 /* Did we predict that this local will be enregistered? */
3425 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3427 op1PredictReg = PREDICT_ADDR;
3431 // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3432 if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3433 op2PredictReg = PREDICT_ADDR;
3434 #endif // _TARGET_XARCH_
3436 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3438 if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3441 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3445 #ifdef _TARGET_XARCH_
3446 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3447 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3448 // if one of the registers is small and the types aren't equal.
3450 if (regMask == RBM_NONE)
3452 rpPredictReg op1xPredictReg, op2xPredictReg;
3455 if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3457 op1xPredictReg = op2PredictReg;
3458 op2xPredictReg = op1PredictReg;
3464 op1xPredictReg = op1PredictReg;
3465 op2xPredictReg = op2PredictReg;
3469 if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
3470 (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3471 varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
3473 bool needTmp = false;
3475 // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3476 // We could predict a byteable register for op2x, but what if we don't get it?
3477 // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3479 if (varTypeIsByte(op1x->TypeGet()))
3485 if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3487 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3492 if (op1x->TypeGet() != op2x->TypeGet())
3498 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3502 #endif // _TARGET_XARCH_
3504 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3509 #ifndef _TARGET_AMD64_
3510 if (type == TYP_LONG)
3512 assert(tree->gtIsValid64RsltMul());
3514 /* Strip out the cast nodes */
3516 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3517 op1 = op1->gtCast.CastOp();
3518 op2 = op2->gtCast.CastOp();
3522 #endif // !_TARGET_AMD64_
3525 #if defined(_TARGET_X86_)
3526 // This will done by a 64-bit imul "imul eax, reg"
3527 // (i.e. EDX:EAX = EAX * reg)
3529 /* Are we supposed to evaluate op2 first? */
3530 if (tree->gtFlags & GTF_REVERSE_OPS)
3532 rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3533 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3537 rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3538 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3541 /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3543 tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3545 /* set regMask to the set of held registers */
3547 regMask = RBM_PAIR_TMP_LO;
3549 if (type == TYP_LONG)
3550 regMask |= RBM_PAIR_TMP_HI;
3552 #elif defined(_TARGET_ARM_)
3553 // This will done by a 4 operand multiply
3555 // Are we supposed to evaluate op2 first?
3556 if (tree->gtFlags & GTF_REVERSE_OPS)
3558 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3559 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3563 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3564 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3567 // set regMask to the set of held registers,
3568 // the two scratch register we need to compute the mul result
3570 regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3572 // set gtUsedRegs to regMask and the registers needed by op1 and op2
3574 tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3576 #else // !_TARGET_X86_ && !_TARGET_ARM_
3577 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3584 /* We use imulEAX for most unsigned multiply operations */
3585 if (tree->gtOverflow())
3587 if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3602 tree->gtUsedRegs = 0;
3604 if (predictReg <= PREDICT_REG)
3605 predictReg = PREDICT_SCRATCH_REG;
3610 if (tree->gtFlags & GTF_REVERSE_OPS)
3612 op1PredictReg = PREDICT_REG;
3613 #if !CPU_LOAD_STORE_ARCH
3614 if (genTypeSize(op1->gtType) >= sizeof(int))
3615 op1PredictReg = PREDICT_NONE;
3617 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3618 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3622 op2PredictReg = PREDICT_REG;
3623 #if !CPU_LOAD_STORE_ARCH
3624 if (genTypeSize(op2->gtType) >= sizeof(int))
3625 op2PredictReg = PREDICT_NONE;
3627 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3629 // For most ALU operations we can generate a single instruction that encodes
3630 // a small immediate integer constant value. (except for multiply)
3632 if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3634 ssize_t ival = op2->gtIntCon.gtIconVal;
3635 if (codeGen->validImmForAlu(ival))
3637 op2PredictReg = PREDICT_NONE;
3639 else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3640 ((oper == GT_ADD) || (oper == GT_SUB)))
3642 op2PredictReg = PREDICT_NONE;
3645 if (op2PredictReg == PREDICT_NONE)
3647 op2->gtUsedRegs = RBM_NONE;
3652 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3655 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3657 #if CPU_HAS_BYTE_REGS
3658 /* We have special register requirements for byte operations */
3660 if (varTypeIsByte(tree->TypeGet()))
3662 /* For 8 bit arithmetic, one operands has to be in a
3663 byte-addressable register, and the other has to be
3664 in a byte-addressable reg or in memory. Assume it's in a reg */
3666 regMaskTP regByteMask = 0;
3667 regMaskTP op1ByteMask = op1->gtUsedRegs;
3669 if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3671 // Pick a Byte register to use for op1
3672 regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3673 op1ByteMask = regByteMask;
3676 if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3678 // Pick a Byte register to use for op2, avoiding the one used by op1
3679 regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3684 tree->gtUsedRegs |= regByteMask;
3685 regMask = regByteMask;
3697 /* non-integer division handled in generic way */
3698 if (!varTypeIsIntegral(type))
3700 tree->gtUsedRegs = 0;
3701 if (predictReg <= PREDICT_REG)
3702 predictReg = PREDICT_SCRATCH_REG;
3703 goto GENERIC_BINARY;
3706 #ifndef _TARGET_64BIT_
3708 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3710 /* Special case: a mod with an int op2 is done inline using idiv or div
3711 to avoid a costly call to the helper */
3713 noway_assert((op2->gtOper == GT_CNS_LNG) &&
3714 (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3716 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3717 if (tree->gtFlags & GTF_REVERSE_OPS)
3719 tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3720 rsvdRegs | op1->gtRsvdRegs);
3721 tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3725 tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3727 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3729 regMask = RBM_PAIR_TMP;
3730 #else // !_TARGET_X86_ && !_TARGET_ARM_
3731 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3732 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3735 (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3736 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3740 #endif // _TARGET_64BIT_
3742 /* no divide immediate, so force integer constant which is not
3743 * a power of two to register
3746 if (op2->OperKind() & GTK_CONST)
3748 ssize_t ival = op2->gtIntConCommon.IconValue();
3750 /* Is the divisor a power of 2 ? */
3752 if (ival > 0 && genMaxOneBit(size_t(ival)))
3757 op2PredictReg = PREDICT_SCRATCH_REG;
3761 /* Non integer constant also must be enregistered */
3762 op2PredictReg = PREDICT_REG;
3765 regMaskTP trashedMask;
3766 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3767 regMaskTP op1ExcludeMask;
3768 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3769 regMaskTP op2ExcludeMask;
3770 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3772 #ifdef _TARGET_XARCH_
3773 /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3774 * we can safely allow the "b" value to die. Unfortunately, if we simply
3775 * mark the node "b" as using EDX, this will not work if "b" is a register
3776 * variable that dies with this particular reference. Thus, if we want to
3777 * avoid this situation (where we would have to spill the variable from
3778 * EDX to someplace else), we need to explicitly mark the interference
3779 * of the variable at this point.
3782 if (op2->gtOper == GT_LCL_VAR)
3784 unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3785 varDsc = lvaTable + lclNum;
3786 if (varDsc->lvTracked)
3791 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3792 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3793 varDsc->lvVarIndex);
3794 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3795 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3796 varDsc->lvVarIndex);
3799 VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3800 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3804 /* set the held register based on opcode */
3805 if (oper == GT_DIV || oper == GT_UDIV)
3809 trashedMask = (RBM_EAX | RBM_EDX);
3811 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3813 #endif // _TARGET_XARCH_
3816 trashedMask = RBM_NONE;
3817 op1ExcludeMask = RBM_NONE;
3818 op2ExcludeMask = RBM_NONE;
3821 /* set the lvPref reg if possible */
3824 * Walking the gtNext link twice from here should get us back
3825 * to our parent node, if this is an simple assignment tree.
3827 dest = tree->gtNext;
3828 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3829 dest->gtNext->gtOp.gtOp2 == tree)
3831 varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3832 varDsc->addPrefReg(regMask, this);
3834 #ifdef _TARGET_XARCH_
3835 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3837 op1PredictReg = PREDICT_SCRATCH_REG;
3840 /* are we supposed to evaluate op2 first? */
3841 if (tree->gtFlags & GTF_REVERSE_OPS)
3843 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3844 rsvdRegs | op1->gtRsvdRegs);
3845 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3849 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3850 rsvdRegs | op2->gtRsvdRegs);
3851 rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3856 /* grab EAX, EDX for this tree node */
3857 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3865 if (predictReg <= PREDICT_REG)
3866 predictReg = PREDICT_SCRATCH_REG;
3868 #ifndef _TARGET_64BIT_
3869 if (type == TYP_LONG)
3871 if (op2->IsCnsIntOrI())
3873 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3874 // no register used by op2
3875 op2->gtUsedRegs = 0;
3876 tree->gtUsedRegs = op1->gtUsedRegs;
3880 // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3881 tmpMask = lockedRegs;
3882 tmpMask &= ~RBM_LNGARG_0;
3883 tmpMask &= ~RBM_SHIFT_LNG;
3885 // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3886 if (tree->gtFlags & GTF_REVERSE_OPS)
3888 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3889 tmpMask |= RBM_SHIFT_LNG;
3890 // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife
3891 // Fix 383843 X86/ARM ILGEN
3892 rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3893 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3897 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3898 tmpMask |= RBM_LNGARG_0;
3899 // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3900 // Fix 383839 ARM ILGEN
3901 rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3902 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3904 regMask = RBM_LNGRET; // function return registers
3905 op1->gtUsedRegs |= RBM_LNGARG_0;
3906 op2->gtUsedRegs |= RBM_SHIFT_LNG;
3908 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3910 // We are using a helper function to do shift:
3912 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3916 #endif // _TARGET_64BIT_
3918 #ifdef _TARGET_XARCH_
3919 if (!op2->IsCnsIntOrI())
3920 predictReg = PREDICT_NOT_REG_ECX;
3924 // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3926 regMaskTP tmpRsvdRegs;
3928 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3930 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3931 rsvdRegs = RBM_LASTUSE;
3932 tmpRsvdRegs = RBM_NONE;
3937 // Special case op1 of a constant
3938 if (op1->IsCnsIntOrI())
3939 tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3940 // System.Xml.Schema.BitSet:Get(int):bool
3942 tmpRsvdRegs = op1->gtRsvdRegs;
3946 if (!op2->IsCnsIntOrI())
3948 if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3950 op2PredictReg = PREDICT_REG_SHIFT;
3954 op2PredictReg = PREDICT_REG;
3957 /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3958 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3960 // If our target arch has a REG_SHIFT register then
3961 // we set the PrefReg when we have a LclVar for op2
3962 // we add an interference with REG_SHIFT for any other LclVars alive at op2
3963 if (REG_SHIFT != REG_NA)
3965 VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3967 while (op2->gtOper == GT_COMMA)
3969 op2 = op2->gtOp.gtOp2;
3972 if (op2->gtOper == GT_LCL_VAR)
3974 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3975 varDsc->setPrefReg(REG_SHIFT, this);
3976 if (varDsc->lvTracked)
3978 VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3982 // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3983 // excluding the LclVar that was used for the shift amount as it is read-only
3984 // and can be kept alive through the shift operation
3986 rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3987 // In case op2Mask doesn't contain the required shift register,
3988 // we will or it in now.
3989 op2Mask |= RBM_SHIFT;
3993 if (tree->gtFlags & GTF_REVERSE_OPS)
3995 assert(regMask == RBM_NONE);
3996 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3999 #if CPU_HAS_BYTE_REGS
4000 if (varTypeIsByte(type))
4002 // Fix 383789 X86 ILGEN
4003 // Fix 383813 X86 ILGEN
4004 // Fix 383828 X86 ILGEN
4005 if (op1->gtOper == GT_LCL_VAR)
4007 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4008 if (varDsc->lvTracked)
4010 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4012 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4013 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4016 if ((regMask & RBM_BYTE_REGS) == 0)
4018 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4019 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4022 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4026 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4032 if (tree->gtFlags & GTF_REVERSE_OPS)
4034 if (predictReg == PREDICT_NONE)
4036 predictReg = PREDICT_REG;
4038 else if (rpHasVarIndexForPredict(predictReg))
4040 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4041 predictReg = PREDICT_SCRATCH_REG;
4044 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4045 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4049 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4051 /* CodeGen will enregister the op2 side of a GT_COMMA */
4052 if (predictReg == PREDICT_NONE)
4054 predictReg = PREDICT_REG;
4056 else if (rpHasVarIndexForPredict(predictReg))
4058 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4059 predictReg = PREDICT_SCRATCH_REG;
4062 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4064 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4066 tree->gtUsedRegs = op2->gtUsedRegs;
4067 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4069 LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4071 if (op2VarDsc->lvTracked)
4073 VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4074 rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4081 noway_assert(op1 != NULL && op2 != NULL);
4084 * If the gtUsedRegs conflicts with lockedRegs
4085 * then we going to have to spill some registers
4086 * into the non-trashed register set to keep it alive
4090 regMaskTP spillRegs;
4091 spillRegs = lockedRegs & tree->gtUsedRegs;
4095 /* Find the next register that needs to be spilled */
4096 tmpMask = genFindLowestBit(spillRegs);
4101 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4102 gtDispTree(tree, 0, NULL, true);
4105 /* In Codegen it will typically introduce a spill temp here */
4106 /* rather than relocating the register to a non trashed reg */
4107 rpPredictSpillCnt++;
4110 /* Remove it from the spillRegs and lockedRegs*/
4111 spillRegs &= ~tmpMask;
4112 lockedRegs &= ~tmpMask;
4115 VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4117 /* Evaluate the <cond> subtree */
4118 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4119 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4120 tree->gtUsedRegs = op1->gtUsedRegs;
4122 noway_assert(op2->gtOper == GT_COLON);
4123 if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4125 // Don't try to target the register specified in predictReg when we have complex subtrees
4127 predictReg = PREDICT_SCRATCH_REG;
4129 GenTree* elseTree = op2->AsColon()->ElseNode();
4130 GenTree* thenTree = op2->AsColon()->ThenNode();
4132 noway_assert(thenTree != NULL && elseTree != NULL);
4134 // Update compCurLife to only those vars live on the <then> subtree
4136 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4138 if (type == TYP_VOID)
4140 /* Evaluate the <then> subtree */
4141 rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4143 predictReg = PREDICT_NONE;
4147 // A mask to use to force the predictor to choose low registers (to reduce code size)
4148 regMaskTP avoidRegs = RBM_NONE;
4150 avoidRegs = (RBM_R12 | RBM_LR);
4152 if (predictReg <= PREDICT_REG)
4153 predictReg = PREDICT_SCRATCH_REG;
4155 /* Evaluate the <then> subtree */
4157 rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4161 rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4162 if (op1PredictReg != PREDICT_NONE)
4163 predictReg = op1PredictReg;
4167 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4169 /* Evaluate the <else> subtree */
4170 // First record the post-then liveness, and reset the current liveness to the else
4172 CLANG_FORMAT_COMMENT_ANCHOR;
4175 VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4178 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4180 rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4181 tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4183 // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4184 // They each have only one successor, which they share. Their live-out sets must equal the
4185 // live-in set of this virtual successor block, and thus must be the same. We can assert
4186 // that equality here.
4187 assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4191 regMaskTP reloadMask = RBM_NONE;
4195 regMaskTP reloadReg;
4197 /* Get an extra register to hold it */
4198 reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4202 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4203 gtDispTree(tree, 0, NULL, true);
4206 reloadMask |= reloadReg;
4211 /* update the gtUsedRegs mask */
4212 tree->gtUsedRegs |= reloadMask;
4219 tree->gtUsedRegs = RBM_NONE;
4222 /* Is there a return value? */
4225 #if FEATURE_FP_REGALLOC
4226 if (varTypeIsFloating(type))
4228 predictReg = PREDICT_FLTRET;
4229 if (type == TYP_FLOAT)
4230 regMask = RBM_FLOATRET;
4232 regMask = RBM_DOUBLERET;
4236 if (isRegPairType(type))
4238 predictReg = PREDICT_LNGRET;
4239 regMask = RBM_LNGRET;
4243 predictReg = PREDICT_INTRET;
4244 regMask = RBM_INTRET;
4246 if (info.compCallUnmanaged)
4248 lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4250 rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4251 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4254 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4255 // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED.
4256 // We could optimize on registers based on int/long or no return value. But to
4257 // keep it simple we will mark entire RBM_PROFILER_RETURN_USED as used regs here.
4258 if (compIsProfilerHookNeeded())
4260 tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4269 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4270 regMask = genReturnRegForTree(tree);
4271 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4274 tree->gtUsedRegs = 0;
4280 /* This must be a test of a relational operator */
4282 noway_assert(op1->OperIsCompare());
4284 /* Only condition code set by this operation */
4286 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4288 tree->gtUsedRegs = op1->gtUsedRegs;
4294 noway_assert(type <= TYP_INT);
4295 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4298 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4299 unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4302 // Table based switch requires an extra register for the table base
4303 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4305 tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4307 #else // !_TARGET_ARM_
4308 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4309 tree->gtUsedRegs = op1->gtUsedRegs;
4310 #endif // _TARGET_ARM_
4315 if (predictReg <= PREDICT_REG)
4316 predictReg = PREDICT_SCRATCH_REG;
4318 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4319 // Need a reg to load exponent into
4320 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4321 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4325 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4329 if (info.compInitMem)
4331 // We zero out two registers in the ARM codegen path
4333 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4337 op1->gtUsedRegs |= (regMaskSmall)regMask;
4338 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4340 // The result will be put in the reg we picked for the size
4341 // regMask = <already set as we want it to be>
4348 if (predictReg <= PREDICT_REG)
4349 predictReg = PREDICT_SCRATCH_REG;
4351 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4352 // registers (to reduce code size)
4354 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4357 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4359 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4363 unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4364 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4365 // If it has one bit set, and that's an arg reg...
4366 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4368 // We are passing the 'obj' in the argument registers
4370 regNumber rn = genRegNumFromMask(preferReg);
4372 // Add the registers used to pass the 'obj' to regMask.
4373 for (unsigned i = 0; i < objSize / 4; i++)
4375 if (rn == MAX_REG_ARG)
4378 regMask |= genRegMask(rn);
4379 rn = genRegArgNext(rn);
4384 // We are passing the 'obj' in the outgoing arg space
4385 // We will need one register to load into unless the 'obj' size is 4 or less.
4389 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4392 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4394 #else // !_TARGET_ARM_
4396 #endif // _TARGET_ARM_
4402 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4404 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4406 // A MKREFANY takes up two registers.
4407 regNumber rn = genRegNumFromMask(preferReg);
4409 if (rn < MAX_REG_ARG)
4411 regMask |= genRegMask(rn);
4412 rn = genRegArgNext(rn);
4413 if (rn < MAX_REG_ARG)
4414 regMask |= genRegMask(rn);
4417 if (regMask != RBM_NONE)
4419 // Condensation of GENERIC_BINARY path.
4420 assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4421 op2PredictReg = PREDICT_REG;
4422 regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4423 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4424 regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4425 tree->gtUsedRegs = (regMaskSmall)regMask;
4428 tree->gtUsedRegs = op1->gtUsedRegs;
4429 #endif // _TARGET_ARM_
4430 goto GENERIC_BINARY;
4437 goto GENERIC_BINARY;
4441 // Ensure we can write to op2. op2 will hold the output.
4442 if (predictReg < PREDICT_SCRATCH_REG)
4443 predictReg = PREDICT_SCRATCH_REG;
4445 if (tree->gtFlags & GTF_REVERSE_OPS)
4447 op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4448 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4452 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4453 op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4455 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4462 // This unary operator simply passes through the value from its child (much like GT_NOP)
4463 // and thus won't need a scratch register.
4464 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4465 tree->gtUsedRegs = op1->gtUsedRegs;
4472 noway_assert(!"unexpected simple operator in reg use prediction");
4477 /* See what kind of a special operator we have here */
4482 GenTreeArgList* list;
4484 unsigned regArgsNum;
4486 regMaskTP regArgMask;
4487 regMaskTP curArgMask;
4493 /* initialize so we can just or in various bits */
4494 tree->gtUsedRegs = RBM_NONE;
4496 #if GTF_CALL_REG_SAVE
4498 * Unless the GTF_CALL_REG_SAVE flag is set,
4499 * we can't preserve the RBM_CALLEE_TRASH registers.
4500 * (likewise we can't preserve the return registers)
4501 * So we remove them from the lockedRegs set and
4502 * record any of them in the keepMask
4505 if (tree->gtFlags & GTF_CALL_REG_SAVE)
4507 regMaskTP trashMask = genReturnRegForTree(tree);
4509 keepMask = lockedRegs & trashMask;
4510 lockedRegs &= ~trashMask;
4515 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4516 lockedRegs &= ~RBM_CALLEE_TRASH;
4522 /* Is there an object pointer? */
4523 if (tree->gtCall.gtCallObjp)
4525 /* Evaluate the instance pointer first */
4527 args = tree->gtCall.gtCallObjp;
4529 /* the objPtr always goes to an integer register (through temp or directly) */
4530 noway_assert(regArgsNum == 0);
4533 /* Must be passed in a register */
4535 noway_assert(args->gtFlags & GTF_LATE_ARG);
4537 /* Must be either a deferred reg arg node or a GT_ASG node */
4539 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4540 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4542 if (!args->IsArgPlaceHolderNode())
4544 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4547 VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4548 VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4550 /* process argument list */
4551 for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4553 args = list->Current();
4555 if (args->gtFlags & GTF_LATE_ARG)
4557 /* Must be either a Placeholder/NOP node or a GT_ASG node */
4559 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4560 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4562 if (!args->IsArgPlaceHolderNode())
4564 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4571 #ifdef FEATURE_FIXED_OUT_ARGS
4572 // We'll store this argument into the outgoing argument area
4573 // It needs to be in a register to be stored.
4575 predictReg = PREDICT_REG;
4577 #else // !FEATURE_FIXED_OUT_ARGS
4578 // We'll generate a push for this argument
4580 predictReg = PREDICT_NONE;
4581 if (varTypeIsSmall(args->TypeGet()))
4583 /* We may need to sign or zero extend a small type using a register */
4584 predictReg = PREDICT_SCRATCH_REG;
4588 rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4590 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4591 tree->gtUsedRegs |= args->gtUsedRegs;
4594 /* Is there a late argument list */
4597 regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4600 /* process the late argument list */
4601 for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4603 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4604 LclVarDsc* promotedStructLocal = NULL;
4606 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4607 tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4609 assert(list->OperIsList());
4611 args = list->Current();
4612 list = list->Rest();
4614 assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
4616 fgArgTabEntry* curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4617 assert(curArgTabEntry);
4619 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4621 curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4623 rpPredictReg argPredictReg;
4624 regMaskTP avoidReg = RBM_NONE;
4626 if (regNum != REG_STK)
4628 argPredictReg = rpGetPredictForReg(regNum);
4629 curArgMask |= genRegMask(regNum);
4633 assert(numSlots > 0);
4634 argPredictReg = PREDICT_NONE;
4636 // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4637 avoidReg = (RBM_R12 | RBM_LR);
4642 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4644 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4646 // 64-bit longs and doubles require 2 consecutive argument registers
4647 curArgMask |= genRegMask(REG_NEXT(regNum));
4649 else if (args->TypeGet() == TYP_STRUCT)
4651 GenTree* argx = args;
4652 GenTree* lclVarTree = NULL;
4654 /* The GT_OBJ may be be a child of a GT_COMMA */
4655 while (argx->gtOper == GT_COMMA)
4657 argx = argx->gtOp.gtOp2;
4659 unsigned originalSize = 0;
4661 if (argx->gtOper == GT_OBJ)
4663 originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4665 // Is it the address of a promoted struct local?
4666 if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4668 lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
4669 LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4670 if (varDsc->lvPromoted)
4671 promotedStructLocal = varDsc;
4674 else if (argx->gtOper == GT_LCL_VAR)
4676 varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
4677 originalSize = varDsc->lvSize();
4679 // Is it a promoted struct local?
4680 if (varDsc->lvPromoted)
4681 promotedStructLocal = varDsc;
4683 else if (argx->gtOper == GT_MKREFANY)
4685 originalSize = 2 * TARGET_POINTER_SIZE;
4689 noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4692 // We only pass arguments differently if it a struct local "independently" promoted, which
4693 // allows the field locals can be independently enregistered.
4694 if (promotedStructLocal != NULL)
4696 if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4697 promotedStructLocal = NULL;
4700 unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
4702 // Are we passing a TYP_STRUCT in multiple integer registers?
4703 // if so set up curArgMask to reflect this
4704 // Also slots is updated to reflect the number of outgoing arg slots that we will write
4705 if (regNum != REG_STK)
4707 regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4708 assert(genIsValidReg(regNum));
4709 regNumber nextReg = REG_NEXT(regNum);
4711 while (slots > 0 && nextReg <= regLast)
4713 curArgMask |= genRegMask(nextReg);
4714 nextReg = REG_NEXT(nextReg);
4719 if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4721 // All or a portion of this struct will be placed in the argument registers indicated by
4722 // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4723 // that the second arg to be evaluated interferes with the reg for the first, the third with
4724 // the regs for the first and second, etc. But since we always place the stack slots before
4725 // placing the register slots we do not add inteferences for any part of the struct that gets
4726 // passed on the stack.
4729 PREDICT_NONE; // We will target the indivual fields into registers but not the whole struct
4730 regMaskTP prevArgMask = RBM_NONE;
4731 for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4733 LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4734 if (fieldVarDsc->lvTracked)
4736 assert(lclVarTree != NULL);
4737 if (prevArgMask != RBM_NONE)
4739 rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4740 DEBUGARG("fieldVar/argReg"));
4743 // Now see many registers this uses up.
4744 unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4745 unsigned nextAfterLastRegOffset =
4746 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4747 TARGET_POINTER_SIZE;
4748 unsigned nextAfterLastArgRegOffset =
4749 min(nextAfterLastRegOffset,
4750 genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4752 for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4755 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4758 if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4763 if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4765 // Add the argument register used here as a preferred register for this fieldVarDsc
4767 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4768 fieldVarDsc->setPrefReg(firstRegUsed, this);
4771 compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4774 // If slots is greater than zero then part or all of this TYP_STRUCT
4775 // argument is passed in the outgoing argument area. (except HFA arg)
4777 if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4779 // We will need a register to address the TYP_STRUCT
4780 // Note that we can use an argument register in curArgMask as in
4781 // codegen we pass the stack portion of the argument before we
4782 // setup the register part.
4785 // Force the predictor to choose a LOW_REG here to reduce code bloat
4786 avoidReg = (RBM_R12 | RBM_LR);
4788 assert(tmpMask == RBM_NONE);
4789 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4791 // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4795 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4796 lockedRegs | regArgMask | tmpMask | avoidReg);
4799 } // (args->TypeGet() == TYP_STRUCT)
4800 #endif // _TARGET_ARM_
4802 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4803 // as we have already calculated the correct tmpMask and curArgMask values and
4804 // by calling rpPredictTreeRegUse we would just add unnecessary register inteferences.
4806 if (promotedStructLocal == NULL)
4808 /* Target the appropriate argument register */
4809 tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4812 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4813 // for the duration of the OBJ.
4814 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4816 GenTree* lclVarTree = fgIsIndirOfAddrOfLocal(args);
4817 assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4818 compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4821 regArgMask |= curArgMask;
4822 args->gtUsedRegs |= (tmpMask | regArgMask);
4823 tree->gtUsedRegs |= args->gtUsedRegs;
4824 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4826 if (args->gtUsedRegs != RBM_NONE)
4828 // Add register interference with the set of registers used or in use when we evaluated
4829 // the current arg, with whatever is alive after the current arg
4831 rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4833 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4835 assert(list == NULL);
4837 #ifdef LEGACY_BACKEND
4838 #if CPU_LOAD_STORE_ARCH
4839 #ifdef FEATURE_READYTORUN_COMPILER
4840 if (tree->gtCall.IsR2RRelativeIndir())
4842 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4844 #endif // FEATURE_READYTORUN_COMPILER
4845 #endif // CPU_LOAD_STORE_ARCH
4846 #endif // LEGACY_BACKEND
4848 regMaskTP callAddrMask;
4849 callAddrMask = RBM_NONE;
4850 #if CPU_LOAD_STORE_ARCH
4851 predictReg = PREDICT_SCRATCH_REG;
4853 predictReg = PREDICT_NONE;
4856 switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4858 case GTF_CALL_VIRT_STUB:
4860 // We only want to record an interference between the virtual stub
4861 // param reg and anything that's live AFTER the call, but we've not
4862 // yet processed the indirect target. So add virtualStubParamInfo.regMask
4863 // to interferingRegs.
4864 interferingRegs |= virtualStubParamInfo->GetRegMask();
4867 printf("Adding interference with Virtual Stub Param\n");
4869 codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4871 if (tree->gtCall.gtCallType == CT_INDIRECT)
4873 predictReg = virtualStubParamInfo->GetPredict();
4877 case GTF_CALL_VIRT_VTABLE:
4878 predictReg = PREDICT_SCRATCH_REG;
4881 case GTF_CALL_NONVIRT:
4882 predictReg = PREDICT_SCRATCH_REG;
4886 if (tree->gtCall.gtCallType == CT_INDIRECT)
4888 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4889 if (tree->gtCall.gtCallCookie)
4891 codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4893 callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4894 lockedRegs | regArgMask, RBM_LASTUSE);
4896 // Just in case we predict some other registers, force interference with our two special
4897 // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4898 callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4900 predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4904 rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4906 else if (predictReg != PREDICT_NONE)
4908 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4911 if (tree->gtFlags & GTF_CALL_UNMANAGED)
4913 // Need a register for tcbReg
4915 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4916 #if CPU_LOAD_STORE_ARCH
4917 // Need an extra register for tmpReg
4919 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4923 tree->gtUsedRegs |= callAddrMask;
4925 /* After the call restore the original value of lockedRegs */
4926 lockedRegs |= keepMask;
4928 /* set the return register */
4929 regMask = genReturnRegForTree(tree);
4931 if (regMask & rsvdRegs)
4933 // We will need to relocate the return register value
4934 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4935 #if FEATURE_FP_REGALLOC
4936 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4942 if (intRegMask == RBM_INTRET)
4944 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4946 else if (intRegMask == RBM_LNGRET)
4948 regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4952 noway_assert(!"unexpected return regMask");
4956 #if FEATURE_FP_REGALLOC
4959 if (floatRegMask == RBM_FLOATRET)
4961 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4963 else if (floatRegMask == RBM_DOUBLERET)
4965 regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4967 else // HFA return case
4969 for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4971 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4978 /* the return registers (if any) are killed */
4979 tree->gtUsedRegs |= regMask;
4981 #if GTF_CALL_REG_SAVE
4982 if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4985 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4986 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4990 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4991 // Mark required registers for emitting tailcall profiler callback as used
4992 if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4994 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
5001 // Figure out which registers can't be touched
5003 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5004 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5006 regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5011 #if CPU_LOAD_STORE_ARCH
5012 // We need a register to load the bounds of the MD array
5013 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5016 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5018 /* We need scratch registers to compute index-lower_bound.
5019 Also, gtArrInds[0]'s register will be used as the second
5020 addressability register (besides gtArrObj's) */
5022 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5023 lockedRegs | regMask | dimsMask, rsvdRegs);
5027 dimsMask |= dimMask;
5029 #ifdef _TARGET_XARCH_
5030 // INS_imul doesn't have an immediate constant.
5031 if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5032 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5034 tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5039 #ifdef _TARGET_XARCH_
5040 rsvdRegs |= RBM_EAX;
5042 if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5044 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5048 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5050 op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5052 #ifdef _TARGET_XARCH_
5053 rsvdRegs &= ~RBM_EAX;
5054 tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5055 rsvdRegs | regMask | op2Mask);
5056 tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5057 predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5060 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5065 case GT_ARR_BOUNDS_CHECK:
5067 regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5068 regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5069 rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5072 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5077 NO_WAY("unexpected special operator in reg use prediction");
5084 /* make sure we set them to something reasonable */
5085 if (tree->gtUsedRegs & RBM_ILLEGAL)
5086 noway_assert(!"used regs not set properly in reg use prediction");
5088 if (regMask & RBM_ILLEGAL)
5089 noway_assert(!"return value not set propery in reg use prediction");
5094 * If the gtUsedRegs conflicts with lockedRegs
5095 * then we are going to have to spill some registers
5096 * into the non-trashed register set to keep it alive
5098 regMaskTP spillMask;
5099 spillMask = tree->gtUsedRegs & lockedRegs;
5105 /* Find the next register that needs to be spilled */
5106 tmpMask = genFindLowestBit(spillMask);
5111 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5112 gtDispTree(tree, 0, NULL, true);
5113 if ((tmpMask & regMask) == 0)
5115 printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5116 gtDispTree(tree, 0, NULL, true);
5120 /* In Codegen it will typically introduce a spill temp here */
5121 /* rather than relocating the register to a non trashed reg */
5122 rpPredictSpillCnt++;
5124 /* Remove it from the spillMask */
5125 spillMask &= ~tmpMask;
5130 * If the return registers in regMask conflict with the lockedRegs
5131 * then we allocate extra registers for the reload of the conflicting
5134 * Set spillMask to the set of locked registers that have to be reloaded here.
5135 * reloadMask is set to the extra registers that are used to reload
5136 * the spilled lockedRegs.
5139 noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5140 spillMask = lockedRegs & regMask;
5144 /* Remove the spillMask from regMask */
5145 regMask &= ~spillMask;
5147 regMaskTP reloadMask = RBM_NONE;
5150 /* Get an extra register to hold it */
5151 regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5155 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5156 gtDispTree(tree, 0, NULL, true);
5159 reloadMask |= reloadReg;
5161 /* Remove it from the spillMask */
5162 spillMask &= ~genFindLowestBit(spillMask);
5165 /* Update regMask to use the reloadMask */
5166 regMask |= reloadMask;
5168 /* update the gtUsedRegs mask */
5169 tree->gtUsedRegs |= (regMaskSmall)regMask;
5172 regMaskTP regUse = tree->gtUsedRegs;
5173 regUse |= interferingRegs;
5175 if (!VarSetOps::IsEmpty(this, compCurLife))
5177 // Add interference between the current set of live variables and
5178 // the set of temporary registers need to evaluate the sub tree
5181 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5185 if (rpAsgVarNum != -1)
5187 // Add interference between the registers used (if any)
5188 // and the assignment target variable
5191 rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5194 // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5195 // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5196 // to the set of currently live variables. This new interference will prevent us
5197 // from using the register value used here for enregistering different live variable
5199 if (!VarSetOps::IsEmpty(this, compCurLife))
5201 rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5205 /* Do we need to restore the oldLastUseVars value */
5206 if (restoreLastUseVars)
5208 /* If we used a GT_ASG targeted register then we need to add
5209 * a variable interference between any new last use variables
5210 * and the GT_ASG targeted register
5212 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5214 rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5215 DEBUGARG("asgn tgt last use conflict"));
5217 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5223 #pragma warning(pop)
5226 #endif // LEGACY_BACKEND
5228 /****************************************************************************/
5229 /* Returns true when we must create an EBP frame
5230 This is used to force most managed methods to have EBP based frames
5231 which allows the ETW kernel stackwalker to walk the stacks of managed code
5232 this allows the kernel to perform lightweight profiling
// rpMustCreateEBPFrame: decide whether this method must be compiled with an
// EBP-based frame (rather than an ESP frame), primarily so the ETW kernel
// stackwalker can cheaply walk managed stacks (see the comment block above).
// In debug builds, *wbReason receives a string naming the heuristic that fired.
// NOTE(review): some lines (braces, 'result = true;' assignments) are elided
// in this view of the file; comments describe only the visible statements.
5234 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5236 bool result = false;
5238 const char* reason = nullptr;
// The size/shape heuristics below are guarded by ETW_EBP_FRAMED (the matching
// '#if' sits above this view; note the '#endif // ETW_EBP_FRAMED' below).
// Each check runs only while 'result' is still false, so the first heuristic
// that fires determines the recorded reason.
5242 if (!result && (opts.MinOpts() || opts.compDbgCode))
5244 INDEBUG(reason = "Debug Code");
5247 if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5249 INDEBUG(reason = "IL Code Size");
5252 if (!result && (fgBBcount > 3))
5254 INDEBUG(reason = "BasicBlock Count");
5257 if (!result && fgHasLoops)
5259 INDEBUG(reason = "Method has Loops");
5262 if (!result && (optCallCount >= 2))
5264 INDEBUG(reason = "Call Count");
5267 if (!result && (optIndirectCallCount >= 1))
5269 INDEBUG(reason = "Indirect Call");
5272 #endif // ETW_EBP_FRAMED
5274 // VM wants to identify the containing frame of an InlinedCallFrame always
5275 // via the frame register never the stack register so we need a frame.
5276 if (!result && (optNativeCallCount != 0))
5278 INDEBUG(reason = "Uses PInvoke");
5282 #ifdef _TARGET_ARM64_
5283 // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5287 INDEBUG(reason = "Temporary ARM64 force frame pointer");
5290 #endif // _TARGET_ARM64_
// On success, hand the winning reason string back to the caller (debug only).
5293 if ((result == true) && (wbReason != nullptr))
5302 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5304 /*****************************************************************************
5306 * Predict which variables will be assigned to registers
5307 * This is x86 specific and only predicts the integer registers and
5308 * must be conservative, any register that is predicted to be enregistered
5309 * must end up being enregistered.
5311 * The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5312 * predicted to be enregistered to minimize calls to rpPredictRegPick.
5317 #pragma warning(push)
5318 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
// rpPredictAssignRegVars: one pass of the legacy predictive register allocator.
// Walks the locals in ref-count-sorted order (lvaRefSorted) and predicts which
// tracked variables can be enregistered from the set 'regAvail', updating each
// LclVarDsc's lvRegNum / lvOtherReg / lvRegister, the per-register interference
// sets raLclRegIntf[], the frame-type decision rpFrameType, and the stack-cost
// estimate (raAddToStkPredict). Returns the set of registers it used to
// enregister variables ('regUsed').
// NOTE(review): this view of the file elides some lines (braces, else-arms,
// labels such as OK_TO_ENREGISTER / REVERSE_EBP_ENREG, and some assignments);
// comments below describe only what the visible statements establish.
5320 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
// On the early (non-pessimize) passes, (re)establish the default frame type.
5324 if (rpPasses <= rpPassesPessimize)
5326 // Assume that we won't have to reverse EBP enregistration
5327 rpReverseEBPenreg = false;
5329 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5330 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5331 rpFrameType = FT_EBP_FRAME;
5333 rpFrameType = FT_ESP_FRAME;
5337 // If we are using FPBASE as the frame register, we cannot also use it for
5339 if (rpFrameType == FT_EBP_FRAME)
5341 regAvail &= ~RBM_FPBASE;
5343 #endif // !ETW_EBP_FRAMED
// Remember the mask this pass is allowed to color from.
5346 rpPredictAssignMask = regAvail;
// Compute which incoming argument registers are live-in to the method.
5348 raSetupArgMasks(&codeGen->intRegState);
5349 #if !FEATURE_STACK_FP_X87
5350 raSetupArgMasks(&codeGen->floatRegState);
5353 // If there is a secret stub param, it is also live in
5354 if (info.compPublishStubParam)
5356 codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
// With no registers to hand out, reset every local to the stack.
5359 if (regAvail == RBM_NONE)
5364 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5366 #if FEATURE_STACK_FP_X87
5367 if (!varDsc->IsFloatRegType())
5370 varDsc->lvRegNum = REG_STK;
5371 if (isRegPairType(varDsc->lvType))
5372 varDsc->lvOtherReg = REG_STK;
5380 printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5381 printf("\n  Available registers = ");
5382 dspRegMask(regAvail);
5387 if (regAvail == RBM_NONE)
5392 /* We cannot change the lvVarIndexes at this point, so we  */
5393 /* can only re-order the existing set of tracked variables */
5394 /* Which will change the order in which we select the      */
5395 /* locals for enregistering.                               */
5397 assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5399 // Should not be set unless optimizing
5400 noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5406 fgDebugCheckBBlist();
5409 /* Initialize the weighted count of variables that could have */
5410 /* been enregistered but weren't                              */
5411 unsigned refCntStk = 0; // sum of ref counts for all stack based variables
5412 unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
5413 unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5415 unsigned refCntStkParam; // sum of ref counts for all stack based parameters
5416 unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5418 #if FEATURE_STACK_FP_X87
// Seed the counters from the x87 FP allocator's tallies when it is in use.
5419 refCntStkParam = raCntStkParamDblStackFP;
5420 refCntWtdStkDbl = raCntWtdStkDblStackFP;
5421 refCntStk = raCntStkStackFP;
5424 refCntWtdStkDbl = 0;
5426 #endif // FEATURE_STACK_FP_X87
5428 #endif // DOUBLE_ALIGN
5430 /* Set of registers used to enregister variables in the prediction */
5431 regMaskTP regUsed = RBM_NONE;
5433 /*-------------------------------------------------------------------------
5435 *  Predict/Assign the enregistered locals in ref-count order
// Tracks which tracked variables have not yet been considered; used below by
// the "possible same color" heuristic for callee-saved registers.
5439 VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5441 unsigned FPRegVarLiveInCnt;
5442 FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
// Main loop: consider each local, highest weighted ref count first.
5445 for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5447 bool notWorthy = false;
5451 regMaskTP regAvailForType;
5454 unsigned customVarOrderSize;
5455 regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5457 regNumber saveOtherReg;
5459 varDsc = lvaRefSorted[sortNum];
5461 #if FEATURE_STACK_FP_X87
5462 if (varTypeIsFloating(varDsc->TypeGet()))
5465 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5467 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5468 // be en-registered.
5469 noway_assert(!varDsc->lvRegister);
5476 /* Check the set of invariant things that would prevent enregistration */
5478 /* Ignore the variable if it's not tracked */
5479 if (!varDsc->lvTracked)
5482 /* Get hold of the index and the interference mask for the variable */
5483 varIndex = varDsc->lvVarIndex;
5485 // Remove 'varIndex' from unprocessedVars
5486 VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5488 // Skip the variable if it's marked as DoNotEnregister.
5490 if (varDsc->lvDoNotEnregister)
5493 /* TODO: For now if we have JMP all register args go to stack
5494 * TODO: Later consider extending the life of the argument or make a copy of it */
5496 if (compJmpOpUsed && varDsc->lvIsRegArg)
5499 /* Skip the variable if the ref count is zero */
5501 if (varDsc->lvRefCnt == 0)
5504 /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5506 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5511 /* Is the unweighted ref count too low to be interesting? */
5513 if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5514 (varDsc->lvRefCnt <= 1))
5516 /* Sometimes it's useful to enregister a variable with only one use */
5517 /* arguments referenced in loops are one example                    */
5519 if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5520 goto OK_TO_ENREGISTER;
5522 /* If the variable has a preferred register set it may be useful to put it there */
5523 if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5524 goto OK_TO_ENREGISTER;
5526 /* Keep going; the table is sorted by "weighted" ref count */
// Narrow the available set to the register file matching the variable's type.
5532 if (varTypeIsFloating(varDsc->TypeGet()))
5534 regType = varDsc->TypeGet();
5535 regAvailForType = regAvail & RBM_ALLFLOAT;
5540 regAvailForType = regAvail & RBM_ALLINT;
5544 isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5548 regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5552 /* If we don't have any registers available then skip the enregistration attempt */
5553 if (regAvailForType == RBM_NONE)
5556 // On the pessimize passes don't even try to enregister LONGS
5557 if (isRegPairType(varDsc->lvType))
5559 if (rpPasses > rpPassesPessimize)
5561 else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5565 // Set of registers to avoid when performing register allocation
5566 avoidReg = RBM_NONE;
5568 if (!varDsc->lvIsRegArg)
5570 /* For local variables,
5571 *  avoid the incoming arguments,
5572 *  but only if you conflict with them */
5574 if (raAvoidArgRegMask != 0)
// Scan the incoming register arguments and mark as "avoid" any whose incoming
// register this variable interferes with.
5577 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5579 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5581 if (!argDsc->lvIsRegArg)
5584 bool isFloat = argDsc->IsFloatRegType();
5585 regNumber inArgReg = argDsc->lvArgReg;
5586 regMaskTP inArgBit = genRegMask(inArgReg);
5588 // Is this inArgReg in the raAvoidArgRegMask set?
5590 if (!(raAvoidArgRegMask & inArgBit))
5593 noway_assert(argDsc->lvIsParam);
5594 noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5596 unsigned locVarIndex = varDsc->lvVarIndex;
5597 unsigned argVarIndex = argDsc->lvVarIndex;
5599 /* Does this variable interfere with the arg variable ? */
5600 if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5602 noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5603 /* Yes, so try to avoid the incoming arg reg */
5604 avoidReg |= inArgBit;
5608 noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5614 // Now we will try to predict which register the variable
5615 // could be enregistered in
// Build a preference-ordered list of candidate registers for this type.
5617 customVarOrderSize = MAX_VAR_ORDER_SIZE;
5619 raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5622 saveOtherReg = DUMMY_INIT(REG_NA);
// Try each candidate register in preference order.
5624 for (regInx = 0; regInx < customVarOrderSize; regInx++)
5626 regNumber regNum = customVarOrder[regInx];
5627 regMaskTP regBits = genRegMask(regNum);
5629 /* Skip this register if it isn't available */
5630 if ((regAvailForType & regBits) == 0)
5633 /* Skip this register if it interferes with the variable */
5635 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5638 if (varTypeIsFloating(regType))
// Doubles occupy a register pair: include the next register and check it too.
5643 regNumber regNext = REG_NEXT(regNum);
5644 regBits |= genRegMask(regNext);
5646 /* Skip if regNext interferes with the variable */
5647 if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5653 bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5654 bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5655 bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
5657 /* Skip this register if the weighted ref count is less than two
5658 and we are considering an unused callee saved register */
5660 if (lessThanTwoRefWtd && // less than two references (weighted)
5661 firstUseOfReg &&     // first use of this register
5662 calleeSavedReg)      // callee saved register
// Dipping into a fresh callee-saved register costs a prolog save/epilog
// restore; only do it if this variable plus the not-yet-processed variables
// that could share the same color make it worthwhile.
5664 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5666 // psc is abbreviation for possibleSameColor
5667 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5669 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5670 unsigned pscIndex = 0;
5671 while (pscIndexIter.NextElem(&pscIndex))
5673 LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5674 totalRefCntWtd += pscVar->lvRefCntWtd;
5675 if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5679 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5682 continue; // not worth spilling a callee saved register
5684 // otherwise we will spill this callee saved register,
5685 // because its uses when combined with the uses of
5686 // other yet to be processed candidates exceed our threshold.
5687 // totalRefCntWtd = totalRefCntWtd;
5690 /* Looks good - mark the variable as living in the register */
5692 if (isRegPairType(varDsc->lvType))
5694 if (firstHalf == false)
5696 /* Enregister the first half of the long */
5697 varDsc->lvRegNum = regNum;
5698 saveOtherReg = varDsc->lvOtherReg;
5699 varDsc->lvOtherReg = REG_STK;
5704 /* Ensure 'well-formed' register pairs */
5705 /* (those returned by gen[Pick|Grab]RegPair) */
5707 if (regNum < varDsc->lvRegNum)
5709 varDsc->lvOtherReg = varDsc->lvRegNum;
5710 varDsc->lvRegNum = regNum;
5714 varDsc->lvOtherReg = regNum;
5721 varDsc->lvRegNum = regNum;
5725 varDsc->lvOtherReg = REG_NEXT(regNum);
// Track EBP-based enregistrations separately: these totals feed the
// frame-type decision below.
5730 if (regNum == REG_FPBASE)
5732 refCntEBP += varDsc->lvRefCnt;
5733 refCntWtdEBP += varDsc->lvRefCntWtd;
5735 if (varDsc->lvIsParam)
5737 refCntStkParam += varDsc->lvRefCnt;
5742 /* Record this register in the regUsed set */
5745 /* The register is now ineligible for all interfering variables */
5747 VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
// For pair types, the second half of the pair picks up the same interferences.
5752 regNumber secondHalf = REG_NEXT(regNum);
5753 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5754 unsigned intfIndex = 0;
5755 while (iter.NextElem(&intfIndex))
5757 VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5762 /* If a register argument, remove its incoming register
5763 * from the "avoid" list */
5765 if (varDsc->lvIsRegArg)
5767 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5771 raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5776 /* A variable of TYP_LONG can take two registers */
5780 // Since we have successfully enregistered this variable it is
5781 // now time to move on and consider the next variable
// Fallthrough/failure handling for a TYP_LONG that only got one register.
5787 noway_assert(isRegPairType(varDsc->lvType));
5789 /* This TYP_LONG is partially enregistered */
5791 noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5793 if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5798 raAddToStkPredict(varDsc->lvRefCntWtd);
5803 if (varDsc->lvDependReg)
// Variable could not be enregistered: account for its stack cost and mark it
// as living on the stack.
5810 /* Weighted count of variables that could have been enregistered but weren't */
5811 raAddToStkPredict(varDsc->lvRefCntWtd);
5813 if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5814 raAddToStkPredict(varDsc->lvRefCntWtd);
5818 varDsc->lvRegister = false;
5820 varDsc->lvRegNum = REG_STK;
5821 if (isRegPairType(varDsc->lvType))
5822 varDsc->lvOtherReg = REG_STK;
5824 /* unweighted count of variables that were not enregistered */
5826 refCntStk += varDsc->lvRefCnt;
5829 if (varDsc->lvIsParam)
5831 refCntStkParam += varDsc->lvRefCnt;
5835 /* Is it a stack based double? */
5836 /* Note that double params are excluded since they cannot be double aligned */
5837 if (varDsc->lvType == TYP_DOUBLE)
5839 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5847 gtDispLclVar((unsigned)(varDsc - lvaTable));
5848 if (varDsc->lvTracked)
5849 printf("T%02u", varDsc->lvVarIndex);
5852 printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5853 if (varDsc->lvDoNotEnregister)
5854 printf(", do-not-enregister");
// Success path: commit the prediction for this variable.
5862 varDsc->lvRegister = true;
5864 // Record the fact that we enregistered a stack arg when tail call is used.
5865 if (compJmpOpUsed && !varDsc->lvIsRegArg)
5867 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5868 if (isRegPairType(varDsc->lvType))
5870 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5878 gtDispLclVar((unsigned)(varDsc - lvaTable));
5879 printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5880 refCntWtd2str(varDsc->lvRefCntWtd));
5881 varDsc->PrintVarReg();
5885 printf(":%s", getRegName(varDsc->lvOtherReg));
5894 noway_assert(refCntEBP == 0);
// Dump the counters that drive the frame-type decision.
5901 printf("; refCntStk       = %u\n", refCntStk);
5903 printf("; refCntEBP       = %u\n", refCntEBP);
5904 if (refCntWtdEBP > 0)
5905 printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5907 if (refCntStkParam > 0)
5908 printf("; refCntStkParam  = %u\n", refCntStkParam);
5909 if (refCntWtdStkDbl > 0)
5910 printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5915 /* Determine how the EBP register should be used */
5916 CLANG_FORMAT_COMMENT_ANCHOR;
5920 if (!codeGen->isFramePointerRequired())
5922 noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5925 First let us decide if we should use EBP to create a
5926 double-aligned frame, instead of enregistering variables
5929 if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5931 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5932 goto REVERSE_EBP_ENREG;
5935 if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5937 if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5939 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5940 goto REVERSE_EBP_ENREG;
5945 #endif // DOUBLE_ALIGN
5947 if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5949 #ifdef _TARGET_XARCH_
5951 /*  If we are using EBP to enregister variables then
5952 will we actually save bytes by setting up an EBP frame?
5954 Each stack reference is an extra byte of code if we use
5957 Here we measure the savings that we get by using EBP to
5958 enregister variables vs. the cost in code size that we
5959 pay when using an ESP based frame.
5961 We pay one byte of code for each refCntStk
5962 but we save one byte (or more) for each refCntEBP.
5964 Our savings are the elimination of a stack memory read/write.
5965 We use the loop weighted value of
5966 refCntWtdEBP * mem_access_weight (0, 3, 6)
5967 to represent this savings.
5970 // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5971 // to set up an EBP frame in the prolog and epilog
5972 #define EBP_FRAME_SETUP_SIZE 5
5975 if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5977 unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5978 unsigned mem_access_weight = 3;
5980 if (compCodeOpt() == SMALL_CODE)
5981 mem_access_weight = 0;
5982 else if (compCodeOpt() == FAST_CODE)
5983 mem_access_weight *= 2;
5985 if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5987 /* It is not a good idea to use EBP in our predictions */
5988 CLANG_FORMAT_COMMENT_ANCHOR;
5990 if (verbose && (refCntEBP > 0))
5991 printf("; Predicting that it's not worth using EBP to enregister variables\n");
5993 rpFrameType = FT_EBP_FRAME;
5994 goto REVERSE_EBP_ENREG;
5997 #endif // _TARGET_XARCH_
5999 if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
// Ask the ETW/PInvoke heuristic (once per compile) whether an EBP frame is
// mandatory anyway.
6004 if (rpMustCreateEBPCalled == false)
6006 rpMustCreateEBPCalled = true;
6007 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6011 printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6013 codeGen->setFrameRequired(true);
6015 rpFrameType = FT_EBP_FRAME;
6016 goto REVERSE_EBP_ENREG;
// REVERSE_EBP_ENREG target (label elided in this view): EBP is now claimed as
// the frame pointer, so undo any predictions that enregistered variables in it.
6026 noway_assert(rpFrameType != FT_ESP_FRAME);
6028 rpReverseEBPenreg = true;
6033 noway_assert(regUsed & RBM_FPBASE);
6035 regUsed &= ~RBM_FPBASE;
6037 /* variables that were enregistered in EBP become stack based variables */
6038 raAddToStkPredict(refCntWtdEBP);
6042 /* We're going to have to undo some predicted enregistered variables */
6043 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6045 /* Is this a register variable? */
6046 if (varDsc->lvRegNum != REG_STK)
6048 if (isRegPairType(varDsc->lvType))
6050 /* Only one can be EBP */
6051 if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6053 if (varDsc->lvRegNum == REG_FPBASE)
6054 varDsc->lvRegNum = varDsc->lvOtherReg;
6056 varDsc->lvOtherReg = REG_STK;
6058 if (varDsc->lvRegNum == REG_STK)
6059 varDsc->lvRegister = false;
6061 if (varDsc->lvDependReg)
6071 if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6073 varDsc->lvRegNum = REG_STK;
6075 varDsc->lvRegister = false;
6077 if (varDsc->lvDependReg)
6083 printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6084 varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6085 (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6093 #endif // ETW_EBP_FRAMED
// Final cleanup before returning: reset per-pass flags on every local.
6098 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6100 /* Clear the lvDependReg flag for next iteration of the predictor */
6101 varDsc->lvDependReg = false;
6103 // If we set rpLostEnreg and this is the first pessimize pass
6104 // then reverse the enreg of all TYP_LONG
6105 if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6107 varDsc->lvRegNum = REG_STK;
6108 varDsc->lvOtherReg = REG_STK;
6113 if (verbose && raNewBlocks)
6115 printf("\nAdded FP register killing blocks:\n");
6116 fgDispBasicBlocks();
6120 noway_assert(rpFrameType != FT_NOT_SET);
6122 /* return the set of registers used to enregister variables */
6126 #pragma warning(pop)
6129 /*****************************************************************************
6131 * Predict register use for every tree in the function. Note that we do this
6132 * at different times (not to mention in a totally different way) for x86 vs
6135 void Compiler::rpPredictRegUse()
6142 // We might want to adjust the ref counts based on interference
6145 regMaskTP allAcceptableRegs = RBM_ALLINT;
6147 #if FEATURE_FP_REGALLOC
6148 allAcceptableRegs |= raConfigRestrictMaskFP();
6151 allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6153 /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6154 to acdHelper(). This is done implicitly, without creating a GT_CALL
6155 node. Hence, this interference is handled implicitly by
6156 restricting the registers used for enregistering variables */
6158 if (opts.compDbgCode)
6160 allAcceptableRegs &= RBM_CALLEE_SAVED;
6163 /* Compute the initial regmask to use for the first pass */
6164 regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6167 #if CPU_USES_BLOCK_MOVE
6168 /* If we might need to generate a rep mov instruction */
6169 /* remove ESI and EDI */
6171 regAvail &= ~(RBM_ESI | RBM_EDI);
6175 /* If we are using longs then we remove ESI to allow */
6176 /* ESI:EBX to be saved across a call                  */
6178 regAvail &= ~(RBM_ESI);
6182 // For the first register allocation pass we don't want to color using r4
6183 // as we want to allow it to be used to color the internal temps instead
6184 // when r0,r1,r2,r3 are all in use.
6186 regAvail &= ~(RBM_R4);
6190 // We never have EBP available when ETW_EBP_FRAME is defined
6191 regAvail &= ~RBM_FPBASE;
6193 /* If a frame pointer is required then we remove EBP */
6194 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6195 regAvail &= ~RBM_FPBASE;
6199 BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6201 regAvail = RBM_NONE;
6204 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6205 regAvail = RBM_NONE;
6207 #if FEATURE_STACK_FP_X87
6208 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6209 VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6211 // Calculate the set of all tracked FP/non-FP variables
6212 // into optAllFloatVars and optAllNonFPvars
6217 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6219 /* Ignore the variable if it's not tracked */
6221 if (!varDsc->lvTracked)
6224 /* Get hold of the index and the interference mask for the variable */
6226 unsigned varNum = varDsc->lvVarIndex;
6228 /* add to the set of all tracked FP/non-FP variables */
6230 if (varDsc->IsFloatRegType())
6231 VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6233 VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6237 for (unsigned i = 0; i < REG_COUNT; i++)
6239 VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6241 for (unsigned i = 0; i < lvaTrackedCount; i++)
6243 VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6246 raNewBlocks = false;
6247 rpPredictAssignAgain = false;
6250 bool mustPredict = true;
6251 unsigned stmtNum = 0;
6252 unsigned oldStkPredict = DUMMY_INIT(~0);
6253 VARSET_TP oldLclRegIntf[REG_COUNT];
6255 for (unsigned i = 0; i < REG_COUNT; i++)
6257 VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6262 /* Assign registers to variables using the variable/register interference
6263 graph (raLclRegIntf[]) calculated in the previous pass */
6264 regUsed = rpPredictAssignRegVars(regAvail);
6266 mustPredict |= rpLostEnreg;
6269 // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6270 if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6271 !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6273 // We can release our reservation on R10 and use it to color registers
6274 codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6275 allAcceptableRegs |= RBM_OPT_RSVD;
6279 /* Is our new prediction good enough?? */
6282 /* For small methods (less than 12 stmts), we add a */
6283 /* extra pass if we are predicting the use of some */
6284 /* of the caller saved registers. */
6285 /* This fixes RAID perf bug 43440 VB Ackerman function */
6287 if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6292 /* If every variable was fully enregistered then we're done */
6293 if (rpStkPredict == 0)
6296 // This was a successful prediction. Record it, in case it turns out to be the best one.
6297 rpRecordPrediction();
6301 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6303 // Be careful about overflow
6304 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6305 if (oldStkPredict < highStkPredict)
6308 if (rpStkPredict < rpPasses * 8)
6311 if (rpPasses >= (rpPassesMax - 1))
6316 /* We will do another pass */;
6320 if (JitConfig.JitAssertOnMaxRAPasses())
6322 noway_assert(rpPasses < rpPassesMax &&
6323 "This may not a bug, but dev team should look and see what is happening");
6327 // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with
6328 // the (max) size of a VARSET. We've eliminated this constant, so I left this as a constant. We hope
6329 // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6330 if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6332 NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6341 printf("\n; Another pass due to rpLostEnreg");
6343 printf("\n; Another pass due to rpAddedVarIntf");
6344 if ((rpPasses == 1) && rpPredictAssignAgain)
6345 printf("\n; Another pass due to rpPredictAssignAgain");
6347 printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6351 /* Zero the variable/register interference graph */
6352 for (unsigned i = 0; i < REG_COUNT; i++)
6354 VarSetOps::ClearD(this, raLclRegIntf[i]);
6357 // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6358 // it must not be in a register trashed by the callee
6359 if (info.compLvFrameListRoot != BAD_VAR_NUM)
6361 assert(!opts.ShouldUsePInvokeHelpers());
6362 noway_assert(info.compLvFrameListRoot < lvaCount);
6364 LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6366 if (pinvokeVarDsc->lvTracked)
6368 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6369 DEBUGARG("compLvFrameListRoot"));
6371 // We would prefer to have this be enregister in the PINVOKE_TCB register
6372 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6375 // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6376 // worst case). Make sure that the return value compiler temp that we create for the single
6377 // return block knows about this interference.
6378 if (genReturnLocal != BAD_VAR_NUM)
6380 noway_assert(genReturnBB);
6381 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6382 if (localTmp->lvTracked)
6384 rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6385 VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6391 if (compFloatingPointUsed)
6393 bool hasMustInitFloat = false;
6395 // if we have any must-init floating point LclVars then we will add register interferences
6396 // for the arguments with RBM_SCRATCH
6397 // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6398 // we won't home the arguments into REG_SCRATCH
6403 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6405 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6407 hasMustInitFloat = true;
6412 if (hasMustInitFloat)
6414 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6416 // If is an incoming argument, that is tracked and not floating-point
6417 if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6419 rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6420 DEBUGARG("arg home with must-init fp"));
6428 rpAddedVarIntf = false;
6429 rpLostEnreg = false;
6431 /* Walk the basic blocks and predict reg use for each tree */
6433 for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6437 compCurLifeTree = NULL;
6438 VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6442 for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6444 noway_assert(stmt->gtOper == GT_STMT);
6446 rpPredictSpillCnt = 0;
6447 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6448 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6450 GenTree* tree = stmt->gtStmt.gtStmtExpr;
6455 printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6460 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6462 noway_assert(rpAsgVarNum == -1);
6464 if (rpPredictSpillCnt > tmpIntSpillMax)
6465 tmpIntSpillMax = rpPredictSpillCnt;
6470 /* Decide whether we need to set mustPredict */
6471 mustPredict = false;
6474 // The spill count may be now high enough that we now need to reserve r10. If this is the case, we'll need to
6475 // reserve r10, and if it was used, throw out the last prediction and repredict.
6476 if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6478 codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6479 allAcceptableRegs &= ~RBM_OPT_RSVD;
6480 if ((regUsed & RBM_OPT_RSVD) != 0)
6483 rpBestRecordedPrediction = nullptr;
6499 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6502 if (rpPredictAssignAgain)
6510 /* Calculate the new value to use for regAvail */
6512 regAvail = allAcceptableRegs;
6514 /* If a frame pointer is required then we remove EBP */
6515 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6516 regAvail &= ~RBM_FPBASE;
6519 // We never have EBP available when ETW_EBP_FRAME is defined
6520 regAvail &= ~RBM_FPBASE;
6523 // If we have done n-passes then we must continue to pessimize the
6524 // interference graph by or-ing the interferences from the previous pass
6526 if (rpPasses > rpPassesPessimize)
6528 for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6529 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6531 /* If we reverse an EBP enregistration then keep it that way */
6532 if (rpReverseEBPenreg)
6533 regAvail &= ~RBM_FPBASE;
6541 /* Save the old variable/register interference graph */
6542 for (unsigned i = 0; i < REG_COUNT; i++)
6544 VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6546 oldStkPredict = rpStkPredict;
6547 } // end of while (true)
6551 // If we recorded a better feasible allocation than we ended up with, go back to using it.
6552 rpUseRecordedPredictionIfBetter();
6555 codeGen->setDoubleAlign(false);
6558 switch (rpFrameType)
6561 noway_assert(!"rpFrameType not set correctly!");
6564 noway_assert(!codeGen->isFramePointerRequired());
6565 noway_assert(!codeGen->isFrameRequired());
6566 codeGen->setFramePointerUsed(false);
6569 noway_assert((regUsed & RBM_FPBASE) == 0);
6570 codeGen->setFramePointerUsed(true);
6573 case FT_DOUBLE_ALIGN_FRAME:
6574 noway_assert((regUsed & RBM_FPBASE) == 0);
6575 noway_assert(!codeGen->isFramePointerRequired());
6576 codeGen->setFramePointerUsed(false);
6577 codeGen->setDoubleAlign(true);
6582 /* Record the set of registers that we need */
6583 codeGen->regSet.rsClearRegsModified();
6584 if (regUsed != RBM_NONE)
6586 codeGen->regSet.rsSetRegsModified(regUsed);
6589 /* We need genFullPtrRegMap if :
6590 * The method is fully interruptible, or
6591 * We are generating an EBP-less frame (for stack-pointer deltas)
6594 genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6600 printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6601 printf(" rpStkPredict was %u\n", rpStkPredict);
6604 rpRegAllocDone = true;
6607 #endif // LEGACY_BACKEND
6609 /*****************************************************************************
6611 * Mark all variables as to whether they live on the stack frame
6612 * (part or whole), and if so what the base is (FP or SP).
6615 void Compiler::raMarkStkVars()
// Walks every lclVar and decides whether it lives (in part or whole) on the
// stack frame, setting lvOnFrame and lvFramePointerBased accordingly.
// NOTE(review): this excerpt elides braces/else arms between the numbered
// lines, so nesting below is inferred from the surviving comments — confirm
// against the full file before relying on exact control flow.
6620 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6622 // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6623 CLANG_FORMAT_COMMENT_ANCHOR;
6625 #ifdef LEGACY_BACKEND
// Legacy backend: assume not on frame until proven otherwise below.
6626 varDsc->lvOnFrame = false;
6627 #endif // LEGACY_BACKEND
// Fields of dependently-promoted structs get their home via the parent
// struct; they must never be individually enregistered.
6629 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6631 noway_assert(!varDsc->lvRegister);
6635 /* Fully enregistered variables don't need any frame space */
6637 if (varDsc->lvRegister)
6639 if (!isRegPairType(varDsc->TypeGet()))
6644 /* For "large" variables make sure both halves are enregistered */
6646 if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6651 /* Unused variables typically don't get any frame space */
6652 else if (varDsc->lvRefCnt == 0)
// needSlot accumulates the reasons an otherwise-unused variable still
// requires a frame slot (address exposure, outgoing-arg dummy, debug code).
6654 bool needSlot = false;
// A stack-passed fixed (non-vararg-cookie) parameter in a varargs method:
// these must not get lvOnFrame so they are never reported to the GC.
6656 bool stkFixedArgInVarArgs =
6657 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6659 // If its address has been exposed, ignore lvRefCnt. However, exclude
6660 // fixed arguments in varargs method as lvOnFrame shouldn't be set
6661 // for them as we don't want to explicitly report them to GC.
6663 if (!stkFixedArgInVarArgs)
6665 needSlot |= varDsc->lvAddrExposed;
6668 #if FEATURE_FIXED_OUT_ARGS
6670 /* Is this the dummy variable representing GT_LCLBLK ? */
6671 needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6673 #endif // FEATURE_FIXED_OUT_ARGS
6676 /* For debugging, note that we have to reserve space even for
6677 unused variables if they are ever in scope. However, this is not
6678 an issue as fgExtendDbgLifetimes() adds an initialization and
6679 variables in scope will not have a zero ref-cnt.
// Debug-only sanity check: a tracked zero-ref non-param local must not
// appear in any debugger scope table entry.
6681 if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6683 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6685 noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6690 For Debug Code, we have to reserve space even if the variable is never
6691 in scope. We will also need to initialize it if it is a GC var.
6692 So we set lvMustInit and artifically bump up the ref-cnt.
6695 if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
// GC locals must hold a valid value for GC reporting, so force a ref
// count of 1 to keep the slot (see comment block above).
6699 if (lvaTypeIsGC(lclNum))
6701 varDsc->lvRefCnt = 1;
6704 if (!varDsc->lvIsParam)
6706 varDsc->lvMustInit = true;
6710 #ifndef LEGACY_BACKEND
// RyuJIT: zero-ref locals get lvOnFrame from needSlot computed above.
6711 varDsc->lvOnFrame = needSlot;
6712 #endif // !LEGACY_BACKEND
6715 /* Clear the lvMustInit flag in case it is set */
6716 varDsc->lvMustInit = false;
6722 #ifndef LEGACY_BACKEND
6723 if (!varDsc->lvOnFrame)
6727 #endif // !LEGACY_BACKEND
6730 /* The variable (or part of it) lives on the stack frame */
// Anything reaching here must have a real type and a nonzero frame size
// (the outgoing-arg space var is the one permitted zero-size exception).
6732 noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6733 #if FEATURE_FIXED_OUT_ARGS
6734 noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6735 #else // FEATURE_FIXED_OUT_ARGS
6736 noway_assert(lvaLclSize(lclNum) != 0);
6737 #endif // FEATURE_FIXED_OUT_ARGS
6739 varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6743 varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
// Double-aligned frames never use a frame pointer, but stack parameters
// are still addressed off EBP in that layout.
6747 if (codeGen->doDoubleAlign())
6749 noway_assert(codeGen->isFramePointerUsed() == false);
6751 /* All arguments are off of EBP with double-aligned frames */
6753 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6755 varDsc->lvFramePointerBased = true;
6761 /* Some basic checks */
6763 // It must be in a register, on frame, or have zero references.
6765 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6767 #ifndef LEGACY_BACKEND
6768 // We can't have both lvRegister and lvOnFrame for RyuJIT
6769 noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6770 #else // LEGACY_BACKEND
6772 /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6773 noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6774 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6775 #endif // LEGACY_BACKEND
6779 // For varargs functions, there should be no direct references to
6780 // parameter variables except for 'this' (because these were morphed
6781 // in the importer) and the 'arglist' parameter (which is not a GC
6782 // pointer). and the return buffer argument (if we are returning a
6784 // This is important because we don't want to try to report them
6785 // to the GC, as the frame offsets in these local varables would
6788 if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6790 if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6792 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6799 #ifdef LEGACY_BACKEND
6800 void Compiler::rpRecordPrediction()
// Snapshots the current per-lclVar register assignment (lvRegister, regNum,
// otherReg) into rpBestRecordedPrediction when it is the best (lowest
// weighted-stack-use, rpStkPredict) feasible prediction seen so far.
// Lazily allocates the snapshot array on first use.
6802 if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6804 if (rpBestRecordedPrediction == NULL)
6806 rpBestRecordedPrediction =
6807 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
// Copy the current assignment for every local (tracked or not).
6809 for (unsigned k = 0; k < lvaCount; k++)
6811 rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6812 rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum();
6813 rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg();
// Remember the score so a later, worse pass can be rolled back
// (see rpUseRecordedPredictionIfBetter).
6815 rpBestRecordedStkPredict = rpStkPredict;
6816 JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6820 void Compiler::rpUseRecordedPredictionIfBetter()
// If a previously recorded prediction (rpRecordPrediction) had a strictly
// lower weighted stack use count than the final one, restore that recorded
// per-lclVar register assignment over the current lvaTable state.
6822 JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6823 rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
// No-op when nothing was recorded or the current prediction is at least
// as good (<=) as the best recorded one.
6824 if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6826 JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6827 rpBestRecordedStkPredict);
6829 for (unsigned k = 0; k < lvaCount; k++)
6831 lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6832 lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6833 lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));