1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XX Does the register allocation and puts the remaining lclVars on the stack XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
22 #if FEATURE_FP_REGALLOC
// Reads the COMPlus_JitRegisterFP configuration value and maps it onto the
// enumConfigRegisterFP enum. Only the low two bits of the config DWORD are
// meaningful (four possible FP-register-allocation modes).
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
25 DWORD val = JitConfig.JitRegisterFP();
27 return (enumConfigRegisterFP)(val & 0x3);
29 #endif // FEATURE_FP_REGALLOC
// Returns the mask of floating-point registers that the allocator is allowed
// to use, based on the JitRegisterFP configuration mode:
//   NONE         -> RBM_NONE (no FP enregistration)
//   CALLEE_TRASH -> caller-saved FP registers only
//   CALLEE_SAVED -> callee-saved FP registers only
//   FULL         -> all FP registers
// NOTE(review): the `break;` statements between cases are missing from this
// listing (gaps in the embedded line numbers); each case presumably ends with
// a break — confirm against the full source before editing.
31 regMaskTP Compiler::raConfigRestrictMaskFP()
33 regMaskTP result = RBM_NONE;
35 #if FEATURE_FP_REGALLOC
36 switch (raConfigRegisterFP())
38 case CONFIG_REGISTER_FP_NONE:
41 case CONFIG_REGISTER_FP_CALLEE_TRASH:
42 result = RBM_FLT_CALLEE_TRASH;
44 case CONFIG_REGISTER_FP_CALLEE_SAVED:
45 result = RBM_FLT_CALLEE_SAVED;
47 case CONFIG_REGISTER_FP_FULL:
48 result = RBM_ALLFLOAT;
// Returns the double-align policy for the frame: under stress compilation we
// force MUST_DOUBLE_ALIGN; otherwise the JitDoubleAlign config (or its
// default) decides.
// NOTE(review): the stress/config path is likely wrapped in #ifdef DEBUG in
// the full source (lines are missing from this listing) — confirm.
57 DWORD Compiler::getCanDoubleAlign()
60 if (compStressCompile(STRESS_DBL_ALN, 20))
61 return MUST_DOUBLE_ALIGN;
63 return JitConfig.JitDoubleAlign();
65 return DEFAULT_DOUBLE_ALIGN;
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
73 // refCntStk - sum of ref counts for all stack based variables
74 // refCntEBP - sum of ref counts for EBP enregistered variables
75 // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables
76 // refCntStkParam - sum of ref counts for all stack based parameters
77 // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 // with double fields).
81 // Returns true if this method estimates that a double-aligned frame would be beneficial
84 // The impact of a double-aligned frame is computed as follows:
85 // - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 // - We pay a byte of code for each non-parameter stack reference.
87 // - We save the misalignment penalty and possible cache-line crossing penalty.
88 // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 // - We pay 7 extra bytes for:
91 // LEA ESP,[EBP-offset]
92 // AND ESP,-8 to double align ESP
93 // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
95 // If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 // Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
97 // ref count for double-aligned values.
// Heuristic: estimate whether a double-aligned frame pays for itself.
// See the block comment above for the cost model; all params are ref counts
// gathered by the caller.
99 bool Compiler::shouldDoubleAlign(
100 unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
102 bool doDoubleAlign = false;
// 7 = code bytes for the LEA/AND frame-setup sequence (see header comment).
103 const unsigned DBL_ALIGN_SETUP_SIZE = 7;
// Net code-size cost: each stack/EBP-var reference costs a byte, each
// parameter reference saves one, plus the fixed setup cost.
105 unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
// Misalignment penalty per weighted double reference: 0 for SMALL_CODE,
// 16 for FAST_CODE, 4 otherwise.
106 unsigned misaligned_weight = 4;
108 if (compCodeOpt() == Compiler::SMALL_CODE)
109 misaligned_weight = 0;
111 if (compCodeOpt() == Compiler::FAST_CODE)
112 misaligned_weight *= 4;
114 JITDUMP("\nDouble alignment:\n");
115 JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116 JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117 JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
// Benefit test 1: misalignment savings must exceed the code-size cost.
119 if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
121 JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
// Benefit test 2: if EBP-enregistered vars are used more than twice as
// heavily as the stack doubles, keep EBP available for enregistration.
123 else if (refCntWtdEBP > refCntWtdStkDbl * 2)
125 // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126 // not double aligned.
127 // Here are the numbers that make this not double-aligned.
128 // refCntWtdStkDbl = 0x164
129 // refCntWtdEBP = 0x1a4
130 // We think we do need to change the heuristic to be in favor of double-align.
132 JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
136 // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137 JITDUMP(" Predicting to create a double-aligned frame\n");
138 doDoubleAlign = true;
140 return doDoubleAlign;
142 #endif // DOUBLE_ALIGN
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
// One-time initialization of the (legacy) register predictor state:
// clears per-method prediction flags and fills rpPredictMap, which maps each
// rpPredictReg value to the register mask it allows.
146 void Compiler::raInit()
148 #if FEATURE_STACK_FP_X87
149 /* We have not assigned any FP variables to registers yet */
151 VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
153 codeGen->intRegState.rsIsFloat = false;
154 codeGen->floatRegState.rsIsFloat = true;
156 rpReverseEBPenreg = false;
// Last 3 passes are the pessimizing passes.
159 rpPassesPessimize = rpPassesMax - 3;
160 if (opts.compDbgCode)
164 rpStkPredict = (unsigned)-1;
165 rpFrameType = FT_NOT_SET;
167 rpMustCreateEBPCalled = false;
168 rpRegAllocDone = false;
169 rpMaskPInvokeEpilogIntf = RBM_NONE;
171 rpPredictMap[PREDICT_NONE] = RBM_NONE;
172 rpPredictMap[PREDICT_ADDR] = RBM_NONE;
// With FP regalloc, PREDICT_REG may pick either register file; otherwise
// (the #else arm, missing from this listing) integer registers only.
174 #if FEATURE_FP_REGALLOC
175 rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
176 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
178 rpPredictMap[PREDICT_REG] = RBM_ALLINT;
179 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
// Expand one PREDICT_REG_<name> -> RBM_<name> entry per target register.
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
185 #if defined(_TARGET_ARM_)
187 rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188 rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
189 rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
191 #elif defined(_TARGET_AMD64_)
193 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
197 #elif defined(_TARGET_X86_)
199 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
202 rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203 rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
// No best prediction recorded yet for this method.
207 rpBestRecordedPrediction = NULL;
210 /*****************************************************************************
212 * The following table(s) determines the order in which registers are considered
213 * for variables to live in
// Returns the preference-ordered register table used when assigning variables
// to registers: the float table for floating-point types (when FP regalloc is
// enabled), else the integer table. If wbVarOrderSize is non-NULL the table
// length is written through it.
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
218 #if FEATURE_FP_REGALLOC
219 if (varTypeIsFloating(regType))
221 static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
222 const unsigned raRegVarOrderFltSize = _countof(raRegVarOrderFlt);
224 if (wbVarOrderSize != NULL)
225 *wbVarOrderSize = raRegVarOrderFltSize;
227 return &raRegVarOrderFlt[0];
232 static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
233 const unsigned raRegVarOrderSize = _countof(raRegVarOrder);
235 if (wbVarOrderSize != NULL)
236 *wbVarOrderSize = raRegVarOrderSize;
238 return &raRegVarOrder[0];
244 /*****************************************************************************
246 * Dump out the variable interference graph
// Debug dump: prints, for every tracked local, the set of tracked variables
// it interferes with (from lvaVarIntf).
250 void Compiler::raDumpVarIntf()
255 printf("Var. interference graph for %s\n", info.compFullName);
257 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
259 /* Ignore the variable if it's not tracked */
261 if (!varDsc->lvTracked)
264 /* Get hold of the index and the interference mask for the variable */
265 unsigned varIndex = varDsc->lvVarIndex;
267 printf(" V%02u,T%02u and ", lclNum, varIndex);
271 for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
273 if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274 printf("T%02u ", refIndex);
285 /*****************************************************************************
287 * Dump out the register interference graph
// Debug dump: prints, for every tracked local, the registers it interferes
// with (from raLclRegIntf) — FP registers for float-typed vars, integer
// registers otherwise.
290 void Compiler::raDumpRegIntf()
292 printf("Reg. interference graph for %s\n", info.compFullName);
297 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
301 /* Ignore the variable if it's not tracked */
303 if (!varDsc->lvTracked)
306 /* Get hold of the index and the interference mask for the variable */
308 varNum = varDsc->lvVarIndex;
310 printf(" V%02u,T%02u and ", lclNum, varNum);
312 if (varDsc->IsFloatRegType())
314 #if !FEATURE_STACK_FP_X87
315 for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
317 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318 printf("%3s ", getRegName(regNum, true));
326 for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
328 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329 printf("%3s ", getRegName(regNum));
342 /*****************************************************************************
344 * We'll adjust the ref counts based on interference
// Intentionally empty: the interference-based ref-count adjustment this
// method used to perform was found to be incorrect and has been disabled.
348 void Compiler::raAdjustVarIntf()
350 // This method was not correct and has been disabled.
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
// Returns the mask of registers in which a call/return of the given tree's
// type delivers its result. HFA structs return in a contiguous run of FP
// registers (one per HFA slot, starting at REG_FLOATRET); all other types are
// looked up in a per-type table.
359 inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
361 var_types type = tree->TypeGet();
363 if (varTypeIsStruct(type) && IsHfa(tree))
// Build a mask of GetHfaCount() consecutive bits based at REG_FLOATRET.
365 int retSlots = GetHfaCount(tree);
366 return ((1 << retSlots) - 1) << REG_FLOATRET;
// Table indexed by var_types; RBM_ILLEGAL marks types that can never be
// returned this way (asserted below).
369 const static regMaskTP returnMap[TYP_COUNT] = {
370 RBM_ILLEGAL, // TYP_UNDEF,
371 RBM_NONE, // TYP_VOID,
372 RBM_INTRET, // TYP_BOOL,
373 RBM_INTRET, // TYP_BYTE,
374 RBM_INTRET, // TYP_UBYTE,
375 RBM_INTRET, // TYP_SHORT,
376 RBM_INTRET, // TYP_USHORT,
377 RBM_INTRET, // TYP_INT,
378 RBM_INTRET, // TYP_UINT,
379 RBM_LNGRET, // TYP_LONG,
380 RBM_LNGRET, // TYP_ULONG,
381 RBM_FLOATRET, // TYP_FLOAT,
382 RBM_DOUBLERET, // TYP_DOUBLE,
383 RBM_INTRET, // TYP_REF,
384 RBM_INTRET, // TYP_BYREF,
385 RBM_INTRET, // TYP_ARRAY,
386 RBM_ILLEGAL, // TYP_STRUCT,
387 RBM_ILLEGAL, // TYP_BLK,
388 RBM_ILLEGAL, // TYP_LCLBLK,
389 RBM_ILLEGAL, // TYP_PTR,
390 RBM_ILLEGAL, // TYP_FNC,
391 RBM_ILLEGAL, // TYP_UNKNOWN,
// Sanity-check that the table stayed in sync with the var_types enum.
394 assert((unsigned)type < _countof(returnMap));
395 assert(returnMap[TYP_LONG] == RBM_LNGRET);
396 assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
397 assert(returnMap[TYP_REF] == RBM_INTRET);
398 assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
400 regMaskTP result = returnMap[type];
401 assert(result != RBM_ILLEGAL);
405 /*****************************************************************************/
407 /****************************************************************************/
// Debug helper: prints "Vnn " for every local that is tracked, is in `mask`,
// and is live in `life`.
411 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
416 for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
418 if (!varDsc->lvTracked)
421 if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
424 if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
425 printf("V%02u ", lclNum);
431 /*****************************************************************************/
433 /*****************************************************************************
435 * Debugging helpers - display variables liveness info.
// Debug helper: walks basic blocks from `beg` to `end` and prints, per
// block, the FP variables (restricted by `mask`) live on entry and exit.
438 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
442 printf("BB%02u: ", beg->bbNum);
445 dispLifeSet(comp, mask, beg->bbLiveIn);
449 dispLifeSet(comp, mask, beg->bbLiveOut);
// BBF_VISITED marks blocks whose inner-loop FP-var count was computed.
452 if (beg->bbFlags & BBF_VISITED)
453 printf(" inner=%u", beg->bbFPinVars);
460 } while (beg != end);
463 #if FEATURE_STACK_FP_X87
// Debug dump of FP variable liveness: for each block, prints the live-in
// set, then updates and prints the live set tree by tree, then the live-out
// set.
464 void Compiler::raDispFPlifeInfo()
468 for (block = fgFirstBB; block; block = block->bbNext)
472 printf("BB%02u: in = [ ", block->bbNum);
473 dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
// Walk the statements, maintaining a running live set.
476 VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
477 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
481 noway_assert(stmt->gtOper == GT_STMT);
483 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
485 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
487 dispLifeSet(this, optAllFloatVars, life);
489 gtDispTree(tree, 0, NULL, true);
495 printf("BB%02u: out = [ ", block->bbNum);
496 dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
500 #endif // FEATURE_STACK_FP_X87
501 /*****************************************************************************/
503 /*****************************************************************************/
505 /*****************************************************************************/
// Builds customVarOrder: the normal register-preference table reordered so
// that preferred registers come first, then (x86) byteable registers, then
// the remaining registers, and finally the "avoid" registers last. The final
// count is written back through customVarOrderSize.
507 void Compiler::raSetRegVarOrder(
508 var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
510 unsigned normalVarOrderSize;
511 const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
513 unsigned listIndex = 0;
// usedReg tracks registers already placed; seeding it with avoidReg defers
// the avoid registers to the final pass below.
514 regMaskTP usedReg = avoidReg;
516 noway_assert(*customVarOrderSize >= normalVarOrderSize);
520 /* First place the preferred registers at the start of customVarOrder */
525 for (index = 0; index < normalVarOrderSize; index++)
527 regNum = normalVarOrder[index];
528 regBit = genRegMask(regNum);
530 if (usedReg & regBit)
533 if (prefReg & regBit)
536 noway_assert(listIndex < normalVarOrderSize);
537 customVarOrder[listIndex++] = regNum;
544 #if CPU_HAS_BYTE_REGS
545 /* Then if byteable registers are preferred place them */
547 if (prefReg & RBM_BYTE_REG_FLAG)
549 for (index = 0; index < normalVarOrderSize; index++)
551 regNum = normalVarOrder[index];
552 regBit = genRegMask(regNum);
554 if (usedReg & regBit)
557 if (RBM_BYTE_REGS & regBit)
560 noway_assert(listIndex < normalVarOrderSize);
561 customVarOrder[listIndex++] = regNum;
566 #endif // CPU_HAS_BYTE_REGS
569 /* Now place all the non-preferred registers */
571 for (index = 0; index < normalVarOrderSize; index++)
573 regNumber regNum = normalVarOrder[index];
574 regMaskTP regBit = genRegMask(regNum);
576 if (usedReg & regBit)
580 noway_assert(listIndex < normalVarOrderSize);
581 customVarOrder[listIndex++] = regNum;
586 /* Now place the "avoid" registers */
588 for (index = 0; index < normalVarOrderSize; index++)
590 regNumber regNum = normalVarOrder[index];
591 regMaskTP regBit = genRegMask(regNum);
593 if (avoidReg & regBit)
595 noway_assert(listIndex < normalVarOrderSize);
596 customVarOrder[listIndex++] = regNum;
// Every register from the normal table must have been placed exactly once.
604 *customVarOrderSize = listIndex;
605 noway_assert(listIndex == normalVarOrderSize);
608 /*****************************************************************************
610 * Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
// Computes rsCalleeRegArgMaskLiveIn (registers holding incoming register
// arguments that are live on entry) and raAvoidArgRegMask (incoming arg
// registers the allocator should prefer not to hand out to other locals).
613 void Compiler::raSetupArgMasks(RegState* regState)
615 /* Determine the registers holding incoming register arguments */
616 /* and setup raAvoidArgRegMask to the set of registers that we */
617 /* may want to avoid when enregistering the locals. */
619 regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
620 raAvoidArgRegMask = RBM_NONE;
622 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
624 for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
626 noway_assert(argDsc->lvIsParam);
628 // Is it a register argument ?
629 if (!argDsc->lvIsRegArg)
632 // only process args that apply to the current register file
633 if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
638 // Is it dead on entry ??
639 // In certain cases such as when compJmpOpUsed is true,
640 // or when we have a generic type context arg that we must report
641 // then the arguments have to be kept alive throughout the prolog.
642 // So we have to consider it as live on entry.
644 bool keepArgAlive = compJmpOpUsed;
645 if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
646 ((lvaTable + info.compTypeCtxtArg) == argDsc))
651 if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
656 // The code to set the regState for each arg is outlined for shared use
658 regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
660 // Do we need to try to avoid this incoming arg registers?
662 // If it's not tracked, don't do the stuff below.
663 if (!argDsc->lvTracked)
666 // If the incoming arg is used after a call it is live across
667 // a call and will have to be allocated to a caller saved
668 // register anyway (a very common case).
670 // In this case it is pointless to ask that the higher ref count
671 // locals to avoid using the incoming arg register
673 unsigned argVarIndex = argDsc->lvVarIndex;
675 /* Does the incoming register and the arg variable interfere? */
677 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
679 // No they do not interfere,
680 // so we add inArgReg to raAvoidArgRegMask
682 raAvoidArgRegMask |= genRegMask(inArgReg);
// A double argument occupies a register pair; avoid the second half too.
685 if (argDsc->lvType == TYP_DOUBLE)
687 // Avoid the double register argument pair for register allocation.
688 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
690 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
697 #endif // LEGACY_BACKEND
699 // The code to set the regState for each arg is outlined for shared use
700 // by linear scan. (It is not shared for System V AMD64 platform.)
// The code to set the regState for each arg is outlined for shared use
// by linear scan. (It is not shared for System V AMD64 platform.)
// Marks the register(s) holding the incoming argument `argDsc` as live-in in
// `regState` (including the second register of ARM double/long pairs and all
// slots of multi-register struct/HFA args) and returns the first arg register.
701 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
703 regNumber inArgReg = argDsc->lvArgReg;
704 regMaskTP inArgMask = genRegMask(inArgReg);
706 if (regState->rsIsFloat)
708 noway_assert(inArgMask & RBM_FLTARG_REGS);
710 else // regState is for the integer registers
712 // This might be the fixed return buffer register argument (on ARM64)
713 // We check and allow inArgReg to be theFixedRetBuffReg
714 if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
716 // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
717 noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
718 // We should have recorded the variable number for the return buffer arg
719 noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
721 else // we have a regular arg
723 noway_assert(inArgMask & RBM_ARG_REGS);
727 regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
// NOTE(review): the following double/long pair handling is presumably inside
// an #ifdef _TARGET_ARM_ region (the matching #endif is at line 750 below,
// the #ifdef itself is missing from this listing) — confirm.
730 if (argDsc->lvType == TYP_DOUBLE)
// Varargs/soft-FP doubles arrive in integer register pairs (R0:R1, R2:R3).
732 if (info.compIsVarArgs || opts.compUseSoftFP)
734 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
735 assert(!regState->rsIsFloat);
739 assert(regState->rsIsFloat);
740 assert(emitter::isDoubleReg(inArgReg));
// Mark the second register of the pair live as well.
742 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
744 else if (argDsc->lvType == TYP_LONG)
746 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
747 assert(!regState->rsIsFloat);
748 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
750 #endif // _TARGET_ARM_
752 #if FEATURE_MULTIREG_ARGS
753 if (varTypeIsStruct(argDsc->lvType))
755 if (argDsc->lvIsHfaRegArg())
757 assert(regState->rsIsFloat);
// Mark every additional FP register occupied by the HFA.
758 unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
759 for (unsigned i = 1; i < cSlots; i++)
761 assert(inArgReg + i <= LAST_FP_ARGREG);
762 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
// Non-HFA struct: one integer register per pointer-sized slot, stopping at
// the last integer argument register.
767 unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
768 for (unsigned i = 1; i < cSlots; i++)
770 regNumber nextArgReg = (regNumber)(inArgReg + i);
771 if (nextArgReg > REG_ARG_LAST)
775 assert(regState->rsIsFloat == false);
776 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
780 #endif // FEATURE_MULTIREG_ARGS
785 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
787 /*****************************************************************************
789 * Assign variables to live in registers, etc.
// Top-level driver of the legacy allocator: runs FP (x87) allocation where
// enabled, runs the register predictor, then neutralizes unused promoted
// non-argument struct locals so GC tracking and lvMustInit ignore them.
792 void Compiler::raAssignVars()
796 printf("*************** In raAssignVars()\n");
798 /* We need to keep track of which registers we ever touch */
800 codeGen->regSet.rsClearRegsModified();
802 #if FEATURE_STACK_FP_X87
803 // FP register allocation
804 raEnregisterVarsStackFP();
805 raGenerateFPRefCounts();
808 /* Predict registers used by code generation */
809 rpPredictRegUse(); // New reg predictor/allocator
811 // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
812 // so that the gc tracking logic and lvMustInit logic will ignore them.
817 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
// Skip everything except unused promoted non-parameter structs.
819 if (varDsc->lvType != TYP_STRUCT)
822 if (!varDsc->lvPromoted)
825 if (varDsc->lvIsParam)
828 if (varDsc->lvRefCnt > 0)
834 printf("Mark unused struct local V%02u\n", lclNum);
837 lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
839 if (promotionType == PROMOTION_TYPE_DEPENDENT)
841 // This should only happen when all its field locals are unused as well.
843 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
846 noway_assert(lvaTable[varNum].lvRefCnt == 0);
851 noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
854 varDsc->lvUnusedStruct = 1;
857 // Change such struct locals to ints
859 varDsc->lvType = TYP_INT; // Bash to a non-gc type.
860 noway_assert(!varDsc->lvTracked);
861 noway_assert(!varDsc->lvRegister);
862 varDsc->lvOnFrame = false; // Force it not to be onstack.
863 varDsc->lvMustInit = false; // Force not to init it.
864 varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
868 /*****************************************************************************/
869 /*****************************************************************************/
871 /*****************************************************************************
873 * Given a regNumber return the correct predictReg enum value
876 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
878 return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
881 /*****************************************************************************
883 * Given a varIndex return the correct predictReg enum value
886 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
888 return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
891 /*****************************************************************************
893 * Given a rpPredictReg return the correct varNumber value
896 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
898 return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
901 /*****************************************************************************
903 * Given a rpPredictReg return true if it specifies a Txx register
906 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
908 if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
914 /*****************************************************************************
916 * Given a regmask return the correct predictReg enum value
919 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
921 rpPredictReg result = PREDICT_NONE;
922 if (regmask != 0) /* Check if regmask has zero bits set */
924 if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
927 assert(FitsIn<DWORD>(regmask));
928 BitScanForward(®, (DWORD)regmask);
929 return rpGetPredictForReg((regNumber)reg);
932 #if defined(_TARGET_ARM_)
933 /* It has multiple bits set */
934 else if (regmask == (RBM_R0 | RBM_R1))
936 result = PREDICT_PAIR_R0R1;
938 else if (regmask == (RBM_R2 | RBM_R3))
940 result = PREDICT_PAIR_R2R3;
942 #elif defined(_TARGET_X86_)
943 /* It has multiple bits set */
944 else if (regmask == (RBM_EAX | RBM_EDX))
946 result = PREDICT_PAIR_EAXEDX;
948 else if (regmask == (RBM_ECX | RBM_EBX))
950 result = PREDICT_PAIR_ECXEBX;
953 else /* It doesn't match anything */
955 result = PREDICT_NONE;
956 assert(!"unreachable");
957 NO_WAY("bad regpair");
963 /*****************************************************************************
965 * Record a variable to register(s) interference
// Records an interference between every register in regMask and every
// variable in `life` by updating raLclRegIntf. Returns whether any new
// interference was added (tracked via addedIntf).
968 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
971 bool addedIntf = false;
975 for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
977 regMaskTP regBit = genRegMask(regNum);
979 if (regMask & regBit)
// newIntf = variables live here that are not yet recorded against regNum.
981 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
982 if (!VarSetOps::IsEmpty(this, newIntf))
987 VarSetOps::Iter newIntfIter(this, newIntf);
989 while (newIntfIter.NextElem(&varNum))
991 unsigned lclNum = lvaTrackedToVarNum[varNum];
// NOTE(review): lvaTable is indexed by varNum (tracked index) here rather
// than lclNum; verify against the full source — this is a debug-print path.
992 LclVarDsc* varDsc = &lvaTable[varNum];
993 #if FEATURE_FP_REGALLOC
994 // Only print the useful interferences
995 // i.e. floating point LclVar interference with floating point registers
996 // or integer LclVar interference with general purpose registers
997 if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
1000 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
1001 getRegName(regNum), msg);
1007 VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1019 /*****************************************************************************
1021 * Record a new variable to variable(s) interference
// Records an interference between tracked variable varNum and every variable
// in intfVar (via fgMarkIntf); sets rpAddedVarIntf when something new was
// added so the predictor knows to iterate again.
1024 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
// varNum is a tracked index; the >= 0 half is vacuous for an unsigned but
// kept for symmetry/documentation.
1026 noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1027 noway_assert(!VarSetOps::IsEmpty(this, intfVar));
1029 VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1030 VarSetOps::AddElemD(this, oneVar, varNum);
1032 bool newIntf = fgMarkIntf(intfVar, oneVar);
1035 rpAddedVarIntf = true;
// Debug dump of each recorded pair, translated back to lcl numbers.
1038 if (verbose && newIntf)
1040 for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1042 if (VarSetOps::IsMember(this, intfVar, oneNum))
1044 unsigned lclNum = lvaTrackedToVarNum[varNum];
1045 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1046 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1056 /*****************************************************************************
1058 * Determine preferred register mask for a given predictReg value
// Maps a predictReg value (plus the value's type) to the mask of registers
// the predictor may choose from: rpPredictMap[predictReg] restricted to the
// matching register file, with ARM doubles widened to register pairs.
1061 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
// A Txx (tracked-variable) prediction places no specific register demand.
1063 if (rpHasVarIndexForPredict(predictReg))
1064 predictReg = PREDICT_REG;
1066 noway_assert((unsigned)predictReg < _countof(rpPredictMap));
1067 noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1069 regMaskTP regAvailForType = rpPredictMap[predictReg];
1070 if (varTypeIsFloating(type))
1072 regAvailForType &= RBM_ALLFLOAT;
1076 regAvailForType &= RBM_ALLINT;
// A double needs an even/odd FP register pair; include the odd halves.
1079 if (type == TYP_DOUBLE)
1081 if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1083 // Fix 388433 ARM JitStress WP7
1084 if ((regAvailForType & RBM_DBL_REGS) != 0)
1086 regAvailForType |= (regAvailForType << 1);
// A specific odd single register cannot hold a double.
1090 regAvailForType = RBM_NONE;
1095 return regAvailForType;
1098 /*****************************************************************************
1100 * Predict register choice for a type.
1102 * Adds the predicted registers to rsModifiedRegsMask.
// Picks a concrete register (or register pair) satisfying `predictReg` for a
// value of `type`, avoiding lockedRegs. Falls back to a predefined spill
// choice (counting predicted spill temps) when nothing is free, records
// register/variable interferences for last-use and in-place-use variables,
// and marks the chosen registers modified. The type-dispatch switch and some
// early-return lines are missing from this listing; structure hedged below.
1104 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1106 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1110 // Add any reserved register to the lockedRegs
1111 lockedRegs |= codeGen->regSet.rsMaskResvd;
1113 /* Clear out the lockedRegs from preferReg */
1114 preferReg &= ~lockedRegs;
// Avoid the register already promised to the assignment target variable.
1116 if (rpAsgVarNum != -1)
1118 noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED))
1120 /* Don't pick the register used by rpAsgVarNum either */
1121 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1122 noway_assert(tgtVar->lvRegNum != REG_STK);
1124 preferReg &= ~genRegMask(tgtVar->lvRegNum);
1138 #ifdef _TARGET_AMD64_
1140 #endif // _TARGET_AMD64_
// --- Single integer register case ---
1142 // expand preferReg to all non-locked registers if no bits set
1143 preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1145 if (preferReg == 0) // no bits set?
1147 // Add one predefined spill choice register if no bits set.
1148 // (The jit will introduce one spill temp)
1149 preferReg |= RBM_SPILL_CHOICE;
1150 rpPredictSpillCnt++;
1154 printf("Predict one spill temp\n");
1160 /* Iterate the registers in the order specified by rpRegTmpOrder */
1162 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1164 regNum = rpRegTmpOrder[index];
1165 regBits = genRegMask(regNum);
1167 if ((preferReg & regBits) == regBits)
1173 /* Otherwise we have allocated all registers, so do nothing */
// --- Integer register pair case (TYP_LONG on 32-bit targets) ---
1176 #ifndef _TARGET_AMD64_
1179 if ((preferReg == 0) || // no bits set?
1180 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1182 // expand preferReg to all non-locked registers
1183 preferReg = RBM_ALLINT & ~lockedRegs;
1186 if (preferReg == 0) // no bits set?
1188 // Add EAX:EDX to the registers
1189 // (The jit will introduce two spill temps)
1190 preferReg = RBM_PAIR_TMP;
1191 rpPredictSpillCnt += 2;
1194 printf("Predict two spill temps\n");
1197 else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
1199 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1201 // Add EAX to the registers
1202 // (The jit will introduce one spill temp)
1203 preferReg |= RBM_PAIR_TMP_LO;
1207 // Add EDX to the registers
1208 // (The jit will introduce one spill temp)
1209 preferReg |= RBM_PAIR_TMP_HI;
1211 rpPredictSpillCnt++;
1214 printf("Predict one spill temp\n");
1219 regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1220 if (regPair != REG_PAIR_NONE)
1222 regBits = genRegPairMask(regPair);
1226 /* Otherwise we have allocated all registers, so do nothing */
1228 #endif // _TARGET_AMD64_
// --- Floating-point register case ---
1237 #if FEATURE_FP_REGALLOC
// Honor the COMPlus_JitRegisterFP restriction, but always allow the
// caller-saved FP registers (which include the FP spill choice).
1238 regMaskTP restrictMask;
1239 restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
1240 assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1242 // expand preferReg to all available non-locked registers if no bits set
1243 preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1244 regMaskTP preferDouble;
1245 preferDouble = preferReg & (preferReg >> 1);
1247 if ((preferReg == 0) // no bits set?
1249 || ((type == TYP_DOUBLE) &&
1250 ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1254 // Add one predefined spill choice register if no bits set.
1255 // (The jit will introduce one spill temp)
1256 preferReg |= RBM_SPILL_CHOICE_FLT;
1257 rpPredictSpillCnt++;
1261 printf("Predict one spill temp (float)\n");
1265 assert(preferReg != 0);
1267 /* Iterate the registers in the order specified by raRegFltTmpOrder */
1269 for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1271 regNum = raRegFltTmpOrder[index];
1272 regBits = genRegMask(regNum);
1274 if (varTypeIsFloating(type))
1277 if (type == TYP_DOUBLE)
1279 if ((regBits & RBM_DBL_REGS) == 0)
1281 continue; // We must restrict the set to the double registers
1285 // TYP_DOUBLE use two consecutive registers
1286 regBits |= genRegMask(REG_NEXT(regNum));
1290 // See if COMPlus_JitRegisterFP is restricting this FP register
1292 if ((restrictMask & regBits) != regBits)
1296 if ((preferReg & regBits) == regBits)
1301 /* Otherwise we have allocated all registers, so do nothing */
1304 #else // !FEATURE_FP_REGALLOC
1311 noway_assert(!"unexpected type in reg use prediction");
1314 /* Abnormal return */
1315 noway_assert(!"Ran out of registers in rpPredictRegPick");
// --- Common exit path once regBits has been chosen ---
1320 * If during the first prediction we need to allocate
1321 * one of the registers that we used for coloring locals
1322 * then flag this by setting rpPredictAssignAgain.
1323 * We will have to go back and repredict the registers
1325 if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1326 rpPredictAssignAgain = true;
1328 // Add a register interference to each of the last use variables
1329 if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1331 VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1332 VarSetOps::Assign(this, lastUse, rpLastUseVars);
1333 VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1334 VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1335 // While we still have any lastUse or inPlaceUse bits
1336 VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1338 VARSET_TP varAsSet(VarSetOps::MakeEmpty(this));
1339 VarSetOps::Iter iter(this, useUnion);
1340 unsigned varNum = 0;
1341 while (iter.NextElem(&varNum))
1343 // We'll need this for one of the calls...
1344 VarSetOps::ClearD(this, varAsSet);
1345 VarSetOps::AddElemD(this, varAsSet, varNum);
1347 // If this varBit and lastUse?
1348 if (VarSetOps::IsMember(this, lastUse, varNum))
1350 // Record a register to variable interference
1351 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1354 // If this varBit and inPlaceUse?
1355 if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1357 // Record a register to variable interference
1358 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
// Remember that the predicted registers will be clobbered.
1362 codeGen->regSet.rsSetRegsModified(regBits);
1367 /*****************************************************************************
1369 * Predict integer register use for generating an address mode for a tree,
1370 * by setting tree->gtUsedRegs to all registers used by this tree and its children.
1372 * tree - is the child of a GT_IND node
1373 * type - the type of the GT_IND node (floating point/integer)
1374 * lockedRegs - are the registers which are currently held by
1375 * a previously evaluated node.
1376 * rsvdRegs - registers which should not be allocated because they will
1377 * be needed to evaluate a node in the future
1378 * - Also if rsvdRegs has the RBM_LASTUSE bit set then
1379 * the rpLastUseVars set should be saved and restored
1380 * so that we don't add any new variables to rpLastUseVars
1381 * lenCSE - is non-NULL only when we have a lenCSE expression
1383 * Return the scratch registers to be held by this tree. (one or two registers
1384 * to form an address expression)
1387 regMaskTP Compiler::rpPredictAddressMode(
1388 GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1393 genTreeOps oper = tree->OperGet();
1400 bool hasTwoAddConst = false;
1401 bool restoreLastUseVars = false;
1402 VARSET_TP oldLastUseVars(VarSetOps::MakeEmpty(this));
1404 /* do we need to save and restore the rpLastUseVars set ? */
1405 if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1407 restoreLastUseVars = true;
1408 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1410 rsvdRegs &= ~RBM_LASTUSE;
1412 /* if not an add, then just force it to a register */
// GT_ARR_ELEM is predicted as a whole tree (PREDICT_NONE); no address-mode folding
// is attempted for it.
1416 if (oper == GT_ARR_ELEM)
1418 regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1427 op1 = tree->gtOp.gtOp1;
1428 op2 = tree->gtOp.gtOp2;
1429 rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1431 /* look for (x + y) + icon address mode */
1433 if (op2->OperGet() == GT_CNS_INT)
1435 cns = op2->gtIntCon.gtIconVal;
1437 /* if not an add, then just force op1 into a register */
1438 if (op1->OperGet() != GT_ADD)
// We matched [(x + y) + icon]; remember it so extra scratch registers are predicted below.
1441 hasTwoAddConst = true;
1443 /* Record the 'rev' flag, reverse evaluation order */
1444 rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1446 op2 = op1->gtOp.gtOp2;
1447 op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1450 /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1453 if (op2->OperGet() == GT_LSH)
1455 if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1457 sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1458 opTemp = op2->gtOp.gtOp1;
// Skip over a GT_NOP wrapper to inspect the shifted operand itself.
1472 if (opTemp->OperGet() == GT_NOP)
1474 opTemp = opTemp->gtOp.gtOp1;
1477 // Is this a const operand?
1478 if (opTemp->OperGet() == GT_CNS_INT)
1480 // Compute the new cns value that Codegen will end up using
1481 cns += (opTemp->gtIntCon.gtIconVal << sh);
1487 /* Check for LSH in op1 slot */
1489 if (op1->OperGet() != GT_LSH)
1492 opTemp = op1->gtOp.gtOp2;
1494 if (opTemp->OperGet() != GT_CNS_INT)
1497 sh = opTemp->gtIntCon.gtIconVal;
1499 /* Check for LSH of 0, special case */
// The legal scale factors differ per target: x86/x64 scaled-index addressing
// allows shifts of 1, 2 or 3 (scale 2/4/8); ARM accepts a wider shift range.
1503 #if defined(_TARGET_XARCH_)
1505 /* Check for LSH of 1 2 or 3 */
1509 #elif defined(_TARGET_ARM_)
1511 /* Check for LSH of 1 to 30 */
1521 /* Matched a leftShift by 'sh' subtree, move op1 down */
1522 op1 = op1->gtOp.gtOp1;
1526 /* Now we have to evaluate op1 and op2 into registers */
1528 /* Evaluate op1 and op2 in the correct order */
1531 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1532 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1536 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1537 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1540 /* If op1 and op2 must be spilled and reloaded then
1541 * op1 and op2 might be reloaded into the same register
1542 * This can only happen when all the registers are lockedRegs
1544 if ((op1Mask == op2Mask) && (op1Mask != 0))
1546 /* We'll need to grab a different register for op2 */
1547 op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1551 // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1552 // [op2 + op1<<sh + cns]
1553 // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1555 if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1557 op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1561 // On the ARM we will need at least one scratch register for trees that have this form:
1562 // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
1563 // or for a float/double or long when we have both op1 and op2
1564 // or when we have a 'cns' that is too large for the ld/st instruction
1566 if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1568 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1572 // If we create a CSE that immediately dies then we may need to add an additional register interference
1573 // so we don't color the CSE into R3
1575 if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1577 opTemp = op2->gtOp.gtOp2;
1578 if (opTemp->OperGet() == GT_LCL_VAR)
1580 unsigned varNum = opTemp->gtLclVar.gtLclNum;
1581 LclVarDsc* varDsc = &lvaTable[varNum];
// Only a tracked variable that is not currently live can be a dead CSE here.
1583 if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1585 rpRecordRegIntf(RBM_TMP_0,
1586 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
// Report the registers used to form the [op1 + op2 (+ cns)] address.
1592 regMask = (op1Mask | op2Mask);
1593 tree->gtUsedRegs = (regMaskSmall)regMask;
1598 /* now we have to evaluate op1 into a register */
1600 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1605 // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1608 if (!codeGen->validDispForLdSt(cns, type))
1610 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1614 regMask = (op1Mask | op2Mask);
1615 tree->gtUsedRegs = (regMaskSmall)regMask;
1620 #if !CPU_LOAD_STORE_ARCH
1621 if (oper == GT_CNS_INT)
1623 /* Indirect of a constant does not require a register */
1629 /* now we have to evaluate tree into a register */
1630 regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1634 regMaskTP regUse = tree->gtUsedRegs;
1636 if (!VarSetOps::IsEmpty(this, compCurLife))
1638 // Add interference between the current set of live variables and
1639 // the set of temporary registers needed to evaluate the sub tree
1642 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1646 /* Do we need to restore the oldLastUseVars value */
1647 if (restoreLastUseVars)
1650 * If we used a GT_ASG targeted register then we need to add
1651 * a variable interference between any new last use variables
1652 * and the GT_ASG targeted register
1654 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1656 rpRecordVarIntf(rpAsgVarNum,
1657 VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1659 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1665 /*****************************************************************************
1670 void Compiler::rpPredictRefAssign(unsigned lclNum)
// Mark local 'lclNum' as the target of a GC ref assignment (lvRefAssign = 1), and when
// NOGC write barriers are in use, keep such variables out of the register the write
// barrier helper uses (EDX) by adding an interference to raLclRegIntf[REG_EDX].
1672 LclVarDsc* varDsc = lvaTable + lclNum;
1674 varDsc->lvRefAssign = 1;
1676 #if NOGC_WRITE_BARRIERS
// NOTE(review): this printf is diagnostic output; in the full source it is presumably
// wrapped in a DEBUG/verbose guard that is elided from this view -- confirm before
// assuming it runs in release builds.
1680 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1681 printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1682 varDsc->lvVarIndex);
1686 /* Make sure that write barrier pointer variables never land in EDX */
1687 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1688 #endif // NOGC_WRITE_BARRIERS
1691 /*****************************************************************************
1693 * Predict the internal temp physical register usage for a block assignment tree,
1694 * by setting tree->gtUsedRegs.
1695 * Records the internal temp physical register usage for this tree.
1696 * Returns a mask of interfering registers for this tree.
1698 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1699 * to the set of scratch registers needed when evaluating the tree.
1700 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1701 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1702 * predict additional internal temp physical registers to spill into.
1704 * tree - is the child of a GT_IND node
1705 * predictReg - what type of register does the tree need
1706 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1707 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1708 * rsvdRegs - registers which should not be allocated because they will
1709 * be needed to evaluate a node in the future
1710 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1711 * the rpLastUseVars set should be saved and restored
1712 * so that we don't add any new variables to rpLastUseVars.
1714 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
1715 rpPredictReg predictReg,
1716 regMaskTP lockedRegs,
1719 regMaskTP regMask = RBM_NONE;
1720 regMaskTP interferingRegs = RBM_NONE;
1722 bool hasGCpointer = false;
1723 bool dstIsOnStack = false;
1724 bool useMemHelper = false;
1725 bool useBarriers = false;
1726 GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
1727 GenTreePtr dstAddr = dst->Addr();
1728 GenTreePtr srcAddrOrFill = tree->gtGetOp2IfPresent();
1730 size_t blkSize = dst->gtBlkSize;
1732 hasGCpointer = (dst->HasGCPtr());
1734 bool isCopyBlk = tree->OperIsCopyBlkOp();
1735 bool isCopyObj = isCopyBlk && hasGCpointer;
1736 bool isInitBlk = tree->OperIsInitBlkOp();
// For a copy, the source operand is an indirection; work with its address operand.
1740 assert(srcAddrOrFill->OperIsIndir());
1741 srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1745 // For initBlk, we don't need to worry about the GC pointers.
1746 hasGCpointer = false;
// Destination through a local's address (GT_ADDR with GTF_ADDR_ONSTACK) cannot
// require write barriers, since the stack is not GC heap.
1753 dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1758 if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1760 useMemHelper = true;
1766 useMemHelper = true;
1769 if (hasGCpointer && !dstIsOnStack)
1776 // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
1778 if (!useMemHelper && !useBarriers)
1780 bool useLoop = false;
1781 unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1783 // A mask to use to force the predictor to choose low registers (to reduce code size)
1784 regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1786 // Allow the src and dst to be used in place, unless we use a loop, in which
1787 // case we will need scratch registers as we will be writing to them.
1788 rpPredictReg srcAndDstPredict = PREDICT_REG;
1790 // Will we be using a loop to implement this INITBLK/COPYBLK?
1791 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1794 avoidReg = RBM_NONE;
1795 srcAndDstPredict = PREDICT_SCRATCH_REG;
// Predict src/dst address operands in the order dictated by GTF_REVERSE_OPS,
// locking the first operand's mask while predicting the second.
1798 if (tree->gtFlags & GTF_REVERSE_OPS)
1800 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1801 dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1802 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1806 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1807 srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1808 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1811 // We need at least one scratch register for a copyBlk
1814 // Pick a low register to reduce the code size
1815 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1822 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1823 // Pick another low register to reduce the code size
1824 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1827 // We need a loop index register
1828 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1831 tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1833 return interferingRegs;
1836 // What order should the Dest, Val/Src, and Size be calculated
1837 GenTreePtr opsPtr[3];
1838 regMaskTP regsPtr[3];
1840 #if defined(_TARGET_XARCH_)
// x86/x64 uses the rep movs/stos convention: EDI = dest, ESI/EAX = src/fill, ECX = count.
1841 fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1843 // We're going to use these, might as well make them available now
1845 codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1847 codeGen->regSet.rsSetRegsModified(RBM_ESI);
1849 #elif defined(_TARGET_ARM_)
1853 // For all other cases that involve non-constants, we just call memcpy/memset
1855 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1856 interferingRegs |= RBM_CALLEE_TRASH;
// NOTE(review): this printf is diagnostic output; presumably under a DEBUG/verbose
// guard elided from this view.
1859 printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1864 assert(useBarriers);
1867 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1869 // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1870 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
// NOTE(review): diagnostic printf, presumably DEBUG-guarded in the full source.
1873 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1876 #else // !_TARGET_X86_ && !_TARGET_ARM_
1877 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1878 #endif // !_TARGET_X86_ && !_TARGET_ARM_
// Predict the three ordered operands (dest, src/fill, size), locking each predicted
// mask while the later operands are evaluated, and record interference with live vars.
1879 regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1880 regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1881 opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1882 regMask |= regsPtr[0];
1883 opsPtr[0]->gtUsedRegs |= regsPtr[0];
1884 rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1886 regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1887 opsPtr2RsvdRegs | RBM_LASTUSE);
1888 regMask |= regsPtr[1];
1889 opsPtr[1]->gtUsedRegs |= regsPtr[1];
1890 rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1892 regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1893 if (opsPtr[2] == nullptr)
1895 // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1896 // Note that it is quite possible that no register is required, but this preserves
// our existing behavior.
1898 regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1899 rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1903 regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1904 opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1906 regMask |= opsPtr2UsedRegs;
1908 tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1909 return interferingRegs;
1912 /*****************************************************************************
1914 * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1915 * Returns a regMask with the internal temp physical register usage for this tree.
1917 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1918 * to the set of scratch registers needed when evaluating the tree.
1919 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1920 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1921 * predict additional internal temp physical registers to spill into.
1923 * tree - is the child of a GT_IND node
1924 * predictReg - what type of register does the tree need
1925 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1926 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1927 * rsvdRegs - registers which should not be allocated because they will
1928 * be needed to evaluate a node in the future
1929 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1930 * the rpLastUseVars set should be saved and restored
1931 * so that we don't add any new variables to rpLastUseVars.
1934 #pragma warning(disable : 4701)
1937 #pragma warning(push)
1938 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1940 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
1941 rpPredictReg predictReg,
1942 regMaskTP lockedRegs,
1945 regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
1948 rpPredictReg op1PredictReg;
1949 rpPredictReg op2PredictReg;
1950 LclVarDsc* varDsc = NULL;
1951 VARSET_TP oldLastUseVars(VarSetOps::UninitVal());
1953 VARSET_TP varBits(VarSetOps::UninitVal());
1954 VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1956 bool restoreLastUseVars = false;
1957 regMaskTP interferingRegs = RBM_NONE;
1960 // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1962 noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1963 noway_assert(RBM_ILLEGAL);
1964 noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1965 /* impossible values, to make sure that we set them */
1966 tree->gtUsedRegs = RBM_ILLEGAL;
1969 /* Figure out what kind of a node we have */
1971 genTreeOps oper = tree->OperGet();
1972 var_types type = tree->TypeGet();
1973 unsigned kind = tree->OperKind();
1975 // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1976 genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1977 if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1978 predictReg = PREDICT_NONE;
1979 else if (rpHasVarIndexForPredict(predictReg))
1981 // The only place where predictReg is set to a var is in the PURE
1982 // assignment case where varIndex is the var being assigned to.
1983 // We need to check whether the variable is used between here and
1984 // its redefinition.
1985 unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1986 unsigned lclNum = lvaTrackedToVarNum[varIndex];
1988 for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1990 if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1992 // Is this the pure assignment?
1993 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1995 predictReg = PREDICT_SCRATCH_REG;
2004 if (rsvdRegs & RBM_LASTUSE)
2006 restoreLastUseVars = true;
2007 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2008 rsvdRegs &= ~RBM_LASTUSE;
2011 /* Is this a constant or leaf node? */
2013 if (kind & (GTK_CONST | GTK_LEAF))
2015 bool lastUse = false;
2016 regMaskTP enregMask = RBM_NONE;
2022 // Codegen for floating point constants on the ARM is currently
2023 // movw/movt rT1, <lo32 bits>
2024 // movw/movt rT2, <hi32 bits>
2025 // vmov.i2d dT0, rT1,rT2
2027 // For TYP_FLOAT one integer register is required
2029 // These integer register(s) immediately die
2030 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2031 if (type == TYP_DOUBLE)
2033 // For TYP_DOUBLE a second integer register is required
2035 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2038 // We also need a floating point register that we keep
2040 if (predictReg == PREDICT_NONE)
2041 predictReg = PREDICT_SCRATCH_REG;
2043 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2044 tree->gtUsedRegs = regMask | tmpMask;
2051 if (rpHasVarIndexForPredict(predictReg))
2053 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2054 rpAsgVarNum = tgtIndex;
2056 // We don't need any register as we plan on writing to the rpAsgVarNum register
2057 predictReg = PREDICT_NONE;
2059 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2060 tgtVar->lvDependReg = true;
2062 if (type == TYP_LONG)
2064 assert(oper == GT_CNS_LNG);
2066 if (tgtVar->lvOtherReg == REG_STK)
2068 // Well we do need one register for a partially enregistered
2070 predictReg = PREDICT_SCRATCH_REG;
2076 #if !CPU_LOAD_STORE_ARCH
2077 /* If the constant is a handle then it will need to have a relocation
2078 applied to it. It will need to be loaded into a register.
2079 But never throw away an existing hint.
2081 if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2084 if (predictReg == PREDICT_NONE)
2085 predictReg = PREDICT_SCRATCH_REG;
2094 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2095 (genTypeSize(type) < sizeof(int)))
2097 predictReg = PREDICT_SCRATCH_REG;
2100 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2102 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2104 // These integer register(s) immediately die
2105 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2106 // Two integer registers are required for a TYP_DOUBLE
2107 if (type == TYP_DOUBLE)
2108 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2110 // We need a temp register in some cases of loads/stores to a class var
2111 if (predictReg == PREDICT_NONE)
2113 predictReg = PREDICT_SCRATCH_REG;
2116 if (rpHasVarIndexForPredict(predictReg))
2118 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2119 rpAsgVarNum = tgtIndex;
2121 // We don't need any register as we plan on writing to the rpAsgVarNum register
2122 predictReg = PREDICT_NONE;
2124 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2125 tgtVar->lvDependReg = true;
2127 if (type == TYP_LONG)
2129 if (tgtVar->lvOtherReg == REG_STK)
2131 // Well we do need one register for a partially enregistered
2133 predictReg = PREDICT_SCRATCH_REG;
2141 // Check for a misalignment on a Floating Point field
2143 if (varTypeIsFloating(type))
2145 if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2147 // These integer register(s) immediately die
2148 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2149 // Two integer registers are required for a TYP_DOUBLE
2150 if (type == TYP_DOUBLE)
2151 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2160 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2162 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2163 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2164 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2166 #if FEATURE_STACK_FP_X87
2167 // If it's a floating point var, there's nothing to do
2168 if (varTypeIsFloating(type))
2170 tree->gtUsedRegs = RBM_NONE;
2176 // If the variable is already a register variable, no need to go further.
2177 if (oper == GT_REG_VAR)
2180 /* Apply the type of predictReg to the LCL_VAR */
2182 if (predictReg == PREDICT_REG)
2185 if (varDsc->lvRegNum == REG_STK)
2190 else if (predictReg == PREDICT_SCRATCH_REG)
2192 noway_assert(predictReg == PREDICT_SCRATCH_REG);
2194 /* Is this the last use of a local var? */
2197 if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2198 goto PREDICT_REG_COMMON;
2201 else if (rpHasVarIndexForPredict(predictReg))
2203 /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2205 unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2206 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2207 VarSetOps::MakeSingleton(this, tgtIndex1);
2209 noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2210 noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2211 #ifndef _TARGET_AMD64_
2212 // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2213 // so this assert is meaningless
2214 noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2215 #endif // !_TARGET_AMD64_
2217 if (varDsc->lvTracked)
2220 srcIndex = varDsc->lvVarIndex;
2222 // If this register has it's last use here then we will prefer
2223 // to color to the same register as tgtVar.
2227 * Add an entry in the lvaVarPref graph to indicate
2228 * that it would be worthwhile to color these two variables
2229 * into the same physical register.
2230 * This will help us avoid having an extra copy instruction
2232 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2233 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2236 // Add a variable interference from srcIndex to each of the last use variables
2237 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2239 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2242 rpAsgVarNum = tgtIndex1;
2244 /* We will rely on the target enregistered variable from the GT_ASG */
2251 if (genIsValidFloatReg(varDsc->lvRegNum))
2253 enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2257 enregMask = genRegMask(varDsc->lvRegNum);
2261 if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2263 // We need to compute the intermediate value using a TYP_DOUBLE
2264 // but we storing the result in a TYP_SINGLE enregistered variable
2271 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2272 if (enregMask & (rsvdRegs | lockedRegs))
2276 #ifndef _TARGET_64BIT_
2277 if (type == TYP_LONG)
2279 if (varDsc->lvOtherReg != REG_STK)
2281 tmpMask = genRegMask(varDsc->lvOtherReg);
2282 enregMask |= tmpMask;
2284 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2285 if (tmpMask & (rsvdRegs | lockedRegs))
2288 else // lvOtherReg == REG_STK
2293 #endif // _TARGET_64BIT_
2296 varDsc->lvDependReg = true;
2300 /* Does not need a register */
2301 predictReg = PREDICT_NONE;
2302 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2303 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2305 else // (grabCount > 0)
2307 #ifndef _TARGET_64BIT_
2308 /* For TYP_LONG and we only need one register then change the type to TYP_INT */
2309 if ((type == TYP_LONG) && (grabCount == 1))
2311 /* We will need to pick one register */
2313 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2314 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2316 noway_assert((type == TYP_DOUBLE) ||
2317 (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2318 #else // !_TARGET_64BIT_
2319 noway_assert(grabCount == 1);
2320 #endif // !_TARGET_64BIT_
2323 else if (type == TYP_STRUCT)
2326 // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2327 // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2328 // As a workaround we just bash it to PREDICT_NONE here
2330 if (predictReg != PREDICT_NONE)
2331 predictReg = PREDICT_NONE;
2333 // Currently predictReg is saying that we will not need any scratch registers
2334 noway_assert(predictReg == PREDICT_NONE);
2336 /* We may need to sign or zero extend a small type when pushing a struct */
2337 if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2339 for (unsigned varNum = varDsc->lvFieldLclStart;
2340 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2342 LclVarDsc* fldVar = lvaTable + varNum;
2344 if (fldVar->lvStackAligned())
2346 // When we are stack aligned Codegen will just use
2347 // a push instruction and thus doesn't need any register
2348 // since we can push both a register or a stack frame location
2352 if (varTypeIsByte(fldVar->TypeGet()))
2354 // We will need to reserve one byteable register,
2357 predictReg = PREDICT_SCRATCH_REG;
2358 #if CPU_HAS_BYTE_REGS
2359 // It is best to enregister this fldVar in a byteable register
2361 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2364 else if (varTypeIsShort(fldVar->TypeGet()))
2366 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2367 // If fldVar is not enregistered then we will need a scratch register
2369 if (!isEnregistered)
2371 // We will need either an int register or a byte register
2372 // If we are not requesting a byte register we will request an int register
2374 if (type != TYP_BYTE)
2376 predictReg = PREDICT_SCRATCH_REG;
2384 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2387 if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2389 varDsc->addPrefReg(preferReg, this);
2393 break; /* end of case GT_LCL_VAR */
2396 tree->gtUsedRegs = RBM_NONE;
2399 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2400 // Mark the registers required to emit a tailcall profiler callback
2401 if (compIsProfilerHookNeeded())
2403 tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2410 } /* end of switch (oper) */
2412 /* If we don't need to evaluate to register, regmask is the empty set */
2413 /* Otherwise we grab a temp for the local variable */
2415 if (predictReg == PREDICT_NONE)
2419 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2421 if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2423 /* We need to sign or zero extend a small type when pushing a struct */
2424 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2426 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2427 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2429 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2432 LclVarDsc* fldVar = lvaTable + varNum;
2433 if (fldVar->lvTracked)
2435 VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2436 rpRecordRegIntf(regMask, fldBit DEBUGARG(
2437 "need scratch register when pushing a small field of a struct"));
2443 /* Update the set of lastUse variables that we encountered so far */
2446 VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2447 VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2450 * Add interference from any previously locked temps into this last use variable.
2454 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2457 * Add interference from any reserved temps into this last use variable.
2461 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2464 * For partially enregistered longs add an interference with the
2465 * register return by rpPredictRegPick
2467 if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2469 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2473 tree->gtUsedRegs = (regMaskSmall)regMask;
2477 /* Is it a 'simple' unary/binary operator? */
2479 if (kind & GTK_SMPOP)
2481 GenTreePtr op1 = tree->gtOp.gtOp1;
2482 GenTreePtr op2 = tree->gtGetOp2IfPresent();
2484 GenTreePtr opsPtr[3];
2485 regMaskTP regsPtr[3];
2487 VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2493 /* Is the value being assigned into a LCL_VAR? */
2494 if (op1->gtOper == GT_LCL_VAR)
2496 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2498 /* Are we assigning a LCL_VAR the result of a call? */
2499 if (op2->gtOper == GT_CALL)
2501 /* Set a preferred register for the LCL_VAR */
2502 if (isRegPairType(varDsc->TypeGet()))
2503 varDsc->addPrefReg(RBM_LNGRET, this);
2504 else if (!varTypeIsFloating(varDsc->TypeGet()))
2505 varDsc->addPrefReg(RBM_INTRET, this);
2506 #ifdef _TARGET_AMD64_
2508 varDsc->addPrefReg(RBM_FLOATRET, this);
2511 * When assigning the result of a call we don't
2512 * bother trying to target the right side of the
2513 * assignment, since we have a fixed calling convention.
2516 else if (varDsc->lvTracked)
2518 // We interfere with uses in place
2519 if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2521 rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2524 // Did we predict that this local will be fully enregistered?
2525 // and the assignment type is the same as the expression type?
2526 // and it is dead on the right side of the assignment?
2527 // and we current have no other rpAsgVarNum active?
2529 if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2530 (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2533 // Yes, we should try to target the right side (op2) of this
2534 // assignment into the (enregistered) tracked variable.
2537 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2538 op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2540 // Remember that this is a new use in place
2542 // We've added "new UseInPlace"; remove from the global set.
2543 VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2545 // Note that later when we walk down to the leaf node for op2
2546 // if we decide to actually use the register for the 'varDsc'
2547 // to enregister the operand, the we will set rpAsgVarNum to
2548 // varDsc->lvVarIndex, by extracting this value using
2549 // rpGetVarIndexForPredict()
2551 // Also we reset rpAsgVarNum back to -1 after we have finished
2552 // predicting the current GT_ASG node
2558 else if (tree->OperIsBlkOp())
2560 interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2577 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2578 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2580 /* Is the value being assigned into an enregistered LCL_VAR? */
2581 /* For debug code we only allow a simple op2 to be assigned */
2582 if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2584 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2585 /* Did we predict that this local will be enregistered? */
2586 if (varDsc->lvRegNum != REG_STK)
2588 /* Yes, we can use "reg <op>= addr" */
2590 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2591 op2PredictReg = PREDICT_NONE;
2598 #if CPU_LOAD_STORE_ARCH
2601 op1PredictReg = PREDICT_REG;
2602 op2PredictReg = PREDICT_REG;
2608 * Otherwise, initialize the normal forcing of operands:
2611 op1PredictReg = PREDICT_ADDR;
2612 op2PredictReg = PREDICT_REG;
2617 #if !CPU_LOAD_STORE_ARCH
2618 if (op2PredictReg != PREDICT_NONE)
2620 /* Is the value being assigned a simple one? */
2621 if (rpCanAsgOperWithoutReg(op2, false))
2622 op2PredictReg = PREDICT_NONE;
2626 bool simpleAssignment;
2627 simpleAssignment = false;
2629 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2631 // Add a variable interference from the assign target
2632 // to each of the last use variables
2633 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2635 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2637 if (varDsc->lvTracked)
2639 unsigned varIndex = varDsc->lvVarIndex;
2641 rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2645 /* Record whether this tree is a simple assignment to a local */
2647 simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2650 bool requireByteReg;
2651 requireByteReg = false;
2653 #if CPU_HAS_BYTE_REGS
2654 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2656 if (varTypeIsByte(type) &&
2657 ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2660 // Byte-assignments typically need a byte register
2661 requireByteReg = true;
2663 if (op1->gtOper == GT_LCL_VAR)
2665 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2667 // Did we predict that this local will be enregistered?
2668 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2670 // We don't require a byte register when op1 is an enregistered local */
2671 requireByteReg = false;
2674 // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2675 if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2677 // We should try to put op1 in an byte register
2678 varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2684 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2686 bool isWriteBarrierAsgNode;
2687 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2689 GCInfo::WriteBarrierForm wbf;
2690 if (isWriteBarrierAsgNode)
2691 wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2693 wbf = GCInfo::WBF_NoBarrier;
2696 regMaskTP wbaLockedRegs;
2697 wbaLockedRegs = lockedRegs;
2698 if (isWriteBarrierAsgNode)
2700 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2702 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2705 wbaLockedRegs |= RBM_WRITE_BARRIER;
2706 op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2707 assert(REG_WRITE_BARRIER == REG_EDX);
2708 op1PredictReg = PREDICT_REG_EDX;
2713 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2715 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2718 op1PredictReg = PREDICT_REG_ECX;
2719 op2PredictReg = PREDICT_REG_EDX;
2720 #elif defined(_TARGET_ARM_)
2721 op1PredictReg = PREDICT_REG_R0;
2722 op2PredictReg = PREDICT_REG_R1;
2724 // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2725 if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2727 op1 = op1->gtOp.gtOp1;
2729 #else // !_TARGET_X86_ && !_TARGET_ARM_
2730 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2736 /* Are we supposed to evaluate RHS first? */
2738 if (tree->gtFlags & GTF_REVERSE_OPS)
2740 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2742 #if CPU_HAS_BYTE_REGS
2743 // Should we insure that op2 gets evaluated into a byte register?
2744 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2746 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2747 // and we can't select one that is already reserved (i.e. lockedRegs)
2749 op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2750 op2->gtUsedRegs |= op2Mask;
2752 // No longer a simple assignment because we're using extra registers and might
2753 // have interference between op1 and op2. See DevDiv #136681
2754 simpleAssignment = false;
2758 * For a simple assignment we don't want the op2Mask to be
2759 * marked as interferring with the LCL_VAR, since it is likely
2760 * that we will want to enregister the LCL_VAR in exactly
2761 * the register that is used to compute op2
2763 tmpMask = lockedRegs;
2765 if (!simpleAssignment)
2768 regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2770 // Did we relax the register prediction for op1 and op2 above ?
2771 // - because we are depending upon op1 being enregistered
2773 if ((op1PredictReg == PREDICT_NONE) &&
2774 ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2776 /* We must be assigning into an enregistered LCL_VAR */
2777 noway_assert(op1->gtOper == GT_LCL_VAR);
2778 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2779 noway_assert(varDsc->lvRegNum != REG_STK);
2781 /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2782 varDsc->lvDependReg = true;
2787 // For the case of simpleAssignments op2 should always be evaluated first
2788 noway_assert(!simpleAssignment);
2790 regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2791 if (isWriteBarrierAsgNode)
2793 wbaLockedRegs |= op1->gtUsedRegs;
2795 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2797 #if CPU_HAS_BYTE_REGS
2798 // Should we insure that op2 gets evaluated into a byte register?
2799 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2801 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2802 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2805 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2806 op2->gtUsedRegs |= op2Mask;
2811 if (rpHasVarIndexForPredict(op2PredictReg))
2816 if (isWriteBarrierAsgNode)
2818 #if NOGC_WRITE_BARRIERS
2820 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2824 /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2825 passed to the write-barrier call in REG_WRITE_BARRIER */
2829 if (op1->gtOper == GT_IND)
2831 GenTreePtr rv1, rv2;
2835 /* Special handling of indirect assigns for write barrier */
2837 bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2840 /* Check address mode for enregisterable locals */
2844 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2846 rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2848 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2850 rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2855 if (op2->gtOper == GT_LCL_VAR)
2857 rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2860 // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2861 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2863 rpRecordRegIntf(RBM_WRITE_BARRIER,
2864 rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2866 tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2871 #endif // NOGC_WRITE_BARRIERS
2873 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2878 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2881 // For the ARM target we have an optimized JIT Helper
2882 // that only trashes a subset of the callee saved registers
2885 // NOTE: Adding it to the gtUsedRegs will cause the interference to
2886 // be added appropriately
2888 // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
2889 // instead of gtUsedRegs, because the latter will be modified later, but we need
2890 // to remember to add the interference.
2892 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2894 op1->gtUsedRegs |= RBM_R0;
2895 op2->gtUsedRegs |= RBM_R1;
2896 #else // _TARGET_ARM_
2900 printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2902 // We have to call a normal JIT helper to perform the Write Barrier Assignment
2903 // It will trash the callee saved registers
2905 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2906 #endif // _TARGET_ARM_
2908 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2911 if (simpleAssignment)
2914 * Consider a simple assignment to a local:
2918 * Since the "=" node is visited after the variable
2919 * is marked live (assuming it's live after the
2920 * assignment), we don't want to use the register
2921 * use mask of the "=" node but rather that of the
2924 tree->gtUsedRegs = op1->gtUsedRegs;
2928 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2930 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2936 /* assigning shift operators */
2938 noway_assert(type != TYP_LONG);
2940 #if CPU_LOAD_STORE_ARCH
2941 predictReg = PREDICT_ADDR;
2943 predictReg = PREDICT_NONE;
2946 /* shift count is handled same as ordinary shift */
2947 goto HANDLE_SHIFT_COUNT;
2950 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2952 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2954 // We need a scratch register for the LEA instruction
2955 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2958 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2963 /* Cannot cast to VOID */
2964 noway_assert(type != TYP_VOID);
2966 /* cast to long is special */
2967 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2969 noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2970 #if CPU_LONG_USES_REGPAIR
2971 rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2973 if (rpHasVarIndexForPredict(predictReg))
2975 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2976 rpAsgVarNum = tgtIndex;
2978 // We don't need any register as we plan on writing to the rpAsgVarNum register
2979 predictReg = PREDICT_NONE;
2981 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2982 tgtVar->lvDependReg = true;
2984 if (tgtVar->lvOtherReg != REG_STK)
2986 predictRegHi = PREDICT_NONE;
2991 if (predictReg == PREDICT_NONE)
2993 predictReg = PREDICT_SCRATCH_REG;
2996 // If we are widening an int into a long using a targeted register pair we
2997 // should retarget so that the low part get loaded into the appropriate register
2998 else if (predictReg == PREDICT_PAIR_R0R1)
3000 predictReg = PREDICT_REG_R0;
3001 predictRegHi = PREDICT_REG_R1;
3003 else if (predictReg == PREDICT_PAIR_R2R3)
3005 predictReg = PREDICT_REG_R2;
3006 predictRegHi = PREDICT_REG_R3;
3010 // If we are widening an int into a long using a targeted register pair we
3011 // should retarget so that the low part get loaded into the appropriate register
3012 else if (predictReg == PREDICT_PAIR_EAXEDX)
3014 predictReg = PREDICT_REG_EAX;
3015 predictRegHi = PREDICT_REG_EDX;
3017 else if (predictReg == PREDICT_PAIR_ECXEBX)
3019 predictReg = PREDICT_REG_ECX;
3020 predictRegHi = PREDICT_REG_EBX;
3024 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3026 #if CPU_LONG_USES_REGPAIR
3027 if (predictRegHi != PREDICT_NONE)
3029 // Now get one more reg for the upper part
3030 regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3033 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3037 /* cast from long is special - it frees a register */
3038 if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
3039 && op1->gtType == TYP_LONG)
3041 if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3042 predictReg = PREDICT_REG;
3044 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3046 // If we have 2 or more regs, free one of them
3047 if (!genMaxOneBit(regMask))
3049 /* Clear the 2nd lowest bit in regMask */
3050 /* First set tmpMask to the lowest bit in regMask */
3051 tmpMask = genFindLowestBit(regMask);
3052 /* Next find the second lowest bit in regMask */
3053 tmpMask = genFindLowestBit(regMask & ~tmpMask);
3054 /* Clear this bit from regmask */
3055 regMask &= ~tmpMask;
3057 tree->gtUsedRegs = op1->gtUsedRegs;
3061 #if CPU_HAS_BYTE_REGS
3062 /* cast from signed-byte is special - it uses byteable registers */
3063 if (type == TYP_INT)
3065 var_types smallType;
3067 if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3068 smallType = tree->gtCast.CastOp()->TypeGet();
3070 smallType = tree->gtCast.gtCastType;
3072 if (smallType == TYP_BYTE)
3074 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3076 if ((regMask & RBM_BYTE_REGS) == 0)
3077 regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3079 tree->gtUsedRegs = (regMaskSmall)regMask;
3085 #if FEATURE_STACK_FP_X87
3086 /* cast to float/double is special */
3087 if (varTypeIsFloating(type))
3089 switch (op1->TypeGet())
3091 /* uses fild, so don't need to be loaded to reg */
3094 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3095 tree->gtUsedRegs = op1->gtUsedRegs;
3103 /* Casting from integral type to floating type is special */
3104 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3106 if (opts.compCanUseSSE2)
3108 // predict for SSE2 based casting
3109 if (predictReg <= PREDICT_REG)
3110 predictReg = PREDICT_SCRATCH_REG;
3111 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3113 // Get one more int reg to hold cast result
3114 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3115 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3121 #if FEATURE_FP_REGALLOC
3122 // Are we casting between int to float or float to int
3123 // Fix 388428 ARM JitStress WP7
3124 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3126 // op1 needs to go into a register
3127 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3130 if (varTypeIsFloating(op1->TypeGet()))
3132 // We also need a fp scratch register for the convert operation
3133 regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3134 PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3137 // We also need a register to hold the result
3138 regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3139 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3144 /* otherwise must load op1 into a register */
3149 #ifdef _TARGET_XARCH_
3150 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3152 // This is a special case to handle the following
3153 // optimization: conv.i4(round.d(d)) -> round.i(d)
3154 // if flowgraph 3186
3156 if (predictReg <= PREDICT_REG)
3157 predictReg = PREDICT_SCRATCH_REG;
3159 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3161 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3163 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3171 if (tree->TypeGet() == TYP_LONG)
3173 // On ARM this consumes an extra register for the '0' value
3174 if (predictReg <= PREDICT_REG)
3175 predictReg = PREDICT_SCRATCH_REG;
3177 regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3179 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3181 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3184 #endif // _TARGET_ARM_
3189 // these unary operators will write new values
3190 // and thus will need a scratch register
3192 /* generic unary operators */
3194 if (predictReg <= PREDICT_REG)
3195 predictReg = PREDICT_SCRATCH_REG;
3200 // these unary operators do not write new values
3201 // and thus won't need a scratch register
3202 CLANG_FORMAT_COMMENT_ANCHOR;
3207 tree->gtUsedRegs = 0;
3212 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3213 tree->gtUsedRegs = op1->gtUsedRegs;
3217 case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3219 bool intoReg = true;
3220 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3222 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3224 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3227 if (predictReg == PREDICT_ADDR)
3231 else if (predictReg == PREDICT_NONE)
3233 if (type != TYP_LONG)
3239 predictReg = PREDICT_REG;
3243 /* forcing to register? */
3244 if (intoReg && (type != TYP_LONG))
3246 rsvdRegs |= RBM_LASTUSE;
3252 /* check for address mode */
3253 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3256 #if CPU_LOAD_STORE_ARCH
3257 // We may need a scratch register for loading a long
3258 if (type == TYP_LONG)
3260 /* This scratch register immediately dies */
3261 tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3263 #endif // CPU_LOAD_STORE_ARCH
3266 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3268 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3270 /* These integer register(s) immediately die */
3271 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3272 // Two integer registers are required for a TYP_DOUBLE
3273 if (type == TYP_DOUBLE)
3275 rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3279 /* forcing to register? */
3282 regMaskTP lockedMask = lockedRegs | rsvdRegs;
3285 // We will compute a new regMask that holds the register(s)
3286 // that we will load the indirection into.
3288 CLANG_FORMAT_COMMENT_ANCHOR;
3290 #ifndef _TARGET_64BIT_
3291 if (type == TYP_LONG)
3293 // We need to use multiple load instructions here:
3294 // For the first register we can not choose
3295 // any registers that are being used in place or
3296 // any register in the current regMask
3298 regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3300 // For the second register we can choose a register that was
3301 // used in place or any register in the old now overwritten regMask
3302 // but not the same register that we picked above in 'regMask'
3304 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3305 regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3310 // We will use one load instruction here:
3311 // The load target register can be a register that was used in place
3312 // or one of the register from the orginal regMask.
3314 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3315 regMask = rpPredictRegPick(type, predictReg, lockedMask);
3318 else if (predictReg != PREDICT_ADDR)
3320 /* Unless the caller specified PREDICT_ADDR */
3321 /* we don't return the temp registers used */
3322 /* to form the address */
3327 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3339 /* Floating point comparison uses EAX for flags */
3340 if (varTypeIsFloating(op1->TypeGet()))
3346 if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3348 // Some comparisons are converted to ?:
3349 noway_assert(!fgMorphRelopToQmark(op1));
3351 if (predictReg <= PREDICT_REG)
3352 predictReg = PREDICT_SCRATCH_REG;
3354 // The set instructions need a byte register
3355 regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3360 #ifdef _TARGET_XARCH_
3362 // Optimize the compare with a constant cases for xarch
3363 if (op1->gtOper == GT_CNS_INT)
3365 if (op2->gtOper == GT_CNS_INT)
3367 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3368 rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3369 tree->gtUsedRegs = op2->gtUsedRegs;
3372 else if (op2->gtOper == GT_CNS_INT)
3374 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3375 tree->gtUsedRegs = op1->gtUsedRegs;
3378 else if (op2->gtOper == GT_CNS_LNG)
3380 regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3382 // We also need one extra register to read values from
3383 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3384 #endif // _TARGET_X86_
3385 tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3388 #endif // _TARGET_XARCH_
3391 unsigned op1TypeSize;
3392 unsigned op2TypeSize;
3394 op1TypeSize = genTypeSize(op1->TypeGet());
3395 op2TypeSize = genTypeSize(op2->TypeGet());
3397 op1PredictReg = PREDICT_REG;
3398 op2PredictReg = PREDICT_REG;
3400 if (tree->gtFlags & GTF_REVERSE_OPS)
3402 #ifdef _TARGET_XARCH_
3403 if (op1TypeSize == sizeof(int))
3404 op1PredictReg = PREDICT_NONE;
3407 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3408 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3412 #ifdef _TARGET_XARCH_
3413 // For full DWORD compares we can have
3415 // op1 is an address mode and op2 is a register
3417 // op1 is a register and op2 is an address mode
3419 if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3421 if (op2->gtOper == GT_LCL_VAR)
3423 unsigned lclNum = op2->gtLclVar.gtLclNum;
3424 varDsc = lvaTable + lclNum;
3425 /* Did we predict that this local will be enregistered? */
3426 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3428 op1PredictReg = PREDICT_ADDR;
3432 // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3433 if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3434 op2PredictReg = PREDICT_ADDR;
3435 #endif // _TARGET_XARCH_
3437 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3439 if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3442 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3446 #ifdef _TARGET_XARCH_
3447 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3448 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3449 // if one of the registers is small and the types aren't equal.
3451 if (regMask == RBM_NONE)
3453 rpPredictReg op1xPredictReg, op2xPredictReg;
3454 GenTreePtr op1x, op2x;
3455 if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3457 op1xPredictReg = op2PredictReg;
3458 op2xPredictReg = op1PredictReg;
3464 op1xPredictReg = op1PredictReg;
3465 op2xPredictReg = op2PredictReg;
3469 if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
3470 (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3471 varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
3473 bool needTmp = false;
3475 // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3476 // We could predict a byteable register for op2x, but what if we don't get it?
3477 // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3479 if (varTypeIsByte(op1x->TypeGet()))
3485 if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3487 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3492 if (op1x->TypeGet() != op2x->TypeGet())
3498 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3502 #endif // _TARGET_XARCH_
3504 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3509 #ifndef _TARGET_AMD64_
3510 if (type == TYP_LONG)
3512 assert(tree->gtIsValid64RsltMul());
3514 /* Strip out the cast nodes */
3516 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3517 op1 = op1->gtCast.CastOp();
3518 op2 = op2->gtCast.CastOp();
3522 #endif // !_TARGET_AMD64_
3525 #if defined(_TARGET_X86_)
3526 // This will done by a 64-bit imul "imul eax, reg"
3527 // (i.e. EDX:EAX = EAX * reg)
3529 /* Are we supposed to evaluate op2 first? */
3530 if (tree->gtFlags & GTF_REVERSE_OPS)
3532 rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3533 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3537 rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3538 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3541 /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3543 tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3545 /* set regMask to the set of held registers */
3547 regMask = RBM_PAIR_TMP_LO;
3549 if (type == TYP_LONG)
3550 regMask |= RBM_PAIR_TMP_HI;
3552 #elif defined(_TARGET_ARM_)
3553 // This will done by a 4 operand multiply
3555 // Are we supposed to evaluate op2 first?
3556 if (tree->gtFlags & GTF_REVERSE_OPS)
3558 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3559 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3563 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3564 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3567 // set regMask to the set of held registers,
3568 // the two scratch register we need to compute the mul result
3570 regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3572 // set gtUsedRegs toregMask and the registers needed by op1 and op2
3574 tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3576 #else // !_TARGET_X86_ && !_TARGET_ARM_
3577 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3584 /* We use imulEAX for most unsigned multiply operations */
3585 if (tree->gtOverflow())
3587 if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3602 tree->gtUsedRegs = 0;
3604 if (predictReg <= PREDICT_REG)
3605 predictReg = PREDICT_SCRATCH_REG;
3610 if (tree->gtFlags & GTF_REVERSE_OPS)
3612 op1PredictReg = PREDICT_REG;
3613 #if !CPU_LOAD_STORE_ARCH
3614 if (genTypeSize(op1->gtType) >= sizeof(int))
3615 op1PredictReg = PREDICT_NONE;
3617 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3618 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3622 op2PredictReg = PREDICT_REG;
3623 #if !CPU_LOAD_STORE_ARCH
3624 if (genTypeSize(op2->gtType) >= sizeof(int))
3625 op2PredictReg = PREDICT_NONE;
3627 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3629 // For most ALU operations we can generate a single instruction that encodes
3630 // a small immediate integer constant value. (except for multiply)
3632 if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3634 ssize_t ival = op2->gtIntCon.gtIconVal;
3635 if (codeGen->validImmForAlu(ival))
3637 op2PredictReg = PREDICT_NONE;
3639 else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3640 ((oper == GT_ADD) || (oper == GT_SUB)))
3642 op2PredictReg = PREDICT_NONE;
3645 if (op2PredictReg == PREDICT_NONE)
3647 op2->gtUsedRegs = RBM_NONE;
3652 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3655 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3657 #if CPU_HAS_BYTE_REGS
3658 /* We have special register requirements for byte operations */
3660 if (varTypeIsByte(tree->TypeGet()))
3662 /* For 8 bit arithmetic, one operands has to be in a
3663 byte-addressable register, and the other has to be
3664 in a byte-addrble reg or in memory. Assume its in a reg */
3666 regMaskTP regByteMask = 0;
3667 regMaskTP op1ByteMask = op1->gtUsedRegs;
3669 if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3671 // Pick a Byte register to use for op1
3672 regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3673 op1ByteMask = regByteMask;
3676 if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3678 // Pick a Byte register to use for op2, avoiding the one used by op1
3679 regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3684 tree->gtUsedRegs |= regByteMask;
3685 regMask = regByteMask;
3697 /* non-integer division handled in generic way */
3698 if (!varTypeIsIntegral(type))
3700 tree->gtUsedRegs = 0;
3701 if (predictReg <= PREDICT_REG)
3702 predictReg = PREDICT_SCRATCH_REG;
3703 goto GENERIC_BINARY;
3706 #ifndef _TARGET_64BIT_
3708 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3710 /* Special case: a mod with an int op2 is done inline using idiv or div
3711 to avoid a costly call to the helper */
3713 noway_assert((op2->gtOper == GT_CNS_LNG) &&
3714 (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3716 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3717 if (tree->gtFlags & GTF_REVERSE_OPS)
3719 tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3720 rsvdRegs | op1->gtRsvdRegs);
3721 tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3725 tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3727 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3729 regMask = RBM_PAIR_TMP;
3730 #else // !_TARGET_X86_ && !_TARGET_ARM_
3731 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3732 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3735 (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3736 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3740 #endif // _TARGET_64BIT_
3742 /* no divide immediate, so force an integer constant that is not
3743 * a power of two into a register
3746 if (op2->OperKind() & GTK_CONST)
3748 ssize_t ival = op2->gtIntConCommon.IconValue();
3750 /* Is the divisor a power of 2 ? */
3752 if (ival > 0 && genMaxOneBit(size_t(ival)))
3757 op2PredictReg = PREDICT_SCRATCH_REG;
3761 /* Non integer constant also must be enregistered */
3762 op2PredictReg = PREDICT_REG;
3765 regMaskTP trashedMask;
3766 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3767 regMaskTP op1ExcludeMask;
3768 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3769 regMaskTP op2ExcludeMask;
3770 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3772 #ifdef _TARGET_XARCH_
3773 /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3774 * we can safely allow the "b" value to die. Unfortunately, if we simply
3775 * mark the node "b" as using EDX, this will not work if "b" is a register
3776 * variable that dies with this particular reference. Thus, if we want to
3777 * avoid this situation (where we would have to spill the variable from
3778 * EDX to someplace else), we need to explicitly mark the interference
3779 * of the variable at this point.
3782 if (op2->gtOper == GT_LCL_VAR)
3784 unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3785 varDsc = lvaTable + lclNum;
3786 if (varDsc->lvTracked)
3791 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3792 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3793 varDsc->lvVarIndex);
3794 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3795 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3796 varDsc->lvVarIndex);
3799 VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3800 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3804 /* set the held register based on opcode */
3805 if (oper == GT_DIV || oper == GT_UDIV)
3809 trashedMask = (RBM_EAX | RBM_EDX);
3811 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3813 #endif // _TARGET_XARCH_
3816 trashedMask = RBM_NONE;
3817 op1ExcludeMask = RBM_NONE;
3818 op2ExcludeMask = RBM_NONE;
3821 /* set the lvPref reg if possible */
3824 * Walking the gtNext link twice from here should get us back
3825 * to our parent node, if this is a simple assignment tree.
3827 dest = tree->gtNext;
3828 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3829 dest->gtNext->gtOp.gtOp2 == tree)
3831 varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3832 varDsc->addPrefReg(regMask, this);
3834 #ifdef _TARGET_XARCH_
3835 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3837 op1PredictReg = PREDICT_SCRATCH_REG;
3840 /* are we supposed to evaluate op2 first? */
3841 if (tree->gtFlags & GTF_REVERSE_OPS)
3843 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3844 rsvdRegs | op1->gtRsvdRegs);
3845 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3849 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3850 rsvdRegs | op2->gtRsvdRegs);
3851 rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3856 /* grab EAX, EDX for this tree node */
3857 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3865 if (predictReg <= PREDICT_REG)
3866 predictReg = PREDICT_SCRATCH_REG;
3868 #ifndef _TARGET_64BIT_
3869 if (type == TYP_LONG)
3871 if (op2->IsCnsIntOrI())
3873 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3874 // no register used by op2
3875 op2->gtUsedRegs = 0;
3876 tree->gtUsedRegs = op1->gtUsedRegs;
3880 // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3881 tmpMask = lockedRegs;
3882 tmpMask &= ~RBM_LNGARG_0;
3883 tmpMask &= ~RBM_SHIFT_LNG;
3885 // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3886 if (tree->gtFlags & GTF_REVERSE_OPS)
3888 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3889 tmpMask |= RBM_SHIFT_LNG;
3890 // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife
3891 // Fix 383843 X86/ARM ILGEN
3892 rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3893 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3897 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3898 tmpMask |= RBM_LNGARG_0;
3899 // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3900 // Fix 383839 ARM ILGEN
3901 rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3902 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3904 regMask = RBM_LNGRET; // function return registers
3905 op1->gtUsedRegs |= RBM_LNGARG_0;
3906 op2->gtUsedRegs |= RBM_SHIFT_LNG;
3908 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3910 // We are using a helper function to do shift:
3912 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3916 #endif // _TARGET_64BIT_
3918 #ifdef _TARGET_XARCH_
3919 if (!op2->IsCnsIntOrI())
3920 predictReg = PREDICT_NOT_REG_ECX;
3924 // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3926 regMaskTP tmpRsvdRegs;
3928 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3930 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3931 rsvdRegs = RBM_LASTUSE;
3932 tmpRsvdRegs = RBM_NONE;
3937 // Special case op1 of a constant
3938 if (op1->IsCnsIntOrI())
3939 tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3940 // System.Xml.Schema.BitSet:Get(int):bool
3942 tmpRsvdRegs = op1->gtRsvdRegs;
3946 if (!op2->IsCnsIntOrI())
3948 if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3950 op2PredictReg = PREDICT_REG_SHIFT;
3954 op2PredictReg = PREDICT_REG;
3957 /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3958 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3960 // If our target arch has a REG_SHIFT register then
3961 // we set the PrefReg when we have a LclVar for op2
3962 // we add an interference with REG_SHIFT for any other LclVars alive at op2
3963 if (REG_SHIFT != REG_NA)
3965 VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3967 while (op2->gtOper == GT_COMMA)
3969 op2 = op2->gtOp.gtOp2;
3972 if (op2->gtOper == GT_LCL_VAR)
3974 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3975 varDsc->setPrefReg(REG_SHIFT, this);
3976 if (varDsc->lvTracked)
3978 VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3982 // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3983 // excluding the LclVar that was used for the shift amount as it is read-only
3984 // and can be kept alive through the shift operation
3986 rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3987 // In case op2Mask doesn't contain the required shift register,
3988 // we will or it in now.
3989 op2Mask |= RBM_SHIFT;
3993 if (tree->gtFlags & GTF_REVERSE_OPS)
3995 assert(regMask == RBM_NONE);
3996 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3999 #if CPU_HAS_BYTE_REGS
4000 if (varTypeIsByte(type))
4002 // Fix 383789 X86 ILGEN
4003 // Fix 383813 X86 ILGEN
4004 // Fix 383828 X86 ILGEN
4005 if (op1->gtOper == GT_LCL_VAR)
4007 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4008 if (varDsc->lvTracked)
4010 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4012 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4013 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4016 if ((regMask & RBM_BYTE_REGS) == 0)
4018 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4019 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4022 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4026 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4032 if (tree->gtFlags & GTF_REVERSE_OPS)
4034 if (predictReg == PREDICT_NONE)
4036 predictReg = PREDICT_REG;
4038 else if (rpHasVarIndexForPredict(predictReg))
4040 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4041 predictReg = PREDICT_SCRATCH_REG;
4044 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4045 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4049 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4051 /* CodeGen will enregister the op2 side of a GT_COMMA */
4052 if (predictReg == PREDICT_NONE)
4054 predictReg = PREDICT_REG;
4056 else if (rpHasVarIndexForPredict(predictReg))
4058 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4059 predictReg = PREDICT_SCRATCH_REG;
4062 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4064 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4066 tree->gtUsedRegs = op2->gtUsedRegs;
4067 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4069 LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4071 if (op2VarDsc->lvTracked)
4073 VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4074 rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4081 noway_assert(op1 != NULL && op2 != NULL);
4084 * If the gtUsedRegs conflicts with lockedRegs
4085 * then we going to have to spill some registers
4086 * into the non-trashed register set to keep it alive
4090 regMaskTP spillRegs;
4091 spillRegs = lockedRegs & tree->gtUsedRegs;
4095 /* Find the next register that needs to be spilled */
4096 tmpMask = genFindLowestBit(spillRegs);
4101 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4102 gtDispTree(tree, 0, NULL, true);
4105 /* In Codegen it will typically introduce a spill temp here */
4106 /* rather than relocating the register to a non trashed reg */
4107 rpPredictSpillCnt++;
4110 /* Remove it from the spillRegs and lockedRegs*/
4111 spillRegs &= ~tmpMask;
4112 lockedRegs &= ~tmpMask;
4115 VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4117 /* Evaluate the <cond> subtree */
4118 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4119 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4120 tree->gtUsedRegs = op1->gtUsedRegs;
4122 noway_assert(op2->gtOper == GT_COLON);
4123 if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4125 // Don't try to target the register specified in predictReg when we have complex subtrees
4127 predictReg = PREDICT_SCRATCH_REG;
4129 GenTreePtr elseTree = op2->AsColon()->ElseNode();
4130 GenTreePtr thenTree = op2->AsColon()->ThenNode();
4132 noway_assert(thenTree != NULL && elseTree != NULL);
4134 // Update compCurLife to only those vars live on the <then> subtree
4136 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4138 if (type == TYP_VOID)
4140 /* Evaluate the <then> subtree */
4141 rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4143 predictReg = PREDICT_NONE;
4147 // A mask to use to force the predictor to choose low registers (to reduce code size)
4148 regMaskTP avoidRegs = RBM_NONE;
4150 avoidRegs = (RBM_R12 | RBM_LR);
4152 if (predictReg <= PREDICT_REG)
4153 predictReg = PREDICT_SCRATCH_REG;
4155 /* Evaluate the <then> subtree */
4157 rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4161 rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4162 if (op1PredictReg != PREDICT_NONE)
4163 predictReg = op1PredictReg;
4167 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4169 /* Evaluate the <else> subtree */
4170 // First record the post-then liveness, and reset the current liveness to the else
4172 CLANG_FORMAT_COMMENT_ANCHOR;
4175 VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4178 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4180 rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4181 tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4183 // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4184 // They each have only one successor, which they share. Their live-out sets must equal the
4185 // live-in set of this virtual successor block, and thus must be the same. We can assert
4186 // that equality here.
4187 assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4191 regMaskTP reloadMask = RBM_NONE;
4195 regMaskTP reloadReg;
4197 /* Get an extra register to hold it */
4198 reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4202 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4203 gtDispTree(tree, 0, NULL, true);
4206 reloadMask |= reloadReg;
4211 /* update the gtUsedRegs mask */
4212 tree->gtUsedRegs |= reloadMask;
4219 tree->gtUsedRegs = RBM_NONE;
4222 /* Is there a return value? */
4225 #if FEATURE_FP_REGALLOC
4226 if (varTypeIsFloating(type))
4228 predictReg = PREDICT_FLTRET;
4229 if (type == TYP_FLOAT)
4230 regMask = RBM_FLOATRET;
4232 regMask = RBM_DOUBLERET;
4236 if (isRegPairType(type))
4238 predictReg = PREDICT_LNGRET;
4239 regMask = RBM_LNGRET;
4243 predictReg = PREDICT_INTRET;
4244 regMask = RBM_INTRET;
4246 if (info.compCallUnmanaged)
4248 lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4250 rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4251 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4254 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4255 // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED.
4256 // We could optimize on registers based on int/long or no return value. But to
4257 // keep it simple we will mark entire RBM_PROFILER_RETURN_USED as used regs here.
4258 if (compIsProfilerHookNeeded())
4260 tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4269 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4270 regMask = genReturnRegForTree(tree);
4271 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4274 tree->gtUsedRegs = 0;
4280 /* This must be a test of a relational operator */
4282 noway_assert(op1->OperIsCompare());
4284 /* Only condition code set by this operation */
4286 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4288 tree->gtUsedRegs = op1->gtUsedRegs;
4294 noway_assert(type <= TYP_INT);
4295 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4298 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4299 unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4302 // Table based switch requires an extra register for the table base
4303 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4305 tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4307 #else // !_TARGET_ARM_
4308 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4309 tree->gtUsedRegs = op1->gtUsedRegs;
4310 #endif // _TARGET_ARM_
4315 if (predictReg <= PREDICT_REG)
4316 predictReg = PREDICT_SCRATCH_REG;
4318 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4319 // Need a reg to load exponent into
4320 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4321 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4325 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4329 if (info.compInitMem)
4331 // We zero out two registers in the ARM codegen path
4333 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4337 op1->gtUsedRegs |= (regMaskSmall)regMask;
4338 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4340 // The result will be put in the reg we picked for the size
4341 // regMask = <already set as we want it to be>
4348 if (predictReg <= PREDICT_REG)
4349 predictReg = PREDICT_SCRATCH_REG;
4351 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4352 // registers (to reduce code size)
4354 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4357 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4359 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4363 unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4364 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4365 // If it has one bit set, and that's an arg reg...
4366 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4368 // We are passing the 'obj' in the argument registers
4370 regNumber rn = genRegNumFromMask(preferReg);
4372 // Add the registers used to pass the 'obj' to regMask.
4373 for (unsigned i = 0; i < objSize / 4; i++)
4375 if (rn == MAX_REG_ARG)
4378 regMask |= genRegMask(rn);
4379 rn = genRegArgNext(rn);
4384 // We are passing the 'obj' in the outgoing arg space
4385 // We will need one register to load into unless the 'obj' size is 4 or less.
4389 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4392 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4394 #else // !_TARGET_ARM_
4396 #endif // _TARGET_ARM_
4402 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4404 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4406 // A MKREFANY takes up two registers.
4407 regNumber rn = genRegNumFromMask(preferReg);
4409 if (rn < MAX_REG_ARG)
4411 regMask |= genRegMask(rn);
4412 rn = genRegArgNext(rn);
4413 if (rn < MAX_REG_ARG)
4414 regMask |= genRegMask(rn);
4417 if (regMask != RBM_NONE)
4419 // Condensation of GENERIC_BINARY path.
4420 assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4421 op2PredictReg = PREDICT_REG;
4422 regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4423 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4424 regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4425 tree->gtUsedRegs = (regMaskSmall)regMask;
4428 tree->gtUsedRegs = op1->gtUsedRegs;
4429 #endif // _TARGET_ARM_
4430 goto GENERIC_BINARY;
4437 goto GENERIC_BINARY;
4441 // Ensure we can write to op2. op2 will hold the output.
4442 if (predictReg < PREDICT_SCRATCH_REG)
4443 predictReg = PREDICT_SCRATCH_REG;
4445 if (tree->gtFlags & GTF_REVERSE_OPS)
4447 op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4448 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4452 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4453 op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4455 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4462 // This unary operator simply passes through the value from its child (much like GT_NOP)
4463 // and thus won't need a scratch register.
4464 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4465 tree->gtUsedRegs = op1->gtUsedRegs;
4472 noway_assert(!"unexpected simple operator in reg use prediction");
4477 /* See what kind of a special operator we have here */
4482 GenTreeArgList* list;
4484 unsigned regArgsNum;
4486 regMaskTP regArgMask;
4487 regMaskTP curArgMask;
4493 /* initialize so we can just or in various bits */
4494 tree->gtUsedRegs = RBM_NONE;
4496 #if GTF_CALL_REG_SAVE
4498 * Unless the GTF_CALL_REG_SAVE flag is set,
4499 * we can't preserve the RBM_CALLEE_TRASH registers.
4500 * (likewise we can't preserve the return registers)
4501 * So we remove them from the lockedRegs set and
4502 * record any of them in the keepMask
4505 if (tree->gtFlags & GTF_CALL_REG_SAVE)
4507 regMaskTP trashMask = genReturnRegForTree(tree);
4509 keepMask = lockedRegs & trashMask;
4510 lockedRegs &= ~trashMask;
4515 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4516 lockedRegs &= ~RBM_CALLEE_TRASH;
4522 /* Is there an object pointer? */
4523 if (tree->gtCall.gtCallObjp)
4525 /* Evaluate the instance pointer first */
4527 args = tree->gtCall.gtCallObjp;
4529 /* the objPtr always goes to an integer register (through temp or directly) */
4530 noway_assert(regArgsNum == 0);
4533 /* Must be passed in a register */
4535 noway_assert(args->gtFlags & GTF_LATE_ARG);
4537 /* Must be either a deferred reg arg node or a GT_ASG node */
4539 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4540 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4542 if (!args->IsArgPlaceHolderNode())
4544 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4547 VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4548 VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4550 /* process argument list */
4551 for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4553 args = list->Current();
4555 if (args->gtFlags & GTF_LATE_ARG)
4557 /* Must be either a Placeholder/NOP node or a GT_ASG node */
4559 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4560 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4562 if (!args->IsArgPlaceHolderNode())
4564 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4571 #ifdef FEATURE_FIXED_OUT_ARGS
4572 // We'll store this argument into the outgoing argument area
4573 // It needs to be in a register to be stored.
4575 predictReg = PREDICT_REG;
4577 #else // !FEATURE_FIXED_OUT_ARGS
4578 // We'll generate a push for this argument
4580 predictReg = PREDICT_NONE;
4581 if (varTypeIsSmall(args->TypeGet()))
4583 /* We may need to sign or zero extend a small type using a register */
4584 predictReg = PREDICT_SCRATCH_REG;
4588 rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4590 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4591 tree->gtUsedRegs |= args->gtUsedRegs;
4594 /* Is there a late argument list */
4597 regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4600 /* process the late argument list */
4601 for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4603 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4604 LclVarDsc* promotedStructLocal = NULL;
4606 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4607 tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4609 assert(list->OperIsList());
4611 args = list->Current();
4612 list = list->Rest();
4614 assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
4616 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4617 assert(curArgTabEntry);
4619 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4621 curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4623 rpPredictReg argPredictReg;
4624 regMaskTP avoidReg = RBM_NONE;
4626 if (regNum != REG_STK)
4628 argPredictReg = rpGetPredictForReg(regNum);
4629 curArgMask |= genRegMask(regNum);
4633 assert(numSlots > 0);
4634 argPredictReg = PREDICT_NONE;
4636 // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4637 avoidReg = (RBM_R12 | RBM_LR);
4642 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4644 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4646 // 64-bit longs and doubles require 2 consecutive argument registers
4647 curArgMask |= genRegMask(REG_NEXT(regNum));
4649 else if (args->TypeGet() == TYP_STRUCT)
4651 GenTreePtr argx = args;
4652 GenTreePtr lclVarTree = NULL;
4654 /* The GT_OBJ may be a child of a GT_COMMA */
4655 while (argx->gtOper == GT_COMMA)
4657 argx = argx->gtOp.gtOp2;
4659 unsigned originalSize = 0;
4661 if (argx->gtOper == GT_OBJ)
4663 originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4665 // Is it the address of a promoted struct local?
4666 if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4668 lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
4669 LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4670 if (varDsc->lvPromoted)
4671 promotedStructLocal = varDsc;
4674 else if (argx->gtOper == GT_LCL_VAR)
4676 varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
4677 originalSize = varDsc->lvSize();
4679 // Is it a promoted struct local?
4680 if (varDsc->lvPromoted)
4681 promotedStructLocal = varDsc;
4683 else if (argx->gtOper == GT_MKREFANY)
4685 originalSize = 2 * TARGET_POINTER_SIZE;
4689 noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4692 // We only pass arguments differently if it is a struct local "independently" promoted, which
4693 // allows the field locals to be independently enregistered.
4694 if (promotedStructLocal != NULL)
4696 if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4697 promotedStructLocal = NULL;
4700 unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
4702 // Are we passing a TYP_STRUCT in multiple integer registers?
4703 // if so set up curArgMask to reflect this
4704 // Also slots is updated to reflect the number of outgoing arg slots that we will write
4705 if (regNum != REG_STK)
4707 regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4708 assert(genIsValidReg(regNum));
4709 regNumber nextReg = REG_NEXT(regNum);
4711 while (slots > 0 && nextReg <= regLast)
4713 curArgMask |= genRegMask(nextReg);
4714 nextReg = REG_NEXT(nextReg);
4719 if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4721 // All or a portion of this struct will be placed in the argument registers indicated by
4722 // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4723 // that the second arg to be evaluated interferes with the reg for the first, the third with
4724 // the regs for the first and second, etc. But since we always place the stack slots before
4725 // placing the register slots we do not add interferences for any part of the struct that gets
4726 // passed on the stack.
4729 PREDICT_NONE; // We will target the indivual fields into registers but not the whole struct
4730 regMaskTP prevArgMask = RBM_NONE;
4731 for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4733 LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4734 if (fieldVarDsc->lvTracked)
4736 assert(lclVarTree != NULL);
4737 if (prevArgMask != RBM_NONE)
4739 rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4740 DEBUGARG("fieldVar/argReg"));
4743 // Now see how many registers this uses up.
4744 unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4745 unsigned nextAfterLastRegOffset =
4746 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4747 TARGET_POINTER_SIZE;
4748 unsigned nextAfterLastArgRegOffset =
4749 min(nextAfterLastRegOffset,
4750 genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4752 for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4755 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4758 if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4763 if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4765 // Add the argument register used here as a preferred register for this fieldVarDsc
4767 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4768 fieldVarDsc->setPrefReg(firstRegUsed, this);
4771 compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4774 // If slots is greater than zero then part or all of this TYP_STRUCT
4775 // argument is passed in the outgoing argument area. (except HFA arg)
4777 if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4779 // We will need a register to address the TYP_STRUCT
4780 // Note that we can use an argument register in curArgMask as in
4781 // codegen we pass the stack portion of the argument before we
4782 // setup the register part.
4785 // Force the predictor to choose a LOW_REG here to reduce code bloat
4786 avoidReg = (RBM_R12 | RBM_LR);
4788 assert(tmpMask == RBM_NONE);
4789 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4791 // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4795 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4796 lockedRegs | regArgMask | tmpMask | avoidReg);
4799 } // (args->TypeGet() == TYP_STRUCT)
4800 #endif // _TARGET_ARM_
4802 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4803 // as we have already calculated the correct tmpMask and curArgMask values and
4804 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4806 if (promotedStructLocal == NULL)
4808 /* Target the appropriate argument register */
4809 tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4812 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4813 // for the duration of the OBJ.
4814 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4816 GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4817 assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4818 compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4821 regArgMask |= curArgMask;
4822 args->gtUsedRegs |= (tmpMask | regArgMask);
4823 tree->gtUsedRegs |= args->gtUsedRegs;
4824 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4826 if (args->gtUsedRegs != RBM_NONE)
4828 // Add register interference with the set of registers used or in use when we evaluated
4829 // the current arg, with whatever is alive after the current arg
4831 rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4833 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4835 assert(list == NULL);
4837 #ifdef LEGACY_BACKEND
4838 #if CPU_LOAD_STORE_ARCH
4839 #ifdef FEATURE_READYTORUN_COMPILER
4840 if (tree->gtCall.IsR2RRelativeIndir())
4842 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4844 #endif // FEATURE_READYTORUN_COMPILER
4845 #endif // CPU_LOAD_STORE_ARCH
4846 #endif // LEGACY_BACKEND
4848 regMaskTP callAddrMask;
4849 callAddrMask = RBM_NONE;
4850 #if CPU_LOAD_STORE_ARCH
4851 predictReg = PREDICT_SCRATCH_REG;
4853 predictReg = PREDICT_NONE;
4856 switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4858 case GTF_CALL_VIRT_STUB:
4860 // We only want to record an interference between the virtual stub
4861 // param reg and anything that's live AFTER the call, but we've not
4862 // yet processed the indirect target. So add virtualStubParamInfo.regMask
4863 // to interferingRegs.
4864 interferingRegs |= virtualStubParamInfo->GetRegMask();
4867 printf("Adding interference with Virtual Stub Param\n");
4869 codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4871 if (tree->gtCall.gtCallType == CT_INDIRECT)
4873 predictReg = virtualStubParamInfo->GetPredict();
4877 case GTF_CALL_VIRT_VTABLE:
4878 predictReg = PREDICT_SCRATCH_REG;
4881 case GTF_CALL_NONVIRT:
4882 predictReg = PREDICT_SCRATCH_REG;
4886 if (tree->gtCall.gtCallType == CT_INDIRECT)
4888 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4889 if (tree->gtCall.gtCallCookie)
4891 codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4893 callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4894 lockedRegs | regArgMask, RBM_LASTUSE);
4896 // Just in case we predict some other registers, force interference with our two special
4897 // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4898 callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4900 predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4904 rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4906 else if (predictReg != PREDICT_NONE)
4908 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4911 if (tree->gtFlags & GTF_CALL_UNMANAGED)
4913 // Need a register for tcbReg
4915 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4916 #if CPU_LOAD_STORE_ARCH
4917 // Need an extra register for tmpReg
4919 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4923 tree->gtUsedRegs |= callAddrMask;
4925 /* After the call restore the original value of lockedRegs */
4926 lockedRegs |= keepMask;
4928 /* set the return register */
4929 regMask = genReturnRegForTree(tree);
4931 if (regMask & rsvdRegs)
4933 // We will need to relocate the return register value
4934 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4935 #if FEATURE_FP_REGALLOC
4936 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4942 if (intRegMask == RBM_INTRET)
4944 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4946 else if (intRegMask == RBM_LNGRET)
4948 regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4952 noway_assert(!"unexpected return regMask");
4956 #if FEATURE_FP_REGALLOC
4959 if (floatRegMask == RBM_FLOATRET)
4961 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4963 else if (floatRegMask == RBM_DOUBLERET)
4965 regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4967 else // HFA return case
4969 for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4971 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4978 /* the return registers (if any) are killed */
4979 tree->gtUsedRegs |= regMask;
4981 #if GTF_CALL_REG_SAVE
4982 if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4985 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4986 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4990 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4991 // Mark required registers for emitting tailcall profiler callback as used
4992 if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4994 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
5001 // Figure out which registers can't be touched
5003 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5004 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5006 regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5011 #if CPU_LOAD_STORE_ARCH
5012 // We need a register to load the bounds of the MD array
5013 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5016 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5018 /* We need scratch registers to compute index-lower_bound.
5019 Also, gtArrInds[0]'s register will be used as the second
5020 addressability register (besides gtArrObj's) */
5022 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5023 lockedRegs | regMask | dimsMask, rsvdRegs);
5027 dimsMask |= dimMask;
5029 #ifdef _TARGET_XARCH_
5030 // INS_imul doesn't have an immediate constant.
5031 if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5032 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5034 tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5039 #ifdef _TARGET_XARCH_
5040 rsvdRegs |= RBM_EAX;
5042 if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5044 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5048 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5050 op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5052 #ifdef _TARGET_XARCH_
5053 rsvdRegs &= ~RBM_EAX;
5054 tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5055 rsvdRegs | regMask | op2Mask);
5056 tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5057 predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5060 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5065 case GT_ARR_BOUNDS_CHECK:
5067 regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5068 regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5069 rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5072 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5077 NO_WAY("unexpected special operator in reg use prediction");
5084 /* make sure we set them to something reasonable */
5085 if (tree->gtUsedRegs & RBM_ILLEGAL)
5086 noway_assert(!"used regs not set properly in reg use prediction");
5088 if (regMask & RBM_ILLEGAL)
5089 noway_assert(!"return value not set propery in reg use prediction");
5094 * If the gtUsedRegs conflicts with lockedRegs
5095 * then we going to have to spill some registers
5096 * into the non-trashed register set to keep it alive
5098 regMaskTP spillMask;
5099 spillMask = tree->gtUsedRegs & lockedRegs;
5105 /* Find the next register that needs to be spilled */
5106 tmpMask = genFindLowestBit(spillMask);
5111 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5112 gtDispTree(tree, 0, NULL, true);
5113 if ((tmpMask & regMask) == 0)
5115 printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5116 gtDispTree(tree, 0, NULL, true);
5120 /* In Codegen it will typically introduce a spill temp here */
5121 /* rather than relocating the register to a non trashed reg */
5122 rpPredictSpillCnt++;
5124 /* Remove it from the spillMask */
5125 spillMask &= ~tmpMask;
5130 * If the return registers in regMask conflicts with the lockedRegs
5131 * then we allocate extra registers for the reload of the conflicting
5134 * Set spillMask to the set of locked registers that have to be reloaded here.
5135 * reloadMask is set to the extra registers that are used to reload
5136 * the spilled lockedRegs.
5139 noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5140 spillMask = lockedRegs & regMask;
5144 /* Remove the spillMask from regMask */
5145 regMask &= ~spillMask;
5147 regMaskTP reloadMask = RBM_NONE;
5150 /* Get an extra register to hold it */
5151 regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5155 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5156 gtDispTree(tree, 0, NULL, true);
5159 reloadMask |= reloadReg;
5161 /* Remove it from the spillMask */
5162 spillMask &= ~genFindLowestBit(spillMask);
5165 /* Update regMask to use the reloadMask */
5166 regMask |= reloadMask;
5168 /* update the gtUsedRegs mask */
5169 tree->gtUsedRegs |= (regMaskSmall)regMask;
5172 regMaskTP regUse = tree->gtUsedRegs;
5173 regUse |= interferingRegs;
5175 if (!VarSetOps::IsEmpty(this, compCurLife))
5177 // Add interference between the current set of live variables and
5178 // the set of temporary registers need to evaluate the sub tree
5181 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5185 if (rpAsgVarNum != -1)
5187 // Add interference between the registers used (if any)
5188 // and the assignment target variable
5191 rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5194 // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5195 // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5196 // to the set of currently live variables. This new interference will prevent us
5197 // from using the register value used here for enregistering different live variable
5199 if (!VarSetOps::IsEmpty(this, compCurLife))
5201 rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5205 /* Do we need to restore the oldLastUseVars value */
5206 if (restoreLastUseVars)
5208 /* If we used a GT_ASG targeted register then we need to add
5209 * a variable interference between any new last use variables
5210 * and the GT_ASG targeted register
5212 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5214 rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5215 DEBUGARG("asgn tgt last use conflict"));
5217 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5223 #pragma warning(pop)
5226 #endif // LEGACY_BACKEND
5228 /****************************************************************************/
5229 /* Returns true when we must create an EBP frame
5230 This is used to force most managed methods to have EBP based frames
5231 which allows the ETW kernel stackwalker to walk the stacks of managed code
5232 this allows the kernel to perform light weight profiling
// rpMustCreateEBPFrame: decide whether an EBP (frame-pointer) based frame
// must be created for this method.
//
// Arguments:
//    wbReason - (DEBUG only) out-parameter; when non-null it receives a short
//               string naming the heuristic that forced the frame.
//
// NOTE(review): this listing is elided (the embedded line numbers have gaps);
// the statements that set 'result' inside each guard are not visible here, so
// the comments below describe only the visible structure — confirm details
// against the full source.
5234 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5236 bool result = false;
5238 const char* reason = nullptr;
// Each guard below records (in DEBUG builds) why a frame is being forced.
// Guards are skipped once 'result' is already true. The '#endif //
// ETW_EBP_FRAMED' further down closes a conditional region whose '#if'
// is in elided lines.
5242 if (!result && (opts.MinOpts() || opts.compDbgCode))
5244 INDEBUG(reason = "Debug Code");
5247 if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5249 INDEBUG(reason = "IL Code Size");
5252 if (!result && (fgBBcount > 3))
5254 INDEBUG(reason = "BasicBlock Count");
5257 if (!result && fgHasLoops)
5259 INDEBUG(reason = "Method has Loops");
5262 if (!result && (optCallCount >= 2))
5264 INDEBUG(reason = "Call Count");
5267 if (!result && (optIndirectCallCount >= 1))
5269 INDEBUG(reason = "Indirect Call");
5272 #endif // ETW_EBP_FRAMED
5274 // VM wants to identify the containing frame of an InlinedCallFrame always
5275 // via the frame register never the stack register so we need a frame.
5276 if (!result && (optNativeCallCount != 0))
5278 INDEBUG(reason = "Uses PInvoke");
5282 #ifdef _TARGET_ARM64_
5283 // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5287 INDEBUG(reason = "Temporary ARM64 force frame pointer");
5290 #endif // _TARGET_ARM64_
// Publish the reason string to the caller (DEBUG); the assignment and the
// 'return result;' lines are elided from this listing.
5293 if ((result == true) && (wbReason != nullptr))
5302 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5304 /*****************************************************************************
5306 * Predict which variables will be assigned to registers
5307 * This is x86 specific and only predicts the integer registers and
5308 * must be conservative, any register that is predicted to be enregistered
5309 * must end up being enregistered.
5311 * The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5312 * predicted to be enregistered to minimize calls to rpPredictRegPick.
5317 #pragma warning(push)
5318 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
// rpPredictAssignRegVars: predict/assign registers to tracked local variables,
// in weighted-ref-count order, restricted to the registers in 'regAvail'.
// Visible side effects: sets lvRegNum/lvOtherReg/lvRegister on locals, updates
// the raLclRegIntf[] interference sets, accumulates stack-cost predictions via
// raAddToStkPredict, and chooses rpFrameType (ESP / EBP / double-aligned).
//
// NOTE(review): this listing is elided (the embedded line numbers have gaps);
// braces, labels (e.g. the targets of 'goto OK_TO_ENREGISTER' and
// 'goto REVERSE_EBP_ENREG'), and the final 'return' are not visible here.
// Comments below describe only what the visible lines show.
5320 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
// On early passes, establish the default frame type from the code generator's
// frame requirements.
5324 if (rpPasses <= rpPassesPessimize)
5326 // Assume that we won't have to reverse EBP enregistration
5327 rpReverseEBPenreg = false;
5329 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5330 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5331 rpFrameType = FT_EBP_FRAME;
5333 rpFrameType = FT_ESP_FRAME;
5337 // If we are using FPBASE as the frame register, we cannot also use it for
5339 if (rpFrameType == FT_EBP_FRAME)
5341 regAvail &= ~RBM_FPBASE;
5343 #endif // !ETW_EBP_FRAMED
5346 rpPredictAssignMask = regAvail;
// Set up the incoming-argument register masks (float state only when the
// x87 FP stack allocator is not in use).
5348 raSetupArgMasks(&codeGen->intRegState);
5349 #if !FEATURE_STACK_FP_X87
5350 raSetupArgMasks(&codeGen->floatRegState);
5353 // If there is a secret stub param, it is also live in
5354 if (info.compPublishStubParam)
5356 codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
// With no registers available, reset every local to the stack.
5359 if (regAvail == RBM_NONE)
5364 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5366 #if FEATURE_STACK_FP_X87
5367 if (!varDsc->IsFloatRegType())
5370 varDsc->lvRegNum = REG_STK;
5371 if (isRegPairType(varDsc->lvType))
5372 varDsc->lvOtherReg = REG_STK;
5380 printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5381 printf("\n Available registers = ");
5382 dspRegMask(regAvail);
5387 if (regAvail == RBM_NONE)
5392 /* We cannot change the lvVarIndexes at this point, so we */
5393 /* can only re-order the existing set of tracked variables */
5394 /* Which will change the order in which we select the */
5395 /* locals for enregistering. */
5397 assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5399 // Should not be set unless optimizing
5400 noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5406 fgDebugCheckBBlist();
5409 /* Initialize the weighted count of variables that could have */
5410 /* been enregistered but weren't */
5411 unsigned refCntStk = 0; // sum of ref counts for all stack based variables
5412 unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
5413 unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5415 unsigned refCntStkParam; // sum of ref counts for all stack based parameters
5416 unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5418 #if FEATURE_STACK_FP_X87
5419 refCntStkParam = raCntStkParamDblStackFP;
5420 refCntWtdStkDbl = raCntWtdStkDblStackFP;
5421 refCntStk = raCntStkStackFP;
5424 refCntWtdStkDbl = 0;
5426 #endif // FEATURE_STACK_FP_X87
5428 #endif // DOUBLE_ALIGN
5430 /* Set of registers used to enregister variables in the prediction */
5431 regMaskTP regUsed = RBM_NONE;
5433 /*-------------------------------------------------------------------------
5435 * Predict/Assign the enregistered locals in ref-count order
5439 VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5441 unsigned FPRegVarLiveInCnt;
5442 FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
// Main loop: visit locals in sorted (weighted ref-count) order and try to
// predict a register for each candidate.
5445 for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5447 bool notWorthy = false;
5451 regMaskTP regAvailForType;
5454 unsigned customVarOrderSize;
5455 regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5457 regNumber saveOtherReg;
5459 varDsc = lvaRefSorted[sortNum];
5461 #if FEATURE_STACK_FP_X87
5462 if (varTypeIsFloating(varDsc->TypeGet()))
5465 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5467 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5468 // be en-registered.
5469 noway_assert(!varDsc->lvRegister);
5476 /* Check the set of invariant things that would prevent enregistration */
5478 /* Ignore the variable if it's not tracked */
5479 if (!varDsc->lvTracked)
5482 /* Get hold of the index and the interference mask for the variable */
5483 varIndex = varDsc->lvVarIndex;
5485 // Remove 'varIndex' from unprocessedVars
5486 VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5488 // Skip the variable if it's marked as DoNotEnregister.
5490 if (varDsc->lvDoNotEnregister)
5493 /* TODO: For now if we have JMP all register args go to stack
5494 * TODO: Later consider extending the life of the argument or make a copy of it */
5496 if (compJmpOpUsed && varDsc->lvIsRegArg)
5499 /* Skip the variable if the ref count is zero */
5501 if (varDsc->lvRefCnt == 0)
5504 /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5506 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5511 /* Is the unweighted ref count too low to be interesting? */
5513 if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5514 (varDsc->lvRefCnt <= 1))
5516 /* Sometimes it's useful to enregister a variable with only one use */
5517 /* arguments referenced in loops are one example */
5519 if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5520 goto OK_TO_ENREGISTER;
5522 /* If the variable has a preferred register set it may be useful to put it there */
5523 if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5524 goto OK_TO_ENREGISTER;
5526 /* Keep going; the table is sorted by "weighted" ref count */
// Select the candidate register set by type class (float vs integer),
// further restricted to the double registers for TYP_DOUBLE.
5532 if (varTypeIsFloating(varDsc->TypeGet()))
5534 regType = varDsc->TypeGet();
5535 regAvailForType = regAvail & RBM_ALLFLOAT;
5540 regAvailForType = regAvail & RBM_ALLINT;
5544 isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5548 regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5552 /* If we don't have any registers available then skip the enregistration attempt */
5553 if (regAvailForType == RBM_NONE)
5556 // On the pessimize passes don't even try to enregister LONGS
5557 if (isRegPairType(varDsc->lvType))
5559 if (rpPasses > rpPassesPessimize)
5561 else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5565 // Set of registers to avoid when performing register allocation
5566 avoidReg = RBM_NONE;
5568 if (!varDsc->lvIsRegArg)
5570 /* For local variables,
5571 * avoid the incoming arguments,
5572 * but only if you conflict with them */
5574 if (raAvoidArgRegMask != 0)
5577 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5579 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5581 if (!argDsc->lvIsRegArg)
5584 bool isFloat = argDsc->IsFloatRegType();
5585 regNumber inArgReg = argDsc->lvArgReg;
5586 regMaskTP inArgBit = genRegMask(inArgReg);
5588 // Is this inArgReg in the raAvoidArgRegMask set?
5590 if (!(raAvoidArgRegMask & inArgBit))
5593 noway_assert(argDsc->lvIsParam);
5594 noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5596 unsigned locVarIndex = varDsc->lvVarIndex;
5597 unsigned argVarIndex = argDsc->lvVarIndex;
5599 /* Does this variable interfere with the arg variable ? */
5600 if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5602 noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5603 /* Yes, so try to avoid the incoming arg reg */
5604 avoidReg |= inArgBit;
5608 noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5614 // Now we will try to predict which register the variable
5615 // could be enregistered in
5617 customVarOrderSize = MAX_VAR_ORDER_SIZE;
5619 raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5622 saveOtherReg = DUMMY_INIT(REG_NA);
// Walk the preference-ordered register list and pick the first register that
// is available and does not interfere with this variable.
5624 for (regInx = 0; regInx < customVarOrderSize; regInx++)
5626 regNumber regNum = customVarOrder[regInx];
5627 regMaskTP regBits = genRegMask(regNum);
5629 /* Skip this register if it isn't available */
5630 if ((regAvailForType & regBits) == 0)
5633 /* Skip this register if it interferes with the variable */
5635 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5638 if (varTypeIsFloating(regType))
5643 regNumber regNext = REG_NEXT(regNum);
5644 regBits |= genRegMask(regNext);
5646 /* Skip if regNext interferes with the variable */
5647 if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5653 bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5654 bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5655 bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
5657 /* Skip this register if the weighted ref count is less than two
5658 and we are considering an unused callee saved register */
5660 if (lessThanTwoRefWtd && // less than two references (weighted)
5661 firstUseOfReg && // first use of this register
5662 calleeSavedReg) // callee saved register
5664 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5666 // psc is abbreviation for possibleSameColor
5667 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5669 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5670 unsigned pscIndex = 0;
5671 while (pscIndexIter.NextElem(&pscIndex))
5673 LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5674 totalRefCntWtd += pscVar->lvRefCntWtd;
5675 if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5679 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5682 continue; // not worth spilling a callee saved register
5684 // otherwise we will spill this callee saved registers,
5685 // because its uses when combined with the uses of
5686 // other yet to be processed candidates exceed our threshold.
5687 // totalRefCntWtd = totalRefCntWtd;
5690 /* Looks good - mark the variable as living in the register */
5692 if (isRegPairType(varDsc->lvType))
5694 if (firstHalf == false)
5696 /* Enregister the first half of the long */
5697 varDsc->lvRegNum = regNum;
5698 saveOtherReg = varDsc->lvOtherReg;
5699 varDsc->lvOtherReg = REG_STK;
5704 /* Ensure 'well-formed' register pairs */
5705 /* (those returned by gen[Pick|Grab]RegPair) */
5707 if (regNum < varDsc->lvRegNum)
5709 varDsc->lvOtherReg = varDsc->lvRegNum;
5710 varDsc->lvRegNum = regNum;
5714 varDsc->lvOtherReg = regNum;
5721 varDsc->lvRegNum = regNum;
5725 varDsc->lvOtherReg = REG_NEXT(regNum);
// Track EBP usage and parameter counts for the frame-type decision below.
5730 if (regNum == REG_FPBASE)
5732 refCntEBP += varDsc->lvRefCnt;
5733 refCntWtdEBP += varDsc->lvRefCntWtd;
5735 if (varDsc->lvIsParam)
5737 refCntStkParam += varDsc->lvRefCnt;
5742 /* Record this register in the regUsed set */
5745 /* The register is now ineligible for all interfering variables */
5747 VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5752 regNumber secondHalf = REG_NEXT(regNum);
5753 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5754 unsigned intfIndex = 0;
5755 while (iter.NextElem(&intfIndex))
5757 VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5762 /* If a register argument, remove its incoming register
5763 * from the "avoid" list */
5765 if (varDsc->lvIsRegArg)
5767 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5771 raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5776 /* A variable of TYP_LONG can take two registers */
5780 // Since we have successfully enregistered this variable it is
5781 // now time to move on and consider the next variable
5787 noway_assert(isRegPairType(varDsc->lvType));
5789 /* This TYP_LONG is partially enregistered */
5791 noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5793 if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5798 raAddToStkPredict(varDsc->lvRefCntWtd);
5803 if (varDsc->lvDependReg)
// Variable not enregistered: charge its weighted ref count to the stack
// prediction and mark it as stack-resident.
5810 /* Weighted count of variables that could have been enregistered but weren't */
5811 raAddToStkPredict(varDsc->lvRefCntWtd);
5813 if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5814 raAddToStkPredict(varDsc->lvRefCntWtd);
5818 varDsc->lvRegister = false;
5820 varDsc->lvRegNum = REG_STK;
5821 if (isRegPairType(varDsc->lvType))
5822 varDsc->lvOtherReg = REG_STK;
5824 /* unweighted count of variables that were not enregistered */
5826 refCntStk += varDsc->lvRefCnt;
5829 if (varDsc->lvIsParam)
5831 refCntStkParam += varDsc->lvRefCnt;
5835 /* Is it a stack based double? */
5836 /* Note that double params are excluded since they can not be double aligned */
5837 if (varDsc->lvType == TYP_DOUBLE)
5839 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5847 gtDispLclVar((unsigned)(varDsc - lvaTable));
5848 if (varDsc->lvTracked)
5849 printf("T%02u", varDsc->lvVarIndex);
5852 printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5853 if (varDsc->lvDoNotEnregister)
5854 printf(", do-not-enregister");
5862 varDsc->lvRegister = true;
5864 // Record the fact that we enregistered a stack arg when tail call is used.
5865 if (compJmpOpUsed && !varDsc->lvIsRegArg)
5867 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5868 if (isRegPairType(varDsc->lvType))
5870 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5878 gtDispLclVar((unsigned)(varDsc - lvaTable));
5879 printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5880 refCntWtd2str(varDsc->lvRefCntWtd));
5881 varDsc->PrintVarReg();
5885 printf(":%s", getRegName(varDsc->lvOtherReg));
5894 noway_assert(refCntEBP == 0);
5901 printf("; refCntStk = %u\n", refCntStk);
5903 printf("; refCntEBP = %u\n", refCntEBP);
5904 if (refCntWtdEBP > 0)
5905 printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
5907 if (refCntStkParam > 0)
5908 printf("; refCntStkParam = %u\n", refCntStkParam);
5909 if (refCntWtdStkDbl > 0)
5910 printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5915 /* Determine how the EBP register should be used */
5916 CLANG_FORMAT_COMMENT_ANCHOR;
// Frame-type decision: first consider a double-aligned frame, then weigh the
// code-size cost/benefit of EBP enregistration on xarch, then consult the ETW
// heuristic (rpMustCreateEBPFrame). The REVERSE_EBP_ENREG label targeted by
// the gotos below is in elided lines.
5920 if (!codeGen->isFramePointerRequired())
5922 noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5925 First let us decide if we should use EBP to create a
5926 double-aligned frame, instead of enregistering variables
5929 if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5931 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5932 goto REVERSE_EBP_ENREG;
5935 if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5937 if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5939 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5940 goto REVERSE_EBP_ENREG;
5945 #endif // DOUBLE_ALIGN
5947 if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5949 #ifdef _TARGET_XARCH_
5951 /* If we are using EBP to enregister variables then
5952 will we actually save bytes by setting up an EBP frame?
5954 Each stack reference is an extra byte of code if we use
5957 Here we measure the savings that we get by using EBP to
5958 enregister variables vs. the cost in code size that we
5959 pay when using an ESP based frame.
5961 We pay one byte of code for each refCntStk
5962 but we save one byte (or more) for each refCntEBP.
5964 Our savings are the elimination of a stack memory read/write.
5965 We use the loop weighted value of
5966 refCntWtdEBP * mem_access_weight (0, 3, 6)
5967 to represent this savings.
5970 // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5971 // to set up an EBP frame in the prolog and epilog
5972 #define EBP_FRAME_SETUP_SIZE 5
5975 if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5977 unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5978 unsigned mem_access_weight = 3;
5980 if (compCodeOpt() == SMALL_CODE)
5981 mem_access_weight = 0;
5982 else if (compCodeOpt() == FAST_CODE)
5983 mem_access_weight *= 2;
5985 if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5987 /* It's not a good idea to use EBP in our predictions */
5988 CLANG_FORMAT_COMMENT_ANCHOR;
5990 if (verbose && (refCntEBP > 0))
5991 printf("; Predicting that it's not worth using EBP to enregister variables\n");
5993 rpFrameType = FT_EBP_FRAME;
5994 goto REVERSE_EBP_ENREG;
5997 #endif // _TARGET_XARCH_
5999 if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6004 if (rpMustCreateEBPCalled == false)
6006 rpMustCreateEBPCalled = true;
6007 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6011 printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6013 codeGen->setFrameRequired(true);
6015 rpFrameType = FT_EBP_FRAME;
6016 goto REVERSE_EBP_ENREG;
// REVERSE_EBP_ENREG path (label itself is in elided lines): undo any
// predicted enregistration that used EBP, since EBP is now the frame pointer.
6026 noway_assert(rpFrameType != FT_ESP_FRAME);
6028 rpReverseEBPenreg = true;
6033 noway_assert(regUsed & RBM_FPBASE);
6035 regUsed &= ~RBM_FPBASE;
6037 /* variables that were enregistered in EBP become stack based variables */
6038 raAddToStkPredict(refCntWtdEBP);
6042 /* We're going to have to undo some predicted enregistered variables */
6043 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6045 /* Is this a register variable? */
6046 if (varDsc->lvRegNum != REG_STK)
6048 if (isRegPairType(varDsc->lvType))
6050 /* Only one can be EBP */
6051 if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6053 if (varDsc->lvRegNum == REG_FPBASE)
6054 varDsc->lvRegNum = varDsc->lvOtherReg;
6056 varDsc->lvOtherReg = REG_STK;
6058 if (varDsc->lvRegNum == REG_STK)
6059 varDsc->lvRegister = false;
6061 if (varDsc->lvDependReg)
6071 if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6073 varDsc->lvRegNum = REG_STK;
6075 varDsc->lvRegister = false;
6077 if (varDsc->lvDependReg)
6083 printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6084 varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6085 (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6093 #endif // ETW_EBP_FRAMED
// Per-pass cleanup: clear lvDependReg, and on the first pessimize pass after
// a lost enregistration, push all TYP_LONG pairs back to the stack.
6098 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6100 /* Clear the lvDependReg flag for next iteration of the predictor */
6101 varDsc->lvDependReg = false;
6103 // If we set rpLostEnreg and this is the first pessimize pass
6104 // then reverse the enreg of all TYP_LONG
6105 if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6107 varDsc->lvRegNum = REG_STK;
6108 varDsc->lvOtherReg = REG_STK;
6113 if (verbose && raNewBlocks)
6115 printf("\nAdded FP register killing blocks:\n");
6116 fgDispBasicBlocks();
6120 noway_assert(rpFrameType != FT_NOT_SET);
6122 /* return the set of registers used to enregister variables */
6126 #pragma warning(pop)
6129 /*****************************************************************************
6131 * Predict register use for every tree in the function. Note that we do this
6132 * at different times (not to mention in a totally different way) for x86 vs
6135 void Compiler::rpPredictRegUse()
6142 // We might want to adjust the ref counts based on interference
6145 regMaskTP allAcceptableRegs = RBM_ALLINT;
6147 #if FEATURE_FP_REGALLOC
6148 allAcceptableRegs |= raConfigRestrictMaskFP();
6151 allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6153 /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6154 to acdHelper(). This is done implicitly, without creating a GT_CALL
6155 node. Hence, this interference is handled implicitly by
6156 restricting the registers used for enregistering variables */
6158 if (opts.compDbgCode)
6160 allAcceptableRegs &= RBM_CALLEE_SAVED;
6163 /* Compute the initial regmask to use for the first pass */
6164 regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6167 #if CPU_USES_BLOCK_MOVE
6168 /* If we might need to generate a rep mov instruction */
6169 /* remove ESI and EDI */
6171 regAvail &= ~(RBM_ESI | RBM_EDI);
6175 /* If we are using longs then we remove ESI to allow */
6176 /* ESI:EBX to be saved across a call */
6178 regAvail &= ~(RBM_ESI);
6182 // For the first register allocation pass we don't want to color using r4
6183 // as we want to allow it to be used to color the internal temps instead
6184 // when r0,r1,r2,r3 are all in use.
6186 regAvail &= ~(RBM_R4);
6190 // We never have EBP available when ETW_EBP_FRAME is defined
6191 regAvail &= ~RBM_FPBASE;
6193 /* If a frame pointer is required then we remove EBP */
6194 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6195 regAvail &= ~RBM_FPBASE;
6199 BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6201 regAvail = RBM_NONE;
6204 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6205 regAvail = RBM_NONE;
6207 #if FEATURE_STACK_FP_X87
6208 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6209 VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6211 // Calculate the set of all tracked FP/non-FP variables
6212 // into optAllFloatVars and optAllNonFPvars
6217 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6219 /* Ignore the variable if it's not tracked */
6221 if (!varDsc->lvTracked)
6224 /* Get hold of the index and the interference mask for the variable */
6226 unsigned varNum = varDsc->lvVarIndex;
6228 /* add to the set of all tracked FP/non-FP variables */
6230 if (varDsc->IsFloatRegType())
6231 VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6233 VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6237 for (unsigned i = 0; i < REG_COUNT; i++)
6239 VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6241 for (unsigned i = 0; i < lvaTrackedCount; i++)
6243 VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6246 raNewBlocks = false;
6247 rpPredictAssignAgain = false;
6250 bool mustPredict = true;
6251 unsigned stmtNum = 0;
6252 unsigned oldStkPredict = DUMMY_INIT(~0);
6253 VARSET_TP oldLclRegIntf[REG_COUNT];
6255 for (unsigned i = 0; i < REG_COUNT; i++)
6257 VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6262 /* Assign registers to variables using the variable/register interference
6263 graph (raLclRegIntf[]) calculated in the previous pass */
6264 regUsed = rpPredictAssignRegVars(regAvail);
6266 mustPredict |= rpLostEnreg;
6269 // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6270 if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6271 !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6273 // We can release our reservation on R10 and use it to color registers
6274 codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6275 allAcceptableRegs |= RBM_OPT_RSVD;
6279 /* Is our new prediction good enough?? */
6282 /* For small methods (less than 12 stmts), we add an */
6283 /* extra pass if we are predicting the use of some */
6284 /* of the caller saved registers. */
6285 /* This fixes RAID perf bug 43440 VB Ackerman function */
6287 if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6292 /* If every variable was fully enregistered then we're done */
6293 if (rpStkPredict == 0)
6296 // This was a successful prediction. Record it, in case it turns out to be the best one.
6297 rpRecordPrediction();
6301 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6303 // Be careful about overflow
6304 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6305 if (oldStkPredict < highStkPredict)
6308 if (rpStkPredict < rpPasses * 8)
6311 if (rpPasses >= (rpPassesMax - 1))
6316 /* We will do another pass */;
6320 if (JitConfig.JitAssertOnMaxRAPasses())
6322 noway_assert(rpPasses < rpPassesMax &&
6323 "This may not a bug, but dev team should look and see what is happening");
6327 // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with
6328 // the (max) size of a VARSET. We've eliminated this constant, so I left this as a constant. We hope
6329 // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6330 if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6332 NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6341 printf("\n; Another pass due to rpLostEnreg");
6343 printf("\n; Another pass due to rpAddedVarIntf");
6344 if ((rpPasses == 1) && rpPredictAssignAgain)
6345 printf("\n; Another pass due to rpPredictAssignAgain");
6347 printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6351 /* Zero the variable/register interference graph */
6352 for (unsigned i = 0; i < REG_COUNT; i++)
6354 VarSetOps::ClearD(this, raLclRegIntf[i]);
6357 // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6358 // it must not be in a register trashed by the callee
6359 if (info.compLvFrameListRoot != BAD_VAR_NUM)
6361 assert(!opts.ShouldUsePInvokeHelpers());
6362 noway_assert(info.compLvFrameListRoot < lvaCount);
6364 LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6366 if (pinvokeVarDsc->lvTracked)
6368 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6369 DEBUGARG("compLvFrameListRoot"));
6371 // We would prefer to have this be enregistered in the PINVOKE_TCB register
6372 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6375 // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6376 // worst case). Make sure that the return value compiler temp that we create for the single
6377 // return block knows about this interference.
6378 if (genReturnLocal != BAD_VAR_NUM)
6380 noway_assert(genReturnBB);
6381 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6382 if (localTmp->lvTracked)
6384 rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6385 VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6391 if (compFloatingPointUsed)
6393 bool hasMustInitFloat = false;
6395 // if we have any must-init floating point LclVars then we will add register interferences
6396 // for the arguments with RBM_SCRATCH
6397 // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6398 // we won't home the arguments into REG_SCRATCH
6403 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6405 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6407 hasMustInitFloat = true;
6412 if (hasMustInitFloat)
6414 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6416 // If it is an incoming argument that is tracked and not floating-point
6417 if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6419 rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6420 DEBUGARG("arg home with must-init fp"));
6428 rpAddedVarIntf = false;
6429 rpLostEnreg = false;
6431 /* Walk the basic blocks and predict reg use for each tree */
6433 for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6437 compCurLifeTree = NULL;
6438 VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6442 for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6444 noway_assert(stmt->gtOper == GT_STMT);
6446 rpPredictSpillCnt = 0;
6447 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6448 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6450 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6455 printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6460 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6462 noway_assert(rpAsgVarNum == -1);
6464 if (rpPredictSpillCnt > tmpIntSpillMax)
6465 tmpIntSpillMax = rpPredictSpillCnt;
6470 /* Decide whether we need to set mustPredict */
6471 mustPredict = false;
6474 // The spill count may be now high enough that we now need to reserve r10. If this is the case, we'll need to
6475 // reserve r10, and if it was used, throw out the last prediction and repredict.
6476 if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6478 codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6479 allAcceptableRegs &= ~RBM_OPT_RSVD;
6480 if ((regUsed & RBM_OPT_RSVD) != 0)
6483 rpBestRecordedPrediction = nullptr;
6499 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6502 if (rpPredictAssignAgain)
6510 /* Calculate the new value to use for regAvail */
6512 regAvail = allAcceptableRegs;
6514 /* If a frame pointer is required then we remove EBP */
6515 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6516 regAvail &= ~RBM_FPBASE;
6519 // We never have EBP available when ETW_EBP_FRAME is defined
6520 regAvail &= ~RBM_FPBASE;
6523 // If we have done n-passes then we must continue to pessimize the
6524 // interference graph by or-ing the interferences from the previous pass
6526 if (rpPasses > rpPassesPessimize)
6528 for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6529 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6531 /* If we reverse an EBP enregistration then keep it that way */
6532 if (rpReverseEBPenreg)
6533 regAvail &= ~RBM_FPBASE;
6541 /* Save the old variable/register interference graph */
6542 for (unsigned i = 0; i < REG_COUNT; i++)
6544 VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6546 oldStkPredict = rpStkPredict;
6547 } // end of while (true)
6551 // If we recorded a better feasible allocation than we ended up with, go back to using it.
6552 rpUseRecordedPredictionIfBetter();
6555 codeGen->setDoubleAlign(false);
6558 switch (rpFrameType)
6561 noway_assert(!"rpFrameType not set correctly!");
6564 noway_assert(!codeGen->isFramePointerRequired());
6565 noway_assert(!codeGen->isFrameRequired());
6566 codeGen->setFramePointerUsed(false);
6569 noway_assert((regUsed & RBM_FPBASE) == 0);
6570 codeGen->setFramePointerUsed(true);
6573 case FT_DOUBLE_ALIGN_FRAME:
6574 noway_assert((regUsed & RBM_FPBASE) == 0);
6575 noway_assert(!codeGen->isFramePointerRequired());
6576 codeGen->setFramePointerUsed(false);
6577 codeGen->setDoubleAlign(true);
6582 /* Record the set of registers that we need */
6583 codeGen->regSet.rsClearRegsModified();
6584 if (regUsed != RBM_NONE)
6586 codeGen->regSet.rsSetRegsModified(regUsed);
6589 /* We need genFullPtrRegMap if :
6590 * The method is fully interruptible, or
6591 * We are generating an EBP-less frame (for stack-pointer deltas)
6594 genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6600 printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6601 printf(" rpStkPredict was %u\n", rpStkPredict);
6604 rpRegAllocDone = true;
6607 #endif // LEGACY_BACKEND
6609 /*****************************************************************************
6611 * Mark all variables as to whether they live on the stack frame
6612 * (part or whole), and if so what the base is (FP or SP).
// raMarkStkVars:
//   Walk the entire lclVar table and mark each variable as to whether it
//   lives on the stack frame (in part or in whole), and if so whether it is
//   addressed relative to the frame pointer or the stack pointer
//   (lvFramePointerBased). Also asserts consistency between
//   lvRegister / lvOnFrame / lvRefCnt for every local.
6615 void Compiler::raMarkStkVars()
6620 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6622 // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6623 CLANG_FORMAT_COMMENT_ANCHOR;
6625 #ifdef LEGACY_BACKEND
// The legacy allocator recomputes lvOnFrame from scratch for every local.
6626 varDsc->lvOnFrame = false;
6627 #endif // LEGACY_BACKEND
// Fields of a dependently-promoted struct take their frame placement from
// the parent struct and are never independently enregistered.
6629 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6631 noway_assert(!varDsc->lvRegister);
6635 /* Fully enregistered variables don't need any frame space */
6637 if (varDsc->lvRegister)
6639 if (!isRegPairType(varDsc->TypeGet()))
6644 /* For "large" variables make sure both halves are enregistered */
6646 if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6651 /* Unused variables typically don't get any frame space */
6652 else if (varDsc->lvRefCnt == 0)
6654 bool needSlot = false;
// A stack-based fixed incoming arg of a varargs method (not a register arg,
// not the varargs cookie): such args must not get lvOnFrame set, since we
// don't want them reported to the GC (see comment below).
6656 bool stkFixedArgInVarArgs =
6657 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6659 // If its address has been exposed, ignore lvRefCnt. However, exclude
6660 // fixed arguments in varargs method as lvOnFrame shouldn't be set
6661 // for them as we don't want to explicitly report them to GC.
6663 if (!stkFixedArgInVarArgs)
6665 needSlot |= varDsc->lvAddrExposed;
6668 #if FEATURE_FIXED_OUT_ARGS
6670 /* Is this the dummy variable representing GT_LCLBLK ? */
6671 needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6673 #endif // FEATURE_FIXED_OUT_ARGS
6676 /* For debugging, note that we have to reserve space even for
6677 unused variables if they are ever in scope. However, this is not
6678 an issue as fgExtendDbgLifetimes() adds an initialization and
6679 variables in scope will not have a zero ref-cnt.
6681 if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6683 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6685 noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6690 For Debug Code, we have to reserve space even if the variable is never
6691 in scope. We will also need to initialize it if it is a GC var.
6692 So we set lvMustInit and artificially bump up the ref-cnt.
6695 if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6699 if (lvaTypeIsGC(lclNum))
6701 varDsc->lvRefCnt = 1;
6704 if (!varDsc->lvIsParam)
6706 varDsc->lvMustInit = true;
6710 #ifndef LEGACY_BACKEND
6711 varDsc->lvOnFrame = needSlot;
6712 #endif // !LEGACY_BACKEND
6715 /* Clear the lvMustInit flag in case it is set */
6716 varDsc->lvMustInit = false;
6722 #ifndef LEGACY_BACKEND
// For RyuJIT: everything below only applies to locals with a frame home.
6723 if (!varDsc->lvOnFrame)
6727 #endif // !LEGACY_BACKEND
6730 /* The variable (or part of it) lives on the stack frame */
6732 noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6733 #if FEATURE_FIXED_OUT_ARGS
// The outgoing-arg-space dummy is the only frame local allowed a zero size.
6734 noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6735 #else // FEATURE_FIXED_OUT_ARGS
6736 noway_assert(lvaLclSize(lclNum) != 0);
6737 #endif // FEATURE_FIXED_OUT_ARGS
6739 varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
// Addressed off the frame pointer exactly when this function uses one.
6743 varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6747 if (codeGen->doDoubleAlign())
6749 noway_assert(codeGen->isFramePointerUsed() == false);
6751 /* All arguments are off of EBP with double-aligned frames */
6753 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6755 varDsc->lvFramePointerBased = true;
6761 /* Some basic checks */
6763 // It must be in a register, on frame, or have zero references.
6765 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6767 #ifndef LEGACY_BACKEND
6768 // We can't have both lvRegister and lvOnFrame for RyuJIT
6769 noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6770 #else // LEGACY_BACKEND
6772 /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6773 noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6774 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK))
6775 #endif // LEGACY_BACKEND
6779 // For varargs functions, there should be no direct references to
6780 // parameter variables except for 'this' (because these were morphed
6781 // in the importer) and the 'arglist' parameter (which is not a GC
6782 // pointer). and the return buffer argument (if we are returning a
6784 // This is important because we don't want to try to report them
6785 // to the GC, as the frame offsets in these local variables would
6788 if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6790 if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6792 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6799 #ifdef LEGACY_BACKEND
// rpRecordPrediction:
//   If the current prediction (rpStkPredict = weighted stack use count) beats
//   the best feasible prediction recorded so far, snapshot every local's
//   enregistration decision (lvRegister, regNum, otherReg) into
//   rpBestRecordedPrediction so that rpUseRecordedPredictionIfBetter() can
//   restore it later if subsequent passes turn out worse.
6800 void Compiler::rpRecordPrediction()
6802 if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6804 if (rpBestRecordedPrediction == NULL)
// First recording: lazily allocate one VarRegPrediction entry per local.
6806 rpBestRecordedPrediction =
6807 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
// Copy the current per-local register assignment into the snapshot.
6809 for (unsigned k = 0; k < lvaCount; k++)
6811 rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6812 rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum();
6813 rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg();
6815 rpBestRecordedStkPredict = rpStkPredict;
6816 JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
// rpUseRecordedPredictionIfBetter:
//   If the allocation we ended up with is worse (higher weighted stack use
//   count, rpStkPredict) than the best feasible one recorded by
//   rpRecordPrediction(), copy the recorded per-local register assignments
//   back into lvaTable.
6820 void Compiler::rpUseRecordedPredictionIfBetter()
6822 JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6823 rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
// Only revert when a recorded prediction exists and is strictly better.
6824 if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6826 JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6827 rpBestRecordedStkPredict);
// Restore the snapshot: enregistration flag plus both register halves.
6829 for (unsigned k = 0; k < lvaCount; k++)
6831 lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6832 lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6833 lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6837 #endif // LEGACY_BACKEND