1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XX Does the register allocation and puts the remaining lclVars on the stack XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
22 #if FEATURE_FP_REGALLOC
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
25 DWORD val = JitConfig.JitRegisterFP();
27 return (enumConfigRegisterFP)(val & 0x3);
29 #endif // FEATURE_FP_REGALLOC
31 regMaskTP Compiler::raConfigRestrictMaskFP()
33 regMaskTP result = RBM_NONE;
35 #if FEATURE_FP_REGALLOC
36 switch (raConfigRegisterFP())
38 case CONFIG_REGISTER_FP_NONE:
41 case CONFIG_REGISTER_FP_CALLEE_TRASH:
42 result = RBM_FLT_CALLEE_TRASH;
44 case CONFIG_REGISTER_FP_CALLEE_SAVED:
45 result = RBM_FLT_CALLEE_SAVED;
47 case CONFIG_REGISTER_FP_FULL:
48 result = RBM_ALLFLOAT;
57 DWORD Compiler::getCanDoubleAlign()
60 if (compStressCompile(STRESS_DBL_ALN, 20))
61 return MUST_DOUBLE_ALIGN;
63 return JitConfig.JitDoubleAlign();
65 return DEFAULT_DOUBLE_ALIGN;
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
73 // refCntStk - sum of ref counts for all stack based variables
74 // refCntEBP - sum of ref counts for EBP enregistered variables
75 // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables
76 // refCntStkParam - sum of ref counts for all stack based parameters
77 // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 // with double fields).
81 // Returns true if this method estimates that a double-aligned frame would be beneficial
84 // The impact of a double-aligned frame is computed as follows:
85 // - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 // - We pay a byte of code for each non-parameter stack reference.
87 // - We save the misalignment penalty and possible cache-line crossing penalty.
88 // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 // - We pay 7 extra bytes for:
91 // LEA ESP,[EBP-offset]
92 // AND ESP,-8 to double align ESP
93 // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
95 // If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 //    Otherwise, we compare the weighted ref count of EBP-enregistered variables against double the
97 // ref count for double-aligned values.
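//
//    Illustrative example (hypothetical counts): with refCntStk = 20, refCntEBP = 3 and refCntStkParam = 8
//    we get bytesUsed = 20 + 3 - 8 + 7 = 22. Under FAST_CODE the misalignment weight is 16, so with
//    refCntWtdStkDbl = 2 * BB_UNITY_WEIGHT the code-size test compares 22 against (2 * 16) = 32 and is not taken.
//    If refCntWtdEBP = 3 * BB_UNITY_WEIGHT, it does not exceed 2 * refCntWtdStkDbl, so we predict a double-aligned frame.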
99 bool Compiler::shouldDoubleAlign(
100 unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
102 bool doDoubleAlign = false;
103 const unsigned DBL_ALIGN_SETUP_SIZE = 7;
105 unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
106 unsigned misaligned_weight = 4;
108 if (compCodeOpt() == Compiler::SMALL_CODE)
109 misaligned_weight = 0;
111 if (compCodeOpt() == Compiler::FAST_CODE)
112 misaligned_weight *= 4;
114 JITDUMP("\nDouble alignment:\n");
115 JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116 JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117     JITDUMP("  Sum of weighted ref counts for stack based doubles: %i\n", refCntWtdStkDbl);
119 if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
121 JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
123 else if (refCntWtdEBP > refCntWtdStkDbl * 2)
125 // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126 // not double aligned.
127 // Here are the numbers that make this not double-aligned.
128 // refCntWtdStkDbl = 0x164
129 // refCntWtdEBP = 0x1a4
130 // We think we do need to change the heuristic to be in favor of double-align.
132 JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
136 // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137 JITDUMP(" Predicting to create a double-aligned frame\n");
138 doDoubleAlign = true;
140 return doDoubleAlign;
142 #endif // DOUBLE_ALIGN
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
146 void Compiler::raInit()
148 #if FEATURE_STACK_FP_X87
149 /* We have not assigned any FP variables to registers yet */
151 VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
153 codeGen->intRegState.rsIsFloat = false;
154 codeGen->floatRegState.rsIsFloat = true;
156 rpReverseEBPenreg = false;
159 rpPassesPessimize = rpPassesMax - 3;
160 if (opts.compDbgCode)
164 rpStkPredict = (unsigned)-1;
165 rpFrameType = FT_NOT_SET;
167 rpMustCreateEBPCalled = false;
168 rpRegAllocDone = false;
169 rpMaskPInvokeEpilogIntf = RBM_NONE;
171 rpPredictMap[PREDICT_NONE] = RBM_NONE;
172 rpPredictMap[PREDICT_ADDR] = RBM_NONE;
174 #if FEATURE_FP_REGALLOC
175 rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
176 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
178 rpPredictMap[PREDICT_REG] = RBM_ALLINT;
179 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
185 #if defined(_TARGET_ARM_)
187 rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188 rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
189 rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
191 #elif defined(_TARGET_AMD64_)
193 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
197 #elif defined(_TARGET_X86_)
199 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
202 rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203 rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
207 rpBestRecordedPrediction = NULL;
210 /*****************************************************************************
212 * The following table(s) determines the order in which registers are considered
213 * for variables to live in
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
218 #if FEATURE_FP_REGALLOC
219 if (varTypeIsFloating(regType))
221 static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
222 const unsigned raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]);
224 if (wbVarOrderSize != NULL)
225 *wbVarOrderSize = raRegVarOrderFltSize;
227 return &raRegVarOrderFlt[0];
232 static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
233 const unsigned raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]);
235 if (wbVarOrderSize != NULL)
236 *wbVarOrderSize = raRegVarOrderSize;
238 return &raRegVarOrder[0];
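// Illustrative (hypothetical) caller of raGetRegVarOrder:
//     unsigned         varOrderSize;
//     const regNumber* varOrder = raGetRegVarOrder(TYP_INT, &varOrderSize);
//     for (unsigned i = 0; i < varOrderSize; i++)
//         tryToColorVarInto(varOrder[i]); // tryToColorVarInto is a hypothetical helper
// raSetRegVarOrder below walks the same table when building a custom ordering.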
244 /*****************************************************************************
246 * Dump out the variable interference graph
250 void Compiler::raDumpVarIntf()
255 printf("Var. interference graph for %s\n", info.compFullName);
257 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
259 /* Ignore the variable if it's not tracked */
261 if (!varDsc->lvTracked)
264 /* Get hold of the index and the interference mask for the variable */
265 unsigned varIndex = varDsc->lvVarIndex;
267 printf(" V%02u,T%02u and ", lclNum, varIndex);
271 for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
273 if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274 printf("T%02u ", refIndex);
285 /*****************************************************************************
287 * Dump out the register interference graph
290 void Compiler::raDumpRegIntf()
292 printf("Reg. interference graph for %s\n", info.compFullName);
297 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
301 /* Ignore the variable if it's not tracked */
303 if (!varDsc->lvTracked)
306 /* Get hold of the index and the interference mask for the variable */
308 varNum = varDsc->lvVarIndex;
310 printf(" V%02u,T%02u and ", lclNum, varNum);
312 if (varDsc->IsFloatRegType())
314 #if !FEATURE_STACK_FP_X87
315 for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
317 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318 printf("%3s ", getRegName(regNum, true));
326 for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
328 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329 printf("%3s ", getRegName(regNum));
342 /*****************************************************************************
344 * We'll adjust the ref counts based on interference
348 void Compiler::raAdjustVarIntf()
350 // This method was not correct and has been disabled.
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
359 inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
361 var_types type = tree->TypeGet();
363 if (varTypeIsStruct(type) && IsHfa(tree))
365 int retSlots = GetHfaCount(tree);
366 return ((1 << retSlots) - 1) << REG_FLOATRET;
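// Illustrative: an HFA returned in three float registers has retSlots == 3, so the expression above
// builds ((1 << 3) - 1) == 0x7 and shifts it to REG_FLOATRET, i.e. the three consecutive
// floating point return register bits starting at the REG_FLOATRET bit position.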
369 const static regMaskTP returnMap[TYP_COUNT] = {
370 RBM_ILLEGAL, // TYP_UNDEF,
371 RBM_NONE, // TYP_VOID,
372 RBM_INTRET, // TYP_BOOL,
373 RBM_INTRET, // TYP_CHAR,
374 RBM_INTRET, // TYP_BYTE,
375 RBM_INTRET, // TYP_UBYTE,
376 RBM_INTRET, // TYP_SHORT,
377 RBM_INTRET, // TYP_USHORT,
378 RBM_INTRET, // TYP_INT,
379 RBM_INTRET, // TYP_UINT,
380 RBM_LNGRET, // TYP_LONG,
381 RBM_LNGRET, // TYP_ULONG,
382 RBM_FLOATRET, // TYP_FLOAT,
383 RBM_DOUBLERET, // TYP_DOUBLE,
384 RBM_INTRET, // TYP_REF,
385 RBM_INTRET, // TYP_BYREF,
386 RBM_INTRET, // TYP_ARRAY,
387 RBM_ILLEGAL, // TYP_STRUCT,
388 RBM_ILLEGAL, // TYP_BLK,
389 RBM_ILLEGAL, // TYP_LCLBLK,
390 RBM_ILLEGAL, // TYP_PTR,
391 RBM_ILLEGAL, // TYP_FNC,
392 RBM_ILLEGAL, // TYP_UNKNOWN,
395 assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0]));
396 assert(returnMap[TYP_LONG] == RBM_LNGRET);
397 assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
398 assert(returnMap[TYP_REF] == RBM_INTRET);
399 assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
401 regMaskTP result = returnMap[type];
402 assert(result != RBM_ILLEGAL);
406 /*****************************************************************************/
408 /****************************************************************************/
412 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
417 for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
419 if (!varDsc->lvTracked)
422 if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
425 if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
426 printf("V%02u ", lclNum);
432 /*****************************************************************************/
434 /*****************************************************************************
436 *  Debugging helpers - display variable liveness info.
439 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
443 printf("BB%02u: ", beg->bbNum);
446 dispLifeSet(comp, mask, beg->bbLiveIn);
450 dispLifeSet(comp, mask, beg->bbLiveOut);
453 if (beg->bbFlags & BBF_VISITED)
454 printf(" inner=%u", beg->bbFPinVars);
461 } while (beg != end);
464 #if FEATURE_STACK_FP_X87
465 void Compiler::raDispFPlifeInfo()
469 for (block = fgFirstBB; block; block = block->bbNext)
473 printf("BB%02u: in = [ ", block->bbNum);
474 dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
477 VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
478 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
482 noway_assert(stmt->gtOper == GT_STMT);
484 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
486 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
488 dispLifeSet(this, optAllFloatVars, life);
490 gtDispTree(tree, 0, NULL, true);
496 printf("BB%02u: out = [ ", block->bbNum);
497 dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
501 #endif // FEATURE_STACK_FP_X87
502 /*****************************************************************************/
504 /*****************************************************************************/
506 /*****************************************************************************/
508 void Compiler::raSetRegVarOrder(
509 var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
511 unsigned normalVarOrderSize;
512 const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
514 unsigned listIndex = 0;
515 regMaskTP usedReg = avoidReg;
517 noway_assert(*customVarOrderSize >= normalVarOrderSize);
521 /* First place the preferred registers at the start of customVarOrder */
526 for (index = 0; index < normalVarOrderSize; index++)
528 regNum = normalVarOrder[index];
529 regBit = genRegMask(regNum);
531 if (usedReg & regBit)
534 if (prefReg & regBit)
537 noway_assert(listIndex < normalVarOrderSize);
538 customVarOrder[listIndex++] = regNum;
545 #if CPU_HAS_BYTE_REGS
546 /* Then if byteable registers are preferred place them */
548 if (prefReg & RBM_BYTE_REG_FLAG)
550 for (index = 0; index < normalVarOrderSize; index++)
552 regNum = normalVarOrder[index];
553 regBit = genRegMask(regNum);
555 if (usedReg & regBit)
558 if (RBM_BYTE_REGS & regBit)
561 noway_assert(listIndex < normalVarOrderSize);
562 customVarOrder[listIndex++] = regNum;
567 #endif // CPU_HAS_BYTE_REGS
570 /* Now place all the non-preferred registers */
572 for (index = 0; index < normalVarOrderSize; index++)
574 regNumber regNum = normalVarOrder[index];
575 regMaskTP regBit = genRegMask(regNum);
577 if (usedReg & regBit)
581 noway_assert(listIndex < normalVarOrderSize);
582 customVarOrder[listIndex++] = regNum;
587 /* Now place the "avoid" registers */
589 for (index = 0; index < normalVarOrderSize; index++)
591 regNumber regNum = normalVarOrder[index];
592 regMaskTP regBit = genRegMask(regNum);
594 if (avoidReg & regBit)
596 noway_assert(listIndex < normalVarOrderSize);
597 customVarOrder[listIndex++] = regNum;
605 *customVarOrderSize = listIndex;
606 noway_assert(listIndex == normalVarOrderSize);
609 /*****************************************************************************
611 *  Set up raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
614 void Compiler::raSetupArgMasks(RegState* regState)
616 /* Determine the registers holding incoming register arguments */
617 /* and setup raAvoidArgRegMask to the set of registers that we */
618 /* may want to avoid when enregistering the locals. */
620 regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
621 raAvoidArgRegMask = RBM_NONE;
623 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
625 for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
627 noway_assert(argDsc->lvIsParam);
629 // Is it a register argument ?
630 if (!argDsc->lvIsRegArg)
633 // only process args that apply to the current register file
634 if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
639 // Is it dead on entry ??
640 // In certain cases such as when compJmpOpUsed is true,
641 // or when we have a generic type context arg that we must report
642 // then the arguments have to be kept alive throughout the prolog.
643 // So we have to consider it as live on entry.
645 bool keepArgAlive = compJmpOpUsed;
646 if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
647 ((lvaTable + info.compTypeCtxtArg) == argDsc))
652 if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
657 // The code to set the regState for each arg is outlined for shared use
659 regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
661         // Do we need to try to avoid this incoming arg register?
663 // If it's not tracked, don't do the stuff below.
664 if (!argDsc->lvTracked)
667         // If the incoming arg is used after a call it is live across
668         //  a call and will have to be allocated to a callee-saved
669         //  register anyway (a very common case).
671         // In this case it is pointless to ask the higher ref count
672         //  locals to avoid using the incoming arg register
674 unsigned argVarIndex = argDsc->lvVarIndex;
676         /* Do the incoming register and the arg variable interfere? */
678 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
680 // No they do not interfere,
681 // so we add inArgReg to raAvoidArgRegMask
683 raAvoidArgRegMask |= genRegMask(inArgReg);
686 if (argDsc->lvType == TYP_DOUBLE)
688 // Avoid the double register argument pair for register allocation.
689 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
691 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
698 #endif // LEGACY_BACKEND
700 // The code to set the regState for each arg is outlined for shared use
701 // by linear scan. (It is not shared for System V AMD64 platform.)
702 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
704 regNumber inArgReg = argDsc->lvArgReg;
705 regMaskTP inArgMask = genRegMask(inArgReg);
707 if (regState->rsIsFloat)
709 noway_assert(inArgMask & RBM_FLTARG_REGS);
711 else // regState is for the integer registers
713 // This might be the fixed return buffer register argument (on ARM64)
714 // We check and allow inArgReg to be theFixedRetBuffReg
715 if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
717 // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
718 noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
719 // We should have recorded the variable number for the return buffer arg
720 noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
722 else // we have a regular arg
724 noway_assert(inArgMask & RBM_ARG_REGS);
728 regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
731 if (argDsc->lvType == TYP_DOUBLE)
733 if (info.compIsVarArgs || opts.compUseSoftFP)
735 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
736 assert(!regState->rsIsFloat);
740 assert(regState->rsIsFloat);
741 assert(emitter::isDoubleReg(inArgReg));
743 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
745 else if (argDsc->lvType == TYP_LONG)
747 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
748 assert(!regState->rsIsFloat);
749 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
751 #endif // _TARGET_ARM_
753 #if FEATURE_MULTIREG_ARGS
754 if (varTypeIsStruct(argDsc->lvType))
756 if (argDsc->lvIsHfaRegArg())
758 assert(regState->rsIsFloat);
759 unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
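// Illustrative: an HFA of four floats passed starting in the first FP argument register has
// cSlots == 4, so the loop below also marks the next three FP argument registers as live on entry.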
760 for (unsigned i = 1; i < cSlots; i++)
762 assert(inArgReg + i <= LAST_FP_ARGREG);
763 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
768 unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
769 for (unsigned i = 1; i < cSlots; i++)
771 regNumber nextArgReg = (regNumber)(inArgReg + i);
772 if (nextArgReg > REG_ARG_LAST)
776 assert(regState->rsIsFloat == false);
777 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
781 #endif // FEATURE_MULTIREG_ARGS
786 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
788 /*****************************************************************************
790 * Assign variables to live in registers, etc.
793 void Compiler::raAssignVars()
797 printf("*************** In raAssignVars()\n");
799 /* We need to keep track of which registers we ever touch */
801 codeGen->regSet.rsClearRegsModified();
803 #if FEATURE_STACK_FP_X87
804 // FP register allocation
805 raEnregisterVarsStackFP();
806 raGenerateFPRefCounts();
809 /* Predict registers used by code generation */
810 rpPredictRegUse(); // New reg predictor/allocator
812 // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
813 // so that the gc tracking logic and lvMustInit logic will ignore them.
818 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
820 if (varDsc->lvType != TYP_STRUCT)
823 if (!varDsc->lvPromoted)
826 if (varDsc->lvIsParam)
829 if (varDsc->lvRefCnt > 0)
835 printf("Mark unused struct local V%02u\n", lclNum);
838 lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
840 if (promotionType == PROMOTION_TYPE_DEPENDENT)
842 // This should only happen when all its field locals are unused as well.
844 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
847 noway_assert(lvaTable[varNum].lvRefCnt == 0);
852 noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
855 varDsc->lvUnusedStruct = 1;
858 // Change such struct locals to ints
860 varDsc->lvType = TYP_INT; // Bash to a non-gc type.
861 noway_assert(!varDsc->lvTracked);
862 noway_assert(!varDsc->lvRegister);
863 varDsc->lvOnFrame = false; // Force it not to be onstack.
864 varDsc->lvMustInit = false; // Force not to init it.
865 varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
869 /*****************************************************************************/
870 /*****************************************************************************/
872 /*****************************************************************************
874 * Given a regNumber return the correct predictReg enum value
877 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
879 return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
882 /*****************************************************************************
884 * Given a varIndex return the correct predictReg enum value
887 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
889 return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
892 /*****************************************************************************
894 * Given a rpPredictReg return the correct varNumber value
897 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
899 return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
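// These two helpers are inverses of each other; e.g. (illustrative)
// rpGetVarIndexForPredict(rpGetPredictForVarIndex(5)) == 5.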
902 /*****************************************************************************
904 * Given a rpPredictReg return true if it specifies a Txx register
907 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
909 if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
915 /*****************************************************************************
917 * Given a regmask return the correct predictReg enum value
920 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
922 rpPredictReg result = PREDICT_NONE;
923     if (regmask != 0) /* Check if regmask has any bits set */
925 if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
928 assert(FitsIn<DWORD>(regmask));
929             BitScanForward(&reg, (DWORD)regmask);
930 return rpGetPredictForReg((regNumber)reg);
933 #if defined(_TARGET_ARM_)
934 /* It has multiple bits set */
935 else if (regmask == (RBM_R0 | RBM_R1))
937 result = PREDICT_PAIR_R0R1;
939 else if (regmask == (RBM_R2 | RBM_R3))
941 result = PREDICT_PAIR_R2R3;
943 #elif defined(_TARGET_X86_)
944 /* It has multiple bits set */
945 else if (regmask == (RBM_EAX | RBM_EDX))
947 result = PREDICT_PAIR_EAXEDX;
949 else if (regmask == (RBM_ECX | RBM_EBX))
951 result = PREDICT_PAIR_ECXEBX;
954 else /* It doesn't match anything */
956 result = PREDICT_NONE;
957 assert(!"unreachable");
958 NO_WAY("bad regpair");
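// Illustrative: a mask with only one register bit set maps to the single-register prediction for that
// register via rpGetPredictForReg, while e.g. on x86 (RBM_EAX | RBM_EDX) maps to PREDICT_PAIR_EAXEDX;
// any other multi-bit mask is unexpected and asserts.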
964 /*****************************************************************************
966 * Record a variable to register(s) interference
969 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
972 bool addedIntf = false;
976 for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
978 regMaskTP regBit = genRegMask(regNum);
980 if (regMask & regBit)
982 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
983 if (!VarSetOps::IsEmpty(this, newIntf))
988 VarSetOps::Iter newIntfIter(this, newIntf);
990 while (newIntfIter.NextElem(&varNum))
992 unsigned lclNum = lvaTrackedToVarNum[varNum];
993 LclVarDsc* varDsc = &lvaTable[varNum];
994 #if FEATURE_FP_REGALLOC
995 // Only print the useful interferences
996 // i.e. floating point LclVar interference with floating point registers
997 // or integer LclVar interference with general purpose registers
998 if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
1001 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
1002 getRegName(regNum), msg);
1008 VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1020 /*****************************************************************************
1022 * Record a new variable to variable(s) interference
1025 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
1027 noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1028 noway_assert(!VarSetOps::IsEmpty(this, intfVar));
1030 VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1031 VarSetOps::AddElemD(this, oneVar, varNum);
1033 bool newIntf = fgMarkIntf(intfVar, oneVar);
1036 rpAddedVarIntf = true;
1039 if (verbose && newIntf)
1041 for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1043 if (VarSetOps::IsMember(this, intfVar, oneNum))
1045 unsigned lclNum = lvaTrackedToVarNum[varNum];
1046 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1047 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1057 /*****************************************************************************
1059 * Determine preferred register mask for a given predictReg value
1062 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
1064 if (rpHasVarIndexForPredict(predictReg))
1065 predictReg = PREDICT_REG;
1067 noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0]));
1068 noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1070 regMaskTP regAvailForType = rpPredictMap[predictReg];
1071 if (varTypeIsFloating(type))
1073 regAvailForType &= RBM_ALLFLOAT;
1077 regAvailForType &= RBM_ALLINT;
1080 if (type == TYP_DOUBLE)
1082 if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1084 // Fix 388433 ARM JitStress WP7
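// A TYP_DOUBLE occupies a pair of consecutive float registers, so for each available even-numbered
// register we also make its odd partner available by ORing in the mask shifted left by one bit.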
1085 if ((regAvailForType & RBM_DBL_REGS) != 0)
1087 regAvailForType |= (regAvailForType << 1);
1091 regAvailForType = RBM_NONE;
1096 return regAvailForType;
1099 /*****************************************************************************
1101 * Predict register choice for a type.
1103 * Adds the predicted registers to rsModifiedRegsMask.
1105 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1107 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1111 // Add any reserved register to the lockedRegs
1112 lockedRegs |= codeGen->regSet.rsMaskResvd;
1114 /* Clear out the lockedRegs from preferReg */
1115 preferReg &= ~lockedRegs;
1117 if (rpAsgVarNum != -1)
1119 noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
1121 /* Don't pick the register used by rpAsgVarNum either */
1122 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1123 noway_assert(tgtVar->lvRegNum != REG_STK);
1125 preferReg &= ~genRegMask(tgtVar->lvRegNum);
1139 #ifdef _TARGET_AMD64_
1141 #endif // _TARGET_AMD64_
1143 // expand preferReg to all non-locked registers if no bits set
1144 preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1146 if (preferReg == 0) // no bits set?
1148 // Add one predefined spill choice register if no bits set.
1149 // (The jit will introduce one spill temp)
1150 preferReg |= RBM_SPILL_CHOICE;
1151 rpPredictSpillCnt++;
1155 printf("Predict one spill temp\n");
1161 /* Iterate the registers in the order specified by rpRegTmpOrder */
1163 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1165 regNum = rpRegTmpOrder[index];
1166 regBits = genRegMask(regNum);
1168 if ((preferReg & regBits) == regBits)
1174 /* Otherwise we have allocated all registers, so do nothing */
1177 #ifndef _TARGET_AMD64_
1180 if ((preferReg == 0) || // no bits set?
1181 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1183 // expand preferReg to all non-locked registers
1184 preferReg = RBM_ALLINT & ~lockedRegs;
1187 if (preferReg == 0) // no bits set?
1189 // Add EAX:EDX to the registers
1190 // (The jit will introduce two spill temps)
1191 preferReg = RBM_PAIR_TMP;
1192 rpPredictSpillCnt += 2;
1195 printf("Predict two spill temps\n");
1198 else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
1200 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1202 // Add EAX to the registers
1203 // (The jit will introduce one spill temp)
1204 preferReg |= RBM_PAIR_TMP_LO;
1208 // Add EDX to the registers
1209 // (The jit will introduce one spill temp)
1210 preferReg |= RBM_PAIR_TMP_HI;
1212 rpPredictSpillCnt++;
1215 printf("Predict one spill temp\n");
1220 regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1221 if (regPair != REG_PAIR_NONE)
1223 regBits = genRegPairMask(regPair);
1227 /* Otherwise we have allocated all registers, so do nothing */
1229 #endif // _TARGET_AMD64_
1238 #if FEATURE_FP_REGALLOC
1239 regMaskTP restrictMask;
1240 restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
1241 assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1243 // expand preferReg to all available non-locked registers if no bits set
1244 preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1245 regMaskTP preferDouble;
1246 preferDouble = preferReg & (preferReg >> 1);
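// Note: preferReg & (preferReg >> 1) keeps only those bits whose next-higher bit is also set,
// i.e. the low halves of double register pairs that are fully available.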
1248 if ((preferReg == 0) // no bits set?
1250 || ((type == TYP_DOUBLE) &&
1251 ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1255 // Add one predefined spill choice register if no bits set.
1256 // (The jit will introduce one spill temp)
1257 preferReg |= RBM_SPILL_CHOICE_FLT;
1258 rpPredictSpillCnt++;
1262 printf("Predict one spill temp (float)\n");
1266 assert(preferReg != 0);
1268 /* Iterate the registers in the order specified by raRegFltTmpOrder */
1270 for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1272 regNum = raRegFltTmpOrder[index];
1273 regBits = genRegMask(regNum);
1275 if (varTypeIsFloating(type))
1278 if (type == TYP_DOUBLE)
1280 if ((regBits & RBM_DBL_REGS) == 0)
1282 continue; // We must restrict the set to the double registers
1286 // TYP_DOUBLE use two consecutive registers
1287 regBits |= genRegMask(REG_NEXT(regNum));
1291 // See if COMPlus_JitRegisterFP is restricting this FP register
1293 if ((restrictMask & regBits) != regBits)
1297 if ((preferReg & regBits) == regBits)
1302 /* Otherwise we have allocated all registers, so do nothing */
1305 #else // !FEATURE_FP_REGALLOC
1312 noway_assert(!"unexpected type in reg use prediction");
1315 /* Abnormal return */
1316 noway_assert(!"Ran out of registers in rpPredictRegPick");
1321 * If during the first prediction we need to allocate
1322 * one of the registers that we used for coloring locals
1323 * then flag this by setting rpPredictAssignAgain.
1324 * We will have to go back and repredict the registers
1326 if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1327 rpPredictAssignAgain = true;
1329 // Add a register interference to each of the last use variables
1330 if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1332 VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1333 VarSetOps::Assign(this, lastUse, rpLastUseVars);
1334 VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1335 VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1336 // While we still have any lastUse or inPlaceUse bits
1337 VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1339 VARSET_TP varAsSet(VarSetOps::MakeEmpty(this));
1340 VarSetOps::Iter iter(this, useUnion);
1341 unsigned varNum = 0;
1342 while (iter.NextElem(&varNum))
1344 // We'll need this for one of the calls...
1345 VarSetOps::ClearD(this, varAsSet);
1346 VarSetOps::AddElemD(this, varAsSet, varNum);
1348             // Is this varBit in the lastUse set?
1349 if (VarSetOps::IsMember(this, lastUse, varNum))
1351 // Record a register to variable interference
1352 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1355             // Is this varBit in the inPlaceUse set?
1356 if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1358 // Record a register to variable interference
1359 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
1363 codeGen->regSet.rsSetRegsModified(regBits);
1368 /*****************************************************************************
1370 * Predict integer register use for generating an address mode for a tree,
1371 * by setting tree->gtUsedRegs to all registers used by this tree and its
1373 * tree - is the child of a GT_IND node
1374 * type - the type of the GT_IND node (floating point/integer)
1375 * lockedRegs - are the registers which are currently held by
1376 * a previously evaluated node.
1377 * rsvdRegs - registers which should not be allocated because they will
1378 * be needed to evaluate a node in the future
1379 * - Also if rsvdRegs has the RBM_LASTUSE bit set then
1380 * the rpLastUseVars set should be saved and restored
1381 * so that we don't add any new variables to rpLastUseVars
1382 * lenCSE - is non-NULL only when we have a lenCSE expression
1384 * Return the scratch registers to be held by this tree. (one or two registers
1385 * to form an address expression)
1388 regMaskTP Compiler::rpPredictAddressMode(
1389 GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1394 genTreeOps oper = tree->OperGet();
1401 bool hasTwoAddConst = false;
1402 bool restoreLastUseVars = false;
1403 VARSET_TP oldLastUseVars(VarSetOps::MakeEmpty(this));
1405 /* do we need to save and restore the rpLastUseVars set ? */
1406 if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1408 restoreLastUseVars = true;
1409 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1411 rsvdRegs &= ~RBM_LASTUSE;
1413 /* if not an add, then just force it to a register */
1417 if (oper == GT_ARR_ELEM)
1419 regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1428 op1 = tree->gtOp.gtOp1;
1429 op2 = tree->gtOp.gtOp2;
1430 rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1432 /* look for (x + y) + icon address mode */
1434 if (op2->OperGet() == GT_CNS_INT)
1436 cns = op2->gtIntCon.gtIconVal;
1438 /* if not an add, then just force op1 into a register */
1439 if (op1->OperGet() != GT_ADD)
1442 hasTwoAddConst = true;
1444 /* Record the 'rev' flag, reverse evaluation order */
1445 rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1447 op2 = op1->gtOp.gtOp2;
1448 op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1451 /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1454 if (op2->OperGet() == GT_LSH)
1456 if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1458 sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1459 opTemp = op2->gtOp.gtOp1;
1473 if (opTemp->OperGet() == GT_NOP)
1475 opTemp = opTemp->gtOp.gtOp1;
1478 // Is this a const operand?
1479 if (opTemp->OperGet() == GT_CNS_INT)
1481 // Compute the new cns value that Codegen will end up using
1482 cns += (opTemp->gtIntCon.gtIconVal << sh);
1488 /* Check for LSH in op1 slot */
1490 if (op1->OperGet() != GT_LSH)
1493 opTemp = op1->gtOp.gtOp2;
1495 if (opTemp->OperGet() != GT_CNS_INT)
1498 sh = opTemp->gtIntCon.gtIconVal;
1500 /* Check for LSH of 0, special case */
1504 #if defined(_TARGET_XARCH_)
1506     /* Check for LSH of 1, 2 or 3 */
1510 #elif defined(_TARGET_ARM_)
1512 /* Check for LSH of 1 to 30 */
1522 /* Matched a leftShift by 'sh' subtree, move op1 down */
1523 op1 = op1->gtOp.gtOp1;
1527 /* Now we have to evaluate op1 and op2 into registers */
1529 /* Evaluate op1 and op2 in the correct order */
1532 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1533 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1537 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1538 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1541 /* If op1 and op2 must be spilled and reloaded then
1542 * op1 and op2 might be reloaded into the same register
1543 * This can only happen when all the registers are lockedRegs
1545 if ((op1Mask == op2Mask) && (op1Mask != 0))
1547 /* We'll need to grab a different register for op2 */
1548 op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1552 // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1553 // [op2 + op1<<sh + cns]
1554 // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1556 if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1558 op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1562 // On the ARM we will need at least one scratch register for trees that have this form:
1563 // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
1564 // or for a float/double or long when we have both op1 and op2
1565     *  or when we have a 'cns' that is too large for the ld/st instruction
1567 if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1569 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1573 // If we create a CSE that immediately dies then we may need to add an additional register interference
1574 // so we don't color the CSE into R3
1576 if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1578 opTemp = op2->gtOp.gtOp2;
1579 if (opTemp->OperGet() == GT_LCL_VAR)
1581 unsigned varNum = opTemp->gtLclVar.gtLclNum;
1582 LclVarDsc* varDsc = &lvaTable[varNum];
1584 if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1586 rpRecordRegIntf(RBM_TMP_0,
1587 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1593 regMask = (op1Mask | op2Mask);
1594 tree->gtUsedRegs = (regMaskSmall)regMask;
1599 /* now we have to evaluate op1 into a register */
1601 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1606     // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1609 if (!codeGen->validDispForLdSt(cns, type))
1611 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1615 regMask = (op1Mask | op2Mask);
1616 tree->gtUsedRegs = (regMaskSmall)regMask;
1621 #if !CPU_LOAD_STORE_ARCH
1622 if (oper == GT_CNS_INT)
1624 /* Indirect of a constant does not require a register */
1630 /* now we have to evaluate tree into a register */
1631 regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1635 regMaskTP regUse = tree->gtUsedRegs;
1637 if (!VarSetOps::IsEmpty(this, compCurLife))
1639         // Add interference between the current set of live variables and
1640         //  the set of temporary registers needed to evaluate the sub tree
1643 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1647     /* Do we need to restore the oldLastUseVars value */
1648 if (restoreLastUseVars)
1651 * If we used a GT_ASG targeted register then we need to add
1652 * a variable interference between any new last use variables
1653 * and the GT_ASG targeted register
1655 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1657 rpRecordVarIntf(rpAsgVarNum,
1658 VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1660 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1666 /*****************************************************************************
1671 void Compiler::rpPredictRefAssign(unsigned lclNum)
1673 LclVarDsc* varDsc = lvaTable + lclNum;
1675 varDsc->lvRefAssign = 1;
1677 #if NOGC_WRITE_BARRIERS
1681 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1682 printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1683 varDsc->lvVarIndex);
1687 /* Make sure that write barrier pointer variables never land in EDX */
1688 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1689 #endif // NOGC_WRITE_BARRIERS
1692 /*****************************************************************************
1694 * Predict the internal temp physical register usage for a block assignment tree,
1695 * by setting tree->gtUsedRegs.
1696 * Records the internal temp physical register usage for this tree.
1697 * Returns a mask of interfering registers for this tree.
1699 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1700 * to the set of scratch registers needed when evaluating the tree.
1701 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1702 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1703 * predict additional internal temp physical registers to spill into.
1705 * tree       - is the block assignment tree whose register usage is being predicted
1706 * predictReg - what type of register does the tree need
1707 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1708 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1709 * rsvdRegs - registers which should not be allocated because they will
1710 * be needed to evaluate a node in the future
1711 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1712 * the rpLastUseVars set should be saved and restored
1713 * so that we don't add any new variables to rpLastUseVars.
1715 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
1716 rpPredictReg predictReg,
1717 regMaskTP lockedRegs,
1720 regMaskTP regMask = RBM_NONE;
1721 regMaskTP interferingRegs = RBM_NONE;
1723 bool hasGCpointer = false;
1724 bool dstIsOnStack = false;
1725 bool useMemHelper = false;
1726 bool useBarriers = false;
1727 GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
1728 GenTreePtr dstAddr = dst->Addr();
1729 GenTreePtr srcAddrOrFill = tree->gtGetOp2IfPresent();
1731 size_t blkSize = dst->gtBlkSize;
1733 hasGCpointer = (dst->HasGCPtr());
1735 bool isCopyBlk = tree->OperIsCopyBlkOp();
1736 bool isCopyObj = isCopyBlk && hasGCpointer;
1737 bool isInitBlk = tree->OperIsInitBlkOp();
1741 assert(srcAddrOrFill->OperIsIndir());
1742 srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1746 // For initBlk, we don't need to worry about the GC pointers.
1747 hasGCpointer = false;
1754 dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1759 if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1761 useMemHelper = true;
1767 useMemHelper = true;
1770 if (hasGCpointer && !dstIsOnStack)
1777     // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
1779 if (!useMemHelper && !useBarriers)
1781 bool useLoop = false;
1782 unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1784 // A mask to use to force the predictor to choose low registers (to reduce code size)
1785 regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1787 // Allow the src and dst to be used in place, unless we use a loop, in which
1788 // case we will need scratch registers as we will be writing to them.
1789 rpPredictReg srcAndDstPredict = PREDICT_REG;
1791 // Will we be using a loop to implement this INITBLK/COPYBLK?
1792 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1795 avoidReg = RBM_NONE;
1796 srcAndDstPredict = PREDICT_SCRATCH_REG;
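// Illustrative: copying a 64-byte struct on ARM gives fullStoreCount = 64 / TARGET_POINTER_SIZE = 16,
// which is >= 8, so the copy is generated as a loop and both addresses are predicted into scratch registers.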
1799 if (tree->gtFlags & GTF_REVERSE_OPS)
1801 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1802 dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1803 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1807 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1808 srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1809 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1812 // We need at least one scratch register for a copyBlk
1815 // Pick a low register to reduce the code size
1816 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1823 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1824 // Pick another low register to reduce the code size
1825 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1828 // We need a loop index register
1829 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1832 tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1834 return interferingRegs;
1837     // In what order should the Dest, Val/Src, and Size be calculated?
1838 GenTreePtr opsPtr[3];
1839 regMaskTP regsPtr[3];
1841 #if defined(_TARGET_XARCH_)
1842 fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1844 // We're going to use these, might as well make them available now
1846 codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1848 codeGen->regSet.rsSetRegsModified(RBM_ESI);
1850 #elif defined(_TARGET_ARM_)
1854 // For all other cases that involve non-constants, we just call memcpy/memset
1856 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1857 interferingRegs |= RBM_CALLEE_TRASH;
1860 printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1865 assert(useBarriers);
1868 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1870 // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1871 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1874 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1877 #else // !_TARGET_X86_ && !_TARGET_ARM_
1878 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1879 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1880 regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1881 regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1882 opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1883 regMask |= regsPtr[0];
1884 opsPtr[0]->gtUsedRegs |= regsPtr[0];
1885 rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1887 regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1888 opsPtr2RsvdRegs | RBM_LASTUSE);
1889 regMask |= regsPtr[1];
1890 opsPtr[1]->gtUsedRegs |= regsPtr[1];
1891 rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1893 regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1894 if (opsPtr[2] == nullptr)
1896 // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1897 // Note that it is quite possible that no register is required, but this preserves
1899 regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1900 rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1904 regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1905 opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1907 regMask |= opsPtr2UsedRegs;
1909 tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1910 return interferingRegs;
1913 /*****************************************************************************
1915 * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1916 * Returns a regMask with the internal temp physical register usage for this tree.
1918 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1919 * to the set of scratch registers needed when evaluating the tree.
1920 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1921 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1922 * predict additional internal temp physical registers to spill into.
1924 * tree       - is the tree whose register usage is being predicted
1925 * predictReg - what type of register does the tree need
1926 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1927 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1928 * rsvdRegs - registers which should not be allocated because they will
1929 * be needed to evaluate a node in the future
1930 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1931 * the rpLastUseVars set should be saved and restored
1932 * so that we don't add any new variables to rpLastUseVars.
1935 #pragma warning(disable : 4701)
1938 #pragma warning(push)
1939 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1941 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
1942 rpPredictReg predictReg,
1943 regMaskTP lockedRegs,
1946 regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
1949 rpPredictReg op1PredictReg;
1950 rpPredictReg op2PredictReg;
1951 LclVarDsc* varDsc = NULL;
1952 VARSET_TP oldLastUseVars(VarSetOps::UninitVal());
1954 VARSET_TP varBits(VarSetOps::UninitVal());
1955 VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1957 bool restoreLastUseVars = false;
1958 regMaskTP interferingRegs = RBM_NONE;
1961 // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1963 noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1964 noway_assert(RBM_ILLEGAL);
1965 noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1966 /* impossible values, to make sure that we set them */
1967 tree->gtUsedRegs = RBM_ILLEGAL;
1970 /* Figure out what kind of a node we have */
1972 genTreeOps oper = tree->OperGet();
1973 var_types type = tree->TypeGet();
1974 unsigned kind = tree->OperKind();
1976 // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1977 genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1978 if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1979 predictReg = PREDICT_NONE;
1980 else if (rpHasVarIndexForPredict(predictReg))
1982 // The only place where predictReg is set to a var is in the PURE
1983 // assignment case where varIndex is the var being assigned to.
1984 // We need to check whether the variable is used between here and
1985 // its redefinition.
1986 unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1987 unsigned lclNum = lvaTrackedToVarNum[varIndex];
1989 for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1991 if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1993 // Is this the pure assignment?
1994 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1996 predictReg = PREDICT_SCRATCH_REG;
2005 if (rsvdRegs & RBM_LASTUSE)
2007 restoreLastUseVars = true;
2008 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2009 rsvdRegs &= ~RBM_LASTUSE;
2012 /* Is this a constant or leaf node? */
2014 if (kind & (GTK_CONST | GTK_LEAF))
2016 bool lastUse = false;
2017 regMaskTP enregMask = RBM_NONE;
2023 // Codegen for floating point constants on the ARM is currently
2024 // movw/movt rT1, <lo32 bits>
2025 // movw/movt rT2, <hi32 bits>
2026 // vmov.i2d dT0, rT1,rT2
2028 // For TYP_FLOAT one integer register is required
2030 // These integer register(s) immediately die
2031 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2032 if (type == TYP_DOUBLE)
2034 // For TYP_DOUBLE a second integer register is required
2036 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2039 // We also need a floating point register that we keep
2041 if (predictReg == PREDICT_NONE)
2042 predictReg = PREDICT_SCRATCH_REG;
2044 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2045 tree->gtUsedRegs = regMask | tmpMask;
2052 if (rpHasVarIndexForPredict(predictReg))
2054 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2055 rpAsgVarNum = tgtIndex;
2057 // We don't need any register as we plan on writing to the rpAsgVarNum register
2058 predictReg = PREDICT_NONE;
2060 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2061 tgtVar->lvDependReg = true;
2063 if (type == TYP_LONG)
2065 assert(oper == GT_CNS_LNG);
2067 if (tgtVar->lvOtherReg == REG_STK)
2069 // Well we do need one register for a partially enregistered
2071 predictReg = PREDICT_SCRATCH_REG;
2077 #if !CPU_LOAD_STORE_ARCH
2078 /* If the constant is a handle then it will need to have a relocation
2079 applied to it. It will need to be loaded into a register.
2080 But never throw away an existing hint.
2082 if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2085 if (predictReg == PREDICT_NONE)
2086 predictReg = PREDICT_SCRATCH_REG;
2095 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2096 (genTypeSize(type) < sizeof(int)))
2098 predictReg = PREDICT_SCRATCH_REG;
2101 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2103 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2105 // These integer register(s) immediately die
2106 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2107 // Two integer registers are required for a TYP_DOUBLE
2108 if (type == TYP_DOUBLE)
2109 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2111 // We need a temp register in some cases of loads/stores to a class var
2112 if (predictReg == PREDICT_NONE)
2114 predictReg = PREDICT_SCRATCH_REG;
2117 if (rpHasVarIndexForPredict(predictReg))
2119 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2120 rpAsgVarNum = tgtIndex;
2122 // We don't need any register as we plan on writing to the rpAsgVarNum register
2123 predictReg = PREDICT_NONE;
2125 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2126 tgtVar->lvDependReg = true;
2128 if (type == TYP_LONG)
2130 if (tgtVar->lvOtherReg == REG_STK)
2132 // Well we do need one register for a partially enregistered
2134 predictReg = PREDICT_SCRATCH_REG;
2142 // Check for a misalignment on a Floating Point field
2144 if (varTypeIsFloating(type))
2146 if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2148 // These integer register(s) immediately die
2149 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2150 // Two integer registers are required for a TYP_DOUBLE
2151 if (type == TYP_DOUBLE)
2152 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2161 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2163 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2164 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2165 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2167 #if FEATURE_STACK_FP_X87
2168 // If it's a floating point var, there's nothing to do
2169 if (varTypeIsFloating(type))
2171 tree->gtUsedRegs = RBM_NONE;
2177 // If the variable is already a register variable, no need to go further.
2178 if (oper == GT_REG_VAR)
2181 /* Apply the type of predictReg to the LCL_VAR */
2183 if (predictReg == PREDICT_REG)
2186 if (varDsc->lvRegNum == REG_STK)
2191 else if (predictReg == PREDICT_SCRATCH_REG)
2193 noway_assert(predictReg == PREDICT_SCRATCH_REG);
2195 /* Is this the last use of a local var? */
2198 if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2199 goto PREDICT_REG_COMMON;
2202 else if (rpHasVarIndexForPredict(predictReg))
2204 /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2206 unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2207 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2208 VarSetOps::MakeSingleton(this, tgtIndex1);
2210 noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2211 noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2212 #ifndef _TARGET_AMD64_
2213 // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2214 // so this assert is meaningless
2215 noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2216 #endif // !_TARGET_AMD64_
2218 if (varDsc->lvTracked)
2221 srcIndex = varDsc->lvVarIndex;
2223 // If this register has its last use here then we will prefer
2224 // to color to the same register as tgtVar.
2228 * Add an entry in the lvaVarPref graph to indicate
2229 * that it would be worthwhile to color these two variables
2230 * into the same physical register.
2231 * This will help us avoid having an extra copy instruction
2233 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2234 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2237 // Add a variable interference from srcIndex to each of the last use variables
2238 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2240 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2243 rpAsgVarNum = tgtIndex1;
2245 /* We will rely on the target enregistered variable from the GT_ASG */
2252 if (genIsValidFloatReg(varDsc->lvRegNum))
2254 enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2258 enregMask = genRegMask(varDsc->lvRegNum);
2262 if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2264 // We need to compute the intermediate value using a TYP_DOUBLE
2265 // but we are storing the result in a TYP_FLOAT enregistered variable
2272 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2273 if (enregMask & (rsvdRegs | lockedRegs))
2277 #ifndef _TARGET_64BIT_
2278 if (type == TYP_LONG)
2280 if (varDsc->lvOtherReg != REG_STK)
2282 tmpMask = genRegMask(varDsc->lvOtherReg);
2283 enregMask |= tmpMask;
2285 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2286 if (tmpMask & (rsvdRegs | lockedRegs))
2289 else // lvOtherReg == REG_STK
2294 #endif // _TARGET_64BIT_
2297 varDsc->lvDependReg = true;
2301 /* Does not need a register */
2302 predictReg = PREDICT_NONE;
2303 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2304 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2306 else // (grabCount > 0)
2308 #ifndef _TARGET_64BIT_
2309 /* For TYP_LONG, when we only need one register, change the type to TYP_INT */
2310 if ((type == TYP_LONG) && (grabCount == 1))
2312 /* We will need to pick one register */
2314 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2315 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2317 noway_assert((type == TYP_DOUBLE) ||
2318 (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2319 #else // !_TARGET_64BIT_
2320 noway_assert(grabCount == 1);
2321 #endif // !_TARGET_64BIT_
2324 else if (type == TYP_STRUCT)
2327 // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2328 // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2329 // As a workaround we just bash it to PREDICT_NONE here
2331 if (predictReg != PREDICT_NONE)
2332 predictReg = PREDICT_NONE;
2334 // Currently predictReg is saying that we will not need any scratch registers
2335 noway_assert(predictReg == PREDICT_NONE);
2337 /* We may need to sign or zero extend a small type when pushing a struct */
2338 if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2340 for (unsigned varNum = varDsc->lvFieldLclStart;
2341 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2343 LclVarDsc* fldVar = lvaTable + varNum;
2345 if (fldVar->lvStackAligned())
2347 // When we are stack aligned Codegen will just use
2348 // a push instruction and thus doesn't need any register
2349 // since we can push either a register or a stack frame location
2353 if (varTypeIsByte(fldVar->TypeGet()))
2355 // We will need to reserve one byteable register,
2358 predictReg = PREDICT_SCRATCH_REG;
2359 #if CPU_HAS_BYTE_REGS
2360 // It is best to enregister this fldVar in a byteable register
2362 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2365 else if (varTypeIsShort(fldVar->TypeGet()))
2367 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2368 // If fldVar is not enregistered then we will need a scratch register
2370 if (!isEnregistered)
2372 // We will need either an int register or a byte register
2373 // If we are not requesting a byte register we will request an int register
2375 if (type != TYP_BYTE)
2377 predictReg = PREDICT_SCRATCH_REG;
2385 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2388 if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2390 varDsc->addPrefReg(preferReg, this);
2394 break; /* end of case GT_LCL_VAR */
2397 tree->gtUsedRegs = RBM_NONE;
2400 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2401 // Mark the registers required to emit a tailcall profiler callback
2402 if (compIsProfilerHookNeeded())
2404 tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2411 } /* end of switch (oper) */
2413 /* If we don't need to evaluate to register, regmask is the empty set */
2414 /* Otherwise we grab a temp for the local variable */
2416 if (predictReg == PREDICT_NONE)
2420 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2422 if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2424 /* We need to sign or zero extend a small type when pushing a struct */
2425 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2427 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2428 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2430 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2433 LclVarDsc* fldVar = lvaTable + varNum;
2434 if (fldVar->lvTracked)
2436 VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2437 rpRecordRegIntf(regMask, fldBit DEBUGARG(
2438 "need scratch register when pushing a small field of a struct"));
2444 /* Update the set of lastUse variables that we encountered so far */
2447 VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2448 VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2451 * Add interference from any previously locked temps into this last use variable.
2455 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2458 * Add interference from any reserved temps into this last use variable.
2462 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2465 * For partially enregistered longs add an interference with the
2466 * register returned by rpPredictRegPick
2468 if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2470 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2474 tree->gtUsedRegs = (regMaskSmall)regMask;
2478 /* Is it a 'simple' unary/binary operator? */
2480 if (kind & GTK_SMPOP)
2482 GenTreePtr op1 = tree->gtOp.gtOp1;
2483 GenTreePtr op2 = tree->gtGetOp2IfPresent();
2485 GenTreePtr opsPtr[3];
2486 regMaskTP regsPtr[3];
2488 VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2494 /* Is the value being assigned into a LCL_VAR? */
2495 if (op1->gtOper == GT_LCL_VAR)
2497 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2499 /* Are we assigning a LCL_VAR the result of a call? */
2500 if (op2->gtOper == GT_CALL)
2502 /* Set a preferred register for the LCL_VAR */
2503 if (isRegPairType(varDsc->TypeGet()))
2504 varDsc->addPrefReg(RBM_LNGRET, this);
2505 else if (!varTypeIsFloating(varDsc->TypeGet()))
2506 varDsc->addPrefReg(RBM_INTRET, this);
2507 #ifdef _TARGET_AMD64_
2509 varDsc->addPrefReg(RBM_FLOATRET, this);
2512 * When assigning the result of a call we don't
2513 * bother trying to target the right side of the
2514 * assignment, since we have a fixed calling convention.
2517 else if (varDsc->lvTracked)
2519 // We interfere with uses in place
2520 if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2522 rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2525 // Did we predict that this local will be fully enregistered?
2526 // and the assignment type is the same as the expression type?
2527 // and it is dead on the right side of the assignment?
2528 // and we current have no other rpAsgVarNum active?
2530 if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2531 (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2534 // Yes, we should try to target the right side (op2) of this
2535 // assignment into the (enregistered) tracked variable.
2538 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2539 op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2541 // Remember that this is a new use in place
2543 // We've added "new UseInPlace"; remove from the global set.
2544 VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2546 // Note that later when we walk down to the leaf node for op2
2547 // if we decide to actually use the register for the 'varDsc'
2548 // to enregister the operand, then we will set rpAsgVarNum to
2549 // varDsc->lvVarIndex, by extracting this value using
2550 // rpGetVarIndexForPredict()
2552 // Also we reset rpAsgVarNum back to -1 after we have finished
2553 // predicting the current GT_ASG node
2559 else if (tree->OperIsBlkOp())
2561 interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2578 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2579 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2581 /* Is the value being assigned into an enregistered LCL_VAR? */
2582 /* For debug code we only allow a simple op2 to be assigned */
2583 if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2585 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2586 /* Did we predict that this local will be enregistered? */
2587 if (varDsc->lvRegNum != REG_STK)
2589 /* Yes, we can use "reg <op>= addr" */
2591 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2592 op2PredictReg = PREDICT_NONE;
2599 #if CPU_LOAD_STORE_ARCH
2602 op1PredictReg = PREDICT_REG;
2603 op2PredictReg = PREDICT_REG;
2609 * Otherwise, initialize the normal forcing of operands:
2612 op1PredictReg = PREDICT_ADDR;
2613 op2PredictReg = PREDICT_REG;
2618 #if !CPU_LOAD_STORE_ARCH
2619 if (op2PredictReg != PREDICT_NONE)
2621 /* Is the value being assigned a simple one? */
2622 if (rpCanAsgOperWithoutReg(op2, false))
2623 op2PredictReg = PREDICT_NONE;
2627 bool simpleAssignment;
2628 simpleAssignment = false;
2630 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2632 // Add a variable interference from the assign target
2633 // to each of the last use variables
2634 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2636 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2638 if (varDsc->lvTracked)
2640 unsigned varIndex = varDsc->lvVarIndex;
2642 rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2646 /* Record whether this tree is a simple assignment to a local */
2648 simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2651 bool requireByteReg;
2652 requireByteReg = false;
2654 #if CPU_HAS_BYTE_REGS
2655 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2657 if (varTypeIsByte(type) &&
2658 ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2661 // Byte-assignments typically need a byte register
2662 requireByteReg = true;
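// (On x86 only EAX, EBX, ECX and EDX have byte-addressable forms -- AL, BL, CL, DL --
// which is what the byte-register restriction below refers to.)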
2664 if (op1->gtOper == GT_LCL_VAR)
2666 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2668 // Did we predict that this local will be enregistered?
2669 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2671 // We don't require a byte register when op1 is an enregistered local
2672 requireByteReg = false;
2675 // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2676 if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2678 // We should try to put op1 in an byte register
2679 varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2685 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2687 bool isWriteBarrierAsgNode;
2688 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2690 GCInfo::WriteBarrierForm wbf;
2691 if (isWriteBarrierAsgNode)
2692 wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2694 wbf = GCInfo::WBF_NoBarrier;
2697 regMaskTP wbaLockedRegs;
2698 wbaLockedRegs = lockedRegs;
2699 if (isWriteBarrierAsgNode)
2701 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2703 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2706 wbaLockedRegs |= RBM_WRITE_BARRIER;
2707 op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2708 assert(REG_WRITE_BARRIER == REG_EDX);
2709 op1PredictReg = PREDICT_REG_EDX;
2714 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2716 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2719 op1PredictReg = PREDICT_REG_ECX;
2720 op2PredictReg = PREDICT_REG_EDX;
2721 #elif defined(_TARGET_ARM_)
2722 op1PredictReg = PREDICT_REG_R0;
2723 op2PredictReg = PREDICT_REG_R1;
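// On ARM the destination (op1) is steered to R0 and the value (op2) to R1, matching
// the RBM_R0/RBM_R1 additions to gtUsedRegs further below -- presumably the argument
// registers of the ARM write-barrier helper.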
2725 // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2726 if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2728 op1 = op1->gtOp.gtOp1;
2730 #else // !_TARGET_X86_ && !_TARGET_ARM_
2731 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2737 /* Are we supposed to evaluate RHS first? */
2739 if (tree->gtFlags & GTF_REVERSE_OPS)
2741 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2743 #if CPU_HAS_BYTE_REGS
2744 // Should we ensure that op2 gets evaluated into a byte register?
2745 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2747 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2748 // and we can't select one that is already reserved (i.e. lockedRegs)
2750 op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2751 op2->gtUsedRegs |= op2Mask;
2753 // No longer a simple assignment because we're using extra registers and might
2754 // have interference between op1 and op2. See DevDiv #136681
2755 simpleAssignment = false;
2759 * For a simple assignment we don't want the op2Mask to be
2760 * marked as interfering with the LCL_VAR, since it is likely
2761 * that we will want to enregister the LCL_VAR in exactly
2762 * the register that is used to compute op2
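 * For example, for "x = y + 1" we would like "x" to be colored into the very register
 * that holds the result of "y + 1", so that register must not be recorded as
 * interfering with "x".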
2764 tmpMask = lockedRegs;
2766 if (!simpleAssignment)
2769 regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2771 // Did we relax the register prediction for op1 and op2 above ?
2772 // - because we are depending upon op1 being enregistered
2774 if ((op1PredictReg == PREDICT_NONE) &&
2775 ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2777 /* We must be assigning into an enregistered LCL_VAR */
2778 noway_assert(op1->gtOper == GT_LCL_VAR);
2779 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2780 noway_assert(varDsc->lvRegNum != REG_STK);
2782 /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2783 varDsc->lvDependReg = true;
2788 // For the case of simpleAssignments op2 should always be evaluated first
2789 noway_assert(!simpleAssignment);
2791 regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2792 if (isWriteBarrierAsgNode)
2794 wbaLockedRegs |= op1->gtUsedRegs;
2796 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2798 #if CPU_HAS_BYTE_REGS
2799 // Should we ensure that op2 gets evaluated into a byte register?
2800 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2802 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2803 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2806 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2807 op2->gtUsedRegs |= op2Mask;
2812 if (rpHasVarIndexForPredict(op2PredictReg))
2817 if (isWriteBarrierAsgNode)
2819 #if NOGC_WRITE_BARRIERS
2821 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2825 /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2826 passed to the write-barrier call in REG_WRITE_BARRIER */
2830 if (op1->gtOper == GT_IND)
2832 GenTreePtr rv1, rv2;
2836 /* Special handling of indirect assigns for write barrier */
2838 bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2841 /* Check address mode for enregisterable locals */
2845 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2847 rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2849 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2851 rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2856 if (op2->gtOper == GT_LCL_VAR)
2858 rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2861 // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2862 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2864 rpRecordRegIntf(RBM_WRITE_BARRIER,
2865 rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2867 tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2872 #endif // NOGC_WRITE_BARRIERS
2874 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2879 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2882 // For the ARM target we have an optimized JIT Helper
2883 // that only trashes a subset of the callee saved registers
2886 // NOTE: Adding it to the gtUsedRegs will cause the interference to
2887 // be added appropriately
2889 // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
2890 // instead of gtUsedRegs, because the latter will be modified later, but we need
2891 // to remember to add the interference.
2893 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2895 op1->gtUsedRegs |= RBM_R0;
2896 op2->gtUsedRegs |= RBM_R1;
2897 #else // _TARGET_ARM_
2901 printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2903 // We have to call a normal JIT helper to perform the Write Barrier Assignment
2904 // It will trash the callee saved registers
2906 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2907 #endif // _TARGET_ARM_
2909 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2912 if (simpleAssignment)
2915 * Consider a simple assignment to a local:
2919 * Since the "=" node is visited after the variable
2920 * is marked live (assuming it's live after the
2921 * assignment), we don't want to use the register
2922 * use mask of the "=" node but rather that of the
2925 tree->gtUsedRegs = op1->gtUsedRegs;
2929 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2931 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2937 /* assigning shift operators */
2939 noway_assert(type != TYP_LONG);
2941 #if CPU_LOAD_STORE_ARCH
2942 predictReg = PREDICT_ADDR;
2944 predictReg = PREDICT_NONE;
2947 /* shift count is handled same as ordinary shift */
2948 goto HANDLE_SHIFT_COUNT;
2951 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2953 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2955 // We need a scratch register for the LEA instruction
2956 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2959 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2964 /* Cannot cast to VOID */
2965 noway_assert(type != TYP_VOID);
2967 /* cast to long is special */
2968 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2970 noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2971 #if CPU_LONG_USES_REGPAIR
2972 rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2974 if (rpHasVarIndexForPredict(predictReg))
2976 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2977 rpAsgVarNum = tgtIndex;
2979 // We don't need any register as we plan on writing to the rpAsgVarNum register
2980 predictReg = PREDICT_NONE;
2982 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2983 tgtVar->lvDependReg = true;
2985 if (tgtVar->lvOtherReg != REG_STK)
2987 predictRegHi = PREDICT_NONE;
2992 if (predictReg == PREDICT_NONE)
2994 predictReg = PREDICT_SCRATCH_REG;
2997 // If we are widening an int into a long using a targeted register pair we
2998 // should retarget so that the low part gets loaded into the appropriate register
2999 else if (predictReg == PREDICT_PAIR_R0R1)
3001 predictReg = PREDICT_REG_R0;
3002 predictRegHi = PREDICT_REG_R1;
3004 else if (predictReg == PREDICT_PAIR_R2R3)
3006 predictReg = PREDICT_REG_R2;
3007 predictRegHi = PREDICT_REG_R3;
3011 // If we are widening an int into a long using a targeted register pair we
3012 // should retarget so that the low part gets loaded into the appropriate register
3013 else if (predictReg == PREDICT_PAIR_EAXEDX)
3015 predictReg = PREDICT_REG_EAX;
3016 predictRegHi = PREDICT_REG_EDX;
3018 else if (predictReg == PREDICT_PAIR_ECXEBX)
3020 predictReg = PREDICT_REG_ECX;
3021 predictRegHi = PREDICT_REG_EBX;
3025 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3027 #if CPU_LONG_USES_REGPAIR
3028 if (predictRegHi != PREDICT_NONE)
3030 // Now get one more reg for the upper part
3031 regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3034 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3038 /* cast from long is special - it frees a register */
3039 if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
3040 && op1->gtType == TYP_LONG)
3042 if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3043 predictReg = PREDICT_REG;
3045 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3047 // If we have 2 or more regs, free one of them
3048 if (!genMaxOneBit(regMask))
3050 /* Clear the 2nd lowest bit in regMask */
3051 /* First set tmpMask to the lowest bit in regMask */
3052 tmpMask = genFindLowestBit(regMask);
3053 /* Next find the second lowest bit in regMask */
3054 tmpMask = genFindLowestBit(regMask & ~tmpMask);
3055 /* Clear this bit from regmask */
3056 regMask &= ~tmpMask;
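// For example, on x86, if the long value was predicted into the pair {EAX,EDX}, the
// second lowest bit (EDX) is cleared and only EAX is kept for the int-sized result.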
3058 tree->gtUsedRegs = op1->gtUsedRegs;
3062 #if CPU_HAS_BYTE_REGS
3063 /* cast from signed-byte is special - it uses byteable registers */
3064 if (type == TYP_INT)
3066 var_types smallType;
3068 if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3069 smallType = tree->gtCast.CastOp()->TypeGet();
3071 smallType = tree->gtCast.gtCastType;
3073 if (smallType == TYP_BYTE)
3075 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3077 if ((regMask & RBM_BYTE_REGS) == 0)
3078 regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3080 tree->gtUsedRegs = (regMaskSmall)regMask;
3086 #if FEATURE_STACK_FP_X87
3087 /* cast to float/double is special */
3088 if (varTypeIsFloating(type))
3090 switch (op1->TypeGet())
3092 /* uses fild, so don't need to be loaded to reg */
3095 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3096 tree->gtUsedRegs = op1->gtUsedRegs;
3104 /* Casting from floating type to integral type is special */
3105 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3107 if (opts.compCanUseSSE2)
3109 // predict for SSE2 based casting
3110 if (predictReg <= PREDICT_REG)
3111 predictReg = PREDICT_SCRATCH_REG;
3112 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3114 // Get one more int reg to hold cast result
3115 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3116 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3122 #if FEATURE_FP_REGALLOC
3123 // Are we casting between int to float or float to int
3124 // Fix 388428 ARM JitStress WP7
3125 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3127 // op1 needs to go into a register
3128 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3131 if (varTypeIsFloating(op1->TypeGet()))
3133 // We also need a fp scratch register for the convert operation
3134 regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3135 PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
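// (Presumably because on ARM VFP the float<->int conversion is performed in an FP
// register before the value can be moved to an integer register.)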
3138 // We also need a register to hold the result
3139 regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3140 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3145 /* otherwise must load op1 into a register */
3150 #ifdef _TARGET_XARCH_
3151 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3153 // This is a special case to handle the following
3154 // optimization: conv.i4(round.d(d)) -> round.i(d)
3155 // if flowgraph 3186
3157 if (predictReg <= PREDICT_REG)
3158 predictReg = PREDICT_SCRATCH_REG;
3160 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3162 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3164 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3172 if (tree->TypeGet() == TYP_LONG)
3174 // On ARM this consumes an extra register for the '0' value
3175 if (predictReg <= PREDICT_REG)
3176 predictReg = PREDICT_SCRATCH_REG;
3178 regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3180 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3182 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3185 #endif // _TARGET_ARM_
3190 // these unary operators will write new values
3191 // and thus will need a scratch register
3193 /* generic unary operators */
3195 if (predictReg <= PREDICT_REG)
3196 predictReg = PREDICT_SCRATCH_REG;
3201 // these unary operators do not write new values
3202 // and thus won't need a scratch register
3203 CLANG_FORMAT_COMMENT_ANCHOR;
3208 tree->gtUsedRegs = 0;
3213 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3214 tree->gtUsedRegs = op1->gtUsedRegs;
3218 case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3220 bool intoReg = true;
3221 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3223 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3225 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3228 if (predictReg == PREDICT_ADDR)
3232 else if (predictReg == PREDICT_NONE)
3234 if (type != TYP_LONG)
3240 predictReg = PREDICT_REG;
3244 /* forcing to register? */
3245 if (intoReg && (type != TYP_LONG))
3247 rsvdRegs |= RBM_LASTUSE;
3253 /* check for address mode */
3254 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3257 #if CPU_LOAD_STORE_ARCH
3258 // We may need a scratch register for loading a long
3259 if (type == TYP_LONG)
3261 /* This scratch register immediately dies */
3262 tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3264 #endif // CPU_LOAD_STORE_ARCH
3267 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3269 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3271 /* These integer register(s) immediately die */
3272 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3273 // Two integer registers are required for a TYP_DOUBLE
3274 if (type == TYP_DOUBLE)
3276 rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3280 /* forcing to register? */
3283 regMaskTP lockedMask = lockedRegs | rsvdRegs;
3286 // We will compute a new regMask that holds the register(s)
3287 // that we will load the indirection into.
3289 CLANG_FORMAT_COMMENT_ANCHOR;
3291 #ifndef _TARGET_64BIT_
3292 if (type == TYP_LONG)
3294 // We need to use multiple load instructions here:
3295 // For the first register we cannot choose
3296 // any registers that are being used in place or
3297 // any register in the current regMask
3299 regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3301 // For the second register we can choose a register that was
3302 // used in place or any register in the old now overwritten regMask
3303 // but not the same register that we picked above in 'regMask'
3305 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3306 regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3311 // We will use one load instruction here:
3312 // The load target register can be a register that was used in place
3313 // or one of the registers from the original regMask.
3315 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3316 regMask = rpPredictRegPick(type, predictReg, lockedMask);
3319 else if (predictReg != PREDICT_ADDR)
3321 /* Unless the caller specified PREDICT_ADDR */
3322 /* we don't return the temp registers used */
3323 /* to form the address */
3328 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3340 /* Floating point comparison uses EAX for flags */
3341 if (varTypeIsFloating(op1->TypeGet()))
3347 if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3349 // Some comparisons are converted to ?:
3350 noway_assert(!fgMorphRelopToQmark(op1));
3352 if (predictReg <= PREDICT_REG)
3353 predictReg = PREDICT_SCRATCH_REG;
3355 // The set instructions need a byte register
3356 regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
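// (On x86 the SETcc instructions write only an 8-bit register, e.g. "setl al", which
// is why a TYP_BYTE pick is used to steer toward a byte-addressable register.)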
3361 #ifdef _TARGET_XARCH_
3363 // Optimize the compare with a constant cases for xarch
3364 if (op1->gtOper == GT_CNS_INT)
3366 if (op2->gtOper == GT_CNS_INT)
3368 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3369 rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3370 tree->gtUsedRegs = op2->gtUsedRegs;
3373 else if (op2->gtOper == GT_CNS_INT)
3375 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3376 tree->gtUsedRegs = op1->gtUsedRegs;
3379 else if (op2->gtOper == GT_CNS_LNG)
3381 regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3383 // We also need one extra register to read values from
3384 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3385 #endif // _TARGET_X86_
3386 tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3389 #endif // _TARGET_XARCH_
3392 unsigned op1TypeSize;
3393 unsigned op2TypeSize;
3395 op1TypeSize = genTypeSize(op1->TypeGet());
3396 op2TypeSize = genTypeSize(op2->TypeGet());
3398 op1PredictReg = PREDICT_REG;
3399 op2PredictReg = PREDICT_REG;
3401 if (tree->gtFlags & GTF_REVERSE_OPS)
3403 #ifdef _TARGET_XARCH_
3404 if (op1TypeSize == sizeof(int))
3405 op1PredictReg = PREDICT_NONE;
3408 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3409 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3413 #ifdef _TARGET_XARCH_
3414 // For full DWORD compares we can have
3416 // op1 is an address mode and op2 is a register
3418 // op1 is a register and op2 is an address mode
3420 if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3422 if (op2->gtOper == GT_LCL_VAR)
3424 unsigned lclNum = op2->gtLclVar.gtLclNum;
3425 varDsc = lvaTable + lclNum;
3426 /* Did we predict that this local will be enregistered? */
3427 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3429 op1PredictReg = PREDICT_ADDR;
3433 // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3434 if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3435 op2PredictReg = PREDICT_ADDR;
3436 #endif // _TARGET_XARCH_
3438 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3440 if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3443 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3447 #ifdef _TARGET_XARCH_
3448 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3449 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3450 // if one of the operands is small and the types aren't equal.
3452 if (regMask == RBM_NONE)
3454 rpPredictReg op1xPredictReg, op2xPredictReg;
3455 GenTreePtr op1x, op2x;
3456 if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3458 op1xPredictReg = op2PredictReg;
3459 op2xPredictReg = op1PredictReg;
3465 op1xPredictReg = op1PredictReg;
3466 op2xPredictReg = op2PredictReg;
3470 if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
3471 (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3472 varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
3474 bool needTmp = false;
3476 // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3477 // We could predict a byteable register for op2x, but what if we don't get it?
3478 // So, be conservative and always ask for a temp. There are a couple of small CQ losses as a result.
3480 if (varTypeIsByte(op1x->TypeGet()))
3486 if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3488 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3493 if (op1x->TypeGet() != op2x->TypeGet())
3499 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3503 #endif // _TARGET_XARCH_
3505 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3510 #ifndef _TARGET_AMD64_
3511 if (type == TYP_LONG)
3513 assert(tree->gtIsValid64RsltMul());
3515 /* Strip out the cast nodes */
3517 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3518 op1 = op1->gtCast.CastOp();
3519 op2 = op2->gtCast.CastOp();
3523 #endif // !_TARGET_AMD64_
3526 #if defined(_TARGET_X86_)
3527 // This will be done by a 64-bit imul "imul eax, reg"
3528 // (i.e. EDX:EAX = EAX * reg)
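// One operand is therefore targeted at EAX (PREDICT_PAIR_TMP_LO), and since the
// widening multiply also writes EDX, RBM_PAIR_TMP is added to gtUsedRegs below.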
3530 /* Are we supposed to evaluate op2 first? */
3531 if (tree->gtFlags & GTF_REVERSE_OPS)
3533 rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3534 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3538 rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3539 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3542 /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3544 tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3546 /* set regMask to the set of held registers */
3548 regMask = RBM_PAIR_TMP_LO;
3550 if (type == TYP_LONG)
3551 regMask |= RBM_PAIR_TMP_HI;
3553 #elif defined(_TARGET_ARM_)
3554 // This will be done by a 4-operand multiply
3556 // Are we supposed to evaluate op2 first?
3557 if (tree->gtFlags & GTF_REVERSE_OPS)
3559 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3560 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3564 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3565 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3568 // set regMask to the set of held registers,
3569 // the two scratch registers we need to compute the mul result
3571 regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3573 // set gtUsedRegs to regMask and the registers needed by op1 and op2
3575 tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3577 #else // !_TARGET_X86_ && !_TARGET_ARM_
3578 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3585 /* We use imulEAX for most unsigned multiply operations */
3586 if (tree->gtOverflow())
3588 if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3603 tree->gtUsedRegs = 0;
3605 if (predictReg <= PREDICT_REG)
3606 predictReg = PREDICT_SCRATCH_REG;
3611 if (tree->gtFlags & GTF_REVERSE_OPS)
3613 op1PredictReg = PREDICT_REG;
3614 #if !CPU_LOAD_STORE_ARCH
3615 if (genTypeSize(op1->gtType) >= sizeof(int))
3616 op1PredictReg = PREDICT_NONE;
3618 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3619 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3623 op2PredictReg = PREDICT_REG;
3624 #if !CPU_LOAD_STORE_ARCH
3625 if (genTypeSize(op2->gtType) >= sizeof(int))
3626 op2PredictReg = PREDICT_NONE;
3628 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3630 // For most ALU operations we can generate a single instruction that encodes
3631 // a small immediate integer constant value. (except for multiply)
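// For example, on ARM "add r0, r0, #12" encodes the constant directly, so in that
// case no register needs to be predicted for op2.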
3633 if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3635 ssize_t ival = op2->gtIntCon.gtIconVal;
3636 if (codeGen->validImmForAlu(ival))
3638 op2PredictReg = PREDICT_NONE;
3640 else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3641 ((oper == GT_ADD) || (oper == GT_SUB)))
3643 op2PredictReg = PREDICT_NONE;
3646 if (op2PredictReg == PREDICT_NONE)
3648 op2->gtUsedRegs = RBM_NONE;
3653 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3656 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3658 #if CPU_HAS_BYTE_REGS
3659 /* We have special register requirements for byte operations */
3661 if (varTypeIsByte(tree->TypeGet()))
3663 /* For 8-bit arithmetic, one operand has to be in a
3664 byte-addressable register, and the other has to be
3665 in a byte-addressable reg or in memory. Assume it's in a reg */
3667 regMaskTP regByteMask = 0;
3668 regMaskTP op1ByteMask = op1->gtUsedRegs;
3670 if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3672 // Pick a Byte register to use for op1
3673 regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3674 op1ByteMask = regByteMask;
3677 if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3679 // Pick a Byte register to use for op2, avoiding the one used by op1
3680 regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3685 tree->gtUsedRegs |= regByteMask;
3686 regMask = regByteMask;
3698 /* non-integer division handled in generic way */
3699 if (!varTypeIsIntegral(type))
3701 tree->gtUsedRegs = 0;
3702 if (predictReg <= PREDICT_REG)
3703 predictReg = PREDICT_SCRATCH_REG;
3704 goto GENERIC_BINARY;
3707 #ifndef _TARGET_64BIT_
3709 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3711 /* Special case: a mod with an int op2 is done inline using idiv or div
3712 to avoid a costly call to the helper */
3714 noway_assert((op2->gtOper == GT_CNS_LNG) &&
3715 (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3717 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3718 if (tree->gtFlags & GTF_REVERSE_OPS)
3720 tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3721 rsvdRegs | op1->gtRsvdRegs);
3722 tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3726 tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3728 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3730 regMask = RBM_PAIR_TMP;
3731 #else // !_TARGET_X86_ && !_TARGET_ARM_
3732 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3733 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3736 (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3737 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3741 #endif // _TARGET_64BIT_
3743 /* There is no divide-by-immediate instruction, so force an integer constant
3744 * that is not a power of two into a register
3747 if (op2->OperKind() & GTK_CONST)
3749 ssize_t ival = op2->gtIntConCommon.IconValue();
3751 /* Is the divisor a power of 2 ? */
3753 if (ival > 0 && genMaxOneBit(size_t(ival)))
3758 op2PredictReg = PREDICT_SCRATCH_REG;
3762 /* Non integer constant also must be enregistered */
3763 op2PredictReg = PREDICT_REG;
3766 regMaskTP trashedMask;
3767 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3768 regMaskTP op1ExcludeMask;
3769 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3770 regMaskTP op2ExcludeMask;
3771 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3773 #ifdef _TARGET_XARCH_
3774 /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3775 * we can safely allow the "b" value to die. Unfortunately, if we simply
3776 * mark the node "b" as using EDX, this will not work if "b" is a register
3777 * variable that dies with this particular reference. Thus, if we want to
3778 * avoid this situation (where we would have to spill the variable from
3779 * EDX to someplace else), we need to explicitly mark the interference
3780 * of the variable at this point.
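 * Concretely: if "b" were allowed to stay in EDX, the CDQ that sign-extends the
 * dividend into EDX:EAX would destroy "b" before the idiv could read it.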
3783 if (op2->gtOper == GT_LCL_VAR)
3785 unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3786 varDsc = lvaTable + lclNum;
3787 if (varDsc->lvTracked)
3792 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3793 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3794 varDsc->lvVarIndex);
3795 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3796 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3797 varDsc->lvVarIndex);
3800 VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3801 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3805 /* set the held register based on opcode */
3806 if (oper == GT_DIV || oper == GT_UDIV)
3810 trashedMask = (RBM_EAX | RBM_EDX);
3812 op2ExcludeMask = (RBM_EAX | RBM_EDX);
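// (x86 idiv/div read the dividend from EDX:EAX and leave the quotient in EAX and the
// remainder in EDX, so both registers are trashed and op2 must avoid them.)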
3814 #endif // _TARGET_XARCH_
3817 trashedMask = RBM_NONE;
3818 op1ExcludeMask = RBM_NONE;
3819 op2ExcludeMask = RBM_NONE;
3822 /* set the lvPref reg if possible */
3825 * Walking the gtNext link twice from here should get us back
3826 * to our parent node, if this is a simple assignment tree.
3828 dest = tree->gtNext;
3829 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3830 dest->gtNext->gtOp.gtOp2 == tree)
3832 varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3833 varDsc->addPrefReg(regMask, this);
3835 #ifdef _TARGET_XARCH_
3836 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3838 op1PredictReg = PREDICT_SCRATCH_REG;
3841 /* are we supposed to evaluate op2 first? */
3842 if (tree->gtFlags & GTF_REVERSE_OPS)
3844 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3845 rsvdRegs | op1->gtRsvdRegs);
3846 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3850 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3851 rsvdRegs | op2->gtRsvdRegs);
3852 rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3857 /* grab EAX, EDX for this tree node */
3858 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3866 if (predictReg <= PREDICT_REG)
3867 predictReg = PREDICT_SCRATCH_REG;
3869 #ifndef _TARGET_64BIT_
3870 if (type == TYP_LONG)
3872 if (op2->IsCnsIntOrI())
3874 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3875 // no register used by op2
3876 op2->gtUsedRegs = 0;
3877 tree->gtUsedRegs = op1->gtUsedRegs;
3881 // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3882 tmpMask = lockedRegs;
3883 tmpMask &= ~RBM_LNGARG_0;
3884 tmpMask &= ~RBM_SHIFT_LNG;
3886 // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3887 if (tree->gtFlags & GTF_REVERSE_OPS)
3889 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3890 tmpMask |= RBM_SHIFT_LNG;
3891 // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife
3892 // Fix 383843 X86/ARM ILGEN
3893 rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3894 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3898 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3899 tmpMask |= RBM_LNGARG_0;
3900 // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3901 // Fix 383839 ARM ILGEN
3902 rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3903 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3905 regMask = RBM_LNGRET; // function return registers
3906 op1->gtUsedRegs |= RBM_LNGARG_0;
3907 op2->gtUsedRegs |= RBM_SHIFT_LNG;
3909 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3911 // We are using a helper function to do shift:
3913 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3917 #endif // _TARGET_64BIT_
3919 #ifdef _TARGET_XARCH_
3920 if (!op2->IsCnsIntOrI())
3921 predictReg = PREDICT_NOT_REG_ECX;
3925 // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3927 regMaskTP tmpRsvdRegs;
3929 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3931 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3932 rsvdRegs = RBM_LASTUSE;
3933 tmpRsvdRegs = RBM_NONE;
3938 // Special case: op1 is a constant
3939 if (op1->IsCnsIntOrI())
3940 tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3941 // System.Xml.Schema.BitSet:Get(int):bool
3943 tmpRsvdRegs = op1->gtRsvdRegs;
3947 if (!op2->IsCnsIntOrI())
3949 if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3951 op2PredictReg = PREDICT_REG_SHIFT;
3955 op2PredictReg = PREDICT_REG;
3958 /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3959 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3961 // If our target arch has a REG_SHIFT register then
3962 // we set the PrefReg when we have a LclVar for op2, and
3963 // we add an interference with REG_SHIFT for any other LclVars alive at op2
3964 if (REG_SHIFT != REG_NA)
3966 VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3968 while (op2->gtOper == GT_COMMA)
3970 op2 = op2->gtOp.gtOp2;
3973 if (op2->gtOper == GT_LCL_VAR)
3975 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3976 varDsc->setPrefReg(REG_SHIFT, this);
3977 if (varDsc->lvTracked)
3979 VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3983 // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3984 // excluding the LclVar that was used for the shift amount as it is read-only
3985 // and can be kept alive through the shift operation
3987 rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3988 // In case op2Mask doesn't contain the required shift register,
3989 // we will or it in now.
3990 op2Mask |= RBM_SHIFT;
3994 if (tree->gtFlags & GTF_REVERSE_OPS)
3996 assert(regMask == RBM_NONE);
3997 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
4000 #if CPU_HAS_BYTE_REGS
4001 if (varTypeIsByte(type))
4003 // Fix 383789 X86 ILGEN
4004 // Fix 383813 X86 ILGEN
4005 // Fix 383828 X86 ILGEN
4006 if (op1->gtOper == GT_LCL_VAR)
4008 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4009 if (varDsc->lvTracked)
4011 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4013 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4014 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4017 if ((regMask & RBM_BYTE_REGS) == 0)
4019 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4020 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4023 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4027 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4033 if (tree->gtFlags & GTF_REVERSE_OPS)
4035 if (predictReg == PREDICT_NONE)
4037 predictReg = PREDICT_REG;
4039 else if (rpHasVarIndexForPredict(predictReg))
4041 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4042 predictReg = PREDICT_SCRATCH_REG;
4045 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4046 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4050 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4052 /* CodeGen will enregister the op2 side of a GT_COMMA */
4053 if (predictReg == PREDICT_NONE)
4055 predictReg = PREDICT_REG;
4057 else if (rpHasVarIndexForPredict(predictReg))
4059 /* Don't propagate the use of tgt reg use in a GT_COMMA */
4060 predictReg = PREDICT_SCRATCH_REG;
4063 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4065 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4067 tree->gtUsedRegs = op2->gtUsedRegs;
4068 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4070 LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4072 if (op2VarDsc->lvTracked)
4074 VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4075 rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4082 noway_assert(op1 != NULL && op2 != NULL);
4085 * If the gtUsedRegs conflicts with lockedRegs
4086 * then we are going to have to spill some registers
4087 * into the non-trashed register set to keep them alive
4091 regMaskTP spillRegs;
4092 spillRegs = lockedRegs & tree->gtUsedRegs;
4096 /* Find the next register that needs to be spilled */
4097 tmpMask = genFindLowestBit(spillRegs);
4102 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4103 gtDispTree(tree, 0, NULL, true);
4106 /* In Codegen it will typically introduce a spill temp here */
4107 /* rather than relocating the register to a non trashed reg */
4108 rpPredictSpillCnt++;
4111 /* Remove it from the spillRegs and lockedRegs*/
4112 spillRegs &= ~tmpMask;
4113 lockedRegs &= ~tmpMask;
4116 VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4118 /* Evaluate the <cond> subtree */
4119 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4120 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4121 tree->gtUsedRegs = op1->gtUsedRegs;
4123 noway_assert(op2->gtOper == GT_COLON);
4124 if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4126 // Don't try to target the register specified in predictReg when we have complex subtrees
4128 predictReg = PREDICT_SCRATCH_REG;
4130 GenTreePtr elseTree = op2->AsColon()->ElseNode();
4131 GenTreePtr thenTree = op2->AsColon()->ThenNode();
4133 noway_assert(thenTree != NULL && elseTree != NULL);
4135 // Update compCurLife to only those vars live on the <then> subtree
4137 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4139 if (type == TYP_VOID)
4141 /* Evaluate the <then> subtree */
4142 rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4144 predictReg = PREDICT_NONE;
4148 // A mask to use to force the predictor to choose low registers (to reduce code size)
4149 regMaskTP avoidRegs = RBM_NONE;
4151 avoidRegs = (RBM_R12 | RBM_LR);
4153 if (predictReg <= PREDICT_REG)
4154 predictReg = PREDICT_SCRATCH_REG;
4156 /* Evaluate the <then> subtree */
4158 rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4162 rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4163 if (op1PredictReg != PREDICT_NONE)
4164 predictReg = op1PredictReg;
4168 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4170 /* Evaluate the <else> subtree */
4171 // First record the post-then liveness, and reset the current liveness to the else
4173 CLANG_FORMAT_COMMENT_ANCHOR;
4176 VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4179 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4181 rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4182 tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4184 // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4185 // They each have only one successor, which they share. Their live-out sets must equal the
4186 // live-in set of this virtual successor block, and thus must be the same. We can assert
4187 // that equality here.
4188 assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4192 regMaskTP reloadMask = RBM_NONE;
4196 regMaskTP reloadReg;
4198 /* Get an extra register to hold it */
4199 reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4203 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4204 gtDispTree(tree, 0, NULL, true);
4207 reloadMask |= reloadReg;
4212 /* update the gtUsedRegs mask */
4213 tree->gtUsedRegs |= reloadMask;
4220 tree->gtUsedRegs = RBM_NONE;
4223 /* Is there a return value? */
4226 #if FEATURE_FP_REGALLOC
4227 if (varTypeIsFloating(type))
4229 predictReg = PREDICT_FLTRET;
4230 if (type == TYP_FLOAT)
4231 regMask = RBM_FLOATRET;
4233 regMask = RBM_DOUBLERET;
4237 if (isRegPairType(type))
4239 predictReg = PREDICT_LNGRET;
4240 regMask = RBM_LNGRET;
4244 predictReg = PREDICT_INTRET;
4245 regMask = RBM_INTRET;
4247 if (info.compCallUnmanaged)
4249 lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4251 rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4252 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4255 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4256 // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED.
4257 // We could optimize on registers based on int/long or no return value. But to
4258 // keep it simple we will mark the entire RBM_PROFILER_RETURN_USED as used regs here.
4259 if (compIsProfilerHookNeeded())
4261 tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4270 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4271 regMask = genReturnRegForTree(tree);
4272 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4275 tree->gtUsedRegs = 0;
4281 /* This must be a test of a relational operator */
4283 noway_assert(op1->OperIsCompare());
4285 /* Only condition code set by this operation */
4287 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4289 tree->gtUsedRegs = op1->gtUsedRegs;
4295 noway_assert(type <= TYP_INT);
4296 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4299 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4300 unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4303 // Table based switch requires an extra register for the table base
4304 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4306 tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4308 #else // !_TARGET_ARM_
4309 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4310 tree->gtUsedRegs = op1->gtUsedRegs;
4311 #endif // _TARGET_ARM_
4316 if (predictReg <= PREDICT_REG)
4317 predictReg = PREDICT_SCRATCH_REG;
4319 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4320 // Need a reg to load exponent into
4321 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4322 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4326 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4330 if (info.compInitMem)
4332 // We zero out two registers in the ARM codegen path
4334 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4338 op1->gtUsedRegs |= (regMaskSmall)regMask;
4339 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4341 // The result will be put in the reg we picked for the size
4342 // regMask = <already set as we want it to be>
4349 if (predictReg <= PREDICT_REG)
4350 predictReg = PREDICT_SCRATCH_REG;
4352 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4353 // registers (to reduce code size)
4355 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4358 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4360 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4364 unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4365 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4366 // If it has one bit set, and that's an arg reg...
4367 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4369 // We are passing the 'obj' in the argument registers
4371 regNumber rn = genRegNumFromMask(preferReg);
4373 // Add the registers used to pass the 'obj' to regMask.
4374 for (unsigned i = 0; i < objSize / 4; i++)
4376 if (rn == MAX_REG_ARG)
4379 regMask |= genRegMask(rn);
4380 rn = genRegArgNext(rn);
4385 // We are passing the 'obj' in the outgoing arg space
4386 // We will need one register to load it into, unless the 'obj' size is 4 or less.
4390 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4393 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4395 #else // !_TARGET_ARM
4397 #endif // _TARGET_ARM_
4403 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4405 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4407 // A MKREFANY takes up two registers.
4408 regNumber rn = genRegNumFromMask(preferReg);
4410 if (rn < MAX_REG_ARG)
4412 regMask |= genRegMask(rn);
4413 rn = genRegArgNext(rn);
4414 if (rn < MAX_REG_ARG)
4415 regMask |= genRegMask(rn);
4418 if (regMask != RBM_NONE)
4420 // Condensation of GENERIC_BINARY path.
4421 assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4422 op2PredictReg = PREDICT_REG;
4423 regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4424 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4425 regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4426 tree->gtUsedRegs = (regMaskSmall)regMask;
4429 tree->gtUsedRegs = op1->gtUsedRegs;
4430 #endif // _TARGET_ARM_
4431 goto GENERIC_BINARY;
4438 goto GENERIC_BINARY;
4442 // Ensure we can write to op2. op2 will hold the output.
4443 if (predictReg < PREDICT_SCRATCH_REG)
4444 predictReg = PREDICT_SCRATCH_REG;
4446 if (tree->gtFlags & GTF_REVERSE_OPS)
4448 op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4449 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4453 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4454 op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4456 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4463 // This unary operator simply passes through the value from its child (much like GT_NOP)
4464 // and thus won't need a scratch register.
4465 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4466 tree->gtUsedRegs = op1->gtUsedRegs;
4473 noway_assert(!"unexpected simple operator in reg use prediction");
4478 /* See what kind of a special operator we have here */
4483 GenTreeArgList* list;
4485 unsigned regArgsNum;
4487 regMaskTP regArgMask;
4488 regMaskTP curArgMask;
4494 /* initialize so we can just or in various bits */
4495 tree->gtUsedRegs = RBM_NONE;
4497 #if GTF_CALL_REG_SAVE
4499 * Unless the GTF_CALL_REG_SAVE flag is set,
4500 * we can't preserve the RBM_CALLEE_TRASH registers.
4501 * (likewise we can't preserve the return registers)
4502 * So we remove them from the lockedRegs set and
4503 * record any of them in the keepMask
4506 if (tree->gtFlags & GTF_CALL_REG_SAVE)
4508 regMaskTP trashMask = genReturnRegForTree(tree);
4510 keepMask = lockedRegs & trashMask;
4511 lockedRegs &= ~trashMask;
4516 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4517 lockedRegs &= ~RBM_CALLEE_TRASH;
4523 /* Is there an object pointer? */
4524 if (tree->gtCall.gtCallObjp)
4526 /* Evaluate the instance pointer first */
4528 args = tree->gtCall.gtCallObjp;
4530 /* the objPtr always goes to an integer register (through temp or directly) */
4531 noway_assert(regArgsNum == 0);
4534 /* Must be passed in a register */
4536 noway_assert(args->gtFlags & GTF_LATE_ARG);
4538 /* Must be either a deferred reg arg node or a GT_ASG node */
4540 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4541 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4543 if (!args->IsArgPlaceHolderNode())
4545 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4548 VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4549 VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4551 /* process argument list */
4552 for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4554 args = list->Current();
4556 if (args->gtFlags & GTF_LATE_ARG)
4558 /* Must be either a Placeholder/NOP node or a GT_ASG node */
4560 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4561 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4563 if (!args->IsArgPlaceHolderNode())
4565 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4572 #ifdef FEATURE_FIXED_OUT_ARGS
4573 // We'll store this argument into the outgoing argument area
4574 // It needs to be in a register to be stored.
4576 predictReg = PREDICT_REG;
4578 #else // !FEATURE_FIXED_OUT_ARGS
4579 // We'll generate a push for this argument
4581 predictReg = PREDICT_NONE;
4582 if (varTypeIsSmall(args->TypeGet()))
4584 /* We may need to sign or zero extend a small type using a register */
4585 predictReg = PREDICT_SCRATCH_REG;
4589 rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4591 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4592 tree->gtUsedRegs |= args->gtUsedRegs;
4595 /* Is there a late argument list */
4598 regArgMask = RBM_NONE; // Set of argument registers that have already been set up.
4601 /* process the late argument list */
4602 for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4604 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4605 LclVarDsc* promotedStructLocal = NULL;
4607 curArgMask = RBM_NONE; // Set of argument registers that are going to be set up by this arg
4608 tmpMask = RBM_NONE; // Set of additional temp registers that are needed only to set up the current arg
4610 assert(list->OperIsList());
4612 args = list->Current();
4613 list = list->Rest();
4615 assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
4617 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4618 assert(curArgTabEntry);
4620 regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
4622 curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4624 rpPredictReg argPredictReg;
4625 regMaskTP avoidReg = RBM_NONE;
4627 if (regNum != REG_STK)
4629 argPredictReg = rpGetPredictForReg(regNum);
4630 curArgMask |= genRegMask(regNum);
4634 assert(numSlots > 0);
4635 argPredictReg = PREDICT_NONE;
4637 // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4638 avoidReg = (RBM_R12 | RBM_LR);
4643 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4645 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4647 // 64-bit longs and doubles require 2 consecutive argument registers
4648 curArgMask |= genRegMask(REG_NEXT(regNum));
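// For example (illustrative only): a TYP_LONG argument whose first half was
// assigned to R2 also claims R3 here, since the two halves must live in
// consecutive argument registers.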
4650 else if (args->TypeGet() == TYP_STRUCT)
4652 GenTreePtr argx = args;
4653 GenTreePtr lclVarTree = NULL;
4655 /* The GT_OBJ may be a child of a GT_COMMA */
4656 while (argx->gtOper == GT_COMMA)
4658 argx = argx->gtOp.gtOp2;
4660 unsigned originalSize = 0;
4662 if (argx->gtOper == GT_OBJ)
4664 originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4666 // Is it the address of a promoted struct local?
4667 if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4669 lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
4670 LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4671 if (varDsc->lvPromoted)
4672 promotedStructLocal = varDsc;
4675 else if (argx->gtOper == GT_LCL_VAR)
4677 varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
4678 originalSize = varDsc->lvSize();
4680 // Is it a promoted struct local?
4681 if (varDsc->lvPromoted)
4682 promotedStructLocal = varDsc;
4684 else if (argx->gtOper == GT_MKREFANY)
4686 originalSize = 2 * TARGET_POINTER_SIZE;
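// (A GT_MKREFANY produces a TypedReference, i.e. a data pointer plus a type
// field, hence two pointer-sized slots.)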
4690 noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4693 // We only pass arguments differently if it is a struct local that is "independently" promoted, which
4694 // allows the field locals to be independently enregistered.
4695 if (promotedStructLocal != NULL)
4697 if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4698 promotedStructLocal = NULL;
4701 unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
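// For example (illustrative, assuming 4-byte TARGET_POINTER_SIZE/REGSIZE_BYTES):
// an 11-byte struct rounds up to 12 bytes, giving slots == 3.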
4703 // Are we passing a TYP_STRUCT in multiple integer registers?
4704 // if so set up curArgMask to reflect this
4705 // Also slots is updated to reflect the number of outgoing arg slots that we will write
4706 if (regNum != REG_STK)
4708 regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4709 assert(genIsValidReg(regNum));
4710 regNumber nextReg = REG_NEXT(regNum);
4712 while (slots > 0 && nextReg <= regLast)
4714 curArgMask |= genRegMask(nextReg);
4715 nextReg = REG_NEXT(nextReg);
4720 if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4722 // All or a portion of this struct will be placed in the argument registers indicated by
4723 // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4724 // that the second arg to be evaluated interferes with the reg for the first, the third with
4725 // the regs for the first and second, etc. But since we always place the stack slots before
4726 // placing the register slots we do not add interferences for any part of the struct that gets
4727 // passed on the stack.
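// Illustrative sketch (assumed register assignment, not from a real method):
// for a promoted struct passed in R1..R3, the second field we process below
// gets an interference with {R1}, and the third with {R1,R2}, so a later field
// cannot be colored into a register an earlier field still needs for the call setup.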
4730 PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
4731 regMaskTP prevArgMask = RBM_NONE;
4732 for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4734 LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4735 if (fieldVarDsc->lvTracked)
4737 assert(lclVarTree != NULL);
4738 if (prevArgMask != RBM_NONE)
4740 rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4741 DEBUGARG("fieldVar/argReg"));
4744 // Now see how many registers this uses up.
4745 unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4746 unsigned nextAfterLastRegOffset =
4747 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4748 TARGET_POINTER_SIZE;
4749 unsigned nextAfterLastArgRegOffset =
4750 min(nextAfterLastRegOffset,
4751 genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4753 for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4756 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4759 if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4764 if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4766 // Add the argument register used here as a preferred register for this fieldVarDsc
4768 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4769 fieldVarDsc->setPrefReg(firstRegUsed, this);
4772 compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4775 // If slots is greater than zero then part or all of this TYP_STRUCT
4776 // argument is passed in the outgoing argument area (except for an HFA arg).
4778 if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4780 // We will need a register to address the TYP_STRUCT
4781 // Note that we can use an argument register in curArgMask, since in
4782 // codegen we pass the stack portion of the argument before we
4783 // set up the register part.
4786 // Force the predictor to choose a LOW_REG here to reduce code bloat
4787 avoidReg = (RBM_R12 | RBM_LR);
4789 assert(tmpMask == RBM_NONE);
4790 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4792 // If slots > 1 then we will need a second register to perform the load/store into the outgoing arg area.
4796 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4797 lockedRegs | regArgMask | tmpMask | avoidReg);
4800 } // (args->TypeGet() == TYP_STRUCT)
4801 #endif // _TARGET_ARM_
4803 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4804 // as we have already calculated the correct tmpMask and curArgMask values and
4805 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4807 if (promotedStructLocal == NULL)
4809 /* Target the appropriate argument register */
4810 tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4813 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4814 // for the duration of the OBJ.
4815 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4817 GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4818 assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4819 compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4822 regArgMask |= curArgMask;
4823 args->gtUsedRegs |= (tmpMask | regArgMask);
4824 tree->gtUsedRegs |= args->gtUsedRegs;
4825 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4827 if (args->gtUsedRegs != RBM_NONE)
4829 // Add register interference with the set of registers used or in use when we evaluated
4830 // the current arg, with whatever is alive after the current arg
4832 rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4834 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4836 assert(list == NULL);
4838 #ifdef LEGACY_BACKEND
4839 #if CPU_LOAD_STORE_ARCH
4840 #ifdef FEATURE_READYTORUN_COMPILER
4841 if (tree->gtCall.IsR2RRelativeIndir())
4843 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4845 #endif // FEATURE_READYTORUN_COMPILER
4846 #endif // CPU_LOAD_STORE_ARCH
4847 #endif // LEGACY_BACKEND
4849 regMaskTP callAddrMask;
4850 callAddrMask = RBM_NONE;
4851 #if CPU_LOAD_STORE_ARCH
4852 predictReg = PREDICT_SCRATCH_REG;
4854 predictReg = PREDICT_NONE;
4857 switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4859 case GTF_CALL_VIRT_STUB:
4861 // We only want to record an interference between the virtual stub
4862 // param reg and anything that's live AFTER the call, but we've not
4863 // yet processed the indirect target. So add virtualStubParamInfo.regMask
4864 // to interferingRegs.
4865 interferingRegs |= virtualStubParamInfo->GetRegMask();
4868 printf("Adding interference with Virtual Stub Param\n");
4870 codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4872 if (tree->gtCall.gtCallType == CT_INDIRECT)
4874 predictReg = virtualStubParamInfo->GetPredict();
4878 case GTF_CALL_VIRT_VTABLE:
4879 predictReg = PREDICT_SCRATCH_REG;
4882 case GTF_CALL_NONVIRT:
4883 predictReg = PREDICT_SCRATCH_REG;
4887 if (tree->gtCall.gtCallType == CT_INDIRECT)
4889 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4890 if (tree->gtCall.gtCallCookie)
4892 codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4894 callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4895 lockedRegs | regArgMask, RBM_LASTUSE);
4897 // Just in case we predict some other registers, force interference with our two special
4898 // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4899 callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4901 predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4905 rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4907 else if (predictReg != PREDICT_NONE)
4909 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4912 if (tree->gtFlags & GTF_CALL_UNMANAGED)
4914 // Need a register for tcbReg
4916 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4917 #if CPU_LOAD_STORE_ARCH
4918 // Need an extra register for tmpReg
4920 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4924 tree->gtUsedRegs |= callAddrMask;
4926 /* After the call restore the original value of lockedRegs */
4927 lockedRegs |= keepMask;
4929 /* set the return register */
4930 regMask = genReturnRegForTree(tree);
4932 if (regMask & rsvdRegs)
4934 // We will need to relocate the return register value
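// (Illustrative: if the call returns in the RBM_INTRET register, say EAX on x86,
// but that register appears in rsvdRegs, we predict one extra scratch register
// below so the value can be relocated out of the reserved register.)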
4935 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4936 #if FEATURE_FP_REGALLOC
4937 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4943 if (intRegMask == RBM_INTRET)
4945 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4947 else if (intRegMask == RBM_LNGRET)
4949 regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4953 noway_assert(!"unexpected return regMask");
4957 #if FEATURE_FP_REGALLOC
4960 if (floatRegMask == RBM_FLOATRET)
4962 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4964 else if (floatRegMask == RBM_DOUBLERET)
4966 regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4968 else // HFA return case
4970 for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4972 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4979 /* the return registers (if any) are killed */
4980 tree->gtUsedRegs |= regMask;
4982 #if GTF_CALL_REG_SAVE
4983 if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4986 /* the RBM_CALLEE_TRASH set is killed (i.e. EAX,ECX,EDX) */
4987 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4991 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4992 // Mark required registers for emitting tailcall profiler callback as used
4993 if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4995 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
5002 // Figure out which registers can't be touched
5004 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5005 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5007 regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5012 #if CPU_LOAD_STORE_ARCH
5013 // We need a register to load the bounds of the MD array
5014 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5017 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5019 /* We need scratch registers to compute index-lower_bound.
5020 Also, gtArrInds[0]'s register will be used as the second
5021 addressability register (besides gtArrObj's) */
5023 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5024 lockedRegs | regMask | dimsMask, rsvdRegs);
5028 dimsMask |= dimMask;
5030 #ifdef _TARGET_XARCH_
5031 // INS_imul doesn't have an immediate constant.
5032 if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5033 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5035 tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5040 #ifdef _TARGET_XARCH_
5041 rsvdRegs |= RBM_EAX;
5043 if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5045 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5049 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5051 op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5053 #ifdef _TARGET_XARCH_
5054 rsvdRegs &= ~RBM_EAX;
5055 tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5056 rsvdRegs | regMask | op2Mask);
5057 tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5058 predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5061 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5066 case GT_ARR_BOUNDS_CHECK:
5068 regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5069 regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5070 rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5073 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5078 NO_WAY("unexpected special operator in reg use prediction");
5085 /* make sure we set them to something reasonable */
5086 if (tree->gtUsedRegs & RBM_ILLEGAL)
5087 noway_assert(!"used regs not set properly in reg use prediction");
5089 if (regMask & RBM_ILLEGAL)
5090 noway_assert(!"return value not set propery in reg use prediction");
5095 * If the gtUsedRegs conflicts with lockedRegs
5096 * then we are going to have to spill some registers
5097 * into the non-trashed register set to keep their values alive
5099 regMaskTP spillMask;
5100 spillMask = tree->gtUsedRegs & lockedRegs;
5106 /* Find the next register that needs to be spilled */
5107 tmpMask = genFindLowestBit(spillMask);
5112 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5113 gtDispTree(tree, 0, NULL, true);
5114 if ((tmpMask & regMask) == 0)
5116 printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5117 gtDispTree(tree, 0, NULL, true);
5121 /* In Codegen it will typically introduce a spill temp here */
5122 /* rather than relocating the register to a non trashed reg */
5123 rpPredictSpillCnt++;
5125 /* Remove it from the spillMask */
5126 spillMask &= ~tmpMask;
5131 * If the return registers in regMask conflict with the lockedRegs
5132 * then we allocate extra registers for the reload of the conflicting registers.
5135 * Set spillMask to the set of locked registers that have to be reloaded here.
5136 * reloadMask is set to the extra registers that are used to reload
5137 * the spilled lockedRegs.
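 * Illustrative sketch (made-up registers): if this tree's value comes back in
 * EAX while EAX is also in lockedRegs, EAX ends up in spillMask; we then pick
 * an extra register, say EBX, as the reloadReg, and regMask is updated below so
 * that callers see the value in the relocated (reload) register instead.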
5140 noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5141 spillMask = lockedRegs & regMask;
5145 /* Remove the spillMask from regMask */
5146 regMask &= ~spillMask;
5148 regMaskTP reloadMask = RBM_NONE;
5151 /* Get an extra register to hold it */
5152 regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5156 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5157 gtDispTree(tree, 0, NULL, true);
5160 reloadMask |= reloadReg;
5162 /* Remove it from the spillMask */
5163 spillMask &= ~genFindLowestBit(spillMask);
5166 /* Update regMask to use the reloadMask */
5167 regMask |= reloadMask;
5169 /* update the gtUsedRegs mask */
5170 tree->gtUsedRegs |= (regMaskSmall)regMask;
5173 regMaskTP regUse = tree->gtUsedRegs;
5174 regUse |= interferingRegs;
5176 if (!VarSetOps::IsEmpty(this, compCurLife))
5178 // Add interference between the current set of live variables and
5179 // the set of temporary registers needed to evaluate the subtree
5182 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5186 if (rpAsgVarNum != -1)
5188 // Add interference between the registers used (if any)
5189 // and the assignment target variable
5192 rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5195 // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5196 // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5197 // to the set of currently live variables. This new interference will prevent us
5198 // from using the register value used here for enregistering a different live variable
5200 if (!VarSetOps::IsEmpty(this, compCurLife))
5202 rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5206 /* Do we need to restore the oldLastUseVars value */
5207 if (restoreLastUseVars)
5209 /* If we used a GT_ASG targeted register then we need to add
5210 * a variable interference between any new last use variables
5211 * and the GT_ASG targeted register
5213 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5215 rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5216 DEBUGARG("asgn tgt last use conflict"));
5218 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5224 #pragma warning(pop)
5227 #endif // LEGACY_BACKEND
5229 /****************************************************************************/
5230 /* Returns true when we must create an EBP frame.
5231 This is used to force most managed methods to have EBP based frames,
5232 which allows the ETW kernel stackwalker to walk the stacks of managed code;
5233 this allows the kernel to perform lightweight profiling.
5235 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5237 bool result = false;
5239 const char* reason = nullptr;
5243 if (!result && (opts.MinOpts() || opts.compDbgCode))
5245 INDEBUG(reason = "Debug Code");
5248 if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5250 INDEBUG(reason = "IL Code Size");
5253 if (!result && (fgBBcount > 3))
5255 INDEBUG(reason = "BasicBlock Count");
5258 if (!result && fgHasLoops)
5260 INDEBUG(reason = "Method has Loops");
5263 if (!result && (optCallCount >= 2))
5265 INDEBUG(reason = "Call Count");
5268 if (!result && (optIndirectCallCount >= 1))
5270 INDEBUG(reason = "Indirect Call");
5273 #endif // ETW_EBP_FRAMED
5275 // The VM always wants to identify the containing frame of an InlinedCallFrame
5276 // via the frame register, never the stack register, so we need a frame.
5277 if (!result && (optNativeCallCount != 0))
5279 INDEBUG(reason = "Uses PInvoke");
5283 #ifdef _TARGET_ARM64_
5284 // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame pointer frames.
5288 INDEBUG(reason = "Temporary ARM64 force frame pointer");
5291 #endif // _TARGET_ARM64_
5294 if ((result == true) && (wbReason != nullptr))
5303 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5305 /*****************************************************************************
5307 * Predict which variables will be assigned to registers
5308 * This is x86 specific and only predicts the integer registers and
5309 * must be conservative: any variable that is predicted to be enregistered
5310 * must end up being enregistered.
5312 * The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5313 * predicted to be enregistered to minimize calls to rpPredictRegPick.
5318 #pragma warning(push)
5319 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5321 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5325 if (rpPasses <= rpPassesPessimize)
5327 // Assume that we won't have to reverse EBP enregistration
5328 rpReverseEBPenreg = false;
5330 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5331 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5332 rpFrameType = FT_EBP_FRAME;
5334 rpFrameType = FT_ESP_FRAME;
5338 // If we are using FPBASE as the frame register, we cannot also use it for enregistering variables.
5340 if (rpFrameType == FT_EBP_FRAME)
5342 regAvail &= ~RBM_FPBASE;
5344 #endif // !ETW_EBP_FRAMED
5347 rpPredictAssignMask = regAvail;
5349 raSetupArgMasks(&codeGen->intRegState);
5350 #if !FEATURE_STACK_FP_X87
5351 raSetupArgMasks(&codeGen->floatRegState);
5354 // If there is a secret stub param, it is also live-in to the method.
5355 if (info.compPublishStubParam)
5357 codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5360 if (regAvail == RBM_NONE)
5365 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5367 #if FEATURE_STACK_FP_X87
5368 if (!varDsc->IsFloatRegType())
5371 varDsc->lvRegNum = REG_STK;
5372 if (isRegPairType(varDsc->lvType))
5373 varDsc->lvOtherReg = REG_STK;
5381 printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5382 printf("\n Available registers = ");
5383 dspRegMask(regAvail);
5388 if (regAvail == RBM_NONE)
5393 /* We cannot change the lvVarIndexes at this point, so we */
5394 /* can only re-order the existing set of tracked variables */
5395 /* Which will change the order in which we select the */
5396 /* locals for enregistering. */
5398 assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5400 // Should not be set unless optimizing
5401 noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5407 fgDebugCheckBBlist();
5410 /* Initialize the weighted count of variables that could have */
5411 /* been enregistered but weren't */
5412 unsigned refCntStk = 0; // sum of ref counts for all stack based variables
5413 unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
5414 unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5416 unsigned refCntStkParam; // sum of ref counts for all stack based parameters
5417 unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5419 #if FEATURE_STACK_FP_X87
5420 refCntStkParam = raCntStkParamDblStackFP;
5421 refCntWtdStkDbl = raCntWtdStkDblStackFP;
5422 refCntStk = raCntStkStackFP;
5425 refCntWtdStkDbl = 0;
5427 #endif // FEATURE_STACK_FP_X87
5429 #endif // DOUBLE_ALIGN
5431 /* Set of registers used to enregister variables in the prediction */
5432 regMaskTP regUsed = RBM_NONE;
5434 /*-------------------------------------------------------------------------
5436 * Predict/Assign the enregistered locals in ref-count order
5440 VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5442 unsigned FPRegVarLiveInCnt;
5443 FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5446 for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5448 bool notWorthy = false;
5452 regMaskTP regAvailForType;
5455 unsigned customVarOrderSize;
5456 regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5458 regNumber saveOtherReg;
5460 varDsc = lvaRefSorted[sortNum];
5462 #if FEATURE_STACK_FP_X87
5463 if (varTypeIsFloating(varDsc->TypeGet()))
5466 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5468 // A field local of a PROMOTION_TYPE_DEPENDENT struct should not
5469 // be enregistered.
5470 noway_assert(!varDsc->lvRegister);
5477 /* Check the set of invariant things that would prevent enregistration */
5479 /* Ignore the variable if it's not tracked */
5480 if (!varDsc->lvTracked)
5483 /* Get hold of the index and the interference mask for the variable */
5484 varIndex = varDsc->lvVarIndex;
5486 // Remove 'varIndex' from unprocessedVars
5487 VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5489 // Skip the variable if it's marked as DoNotEnregister.
5491 if (varDsc->lvDoNotEnregister)
5494 /* TODO: For now, if we have a JMP, all register args go to the stack
5495 * TODO: Later, consider extending the life of the argument or making a copy of it */
5497 if (compJmpOpUsed && varDsc->lvIsRegArg)
5500 /* Skip the variable if the ref count is zero */
5502 if (varDsc->lvRefCnt == 0)
5505 /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5507 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5512 /* Is the unweighted ref count too low to be interesting? */
5514 if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5515 (varDsc->lvRefCnt <= 1))
5517 /* Sometimes it's useful to enregister a variable with only one use */
5518 /* arguments referenced in loops are one example */
5520 if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5521 goto OK_TO_ENREGISTER;
5523 /* If the variable has a preferred register set it may be useful to put it there */
5524 if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5525 goto OK_TO_ENREGISTER;
5527 /* Keep going; the table is sorted by "weighted" ref count */
5533 if (varTypeIsFloating(varDsc->TypeGet()))
5535 regType = varDsc->TypeGet();
5536 regAvailForType = regAvail & RBM_ALLFLOAT;
5541 regAvailForType = regAvail & RBM_ALLINT;
5545 isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5549 regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5553 /* If we don't have any registers available then skip the enregistration attempt */
5554 if (regAvailForType == RBM_NONE)
5557 // On the pessimize passes don't even try to enregister LONGS
5558 if (isRegPairType(varDsc->lvType))
5560 if (rpPasses > rpPassesPessimize)
5562 else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5566 // Set of registers to avoid when performing register allocation
5567 avoidReg = RBM_NONE;
5569 if (!varDsc->lvIsRegArg)
5571 /* For local variables,
5572 * avoid the incoming arguments,
5573 * but only if the variable conflicts with them */
5575 if (raAvoidArgRegMask != 0)
5578 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5580 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5582 if (!argDsc->lvIsRegArg)
5585 bool isFloat = argDsc->IsFloatRegType();
5586 regNumber inArgReg = argDsc->lvArgReg;
5587 regMaskTP inArgBit = genRegMask(inArgReg);
5589 // Is this inArgReg in the raAvoidArgRegMask set?
5591 if (!(raAvoidArgRegMask & inArgBit))
5594 noway_assert(argDsc->lvIsParam);
5595 noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5597 unsigned locVarIndex = varDsc->lvVarIndex;
5598 unsigned argVarIndex = argDsc->lvVarIndex;
5600 /* Does this variable interfere with the arg variable ? */
5601 if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5603 noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5604 /* Yes, so try to avoid the incoming arg reg */
5605 avoidReg |= inArgBit;
5609 noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5615 // Now we will try to predict which register the variable
5616 // could be enregistered in
5618 customVarOrderSize = MAX_VAR_ORDER_SIZE;
5620 raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5623 saveOtherReg = DUMMY_INIT(REG_NA);
5625 for (regInx = 0; regInx < customVarOrderSize; regInx++)
5627 regNumber regNum = customVarOrder[regInx];
5628 regMaskTP regBits = genRegMask(regNum);
5630 /* Skip this register if it isn't available */
5631 if ((regAvailForType & regBits) == 0)
5634 /* Skip this register if it interferes with the variable */
5636 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5639 if (varTypeIsFloating(regType))
5644 regNumber regNext = REG_NEXT(regNum);
5645 regBits |= genRegMask(regNext);
5647 /* Skip if regNext interferes with the variable */
5648 if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5654 bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5655 bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5656 bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
5658 /* Skip this register if the weighted ref count is less than two
5659 and we are considering an unused callee-saved register */
5661 if (lessThanTwoRefWtd && // less than two references (weighted)
5662 firstUseOfReg && // first use of this register
5663 calleeSavedReg) // callee saved register
5665 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5667 // psc is an abbreviation for possibleSameColor
5668 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5670 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5671 unsigned pscIndex = 0;
5672 while (pscIndexIter.NextElem(&pscIndex))
5674 LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5675 totalRefCntWtd += pscVar->lvRefCntWtd;
5676 if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5680 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5683 continue; // not worth spilling a callee saved register
5685 // otherwise we will spill this callee-saved register,
5686 // because its uses, when combined with the uses of
5687 // other yet-to-be-processed candidates, exceed our threshold.
5688 // totalRefCntWtd = totalRefCntWtd;
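// Illustrative numbers (assumed, for intuition only; BB_UNITY_WEIGHT taken as 100):
// a candidate with lvRefCntWtd == 60 plus one unprocessed, non-interfering variable
// of weight 180 gives totalRefCntWtd == 240 > 200, so dirtying the callee-saved
// register is judged worthwhile; candidates totalling 140 would have hit the
// 'continue' above instead.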
5691 /* Looks good - mark the variable as living in the register */
5693 if (isRegPairType(varDsc->lvType))
5695 if (firstHalf == false)
5697 /* Enregister the first half of the long */
5698 varDsc->lvRegNum = regNum;
5699 saveOtherReg = varDsc->lvOtherReg;
5700 varDsc->lvOtherReg = REG_STK;
5705 /* Ensure 'well-formed' register pairs */
5706 /* (those returned by gen[Pick|Grab]RegPair) */
5708 if (regNum < varDsc->lvRegNum)
5710 varDsc->lvOtherReg = varDsc->lvRegNum;
5711 varDsc->lvRegNum = regNum;
5715 varDsc->lvOtherReg = regNum;
5722 varDsc->lvRegNum = regNum;
5726 varDsc->lvOtherReg = REG_NEXT(regNum);
5731 if (regNum == REG_FPBASE)
5733 refCntEBP += varDsc->lvRefCnt;
5734 refCntWtdEBP += varDsc->lvRefCntWtd;
5736 if (varDsc->lvIsParam)
5738 refCntStkParam += varDsc->lvRefCnt;
5743 /* Record this register in the regUsed set */
5746 /* The register is now ineligible for all interfering variables */
5748 VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5753 regNumber secondHalf = REG_NEXT(regNum);
5754 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5755 unsigned intfIndex = 0;
5756 while (iter.NextElem(&intfIndex))
5758 VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5763 /* If a register argument, remove its incoming register
5764 * from the "avoid" list */
5766 if (varDsc->lvIsRegArg)
5768 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5772 raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5777 /* A variable of TYP_LONG can take two registers */
5781 // Since we have successfully enregistered this variable it is
5782 // now time to move on and consider the next variable
5788 noway_assert(isRegPairType(varDsc->lvType));
5790 /* This TYP_LONG is partially enregistered */
5792 noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5794 if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5799 raAddToStkPredict(varDsc->lvRefCntWtd);
5804 if (varDsc->lvDependReg)
5811 /* Weighted count of variables that could have been enregistered but weren't */
5812 raAddToStkPredict(varDsc->lvRefCntWtd);
5814 if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5815 raAddToStkPredict(varDsc->lvRefCntWtd);
5819 varDsc->lvRegister = false;
5821 varDsc->lvRegNum = REG_STK;
5822 if (isRegPairType(varDsc->lvType))
5823 varDsc->lvOtherReg = REG_STK;
5825 /* unweighted count of variables that were not enregistered */
5827 refCntStk += varDsc->lvRefCnt;
5830 if (varDsc->lvIsParam)
5832 refCntStkParam += varDsc->lvRefCnt;
5836 /* Is it a stack based double? */
5837 /* Note that double params are excluded since they cannot be double aligned */
5838 if (varDsc->lvType == TYP_DOUBLE)
5840 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5848 gtDispLclVar((unsigned)(varDsc - lvaTable));
5849 if (varDsc->lvTracked)
5850 printf("T%02u", varDsc->lvVarIndex);
5853 printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5854 if (varDsc->lvDoNotEnregister)
5855 printf(", do-not-enregister");
5863 varDsc->lvRegister = true;
5865 // Record the fact that we enregistered a stack arg when a tail call is used.
5866 if (compJmpOpUsed && !varDsc->lvIsRegArg)
5868 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5869 if (isRegPairType(varDsc->lvType))
5871 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5879 gtDispLclVar((unsigned)(varDsc - lvaTable));
5880 printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5881 refCntWtd2str(varDsc->lvRefCntWtd));
5882 varDsc->PrintVarReg();
5886 printf(":%s", getRegName(varDsc->lvOtherReg));
5895 noway_assert(refCntEBP == 0);
5902 printf("; refCntStk = %u\n", refCntStk);
5904 printf("; refCntEBP = %u\n", refCntEBP);
5905 if (refCntWtdEBP > 0)
5906 printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
5908 if (refCntStkParam > 0)
5909 printf("; refCntStkParam = %u\n", refCntStkParam);
5910 if (refCntWtdStkDbl > 0)
5911 printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5916 /* Determine how the EBP register should be used */
5917 CLANG_FORMAT_COMMENT_ANCHOR;
5921 if (!codeGen->isFramePointerRequired())
5923 noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5926 First let us decide if we should use EBP to create a
5927 double-aligned frame, instead of enregistering variables
5930 if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5932 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5933 goto REVERSE_EBP_ENREG;
5936 if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5938 if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5940 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5941 goto REVERSE_EBP_ENREG;
5946 #endif // DOUBLE_ALIGN
5948 if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5950 #ifdef _TARGET_XARCH_
5952 /* If we are using EBP to enregister variables then
5953 will we actually save bytes by setting up an EBP frame?
5955 Each stack reference is an extra byte of code if we use an ESP based frame.
5958 Here we measure the savings that we get by using EBP to
5959 enregister variables vs. the cost in code size that we
5960 pay when using an ESP based frame.
5962 We pay one byte of code for each refCntStk
5963 but we save one byte (or more) for each refCntEBP.
5965 Our savings are the elimination of a stack memory read/write.
5966 We use the loop weighted value of
5967 refCntWtdEBP * mem_access_weight (0, 3, 6)
5968 to represent this savings.
5971 // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5972 // to set up an EBP frame in the prolog and epilog
5973 #define EBP_FRAME_SETUP_SIZE 5
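// Worked example with made-up counts (illustrative only): refCntStk = 40 and
// refCntEBP = 10 give bytesSaved = 40 - (10 + 5) = 25. With the default
// mem_access_weight of 3 and refCntWtdEBP = 600 (6 * BB_UNITY_WEIGHT, taking
// BB_UNITY_WEIGHT as 100), the savings term is 600 * 3 / 100 = 18 < 25, so we
// would predict that an EBP frame is not worth it and reverse the EBP enregistrations.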
5976 if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5978 unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5979 unsigned mem_access_weight = 3;
5981 if (compCodeOpt() == SMALL_CODE)
5982 mem_access_weight = 0;
5983 else if (compCodeOpt() == FAST_CODE)
5984 mem_access_weight *= 2;
5986 if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5988 /* It would not be a good idea to use EBP in our predictions */
5989 CLANG_FORMAT_COMMENT_ANCHOR;
5991 if (verbose && (refCntEBP > 0))
5992 printf("; Predicting that it's not worth using EBP to enregister variables\n");
5994 rpFrameType = FT_EBP_FRAME;
5995 goto REVERSE_EBP_ENREG;
5998 #endif // _TARGET_XARCH_
6000 if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6005 if (rpMustCreateEBPCalled == false)
6007 rpMustCreateEBPCalled = true;
6008 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6012 printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6014 codeGen->setFrameRequired(true);
6016 rpFrameType = FT_EBP_FRAME;
6017 goto REVERSE_EBP_ENREG;
6027 noway_assert(rpFrameType != FT_ESP_FRAME);
6029 rpReverseEBPenreg = true;
6034 noway_assert(regUsed & RBM_FPBASE);
6036 regUsed &= ~RBM_FPBASE;
6038 /* variables that were enregistered in EBP become stack based variables */
6039 raAddToStkPredict(refCntWtdEBP);
6043 /* We're going to have to undo some predicted enregistered variables */
6044 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6046 /* Is this a register variable? */
6047 if (varDsc->lvRegNum != REG_STK)
6049 if (isRegPairType(varDsc->lvType))
6051 /* Only one can be EBP */
6052 if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6054 if (varDsc->lvRegNum == REG_FPBASE)
6055 varDsc->lvRegNum = varDsc->lvOtherReg;
6057 varDsc->lvOtherReg = REG_STK;
6059 if (varDsc->lvRegNum == REG_STK)
6060 varDsc->lvRegister = false;
6062 if (varDsc->lvDependReg)
6072 if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6074 varDsc->lvRegNum = REG_STK;
6076 varDsc->lvRegister = false;
6078 if (varDsc->lvDependReg)
6084 printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6085 varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6086 (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6094 #endif // ETW_EBP_FRAMED
6099 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6101 /* Clear the lvDependReg flag for the next iteration of the predictor */
6102 varDsc->lvDependReg = false;
6104 // If we set rpLostEnreg and this is the first pessimize pass
6105 // then reverse the enreg of all TYP_LONG
6106 if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6108 varDsc->lvRegNum = REG_STK;
6109 varDsc->lvOtherReg = REG_STK;
6114 if (verbose && raNewBlocks)
6116 printf("\nAdded FP register killing blocks:\n");
6117 fgDispBasicBlocks();
6121 noway_assert(rpFrameType != FT_NOT_SET);
6123 /* return the set of registers used to enregister variables */
6127 #pragma warning(pop)
6130 /*****************************************************************************
6132 * Predict register use for every tree in the function. Note that we do this
6133 * at different times (not to mention in a totally different way) for x86 vs. RISC targets.
6136 void Compiler::rpPredictRegUse()
6143 // We might want to adjust the ref counts based on interference
6146 regMaskTP allAcceptableRegs = RBM_ALLINT;
6148 #if FEATURE_FP_REGALLOC
6149 allAcceptableRegs |= raConfigRestrictMaskFP();
6152 allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6154 /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6155 to acdHelper(). This is done implicitly, without creating a GT_CALL
6156 node. Hence, this interference has to be handled implicitly by
6157 restricting the registers used for enregistering variables */
6159 if (opts.compDbgCode)
6161 allAcceptableRegs &= RBM_CALLEE_SAVED;
6164 /* Compute the initial regmask to use for the first pass */
6165 regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6168 #if CPU_USES_BLOCK_MOVE
6169 /* If we might need to generate a rep mov instruction */
6170 /* remove ESI and EDI */
6172 regAvail &= ~(RBM_ESI | RBM_EDI);
6176 /* If we are using longs then we remove ESI to allow */
6177 /* ESI:EBX to be saved across a call */
6179 regAvail &= ~(RBM_ESI);
6183 // For the first register allocation pass we don't want to color using r4
6184 // as we want to allow it to be used to color the internal temps instead
6185 // when r0,r1,r2,r3 are all in use.
6187 regAvail &= ~(RBM_R4);
6191 // We never have EBP available when ETW_EBP_FRAME is defined
6192 regAvail &= ~RBM_FPBASE;
6194 /* If a frame pointer is required then we remove EBP */
6195 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6196 regAvail &= ~RBM_FPBASE;
6200 BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6202 regAvail = RBM_NONE;
6205 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6206 regAvail = RBM_NONE;
6208 #if FEATURE_STACK_FP_X87
6209 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6210 VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6212 // Calculate the set of all tracked FP/non-FP variables
6213 // into optAllFloatVars and optAllNonFPvars
6218 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6220 /* Ignore the variable if it's not tracked */
6222 if (!varDsc->lvTracked)
6225 /* Get hold of the index and the interference mask for the variable */
6227 unsigned varNum = varDsc->lvVarIndex;
6229 /* add to the set of all tracked FP/non-FP variables */
6231 if (varDsc->IsFloatRegType())
6232 VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6234 VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6238 for (unsigned i = 0; i < REG_COUNT; i++)
6240 VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6242 for (unsigned i = 0; i < lvaTrackedCount; i++)
6244 VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6247 raNewBlocks = false;
6248 rpPredictAssignAgain = false;
6251 bool mustPredict = true;
6252 unsigned stmtNum = 0;
6253 unsigned oldStkPredict = DUMMY_INIT(~0);
6254 VARSET_TP oldLclRegIntf[REG_COUNT];
6256 for (unsigned i = 0; i < REG_COUNT; i++)
6258 VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6263 /* Assign registers to variables using the variable/register interference
6264 graph (raLclRegIntf[]) calculated in the previous pass */
6265 regUsed = rpPredictAssignRegVars(regAvail);
6267 mustPredict |= rpLostEnreg;
6270 // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6271 if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6272 !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6274 // We can release our reservation on R10 and use it to color registers
6275 codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6276 allAcceptableRegs |= RBM_OPT_RSVD;
6280 /* Is our new prediction good enough?? */
6283 /* For small methods (less than 12 stmts), we add an */
6284 /* extra pass if we are predicting the use of some */
6285 /* of the caller-saved registers. */
6286 /* This fixes RAID perf bug 43440 VB Ackerman function */
6288 if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6293 /* If every variable was fully enregistered then we're done */
6294 if (rpStkPredict == 0)
6297 // This was a successful prediction. Record it, in case it turns out to be the best one.
6298 rpRecordPrediction();
6302 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6304 // Be careful about overflow
6305 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6306 if (oldStkPredict < highStkPredict)
6309 if (rpStkPredict < rpPasses * 8)
6312 if (rpPasses >= (rpPassesMax - 1))
6317 /* We will do another pass */;
6321 if (JitConfig.JitAssertOnMaxRAPasses())
6323 noway_assert(rpPasses < rpPassesMax &&
6324 "This may not a bug, but dev team should look and see what is happening");
6328 // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with
6329 // the (max) size of a VARSET. We've eliminated that named constant, so this is left as a literal. We hope
6330 // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6331 if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6333 NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6342 printf("\n; Another pass due to rpLostEnreg");
6344 printf("\n; Another pass due to rpAddedVarIntf");
6345 if ((rpPasses == 1) && rpPredictAssignAgain)
6346 printf("\n; Another pass due to rpPredictAssignAgain");
6348 printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6352 /* Zero the variable/register interference graph */
6353 for (unsigned i = 0; i < REG_COUNT; i++)
6355 VarSetOps::ClearD(this, raLclRegIntf[i]);
6358 // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6359 // it must not be in a register trashed by the callee
6360 if (info.compLvFrameListRoot != BAD_VAR_NUM)
6362 assert(!opts.ShouldUsePInvokeHelpers());
6363 noway_assert(info.compLvFrameListRoot < lvaCount);
6365 LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6367 if (pinvokeVarDsc->lvTracked)
6369 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6370 DEBUGARG("compLvFrameListRoot"));
6372 // We would prefer to have this be enregistered in the PINVOKE_TCB register
6373 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6376 // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6377 // worst case). Make sure that the return value compiler temp that we create for the single
6378 // return block knows about this interference.
6379 if (genReturnLocal != BAD_VAR_NUM)
6381 noway_assert(genReturnBB);
6382 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6383 if (localTmp->lvTracked)
6385 rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6386 VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6392 if (compFloatingPointUsed)
6394 bool hasMustInitFloat = false;
6396 // if we have any must-init floating point LclVars then we will add register interferences
6397 // for the arguments with RBM_SCRATCH
6398 // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6399 // we won't home the arguments into REG_SCRATCH
6404 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6406 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6408 hasMustInitFloat = true;
6413 if (hasMustInitFloat)
6415 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6417 // If it is an incoming argument that is tracked and not floating-point
6418 if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6420 rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6421 DEBUGARG("arg home with must-init fp"));
6429 rpAddedVarIntf = false;
6430 rpLostEnreg = false;
6432 /* Walk the basic blocks and predict reg use for each tree */
6434 for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6438 compCurLifeTree = NULL;
6439 VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6443 for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6445 noway_assert(stmt->gtOper == GT_STMT);
6447 rpPredictSpillCnt = 0;
6448 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6449 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6451 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6456 printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6461 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6463 noway_assert(rpAsgVarNum == -1);
6465 if (rpPredictSpillCnt > tmpIntSpillMax)
6466 tmpIntSpillMax = rpPredictSpillCnt;
6471 /* Decide whether we need to set mustPredict */
6472 mustPredict = false;
6475 // The spill count may now be high enough that we need to reserve r10. If this is the case, we'll
6476 // reserve r10, and if it was already used, throw out the last prediction and repredict.
6477 if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6479 codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6480 allAcceptableRegs &= ~RBM_OPT_RSVD;
6481 if ((regUsed & RBM_OPT_RSVD) != 0)
6484 rpBestRecordedPrediction = nullptr;
6500 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6503 if (rpPredictAssignAgain)
6511 /* Calculate the new value to use for regAvail */
6513 regAvail = allAcceptableRegs;
6515 /* If a frame pointer is required then we remove EBP */
6516 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6517 regAvail &= ~RBM_FPBASE;
6520 // We never have EBP available when ETW_EBP_FRAME is defined
6521 regAvail &= ~RBM_FPBASE;
6524 // If we have done n-passes then we must continue to pessimize the
6525 // interference graph by or-ing the interferences from the previous pass
6527 if (rpPasses > rpPassesPessimize)
6529 for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6530 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6532 /* If we reverse an EBP enregistration then keep it that way */
6533 if (rpReverseEBPenreg)
6534 regAvail &= ~RBM_FPBASE;
6542 /* Save the old variable/register interference graph */
6543 for (unsigned i = 0; i < REG_COUNT; i++)
6545 VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6547 oldStkPredict = rpStkPredict;
6548 } // end of while (true)
6552 // If we recorded a better feasible allocation than we ended up with, go back to using it.
6553 rpUseRecordedPredictionIfBetter();
6556 codeGen->setDoubleAlign(false);
6559 switch (rpFrameType)
6562 noway_assert(!"rpFrameType not set correctly!");
6565 noway_assert(!codeGen->isFramePointerRequired());
6566 noway_assert(!codeGen->isFrameRequired());
6567 codeGen->setFramePointerUsed(false);
6570 noway_assert((regUsed & RBM_FPBASE) == 0);
6571 codeGen->setFramePointerUsed(true);
6574 case FT_DOUBLE_ALIGN_FRAME:
6575 noway_assert((regUsed & RBM_FPBASE) == 0);
6576 noway_assert(!codeGen->isFramePointerRequired());
6577 codeGen->setFramePointerUsed(false);
6578 codeGen->setDoubleAlign(true);
6583 /* Record the set of registers that we need */
6584 codeGen->regSet.rsClearRegsModified();
6585 if (regUsed != RBM_NONE)
6587 codeGen->regSet.rsSetRegsModified(regUsed);
6590 /* We need genFullPtrRegMap if :
6591 * The method is fully interruptible, or
6592 * We are generating an EBP-less frame (for stack-pointer deltas)
6595 genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6601 printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6602 printf(" rpStkPredict was %u\n", rpStkPredict);
6605 rpRegAllocDone = true;
6608 #endif // LEGACY_BACKEND
6610 /*****************************************************************************
6612 * Mark all variables as to whether they live on the stack frame
6613 * (part or whole), and if so what the base is (FP or SP).
6616 void Compiler::raMarkStkVars()
6621 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6623 // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6624 CLANG_FORMAT_COMMENT_ANCHOR;
6626 #ifdef LEGACY_BACKEND
6627 varDsc->lvOnFrame = false;
6628 #endif // LEGACY_BACKEND
6630 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6632 noway_assert(!varDsc->lvRegister);
6636 /* Fully enregistered variables don't need any frame space */
6638 if (varDsc->lvRegister)
6640 if (!isRegPairType(varDsc->TypeGet()))
6645 /* For "large" variables make sure both halves are enregistered */
6647 if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6652 /* Unused variables typically don't get any frame space */
6653 else if (varDsc->lvRefCnt == 0)
6655 bool needSlot = false;
6657 bool stkFixedArgInVarArgs =
6658 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6660 // If its address has been exposed, ignore lvRefCnt. However, exclude
6661 // fixed arguments in a varargs method, as lvOnFrame shouldn't be set
6662 // for them since we don't want to explicitly report them to the GC.
6664 if (!stkFixedArgInVarArgs)
6666 needSlot |= varDsc->lvAddrExposed;
6669 #if FEATURE_FIXED_OUT_ARGS
6671 /* Is this the dummy variable representing GT_LCLBLK ? */
6672 needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6674 #endif // FEATURE_FIXED_OUT_ARGS

#ifdef DEBUG
            /* For debugging, note that we have to reserve space even for
               unused variables if they are ever in scope. However, this is not
               an issue as fgExtendDbgLifetimes() adds an initialization and
               variables in scope will not have a zero ref-cnt.
             */
            if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
            {
                for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
                {
                    noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
                }
            }
#endif // DEBUG

            /* For Debug Code, we have to reserve space even if the variable is never
               in scope. We will also need to initialize it if it is a GC var.
               So we set lvMustInit and artificially bump up the ref-cnt.
             */
            if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
            {
                needSlot |= true;

                if (lvaTypeIsGC(lclNum))
                {
                    varDsc->lvRefCnt = 1;
                }

                if (!varDsc->lvIsParam)
                {
                    varDsc->lvMustInit = true;
                }
            }

#ifndef LEGACY_BACKEND
            varDsc->lvOnFrame = needSlot;
#endif // !LEGACY_BACKEND

            if (!needSlot)
            {
                /* Clear the lvMustInit flag in case it is set */
                varDsc->lvMustInit = false;

                goto NOT_STK;
            }
        }

#ifndef LEGACY_BACKEND
        if (!varDsc->lvOnFrame)
        {
            goto NOT_STK;
        }
#endif // !LEGACY_BACKEND

    ON_STK:
        /* The variable (or part of it) lives on the stack frame */

        noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
#if FEATURE_FIXED_OUT_ARGS
        noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
#else  // FEATURE_FIXED_OUT_ARGS
        noway_assert(lvaLclSize(lclNum) != 0);
#endif // FEATURE_FIXED_OUT_ARGS

        varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable
                                  // will be in the stack frame

    NOT_STK:;
        varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();

#if DOUBLE_ALIGN
        if (codeGen->doDoubleAlign())
        {
            noway_assert(codeGen->isFramePointerUsed() == false);

            /* All arguments are off of EBP with double-aligned frames */
            if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
            {
                varDsc->lvFramePointerBased = true;
            }
        }
#endif // DOUBLE_ALIGN

        /* Some basic checks */

        // It must be in a register, on frame, or have zero references.

        noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);

#ifndef LEGACY_BACKEND
        // We can't have both lvRegister and lvOnFrame for RyuJIT
        noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
#else  // LEGACY_BACKEND

        /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
        noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
                     (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
#endif // LEGACY_BACKEND

#ifdef DEBUG
        // For varargs functions, there should be no direct references to
        // parameter variables except for 'this' (because these were morphed
        // in the importer), the 'arglist' parameter (which is not a GC
        // pointer), and the return buffer argument (if we are returning a
        // struct). This is important because we don't want to try to report
        // them to the GC, as the frame offsets in these local variables would
        // not be correct.

        if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
        {
            if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
            {
                noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
            }
        }
#endif // DEBUG
    }
}

#ifdef LEGACY_BACKEND
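
// rpRecordPrediction: if the current register prediction is feasible and has a
// lower weighted stack use count than any prediction recorded so far, snapshot
// the per-local enregistration decisions so that a later, worse pass can be
// reverted to this one.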
void Compiler::rpRecordPrediction()
{
    if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
    {
        if (rpBestRecordedPrediction == NULL)
        {
            rpBestRecordedPrediction =
                reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
        }
        for (unsigned k = 0; k < lvaCount; k++)
        {
            rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
            rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
            rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
        }
        rpBestRecordedStkPredict = rpStkPredict;
        JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
    }
}
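
// rpUseRecordedPredictionIfBetter: if a previously recorded prediction has a
// lower weighted stack use count than the one we ended up with, copy its
// enregistration decisions back into the local variable table.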
void Compiler::rpUseRecordedPredictionIfBetter()
{
    JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
            rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
    if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
    {
        JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
                rpBestRecordedStkPredict);

        for (unsigned k = 0; k < lvaCount; k++)
        {
            lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
            lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
            lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
        }
    }
}
#endif // LEGACY_BACKEND