1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                               RegAlloc                                    XX
9 XX                                                                           XX
10 XX Does the register allocation and puts the remaining lclVars on the stack XX
11 XX                                                                           XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14 */
22 #if FEATURE_FP_REGALLOC
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
25 DWORD val = JitConfig.JitRegisterFP();
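// (Illustrative note: only the low two bits of the COMPlus_JitRegisterFP setting are used,
// selecting one of the four enumConfigRegisterFP values that raConfigRestrictMaskFP()
// switches over below.)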
27 return (enumConfigRegisterFP)(val & 0x3);
29 #endif // FEATURE_FP_REGALLOC
31 regMaskTP Compiler::raConfigRestrictMaskFP()
33 regMaskTP result = RBM_NONE;
35 #if FEATURE_FP_REGALLOC
36 switch (raConfigRegisterFP())
38 case CONFIG_REGISTER_FP_NONE:
41 case CONFIG_REGISTER_FP_CALLEE_TRASH:
42 result = RBM_FLT_CALLEE_TRASH;
44 case CONFIG_REGISTER_FP_CALLEE_SAVED:
45 result = RBM_FLT_CALLEE_SAVED;
47 case CONFIG_REGISTER_FP_FULL:
48 result = RBM_ALLFLOAT;
57 DWORD Compiler::getCanDoubleAlign()
60 if (compStressCompile(STRESS_DBL_ALN, 20))
61 return MUST_DOUBLE_ALIGN;
63 return JitConfig.JitDoubleAlign();
65 return DEFAULT_DOUBLE_ALIGN;
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
73 // refCntStk - sum of ref counts for all stack based variables
74 // refCntEBP - sum of ref counts for EBP enregistered variables
75 // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables
76 // refCntStkParam - sum of ref counts for all stack based parameters
77 // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 // with double fields).
81 // Returns true if this method estimates that a double-aligned frame would be beneficial
84 // The impact of a double-aligned frame is computed as follows:
85 // - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 // - We pay a byte of code for each non-parameter stack reference.
87 // - We save the misalignment penalty and possible cache-line crossing penalty.
88 // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 // - We pay 7 extra bytes for:
90 // MOV EBP,ESP,
91 // LEA ESP,[EBP-offset]
92 // AND ESP,-8 to double align ESP
93 // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
95 // If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 // Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
97 // ref count for double-aligned values.
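// A worked example (illustrative numbers, not taken from any particular method): with
// refCntStk = 20, refCntEBP = 8 and refCntStkParam = 6, bytesUsed comes to
// 20 + 8 - 6 + 7 = 29. Under FAST_CODE the misalignment weight is 16, so
// refCntWtdStkDbl must reach roughly 29 * BB_UNITY_WEIGHT / 16 before double-aligning
// is considered, and even then refCntWtdEBP must not exceed 2 * refCntWtdStkDbl.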
99 bool Compiler::shouldDoubleAlign(
100 unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
102 bool doDoubleAlign = false;
103 const unsigned DBL_ALIGN_SETUP_SIZE = 7;
105 unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
106 unsigned misaligned_weight = 4;
108 if (compCodeOpt() == Compiler::SMALL_CODE)
109 misaligned_weight = 0;
111 if (compCodeOpt() == Compiler::FAST_CODE)
112 misaligned_weight *= 4;
114 JITDUMP("\nDouble alignment:\n");
115 JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116 JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117 JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
119 if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
121 JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
123 else if (refCntWtdEBP > refCntWtdStkDbl * 2)
125 // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126 // not double aligned.
127 // Here are the numbers that make this not double-aligned.
128 // refCntWtdStkDbl = 0x164
129 // refCntWtdEBP = 0x1a4
130 // We think we do need to change the heuristic to be in favor of double-align.
132 JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
136 // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137 JITDUMP(" Predicting to create a double-aligned frame\n");
138 doDoubleAlign = true;
140 return doDoubleAlign;
142 #endif // DOUBLE_ALIGN
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
146 void Compiler::raInit()
148 #if FEATURE_STACK_FP_X87
149 /* We have not assigned any FP variables to registers yet */
151 VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
153 codeGen->intRegState.rsIsFloat = false;
154 codeGen->floatRegState.rsIsFloat = true;
156 rpReverseEBPenreg = false;
159 rpPassesPessimize = rpPassesMax - 3;
160 if (opts.compDbgCode)
164 rpStkPredict = (unsigned)-1;
165 rpFrameType = FT_NOT_SET;
167 rpMustCreateEBPCalled = false;
168 rpRegAllocDone = false;
169 rpMaskPInvokeEpilogIntf = RBM_NONE;
171 rpPredictMap[PREDICT_NONE] = RBM_NONE;
172 rpPredictMap[PREDICT_ADDR] = RBM_NONE;
174 #if FEATURE_FP_REGALLOC
175 rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
176 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
178 rpPredictMap[PREDICT_REG] = RBM_ALLINT;
179 rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
185 #if defined(_TARGET_ARM_)
187 rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188 rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
189 rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
191 #elif defined(_TARGET_AMD64_)
193 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
197 #elif defined(_TARGET_X86_)
199 rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200 rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201 rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
202 rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203 rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
207 rpBestRecordedPrediction = NULL;
210 /*****************************************************************************
212 * The following table(s) determines the order in which registers are considered
213 * for variables to live in
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
218 #if FEATURE_FP_REGALLOC
219 if (varTypeIsFloating(regType))
221 static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
222 const unsigned raRegVarOrderFltSize = _countof(raRegVarOrderFlt);
224 if (wbVarOrderSize != NULL)
225 *wbVarOrderSize = raRegVarOrderFltSize;
227 return &raRegVarOrderFlt[0];
232 static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
233 const unsigned raRegVarOrderSize = _countof(raRegVarOrder);
235 if (wbVarOrderSize != NULL)
236 *wbVarOrderSize = raRegVarOrderSize;
238 return &raRegVarOrder[0];
244 /*****************************************************************************
246 * Dump out the variable interference graph
250 void Compiler::raDumpVarIntf()
255 printf("Var. interference graph for %s\n", info.compFullName);
257 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
259 /* Ignore the variable if it's not tracked */
261 if (!varDsc->lvTracked)
264 /* Get hold of the index and the interference mask for the variable */
265 unsigned varIndex = varDsc->lvVarIndex;
267 printf(" V%02u,T%02u and ", lclNum, varIndex);
271 for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
273 if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274 printf("T%02u ", refIndex);
285 /*****************************************************************************
287 * Dump out the register interference graph
290 void Compiler::raDumpRegIntf()
292 printf("Reg. interference graph for %s\n", info.compFullName);
297 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
301 /* Ignore the variable if it's not tracked */
303 if (!varDsc->lvTracked)
306 /* Get hold of the index and the interference mask for the variable */
308 varNum = varDsc->lvVarIndex;
310 printf(" V%02u,T%02u and ", lclNum, varNum);
312 if (varDsc->IsFloatRegType())
314 #if !FEATURE_STACK_FP_X87
315 for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
317 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318 printf("%3s ", getRegName(regNum, true));
326 for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
328 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329 printf("%3s ", getRegName(regNum));
342 /*****************************************************************************
344 * We'll adjust the ref counts based on interference
348 void Compiler::raAdjustVarIntf()
350 // This method was not correct and has been disabled.
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
359 inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
361 var_types type = tree->TypeGet();
363 if (varTypeIsStruct(type) && IsHfa(tree))
365 int retSlots = GetHfaCount(tree);
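// For example (illustrative): an HFA returned in three float registers gives retSlots == 3,
// so the expression below builds the mask 0b111 shifted up to REG_FLOATRET, i.e. the first
// three consecutive floating point return registers.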
366 return ((1 << retSlots) - 1) << REG_FLOATRET;
369 const static regMaskTP returnMap[TYP_COUNT] = {
370 RBM_ILLEGAL, // TYP_UNDEF,
371 RBM_NONE, // TYP_VOID,
372 RBM_INTRET, // TYP_BOOL,
373 RBM_INTRET, // TYP_BYTE,
374 RBM_INTRET, // TYP_UBYTE,
375 RBM_INTRET, // TYP_SHORT,
376 RBM_INTRET, // TYP_USHORT,
377 RBM_INTRET, // TYP_INT,
378 RBM_INTRET, // TYP_UINT,
379 RBM_LNGRET, // TYP_LONG,
380 RBM_LNGRET, // TYP_ULONG,
381 RBM_FLOATRET, // TYP_FLOAT,
382 RBM_DOUBLERET, // TYP_DOUBLE,
383 RBM_INTRET, // TYP_REF,
384 RBM_INTRET, // TYP_BYREF,
385 RBM_ILLEGAL, // TYP_STRUCT,
386 RBM_ILLEGAL, // TYP_BLK,
387 RBM_ILLEGAL, // TYP_LCLBLK,
388 RBM_ILLEGAL, // TYP_UNKNOWN,
391 assert((unsigned)type < _countof(returnMap));
392 assert(returnMap[TYP_LONG] == RBM_LNGRET);
393 assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
394 assert(returnMap[TYP_REF] == RBM_INTRET);
395 assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
397 regMaskTP result = returnMap[type];
398 assert(result != RBM_ILLEGAL);
402 /*****************************************************************************/
404 /****************************************************************************/
408 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
413 for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
415 if (!varDsc->lvTracked)
418 if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
421 if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
422 printf("V%02u ", lclNum);
428 /*****************************************************************************/
430 /*****************************************************************************
432 * Debugging helpers - display variables liveness info.
435 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
439 printf("BB%02u: ", beg->bbNum);
442 dispLifeSet(comp, mask, beg->bbLiveIn);
446 dispLifeSet(comp, mask, beg->bbLiveOut);
449 if (beg->bbFlags & BBF_VISITED)
450 printf(" inner=%u", beg->bbFPinVars);
457 } while (beg != end);
460 #if FEATURE_STACK_FP_X87
461 void Compiler::raDispFPlifeInfo()
465 for (block = fgFirstBB; block; block = block->bbNext)
469 printf("BB%02u: in = [ ", block->bbNum);
470 dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
473 VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
474 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
478 noway_assert(stmt->gtOper == GT_STMT);
480 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
482 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
484 dispLifeSet(this, optAllFloatVars, life);
486 gtDispTree(tree, 0, NULL, true);
492 printf("BB%02u: out = [ ", block->bbNum);
493 dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
497 #endif // FEATURE_STACK_FP_X87
498 /*****************************************************************************/
500 /*****************************************************************************/
502 /*****************************************************************************/
504 void Compiler::raSetRegVarOrder(
505 var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
507 unsigned normalVarOrderSize;
508 const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
510 unsigned listIndex = 0;
511 regMaskTP usedReg = avoidReg;
513 noway_assert(*customVarOrderSize >= normalVarOrderSize);
517 /* First place the preferred registers at the start of customVarOrder */
522 for (index = 0; index < normalVarOrderSize; index++)
524 regNum = normalVarOrder[index];
525 regBit = genRegMask(regNum);
527 if (usedReg & regBit)
530 if (prefReg & regBit)
533 noway_assert(listIndex < normalVarOrderSize);
534 customVarOrder[listIndex++] = regNum;
541 #if CPU_HAS_BYTE_REGS
542 /* Then if byteable registers are preferred place them */
544 if (prefReg & RBM_BYTE_REG_FLAG)
546 for (index = 0; index < normalVarOrderSize; index++)
548 regNum = normalVarOrder[index];
549 regBit = genRegMask(regNum);
551 if (usedReg & regBit)
554 if (RBM_BYTE_REGS & regBit)
557 noway_assert(listIndex < normalVarOrderSize);
558 customVarOrder[listIndex++] = regNum;
563 #endif // CPU_HAS_BYTE_REGS
566 /* Now place all the non-preferred registers */
568 for (index = 0; index < normalVarOrderSize; index++)
570 regNumber regNum = normalVarOrder[index];
571 regMaskTP regBit = genRegMask(regNum);
573 if (usedReg & regBit)
577 noway_assert(listIndex < normalVarOrderSize);
578 customVarOrder[listIndex++] = regNum;
583 /* Now place the "avoid" registers */
585 for (index = 0; index < normalVarOrderSize; index++)
587 regNumber regNum = normalVarOrder[index];
588 regMaskTP regBit = genRegMask(regNum);
590 if (avoidReg & regBit)
592 noway_assert(listIndex < normalVarOrderSize);
593 customVarOrder[listIndex++] = regNum;
601 *customVarOrderSize = listIndex;
602 noway_assert(listIndex == normalVarOrderSize);
605 /*****************************************************************************
607 * Set up the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
610 void Compiler::raSetupArgMasks(RegState* regState)
612 /* Determine the registers holding incoming register arguments */
613 /* and setup raAvoidArgRegMask to the set of registers that we */
614 /* may want to avoid when enregistering the locals. */
616 regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
617 raAvoidArgRegMask = RBM_NONE;
619 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
621 for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
623 noway_assert(argDsc->lvIsParam);
625 // Is it a register argument ?
626 if (!argDsc->lvIsRegArg)
629 // only process args that apply to the current register file
630 if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
635 // Is it dead on entry ??
636 // In certain cases such as when compJmpOpUsed is true,
637 // or when we have a generic type context arg that we must report
638 // then the arguments have to be kept alive throughout the prolog.
639 // So we have to consider it as live on entry.
641 bool keepArgAlive = compJmpOpUsed;
642 if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
643 ((lvaTable + info.compTypeCtxtArg) == argDsc))
648 if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
653 // The code to set the regState for each arg is outlined for shared use
655 regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
657 // Do we need to try to avoid this incoming arg register?
659 // If it's not tracked, don't do the stuff below.
660 if (!argDsc->lvTracked)
663 // If the incoming arg is used after a call it is live across
664 // a call and will have to be allocated to a callee saved
665 // register anyway (a very common case).
667 // In this case it is pointless to ask the higher ref count
668 // locals to avoid using the incoming arg register
670 unsigned argVarIndex = argDsc->lvVarIndex;
672 /* Does the incoming register and the arg variable interfere? */
674 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
676 // No they do not interfere,
677 // so we add inArgReg to raAvoidArgRegMask
679 raAvoidArgRegMask |= genRegMask(inArgReg);
682 if (argDsc->lvType == TYP_DOUBLE)
684 // Avoid the double register argument pair for register allocation.
685 if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
687 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
694 #endif // LEGACY_BACKEND
696 // The code to set the regState for each arg is outlined for shared use
697 // by linear scan. (It is not shared for System V AMD64 platform.)
698 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
700 regNumber inArgReg = argDsc->lvArgReg;
701 regMaskTP inArgMask = genRegMask(inArgReg);
703 if (regState->rsIsFloat)
705 noway_assert(inArgMask & RBM_FLTARG_REGS);
707 else // regState is for the integer registers
709 // This might be the fixed return buffer register argument (on ARM64)
710 // We check and allow inArgReg to be theFixedRetBuffReg
711 if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
713 // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
714 noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
715 // We should have recorded the variable number for the return buffer arg
716 noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
718 else // we have a regular arg
720 noway_assert(inArgMask & RBM_ARG_REGS);
724 regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
727 if (argDsc->lvType == TYP_DOUBLE)
729 if (info.compIsVarArgs || opts.compUseSoftFP)
731 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
732 assert(!regState->rsIsFloat);
736 assert(regState->rsIsFloat);
737 assert(emitter::isDoubleReg(inArgReg));
739 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
741 else if (argDsc->lvType == TYP_LONG)
743 assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
744 assert(!regState->rsIsFloat);
745 regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
747 #endif // _TARGET_ARM_
749 #if FEATURE_MULTIREG_ARGS
750 if (varTypeIsStruct(argDsc->lvType))
752 if (argDsc->lvIsHfaRegArg())
754 assert(regState->rsIsFloat);
755 unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
756 for (unsigned i = 1; i < cSlots; i++)
758 assert(inArgReg + i <= LAST_FP_ARGREG);
759 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
764 unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
765 for (unsigned i = 1; i < cSlots; i++)
767 regNumber nextArgReg = (regNumber)(inArgReg + i);
768 if (nextArgReg > REG_ARG_LAST)
772 assert(regState->rsIsFloat == false);
773 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
777 #endif // FEATURE_MULTIREG_ARGS
782 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
784 /*****************************************************************************
786 * Assign variables to live in registers, etc.
789 void Compiler::raAssignVars()
793 printf("*************** In raAssignVars()\n");
795 /* We need to keep track of which registers we ever touch */
797 codeGen->regSet.rsClearRegsModified();
799 #if FEATURE_STACK_FP_X87
800 // FP register allocation
801 raEnregisterVarsStackFP();
802 raGenerateFPRefCounts();
805 /* Predict registers used by code generation */
806 rpPredictRegUse(); // New reg predictor/allocator
808 // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
809 // so that the gc tracking logic and lvMustInit logic will ignore them.
814 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
816 if (varDsc->lvType != TYP_STRUCT)
819 if (!varDsc->lvPromoted)
822 if (varDsc->lvIsParam)
825 if (varDsc->lvRefCnt > 0)
831 printf("Mark unused struct local V%02u\n", lclNum);
834 lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
836 if (promotionType == PROMOTION_TYPE_DEPENDENT)
838 // This should only happen when all its field locals are unused as well.
840 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
843 noway_assert(lvaTable[varNum].lvRefCnt == 0);
844 lvaTable[varNum].lvIsStructField = false;
849 noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
852 varDsc->lvUnusedStruct = 1;
855 // Change such struct locals to ints
857 varDsc->lvType = TYP_INT; // Bash to a non-gc type.
858 noway_assert(!varDsc->lvTracked);
859 noway_assert(!varDsc->lvRegister);
860 varDsc->lvOnFrame = false; // Force it not to be onstack.
861 varDsc->lvMustInit = false; // Force not to init it.
862 varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
866 /*****************************************************************************/
867 /*****************************************************************************/
869 /*****************************************************************************
871 * Given a regNumber return the correct predictReg enum value
874 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
876 return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
879 /*****************************************************************************
881 * Given a varIndex return the correct predictReg enum value
884 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
886 return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
889 /*****************************************************************************
891 * Given a rpPredictReg return the correct varNumber value
894 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
896 return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
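// Note: the two helpers above are exact inverses of one another, so
// rpGetVarIndexForPredict(rpGetPredictForVarIndex(i)) == i for any tracked var index i.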
899 /*****************************************************************************
901 * Given a rpPredictReg return true if it specifies a Txx register
904 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
906 if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
912 /*****************************************************************************
914 * Given a regmask return the correct predictReg enum value
917 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
919 rpPredictReg result = PREDICT_NONE;
920 if (regmask != 0) /* Check if regmask has any bits set */
922 if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
925 assert(FitsIn<DWORD>(regmask));
926 BitScanForward(&reg, (DWORD)regmask);
927 return rpGetPredictForReg((regNumber)reg);
930 #if defined(_TARGET_ARM_)
931 /* It has multiple bits set */
932 else if (regmask == (RBM_R0 | RBM_R1))
934 result = PREDICT_PAIR_R0R1;
936 else if (regmask == (RBM_R2 | RBM_R3))
938 result = PREDICT_PAIR_R2R3;
940 #elif defined(_TARGET_X86_)
941 /* It has multiple bits set */
942 else if (regmask == (RBM_EAX | RBM_EDX))
944 result = PREDICT_PAIR_EAXEDX;
946 else if (regmask == (RBM_ECX | RBM_EBX))
948 result = PREDICT_PAIR_ECXEBX;
951 else /* It doesn't match anything */
953 result = PREDICT_NONE;
954 assert(!"unreachable");
955 NO_WAY("bad regpair");
961 /*****************************************************************************
963 * Record a variable to register(s) interference
966 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
969 bool addedIntf = false;
973 for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
975 regMaskTP regBit = genRegMask(regNum);
977 if (regMask & regBit)
979 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
980 if (!VarSetOps::IsEmpty(this, newIntf))
985 VarSetOps::Iter newIntfIter(this, newIntf);
987 while (newIntfIter.NextElem(&varNum))
989 unsigned lclNum = lvaTrackedToVarNum[varNum];
990 LclVarDsc* varDsc = &lvaTable[varNum];
991 #if FEATURE_FP_REGALLOC
992 // Only print the useful interferences
993 // i.e. floating point LclVar interference with floating point registers
994 // or integer LclVar interference with general purpose registers
995 if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
998 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
999 getRegName(regNum), msg);
1005 VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1017 /*****************************************************************************
1019 * Record a new variable to variable(s) interference
1022 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
1024 noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1025 noway_assert(!VarSetOps::IsEmpty(this, intfVar));
1027 VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1028 VarSetOps::AddElemD(this, oneVar, varNum);
1030 bool newIntf = fgMarkIntf(intfVar, oneVar);
1033 rpAddedVarIntf = true;
1036 if (verbose && newIntf)
1038 for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1040 if (VarSetOps::IsMember(this, intfVar, oneNum))
1042 unsigned lclNum = lvaTrackedToVarNum[varNum];
1043 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1044 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1054 /*****************************************************************************
1056 * Determine preferred register mask for a given predictReg value
1059 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
1061 if (rpHasVarIndexForPredict(predictReg))
1062 predictReg = PREDICT_REG;
1064 noway_assert((unsigned)predictReg < _countof(rpPredictMap));
1065 noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1067 regMaskTP regAvailForType = rpPredictMap[predictReg];
1068 if (varTypeIsFloating(type))
1070 regAvailForType &= RBM_ALLFLOAT;
1074 regAvailForType &= RBM_ALLINT;
1077 if (type == TYP_DOUBLE)
1079 if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1081 // Fix 388433 ARM JitStress WP7
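// (Illustrative note: a TYP_DOUBLE occupies an even/odd float register pair here, so once
// we know the even half is in the allowed set, shifting the mask left by one adds the
// matching odd half.)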
1082 if ((regAvailForType & RBM_DBL_REGS) != 0)
1084 regAvailForType |= (regAvailForType << 1);
1088 regAvailForType = RBM_NONE;
1093 return regAvailForType;
1096 /*****************************************************************************
1098 * Predict register choice for a type.
1100 * Adds the predicted registers to rsModifiedRegsMask.
1102 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1104 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1108 // Add any reserved register to the lockedRegs
1109 lockedRegs |= codeGen->regSet.rsMaskResvd;
1111 /* Clear out the lockedRegs from preferReg */
1112 preferReg &= ~lockedRegs;
1114 if (rpAsgVarNum != -1)
1116 noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
1118 /* Don't pick the register used by rpAsgVarNum either */
1119 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1120 noway_assert(tgtVar->lvRegNum != REG_STK);
1122 preferReg &= ~genRegMask(tgtVar->lvRegNum);
1136 #ifdef _TARGET_AMD64_
1138 #endif // _TARGET_AMD64_
1140 // expand preferReg to all non-locked registers if no bits set
1141 preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1143 if (preferReg == 0) // no bits set?
1145 // Add one predefined spill choice register if no bits set.
1146 // (The jit will introduce one spill temp)
1147 preferReg |= RBM_SPILL_CHOICE;
1148 rpPredictSpillCnt++;
1152 printf("Predict one spill temp\n");
1158 /* Iterate the registers in the order specified by rpRegTmpOrder */
1160 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1162 regNum = rpRegTmpOrder[index];
1163 regBits = genRegMask(regNum);
1165 if ((preferReg & regBits) == regBits)
1171 /* Otherwise we have allocated all registers, so do nothing */
1174 #ifndef _TARGET_AMD64_
1177 if ((preferReg == 0) || // no bits set?
1178 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1180 // expand preferReg to all non-locked registers
1181 preferReg = RBM_ALLINT & ~lockedRegs;
1184 if (preferReg == 0) // no bits set?
1186 // Add EAX:EDX to the registers
1187 // (The jit will introduce two spill temps)
1188 preferReg = RBM_PAIR_TMP;
1189 rpPredictSpillCnt += 2;
1192 printf("Predict two spill temps\n");
1195 else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
1197 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1199 // Add EAX to the registers
1200 // (The jit will introduce one spill temp)
1201 preferReg |= RBM_PAIR_TMP_LO;
1205 // Add EDX to the registers
1206 // (The jit will introduce one spill temp)
1207 preferReg |= RBM_PAIR_TMP_HI;
1209 rpPredictSpillCnt++;
1212 printf("Predict one spill temp\n");
1217 regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1218 if (regPair != REG_PAIR_NONE)
1220 regBits = genRegPairMask(regPair);
1224 /* Otherwise we have allocated all registers, so do nothing */
1226 #endif // _TARGET_AMD64_
1235 #if FEATURE_FP_REGALLOC
1236 regMaskTP restrictMask;
1237 restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
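// Note: restrictMask always includes RBM_FLT_CALLEE_TRASH, so (assuming the float
// spill-choice registers are a subset of the callee-trash set) the assert below holds even
// under the most restrictive COMPlus_JitRegisterFP setting.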
1238 assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1240 // expand preferReg to all available non-locked registers if no bits set
1241 preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1242 regMaskTP preferDouble;
1243 preferDouble = preferReg & (preferReg >> 1);
1245 if ((preferReg == 0) // no bits set?
1247 || ((type == TYP_DOUBLE) &&
1248 ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1252 // Add one predefined spill choice register if no bits set.
1253 // (The jit will introduce one spill temp)
1254 preferReg |= RBM_SPILL_CHOICE_FLT;
1255 rpPredictSpillCnt++;
1259 printf("Predict one spill temp (float)\n");
1263 assert(preferReg != 0);
1265 /* Iterate the registers in the order specified by raRegFltTmpOrder */
1267 for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1269 regNum = raRegFltTmpOrder[index];
1270 regBits = genRegMask(regNum);
1272 if (varTypeIsFloating(type))
1275 if (type == TYP_DOUBLE)
1277 if ((regBits & RBM_DBL_REGS) == 0)
1279 continue; // We must restrict the set to the double registers
1283 // TYP_DOUBLE use two consecutive registers
1284 regBits |= genRegMask(REG_NEXT(regNum));
1288 // See if COMPlus_JitRegisterFP is restricting this FP register
1290 if ((restrictMask & regBits) != regBits)
1294 if ((preferReg & regBits) == regBits)
1299 /* Otherwise we have allocated all registers, so do nothing */
1302 #else // !FEATURE_FP_REGALLOC
1309 noway_assert(!"unexpected type in reg use prediction");
1312 /* Abnormal return */
1313 noway_assert(!"Ran out of registers in rpPredictRegPick");
1318 * If during the first prediction we need to allocate
1319 * one of the registers that we used for coloring locals
1320 * then flag this by setting rpPredictAssignAgain.
1321 * We will have to go back and repredict the registers
1323 if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1324 rpPredictAssignAgain = true;
1326 // Add a register interference to each of the last use variables
1327 if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1329 VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1330 VarSetOps::Assign(this, lastUse, rpLastUseVars);
1331 VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1332 VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1333 // While we still have any lastUse or inPlaceUse bits
1334 VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1336 VARSET_TP varAsSet(VarSetOps::MakeEmpty(this));
1337 VarSetOps::Iter iter(this, useUnion);
1338 unsigned varNum = 0;
1339 while (iter.NextElem(&varNum))
1341 // We'll need this for one of the calls...
1342 VarSetOps::ClearD(this, varAsSet);
1343 VarSetOps::AddElemD(this, varAsSet, varNum);
1345 // Is this var a last use?
1346 if (VarSetOps::IsMember(this, lastUse, varNum))
1348 // Record a register to variable interference
1349 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1352 // Is this var used in place?
1353 if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1355 // Record a register to variable interference
1356 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
1360 codeGen->regSet.rsSetRegsModified(regBits);
1365 /*****************************************************************************
1367 * Predict integer register use for generating an address mode for a tree,
1368 * by setting tree->gtUsedRegs to all registers used by this tree and its children.
1370 * tree - is the child of a GT_IND node
1371 * type - the type of the GT_IND node (floating point/integer)
1372 * lockedRegs - are the registers which are currently held by
1373 * a previously evaluated node.
1374 * rsvdRegs - registers which should not be allocated because they will
1375 * be needed to evaluate a node in the future
1376 * - Also if rsvdRegs has the RBM_LASTUSE bit set then
1377 * the rpLastUseVars set should be saved and restored
1378 * so that we don't add any new variables to rpLastUseVars
1379 * lenCSE - is non-NULL only when we have a lenCSE expression
1381 * Return the scratch registers to be held by this tree. (one or two registers
1382 * to form an address expression)
1385 regMaskTP Compiler::rpPredictAddressMode(
1386 GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1391 genTreeOps oper = tree->OperGet();
1398 bool hasTwoAddConst = false;
1399 bool restoreLastUseVars = false;
1400 VARSET_TP oldLastUseVars(VarSetOps::MakeEmpty(this));
1402 /* do we need to save and restore the rpLastUseVars set ? */
1403 if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1405 restoreLastUseVars = true;
1406 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1408 rsvdRegs &= ~RBM_LASTUSE;
1410 /* if not an add, then just force it to a register */
1414 if (oper == GT_ARR_ELEM)
1416 regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1425 op1 = tree->gtOp.gtOp1;
1426 op2 = tree->gtOp.gtOp2;
1427 rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1429 /* look for (x + y) + icon address mode */
1431 if (op2->OperGet() == GT_CNS_INT)
1433 cns = op2->gtIntCon.gtIconVal;
1435 /* if not an add, then just force op1 into a register */
1436 if (op1->OperGet() != GT_ADD)
1439 hasTwoAddConst = true;
1441 /* Record the 'rev' flag, reverse evaluation order */
1442 rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1444 op2 = op1->gtOp.gtOp2;
1445 op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1448 /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1451 if (op2->OperGet() == GT_LSH)
1453 if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1455 sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1456 opTemp = op2->gtOp.gtOp1;
1470 if (opTemp->OperGet() == GT_NOP)
1472 opTemp = opTemp->gtOp.gtOp1;
1475 // Is this a const operand?
1476 if (opTemp->OperGet() == GT_CNS_INT)
1478 // Compute the new cns value that Codegen will end up using
1479 cns += (opTemp->gtIntCon.gtIconVal << sh);
1485 /* Check for LSH in op1 slot */
1487 if (op1->OperGet() != GT_LSH)
1490 opTemp = op1->gtOp.gtOp2;
1492 if (opTemp->OperGet() != GT_CNS_INT)
1495 sh = opTemp->gtIntCon.gtIconVal;
1497 /* Check for LSH of 0, special case */
1501 #if defined(_TARGET_XARCH_)
1503 /* Check for LSH of 1 2 or 3 */
1507 #elif defined(_TARGET_ARM_)
1509 /* Check for LSH of 1 to 30 */
1519 /* Matched a leftShift by 'sh' subtree, move op1 down */
1520 op1 = op1->gtOp.gtOp1;
1524 /* Now we have to evaluate op1 and op2 into registers */
1526 /* Evaluate op1 and op2 in the correct order */
1529 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1530 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1534 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1535 op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1538 /* If op1 and op2 must be spilled and reloaded then
1539 * op1 and op2 might be reloaded into the same register
1540 * This can only happen when all the registers are lockedRegs
1542 if ((op1Mask == op2Mask) && (op1Mask != 0))
1544 /* We'll need to grab a different register for op2 */
1545 op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1549 // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1550 // [op2 + op1<<sh + cns]
1551 // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1553 if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1555 op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1559 // On the ARM we will need at least one scratch register for trees that have this form:
1560 // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
1561 // or for a float/double or long when we have both op1 and op2
1562 // or when we have a 'cns' that is too large for the ld/st instruction
1564 if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1566 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1570 // If we create a CSE that immediately dies then we may need to add an additional register interference
1571 // so we don't color the CSE into R3
1573 if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1575 opTemp = op2->gtOp.gtOp2;
1576 if (opTemp->OperGet() == GT_LCL_VAR)
1578 unsigned varNum = opTemp->gtLclVar.gtLclNum;
1579 LclVarDsc* varDsc = &lvaTable[varNum];
1581 if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1583 rpRecordRegIntf(RBM_TMP_0,
1584 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1590 regMask = (op1Mask | op2Mask);
1591 tree->gtUsedRegs = (regMaskSmall)regMask;
1596 /* now we have to evaluate op1 into a register */
1598 op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1603 // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1606 if (!codeGen->validDispForLdSt(cns, type))
1608 op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1612 regMask = (op1Mask | op2Mask);
1613 tree->gtUsedRegs = (regMaskSmall)regMask;
1618 #if !CPU_LOAD_STORE_ARCH
1619 if (oper == GT_CNS_INT)
1621 /* Indirect of a constant does not require a register */
1627 /* now we have to evaluate tree into a register */
1628 regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1632 regMaskTP regUse = tree->gtUsedRegs;
1634 if (!VarSetOps::IsEmpty(this, compCurLife))
1636 // Add interference between the current set of life variables and
1637 // the set of temporary registers need to evaluate the sub tree
1640 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1644 /* Do we need to restore the oldLastUseVars value */
1645 if (restoreLastUseVars)
1648 * If we used a GT_ASG targeted register then we need to add
1649 * a variable interference between any new last use variables
1650 * and the GT_ASG targeted register
1652 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1654 rpRecordVarIntf(rpAsgVarNum,
1655 VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1657 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1663 /*****************************************************************************
1668 void Compiler::rpPredictRefAssign(unsigned lclNum)
1670 LclVarDsc* varDsc = lvaTable + lclNum;
1672 varDsc->lvRefAssign = 1;
1674 #if NOGC_WRITE_BARRIERS
1678 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1679 printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1680 varDsc->lvVarIndex);
1684 /* Make sure that write barrier pointer variables never land in EDX */
1685 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1686 #endif // NOGC_WRITE_BARRIERS
1689 /*****************************************************************************
1691 * Predict the internal temp physical register usage for a block assignment tree,
1692 * by setting tree->gtUsedRegs.
1693 * Records the internal temp physical register usage for this tree.
1694 * Returns a mask of interfering registers for this tree.
1696 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1697 * to the set of scratch registers needed when evaluating the tree.
1698 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1699 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1700 * predict additional internal temp physical registers to spill into.
1702 * tree - is the block assignment tree (its gtOp1 is the destination block node)
1703 * predictReg - what type of register does the tree need
1704 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1705 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1706 * rsvdRegs - registers which should not be allocated because they will
1707 * be needed to evaluate a node in the future
1708 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1709 * the rpLastUseVars set should be saved and restored
1710 * so that we don't add any new variables to rpLastUseVars.
1712 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
1713 rpPredictReg predictReg,
1714 regMaskTP lockedRegs,
1717 regMaskTP regMask = RBM_NONE;
1718 regMaskTP interferingRegs = RBM_NONE;
1720 bool hasGCpointer = false;
1721 bool dstIsOnStack = false;
1722 bool useMemHelper = false;
1723 bool useBarriers = false;
1724 GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
1725 GenTreePtr dstAddr = dst->Addr();
1726 GenTreePtr srcAddrOrFill = tree->gtGetOp2IfPresent();
1728 size_t blkSize = dst->gtBlkSize;
1730 hasGCpointer = (dst->HasGCPtr());
1732 bool isCopyBlk = tree->OperIsCopyBlkOp();
1733 bool isCopyObj = isCopyBlk && hasGCpointer;
1734 bool isInitBlk = tree->OperIsInitBlkOp();
1738 assert(srcAddrOrFill->OperIsIndir());
1739 srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1743 // For initBlk, we don't need to worry about the GC pointers.
1744 hasGCpointer = false;
1751 dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1756 if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1758 useMemHelper = true;
1764 useMemHelper = true;
1767 if (hasGCpointer && !dstIsOnStack)
1774 // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
1776 if (!useMemHelper && !useBarriers)
1778 bool useLoop = false;
1779 unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1781 // A mask to use to force the predictor to choose low registers (to reduce code size)
1782 regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1784 // Allow the src and dst to be used in place, unless we use a loop, in which
1785 // case we will need scratch registers as we will be writing to them.
1786 rpPredictReg srcAndDstPredict = PREDICT_REG;
1788 // Will we be using a loop to implement this INITBLK/COPYBLK?
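// (Illustrative example: copying a 64-byte struct with 4-byte pointers gives
// fullStoreCount == 16, so a loop is used and the low-register preference above is dropped
// in favor of scratch registers.)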
1789 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1792 avoidReg = RBM_NONE;
1793 srcAndDstPredict = PREDICT_SCRATCH_REG;
1796 if (tree->gtFlags & GTF_REVERSE_OPS)
1798 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1799 dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1800 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1804 regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1805 srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1806 regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1809 // We need at least one scratch register for a copyBlk
1812 // Pick a low register to reduce the code size
1813 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1820 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1821 // Pick another low register to reduce the code size
1822 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1825 // We need a loop index register
1826 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1829 tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1831 return interferingRegs;
1834 // What order should the Dest, Val/Src, and Size be calculated
1835 GenTreePtr opsPtr[3];
1836 regMaskTP regsPtr[3];
1838 #if defined(_TARGET_XARCH_)
1839 fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1841 // We're going to use these, might as well make them available now
1843 codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1845 codeGen->regSet.rsSetRegsModified(RBM_ESI);
1847 #elif defined(_TARGET_ARM_)
1851 // For all other cases that involve non-constants, we just call memcpy/memset
1853 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1854 interferingRegs |= RBM_CALLEE_TRASH;
1857 printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1862 assert(useBarriers);
1865 fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1867 // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1868 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1871 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1874 #else // !_TARGET_X86_ && !_TARGET_ARM_
1875 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1876 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1877 regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1878 regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1879 opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1880 regMask |= regsPtr[0];
1881 opsPtr[0]->gtUsedRegs |= regsPtr[0];
1882 rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1884 regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1885 opsPtr2RsvdRegs | RBM_LASTUSE);
1886 regMask |= regsPtr[1];
1887 opsPtr[1]->gtUsedRegs |= regsPtr[1];
1888 rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1890 regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1891 if (opsPtr[2] == nullptr)
1893 // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1894 // Note that it is quite possible that no register is required, but this preserves the existing behavior.
1896 regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1897 rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1901 regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1902 opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1904 regMask |= opsPtr2UsedRegs;
1906 tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1907 return interferingRegs;
1910 /*****************************************************************************
1912 * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1913 * Returns a regMask with the internal temp physical register usage for this tree.
1915 * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1916 * to the set of scratch registers needed when evaluating the tree.
1917 * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1918 * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1919 * predict additional internal temp physical registers to spill into.
1921 * tree - is the tree whose register usage is being predicted
1922 * predictReg - what type of register does the tree need
1923 * lockedRegs - are the registers which are currently held by a previously evaluated node.
1924 * Don't modify lockedRegs as it is used at the end to compute a spill mask.
1925 * rsvdRegs - registers which should not be allocated because they will
1926 * be needed to evaluate a node in the future
1927 * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1928 * the rpLastUseVars set should be saved and restored
1929 * so that we don't add any new variables to rpLastUseVars.
1932 #pragma warning(disable : 4701)
1935 #pragma warning(push)
1936 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1938 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
1939 rpPredictReg predictReg,
1940 regMaskTP lockedRegs,
1943 regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
1946 rpPredictReg op1PredictReg;
1947 rpPredictReg op2PredictReg;
1948 LclVarDsc* varDsc = NULL;
1949 VARSET_TP oldLastUseVars(VarSetOps::UninitVal());
1951 VARSET_TP varBits(VarSetOps::UninitVal());
1952 VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1954 bool restoreLastUseVars = false;
1955 regMaskTP interferingRegs = RBM_NONE;
1958 // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1960 noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1961 noway_assert(RBM_ILLEGAL);
1962 noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1963 /* impossible values, to make sure that we set them */
1964 tree->gtUsedRegs = RBM_ILLEGAL;
1967 /* Figure out what kind of a node we have */
1969 genTreeOps oper = tree->OperGet();
1970 var_types type = tree->TypeGet();
1971 unsigned kind = tree->OperKind();
1973 // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1974 genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1975 if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1976 predictReg = PREDICT_NONE;
1977 else if (rpHasVarIndexForPredict(predictReg))
1979 // The only place where predictReg is set to a var is in the PURE
1980 // assignment case where varIndex is the var being assigned to.
1981 // We need to check whether the variable is used between here and
1982 // its redefinition.
1983 unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1984 unsigned lclNum = lvaTrackedToVarNum[varIndex];
1986 for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1988 if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1990 // Is this the pure assignment?
1991 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1993 predictReg = PREDICT_SCRATCH_REG;
2002 if (rsvdRegs & RBM_LASTUSE)
2004 restoreLastUseVars = true;
2005 VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2006 rsvdRegs &= ~RBM_LASTUSE;
2009 /* Is this a constant or leaf node? */
2011 if (kind & (GTK_CONST | GTK_LEAF))
2013 bool lastUse = false;
2014 regMaskTP enregMask = RBM_NONE;
2020 // Codegen for floating point constants on the ARM is currently
2021 // movw/movt rT1, <lo32 bits>
2022 // movw/movt rT2, <hi32 bits>
2023 // vmov.i2d dT0, rT1,rT2
2025 // For TYP_FLOAT one integer register is required
2027 // These integer register(s) immediately die
2028 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2029 if (type == TYP_DOUBLE)
2031 // For TYP_DOUBLE a second integer register is required
2033 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2036 // We also need a floating point register that we keep
2038 if (predictReg == PREDICT_NONE)
2039 predictReg = PREDICT_SCRATCH_REG;
2041 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2042 tree->gtUsedRegs = regMask | tmpMask;
2049 if (rpHasVarIndexForPredict(predictReg))
2051 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2052 rpAsgVarNum = tgtIndex;
2054 // We don't need any register as we plan on writing to the rpAsgVarNum register
2055 predictReg = PREDICT_NONE;
2057 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2058 tgtVar->lvDependReg = true;
2060 if (type == TYP_LONG)
2062 assert(oper == GT_CNS_LNG);
2064 if (tgtVar->lvOtherReg == REG_STK)
2066 // Well we do need one register for a partially enregistered long
2068 predictReg = PREDICT_SCRATCH_REG;
2074 #if !CPU_LOAD_STORE_ARCH
2075 /* If the constant is a handle then it will need to have a relocation
2076 applied to it. It will need to be loaded into a register.
2077 But never throw away an existing hint.
2079 if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2082 if (predictReg == PREDICT_NONE)
2083 predictReg = PREDICT_SCRATCH_REG;
2092 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2093 (genTypeSize(type) < sizeof(int)))
2095 predictReg = PREDICT_SCRATCH_REG;
2098 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2100 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2102 // These integer register(s) immediately die
2103 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2104 // Two integer registers are required for a TYP_DOUBLE
2105 if (type == TYP_DOUBLE)
2106 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2108 // We need a temp register in some cases of loads/stores to a class var
2109 if (predictReg == PREDICT_NONE)
2111 predictReg = PREDICT_SCRATCH_REG;
2114 if (rpHasVarIndexForPredict(predictReg))
2116 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2117 rpAsgVarNum = tgtIndex;
2119 // We don't need any register as we plan on writing to the rpAsgVarNum register
2120 predictReg = PREDICT_NONE;
2122 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2123 tgtVar->lvDependReg = true;
2125 if (type == TYP_LONG)
2127 if (tgtVar->lvOtherReg == REG_STK)
2129 // Well we do need one register for a partially enregistered long
2131 predictReg = PREDICT_SCRATCH_REG;
2139 // Check for a misalignment on a Floating Point field
2141 if (varTypeIsFloating(type))
2143 if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2145 // These integer register(s) immediately die
2146 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2147 // Two integer registers are required for a TYP_DOUBLE
2148 if (type == TYP_DOUBLE)
2149 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2158 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2160 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2161 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2162 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2164 #if FEATURE_STACK_FP_X87
2165 // If it's a floating point var, there's nothing to do
2166 if (varTypeIsFloating(type))
2168 tree->gtUsedRegs = RBM_NONE;
2174 // If the variable is already a register variable, no need to go further.
2175 if (oper == GT_REG_VAR)
2178 /* Apply the type of predictReg to the LCL_VAR */
2180 if (predictReg == PREDICT_REG)
2183 if (varDsc->lvRegNum == REG_STK)
2188 else if (predictReg == PREDICT_SCRATCH_REG)
2190 noway_assert(predictReg == PREDICT_SCRATCH_REG);
2192 /* Is this the last use of a local var? */
2195 if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2196 goto PREDICT_REG_COMMON;
2199 else if (rpHasVarIndexForPredict(predictReg))
2201 /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2203 unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2204 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2205 VarSetOps::MakeSingleton(this, tgtIndex1);
2207 noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2208 noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2209 #ifndef _TARGET_AMD64_
2210 // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2211 // so this assert is meaningless
2212 noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2213 #endif // !_TARGET_AMD64_
2215 if (varDsc->lvTracked)
2218 srcIndex = varDsc->lvVarIndex;
2220 // If this register has its last use here then we will prefer
2221 // to color to the same register as tgtVar.
2225 * Add an entry in the lvaVarPref graph to indicate
2226 * that it would be worthwhile to color these two variables
2227 * into the same physical register.
2228 * This will help us avoid having an extra copy instruction
2230 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2231 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2234 // Add a variable interference from srcIndex to each of the last use variables
2235 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2237 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2240 rpAsgVarNum = tgtIndex1;
2242 /* We will rely on the target enregistered variable from the GT_ASG */
2249 if (genIsValidFloatReg(varDsc->lvRegNum))
2251 enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2255 enregMask = genRegMask(varDsc->lvRegNum);
2259 if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2261 // We need to compute the intermediate value using a TYP_DOUBLE
2262 // but we are storing the result in a TYP_FLOAT enregistered variable
2269 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2270 if (enregMask & (rsvdRegs | lockedRegs))
2274 #ifndef _TARGET_64BIT_
2275 if (type == TYP_LONG)
2277 if (varDsc->lvOtherReg != REG_STK)
2279 tmpMask = genRegMask(varDsc->lvOtherReg);
2280 enregMask |= tmpMask;
2282 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2283 if (tmpMask & (rsvdRegs | lockedRegs))
2286 else // lvOtherReg == REG_STK
2291 #endif // _TARGET_64BIT_
2294 varDsc->lvDependReg = true;
2298 /* Does not need a register */
2299 predictReg = PREDICT_NONE;
2300 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2301 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2303 else // (grabCount > 0)
2305 #ifndef _TARGET_64BIT_
2306 /* For TYP_LONG, if we only need one register, change the type to TYP_INT */
2307 if ((type == TYP_LONG) && (grabCount == 1))
2309 /* We will need to pick one register */
2311 // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2312 VarSetOps::UnionD(this, rpUseInPlace, varBits);
2314 noway_assert((type == TYP_DOUBLE) ||
2315 (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2316 #else // !_TARGET_64BIT_
2317 noway_assert(grabCount == 1);
2318 #endif // !_TARGET_64BIT_
2321 else if (type == TYP_STRUCT)
2324 // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2325 // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2326 // As a workaround we just bash it to PREDICT_NONE here
2328 if (predictReg != PREDICT_NONE)
2329 predictReg = PREDICT_NONE;
2331 // Currently predictReg is saying that we will not need any scratch registers
2332 noway_assert(predictReg == PREDICT_NONE);
2334 /* We may need to sign or zero extend a small type when pushing a struct */
2335 if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2337 for (unsigned varNum = varDsc->lvFieldLclStart;
2338 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2340 LclVarDsc* fldVar = lvaTable + varNum;
2342 if (fldVar->lvStackAligned())
2344 // When we are stack aligned, Codegen will just use
2345 // a push instruction and thus doesn't need any register
2346 // since we can push either a register or a stack frame location
2350 if (varTypeIsByte(fldVar->TypeGet()))
2352 // We will need to reserve one byteable register,
2355 predictReg = PREDICT_SCRATCH_REG;
2356 #if CPU_HAS_BYTE_REGS
2357 // It is best to enregister this fldVar in a byteable register
2359 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2362 else if (varTypeIsShort(fldVar->TypeGet()))
2364 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2365 // If fldVar is not enregistered then we will need a scratch register
2367 if (!isEnregistered)
2369 // We will need either an int register or a byte register
2370 // If we are not requesting a byte register we will request an int register
2372 if (type != TYP_BYTE)
2374 predictReg = PREDICT_SCRATCH_REG;
2382 regMaskTP preferReg = rpPredictRegMask(predictReg, type);
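// Record the preference when the type fits in a single register, or when the preferred mask doesn't name more registers than the type needs.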
2385 if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2387 varDsc->addPrefReg(preferReg, this);
2391 break; /* end of case GT_LCL_VAR */
2394 tree->gtUsedRegs = RBM_NONE;
2397 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2398 // Mark the registers required to emit a tailcall profiler callback
2399 if (compIsProfilerHookNeeded())
2401 tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2408 } /* end of switch (oper) */
2410 /* If we don't need to evaluate to register, regmask is the empty set */
2411 /* Otherwise we grab a temp for the local variable */
2413 if (predictReg == PREDICT_NONE)
2417 regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2419 if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2421 /* We need to sign or zero extend a small type when pushing a struct */
2422 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2424 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2425 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2427 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2430 LclVarDsc* fldVar = lvaTable + varNum;
2431 if (fldVar->lvTracked)
2433 VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2434 rpRecordRegIntf(regMask, fldBit DEBUGARG(
2435 "need scratch register when pushing a small field of a struct"));
2441 /* Update the set of lastUse variables that we encountered so far */
2444 VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2445 VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2448 * Add interference from any previously locked temps into this last use variable.
2452 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2455 * Add interference from any reserved temps into this last use variable.
2459 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2462 * For partially enregistered longs add an interference with the
2463 * register returned by rpPredictRegPick
2465 if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2467 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2471 tree->gtUsedRegs = (regMaskSmall)regMask;
2475 /* Is it a 'simple' unary/binary operator? */
2477 if (kind & GTK_SMPOP)
2479 GenTreePtr op1 = tree->gtOp.gtOp1;
2480 GenTreePtr op2 = tree->gtGetOp2IfPresent();
2482 GenTreePtr opsPtr[3];
2483 regMaskTP regsPtr[3];
2485 VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2491 /* Is the value being assigned into a LCL_VAR? */
2492 if (op1->gtOper == GT_LCL_VAR)
2494 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2496 /* Are we assigning a LCL_VAR the result of a call? */
2497 if (op2->gtOper == GT_CALL)
2499 /* Set a preferred register for the LCL_VAR */
2500 if (isRegPairType(varDsc->TypeGet()))
2501 varDsc->addPrefReg(RBM_LNGRET, this);
2502 else if (!varTypeIsFloating(varDsc->TypeGet()))
2503 varDsc->addPrefReg(RBM_INTRET, this);
2504 #ifdef _TARGET_AMD64_
2506 varDsc->addPrefReg(RBM_FLOATRET, this);
2509 * When assigning the result of a call we don't
2510 * bother trying to target the right side of the
2511 * assignment, since we have a fixed calling convention.
2514 else if (varDsc->lvTracked)
2516 // We interfere with uses in place
2517 if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2519 rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2522 // Did we predict that this local will be fully enregistered?
2523 // and the assignment type is the same as the expression type?
2524 // and it is dead on the right side of the assignment?
2525 // and we currently have no other rpAsgVarNum active?
2527 if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2528 (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2531 // Yes, we should try to target the right side (op2) of this
2532 // assignment into the (enregistered) tracked variable.
2535 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2536 op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2538 // Remember that this is a new use in place
2540 // We've added "new UseInPlace"; remove from the global set.
2541 VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2543 // Note that later when we walk down to the leaf node for op2
2544 // if we decide to actually use the register for the 'varDsc'
2545 // to enregister the operand, then we will set rpAsgVarNum to
2546 // varDsc->lvVarIndex, by extracting this value using
2547 // rpGetVarIndexForPredict()
2549 // Also we reset rpAsgVarNum back to -1 after we have finished
2550 // predicting the current GT_ASG node
2556 else if (tree->OperIsBlkOp())
2558 interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2575 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2576 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2578 /* Is the value being assigned into an enregistered LCL_VAR? */
2579 /* For debug code we only allow a simple op2 to be assigned */
2580 if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2582 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2583 /* Did we predict that this local will be enregistered? */
2584 if (varDsc->lvRegNum != REG_STK)
2586 /* Yes, we can use "reg <op>= addr" */
2588 op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2589 op2PredictReg = PREDICT_NONE;
2596 #if CPU_LOAD_STORE_ARCH
2599 op1PredictReg = PREDICT_REG;
2600 op2PredictReg = PREDICT_REG;
2606 * Otherwise, initialize the normal forcing of operands:
2609 op1PredictReg = PREDICT_ADDR;
2610 op2PredictReg = PREDICT_REG;
2615 #if !CPU_LOAD_STORE_ARCH
2616 if (op2PredictReg != PREDICT_NONE)
2618 /* Is the value being assigned a simple one? */
2619 if (rpCanAsgOperWithoutReg(op2, false))
2620 op2PredictReg = PREDICT_NONE;
2624 bool simpleAssignment;
2625 simpleAssignment = false;
2627 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2629 // Add a variable interference from the assign target
2630 // to each of the last use variables
2631 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2633 varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2635 if (varDsc->lvTracked)
2637 unsigned varIndex = varDsc->lvVarIndex;
2639 rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2643 /* Record whether this tree is a simple assignment to a local */
2645 simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2648 bool requireByteReg;
2649 requireByteReg = false;
2651 #if CPU_HAS_BYTE_REGS
2652 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2654 if (varTypeIsByte(type) &&
2655 ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2658 // Byte-assignments typically need a byte register
2659 requireByteReg = true;
2661 if (op1->gtOper == GT_LCL_VAR)
2663 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2665 // Did we predict that this local will be enregistered?
2666 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2668 // We don't require a byte register when op1 is an enregistered local
2669 requireByteReg = false;
2672 // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2673 if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2675 // We should try to put op1 in a byte register
2676 varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2682 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2684 bool isWriteBarrierAsgNode;
2685 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2687 GCInfo::WriteBarrierForm wbf;
2688 if (isWriteBarrierAsgNode)
2689 wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2691 wbf = GCInfo::WBF_NoBarrier;
2694 regMaskTP wbaLockedRegs;
2695 wbaLockedRegs = lockedRegs;
2696 if (isWriteBarrierAsgNode)
2698 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2700 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2703 wbaLockedRegs |= RBM_WRITE_BARRIER;
2704 op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2705 assert(REG_WRITE_BARRIER == REG_EDX);
2706 op1PredictReg = PREDICT_REG_EDX;
2711 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2713 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2716 op1PredictReg = PREDICT_REG_ECX;
2717 op2PredictReg = PREDICT_REG_EDX;
2718 #elif defined(_TARGET_ARM_)
2719 op1PredictReg = PREDICT_REG_R0;
2720 op2PredictReg = PREDICT_REG_R1;
2722 // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2723 if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2725 op1 = op1->gtOp.gtOp1;
2727 #else // !_TARGET_X86_ && !_TARGET_ARM_
2728 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2734 /* Are we supposed to evaluate RHS first? */
2736 if (tree->gtFlags & GTF_REVERSE_OPS)
2738 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2740 #if CPU_HAS_BYTE_REGS
2741 // Should we ensure that op2 gets evaluated into a byte register?
2742 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2744 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2745 // and we can't select one that is already reserved (i.e. lockedRegs)
2747 op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2748 op2->gtUsedRegs |= op2Mask;
2750 // No longer a simple assignment because we're using extra registers and might
2751 // have interference between op1 and op2. See DevDiv #136681
2752 simpleAssignment = false;
2756 * For a simple assignment we don't want the op2Mask to be
2757 * marked as interfering with the LCL_VAR, since it is likely
2758 * that we will want to enregister the LCL_VAR in exactly
2759 * the register that is used to compute op2
2761 tmpMask = lockedRegs;
2763 if (!simpleAssignment)
2766 regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2768 // Did we relax the register prediction for op1 and op2 above ?
2769 // - because we are depending upon op1 being enregistered
2771 if ((op1PredictReg == PREDICT_NONE) &&
2772 ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2774 /* We must be assigning into an enregistered LCL_VAR */
2775 noway_assert(op1->gtOper == GT_LCL_VAR);
2776 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2777 noway_assert(varDsc->lvRegNum != REG_STK);
2779 /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2780 varDsc->lvDependReg = true;
2785 // For the case of simpleAssignments op2 should always be evaluated first
2786 noway_assert(!simpleAssignment);
2788 regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2789 if (isWriteBarrierAsgNode)
2791 wbaLockedRegs |= op1->gtUsedRegs;
2793 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2795 #if CPU_HAS_BYTE_REGS
2796 // Should we ensure that op2 gets evaluated into a byte register?
2797 if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2799 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2800 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2803 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2804 op2->gtUsedRegs |= op2Mask;
2809 if (rpHasVarIndexForPredict(op2PredictReg))
2814 if (isWriteBarrierAsgNode)
2816 #if NOGC_WRITE_BARRIERS
2818 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2822 /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2823 passed to the write-barrier call in REG_WRITE_BARRIER */
2827 if (op1->gtOper == GT_IND)
2829 GenTreePtr rv1, rv2;
2833 /* Special handling of indirect assigns for write barrier */
2835 bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2838 /* Check address mode for enregisterable locals */
2842 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2844 rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2846 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2848 rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2853 if (op2->gtOper == GT_LCL_VAR)
2855 rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2858 // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2859 if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2861 rpRecordRegIntf(RBM_WRITE_BARRIER,
2862 rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2864 tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2869 #endif // NOGC_WRITE_BARRIERS
2871 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2876 printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2879 // For the ARM target we have an optimized JIT Helper
2880 // that only trashes a subset of the callee saved registers
2883 // NOTE: Adding it to the gtUsedRegs will cause the interference to
2884 // be added appropriately
2886 // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
2887 // instead of gtUsedRegs, because the latter will be modified later, but we need
2888 // to remember to add the interference.
2890 interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2892 op1->gtUsedRegs |= RBM_R0;
2893 op2->gtUsedRegs |= RBM_R1;
2894 #else // _TARGET_ARM_
2898 printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2900 // We have to call a normal JIT helper to perform the Write Barrier Assignment
2901 // It will trash the callee saved registers
2903 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2904 #endif // _TARGET_ARM_
2906 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2909 if (simpleAssignment)
2912 * Consider a simple assignment to a local:
2916 * Since the "=" node is visited after the variable
2917 * is marked live (assuming it's live after the
2918 * assignment), we don't want to use the register
2919 * use mask of the "=" node but rather that of the
2922 tree->gtUsedRegs = op1->gtUsedRegs;
2926 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2928 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2934 /* assigning shift operators */
2936 noway_assert(type != TYP_LONG);
2938 #if CPU_LOAD_STORE_ARCH
2939 predictReg = PREDICT_ADDR;
2941 predictReg = PREDICT_NONE;
2944 /* shift count is handled same as ordinary shift */
2945 goto HANDLE_SHIFT_COUNT;
2948 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2950 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2952 // We need a scratch register for the LEA instruction
2953 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2956 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2961 /* Cannot cast to VOID */
2962 noway_assert(type != TYP_VOID);
2964 /* cast to long is special */
2965 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2967 noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2968 #if CPU_LONG_USES_REGPAIR
2969 rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2971 if (rpHasVarIndexForPredict(predictReg))
2973 unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2974 rpAsgVarNum = tgtIndex;
2976 // We don't need any register as we plan on writing to the rpAsgVarNum register
2977 predictReg = PREDICT_NONE;
2979 LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
2980 tgtVar->lvDependReg = true;
2982 if (tgtVar->lvOtherReg != REG_STK)
2984 predictRegHi = PREDICT_NONE;
2989 if (predictReg == PREDICT_NONE)
2991 predictReg = PREDICT_SCRATCH_REG;
2994 // If we are widening an int into a long using a targeted register pair we
2995 // should retarget so that the low part gets loaded into the appropriate register
2996 else if (predictReg == PREDICT_PAIR_R0R1)
2998 predictReg = PREDICT_REG_R0;
2999 predictRegHi = PREDICT_REG_R1;
3001 else if (predictReg == PREDICT_PAIR_R2R3)
3003 predictReg = PREDICT_REG_R2;
3004 predictRegHi = PREDICT_REG_R3;
3008 // If we are widening an int into a long using a targeted register pair we
3009 // should retarget so that the low part gets loaded into the appropriate register
3010 else if (predictReg == PREDICT_PAIR_EAXEDX)
3012 predictReg = PREDICT_REG_EAX;
3013 predictRegHi = PREDICT_REG_EDX;
3015 else if (predictReg == PREDICT_PAIR_ECXEBX)
3017 predictReg = PREDICT_REG_ECX;
3018 predictRegHi = PREDICT_REG_EBX;
3022 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3024 #if CPU_LONG_USES_REGPAIR
3025 if (predictRegHi != PREDICT_NONE)
3027 // Now get one more reg for the upper part
3028 regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3031 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3035 /* cast from long is special - it frees a register */
3036 if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
3037 && op1->gtType == TYP_LONG)
3039 if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3040 predictReg = PREDICT_REG;
3042 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3044 // If we have 2 or more regs, free one of them
3045 if (!genMaxOneBit(regMask))
3047 /* Clear the 2nd lowest bit in regMask */
3048 /* First set tmpMask to the lowest bit in regMask */
3049 tmpMask = genFindLowestBit(regMask);
3050 /* Next find the second lowest bit in regMask */
3051 tmpMask = genFindLowestBit(regMask & ~tmpMask);
3052 /* Clear this bit from regmask */
3053 regMask &= ~tmpMask;
3055 tree->gtUsedRegs = op1->gtUsedRegs;
3059 #if CPU_HAS_BYTE_REGS
3060 /* cast from signed-byte is special - it uses byteable registers */
3061 if (type == TYP_INT)
3063 var_types smallType;
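// Determine the smaller of the cast's source and target types; that is the operand that may need a byteable register.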
3065 if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3066 smallType = tree->gtCast.CastOp()->TypeGet();
3068 smallType = tree->gtCast.gtCastType;
3070 if (smallType == TYP_BYTE)
3072 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3074 if ((regMask & RBM_BYTE_REGS) == 0)
3075 regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3077 tree->gtUsedRegs = (regMaskSmall)regMask;
3083 #if FEATURE_STACK_FP_X87
3084 /* cast to float/double is special */
3085 if (varTypeIsFloating(type))
3087 switch (op1->TypeGet())
3089 /* uses fild, so don't need to be loaded to reg */
3092 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3093 tree->gtUsedRegs = op1->gtUsedRegs;
3101 /* Casting from a floating type to an integral type is special */
3102 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3104 if (opts.compCanUseSSE2)
3106 // predict for SSE2 based casting
3107 if (predictReg <= PREDICT_REG)
3108 predictReg = PREDICT_SCRATCH_REG;
3109 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3111 // Get one more int reg to hold cast result
3112 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3113 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3119 #if FEATURE_FP_REGALLOC
3120 // Are we casting from int to float or from float to int?
3121 // Fix 388428 ARM JitStress WP7
3122 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3124 // op1 needs to go into a register
3125 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3128 if (varTypeIsFloating(op1->TypeGet()))
3130 // We also need a fp scratch register for the convert operation
3131 regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3132 PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3135 // We also need a register to hold the result
3136 regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3137 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3142 /* otherwise must load op1 into a register */
3147 #ifdef _TARGET_XARCH_
3148 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3150 // This is a special case to handle the following
3151 // optimization: conv.i4(round.d(d)) -> round.i(d)
3152 // if flowgraph 3186
3154 if (predictReg <= PREDICT_REG)
3155 predictReg = PREDICT_SCRATCH_REG;
3157 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3159 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3161 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3169 if (tree->TypeGet() == TYP_LONG)
3171 // On ARM this consumes an extra register for the '0' value
3172 if (predictReg <= PREDICT_REG)
3173 predictReg = PREDICT_SCRATCH_REG;
3175 regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3177 regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3179 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3182 #endif // _TARGET_ARM_
3187 // these unary operators will write new values
3188 // and thus will need a scratch register
3190 /* generic unary operators */
3192 if (predictReg <= PREDICT_REG)
3193 predictReg = PREDICT_SCRATCH_REG;
3198 // these unary operators do not write new values
3199 // and thus won't need a scratch register
3200 CLANG_FORMAT_COMMENT_ANCHOR;
3205 tree->gtUsedRegs = 0;
3210 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3211 tree->gtUsedRegs = op1->gtUsedRegs;
3215 case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3217 bool intoReg = true;
3218 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3220 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3222 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3225 if (predictReg == PREDICT_ADDR)
3229 else if (predictReg == PREDICT_NONE)
3231 if (type != TYP_LONG)
3237 predictReg = PREDICT_REG;
3241 /* forcing to register? */
3242 if (intoReg && (type != TYP_LONG))
3244 rsvdRegs |= RBM_LASTUSE;
3250 /* check for address mode */
3251 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3254 #if CPU_LOAD_STORE_ARCH
3255 // We may need a scratch register for loading a long
3256 if (type == TYP_LONG)
3258 /* This scratch register immediately dies */
3259 tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3261 #endif // CPU_LOAD_STORE_ARCH
3264 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3266 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3268 /* These integer register(s) immediately die */
3269 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3270 // Two integer registers are required for a TYP_DOUBLE
3271 if (type == TYP_DOUBLE)
3273 rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3277 /* forcing to register? */
3280 regMaskTP lockedMask = lockedRegs | rsvdRegs;
3283 // We will compute a new regMask that holds the register(s)
3284 // that we will load the indirection into.
3286 CLANG_FORMAT_COMMENT_ANCHOR;
3288 #ifndef _TARGET_64BIT_
3289 if (type == TYP_LONG)
3291 // We need to use multiple load instructions here:
3292 // For the first register we can not choose
3293 // any registers that are being used in place or
3294 // any register in the current regMask
3296 regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3298 // For the second register we can choose a register that was
3299 // used in place or any register in the old now overwritten regMask
3300 // but not the same register that we picked above in 'regMask'
3302 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3303 regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3308 // We will use one load instruction here:
3309 // The load target register can be a register that was used in place
3310 // or one of the registers from the original regMask.
3312 VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3313 regMask = rpPredictRegPick(type, predictReg, lockedMask);
3316 else if (predictReg != PREDICT_ADDR)
3318 /* Unless the caller specified PREDICT_ADDR */
3319 /* we don't return the temp registers used */
3320 /* to form the address */
3325 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3337 /* Floating point comparison uses EAX for flags */
3338 if (varTypeIsFloating(op1->TypeGet()))
3344 if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3346 // Some comparisons are converted to ?:
3347 noway_assert(!fgMorphRelopToQmark(op1));
3349 if (predictReg <= PREDICT_REG)
3350 predictReg = PREDICT_SCRATCH_REG;
3352 // The set instructions need a byte register
3353 regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3358 #ifdef _TARGET_XARCH_
3360 // Optimize the compare with a constant cases for xarch
3361 if (op1->gtOper == GT_CNS_INT)
3363 if (op2->gtOper == GT_CNS_INT)
3365 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3366 rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3367 tree->gtUsedRegs = op2->gtUsedRegs;
3370 else if (op2->gtOper == GT_CNS_INT)
3372 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3373 tree->gtUsedRegs = op1->gtUsedRegs;
3376 else if (op2->gtOper == GT_CNS_LNG)
3378 regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3380 // We also need one extra register to read values from
3381 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3382 #endif // _TARGET_X86_
3383 tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3386 #endif // _TARGET_XARCH_
3389 unsigned op1TypeSize;
3390 unsigned op2TypeSize;
3392 op1TypeSize = genTypeSize(op1->TypeGet());
3393 op2TypeSize = genTypeSize(op2->TypeGet());
3395 op1PredictReg = PREDICT_REG;
3396 op2PredictReg = PREDICT_REG;
3398 if (tree->gtFlags & GTF_REVERSE_OPS)
3400 #ifdef _TARGET_XARCH_
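// On xarch a full 4-byte op1 can be used directly as the memory operand of the compare, so it doesn't need a register.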
3401 if (op1TypeSize == sizeof(int))
3402 op1PredictReg = PREDICT_NONE;
3405 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3406 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3410 #ifdef _TARGET_XARCH_
3411 // For full DWORD compares we can have
3413 // op1 is an address mode and op2 is a register
3415 // op1 is a register and op2 is an address mode
3417 if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3419 if (op2->gtOper == GT_LCL_VAR)
3421 unsigned lclNum = op2->gtLclVar.gtLclNum;
3422 varDsc = lvaTable + lclNum;
3423 /* Did we predict that this local will be enregistered? */
3424 if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3426 op1PredictReg = PREDICT_ADDR;
3430 // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3431 if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3432 op2PredictReg = PREDICT_ADDR;
3433 #endif // _TARGET_XARCH_
3435 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
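// op2 must be evaluated into a register unless it is an integer constant that fits in the instruction's immediate field.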
3437 if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3440 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3444 #ifdef _TARGET_XARCH_
3445 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3446 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3447 // if one of the operands is small and the types aren't equal.
3449 if (regMask == RBM_NONE)
3451 rpPredictReg op1xPredictReg, op2xPredictReg;
3452 GenTreePtr op1x, op2x;
3453 if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3455 op1xPredictReg = op2PredictReg;
3456 op2xPredictReg = op1PredictReg;
3462 op1xPredictReg = op1PredictReg;
3463 op2xPredictReg = op2PredictReg;
3467 if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
3468 (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3469 varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
3471 bool needTmp = false;
3473 // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3474 // We could predict a byteable register for op2x, but what if we don't get it?
3475 // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3477 if (varTypeIsByte(op1x->TypeGet()))
3483 if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3485 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3490 if (op1x->TypeGet() != op2x->TypeGet())
3496 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3500 #endif // _TARGET_XARCH_
3502 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3507 #ifndef _TARGET_AMD64_
3508 if (type == TYP_LONG)
3510 assert(tree->gtIsValid64RsltMul());
3512 /* Strip out the cast nodes */
3514 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3515 op1 = op1->gtCast.CastOp();
3516 op2 = op2->gtCast.CastOp();
3520 #endif // !_TARGET_AMD64_
3523 #if defined(_TARGET_X86_)
3524 // This will be done by a 64-bit imul "imul eax, reg"
3525 // (i.e. EDX:EAX = EAX * reg)
3527 /* Are we supposed to evaluate op2 first? */
3528 if (tree->gtFlags & GTF_REVERSE_OPS)
3530 rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3531 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3535 rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3536 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3539 /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3541 tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3543 /* set regMask to the set of held registers */
3545 regMask = RBM_PAIR_TMP_LO;
3547 if (type == TYP_LONG)
3548 regMask |= RBM_PAIR_TMP_HI;
3550 #elif defined(_TARGET_ARM_)
3551 // This will be done by a 4-operand multiply
3553 // Are we supposed to evaluate op2 first?
3554 if (tree->gtFlags & GTF_REVERSE_OPS)
3556 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3557 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3561 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3562 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3565 // set regMask to the set of held registers,
3566 // the two scratch registers we need to compute the mul result
3568 regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3570 // set gtUsedRegs to regMask and the registers needed by op1 and op2
3572 tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3574 #else // !_TARGET_X86_ && !_TARGET_ARM_
3575 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3582 /* We use imulEAX for most unsigned multiply operations */
3583 if (tree->gtOverflow())
3585 if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3600 tree->gtUsedRegs = 0;
3602 if (predictReg <= PREDICT_REG)
3603 predictReg = PREDICT_SCRATCH_REG;
3608 if (tree->gtFlags & GTF_REVERSE_OPS)
3610 op1PredictReg = PREDICT_REG;
3611 #if !CPU_LOAD_STORE_ARCH
3612 if (genTypeSize(op1->gtType) >= sizeof(int))
3613 op1PredictReg = PREDICT_NONE;
3615 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3616 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3620 op2PredictReg = PREDICT_REG;
3621 #if !CPU_LOAD_STORE_ARCH
3622 if (genTypeSize(op2->gtType) >= sizeof(int))
3623 op2PredictReg = PREDICT_NONE;
3625 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3627 // For most ALU operations we can generate a single instruction that encodes
3628 // a small immediate integer constant value. (except for multiply)
3630 if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3632 ssize_t ival = op2->gtIntCon.gtIconVal;
3633 if (codeGen->validImmForAlu(ival))
3635 op2PredictReg = PREDICT_NONE;
3637 else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3638 ((oper == GT_ADD) || (oper == GT_SUB)))
3640 op2PredictReg = PREDICT_NONE;
3643 if (op2PredictReg == PREDICT_NONE)
3645 op2->gtUsedRegs = RBM_NONE;
3650 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3653 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3655 #if CPU_HAS_BYTE_REGS
3656 /* We have special register requirements for byte operations */
3658 if (varTypeIsByte(tree->TypeGet()))
3660 /* For 8 bit arithmetic, one operand has to be in a
3661 byte-addressable register, and the other has to be
3662 in a byte-addressable reg or in memory. Assume it's in a reg */
3664 regMaskTP regByteMask = 0;
3665 regMaskTP op1ByteMask = op1->gtUsedRegs;
3667 if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3669 // Pick a Byte register to use for op1
3670 regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3671 op1ByteMask = regByteMask;
3674 if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3676 // Pick a Byte register to use for op2, avoiding the one used by op1
3677 regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3682 tree->gtUsedRegs |= regByteMask;
3683 regMask = regByteMask;
3695 /* non-integer division handled in generic way */
3696 if (!varTypeIsIntegral(type))
3698 tree->gtUsedRegs = 0;
3699 if (predictReg <= PREDICT_REG)
3700 predictReg = PREDICT_SCRATCH_REG;
3701 goto GENERIC_BINARY;
3704 #ifndef _TARGET_64BIT_
3706 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3708 /* Special case: a mod with an int op2 is done inline using idiv or div
3709 to avoid a costly call to the helper */
3711 noway_assert((op2->gtOper == GT_CNS_LNG) &&
3712 (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3714 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3715 if (tree->gtFlags & GTF_REVERSE_OPS)
3717 tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3718 rsvdRegs | op1->gtRsvdRegs);
3719 tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3723 tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3725 rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3727 regMask = RBM_PAIR_TMP;
3728 #else // !_TARGET_X86_ && !_TARGET_ARM_
3729 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3730 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3733 (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3734 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3738 #endif // _TARGET_64BIT_
3740 /* there is no divide-by-immediate instruction, so force an integer constant which is not
3741 * a power of two into a register
3744 if (op2->OperKind() & GTK_CONST)
3746 ssize_t ival = op2->gtIntConCommon.IconValue();
3748 /* Is the divisor a power of 2 ? */
3750 if (ival > 0 && genMaxOneBit(size_t(ival)))
3755 op2PredictReg = PREDICT_SCRATCH_REG;
3759 /* A divisor that is not an integer constant must also be enregistered */
3760 op2PredictReg = PREDICT_REG;
3763 regMaskTP trashedMask;
3764 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3765 regMaskTP op1ExcludeMask;
3766 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3767 regMaskTP op2ExcludeMask;
3768 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3770 #ifdef _TARGET_XARCH_
3771 /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3772 * we can safely allow the "b" value to die. Unfortunately, if we simply
3773 * mark the node "b" as using EDX, this will not work if "b" is a register
3774 * variable that dies with this particular reference. Thus, if we want to
3775 * avoid this situation (where we would have to spill the variable from
3776 * EDX to someplace else), we need to explicitly mark the interference
3777 * of the variable at this point.
3780 if (op2->gtOper == GT_LCL_VAR)
3782 unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3783 varDsc = lvaTable + lclNum;
3784 if (varDsc->lvTracked)
3789 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3790 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3791 varDsc->lvVarIndex);
3792 if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3793 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3794 varDsc->lvVarIndex);
3797 VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3798 VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3802 /* set the held register based on opcode */
3803 if (oper == GT_DIV || oper == GT_UDIV)
3807 trashedMask = (RBM_EAX | RBM_EDX);
3809 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3811 #endif // _TARGET_XARCH_
3814 trashedMask = RBM_NONE;
3815 op1ExcludeMask = RBM_NONE;
3816 op2ExcludeMask = RBM_NONE;
3819 /* set the lvPref reg if possible */
3822 * Walking the gtNext link twice from here should get us back
3823 * to our parent node, if this is a simple assignment tree.
3825 dest = tree->gtNext;
3826 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3827 dest->gtNext->gtOp.gtOp2 == tree)
3829 varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3830 varDsc->addPrefReg(regMask, this);
3832 #ifdef _TARGET_XARCH_
3833 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3835 op1PredictReg = PREDICT_SCRATCH_REG;
3838 /* are we supposed to evaluate op2 first? */
3839 if (tree->gtFlags & GTF_REVERSE_OPS)
3841 tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3842 rsvdRegs | op1->gtRsvdRegs);
3843 rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3847 tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3848 rsvdRegs | op2->gtRsvdRegs);
3849 rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3854 /* grab EAX, EDX for this tree node */
3855 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
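// The shift operators handled below produce a new value, so make sure we predict at least a scratch register for the result.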
3863 if (predictReg <= PREDICT_REG)
3864 predictReg = PREDICT_SCRATCH_REG;
3866 #ifndef _TARGET_64BIT_
3867 if (type == TYP_LONG)
3869 if (op2->IsCnsIntOrI())
3871 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3872 // no register used by op2
3873 op2->gtUsedRegs = 0;
3874 tree->gtUsedRegs = op1->gtUsedRegs;
3878 // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3879 tmpMask = lockedRegs;
3880 tmpMask &= ~RBM_LNGARG_0;
3881 tmpMask &= ~RBM_SHIFT_LNG;
3883 // op2 goes to RBM_SHIFT_LNG, op1 to the RBM_LNGARG_0 pair
3884 if (tree->gtFlags & GTF_REVERSE_OPS)
3886 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3887 tmpMask |= RBM_SHIFT_LNG;
3888 // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
3889 // Fix 383843 X86/ARM ILGEN
3890 rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3891 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3895 rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3896 tmpMask |= RBM_LNGARG_0;
3897 // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3898 // Fix 383839 ARM ILGEN
3899 rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3900 rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3902 regMask = RBM_LNGRET; // function return registers
3903 op1->gtUsedRegs |= RBM_LNGARG_0;
3904 op2->gtUsedRegs |= RBM_SHIFT_LNG;
3906 tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3908 // We are using a helper function to do the shift:
3910 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3914 #endif // _TARGET_64BIT_
3916 #ifdef _TARGET_XARCH_
3917 if (!op2->IsCnsIntOrI())
3918 predictReg = PREDICT_NOT_REG_ECX;
3922 // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3924 regMaskTP tmpRsvdRegs;
3926 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3928 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3929 rsvdRegs = RBM_LASTUSE;
3930 tmpRsvdRegs = RBM_NONE;
3935 // Special case: op1 is a constant
3936 if (op1->IsCnsIntOrI())
3937 tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3938 // System.Xml.Schema.BitSet:Get(int):bool
3940 tmpRsvdRegs = op1->gtRsvdRegs;
3944 if (!op2->IsCnsIntOrI())
3946 if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3948 op2PredictReg = PREDICT_REG_SHIFT;
3952 op2PredictReg = PREDICT_REG;
3955 /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3956 op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3958 // If our target arch has a REG_SHIFT register then
3959 // we set the PrefReg to REG_SHIFT when op2 is a LclVar, and
3960 // we add an interference with REG_SHIFT for any other LclVars alive at op2
3961 if (REG_SHIFT != REG_NA)
3963 VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3965 while (op2->gtOper == GT_COMMA)
3967 op2 = op2->gtOp.gtOp2;
3970 if (op2->gtOper == GT_LCL_VAR)
3972 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3973 varDsc->setPrefReg(REG_SHIFT, this);
3974 if (varDsc->lvTracked)
3976 VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3980 // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3981 // excluding the LclVar that was used for the shift amount as it is read-only
3982 // and can be kept alive through the shift operation
3984 rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3985 // In case op2Mask doesn't contain the required shift register,
3986 // we will or it in now.
3987 op2Mask |= RBM_SHIFT;
3991 if (tree->gtFlags & GTF_REVERSE_OPS)
3993 assert(regMask == RBM_NONE);
3994 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3997 #if CPU_HAS_BYTE_REGS
3998 if (varTypeIsByte(type))
4000 // Fix 383789 X86 ILGEN
4001 // Fix 383813 X86 ILGEN
4002 // Fix 383828 X86 ILGEN
4003 if (op1->gtOper == GT_LCL_VAR)
4005 varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4006 if (varDsc->lvTracked)
4008 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4010 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4011 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4014 if ((regMask & RBM_BYTE_REGS) == 0)
4016 // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4017 // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4020 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4024 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4030 if (tree->gtFlags & GTF_REVERSE_OPS)
4032 if (predictReg == PREDICT_NONE)
4034 predictReg = PREDICT_REG;
4036 else if (rpHasVarIndexForPredict(predictReg))
4038 /* Don't propagate the tgt reg use in a GT_COMMA */
4039 predictReg = PREDICT_SCRATCH_REG;
4042 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4043 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4047 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4049 /* CodeGen will enregister the op2 side of a GT_COMMA */
4050 if (predictReg == PREDICT_NONE)
4052 predictReg = PREDICT_REG;
4054 else if (rpHasVarIndexForPredict(predictReg))
4056 /* Don't propagate the tgt reg use in a GT_COMMA */
4057 predictReg = PREDICT_SCRATCH_REG;
4060 regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4062 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4064 tree->gtUsedRegs = op2->gtUsedRegs;
4065 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4067 LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4069 if (op2VarDsc->lvTracked)
4071 VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4072 rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4079 noway_assert(op1 != NULL && op2 != NULL);
4082 * If the gtUsedRegs conflicts with lockedRegs
4083 * then we are going to have to spill some registers
4084 * into the non-trashed register set to keep them alive
4088 regMaskTP spillRegs;
4089 spillRegs = lockedRegs & tree->gtUsedRegs;
4093 /* Find the next register that needs to be spilled */
4094 tmpMask = genFindLowestBit(spillRegs);
4099 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4100 gtDispTree(tree, 0, NULL, true);
4103 /* In Codegen it will typically introduce a spill temp here */
4104 /* rather than relocating the register to a non trashed reg */
4105 rpPredictSpillCnt++;
4108 /* Remove it from the spillRegs and lockedRegs*/
4109 spillRegs &= ~tmpMask;
4110 lockedRegs &= ~tmpMask;
4113 VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4115 /* Evaluate the <cond> subtree */
4116 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4117 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4118 tree->gtUsedRegs = op1->gtUsedRegs;
4120 noway_assert(op2->gtOper == GT_COLON);
4121 if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4123 // Don't try to target the register specified in predictReg when we have complex subtrees
4125 predictReg = PREDICT_SCRATCH_REG;
4127 GenTreePtr elseTree = op2->AsColon()->ElseNode();
4128 GenTreePtr thenTree = op2->AsColon()->ThenNode();
4130 noway_assert(thenTree != NULL && elseTree != NULL);
4132 // Update compCurLife to only those vars live on the <then> subtree
4134 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4136 if (type == TYP_VOID)
4138 /* Evaluate the <then> subtree */
4139 rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4141 predictReg = PREDICT_NONE;
4145 // A mask to use to force the predictor to choose low registers (to reduce code size)
4146 regMaskTP avoidRegs = RBM_NONE;
4148 avoidRegs = (RBM_R12 | RBM_LR);
4150 if (predictReg <= PREDICT_REG)
4151 predictReg = PREDICT_SCRATCH_REG;
4153 /* Evaluate the <then> subtree */
4155 rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4159 rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4160 if (op1PredictReg != PREDICT_NONE)
4161 predictReg = op1PredictReg;
4165 VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4167 /* Evaluate the <else> subtree */
4168 // First record the post-then liveness, and reset the current liveness to the else
4170 CLANG_FORMAT_COMMENT_ANCHOR;
4173 VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4176 VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4178 rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4179 tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4181 // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4182 // They each have only one successor, which they share. Their live-out sets must equal the
4183 // live-in set of this virtual successor block, and thus must be the same. We can assert
4184 // that equality here.
4185 assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4189 regMaskTP reloadMask = RBM_NONE;
4193 regMaskTP reloadReg;
4195 /* Get an extra register to hold it */
4196 reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4200 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4201 gtDispTree(tree, 0, NULL, true);
4204 reloadMask |= reloadReg;
4209 /* update the gtUsedRegs mask */
4210 tree->gtUsedRegs |= reloadMask;
4217 tree->gtUsedRegs = RBM_NONE;
4220 /* Is there a return value? */
4223 #if FEATURE_FP_REGALLOC
4224 if (varTypeIsFloating(type))
4226 predictReg = PREDICT_FLTRET;
4227 if (type == TYP_FLOAT)
4228 regMask = RBM_FLOATRET;
4230 regMask = RBM_DOUBLERET;
4234 if (isRegPairType(type))
4236 predictReg = PREDICT_LNGRET;
4237 regMask = RBM_LNGRET;
4241 predictReg = PREDICT_INTRET;
4242 regMask = RBM_INTRET;
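// If this method calls unmanaged code, keep the P/Invoke TCB and frame registers locked while the return value is evaluated so they are not trashed.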
4244 if (info.compCallUnmanaged)
4246 lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4248 rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4249 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4252 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4253 // When on ARM under the profiler, emitting the Leave callback requires RBM_PROFILER_RET_USED.
4254 // We could optimize the register set based on int/long or no return value, but to
4255 // keep it simple we mark the entire RBM_PROFILER_RET_USED set as used regs here.
4256 if (compIsProfilerHookNeeded())
4258 tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4267 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4268 regMask = genReturnRegForTree(tree);
4269 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4272 tree->gtUsedRegs = 0;
4278 /* This must be a test of a relational operator */
4280 noway_assert(op1->OperIsCompare());
4282 /* Only condition code set by this operation */
4284 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4286 tree->gtUsedRegs = op1->gtUsedRegs;
4292 noway_assert(type <= TYP_INT);
4293 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4296 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4297 unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4300 // Table based switch requires an extra register for the table base
4301 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4303 tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4305 #else // !_TARGET_ARM_
4306 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4307 tree->gtUsedRegs = op1->gtUsedRegs;
4308 #endif // _TARGET_ARM_
4313 if (predictReg <= PREDICT_REG)
4314 predictReg = PREDICT_SCRATCH_REG;
4316 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4317 // Need a reg to load exponent into
4318 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4319 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4323 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4327 if (info.compInitMem)
4329 // We zero out two registers in the ARM codegen path
4331 rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4335 op1->gtUsedRegs |= (regMaskSmall)regMask;
4336 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4338 // The result will be put in the reg we picked for the size
4339 // regMask = <already set as we want it to be>
4346 if (predictReg <= PREDICT_REG)
4347 predictReg = PREDICT_SCRATCH_REG;
4349 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4350 // registers (to reduce code size)
4352 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4355 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4357 compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4361 unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4362 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4363 // If it has one bit set, and that's an arg reg...
4364 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4366 // We are passing the 'obj' in the argument registers
4368 regNumber rn = genRegNumFromMask(preferReg);
4370 // Add the registers used to pass the 'obj' to regMask.
4371 for (unsigned i = 0; i < objSize / 4; i++)
4373 if (rn == MAX_REG_ARG)
4376 regMask |= genRegMask(rn);
4377 rn = genRegArgNext(rn);
4382 // We are passing the 'obj' in the outgoing arg space
4383 // We will need one register to load into unless the 'obj' size is 4 or less.
4387 regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4390 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4392 #else // !_TARGET_ARM_
4394 #endif // _TARGET_ARM_
4400 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
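// Does preferReg have exactly one bit set (a single register), and is that register an argument register?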
4402 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4404 // A MKREFANY takes up two registers.
4405 regNumber rn = genRegNumFromMask(preferReg);
4407 if (rn < MAX_REG_ARG)
4409 regMask |= genRegMask(rn);
4410 rn = genRegArgNext(rn);
4411 if (rn < MAX_REG_ARG)
4412 regMask |= genRegMask(rn);
4415 if (regMask != RBM_NONE)
4417 // Condensation of GENERIC_BINARY path.
4418 assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4419 op2PredictReg = PREDICT_REG;
4420 regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4421 rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4422 regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4423 tree->gtUsedRegs = (regMaskSmall)regMask;
4426 tree->gtUsedRegs = op1->gtUsedRegs;
4427 #endif // _TARGET_ARM_
4428 goto GENERIC_BINARY;
4435 goto GENERIC_BINARY;
4439 // Ensure we can write to op2. op2 will hold the output.
4440 if (predictReg < PREDICT_SCRATCH_REG)
4441 predictReg = PREDICT_SCRATCH_REG;
4443 if (tree->gtFlags & GTF_REVERSE_OPS)
4445 op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4446 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4450 regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4451 op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4453 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4460 // This unary operator simply passes through the value from its child (much like GT_NOP)
4461 // and thus won't need a scratch register.
4462 regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4463 tree->gtUsedRegs = op1->gtUsedRegs;
4470 noway_assert(!"unexpected simple operator in reg use prediction");
4475 /* See what kind of a special operator we have here */
4480 GenTreeArgList* list;
4482 unsigned regArgsNum;
4484 regMaskTP regArgMask;
4485 regMaskTP curArgMask;
4491 /* initialize so we can just or in various bits */
4492 tree->gtUsedRegs = RBM_NONE;
4494 #if GTF_CALL_REG_SAVE
4496 * Unless the GTF_CALL_REG_SAVE flag is set,
4497 * we can't preserve the RBM_CALLEE_TRASH registers.
4498 * (likewise we can't preserve the return registers)
4499 * So we remove them from the lockedRegs set and
4500 * record any of them in the keepMask
4503 if (tree->gtFlags & GTF_CALL_REG_SAVE)
4505 regMaskTP trashMask = genReturnRegForTree(tree);
4507 keepMask = lockedRegs & trashMask;
4508 lockedRegs &= ~trashMask;
4513 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4514 lockedRegs &= ~RBM_CALLEE_TRASH;
4520 /* Is there an object pointer? */
4521 if (tree->gtCall.gtCallObjp)
4523 /* Evaluate the instance pointer first */
4525 args = tree->gtCall.gtCallObjp;
4527 /* the objPtr always goes to an integer register (through temp or directly) */
4528 noway_assert(regArgsNum == 0);
4531 /* Must be passed in a register */
4533 noway_assert(args->gtFlags & GTF_LATE_ARG);
4535 /* Must be either a deferred reg arg node or a GT_ASG node */
4537 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4538 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4540 if (!args->IsArgPlaceHolderNode())
4542 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4545 VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4546 VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4548 /* process argument list */
4549 for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4551 args = list->Current();
4553 if (args->gtFlags & GTF_LATE_ARG)
4555 /* Must be either a Placeholder/NOP node or a GT_ASG node */
4557 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4558 args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4560 if (!args->IsArgPlaceHolderNode())
4562 rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4569 #ifdef FEATURE_FIXED_OUT_ARGS
4570 // We'll store this argument into the outgoing argument area
4571 // It needs to be in a register to be stored.
4573 predictReg = PREDICT_REG;
4575 #else // !FEATURE_FIXED_OUT_ARGS
4576 // We'll generate a push for this argument
4578 predictReg = PREDICT_NONE;
4579 if (varTypeIsSmall(args->TypeGet()))
4581 /* We may need to sign or zero extend a small type using a register */
4582 predictReg = PREDICT_SCRATCH_REG;
4586 rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4588 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4589 tree->gtUsedRegs |= args->gtUsedRegs;
4592 /* Is there a late argument list */
4595 regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4598 /* process the late argument list */
4599 for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4601 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4602 LclVarDsc* promotedStructLocal = NULL;
4604 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4605 tmpMask = RBM_NONE; // Set of additional temp registers that are needed only to set up the current arg
4607 assert(list->OperIsList());
4609 args = list->Current();
4610 list = list->Rest();
4612 assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
4614 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4615 assert(curArgTabEntry);
4617 regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
4619 unsigned numSlots = curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4621 rpPredictReg argPredictReg;
4622 regMaskTP avoidReg = RBM_NONE;
4624 if (regNum != REG_STK)
4626 argPredictReg = rpGetPredictForReg(regNum);
4627 curArgMask |= genRegMask(regNum);
4631 assert(numSlots > 0);
4632 argPredictReg = PREDICT_NONE;
4634 // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4635 avoidReg = (RBM_R12 | RBM_LR);
4640 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4642 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4644 // 64-bit longs and doubles require 2 consecutive argument registers
4645 curArgMask |= genRegMask(REG_NEXT(regNum));
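// As a concrete illustration (register assignment invented): a TYP_DOUBLE argument whose
// first register is r2 ends up with curArgMask == genRegMask(REG_R2) | genRegMask(REG_R3),
// since the 64-bit value occupies two consecutive argument registers.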
4647 else if (args->TypeGet() == TYP_STRUCT)
4649 GenTreePtr argx = args;
4650 GenTreePtr lclVarTree = NULL;
4652 /* The GT_OBJ may be a child of a GT_COMMA */
4653 while (argx->gtOper == GT_COMMA)
4655 argx = argx->gtOp.gtOp2;
4657 unsigned originalSize = 0;
4659 if (argx->gtOper == GT_OBJ)
4661 originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4663 // Is it the address of a promoted struct local?
4664 if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4666 lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
4667 LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4668 if (varDsc->lvPromoted)
4669 promotedStructLocal = varDsc;
4672 else if (argx->gtOper == GT_LCL_VAR)
4674 varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
4675 originalSize = varDsc->lvSize();
4677 // Is it a promoted struct local?
4678 if (varDsc->lvPromoted)
4679 promotedStructLocal = varDsc;
4681 else if (argx->gtOper == GT_MKREFANY)
4683 originalSize = 2 * TARGET_POINTER_SIZE;
4687 noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4690 // We only pass arguments differently if it is a struct local that is "independently" promoted, which
4691 // allows the field locals to be independently enregistered.
4692 if (promotedStructLocal != NULL)
4694 if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4695 promotedStructLocal = NULL;
4698 unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
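// Worked example (assuming a 32-bit target where TARGET_POINTER_SIZE == REGSIZE_BYTES == 4):
// an originalSize of 10 bytes rounds up to 12, giving slots == 3, while an 8-byte struct
// gives slots == 2. Each slot becomes either an argument register or an outgoing stack
// slot in the code below.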
4700 // Are we passing a TYP_STRUCT in multiple integer registers?
4701 // if so set up curArgMask to reflect this
4702 // Also slots is updated to reflect the number of outgoing arg slots that we will write
4703 if (regNum != REG_STK)
4705 regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4706 assert(genIsValidReg(regNum));
4707 regNumber nextReg = REG_NEXT(regNum);
4709 while (slots > 0 && nextReg <= regLast)
4711 curArgMask |= genRegMask(nextReg);
4712 nextReg = REG_NEXT(nextReg);
4717 if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4719 // All or a portion of this struct will be placed in the argument registers indicated by
4720 // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4721 // that the second arg to be evaluated interferes with the reg for the first, the third with
4722 // the regs for the first and second, etc. But since we always place the stack slots before
4723 // placing the register slots we do not add interferences for any part of the struct that gets
4724 // passed on the stack.
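// A small example of the ordering described above (register numbers invented for
// illustration): a promoted struct with three 4-byte fields passed starting at r1 targets
// its fields at r1, r2 and r3 in turn. When the second field is processed, prevArgMask
// is { r1 }, so an interference is recorded between r1 and that field's variable; for the
// third field prevArgMask is { r1, r2 }. Fields that fall into outgoing stack slots are
// never added to prevArgMask, so they pick up no such interference.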
4727 argPredictReg = PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
4728 regMaskTP prevArgMask = RBM_NONE;
4729 for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4731 LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4732 if (fieldVarDsc->lvTracked)
4734 assert(lclVarTree != NULL);
4735 if (prevArgMask != RBM_NONE)
4737 rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4738 DEBUGARG("fieldVar/argReg"));
4741 // Now see how many registers this uses up.
4742 unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4743 unsigned nextAfterLastRegOffset =
4744 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4745 TARGET_POINTER_SIZE;
4746 unsigned nextAfterLastArgRegOffset =
4747 min(nextAfterLastRegOffset,
4748 genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4750 for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4753 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4756 if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4761 if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4763 // Add the argument register used here as a preferred register for this fieldVarDsc
4765 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4766 fieldVarDsc->setPrefReg(firstRegUsed, this);
4769 compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4772 // If slots is greater than zero then part or all of this TYP_STRUCT
4773 // argument is passed in the outgoing argument area. (except HFA arg)
4775 if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4777 // We will need a register to address the TYP_STRUCT
4778 // Note that we can use an argument register in curArgMask as in
4779 // codegen we pass the stack portion of the argument before we
4780 // setup the register part.
4783 // Force the predictor to choose a LOW_REG here to reduce code bloat
4784 avoidReg = (RBM_R12 | RBM_LR);
4786 assert(tmpMask == RBM_NONE);
4787 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4789 // If slots > 1 then we will need a second register to perform the load/store into the outgoing arg area
4793 tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4794 lockedRegs | regArgMask | tmpMask | avoidReg);
4797 } // (args->TypeGet() == TYP_STRUCT)
4798 #endif // _TARGET_ARM_
4800 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4801 // as we have already calculated the correct tmpMask and curArgMask values and
4802 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4804 if (promotedStructLocal == NULL)
4806 /* Target the appropriate argument register */
4807 tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4810 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4811 // for the duration of the OBJ.
4812 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4814 GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4815 assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4816 compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4819 regArgMask |= curArgMask;
4820 args->gtUsedRegs |= (tmpMask | regArgMask);
4821 tree->gtUsedRegs |= args->gtUsedRegs;
4822 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4824 if (args->gtUsedRegs != RBM_NONE)
4826 // Add a register interference between the set of registers used or in use when we evaluated
4827 // the current arg and whatever is live after the current arg
4829 rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4831 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4833 assert(list == NULL);
4835 #ifdef LEGACY_BACKEND
4836 #if CPU_LOAD_STORE_ARCH
4837 #ifdef FEATURE_READYTORUN_COMPILER
4838 if (tree->gtCall.IsR2RRelativeIndir())
4840 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4842 #endif // FEATURE_READYTORUN_COMPILER
4843 #endif // CPU_LOAD_STORE_ARCH
4844 #endif // LEGACY_BACKEND
4846 regMaskTP callAddrMask;
4847 callAddrMask = RBM_NONE;
4848 #if CPU_LOAD_STORE_ARCH
4849 predictReg = PREDICT_SCRATCH_REG;
4851 predictReg = PREDICT_NONE;
4854 switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4856 case GTF_CALL_VIRT_STUB:
4858 // We only want to record an interference between the virtual stub
4859 // param reg and anything that's live AFTER the call, but we've not
4860 // yet processed the indirect target. So add virtualStubParamInfo.regMask
4861 // to interferingRegs.
4862 interferingRegs |= virtualStubParamInfo->GetRegMask();
4865 printf("Adding interference with Virtual Stub Param\n");
4867 codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4869 if (tree->gtCall.gtCallType == CT_INDIRECT)
4871 predictReg = virtualStubParamInfo->GetPredict();
4875 case GTF_CALL_VIRT_VTABLE:
4876 predictReg = PREDICT_SCRATCH_REG;
4879 case GTF_CALL_NONVIRT:
4880 predictReg = PREDICT_SCRATCH_REG;
4884 if (tree->gtCall.gtCallType == CT_INDIRECT)
4886 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4887 if (tree->gtCall.gtCallCookie)
4889 codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4891 callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4892 lockedRegs | regArgMask, RBM_LASTUSE);
4894 // Just in case we predict some other registers, force interference with our two special
4895 // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4896 callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4898 predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4902 rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4904 else if (predictReg != PREDICT_NONE)
4906 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4909 if (tree->gtFlags & GTF_CALL_UNMANAGED)
4911 // Need a register for tcbReg
4913 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4914 #if CPU_LOAD_STORE_ARCH
4915 // Need an extra register for tmpReg
4917 rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4921 tree->gtUsedRegs |= callAddrMask;
4923 /* After the call restore the original value of lockedRegs */
4924 lockedRegs |= keepMask;
4926 /* set the return register */
4927 regMask = genReturnRegForTree(tree);
4929 if (regMask & rsvdRegs)
4931 // We will need to relocate the return register value
4932 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4933 #if FEATURE_FP_REGALLOC
4934 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4940 if (intRegMask == RBM_INTRET)
4942 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4944 else if (intRegMask == RBM_LNGRET)
4946 regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4950 noway_assert(!"unexpected return regMask");
4954 #if FEATURE_FP_REGALLOC
4957 if (floatRegMask == RBM_FLOATRET)
4959 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4961 else if (floatRegMask == RBM_DOUBLERET)
4963 regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4965 else // HFA return case
4967 for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4969 regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4976 /* the return registers (if any) are killed */
4977 tree->gtUsedRegs |= regMask;
4979 #if GTF_CALL_REG_SAVE
4980 if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4983 /* the RBM_CALLEE_TRASH set is killed (i.e. EAX, ECX, EDX) */
4984 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4988 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4989 // Mark required registers for emitting tailcall profiler callback as used
4990 if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4992 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4999 // Figure out which registers can't be touched
5001 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5002 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5004 regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5009 #if CPU_LOAD_STORE_ARCH
5010 // We need a register to load the bounds of the MD array
5011 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5014 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5016 /* We need scratch registers to compute index-lower_bound.
5017 Also, gtArrInds[0]'s register will be used as the second
5018 addressability register (besides gtArrObj's) */
5020 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5021 lockedRegs | regMask | dimsMask, rsvdRegs);
5025 dimsMask |= dimMask;
5027 #ifdef _TARGET_XARCH_
5028 // INS_imul doesn't have an immediate constant.
5029 if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5030 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5032 tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5037 #ifdef _TARGET_XARCH_
5038 rsvdRegs |= RBM_EAX;
5040 if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5042 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5046 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5048 op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5050 #ifdef _TARGET_XARCH_
5051 rsvdRegs &= ~RBM_EAX;
5052 tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5053 rsvdRegs | regMask | op2Mask);
5054 tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5055 predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5058 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5063 case GT_ARR_BOUNDS_CHECK:
5065 regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5066 regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5067 rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5070 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5075 NO_WAY("unexpected special operator in reg use prediction");
5082 /* make sure we set them to something reasonable */
5083 if (tree->gtUsedRegs & RBM_ILLEGAL)
5084 noway_assert(!"used regs not set properly in reg use prediction");
5086 if (regMask & RBM_ILLEGAL)
5087 noway_assert(!"return value not set propery in reg use prediction");
5092 * If the gtUsedRegs conflicts with lockedRegs
5093 * then we are going to have to spill some registers
5094 * into the non-trashed register set to keep them alive
5096 regMaskTP spillMask;
5097 spillMask = tree->gtUsedRegs & lockedRegs;
5103 /* Find the next register that needs to be spilled */
5104 tmpMask = genFindLowestBit(spillMask);
5109 printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5110 gtDispTree(tree, 0, NULL, true);
5111 if ((tmpMask & regMask) == 0)
5113 printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5114 gtDispTree(tree, 0, NULL, true);
5118 /* In Codegen it will typically introduce a spill temp here */
5119 /* rather than relocating the register to a non trashed reg */
5120 rpPredictSpillCnt++;
5122 /* Remove it from the spillMask */
5123 spillMask &= ~tmpMask;
5128 * If the return registers in regMask conflict with the lockedRegs
5129 * then we allocate extra registers for the reload of the conflicting registers.
5132 * Set spillMask to the set of locked registers that have to be reloaded here.
5133 * reloadMask is set to the extra registers that are used to reload
5134 * the spilled lockedRegs.
5137 noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5138 spillMask = lockedRegs & regMask;
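// Sketch of the relocation below (registers invented for illustration): if the call
// returns its value in EAX while EAX is also in lockedRegs, then spillMask == RBM_EAX.
// We drop EAX from regMask, ask rpPredictRegPick for one extra register, say EBX, add it
// to reloadMask, and finally regMask |= reloadMask, modelling the reload of the spilled
// locked value into EBX around the returned value.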
5142 /* Remove the spillMask from regMask */
5143 regMask &= ~spillMask;
5145 regMaskTP reloadMask = RBM_NONE;
5148 /* Get an extra register to hold it */
5149 regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5153 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5154 gtDispTree(tree, 0, NULL, true);
5157 reloadMask |= reloadReg;
5159 /* Remove it from the spillMask */
5160 spillMask &= ~genFindLowestBit(spillMask);
5163 /* Update regMask to use the reloadMask */
5164 regMask |= reloadMask;
5166 /* update the gtUsedRegs mask */
5167 tree->gtUsedRegs |= (regMaskSmall)regMask;
5170 regMaskTP regUse = tree->gtUsedRegs;
5171 regUse |= interferingRegs;
5173 if (!VarSetOps::IsEmpty(this, compCurLife))
5175 // Add interference between the current set of live variables and
5176 // the set of temporary registers needed to evaluate the subtree
5179 rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5183 if (rpAsgVarNum != -1)
5185 // Add interference between the registers used (if any)
5186 // and the assignment target variable
5189 rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5192 // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5193 // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5194 // to the set of currently live variables. This new interference will prevent us
5195 // from using the register value used here for enregistering a different live variable
5197 if (!VarSetOps::IsEmpty(this, compCurLife))
5199 rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5203 /* Do we need to restore the oldLastUseVars value? */
5204 if (restoreLastUseVars)
5206 /* If we used a GT_ASG targeted register then we need to add
5207 * a variable interference between any new last use variables
5208 * and the GT_ASG targeted register
5210 if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5212 rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5213 DEBUGARG("asgn tgt last use conflict"));
5215 VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5221 #pragma warning(pop)
5224 #endif // LEGACY_BACKEND
5226 /****************************************************************************/
5227 /* Returns true when we must create an EBP frame
5228 This is used to force most managed methods to have EBP-based frames,
5229 which allows the ETW kernel stackwalker to walk the stacks of managed code;
5230 this allows the kernel to perform lightweight profiling
5232 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5234 bool result = false;
5236 const char* reason = nullptr;
5240 if (!result && (opts.MinOpts() || opts.compDbgCode))
5242 INDEBUG(reason = "Debug Code");
5245 if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5247 INDEBUG(reason = "IL Code Size");
5250 if (!result && (fgBBcount > 3))
5252 INDEBUG(reason = "BasicBlock Count");
5255 if (!result && fgHasLoops)
5257 INDEBUG(reason = "Method has Loops");
5260 if (!result && (optCallCount >= 2))
5262 INDEBUG(reason = "Call Count");
5265 if (!result && (optIndirectCallCount >= 1))
5267 INDEBUG(reason = "Indirect Call");
5270 #endif // ETW_EBP_FRAMED
5272 // The VM always wants to identify the containing frame of an InlinedCallFrame
5273 // via the frame register, never the stack register, so we need a frame.
5274 if (!result && (optNativeCallCount != 0))
5276 INDEBUG(reason = "Uses PInvoke");
5280 #ifdef _TARGET_ARM64_
5281 // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame pointer frames.
5285 INDEBUG(reason = "Temporary ARM64 force frame pointer");
5288 #endif // _TARGET_ARM64_
5291 if ((result == true) && (wbReason != nullptr))
5300 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5302 /*****************************************************************************
5304 * Predict which variables will be assigned to registers
5305 * This is x86 specific and only predicts the integer registers, and
5306 * it must be conservative: any variable that is predicted to be enregistered
5307 * must end up being enregistered.
5309 * The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5310 * predicted to be enregistered to minimize calls to rpPredictRegPick.
5315 #pragma warning(push)
5316 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5318 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5322 if (rpPasses <= rpPassesPessimize)
5324 // Assume that we won't have to reverse EBP enregistration
5325 rpReverseEBPenreg = false;
5327 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5328 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5329 rpFrameType = FT_EBP_FRAME;
5331 rpFrameType = FT_ESP_FRAME;
5335 // If we are using FPBASE as the frame register, we cannot also use it for enregistering variables
5337 if (rpFrameType == FT_EBP_FRAME)
5339 regAvail &= ~RBM_FPBASE;
5341 #endif // !ETW_EBP_FRAMED
5344 rpPredictAssignMask = regAvail;
5346 raSetupArgMasks(&codeGen->intRegState);
5347 #if !FEATURE_STACK_FP_X87
5348 raSetupArgMasks(&codeGen->floatRegState);
5351 // If there is a secret stub param, it is also live in
5352 if (info.compPublishStubParam)
5354 codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5357 if (regAvail == RBM_NONE)
5362 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5364 #if FEATURE_STACK_FP_X87
5365 if (!varDsc->IsFloatRegType())
5368 varDsc->lvRegNum = REG_STK;
5369 if (isRegPairType(varDsc->lvType))
5370 varDsc->lvOtherReg = REG_STK;
5378 printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5379 printf("\n Available registers = ");
5380 dspRegMask(regAvail);
5385 if (regAvail == RBM_NONE)
5390 /* We cannot change the lvVarIndexes at this point, so we */
5391 /* can only re-order the existing set of tracked variables */
5392 /* Which will change the order in which we select the */
5393 /* locals for enregistering. */
5395 assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5397 // Should not be set unless optimizing
5398 noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5404 fgDebugCheckBBlist();
5407 /* Initialize the weighted count of variables that could have */
5408 /* been enregistered but weren't */
5409 unsigned refCntStk = 0; // sum of ref counts for all stack based variables
5410 unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
5411 unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5413 unsigned refCntStkParam; // sum of ref counts for all stack based parameters
5414 unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5416 #if FEATURE_STACK_FP_X87
5417 refCntStkParam = raCntStkParamDblStackFP;
5418 refCntWtdStkDbl = raCntWtdStkDblStackFP;
5419 refCntStk = raCntStkStackFP;
5422 refCntWtdStkDbl = 0;
5424 #endif // FEATURE_STACK_FP_X87
5426 #endif // DOUBLE_ALIGN
5428 /* Set of registers used to enregister variables in the prediction */
5429 regMaskTP regUsed = RBM_NONE;
5431 /*-------------------------------------------------------------------------
5433 * Predict/Assign the enregistered locals in ref-count order
5437 VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5439 unsigned FPRegVarLiveInCnt;
5440 FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5443 for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5445 bool notWorthy = false;
5449 regMaskTP regAvailForType;
5452 unsigned customVarOrderSize;
5453 regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5455 regNumber saveOtherReg;
5457 varDsc = lvaRefSorted[sortNum];
5459 #if FEATURE_STACK_FP_X87
5460 if (varTypeIsFloating(varDsc->TypeGet()))
5463 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5465 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5466 // be enregistered.
5467 noway_assert(!varDsc->lvRegister);
5474 /* Check the set of invariant things that would prevent enregistration */
5476 /* Ignore the variable if it's not tracked */
5477 if (!varDsc->lvTracked)
5480 /* Get hold of the index and the interference mask for the variable */
5481 varIndex = varDsc->lvVarIndex;
5483 // Remove 'varIndex' from unprocessedVars
5484 VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5486 // Skip the variable if it's marked as DoNotEnregister.
5488 if (varDsc->lvDoNotEnregister)
5491 /* TODO: For now, if we have a JMP, all register args go to the stack
5492 * TODO: Later, consider extending the life of the argument or making a copy of it */
5494 if (compJmpOpUsed && varDsc->lvIsRegArg)
5497 /* Skip the variable if the ref count is zero */
5499 if (varDsc->lvRefCnt == 0)
5502 /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5504 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5509 /* Is the unweighted ref count too low to be interesting? */
5511 if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5512 (varDsc->lvRefCnt <= 1))
5514 /* Sometimes it's useful to enregister a variable with only one use */
5515 /* arguments referenced in loops are one example */
5517 if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5518 goto OK_TO_ENREGISTER;
5520 /* If the variable has a preferred register set it may be useful to put it there */
5521 if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5522 goto OK_TO_ENREGISTER;
5524 /* Keep going; the table is sorted by "weighted" ref count */
5530 if (varTypeIsFloating(varDsc->TypeGet()))
5532 regType = varDsc->TypeGet();
5533 regAvailForType = regAvail & RBM_ALLFLOAT;
5538 regAvailForType = regAvail & RBM_ALLINT;
5542 isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5546 regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5550 /* If we don't have any registers available then skip the enregistration attempt */
5551 if (regAvailForType == RBM_NONE)
5554 // On the pessimize passes don't even try to enregister LONGS
5555 if (isRegPairType(varDsc->lvType))
5557 if (rpPasses > rpPassesPessimize)
5559 else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5563 // Set of registers to avoid when performing register allocation
5564 avoidReg = RBM_NONE;
5566 if (!varDsc->lvIsRegArg)
5568 /* For local variables,
5569 * avoid the incoming arguments,
5570 * but only if you conflict with them */
5572 if (raAvoidArgRegMask != 0)
5575 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5577 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5579 if (!argDsc->lvIsRegArg)
5582 bool isFloat = argDsc->IsFloatRegType();
5583 regNumber inArgReg = argDsc->lvArgReg;
5584 regMaskTP inArgBit = genRegMask(inArgReg);
5586 // Is this inArgReg in the raAvoidArgRegMask set?
5588 if (!(raAvoidArgRegMask & inArgBit))
5591 noway_assert(argDsc->lvIsParam);
5592 noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5594 unsigned locVarIndex = varDsc->lvVarIndex;
5595 unsigned argVarIndex = argDsc->lvVarIndex;
5597 /* Does this variable interfere with the arg variable ? */
5598 if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5600 noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5601 /* Yes, so try to avoid the incoming arg reg */
5602 avoidReg |= inArgBit;
5606 noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5612 // Now we will try to predict which register the variable
5613 // could be enregistered in
5615 customVarOrderSize = MAX_VAR_ORDER_SIZE;
5617 raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5620 saveOtherReg = DUMMY_INIT(REG_NA);
5622 for (regInx = 0; regInx < customVarOrderSize; regInx++)
5624 regNumber regNum = customVarOrder[regInx];
5625 regMaskTP regBits = genRegMask(regNum);
5627 /* Skip this register if it isn't available */
5628 if ((regAvailForType & regBits) == 0)
5631 /* Skip this register if it interferes with the variable */
5633 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5636 if (varTypeIsFloating(regType))
5641 regNumber regNext = REG_NEXT(regNum);
5642 regBits |= genRegMask(regNext);
5644 /* Skip if regNext interferes with the variable */
5645 if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5651 bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5652 bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5653 bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
5655 /* Skip this register if the weighted ref count is less than two
5656 and we are considering an unused callee-saved register */
5658 if (lessThanTwoRefWtd && // less than two references (weighted)
5659 firstUseOfReg && // first use of this register
5660 calleeSavedReg) // callee saved register
5662 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5664 // psc is an abbreviation for possibleSameColor
5665 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5667 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5668 unsigned pscIndex = 0;
5669 while (pscIndexIter.NextElem(&pscIndex))
5671 LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5672 totalRefCntWtd += pscVar->lvRefCntWtd;
5673 if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5677 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5680 continue; // not worth spilling a callee saved register
5682 // otherwise we will spill this callee-saved register,
5683 // because its uses, when combined with the uses of
5684 // other yet-to-be-processed candidates, exceed our threshold.
5685 // totalRefCntWtd = totalRefCntWtd;
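// Worked example of this heuristic (numbers invented, assuming BB_UNITY_WEIGHT == 100):
// a candidate with lvRefCntWtd == 150 considered for an untouched callee-saved register
// starts with totalRefCntWtd == 150. If the unprocessed, non-interfering ("possible same
// color") variables contribute another 80, totalRefCntWtd reaches 230 > 2 * BB_UNITY_WEIGHT
// and we go ahead and use (spill) the callee-saved register; had they contributed only 30,
// the total would stay at 180 and we would 'continue' past this register instead.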
5688 /* Looks good - mark the variable as living in the register */
5690 if (isRegPairType(varDsc->lvType))
5692 if (firstHalf == false)
5694 /* Enregister the first half of the long */
5695 varDsc->lvRegNum = regNum;
5696 saveOtherReg = varDsc->lvOtherReg;
5697 varDsc->lvOtherReg = REG_STK;
5702 /* Ensure 'well-formed' register pairs */
5703 /* (those returned by gen[Pick|Grab]RegPair) */
5705 if (regNum < varDsc->lvRegNum)
5707 varDsc->lvOtherReg = varDsc->lvRegNum;
5708 varDsc->lvRegNum = regNum;
5712 varDsc->lvOtherReg = regNum;
5719 varDsc->lvRegNum = regNum;
5723 varDsc->lvOtherReg = REG_NEXT(regNum);
5728 if (regNum == REG_FPBASE)
5730 refCntEBP += varDsc->lvRefCnt;
5731 refCntWtdEBP += varDsc->lvRefCntWtd;
5733 if (varDsc->lvIsParam)
5735 refCntStkParam += varDsc->lvRefCnt;
5740 /* Record this register in the regUsed set */
5743 /* The register is now ineligible for all interfering variables */
5745 VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5750 regNumber secondHalf = REG_NEXT(regNum);
5751 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5752 unsigned intfIndex = 0;
5753 while (iter.NextElem(&intfIndex))
5755 VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5760 /* If a register argument, remove its incoming register
5761 * from the "avoid" list */
5763 if (varDsc->lvIsRegArg)
5765 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5769 raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5774 /* A variable of TYP_LONG can take two registers */
5778 // Since we have successfully enregistered this variable it is
5779 // now time to move on and consider the next variable
5785 noway_assert(isRegPairType(varDsc->lvType));
5787 /* This TYP_LONG is partially enregistered */
5789 noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5791 if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5796 raAddToStkPredict(varDsc->lvRefCntWtd);
5801 if (varDsc->lvDependReg)
5808 /* Weighted count of variables that could have been enregistered but weren't */
5809 raAddToStkPredict(varDsc->lvRefCntWtd);
5811 if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5812 raAddToStkPredict(varDsc->lvRefCntWtd);
5816 varDsc->lvRegister = false;
5818 varDsc->lvRegNum = REG_STK;
5819 if (isRegPairType(varDsc->lvType))
5820 varDsc->lvOtherReg = REG_STK;
5822 /* unweighted count of variables that were not enregistered */
5824 refCntStk += varDsc->lvRefCnt;
5827 if (varDsc->lvIsParam)
5829 refCntStkParam += varDsc->lvRefCnt;
5833 /* Is it a stack based double? */
5834 /* Note that double params are excluded since they can not be double aligned */
5835 if (varDsc->lvType == TYP_DOUBLE)
5837 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5845 gtDispLclVar((unsigned)(varDsc - lvaTable));
5846 if (varDsc->lvTracked)
5847 printf("T%02u", varDsc->lvVarIndex);
5850 printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5851 if (varDsc->lvDoNotEnregister)
5852 printf(", do-not-enregister");
5860 varDsc->lvRegister = true;
5862 // Record the fact that we enregistered a stack arg when tail call is used.
5863 if (compJmpOpUsed && !varDsc->lvIsRegArg)
5865 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5866 if (isRegPairType(varDsc->lvType))
5868 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5876 gtDispLclVar((unsigned)(varDsc - lvaTable));
5877 printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5878 refCntWtd2str(varDsc->lvRefCntWtd));
5879 varDsc->PrintVarReg();
5883 printf(":%s", getRegName(varDsc->lvOtherReg));
5892 noway_assert(refCntEBP == 0);
5899 printf("; refCntStk = %u\n", refCntStk);
5901 printf("; refCntEBP = %u\n", refCntEBP);
5902 if (refCntWtdEBP > 0)
5903 printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
5905 if (refCntStkParam > 0)
5906 printf("; refCntStkParam = %u\n", refCntStkParam);
5907 if (refCntWtdStkDbl > 0)
5908 printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5913 /* Determine how the EBP register should be used */
5914 CLANG_FORMAT_COMMENT_ANCHOR;
5918 if (!codeGen->isFramePointerRequired())
5920 noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5923 First let us decide if we should use EBP to create a
5924 double-aligned frame, instead of enregistering variables
5927 if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5929 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5930 goto REVERSE_EBP_ENREG;
5933 if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5935 if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5937 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5938 goto REVERSE_EBP_ENREG;
5943 #endif // DOUBLE_ALIGN
5945 if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5947 #ifdef _TARGET_XARCH_
5949 /* If we are using EBP to enregister variables then
5950 will we actually save bytes by setting up an EBP frame?
5952 Each stack reference is an extra byte of code if we use an ESP frame.
5955 Here we measure the savings that we get by using EBP to
5956 enregister variables vs. the cost in code size that we
5957 pay when using an ESP based frame.
5959 We pay one byte of code for each refCntStk
5960 but we save one byte (or more) for each refCntEBP.
5962 Our savings are the elimination of a stack memory read/write.
5963 We use the loop weighted value of
5964 refCntWtdEBP * mem_access_weight (0, 3, 6)
5965 to represent this savings.
5968 // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5969 // to set up an EBP frame in the prolog and epilog
5970 #define EBP_FRAME_SETUP_SIZE 5
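// To make the trade-off concrete (an invented example): with refCntStk == 40 and
// refCntEBP == 10, bytesSaved == 40 - (10 + EBP_FRAME_SETUP_SIZE) == 25 bytes for staying
// with an ESP frame. If refCntWtdEBP == 6 * BB_UNITY_WEIGHT and mem_access_weight == 3,
// the weighted savings of an EBP frame are (6 * BB_UNITY_WEIGHT * 3) / BB_UNITY_WEIGHT == 18,
// which is less than 25, so we would predict that enregistering variables in EBP is not
// worth it and jump to REVERSE_EBP_ENREG.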
5973 if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5975 unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5976 unsigned mem_access_weight = 3;
5978 if (compCodeOpt() == SMALL_CODE)
5979 mem_access_weight = 0;
5980 else if (compCodeOpt() == FAST_CODE)
5981 mem_access_weight *= 2;
5983 if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5985 /* It's not a good idea to use EBP in our predictions */
5986 CLANG_FORMAT_COMMENT_ANCHOR;
5988 if (verbose && (refCntEBP > 0))
5989 printf("; Predicting that it's not worth using EBP to enregister variables\n");
5991 rpFrameType = FT_EBP_FRAME;
5992 goto REVERSE_EBP_ENREG;
5995 #endif // _TARGET_XARCH_
5997 if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6002 if (rpMustCreateEBPCalled == false)
6004 rpMustCreateEBPCalled = true;
6005 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6009 printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6011 codeGen->setFrameRequired(true);
6013 rpFrameType = FT_EBP_FRAME;
6014 goto REVERSE_EBP_ENREG;
6024 noway_assert(rpFrameType != FT_ESP_FRAME);
6026 rpReverseEBPenreg = true;
6031 noway_assert(regUsed & RBM_FPBASE);
6033 regUsed &= ~RBM_FPBASE;
6035 /* variables that were enregistered in EBP become stack based variables */
6036 raAddToStkPredict(refCntWtdEBP);
6040 /* We're going to have to undo some predicted enregistered variables */
6041 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6043 /* Is this a register variable? */
6044 if (varDsc->lvRegNum != REG_STK)
6046 if (isRegPairType(varDsc->lvType))
6048 /* Only one can be EBP */
6049 if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6051 if (varDsc->lvRegNum == REG_FPBASE)
6052 varDsc->lvRegNum = varDsc->lvOtherReg;
6054 varDsc->lvOtherReg = REG_STK;
6056 if (varDsc->lvRegNum == REG_STK)
6057 varDsc->lvRegister = false;
6059 if (varDsc->lvDependReg)
6069 if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6071 varDsc->lvRegNum = REG_STK;
6073 varDsc->lvRegister = false;
6075 if (varDsc->lvDependReg)
6081 printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6082 varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6083 (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6091 #endif // ETW_EBP_FRAMED
6096 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6098 /* Clear the lvDependReg flag for next iteration of the predictor */
6099 varDsc->lvDependReg = false;
6101 // If we set rpLostEnreg and this is the first pessimize pass
6102 // then reverse the enreg of all TYP_LONG
6103 if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6105 varDsc->lvRegNum = REG_STK;
6106 varDsc->lvOtherReg = REG_STK;
6111 if (verbose && raNewBlocks)
6113 printf("\nAdded FP register killing blocks:\n");
6114 fgDispBasicBlocks();
6118 noway_assert(rpFrameType != FT_NOT_SET);
6120 /* return the set of registers used to enregister variables */
6124 #pragma warning(pop)
6127 /*****************************************************************************
6129 * Predict register use for every tree in the function. Note that we do this
6130 * at different times (not to mention in a totally different way) for x86 vs ARM.
6133 void Compiler::rpPredictRegUse()
6140 // We might want to adjust the ref counts based on interference
6143 regMaskTP allAcceptableRegs = RBM_ALLINT;
6145 #if FEATURE_FP_REGALLOC
6146 allAcceptableRegs |= raConfigRestrictMaskFP();
6149 allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6151 /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6152 to acdHelper(). This is done implicitly, without creating a GT_CALL
6153 node. Hence, this interference is handled implicitly by
6154 restricting the registers used for enregistering variables */
6156 if (opts.compDbgCode)
6158 allAcceptableRegs &= RBM_CALLEE_SAVED;
6161 /* Compute the initial regmask to use for the first pass */
6162 regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6165 #if CPU_USES_BLOCK_MOVE
6166 /* If we might need to generate a rep mov instruction */
6167 /* remove ESI and EDI */
6169 regAvail &= ~(RBM_ESI | RBM_EDI);
6173 /* If we are using longs then we remove ESI to allow */
6174 /* ESI:EBX to be saved across a call */
6176 regAvail &= ~(RBM_ESI);
6180 // For the first register allocation pass we don't want to color using r4
6181 // as we want to allow it to be used to color the internal temps instead
6182 // when r0,r1,r2,r3 are all in use.
6184 regAvail &= ~(RBM_R4);
6188 // We never have EBP available when ETW_EBP_FRAME is defined
6189 regAvail &= ~RBM_FPBASE;
6191 /* If a frame pointer is required then we remove EBP */
6192 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6193 regAvail &= ~RBM_FPBASE;
6197 BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6199 regAvail = RBM_NONE;
6202 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6203 regAvail = RBM_NONE;
6205 #if FEATURE_STACK_FP_X87
6206 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6207 VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6209 // Calculate the set of all tracked FP/non-FP variables
6210 // into optAllFloatVars and optAllNonFPvars
6215 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6217 /* Ignore the variable if it's not tracked */
6219 if (!varDsc->lvTracked)
6222 /* Get hold of the index and the interference mask for the variable */
6224 unsigned varNum = varDsc->lvVarIndex;
6226 /* add to the set of all tracked FP/non-FP variables */
6228 if (varDsc->IsFloatRegType())
6229 VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6231 VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6235 for (unsigned i = 0; i < REG_COUNT; i++)
6237 VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6239 for (unsigned i = 0; i < lvaTrackedCount; i++)
6241 VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6244 raNewBlocks = false;
6245 rpPredictAssignAgain = false;
6248 bool mustPredict = true;
6249 unsigned stmtNum = 0;
6250 unsigned oldStkPredict = DUMMY_INIT(~0);
6251 VARSET_TP oldLclRegIntf[REG_COUNT];
6253 for (unsigned i = 0; i < REG_COUNT; i++)
6255 VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6260 /* Assign registers to variables using the variable/register interference
6261 graph (raLclRegIntf[]) calculated in the previous pass */
6262 regUsed = rpPredictAssignRegVars(regAvail);
6264 mustPredict |= rpLostEnreg;
6267 // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6268 if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6269 !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6271 // We can release our reservation on R10 and use it to color registers
6272 codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6273 allAcceptableRegs |= RBM_OPT_RSVD;
6277 /* Is our new prediction good enough?? */
6280 /* For small methods (less than 12 stmts), we add an */
6281 /* extra pass if we are predicting the use of some */
6282 /* of the caller saved registers. */
6283 /* This fixes RAID perf bug 43440 VB Ackerman function */
6285 if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6290 /* If every variable was fully enregistered then we're done */
6291 if (rpStkPredict == 0)
6294 // This was a successful prediction. Record it, in case it turns out to be the best one.
6295 rpRecordPrediction();
6299 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6301 // Be careful about overflow
6302 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
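// For example, if rpStkPredict were already 0x90000000, rpStkPredict * 2 would wrap around
// and compare smaller than rpStkPredict itself; clamping highStkPredict to ULONG_MAX keeps
// the comparison against oldStkPredict below meaningful.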
6303 if (oldStkPredict < highStkPredict)
6306 if (rpStkPredict < rpPasses * 8)
6309 if (rpPasses >= (rpPassesMax - 1))
6314 /* We will do another pass */;
6318 if (JitConfig.JitAssertOnMaxRAPasses())
6320 noway_assert(rpPasses < rpPassesMax &&
6321 "This may not a bug, but dev team should look and see what is happening");
6325 // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with
6326 // the (max) size of a VARSET. We've eliminated this constant, so I left this as a constant. We hope
6327 // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6328 if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6330 NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6339 printf("\n; Another pass due to rpLostEnreg");
6341 printf("\n; Another pass due to rpAddedVarIntf");
6342 if ((rpPasses == 1) && rpPredictAssignAgain)
6343 printf("\n; Another pass due to rpPredictAssignAgain");
6345 printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6349 /* Zero the variable/register interference graph */
6350 for (unsigned i = 0; i < REG_COUNT; i++)
6352 VarSetOps::ClearD(this, raLclRegIntf[i]);
6355 // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6356 // it must not be in a register trashed by the callee
6357 if (info.compLvFrameListRoot != BAD_VAR_NUM)
6359 assert(!opts.ShouldUsePInvokeHelpers());
6360 noway_assert(info.compLvFrameListRoot < lvaCount);
6362 LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6364 if (pinvokeVarDsc->lvTracked)
6366 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6367 DEBUGARG("compLvFrameListRoot"));
6369 // We would prefer to have this enregistered in the PINVOKE_TCB register
6370 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6373 // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6374 // worst case). Make sure that the return value compiler temp that we create for the single
6375 // return block knows about this interference.
6376 if (genReturnLocal != BAD_VAR_NUM)
6378 noway_assert(genReturnBB);
6379 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6380 if (localTmp->lvTracked)
6382 rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6383 VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6389 if (compFloatingPointUsed)
6391 bool hasMustInitFloat = false;
6393 // if we have any must-init floating point LclVars then we will add register interferences
6394 // for the arguments with RBM_SCRATCH
6395 // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6396 // we won't home the arguments into REG_SCRATCH
6401 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6403 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6405 hasMustInitFloat = true;
6410 if (hasMustInitFloat)
6412 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6414 // If it is an incoming argument that is tracked and not floating-point
6415 if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6417 rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6418 DEBUGARG("arg home with must-init fp"));
6426 rpAddedVarIntf = false;
6427 rpLostEnreg = false;
6429 /* Walk the basic blocks and predict reg use for each tree */
6431 for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6435 compCurLifeTree = NULL;
6436 VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6440 for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6442 noway_assert(stmt->gtOper == GT_STMT);
6444 rpPredictSpillCnt = 0;
6445 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6446 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6448 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6453 printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6458 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6460 noway_assert(rpAsgVarNum == -1);
6462 if (rpPredictSpillCnt > tmpIntSpillMax)
6463 tmpIntSpillMax = rpPredictSpillCnt;
6468 /* Decide whether we need to set mustPredict */
6469 mustPredict = false;
6472 // The spill count may now be high enough that we need to reserve r10. If this is the case, we'll need to
6473 // reserve r10, and if it was used, throw out the last prediction and repredict.
6474 if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6476 codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6477 allAcceptableRegs &= ~RBM_OPT_RSVD;
6478 if ((regUsed & RBM_OPT_RSVD) != 0)
6481 rpBestRecordedPrediction = nullptr;
6497 if ((opts.compFlags & CLFLG_REGVAR) == 0)
6500 if (rpPredictAssignAgain)
6508 /* Calculate the new value to use for regAvail */
6510 regAvail = allAcceptableRegs;
6512 /* If a frame pointer is required then we remove EBP */
6513 if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6514 regAvail &= ~RBM_FPBASE;
6517 // We never have EBP available when ETW_EBP_FRAME is defined
6518 regAvail &= ~RBM_FPBASE;
6521 // If we have done n-passes then we must continue to pessimize the
6522 // interference graph by or-ing the interferences from the previous pass
6524 if (rpPasses > rpPassesPessimize)
6526 for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6527 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6529 /* If we reverse an EBP enregistration then keep it that way */
6530 if (rpReverseEBPenreg)
6531 regAvail &= ~RBM_FPBASE;
6539 /* Save the old variable/register interference graph */
6540 for (unsigned i = 0; i < REG_COUNT; i++)
6542 VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6544 oldStkPredict = rpStkPredict;
6545 } // end of while (true)
6549 // If we recorded a better feasible allocation than we ended up with, go back to using it.
6550 rpUseRecordedPredictionIfBetter();
6553 codeGen->setDoubleAlign(false);
6556 switch (rpFrameType)
6559 noway_assert(!"rpFrameType not set correctly!");
6562 noway_assert(!codeGen->isFramePointerRequired());
6563 noway_assert(!codeGen->isFrameRequired());
6564 codeGen->setFramePointerUsed(false);
6567 noway_assert((regUsed & RBM_FPBASE) == 0);
6568 codeGen->setFramePointerUsed(true);
6571 case FT_DOUBLE_ALIGN_FRAME:
6572 noway_assert((regUsed & RBM_FPBASE) == 0);
6573 noway_assert(!codeGen->isFramePointerRequired());
6574 codeGen->setFramePointerUsed(false);
6575 codeGen->setDoubleAlign(true);
6580 /* Record the set of registers that we need */
6581 codeGen->regSet.rsClearRegsModified();
6582 if (regUsed != RBM_NONE)
6584 codeGen->regSet.rsSetRegsModified(regUsed);
6587 /* We need genFullPtrRegMap if :
6588 * The method is fully interruptible, or
6589 * We are generating an EBP-less frame (for stack-pointer deltas)
6592 genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6598 printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6599 printf(" rpStkPredict was %u\n", rpStkPredict);
6602 rpRegAllocDone = true;
6605 #endif // LEGACY_BACKEND
6607 /*****************************************************************************
6609 * Mark all variables as to whether they live on the stack frame
6610 * (part or whole), and if so what the base is (FP or SP).
6613 void Compiler::raMarkStkVars()
6618 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6620 // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6621 CLANG_FORMAT_COMMENT_ANCHOR;
6623 #ifdef LEGACY_BACKEND
6624 varDsc->lvOnFrame = false;
6625 #endif // LEGACY_BACKEND
6627 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6629 noway_assert(!varDsc->lvRegister);
6633 /* Fully enregistered variables don't need any frame space */
6635 if (varDsc->lvRegister)
6637 if (!isRegPairType(varDsc->TypeGet()))
6642 /* For "large" variables make sure both halves are enregistered */
6644 if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6649 /* Unused variables typically don't get any frame space */
6650 else if (varDsc->lvRefCnt == 0)
6652 bool needSlot = false;
6654 bool stkFixedArgInVarArgs =
6655 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6657 // If its address has been exposed, ignore lvRefCnt. However, exclude
6658 // fixed arguments in a varargs method, as lvOnFrame shouldn't be set
6659 // for them since we don't want to explicitly report them to the GC.
6661 if (!stkFixedArgInVarArgs)
6663 needSlot |= varDsc->lvAddrExposed;
6666 #if FEATURE_FIXED_OUT_ARGS
6668 /* Is this the dummy variable representing GT_LCLBLK ? */
6669 needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6671 #endif // FEATURE_FIXED_OUT_ARGS
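
            // (Under FEATURE_FIXED_OUT_ARGS, lvaOutgoingArgSpaceVar is the pseudo-local
            // that models the outgoing argument area, so it always needs a frame slot
            // even though nothing references it directly.)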

            /* For debugging, note that we have to reserve space even for
               unused variables if they are ever in scope. However, this is not
               an issue as fgExtendDbgLifetimes() adds an initialization and
               variables in scope will not have a zero ref-cnt.
             */
            if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
            {
                for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
                {
                    noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
                }
            }
            /* For Debug Code, we have to reserve space even if the variable is never
               in scope. We will also need to initialize it if it is a GC var.
               So we set lvMustInit and artificially bump up the ref-cnt.
             */
            if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
            {
                needSlot = true;

                if (lvaTypeIsGC(lclNum))
                {
                    varDsc->lvRefCnt = 1;
                }
                if (!varDsc->lvIsParam)
                {
                    varDsc->lvMustInit = true;
                }
            }

#ifndef LEGACY_BACKEND
            varDsc->lvOnFrame = needSlot;
#endif // !LEGACY_BACKEND
            if (!needSlot)
            {
                /* Clear the lvMustInit flag in case it is set */
                varDsc->lvMustInit = false;
                goto NOT_STK;
            }
        }

#ifndef LEGACY_BACKEND
        if (!varDsc->lvOnFrame)
            goto NOT_STK;
#endif // !LEGACY_BACKEND

    ON_STK:
        /* The variable (or part of it) lives on the stack frame */
        noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
#if FEATURE_FIXED_OUT_ARGS
        noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
#else  // FEATURE_FIXED_OUT_ARGS
        noway_assert(lvaLclSize(lclNum) != 0);
#endif // FEATURE_FIXED_OUT_ARGS

        varDsc->lvOnFrame = true; // Our prediction is that the final home for this
                                  // local variable will be in the stack frame

    NOT_STK:;
        varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
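
        // lvFramePointerBased starts out as simply "does this method use an EBP
        // frame"; the double-align case below then sets it for stack-passed
        // parameters, which are still addressed off EBP in a double-aligned frame.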
#if DOUBLE_ALIGN
        if (codeGen->doDoubleAlign())
        {
            noway_assert(codeGen->isFramePointerUsed() == false);

            /* All arguments are off of EBP with double-aligned frames */
            if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
                varDsc->lvFramePointerBased = true;
        }
#endif // DOUBLE_ALIGN

        /* Some basic checks */

        // It must be in a register, on frame, or have zero references.
        noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);

#ifndef LEGACY_BACKEND
        // We can't have both lvRegister and lvOnFrame for RyuJIT
        noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
#else // LEGACY_BACKEND
        /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
        noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
                     (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
#endif // LEGACY_BACKEND

        // For varargs functions, there should be no direct references to
        // parameter variables except for 'this' (because these were morphed
        // in the importer), the 'arglist' parameter (which is not a GC
        // pointer), and the return buffer argument (if we are returning a
        // struct).
        // This is important because we don't want to try to report them
        // to the GC, as the frame offsets in these local variables would
        // not be fixed up properly.
        if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
        {
            if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
            {
                noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
            }
        }
    }
}

#ifdef LEGACY_BACKEND
void Compiler::rpRecordPrediction()
{
    if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
    {
        if (rpBestRecordedPrediction == NULL)
        {
            rpBestRecordedPrediction =
                reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
        }
        for (unsigned k = 0; k < lvaCount; k++)
        {
            rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
            rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
            rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
        }
        rpBestRecordedStkPredict = rpStkPredict;
        JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
    }
}
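
// rpRecordPrediction and rpUseRecordedPredictionIfBetter work as a pair: the former
// snapshots the per-local register assignments when the current prediction is cheaper
// (by weighted stack use count) than anything recorded so far, and the latter, invoked
// once after the prediction passes (see the call above), restores that snapshot if the
// final pass ended up worse. Roughly:
//
//     while (true)                            // prediction passes
//     {
//         ... predict register assignments ...
//         rpRecordPrediction();               // keep the best feasible result so far
//     }
//     rpUseRecordedPredictionIfBetter();      // possibly revert to the recorded best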

void Compiler::rpUseRecordedPredictionIfBetter()
{
    JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
            rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);

    if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
    {
        JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
                rpBestRecordedStkPredict);

        for (unsigned k = 0; k < lvaCount; k++)
        {
            lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
            lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
            lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
        }
    }
}
#endif // LEGACY_BACKEND