// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           RegAlloc                                        XX
XX                                                                           XX
XX  Does the register allocation and puts the remaining lclVars on the stack XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "regalloc.h"

#if FEATURE_FP_REGALLOC
Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
{
    DWORD val = JitConfig.JitRegisterFP();

    return (enumConfigRegisterFP)(val & 0x3);
}
#endif // FEATURE_FP_REGALLOC

regMaskTP Compiler::raConfigRestrictMaskFP()
{
    regMaskTP result = RBM_NONE;

#if FEATURE_FP_REGALLOC
    switch (raConfigRegisterFP())
    {
        case CONFIG_REGISTER_FP_NONE:
            result = RBM_NONE;
            break;
        case CONFIG_REGISTER_FP_CALLEE_TRASH:
            result = RBM_FLT_CALLEE_TRASH;
            break;
        case CONFIG_REGISTER_FP_CALLEE_SAVED:
            result = RBM_FLT_CALLEE_SAVED;
            break;
        case CONFIG_REGISTER_FP_FULL:
            result = RBM_ALLFLOAT;
            break;
    }
#endif

    return result;
}
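// Illustrative sketch (not part of the original file): the two-bit value read by
// raConfigRegisterFP() above selects one of four FP register subsets. This assumes
// the enumConfigRegisterFP values map 0..3 in the declaration order suggested by
// the switch above.
#if 0
static const char* describeRegisterFPConfig(unsigned val)
{
    switch (val & 0x3) // only the low two bits are consulted
    {
        case 0:  return "none";                      // CONFIG_REGISTER_FP_NONE
        case 1:  return "callee-trash FP registers"; // CONFIG_REGISTER_FP_CALLEE_TRASH
        case 2:  return "callee-saved FP registers"; // CONFIG_REGISTER_FP_CALLEE_SAVED
        default: return "all FP registers";          // CONFIG_REGISTER_FP_FULL
    }
}
#endif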

#if DOUBLE_ALIGN
DWORD Compiler::getCanDoubleAlign()
{
#ifdef DEBUG
    if (compStressCompile(STRESS_DBL_ALN, 20))
        return MUST_DOUBLE_ALIGN;

    return JitConfig.JitDoubleAlign();
#else
    return DEFAULT_DOUBLE_ALIGN;
#endif
}

//------------------------------------------------------------------------
// shouldDoubleAlign: Determine whether to double-align the frame
//
// Arguments:
//    refCntStk       - sum of     ref counts for all stack based variables
//    refCntEBP       - sum of     ref counts for EBP enregistered variables
//    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
//    refCntStkParam  - sum of     ref counts for all stack based parameters
//    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
//                      with double fields).
//
// Return Value:
//    Returns true if this method estimates that a double-aligned frame would be beneficial
//
// Notes:
//    The impact of a double-aligned frame is computed as follows:
//    - We save a byte of code for each parameter reference (they are frame-pointer relative)
//    - We pay a byte of code for each non-parameter stack reference.
//    - We save the misalignment penalty and possible cache-line crossing penalty.
//      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
//    - We pay 7 extra bytes for:
//        MOV EBP,ESP,
//        LEA ESP,[EBP-offset]
//        AND ESP,-8 to double align ESP
//    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
//
//    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
//    Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
//    ref count for double-aligned values.
//
bool Compiler::shouldDoubleAlign(
    unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
{
    bool           doDoubleAlign        = false;
    const unsigned DBL_ALIGN_SETUP_SIZE = 7;

    unsigned bytesUsed         = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
    unsigned misaligned_weight = 4;

    if (compCodeOpt() == Compiler::SMALL_CODE)
        misaligned_weight = 0;

    if (compCodeOpt() == Compiler::FAST_CODE)
        misaligned_weight *= 4;

    JITDUMP("\nDouble alignment:\n");
    JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
    JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
    JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);

    if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
    {
        JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
    }
    else if (refCntWtdEBP > refCntWtdStkDbl * 2)
    {
        // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
        // not double aligned.
        // Here are the numbers that make this not double-aligned.
        //     refCntWtdStkDbl = 0x164
        //     refCntWtdEBP    = 0x1a4
        // We think we do need to change the heuristic to be in favor of double-align.

        JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
    }
    else
    {
        // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
        JITDUMP("    Predicting to create a double-aligned frame\n");
        doDoubleAlign = true;
    }
    return doDoubleAlign;
}
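// Illustrative sketch (not part of the original file): replays the heuristic above
// with hypothetical counts, assuming BB_UNITY_WEIGHT == 100 and the default
// (blended) code optimization setting.
#if 0
static bool shouldDoubleAlignExample()
{
    const unsigned DBL_ALIGN_SETUP_SIZE = 7;
    unsigned refCntStk = 12, refCntEBP = 3, refCntStkParam = 4;
    unsigned refCntWtdEBP = 300, refCntWtdStkDbl = 800;

    unsigned bytesUsed        = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; // 18
    unsigned misalignedWeight = 4; // neither SMALL_CODE (0) nor FAST_CODE (16)

    if (bytesUsed > (refCntWtdStkDbl * misalignedWeight) / 100) // 18 > 32 ? no
        return false;
    if (refCntWtdEBP > refCntWtdStkDbl * 2) // 300 > 1600 ? no
        return false;
    return true; // both tests pass: predict a double-aligned frame
}
#endif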
#endif // DOUBLE_ALIGN

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

void Compiler::raInit()
{
#if FEATURE_STACK_FP_X87
    /* We have not assigned any FP variables to registers yet */

    VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
#endif
    codeGen->intRegState.rsIsFloat   = false;
    codeGen->floatRegState.rsIsFloat = true;

    rpReverseEBPenreg = false;
    rpAsgVarNum       = -1;
    rpPassesMax       = 6;
    rpPassesPessimize = rpPassesMax - 3;
    if (opts.compDbgCode)
    {
        rpPassesMax++;
    }
    rpStkPredict            = (unsigned)-1;
    rpFrameType             = FT_NOT_SET;
    rpLostEnreg             = false;
    rpMustCreateEBPCalled   = false;
    rpRegAllocDone          = false;
    rpMaskPInvokeEpilogIntf = RBM_NONE;

    rpPredictMap[PREDICT_NONE] = RBM_NONE;
    rpPredictMap[PREDICT_ADDR] = RBM_NONE;

#if FEATURE_FP_REGALLOC
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
#else
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
#endif

#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
#include "register.h"

#if defined(_TARGET_ARM_)

    rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
    rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
    rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;

#elif defined(_TARGET_AMD64_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;

#elif defined(_TARGET_X86_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
    rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
    rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;

#endif

    rpBestRecordedPrediction = NULL;
}

/*****************************************************************************
 *
 *  The following table(s) determines the order in which registers are considered
 *  for variables to live in
 */

const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
{
#if FEATURE_FP_REGALLOC
    if (varTypeIsFloating(regType))
    {
        static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
        const unsigned         raRegVarOrderFltSize = _countof(raRegVarOrderFlt);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderFltSize;

        return &raRegVarOrderFlt[0];
    }
    else
#endif
    {
        static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
        const unsigned         raRegVarOrderSize = _countof(raRegVarOrder);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderSize;

        return &raRegVarOrder[0];
    }
}
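// Illustrative usage sketch (not part of the original file): a caller walks the
// returned table to consider registers in allocation-preference order.
#if 0
void dumpVarOrderExample(Compiler* comp)
{
    unsigned         orderSize;
    const regNumber* order = comp->raGetRegVarOrder(TYP_INT, &orderSize);
    for (unsigned i = 0; i < orderSize; i++)
    {
        printf("preference %u: register #%u\n", i, (unsigned)order[i]);
    }
}
#endif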

#ifdef DEBUG

/*****************************************************************************
 *
 *  Dump out the variable interference graph
 *
 */

void Compiler::raDumpVarIntf()
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    printf("Var. interference graph for %s\n", info.compFullName);

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */
        unsigned varIndex = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varIndex);

        unsigned refIndex;

        for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
        {
            if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
                printf("T%02u ", refIndex);
            else
                printf("    ");
        }

        printf("\n");
    }

    printf("\n");
}

/*****************************************************************************
 *
 *  Dump out the register interference graph
 *
 */
void Compiler::raDumpRegIntf()
{
    printf("Reg. interference graph for %s\n", info.compFullName);

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        unsigned varNum;

        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */

        varNum = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varNum);

        if (varDsc->IsFloatRegType())
        {
#if !FEATURE_STACK_FP_X87
            for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum, true));
                else
                    printf("    ");
            }
#endif
        }
        else
        {
            for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum));
                else
                    printf("    ");
            }
        }

        printf("\n");
    }

    printf("\n");
}
#endif // DEBUG

/*****************************************************************************
 *
 * We'll adjust the ref counts based on interference
 *
 */

void Compiler::raAdjustVarIntf()
{
    // This method was not correct and has been disabled.
    return;
}

/*****************************************************************************/
/*****************************************************************************/
/* Determine register mask for a call/return from type.
 */

inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
{
    var_types type = tree->TypeGet();

    if (varTypeIsStruct(type) && IsHfa(tree))
    {
        int retSlots = GetHfaCount(tree);
        return ((1 << retSlots) - 1) << REG_FLOATRET;
    }

    const static regMaskTP returnMap[TYP_COUNT] = {
        RBM_ILLEGAL,   // TYP_UNDEF,
        RBM_NONE,      // TYP_VOID,
        RBM_INTRET,    // TYP_BOOL,
        RBM_INTRET,    // TYP_BYTE,
        RBM_INTRET,    // TYP_UBYTE,
        RBM_INTRET,    // TYP_SHORT,
        RBM_INTRET,    // TYP_USHORT,
        RBM_INTRET,    // TYP_INT,
        RBM_INTRET,    // TYP_UINT,
        RBM_LNGRET,    // TYP_LONG,
        RBM_LNGRET,    // TYP_ULONG,
        RBM_FLOATRET,  // TYP_FLOAT,
        RBM_DOUBLERET, // TYP_DOUBLE,
        RBM_INTRET,    // TYP_REF,
        RBM_INTRET,    // TYP_BYREF,
        RBM_INTRET,    // TYP_ARRAY,
        RBM_ILLEGAL,   // TYP_STRUCT,
        RBM_ILLEGAL,   // TYP_BLK,
        RBM_ILLEGAL,   // TYP_LCLBLK,
        RBM_ILLEGAL,   // TYP_PTR,
        RBM_ILLEGAL,   // TYP_FNC,
        RBM_ILLEGAL,   // TYP_UNKNOWN,
    };

    assert((unsigned)type < _countof(returnMap));
    assert(returnMap[TYP_LONG] == RBM_LNGRET);
    assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
    assert(returnMap[TYP_REF] == RBM_INTRET);
    assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);

    regMaskTP result = returnMap[type];
    assert(result != RBM_ILLEGAL);
    return result;
}
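// Illustrative sketch (not part of the original file): the shape of the HFA return
// mask built above, assuming the FP register mask bits are contiguous and start at
// the bit index of REG_FLOATRET (the bit index here is hypothetical).
#if 0
static unsigned hfaReturnMaskExample()
{
    const unsigned floatRetBit = 0; // hypothetical bit index of REG_FLOATRET
    int            retSlots    = 3; // e.g. an HFA struct of three floats
    // ((1 << 3) - 1) == 0b111, shifted onto the first three FP return
    // registers (s0|s1|s2 on ARM).
    return ((1u << retSlots) - 1) << floatRetBit;
}
#endif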

/*****************************************************************************/

/****************************************************************************/

#ifdef DEBUG

static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
    {
        if (!varDsc->lvTracked)
            continue;

        if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
            continue;

        if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
            printf("V%02u ", lclNum);
    }
}

#endif

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************
 *
 *  Debugging helpers - display variables liveness info.
 */

void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
{
    do
    {
        printf("BB%02u: ", beg->bbNum);

        printf(" in  = [ ");
        dispLifeSet(comp, mask, beg->bbLiveIn);
        printf("] ,");

        printf(" out = [ ");
        dispLifeSet(comp, mask, beg->bbLiveOut);
        printf("]");

        if (beg->bbFlags & BBF_VISITED)
            printf(" inner=%u", beg->bbFPinVars);

        printf("\n");

        beg = beg->bbNext;
        if (!beg)
            return;
    } while (beg != end);
}

#if FEATURE_STACK_FP_X87
void Compiler::raDispFPlifeInfo()
{
    BasicBlock* block;

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        GenTreePtr stmt;

        printf("BB%02u: in  = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
        printf("]\n\n");

        VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
        for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            GenTreePtr tree;

            noway_assert(stmt->gtOper == GT_STMT);

            for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
            {
                VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));

                dispLifeSet(this, optAllFloatVars, life);
                printf("   ");
                gtDispTree(tree, 0, NULL, true);
            }

            printf("\n");
        }

        printf("BB%02u: out = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
        printf("]\n\n");
    }
}
#endif // FEATURE_STACK_FP_X87
/*****************************************************************************/
#endif // DEBUG
/*****************************************************************************/

/*****************************************************************************/

void Compiler::raSetRegVarOrder(
    var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
{
    unsigned         normalVarOrderSize;
    const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
    unsigned         index;
    unsigned         listIndex = 0;
    regMaskTP        usedReg   = avoidReg;

    noway_assert(*customVarOrderSize >= normalVarOrderSize);

    if (prefReg)
    {
        /* First place the preferred registers at the start of customVarOrder */

        regMaskTP regBit;
        regNumber regNum;

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNum = normalVarOrder[index];
            regBit = genRegMask(regNum);

            if (usedReg & regBit)
                continue;

            if (prefReg & regBit)
            {
                usedReg |= regBit;
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                prefReg -= regBit;
                if (prefReg == 0)
                    break;
            }
        }

#if CPU_HAS_BYTE_REGS
        /* Then if byteable registers are preferred place them */

        if (prefReg & RBM_BYTE_REG_FLAG)
        {
            for (index = 0; index < normalVarOrderSize; index++)
            {
                regNum = normalVarOrder[index];
                regBit = genRegMask(regNum);

                if (usedReg & regBit)
                    continue;

                if (RBM_BYTE_REGS & regBit)
                {
                    usedReg |= regBit;
                    noway_assert(listIndex < normalVarOrderSize);
                    customVarOrder[listIndex++] = regNum;
                }
            }
        }

#endif // CPU_HAS_BYTE_REGS
    }

    /* Now place all the non-preferred registers */

    for (index = 0; index < normalVarOrderSize; index++)
    {
        regNumber regNum = normalVarOrder[index];
        regMaskTP regBit = genRegMask(regNum);

        if (usedReg & regBit)
            continue;

        usedReg |= regBit;
        noway_assert(listIndex < normalVarOrderSize);
        customVarOrder[listIndex++] = regNum;
    }

    if (avoidReg)
    {
        /* Now place the "avoid" registers */

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNumber regNum = normalVarOrder[index];
            regMaskTP regBit = genRegMask(regNum);

            if (avoidReg & regBit)
            {
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                avoidReg -= regBit;
                if (avoidReg == 0)
                    break;
            }
        }
    }

    *customVarOrderSize = listIndex;
    noway_assert(listIndex == normalVarOrderSize);
}
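// Illustrative usage sketch (not part of the original file): the custom order lists
// preferred registers first and avoided registers last. The masks here are
// hypothetical x86 choices.
#if 0
void customOrderExample(Compiler* comp)
{
    regNumber order[REG_COUNT];
    unsigned  orderSize = _countof(order);
    // Prefer EAX/EDX, avoid ESI: order[] then holds EAX and EDX (in table
    // order) first, the remaining integer registers next, and ESI last.
    comp->raSetRegVarOrder(TYP_INT, order, &orderSize, RBM_EAX | RBM_EDX, RBM_ESI);
}
#endif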

/*****************************************************************************
 *
 *  Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
 */

void Compiler::raSetupArgMasks(RegState* regState)
{
    /* Determine the registers holding incoming register arguments */
    /*  and setup raAvoidArgRegMask to the set of registers that we  */
    /*  may want to avoid when enregistering the locals.            */

    regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    raAvoidArgRegMask                  = RBM_NONE;

    LclVarDsc* argsEnd = lvaTable + info.compArgsCount;

    for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
    {
        noway_assert(argDsc->lvIsParam);

        // Is it a register argument ?
        if (!argDsc->lvIsRegArg)
            continue;

        // only process args that apply to the current register file
        if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
        {
            continue;
        }

        // Is it dead on entry ??
        // In certain cases such as when compJmpOpUsed is true,
        // or when we have a generic type context arg that we must report
        // then the arguments have to be kept alive throughout the prolog.
        // So we have to consider it as live on entry.
        //
        bool keepArgAlive = compJmpOpUsed;
        if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
            ((lvaTable + info.compTypeCtxtArg) == argDsc))
        {
            keepArgAlive = true;
        }

        if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
        {
            continue;
        }

        // The code to set the regState for each arg is outlined for shared use
        // by linear scan
        regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);

        // Do we need to try to avoid this incoming arg register?

        // If it's not tracked, don't do the stuff below.
        if (!argDsc->lvTracked)
            continue;

        // If the incoming arg is used after a call it is live across
        //  a call and will have to be allocated to a caller saved
        //  register anyway (a very common case).
        //
        // In this case it is pointless to ask the higher ref count
        //  locals to avoid using the incoming arg register

        unsigned argVarIndex = argDsc->lvVarIndex;

        /* Does the incoming register and the arg variable interfere? */

        if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
        {
            // No they do not interfere,
            //  so we add inArgReg to raAvoidArgRegMask

            raAvoidArgRegMask |= genRegMask(inArgReg);
        }
#ifdef _TARGET_ARM_
        if (argDsc->lvType == TYP_DOUBLE)
        {
            // Avoid the double register argument pair for register allocation.
            if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
            {
                raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
            }
        }
#endif
    }
}

#endif // LEGACY_BACKEND

// The code to set the regState for each arg is outlined for shared use
// by linear scan. (It is not shared for System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
{
    regNumber inArgReg  = argDsc->lvArgReg;
    regMaskTP inArgMask = genRegMask(inArgReg);

    if (regState->rsIsFloat)
    {
        noway_assert(inArgMask & RBM_FLTARG_REGS);
    }
    else //  regState is for the integer registers
    {
        // This might be the fixed return buffer register argument (on ARM64)
        // We check and allow inArgReg to be theFixedRetBuffReg
        if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
        {
            // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
            noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
            // We should have recorded the variable number for the return buffer arg
            noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
        }
        else // we have a regular arg
        {
            noway_assert(inArgMask & RBM_ARG_REGS);
        }
    }

    regState->rsCalleeRegArgMaskLiveIn |= inArgMask;

#ifdef _TARGET_ARM_
    if (argDsc->lvType == TYP_DOUBLE)
    {
        if (info.compIsVarArgs || opts.compUseSoftFP)
        {
            assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
            assert(!regState->rsIsFloat);
        }
        else
        {
            assert(regState->rsIsFloat);
            assert(emitter::isDoubleReg(inArgReg));
        }
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
    else if (argDsc->lvType == TYP_LONG)
    {
        assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
        assert(!regState->rsIsFloat);
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
#endif // _TARGET_ARM_

#if FEATURE_MULTIREG_ARGS
    if (varTypeIsStruct(argDsc->lvType))
    {
        if (argDsc->lvIsHfaRegArg())
        {
            assert(regState->rsIsFloat);
            unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
            for (unsigned i = 1; i < cSlots; i++)
            {
                assert(inArgReg + i <= LAST_FP_ARGREG);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
            }
        }
        else
        {
            unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
            for (unsigned i = 1; i < cSlots; i++)
            {
                regNumber nextArgReg = (regNumber)(inArgReg + i);
                if (nextArgReg > REG_ARG_LAST)
                {
                    break;
                }
                assert(regState->rsIsFloat == false);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
            }
        }
    }
#endif // FEATURE_MULTIREG_ARGS

    return inArgReg;
}
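// Illustrative sketch (not part of the original file): how a multi-reg struct
// argument marks several consecutive argument registers as live-in, one mask bit
// per pointer-sized slot, as in the FEATURE_MULTIREG_ARGS loop above.
#if 0
static unsigned multiRegArgMaskExample(unsigned firstRegIndex, unsigned cSlots)
{
    unsigned mask = 0;
    for (unsigned i = 0; i < cSlots; i++)
    {
        // includes slot 0 here; the real code adds the first register separately
        mask |= 1u << (firstRegIndex + i);
    }
    return mask; // e.g. firstRegIndex == 0, cSlots == 2 -> R0|R1 on ARM
}
#endif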

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

/*****************************************************************************
 *
 *  Assign variables to live in registers, etc.
 */

void Compiler::raAssignVars()
{
#ifdef DEBUG
    if (verbose)
        printf("*************** In raAssignVars()\n");
#endif
    /* We need to keep track of which registers we ever touch */

    codeGen->regSet.rsClearRegsModified();

#if FEATURE_STACK_FP_X87
    // FP register allocation
    raEnregisterVarsStackFP();
    raGenerateFPRefCounts();
#endif

    /* Predict registers used by code generation */
    rpPredictRegUse(); // New reg predictor/allocator

    // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
    // so that the gc tracking logic and lvMustInit logic will ignore them.

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        if (varDsc->lvType != TYP_STRUCT)
            continue;

        if (!varDsc->lvPromoted)
            continue;

        if (varDsc->lvIsParam)
            continue;

        if (varDsc->lvRefCnt > 0)
            continue;

#ifdef DEBUG
        if (verbose)
        {
            printf("Mark unused struct local V%02u\n", lclNum);
        }

        lvaPromotionType promotionType = lvaGetPromotionType(varDsc);

        if (promotionType == PROMOTION_TYPE_DEPENDENT)
        {
            // This should only happen when all its field locals are unused as well.

            for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
                 varNum++)
            {
                noway_assert(lvaTable[varNum].lvRefCnt == 0);
            }
        }
        else
        {
            noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
        }

        varDsc->lvUnusedStruct = 1;
#endif

        // Change such struct locals to ints

        varDsc->lvType = TYP_INT; // Bash to a non-gc type.
        noway_assert(!varDsc->lvTracked);
        noway_assert(!varDsc->lvRegister);
        varDsc->lvOnFrame  = false; // Force it not to be onstack.
        varDsc->lvMustInit = false; // Force not to init it.
        varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
    }
}

/*****************************************************************************/
/*****************************************************************************/

/*****************************************************************************
 *
 *   Given a regNumber return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForReg(regNumber reg)
{
    return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
}

/*****************************************************************************
 *
 *   Given a varIndex return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
{
    return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return the correct varIndex value
 */

inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
{
    return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return true if it specifies a Txx register
 */

inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
{
    if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
        return true;
    else
        return false;
}

/*****************************************************************************
 *
 *   Given a regmask return the correct predictReg enum value
 */

static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
{
    rpPredictReg result = PREDICT_NONE;
    if (regmask != 0) /* Check if regmask has any bits set */
    {
        if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
        {
            DWORD reg = 0;
            assert(FitsIn<DWORD>(regmask));
            BitScanForward(&reg, (DWORD)regmask);
            return rpGetPredictForReg((regNumber)reg);
        }

#if defined(_TARGET_ARM_)
        /* It has multiple bits set */
        else if (regmask == (RBM_R0 | RBM_R1))
        {
            result = PREDICT_PAIR_R0R1;
        }
        else if (regmask == (RBM_R2 | RBM_R3))
        {
            result = PREDICT_PAIR_R2R3;
        }
#elif defined(_TARGET_X86_)
        /* It has multiple bits set */
        else if (regmask == (RBM_EAX | RBM_EDX))
        {
            result = PREDICT_PAIR_EAXEDX;
        }
        else if (regmask == (RBM_ECX | RBM_EBX))
        {
            result = PREDICT_PAIR_ECXEBX;
        }
#endif
        else /* It doesn't match anything */
        {
            result = PREDICT_NONE;
            assert(!"unreachable");
            NO_WAY("bad regpair");
        }
    }
    return result;
}
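// Illustrative sketch (not part of the original file): the single-bit test used
// above. Subtracting 1 clears the lowest set bit and sets all bits below it, so
// ANDing with the original value is zero iff exactly one bit was set.
#if 0
static bool hasExactlyOneBitSet(unsigned mask)
{
    // e.g. 0b0100 - 1 == 0b0011, and 0b0100 & 0b0011 == 0
    return (mask != 0) && (((mask - 1) & mask) == 0);
}
#endif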

/*****************************************************************************
 *
 *  Record a variable to register(s) interference
 */

bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
{
    bool addedIntf = false;

    if (regMask != 0)
    {
        for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
        {
            regMaskTP regBit = genRegMask(regNum);

            if (regMask & regBit)
            {
                VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
                if (!VarSetOps::IsEmpty(this, newIntf))
                {
#ifdef DEBUG
                    if (verbose)
                    {
                        VarSetOps::Iter newIntfIter(this, newIntf);
                        unsigned        varNum = 0;
                        while (newIntfIter.NextElem(&varNum))
                        {
                            unsigned   lclNum = lvaTrackedToVarNum[varNum];
                            LclVarDsc* varDsc = &lvaTable[lclNum]; // lvaTable is indexed by lclNum, not tracked index
#if FEATURE_FP_REGALLOC
                            // Only print the useful interferences
                            // i.e. floating point LclVar interference with floating point registers
                            //         or integer LclVar interference with general purpose registers
                            if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
#endif
                            {
                                printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
                                       getRegName(regNum), msg);
                            }
                        }
                    }
#endif
                    addedIntf = true;
                    VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
                }

                regMask -= regBit;
                if (regMask == 0)
                    break;
            }
        }
    }
    return addedIntf;
}
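// Illustrative sketch (not part of the original file): the "new interference" test
// above in miniature. Only live variables not already in the register's
// interference set are newly recorded; a set difference finds them.
#if 0
static unsigned newInterferenceExample(unsigned lifeSet, unsigned alreadyIntf)
{
    // analogue of VarSetOps::Diff(this, life, raLclRegIntf[regNum])
    return lifeSet & ~alreadyIntf;
}
#endif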

/*****************************************************************************
 *
 *  Record a new variable to variable(s) interference
 */

bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
{
    noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
    noway_assert(!VarSetOps::IsEmpty(this, intfVar));

    VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
    VarSetOps::AddElemD(this, oneVar, varNum);

    bool newIntf = fgMarkIntf(intfVar, oneVar);

    if (newIntf)
        rpAddedVarIntf = true;

#ifdef DEBUG
    if (verbose && newIntf)
    {
        for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
        {
            if (VarSetOps::IsMember(this, intfVar, oneNum))
            {
                unsigned lclNum = lvaTrackedToVarNum[varNum];
                unsigned lclOne = lvaTrackedToVarNum[oneNum];
                printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
                       oneNum, msg);
            }
        }
    }
#endif

    return newIntf;
}

/*****************************************************************************
 *
 *   Determine preferred register mask for a given predictReg value
 */

inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
{
    if (rpHasVarIndexForPredict(predictReg))
        predictReg = PREDICT_REG;

    noway_assert((unsigned)predictReg < _countof(rpPredictMap));
    noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);

    regMaskTP regAvailForType = rpPredictMap[predictReg];
    if (varTypeIsFloating(type))
    {
        regAvailForType &= RBM_ALLFLOAT;
    }
    else
    {
        regAvailForType &= RBM_ALLINT;
    }
#ifdef _TARGET_ARM_
    if (type == TYP_DOUBLE)
    {
        if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
        {
            // Fix 388433 ARM JitStress WP7
            if ((regAvailForType & RBM_DBL_REGS) != 0)
            {
                regAvailForType |= (regAvailForType << 1);
            }
            else
            {
                regAvailForType = RBM_NONE;
            }
        }
    }
#endif
    return regAvailForType;
}
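// Illustrative sketch (not part of the original file): the ARM TYP_DOUBLE widening
// above. A double occupies two consecutive float registers, so when an even
// register is available its odd partner must be claimed too; ORing in the mask
// shifted left by one does exactly that.
#if 0
static unsigned widenToDoubleMaskExample(unsigned evenFloatMask)
{
    // e.g. 0b0101 (f0 and f2 available) -> 0b1111 (pairs f0:f1 and f2:f3)
    return evenFloatMask | (evenFloatMask << 1);
}
#endif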

/*****************************************************************************
 *
 *  Predict register choice for a type.
 *
 *  Adds the predicted registers to rsModifiedRegsMask.
 */
regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
{
    regMaskTP preferReg = rpPredictRegMask(predictReg, type);
    regNumber regNum;
    regMaskTP regBits;

    // Add any reserved register to the lockedRegs
    lockedRegs |= codeGen->regSet.rsMaskResvd;

    /* Clear out the lockedRegs from preferReg */
    preferReg &= ~lockedRegs;

    if (rpAsgVarNum != -1)
    {
        noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));

        /* Don't pick the register used by rpAsgVarNum either */
        LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
        noway_assert(tgtVar->lvRegNum != REG_STK);

        preferReg &= ~genRegMask(tgtVar->lvRegNum);
    }

    switch (type)
    {
        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_UBYTE:
        case TYP_SHORT:
        case TYP_USHORT:
        case TYP_INT:
        case TYP_UINT:
        case TYP_REF:
        case TYP_BYREF:
#ifdef _TARGET_AMD64_
        case TYP_LONG:
#endif // _TARGET_AMD64_

            // expand preferReg to all non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);

            if (preferReg == 0) // no bits set?
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            if (preferReg != 0)
            {
                /* Iterate the registers in the order specified by rpRegTmpOrder */

                for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
                {
                    regNum  = rpRegTmpOrder[index];
                    regBits = genRegMask(regNum);

                    if ((preferReg & regBits) == regBits)
                    {
                        goto RET;
                    }
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#ifndef _TARGET_AMD64_
        case TYP_LONG:

            if ((preferReg == 0) ||                   // no bits set?
                ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
            {
                // expand preferReg to all non-locked registers
                preferReg = RBM_ALLINT & ~lockedRegs;
            }

            if (preferReg == 0) // no bits set?
            {
                // Add EAX:EDX to the registers
                // (The jit will introduce two spill temps)
                preferReg = RBM_PAIR_TMP;
                rpPredictSpillCnt += 2;
#ifdef DEBUG
                if (verbose)
                    printf("Predict two spill temps\n");
#endif
            }
            else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
            {
                if ((preferReg & RBM_PAIR_TMP_LO) == 0)
                {
                    // Add EAX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_LO;
                }
                else
                {
                    // Add EDX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_HI;
                }
                rpPredictSpillCnt++;
#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            regPairNo regPair;
            regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
            if (regPair != REG_PAIR_NONE)
            {
                regBits = genRegPairMask(regPair);
                goto RET;
            }

            /* Otherwise we have allocated all registers, so do nothing */
            break;
#endif // _TARGET_AMD64_

#ifdef _TARGET_ARM_
        case TYP_STRUCT:
#endif

        case TYP_FLOAT:
        case TYP_DOUBLE:

#if FEATURE_FP_REGALLOC
            regMaskTP restrictMask;
            restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
            assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);

            // expand preferReg to all available non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
            regMaskTP preferDouble;
            preferDouble = preferReg & (preferReg >> 1);

            if ((preferReg == 0) // no bits set?
#ifdef _TARGET_ARM_
                || ((type == TYP_DOUBLE) &&
                    ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
#endif
                )
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE_FLT;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp (float)\n");
#endif
            }

            assert(preferReg != 0);

            /* Iterate the registers in the order specified by raRegFltTmpOrder */

            for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
            {
                regNum  = raRegFltTmpOrder[index];
                regBits = genRegMask(regNum);

                if (varTypeIsFloating(type))
                {
#ifdef _TARGET_ARM_
                    if (type == TYP_DOUBLE)
                    {
                        if ((regBits & RBM_DBL_REGS) == 0)
                        {
                            continue; // We must restrict the set to the double registers
                        }
                        else
                        {
                            // TYP_DOUBLE use two consecutive registers
                            regBits |= genRegMask(REG_NEXT(regNum));
                        }
                    }
#endif
                    // See if COMPlus_JitRegisterFP is restricting this FP register
                    //
                    if ((restrictMask & regBits) != regBits)
                        continue;
                }

                if ((preferReg & regBits) == regBits)
                {
                    goto RET;
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#else // !FEATURE_FP_REGALLOC

            return RBM_NONE;

#endif

        default:
            noway_assert(!"unexpected type in reg use prediction");
    }

    /* Abnormal return */
    noway_assert(!"Ran out of registers in rpPredictRegPick");
    return RBM_NONE;

RET:
    /*
     *  If during the first prediction we need to allocate
     *  one of the registers that we used for coloring locals
     *  then flag this by setting rpPredictAssignAgain.
     *  We will have to go back and repredict the registers
     */
    if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
        rpPredictAssignAgain = true;

    // Add a register interference to each of the last use variables
    if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
    {
        VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, lastUse, rpLastUseVars);
        VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
        // While we still have any lastUse or inPlaceUse bits
        VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));

        VARSET_TP       varAsSet(VarSetOps::MakeEmpty(this));
        VarSetOps::Iter iter(this, useUnion);
        unsigned        varNum = 0;
        while (iter.NextElem(&varNum))
        {
            // We'll need this for one of the calls...
            VarSetOps::ClearD(this, varAsSet);
            VarSetOps::AddElemD(this, varAsSet, varNum);

            // If this varBit and lastUse?
            if (VarSetOps::IsMember(this, lastUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
            }

            // If this varBit and inPlaceUse?
            if (VarSetOps::IsMember(this, inPlaceUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
            }
        }
    }
    codeGen->regSet.rsSetRegsModified(regBits);

    return regBits;
}
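// Illustrative sketch (not part of the original file): the core pick-loop pattern
// used above -- walk a fixed temp-register preference order and take the first
// register whose bits are all still available in the preference mask.
#if 0
static int pickFirstAvailableExample(const int* order, unsigned count, unsigned preferReg)
{
    for (unsigned i = 0; i < count; i++)
    {
        unsigned bit = 1u << order[i];
        if ((preferReg & bit) == bit)
            return order[i];
    }
    return -1; // all registers allocated
}
#endif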

/*****************************************************************************
 *
 *  Predict integer register use for generating an address mode for a tree,
 *  by setting tree->gtUsedRegs to all registers used by this tree and its
 *  children.
 *    tree       - is the child of a GT_IND node
 *    type       - the type of the GT_IND node (floating point/integer)
 *    lockedRegs - are the registers which are currently held by
 *                 a previously evaluated node.
 *    rsvdRegs   - registers which should not be allocated because they will
 *                 be needed to evaluate a node in the future
 *               - Also if rsvdRegs has the RBM_LASTUSE bit set then
 *                 the rpLastUseVars set should be saved and restored
 *                 so that we don't add any new variables to rpLastUseVars
 *    lenCSE     - is non-NULL only when we have a lenCSE expression
 *
 *  Return the scratch registers to be held by this tree. (one or two registers
 *  to form an address expression)
 */

regMaskTP Compiler::rpPredictAddressMode(
    GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
{
    GenTreePtr op1;
    GenTreePtr op2;
    GenTreePtr opTemp;
    genTreeOps oper = tree->OperGet();
    regMaskTP  op1Mask;
    regMaskTP  op2Mask;
    regMaskTP  regMask;
    ssize_t    sh;
    ssize_t    cns = 0;
    bool       rev;
    bool       hasTwoAddConst     = false;
    bool       restoreLastUseVars = false;
    VARSET_TP  oldLastUseVars(VarSetOps::MakeEmpty(this));

    /* do we need to save and restore the rpLastUseVars set ? */
    if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
    {
        restoreLastUseVars = true;
        VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
    }
    rsvdRegs &= ~RBM_LASTUSE;

    /* if not an add, then just force it to a register */

    if (oper != GT_ADD)
    {
        if (oper == GT_ARR_ELEM)
        {
            regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
            goto DONE;
        }
        else
        {
            goto NO_ADDR_EXPR;
        }
    }

    op1 = tree->gtOp.gtOp1;
    op2 = tree->gtOp.gtOp2;
    rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);

    /* look for (x + y) + icon address mode */

    if (op2->OperGet() == GT_CNS_INT)
    {
        cns = op2->gtIntCon.gtIconVal;

        /* if not an add, then just force op1 into a register */
        if (op1->OperGet() != GT_ADD)
            goto ONE_ADDR_EXPR;

        hasTwoAddConst = true;

        /* Record the 'rev' flag, reverse evaluation order */
        rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);

        op2 = op1->gtOp.gtOp2;
        op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
    }

    /* Check for CNS_INT or LSH of CNS_INT in op2 slot */

    sh = 0;
    if (op2->OperGet() == GT_LSH)
    {
        if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
        {
            sh     = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
            opTemp = op2->gtOp.gtOp1;
        }
        else
        {
            opTemp = NULL;
        }
    }
    else
    {
        opTemp = op2;
    }

    if (opTemp != NULL)
    {
        if (opTemp->OperGet() == GT_NOP)
        {
            opTemp = opTemp->gtOp.gtOp1;
        }

        // Is this a const operand?
        if (opTemp->OperGet() == GT_CNS_INT)
        {
            // Compute the new cns value that Codegen will end up using
            cns += (opTemp->gtIntCon.gtIconVal << sh);

            goto ONE_ADDR_EXPR;
        }
    }

    /* Check for LSH in op1 slot */

    if (op1->OperGet() != GT_LSH)
        goto TWO_ADDR_EXPR;

    opTemp = op1->gtOp.gtOp2;

    if (opTemp->OperGet() != GT_CNS_INT)
        goto TWO_ADDR_EXPR;

    sh = opTemp->gtIntCon.gtIconVal;

    /* Check for LSH of 0, special case */
    if (sh == 0)
        goto TWO_ADDR_EXPR;

#if defined(_TARGET_XARCH_)

    /* Check for LSH of 1 2 or 3 */
    if (sh > 3)
        goto TWO_ADDR_EXPR;

#elif defined(_TARGET_ARM_)

    /* Check for LSH of 1 to 30 */
    if (sh > 30)
        goto TWO_ADDR_EXPR;

#else

    goto TWO_ADDR_EXPR;

#endif

    /* Matched a leftShift by 'sh' subtree, move op1 down */
    op1 = op1->gtOp.gtOp1;

TWO_ADDR_EXPR:

    /* Now we have to evaluate op1 and op2 into registers */

    /* Evaluate op1 and op2 in the correct order */
    if (rev)
    {
        op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
        op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
    }
    else
    {
        op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
        op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
    }

    /*  If op1 and op2 must be spilled and reloaded then
     *  op1 and op2 might be reloaded into the same register
     *  This can only happen when all the registers are lockedRegs
     */
    if ((op1Mask == op2Mask) && (op1Mask != 0))
    {
        /* We'll need to grab a different register for op2 */
        op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
    }

#ifdef _TARGET_ARM_
    // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
    //      [op2 + op1<<sh + cns]
    // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
    //
    if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
    {
        op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
    }

    //
    // On the ARM we will need at least one scratch register for trees that have this form:
    //     [op1 + op2 + cns] or  [op1 + op2<<sh + cns]
    // or for a float/double or long when we have both op1 and op2
    // or when we have a 'cns' that is too large for the ld/st instruction
    //
    if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
    {
        op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
    }

    //
    // If we create a CSE that immediately dies then we may need to add an additional register interference
    // so we don't color the CSE into R3
    //
    if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
    {
        opTemp = op2->gtOp.gtOp2;
        if (opTemp->OperGet() == GT_LCL_VAR)
        {
            unsigned   varNum = opTemp->gtLclVar.gtLclNum;
            LclVarDsc* varDsc = &lvaTable[varNum];

            if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
            {
                rpRecordRegIntf(RBM_TMP_0,
                                VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
            }
        }
    }
#endif

    regMask          = (op1Mask | op2Mask);
    tree->gtUsedRegs = (regMaskSmall)regMask;
    goto DONE;

ONE_ADDR_EXPR:

    /* now we have to evaluate op1 into a register */

    op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
    op2Mask = RBM_NONE;

#ifdef _TARGET_ARM_
    //
1605     // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1606     // instruction
1607     //
1608     if (!codeGen->validDispForLdSt(cns, type))
1609     {
1610         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1611     }
1612 #endif
1613
1614     regMask          = (op1Mask | op2Mask);
1615     tree->gtUsedRegs = (regMaskSmall)regMask;
1616     goto DONE;
1617
1618 NO_ADDR_EXPR:
1619
1620 #if !CPU_LOAD_STORE_ARCH
1621     if (oper == GT_CNS_INT)
1622     {
1623         /* Indirect of a constant does not require a register */
1624         regMask = RBM_NONE;
1625     }
1626     else
1627 #endif
1628     {
1629         /* now we have to evaluate tree into a register */
1630         regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1631     }
1632
1633 DONE:
1634     regMaskTP regUse = tree->gtUsedRegs;
1635
1636     if (!VarSetOps::IsEmpty(this, compCurLife))
1637     {
1638         // Add interference between the current set of live variables and
1639         //  the set of temporary registers needed to evaluate the subtree
1640         if (regUse)
1641         {
1642             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1643         }
1644     }
1645
1646     /* Do we need to restore the oldLastUseVars value? */
1647     if (restoreLastUseVars)
1648     {
1649         /*
1650          *  If we used a GT_ASG targeted register then we need to add
1651          *  a variable interference between any new last use variables
1652          *  and the GT_ASG targeted register
1653          */
1654         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1655         {
1656             rpRecordVarIntf(rpAsgVarNum,
1657                             VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1658         }
1659         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1660     }
1661
1662     return regMask;
1663 }
1664
1665 /*****************************************************************************
1666  *  Record that the local variable 'lclNum' participates in a GC ref assignment
1667  *  and, when NOGC write barriers are in use, keep it out of the write barrier register.
1668  */
1669
1670 void Compiler::rpPredictRefAssign(unsigned lclNum)
1671 {
1672     LclVarDsc* varDsc = lvaTable + lclNum;
1673
1674     varDsc->lvRefAssign = 1;
1675
1676 #if NOGC_WRITE_BARRIERS
1677 #ifdef DEBUG
1678     if (verbose)
1679     {
1680         if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1681             printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1682                    varDsc->lvVarIndex);
1683     }
1684 #endif
1685
1686     /* Make sure that write barrier pointer variables never land in EDX */
1687     VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
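
         // (With NOGC write barriers, the write-barrier helper receives its
         // pointer argument in the fixed register REG_WRITE_BARRIER, i.e. EDX,
         // so variables participating in a GC ref assignment are kept out of
         // EDX via the interference recorded above.)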
1688 #endif // NOGC_WRITE_BARRIERS
1689 }
1690
1691 /*****************************************************************************
1692  *
1693  * Predict the internal temp physical register usage for a block assignment tree,
1694  * by setting tree->gtUsedRegs.
1695  * Returns a mask of registers that the generated block-op code will trash
1696  * (e.g. via helper calls or write barriers); the caller must record
1697  * interference for these registers.
1698  *
1699  * Note that, unlike rpPredictTreeRegUse, the return value here is not the
1700  * scratch register mask: the scratch registers needed to evaluate the tree
1701  * (including any fixed registers required by the block-op code) are recorded
1702  * in tree->gtUsedRegs, while only the trashed registers are returned.
1703  *
1704  *    tree       - is a block assignment (GT_ASG) node performing an InitBlk/CopyBlk
1705  *    predictReg - what type of register does the tree need
1706  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1707  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1708  *    rsvdRegs   - registers which should not be allocated because they will
1709  *                 be needed to evaluate a node in the future
1710  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1711  *                 the rpLastUseVars set should be saved and restored
1712  *                 so that we don't add any new variables to rpLastUseVars.
1713  */
1714 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr   tree,
1715                                           rpPredictReg predictReg,
1716                                           regMaskTP    lockedRegs,
1717                                           regMaskTP    rsvdRegs)
1718 {
1719     regMaskTP regMask         = RBM_NONE;
1720     regMaskTP interferingRegs = RBM_NONE;
1721
1722     bool        hasGCpointer  = false;
1723     bool        dstIsOnStack  = false;
1724     bool        useMemHelper  = false;
1725     bool        useBarriers   = false;
1726     GenTreeBlk* dst           = tree->gtGetOp1()->AsBlk();
1727     GenTreePtr  dstAddr       = dst->Addr();
1728     GenTreePtr  srcAddrOrFill = tree->gtGetOp2IfPresent();
1729
1730     size_t blkSize = dst->gtBlkSize;
1731
1732     hasGCpointer = (dst->HasGCPtr());
1733
1734     bool isCopyBlk = tree->OperIsCopyBlkOp();
1735     bool isCopyObj = isCopyBlk && hasGCpointer;
1736     bool isInitBlk = tree->OperIsInitBlkOp();
1737
1738     if (isCopyBlk)
1739     {
1740         assert(srcAddrOrFill->OperIsIndir());
1741         srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1742     }
1743     else
1744     {
1745         // For initBlk, we don't need to worry about the GC pointers.
1746         hasGCpointer = false;
1747     }
1748
1749     if (blkSize != 0)
1750     {
1751         if (isCopyObj)
1752         {
1753             dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1754         }
1755
1756         if (isInitBlk)
1757         {
1758             if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1759             {
1760                 useMemHelper = true;
1761             }
1762         }
1763     }
1764     else
1765     {
1766         useMemHelper = true;
1767     }
1768
1769     if (hasGCpointer && !dstIsOnStack)
1770     {
1771         useBarriers = true;
1772     }
1773
1774 #ifdef _TARGET_ARM_
1775     //
1776     // On ARM, for COPYBLK and INITBLK we have special treatment for constant lengths.
1777     //
1778     if (!useMemHelper && !useBarriers)
1779     {
1780         bool     useLoop        = false;
1781         unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1782
1783         // A mask to use to force the predictor to choose low registers (to reduce code size)
1784         regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1785
1786         // Allow the src and dst to be used in place, unless we use a loop, in which
1787         // case we will need scratch registers as we will be writing to them.
1788         rpPredictReg srcAndDstPredict = PREDICT_REG;
1789
1790         // Will we be using a loop to implement this INITBLK/COPYBLK?
1791         if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1792         {
1793             useLoop          = true;
1794             avoidReg         = RBM_NONE;
1795             srcAndDstPredict = PREDICT_SCRATCH_REG;
1796         }
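
             // For example (assuming TARGET_POINTER_SIZE == 4): a 64-byte CopyBlk
             // has fullStoreCount == 16 and takes the loop path above, while a
             // 16-byte CopyBlk (fullStoreCount == 4) stays fully unrolled and can
             // use the src and dst registers in place.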
1797
1798         if (tree->gtFlags & GTF_REVERSE_OPS)
1799         {
1800             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1801                                            dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1802             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1803         }
1804         else
1805         {
1806             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1807                                            srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1808             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1809         }
1810
1811         // We need at least one scratch register for a copyBlk
1812         if (isCopyBlk)
1813         {
1814             // Pick a low register to reduce the code size
1815             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1816         }
1817
1818         if (useLoop)
1819         {
1820             if (isCopyBlk)
1821             {
1822                 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1823                 // Pick another low register to reduce the code size
1824                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1825             }
1826
1827             // We need a loop index register
1828             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1829         }
1830
1831         tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1832
1833         return interferingRegs;
1834     }
1835 #endif
1836     // In what order should the Dest, Val/Src, and Size operands be evaluated?
1837     GenTreePtr opsPtr[3];
1838     regMaskTP  regsPtr[3];
1839
1840 #if defined(_TARGET_XARCH_)
1841     fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1842
1843     // We're going to use these, might as well make them available now
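         // These fixed registers match the x86 string instructions that codegen
         // uses for block ops: 'rep movs' copies from [ESI] to [EDI] with the
         // count in ECX, while 'rep stos' stores EAX to [EDI] with the count in
         // ECX -- which is why an InitBlk wants EAX rather than ESI.
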
1844
1845     codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1846     if (isCopyBlk)
1847         codeGen->regSet.rsSetRegsModified(RBM_ESI);
1848
1849 #elif defined(_TARGET_ARM_)
1850
1851     if (useMemHelper)
1852     {
1853         // For all other cases that involve non-constants, we just call memcpy/memset
1854         // JIT helpers
1855         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1856         interferingRegs |= RBM_CALLEE_TRASH;
1857 #ifdef DEBUG
1858         if (verbose)
1859             printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1860 #endif
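
             // The argument registers line up with memcpy(dst, src, size) and
             // memset(dst, fill, size): dst in RBM_ARG_0 (r0), src or fill value
             // in RBM_ARG_1 (r1), and size in RBM_ARG_2 (r2).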
1861     }
1862     else // useBarriers
1863     {
1864         assert(useBarriers);
1865         assert(isCopyBlk);
1866
1867         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1868
1869         // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1870         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1871 #ifdef DEBUG
1872         if (verbose)
1873             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1874 #endif
1875     }
1876 #else // !_TARGET_X86_ && !_TARGET_ARM_
1877 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1878 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1879     regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1880     regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1881                                    opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1882     regMask |= regsPtr[0];
1883     opsPtr[0]->gtUsedRegs |= regsPtr[0];
1884     rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1885
1886     regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1887                                    opsPtr2RsvdRegs | RBM_LASTUSE);
1888     regMask |= regsPtr[1];
1889     opsPtr[1]->gtUsedRegs |= regsPtr[1];
1890     rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1891
1892     regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1893     if (opsPtr[2] == nullptr)
1894     {
1895         // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1896         // Note that it is quite possible that no register is required, but this preserves
1897         // former behavior.
1898         regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1899         rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1900     }
1901     else
1902     {
1903         regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1904         opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1905     }
1906     regMask |= opsPtr2UsedRegs;
1907
1908     tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1909     return interferingRegs;
1910 }
1911
1912 /*****************************************************************************
1913  *
1914  * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1915  * Returns a regMask with the internal temp physical register usage for this tree.
1916  *
1917  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1918  * to the set of scratch registers needed when evaluating the tree.
1919  * Generally tree->gtUsedRegs and the returned regMask are the same, except when the
1920  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1921  * predict additional internal temp physical registers to spill into.
1922  *
1923  *    tree       - is the tree whose register usage is being predicted
1924  *    predictReg - what type of register does the tree need
1925  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1926  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1927  *    rsvdRegs   - registers which should not be allocated because they will
1928  *                 be needed to evaluate a node in the future
1929  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1930  *                 the rpLastUseVars set should be saved and restored
1931  *                 so that we don't add any new variables to rpLastUseVars.
1932  */
1933
1934 #pragma warning(disable : 4701)
1935
1936 #ifdef _PREFAST_
1937 #pragma warning(push)
1938 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1939 #endif
1940 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr   tree,
1941                                         rpPredictReg predictReg,
1942                                         regMaskTP    lockedRegs,
1943                                         regMaskTP    rsvdRegs)
1944 {
1945     regMaskTP    regMask = DUMMY_INIT(RBM_ILLEGAL);
1946     regMaskTP    op2Mask;
1947     regMaskTP    tmpMask;
1948     rpPredictReg op1PredictReg;
1949     rpPredictReg op2PredictReg;
1950     LclVarDsc*   varDsc = NULL;
1951     VARSET_TP    oldLastUseVars(VarSetOps::UninitVal());
1952
1953     VARSET_TP varBits(VarSetOps::UninitVal());
1954     VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1955
1956     bool      restoreLastUseVars = false;
1957     regMaskTP interferingRegs    = RBM_NONE;
1958
1959 #ifdef DEBUG
1960     // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1961     noway_assert(tree);
1962     noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1963     noway_assert(RBM_ILLEGAL);
1964     noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1965     /* impossible values, to make sure that we set them */
1966     tree->gtUsedRegs = RBM_ILLEGAL;
1967 #endif
1968
1969     /* Figure out what kind of a node we have */
1970
1971     genTreeOps oper = tree->OperGet();
1972     var_types  type = tree->TypeGet();
1973     unsigned   kind = tree->OperKind();
1974
1975     // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1976     genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1977     if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1978         predictReg = PREDICT_NONE;
1979     else if (rpHasVarIndexForPredict(predictReg))
1980     {
1981         // The only place where predictReg is set to a var is in the PURE
1982         // assignment case where varIndex is the var being assigned to.
1983         // We need to check whether the variable is used between here and
1984         // its redefinition.
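             // For example, when predicting the subtree 'x' of "v = x + v" with
             // v as the target: the next reference to 'v' is a use (GTF_VAR_DEF
             // is not set), so computing 'x' directly into v's register would
             // clobber a value that is still needed; in that case we downgrade
             // the prediction to PREDICT_SCRATCH_REG below.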
1985         unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1986         unsigned lclNum   = lvaTrackedToVarNum[varIndex];
1987         bool     found    = false;
1988         for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1989         {
1990             if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1991             {
1992                 // Is this the pure assignment?
1993                 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1994                 {
1995                     predictReg = PREDICT_SCRATCH_REG;
1996                 }
1997                 found = true;
1998                 break;
1999             }
2000         }
2001         assert(found);
2002     }
2003
2004     if (rsvdRegs & RBM_LASTUSE)
2005     {
2006         restoreLastUseVars = true;
2007         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2008         rsvdRegs &= ~RBM_LASTUSE;
2009     }
2010
2011     /* Is this a constant or leaf node? */
2012
2013     if (kind & (GTK_CONST | GTK_LEAF))
2014     {
2015         bool      lastUse   = false;
2016         regMaskTP enregMask = RBM_NONE;
2017
2018         switch (oper)
2019         {
2020 #ifdef _TARGET_ARM_
2021             case GT_CNS_DBL:
2022                 // Codegen for floating point constants on the ARM is currently
2023                 // movw/movt    rT1, <lo32 bits>
2024                 // movw/movt    rT2, <hi32 bits>
2025                 //  vmov.i2d    dT0, rT1,rT2
2026                 //
2027                 // For TYP_FLOAT one integer register is required
2028                 //
2029                 // These integer register(s) immediately die
2030                 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2031                 if (type == TYP_DOUBLE)
2032                 {
2033                     // For TYP_DOUBLE a second integer register is required
2034                     //
2035                     tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2036                 }
2037
2038                 // We also need a floating point register that we keep
2039                 //
2040                 if (predictReg == PREDICT_NONE)
2041                     predictReg = PREDICT_SCRATCH_REG;
2042
2043                 regMask          = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2044                 tree->gtUsedRegs = regMask | tmpMask;
2045                 goto RETURN_CHECK;
2046 #endif
2047
2048             case GT_CNS_INT:
2049             case GT_CNS_LNG:
2050
2051                 if (rpHasVarIndexForPredict(predictReg))
2052                 {
2053                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2054                     rpAsgVarNum       = tgtIndex;
2055
2056                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2057                     predictReg = PREDICT_NONE;
2058
2059                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2060                     tgtVar->lvDependReg = true;
2061
2062                     if (type == TYP_LONG)
2063                     {
2064                         assert(oper == GT_CNS_LNG);
2065
2066                         if (tgtVar->lvOtherReg == REG_STK)
2067                         {
2068                             // We do need one register for a partially enregistered long
2069                             type       = TYP_INT;
2070                             predictReg = PREDICT_SCRATCH_REG;
2071                         }
2072                     }
2073                 }
2074                 else
2075                 {
2076 #if !CPU_LOAD_STORE_ARCH
2077                     /* If the constant is a handle then it will need to have a relocation
2078                        applied to it.  It will need to be loaded into a register.
2079                        But never throw away an existing hint.
2080                        */
2081                     if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2082 #endif
2083                     {
2084                         if (predictReg == PREDICT_NONE)
2085                             predictReg = PREDICT_SCRATCH_REG;
2086                     }
2087                 }
2088                 break;
2089
2090             case GT_NO_OP:
2091                 break;
2092
2093             case GT_CLS_VAR:
2094                 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2095                     (genTypeSize(type) < sizeof(int)))
2096                 {
2097                     predictReg = PREDICT_SCRATCH_REG;
2098                 }
2099 #ifdef _TARGET_ARM_
2100                 // Unaligned loads/stores of floating point values must go through integer register(s) first
2101                 //
2102                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2103                 {
2104                     // These integer register(s) immediately die
2105                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2106                     // Two integer registers are required for a TYP_DOUBLE
2107                     if (type == TYP_DOUBLE)
2108                         tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2109                 }
2110                 // We need a temp register in some cases of loads/stores to a class var
2111                 if (predictReg == PREDICT_NONE)
2112                 {
2113                     predictReg = PREDICT_SCRATCH_REG;
2114                 }
2115 #endif
2116                 if (rpHasVarIndexForPredict(predictReg))
2117                 {
2118                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2119                     rpAsgVarNum       = tgtIndex;
2120
2121                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2122                     predictReg = PREDICT_NONE;
2123
2124                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2125                     tgtVar->lvDependReg = true;
2126
2127                     if (type == TYP_LONG)
2128                     {
2129                         if (tgtVar->lvOtherReg == REG_STK)
2130                         {
2131                             // We do need one register for a partially enregistered long
2132                             type       = TYP_INT;
2133                             predictReg = PREDICT_SCRATCH_REG;
2134                         }
2135                     }
2136                 }
2137                 break;
2138
2139             case GT_LCL_FLD:
2140 #ifdef _TARGET_ARM_
2141                 // Check for a misalignment on a Floating Point field
2142                 //
2143                 if (varTypeIsFloating(type))
2144                 {
2145                     if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2146                     {
2147                         // These integer register(s) immediately die
2148                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2149                         // Two integer registers are required for a TYP_DOUBLE
2150                         if (type == TYP_DOUBLE)
2151                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2152                     }
2153                 }
2154 #endif
2155                 __fallthrough;
2156
2157             case GT_LCL_VAR:
2158             case GT_REG_VAR:
2159
2160                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2161
2162                 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2163                 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2164                 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2165
2166 #if FEATURE_STACK_FP_X87
2167                 // If it's a floating point var, there's nothing to do
2168                 if (varTypeIsFloating(type))
2169                 {
2170                     tree->gtUsedRegs = RBM_NONE;
2171                     regMask          = RBM_NONE;
2172                     goto RETURN_CHECK;
2173                 }
2174 #endif
2175
2176                 // If the variable is already a register variable, no need to go further.
2177                 if (oper == GT_REG_VAR)
2178                     break;
2179
2180                 /* Apply the type of predictReg to the LCL_VAR */
2181
2182                 if (predictReg == PREDICT_REG)
2183                 {
2184                 PREDICT_REG_COMMON:
2185                     if (varDsc->lvRegNum == REG_STK)
2186                         break;
2187
2188                     goto GRAB_COUNT;
2189                 }
2190                 else if (predictReg == PREDICT_SCRATCH_REG)
2191                 {
2192                     noway_assert(predictReg == PREDICT_SCRATCH_REG);
2193
2194                     /* Is this the last use of a local var?   */
2195                     if (lastUse)
2196                     {
2197                         if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2198                             goto PREDICT_REG_COMMON;
2199                     }
2200                 }
2201                 else if (rpHasVarIndexForPredict(predictReg))
2202                 {
2203                     /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2204                     {
2205                         unsigned   tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2206                         LclVarDsc* tgtVar    = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2207                         VarSetOps::MakeSingleton(this, tgtIndex1);
2208
2209                         noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2210                         noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2211 #ifndef _TARGET_AMD64_
2212                         // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2213                         // so this assert is meaningless
2214                         noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2215 #endif // !_TARGET_AMD64_
2216
2217                         if (varDsc->lvTracked)
2218                         {
2219                             unsigned srcIndex;
2220                             srcIndex = varDsc->lvVarIndex;
2221
2222                             // If this variable has its last use here then we will prefer
2223                             // to color it to the same register as tgtVar.
2224                             if (lastUse)
2225                             {
2226                                 /*
2227                                  *  Add an entry in the lvaVarPref graph to indicate
2228                                  *  that it would be worthwhile to color these two variables
2229                                  *  into the same physical register.
2230                                  *  This will help us avoid having an extra copy instruction
2231                                  */
2232                                 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2233                                 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2234                             }
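
                                 // For example, for "tgt = src" where this is
                                 // src's last use, coloring both variables to the
                                 // same register turns the assignment into a
                                 // no-op instead of a register-to-register move.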
2235
2236                             // Add a variable interference from srcIndex to each of the last use variables
2237                             if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2238                             {
2239                                 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2240                             }
2241                         }
2242                         rpAsgVarNum = tgtIndex1;
2243
2244                         /* We will rely on the target enregistered variable from the GT_ASG */
2245                         varDsc = tgtVar;
2246                     }
2247                 GRAB_COUNT:
2248                     unsigned grabCount;
2249                     grabCount = 0;
2250
2251                     if (genIsValidFloatReg(varDsc->lvRegNum))
2252                     {
2253                         enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2254                     }
2255                     else
2256                     {
2257                         enregMask = genRegMask(varDsc->lvRegNum);
2258                     }
2259
2260 #ifdef _TARGET_ARM_
2261                     if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2262                     {
2263                         // We need to compute the intermediate value using a TYP_DOUBLE
2264                         // but we are storing the result in a TYP_FLOAT enregistered variable
2265                         //
2266                         grabCount++;
2267                     }
2268                     else
2269 #endif
2270                     {
2271                         /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2272                         if (enregMask & (rsvdRegs | lockedRegs))
2273                         {
2274                             grabCount++;
2275                         }
2276 #ifndef _TARGET_64BIT_
2277                         if (type == TYP_LONG)
2278                         {
2279                             if (varDsc->lvOtherReg != REG_STK)
2280                             {
2281                                 tmpMask = genRegMask(varDsc->lvOtherReg);
2282                                 enregMask |= tmpMask;
2283
2284                                 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2285                                 if (tmpMask & (rsvdRegs | lockedRegs))
2286                                     grabCount++;
2287                             }
2288                             else // lvOtherReg == REG_STK
2289                             {
2290                                 grabCount++;
2291                             }
2292                         }
2293 #endif // _TARGET_64BIT_
2294                     }
2295
2296                     varDsc->lvDependReg = true;
2297
2298                     if (grabCount == 0)
2299                     {
2300                         /* Does not need a register */
2301                         predictReg = PREDICT_NONE;
2302                         // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2303                         VarSetOps::UnionD(this, rpUseInPlace, varBits);
2304                     }
2305                     else // (grabCount > 0)
2306                     {
2307 #ifndef _TARGET_64BIT_
2308                         /* If the type is TYP_LONG and we only need one register, change the type to TYP_INT */
2309                         if ((type == TYP_LONG) && (grabCount == 1))
2310                         {
2311                             /* We will need to pick one register */
2312                             type = TYP_INT;
2313                             // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2314                             VarSetOps::UnionD(this, rpUseInPlace, varBits);
2315                         }
2316                         noway_assert((type == TYP_DOUBLE) ||
2317                                      (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2318 #else  // !_TARGET_64BIT_
2319                         noway_assert(grabCount == 1);
2320 #endif // !_TARGET_64BIT_
2321                     }
2322                 }
2323                 else if (type == TYP_STRUCT)
2324                 {
2325 #ifdef _TARGET_ARM_
2326                     // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2327                     //        predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2328                     //        As a workaround we just bash it to PREDICT_NONE here
2329                     //
2330                     if (predictReg != PREDICT_NONE)
2331                         predictReg = PREDICT_NONE;
2332 #endif
2333                     // Currently predictReg is saying that we will not need any scratch registers
2334                     noway_assert(predictReg == PREDICT_NONE);
2335
2336                     /* We may need to sign or zero extend a small type when pushing a struct */
2337                     if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2338                     {
2339                         for (unsigned varNum = varDsc->lvFieldLclStart;
2340                              varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2341                         {
2342                             LclVarDsc* fldVar = lvaTable + varNum;
2343
2344                             if (fldVar->lvStackAligned())
2345                             {
2346                                 // When we are stack aligned, Codegen will just use
2347                                 // a push instruction and thus doesn't need any register,
2348                                 // since we can push either a register or a stack frame location
2349                                 continue;
2350                             }
2351
2352                             if (varTypeIsByte(fldVar->TypeGet()))
2353                             {
2354                                 // We will need to reserve one byteable register,
2355                                 //
2356                                 type       = TYP_BYTE;
2357                                 predictReg = PREDICT_SCRATCH_REG;
2358 #if CPU_HAS_BYTE_REGS
2359                                 // It is best to enregister this fldVar in a byteable register
2360                                 //
2361                                 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2362 #endif
2363                             }
2364                             else if (varTypeIsShort(fldVar->TypeGet()))
2365                             {
2366                                 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2367                                 // If fldVar is not enregistered then we will need a scratch register
2368                                 //
2369                                 if (!isEnregistered)
2370                                 {
2371                                     // We will need either an int register or a byte register
2372                                     // If we are not requesting a byte register we will request an int register
2373                                     //
2374                                     if (type != TYP_BYTE)
2375                                         type   = TYP_INT;
2376                                     predictReg = PREDICT_SCRATCH_REG;
2377                                 }
2378                             }
2379                         }
2380                     }
2381                 }
2382                 else
2383                 {
2384                     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2385                     if (preferReg != 0)
2386                     {
2387                         if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2388                         {
2389                             varDsc->addPrefReg(preferReg, this);
2390                         }
2391                     }
2392                 }
2393                 break; /* end of case GT_LCL_VAR */
2394
2395             case GT_JMP:
2396                 tree->gtUsedRegs = RBM_NONE;
2397                 regMask          = RBM_NONE;
2398
2399 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2400                 // Mark the registers required to emit a tailcall profiler callback
2401                 if (compIsProfilerHookNeeded())
2402                 {
2403                     tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2404                 }
2405 #endif
2406                 goto RETURN_CHECK;
2407
2408             default:
2409                 break;
2410         } /* end of switch (oper) */
2411
2412         /* If we don't need to evaluate into a register, regMask is the empty set */
2413         /* Otherwise we grab a temp for the local variable                    */
2414
2415         if (predictReg == PREDICT_NONE)
2416             regMask = RBM_NONE;
2417         else
2418         {
2419             regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2420
2421             if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2422             {
2423                 /* We need to sign or zero extend a small type when pushing a struct */
2424                 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2425
2426                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2427                 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2428
2429                 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2430                      varNum++)
2431                 {
2432                     LclVarDsc* fldVar = lvaTable + varNum;
2433                     if (fldVar->lvTracked)
2434                     {
2435                         VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2436                         rpRecordRegIntf(regMask, fldBit DEBUGARG(
2437                                                      "need scratch register when pushing a small field of a struct"));
2438                     }
2439                 }
2440             }
2441         }
2442
2443         /* Update the set of lastUse variables that we encountered so far */
2444         if (lastUse)
2445         {
2446             VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2447             VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2448
2449             /*
2450              *  Add interference from any previously locked temps into this last use variable.
2451              */
2452             if (lockedRegs)
2453             {
2454                 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2455             }
2456             /*
2457              *  Add interference from any reserved temps into this last use variable.
2458              */
2459             if (rsvdRegs)
2460             {
2461                 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2462             }
2463             /*
2464              *  For partially enregistered longs add an interference with the
2465              *  register returned by rpPredictRegPick
2466              */
2467             if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2468             {
2469                 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2470             }
2471         }
2472
2473         tree->gtUsedRegs = (regMaskSmall)regMask;
2474         goto RETURN_CHECK;
2475     }
2476
2477     /* Is it a 'simple' unary/binary operator? */
2478
2479     if (kind & GTK_SMPOP)
2480     {
2481         GenTreePtr op1 = tree->gtOp.gtOp1;
2482         GenTreePtr op2 = tree->gtGetOp2IfPresent();
2483
2484         GenTreePtr opsPtr[3];
2485         regMaskTP  regsPtr[3];
2486
2487         VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2488
2489         switch (oper)
2490         {
2491             case GT_ASG:
2492
2493                 /* Is the value being assigned into a LCL_VAR? */
2494                 if (op1->gtOper == GT_LCL_VAR)
2495                 {
2496                     varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2497
2498                     /* Are we assigning a LCL_VAR the result of a call? */
2499                     if (op2->gtOper == GT_CALL)
2500                     {
2501                         /* Set a preferred register for the LCL_VAR */
2502                         if (isRegPairType(varDsc->TypeGet()))
2503                             varDsc->addPrefReg(RBM_LNGRET, this);
2504                         else if (!varTypeIsFloating(varDsc->TypeGet()))
2505                             varDsc->addPrefReg(RBM_INTRET, this);
2506 #ifdef _TARGET_AMD64_
2507                         else
2508                             varDsc->addPrefReg(RBM_FLOATRET, this);
2509 #endif
2510                         /*
2511                          *  When assigning the result of a call we don't
2512                          *  bother trying to target the right side of the
2513                          *  assignment, since we have a fixed calling convention.
2514                          */
2515                     }
2516                     else if (varDsc->lvTracked)
2517                     {
2518                         // We interfere with uses in place
2519                         if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2520                         {
2521                             rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2522                         }
2523
2524                         // Did we predict that this local will be fully enregistered?
2525                         // and the assignment type is the same as the expression type?
2526                         // and it is dead on the right side of the assignment?
2527                         // and we currently have no other rpAsgVarNum active?
2528                         //
2529                         if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2530                             (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2531                         {
2532                             //
2533                             //  Yes, we should try to target the right side (op2) of this
2534                             //  assignment into the (enregistered) tracked variable.
2535                             //
2536
2537                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2538                             op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2539
2540                             // Remember that this is a new use in place
2541
2542                             // We've added "new UseInPlace"; remove from the global set.
2543                             VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2544
2545                             //  Note that later when we walk down to the leaf node for op2
2546                             //  if we decide to actually use the register for the 'varDsc'
2547                             //  to enregister the operand, then we will set rpAsgVarNum to
2548                             //  varDsc->lvVarIndex, by extracting this value using
2549                             //  rpGetVarIndexForPredict()
2550                             //
2551                             //  Also we reset rpAsgVarNum back to -1 after we have finished
2552                             //  predicting the current GT_ASG node
2553                             //
2554                             goto ASG_COMMON;
2555                         }
2556                     }
2557                 }
2558                 else if (tree->OperIsBlkOp())
2559                 {
2560                     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2561                     regMask = 0;
2562                     goto RETURN_CHECK;
2563                 }
2564                 __fallthrough;
2565
2566             case GT_CHS:
2567
2568             case GT_ASG_OR:
2569             case GT_ASG_XOR:
2570             case GT_ASG_AND:
2571             case GT_ASG_SUB:
2572             case GT_ASG_ADD:
2573             case GT_ASG_MUL:
2574             case GT_ASG_DIV:
2575             case GT_ASG_UDIV:
2576
2577                 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2578                 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2579                 {
2580                     /* Is the value being assigned into an enregistered LCL_VAR? */
2581                     /* For debug code we only allow a simple op2 to be assigned */
2582                     if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2583                     {
2584                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2585                         /* Did we predict that this local will be enregistered? */
2586                         if (varDsc->lvRegNum != REG_STK)
2587                         {
2588                             /* Yes, we can use "reg <op>= addr" */
2589
2590                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2591                             op2PredictReg = PREDICT_NONE;
2592
2593                             goto ASG_COMMON;
2594                         }
2595                     }
2596                 }
2597
2598 #if CPU_LOAD_STORE_ARCH
2599                 if (oper != GT_ASG)
2600                 {
2601                     op1PredictReg = PREDICT_REG;
2602                     op2PredictReg = PREDICT_REG;
2603                 }
2604                 else
2605 #endif
2606                 {
2607                     /*
2608                      *  Otherwise, initialize the normal forcing of operands:
2609                      *   "addr <op>= reg"
2610                      */
2611                     op1PredictReg = PREDICT_ADDR;
2612                     op2PredictReg = PREDICT_REG;
2613                 }
2614
2615             ASG_COMMON:
2616
2617 #if !CPU_LOAD_STORE_ARCH
2618                 if (op2PredictReg != PREDICT_NONE)
2619                 {
2620                     /* Is the value being assigned a simple one? */
2621                     if (rpCanAsgOperWithoutReg(op2, false))
2622                         op2PredictReg = PREDICT_NONE;
2623                 }
2624 #endif
2625
2626                 bool simpleAssignment;
2627                 simpleAssignment = false;
2628
2629                 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2630                 {
2631                     // Add a variable interference from the assign target
2632                     // to each of the last use variables
2633                     if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2634                     {
2635                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2636
2637                         if (varDsc->lvTracked)
2638                         {
2639                             unsigned varIndex = varDsc->lvVarIndex;
2640
2641                             rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2642                         }
2643                     }
2644
2645                     /*  Record whether this tree is a simple assignment to a local */
2646
2647                     simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2648                 }
2649
2650                 bool requireByteReg;
2651                 requireByteReg = false;
2652
2653 #if CPU_HAS_BYTE_REGS
2654                 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
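                      // (On x86 only EAX, EBX, ECX and EDX have byte-addressable
                      // forms -- AL, BL, CL and DL -- so a byte-sized store from,
                      // say, ESI or EDI cannot be encoded.)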
2655
2656                 if (varTypeIsByte(type) &&
2657                     ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2658
2659                 {
2660                     // Byte-assignments typically need a byte register
2661                     requireByteReg = true;
2662
2663                     if (op1->gtOper == GT_LCL_VAR)
2664                     {
2665                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2666
2667                         // Did we predict that this local will be enregistered?
2668                         if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2669                         {
2670                             // We don't require a byte register when op1 is an enregistered local
2671                             requireByteReg = false;
2672                         }
2673
2674                         // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2675                         if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2676                         {
2677                             // We should try to put op1 in a byte register
2678                             varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2679                         }
2680                     }
2681                 }
2682 #endif
2683
2684                 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2685
2686                 bool isWriteBarrierAsgNode;
2687                 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2688 #ifdef DEBUG
2689                 GCInfo::WriteBarrierForm wbf;
2690                 if (isWriteBarrierAsgNode)
2691                     wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2692                 else
2693                     wbf = GCInfo::WBF_NoBarrier;
2694 #endif // DEBUG
2695
2696                 regMaskTP wbaLockedRegs;
2697                 wbaLockedRegs = lockedRegs;
2698                 if (isWriteBarrierAsgNode)
2699                 {
2700 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2701 #ifdef DEBUG
2702                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2703                     {
2704 #endif // DEBUG
2705                         wbaLockedRegs |= RBM_WRITE_BARRIER;
2706                         op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2707                         assert(REG_WRITE_BARRIER == REG_EDX);
2708                         op1PredictReg = PREDICT_REG_EDX;
2709 #ifdef DEBUG
2710                     }
2711                     else
2712 #endif // DEBUG
2713 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2714
2715 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2716                     {
2717 #ifdef _TARGET_X86_
2718                         op1PredictReg = PREDICT_REG_ECX;
2719                         op2PredictReg = PREDICT_REG_EDX;
2720 #elif defined(_TARGET_ARM_)
2721                         op1PredictReg = PREDICT_REG_R0;
2722                         op2PredictReg = PREDICT_REG_R1;
2723
2724                         // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2725                         if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2726                         {
2727                             op1 = op1->gtOp.gtOp1;
2728                         }
2729 #else // !_TARGET_X86_ && !_TARGET_ARM_
2730 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2731 #endif
2732                     }
2733 #endif
2734                 }
2735
2736                 /*  Are we supposed to evaluate RHS first? */
2737
2738                 if (tree->gtFlags & GTF_REVERSE_OPS)
2739                 {
2740                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2741
2742 #if CPU_HAS_BYTE_REGS
2743                     // Should we ensure that op2 gets evaluated into a byte register?
2744                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2745                     {
2746                         // We need to grab a byte-able register (i.e. EAX, EDX, ECX, EBX)
2747                         // and we can't select one that is already reserved (i.e. lockedRegs)
2748                         //
2749                         op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2750                         op2->gtUsedRegs |= op2Mask;
2751
2752                         // No longer a simple assignment because we're using extra registers and might
2753                         // have interference between op1 and op2.  See DevDiv #136681
2754                         simpleAssignment = false;
2755                     }
2756 #endif
2757                     /*
2758                      *  For a simple assignment we don't want the op2Mask to be
2759                      *  marked as interfering with the LCL_VAR, since it is likely
2760                      *  that we will want to enregister the LCL_VAR in exactly
2761                      *  the register that is used to compute op2
2762                      */
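                          // For example, for "v = x + y" where op2 is computed
                          // into (say) ECX: if 'v' is later colored to ECX as
                          // well, the assignment needs no move at all, which is
                          // exactly what this targeting enables.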
2763                     tmpMask = lockedRegs;
2764
2765                     if (!simpleAssignment)
2766                         tmpMask |= op2Mask;
2767
2768                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2769
2770                     // Did we relax the register prediction for op1 and op2 above?
2771                     // - because we are depending upon op1 being enregistered
2772                     //
2773                     if ((op1PredictReg == PREDICT_NONE) &&
2774                         ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2775                     {
2776                         /* We must be assigning into an enregistered LCL_VAR */
2777                         noway_assert(op1->gtOper == GT_LCL_VAR);
2778                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2779                         noway_assert(varDsc->lvRegNum != REG_STK);
2780
2781                         /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2782                         varDsc->lvDependReg = true;
2783                     }
2784                 }
2785                 else
2786                 {
2787                     // For the case of simpleAssignments op2 should always be evaluated first
2788                     noway_assert(!simpleAssignment);
2789
2790                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2791                     if (isWriteBarrierAsgNode)
2792                     {
2793                         wbaLockedRegs |= op1->gtUsedRegs;
2794                     }
2795                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2796
2797 #if CPU_HAS_BYTE_REGS
2798                     // Should we ensure that op2 gets evaluated into a byte register?
2799                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2800                     {
2801                         // We need to grab a byte-able register (i.e. EAX, EDX, ECX, EBX)
2802                         // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2803                         //
2804                         op2Mask |=
2805                             rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2806                         op2->gtUsedRegs |= op2Mask;
2807                     }
2808 #endif
2809                 }
2810
2811                 if (rpHasVarIndexForPredict(op2PredictReg))
2812                 {
2813                     rpAsgVarNum = -1;
2814                 }
2815
2816                 if (isWriteBarrierAsgNode)
2817                 {
2818 #if NOGC_WRITE_BARRIERS
2819 #ifdef DEBUG
2820                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2821                     {
2822 #endif // DEBUG
2823
2824                         /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2825                            passed to the write-barrier call in REG_WRITE_BARRIER */
2826
2827                         regMask = op2Mask;
2828
2829                         if (op1->gtOper == GT_IND)
2830                         {
2831                             GenTreePtr rv1, rv2;
2832                             unsigned   mul, cns;
2833                             bool       rev;
2834
2835                             /* Special handling of indirect assigns for write barrier */
2836
2837                             bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2838                                                                   &mul, &cns);
2839
2840                             /* Check address mode for enregisterable locals */
2841
2842                             if (yes)
2843                             {
2844                                 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2845                                 {
2846                                     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2847                                 }
2848                                 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2849                                 {
2850                                     rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2851                                 }
2852                             }
2853                         }
2854
2855                         if (op2->gtOper == GT_LCL_VAR)
2856                         {
2857                             rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2858                         }
2859
2860                         // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2861                         if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2862                         {
2863                             rpRecordRegIntf(RBM_WRITE_BARRIER,
2864                                             rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2865                         }
2866                         tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2867 #ifdef DEBUG
2868                     }
2869                     else
2870 #endif // DEBUG
2871 #endif // NOGC_WRITE_BARRIERS
2872
2873 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2874                     {
2875 #ifdef _TARGET_ARM_
2876 #ifdef DEBUG
2877                         if (verbose)
2878                             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2879 #endif
2880                         //
2881                         // For the ARM target we have an optimized JIT Helper
2882                         // that only trashes a subset of the callee saved registers
2883                         //
2884
2885                         // NOTE: the optimized helper kills the RBM_CALLEE_TRASH_NOGC
2886                         // register set
2887
2888                         // We record this in interferingRegs instead of gtUsedRegs, because
2889                         // gtUsedRegs will be modified later, but we need to remember to add
2890                         // the interference
2891
2892                         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2893
2894                         op1->gtUsedRegs |= RBM_R0;
2895                         op2->gtUsedRegs |= RBM_R1;
2896 #else // _TARGET_ARM_
2897
2898 #ifdef DEBUG
2899                         if (verbose)
2900                             printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2901 #endif
2902                         // We have to call a normal JIT helper to perform the Write Barrier Assignment
2903                         // It will trash the caller-saved (callee-trash) registers
2904
2905                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2906 #endif // _TARGET_ARM_
2907                     }
2908 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2909                 }
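                // Recap of the write-barrier cases above: with NOGC write barriers the
                // pointer is passed in REG_WRITE_BARRIER and interference is recorded
                // against that register; otherwise the ARM helper kills only
                // RBM_CALLEE_TRASH_NOGC (arguments in R0/R1), while other targets call
                // a normal JIT helper that kills the full RBM_CALLEE_TRASH set.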
2910
2911                 if (simpleAssignment)
2912                 {
2913                     /*
2914                      *  Consider a simple assignment to a local:
2915                      *
2916                      *   lcl = expr;
2917                      *
2918                      *  Since the "=" node is visited after the variable
2919                      *  is marked live (assuming it's live after the
2920                      *  assignment), we don't want to use the register
2921                      *  use mask of the "=" node but rather that of the
2922                      *  variable itself.
2923                      */
2924                     tree->gtUsedRegs = op1->gtUsedRegs;
2925                 }
2926                 else
2927                 {
2928                     tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2929                 }
2930                 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2931                 goto RETURN_CHECK;
2932
2933             case GT_ASG_LSH:
2934             case GT_ASG_RSH:
2935             case GT_ASG_RSZ:
2936                 /* assigning shift operators */
2937
2938                 noway_assert(type != TYP_LONG);
2939
2940 #if CPU_LOAD_STORE_ARCH
2941                 predictReg = PREDICT_ADDR;
2942 #else
2943                 predictReg = PREDICT_NONE;
2944 #endif
2945
2946                 /* the shift count is handled the same as for an ordinary shift */
2947                 goto HANDLE_SHIFT_COUNT;
2948
2949             case GT_ADDR:
2950                 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2951
2952                 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2953                 {
2954                     // We need a scratch register for the LEA instruction
2955                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2956                 }
2957
2958                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2959                 goto RETURN_CHECK;
2960
2961             case GT_CAST:
2962
2963                 /* Cannot cast to VOID */
2964                 noway_assert(type != TYP_VOID);
2965
2966                 /* cast to long is special */
2967                 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2968                 {
2969                     noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2970 #if CPU_LONG_USES_REGPAIR
2971                     rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2972
2973                     if (rpHasVarIndexForPredict(predictReg))
2974                     {
2975                         unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2976                         rpAsgVarNum       = tgtIndex;
2977
2978                         // We don't need any register as we plan on writing to the rpAsgVarNum register
2979                         predictReg = PREDICT_NONE;
2980
2981                         LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2982                         tgtVar->lvDependReg = true;
2983
2984                         if (tgtVar->lvOtherReg != REG_STK)
2985                         {
2986                             predictRegHi = PREDICT_NONE;
2987                         }
2988                     }
2989                     else
2990 #endif
2991                         if (predictReg == PREDICT_NONE)
2992                     {
2993                         predictReg = PREDICT_SCRATCH_REG;
2994                     }
2995 #ifdef _TARGET_ARM_
2996                     // If we are widening an int into a long using a targeted register pair we
2997                     // should retarget so that the low part gets loaded into the appropriate register
2998                     else if (predictReg == PREDICT_PAIR_R0R1)
2999                     {
3000                         predictReg   = PREDICT_REG_R0;
3001                         predictRegHi = PREDICT_REG_R1;
3002                     }
3003                     else if (predictReg == PREDICT_PAIR_R2R3)
3004                     {
3005                         predictReg   = PREDICT_REG_R2;
3006                         predictRegHi = PREDICT_REG_R3;
3007                     }
3008 #endif
3009 #ifdef _TARGET_X86_
3010                     // If we are widening an int into a long using a targeted register pair we
3011                     // should retarget so that the low part gets loaded into the appropriate register
3012                     else if (predictReg == PREDICT_PAIR_EAXEDX)
3013                     {
3014                         predictReg   = PREDICT_REG_EAX;
3015                         predictRegHi = PREDICT_REG_EDX;
3016                     }
3017                     else if (predictReg == PREDICT_PAIR_ECXEBX)
3018                     {
3019                         predictReg   = PREDICT_REG_ECX;
3020                         predictRegHi = PREDICT_REG_EBX;
3021                     }
3022 #endif
3023
3024                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3025
3026 #if CPU_LONG_USES_REGPAIR
3027                     if (predictRegHi != PREDICT_NONE)
3028                     {
3029                         // Now get one more reg for the upper part
3030                         regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3031                     }
3032 #endif
3033                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3034                     goto RETURN_CHECK;
3035                 }
3036
3037                 /* cast from long is special - it frees a register */
3038                 if (type <= TYP_INT // i.e. signed int and smaller integral types
3039                     && op1->gtType == TYP_LONG)
3040                 {
3041                     if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3042                         predictReg = PREDICT_REG;
3043
3044                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3045
3046                     // If we have 2 or more regs, free one of them
3047                     if (!genMaxOneBit(regMask))
3048                     {
3049                         /* Clear the 2nd lowest bit in regMask */
3050                         /* First set tmpMask to the lowest bit in regMask */
3051                         tmpMask = genFindLowestBit(regMask);
3052                         /* Next find the second lowest bit in regMask */
3053                         tmpMask = genFindLowestBit(regMask & ~tmpMask);
3054                         /* Clear this bit from regmask */
3055                         regMask &= ~tmpMask;
3056                     }
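                    // Worked example (register encodings illustrative): if op1 came back
                    // in the pair EDX:EAX, regMask == (RBM_EAX | RBM_EDX). The first
                    // genFindLowestBit yields RBM_EAX, the second (over regMask & ~RBM_EAX)
                    // yields RBM_EDX, and clearing it leaves regMask == RBM_EAX -- only the
                    // low half of the long remains held after the cast to int.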
3057                     tree->gtUsedRegs = op1->gtUsedRegs;
3058                     goto RETURN_CHECK;
3059                 }
3060
3061 #if CPU_HAS_BYTE_REGS
3062                 /* cast from signed-byte is special - it uses byteable registers */
3063                 if (type == TYP_INT)
3064                 {
3065                     var_types smallType;
3066
3067                     if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3068                         smallType = tree->gtCast.CastOp()->TypeGet();
3069                     else
3070                         smallType = tree->gtCast.gtCastType;
3071
3072                     if (smallType == TYP_BYTE)
3073                     {
3074                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3075
3076                         if ((regMask & RBM_BYTE_REGS) == 0)
3077                             regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3078
3079                         tree->gtUsedRegs = (regMaskSmall)regMask;
3080                         goto RETURN_CHECK;
3081                     }
3082                 }
3083 #endif
3084
3085 #if FEATURE_STACK_FP_X87
3086                 /* cast to float/double is special */
3087                 if (varTypeIsFloating(type))
3088                 {
3089                     switch (op1->TypeGet())
3090                     {
3091                         /* uses fild, so the operand doesn't need to be loaded into a register */
3092                         case TYP_INT:
3093                         case TYP_LONG:
3094                             rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3095                             tree->gtUsedRegs = op1->gtUsedRegs;
3096                             regMask          = 0;
3097                             goto RETURN_CHECK;
3098                         default:
3099                             break;
3100                     }
3101                 }
3102
3103                 /* Casting from floating type to integral type is special */
3104                 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3105                 {
3106                     if (opts.compCanUseSSE2)
3107                     {
3108                         // predict for SSE2 based casting
3109                         if (predictReg <= PREDICT_REG)
3110                             predictReg = PREDICT_SCRATCH_REG;
3111                         regMask        = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3112
3113                         // Get one more int reg to hold cast result
3114                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3115                         tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3116                         goto RETURN_CHECK;
3117                     }
3118                 }
3119 #endif
3120
3121 #if FEATURE_FP_REGALLOC
3122                 // Are we casting from int to float or from float to int?
3123                 // Fix 388428 ARM JitStress WP7
3124                 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3125                 {
3126                     // op1 needs to go into a register
3127                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3128
3129 #ifdef _TARGET_ARM_
3130                     if (varTypeIsFloating(op1->TypeGet()))
3131                     {
3132                         // We also need a fp scratch register for the convert operation
3133                         regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3134                                                     PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3135                     }
3136 #endif
3137                     // We also need a register to hold the result
3138                     regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3139                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3140                     goto RETURN_CHECK;
3141                 }
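                // On ARM the extra FP scratch above reflects how the conversion is
                // encoded (a sketch; exact instruction choice varies by types):
                //   vcvt.s32.f64 s0, d0   ; float -> int result lands in an FP register
                //   vmov         r0, s0   ; then moves to the integer result register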
3142 #endif
3143
3144                 /* otherwise must load op1 into a register */
3145                 goto GENERIC_UNARY;
3146
3147             case GT_INTRINSIC:
3148
3149 #ifdef _TARGET_XARCH_
3150                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3151                 {
3152                     // This is a special case to handle the following
3153                     // optimization: conv.i4(round.d(d)) -> round.i(d)
3154                     // if flowgraph 3186
3155
3156                     if (predictReg <= PREDICT_REG)
3157                         predictReg = PREDICT_SCRATCH_REG;
3158
3159                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3160
3161                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3162
3163                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3164                     goto RETURN_CHECK;
3165                 }
3166 #endif
3167                 __fallthrough;
3168
3169             case GT_NEG:
3170 #ifdef _TARGET_ARM_
3171                 if (tree->TypeGet() == TYP_LONG)
3172                 {
3173                     // On ARM this consumes an extra register for the '0' value
3174                     if (predictReg <= PREDICT_REG)
3175                         predictReg = PREDICT_SCRATCH_REG;
3176
3177                     regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3178
3179                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3180
3181                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3182                     goto RETURN_CHECK;
3183                 }
3184 #endif // _TARGET_ARM_
3185
3186                 __fallthrough;
3187
3188             case GT_NOT:
3189             // these unary operators will write new values
3190             // and thus will need a scratch register
3191             GENERIC_UNARY:
3192                 /* generic unary operators */
3193
3194                 if (predictReg <= PREDICT_REG)
3195                     predictReg = PREDICT_SCRATCH_REG;
3196
3197                 __fallthrough;
3198
3199             case GT_NOP:
3200                 // these unary operators do not write new values
3201                 // and thus won't need a scratch register
3202                 CLANG_FORMAT_COMMENT_ANCHOR;
3203
3204 #if OPT_BOOL_OPS
3205                 if (!op1)
3206                 {
3207                     tree->gtUsedRegs = 0;
3208                     regMask          = 0;
3209                     goto RETURN_CHECK;
3210                 }
3211 #endif
3212                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3213                 tree->gtUsedRegs = op1->gtUsedRegs;
3214                 goto RETURN_CHECK;
3215
3216             case GT_IND:
3217             case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3218             {
3219                 bool      intoReg = true;
3220                 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3221
3222                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3223                 {
3224                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3225                 }
3226
3227                 if (predictReg == PREDICT_ADDR)
3228                 {
3229                     intoReg = false;
3230                 }
3231                 else if (predictReg == PREDICT_NONE)
3232                 {
3233                     if (type != TYP_LONG)
3234                     {
3235                         intoReg = false;
3236                     }
3237                     else
3238                     {
3239                         predictReg = PREDICT_REG;
3240                     }
3241                 }
3242
3243                 /* forcing to register? */
3244                 if (intoReg && (type != TYP_LONG))
3245                 {
3246                     rsvdRegs |= RBM_LASTUSE;
3247                 }
3248
3249                 GenTreePtr lenCSE;
3250                 lenCSE = NULL;
3251
3252                 /* check for address mode */
3253                 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3254                 tmpMask = RBM_NONE;
3255
3256 #if CPU_LOAD_STORE_ARCH
3257                 // We may need a scratch register for loading a long
3258                 if (type == TYP_LONG)
3259                 {
3260                     /* This scratch register immediately dies */
3261                     tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3262                 }
3263 #endif // CPU_LOAD_STORE_ARCH
3264
3265 #ifdef _TARGET_ARM_
3266                 // For unaligned loads/stores of floating point values, the value must first be loaded into integer register(s)
3267                 //
3268                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3269                 {
3270                     /* These integer register(s) immediately die */
3271                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3272                     // Two integer registers are required for a TYP_DOUBLE
3273                     if (type == TYP_DOUBLE)
3274                         tmpMask |=
3275                             rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3276                 }
3277 #endif
3278
3279                 /* forcing to register? */
3280                 if (intoReg)
3281                 {
3282                     regMaskTP lockedMask = lockedRegs | rsvdRegs;
3283                     tmpMask |= regMask;
3284
3285                     // We will compute a new regMask that holds the register(s)
3286                     // that we will load the indirection into.
3287                     //
3288                     CLANG_FORMAT_COMMENT_ANCHOR;
3289
3290 #ifndef _TARGET_64BIT_
3291                     if (type == TYP_LONG)
3292                     {
3293                         // We need to use multiple load instructions here:
3294                         // For the first register we cannot choose
3295                         // any registers that are being used in place or
3296                         // any register in the current regMask
3297                         //
3298                         regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3299
3300                         // For the second register we can choose a register that was
3301                         // used in place or any register in the old (now overwritten) regMask
3302                         // but not the same register that we picked above in 'regMask'
3303                         //
3304                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3305                         regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
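                        // Net effect (sketch): the first pick must avoid the address
                        // registers still live in 'regMask', because the address is needed
                        // again for the second load; the second pick only has to avoid the
                        // register just chosen, since the address dies with the final load.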
3306                     }
3307                     else
3308 #endif
3309                     {
3310                         // We will use one load instruction here:
3311                         // The load target register can be a register that was used in place
3312                         // or one of the registers from the original regMask.
3313                         //
3314                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3315                         regMask = rpPredictRegPick(type, predictReg, lockedMask);
3316                     }
3317                 }
3318                 else if (predictReg != PREDICT_ADDR)
3319                 {
3320                     /* Unless the caller specified PREDICT_ADDR   */
3321                     /* we don't return the temp registers used    */
3322                     /* to form the address                        */
3323                     regMask = RBM_NONE;
3324                 }
3325             }
3326
3327                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3328
3329                 goto RETURN_CHECK;
3330
3331             case GT_EQ:
3332             case GT_NE:
3333             case GT_LT:
3334             case GT_LE:
3335             case GT_GE:
3336             case GT_GT:
3337
3338 #ifdef _TARGET_X86_
3339                 /* Floating point comparison uses EAX for flags */
3340                 if (varTypeIsFloating(op1->TypeGet()))
3341                 {
3342                     regMask = RBM_EAX;
3343                 }
3344                 else
3345 #endif
3346                     if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3347                 {
3348                     // Some comparisons are converted to ?:
3349                     noway_assert(!fgMorphRelopToQmark(op1));
3350
3351                     if (predictReg <= PREDICT_REG)
3352                         predictReg = PREDICT_SCRATCH_REG;
3353
3354                     // The set instructions need a byte register
3355                     regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3356                 }
3357                 else
3358                 {
3359                     regMask = RBM_NONE;
3360 #ifdef _TARGET_XARCH_
3361                     tmpMask = RBM_NONE;
3362                     // Optimize the compare with a constant cases for xarch
3363                     if (op1->gtOper == GT_CNS_INT)
3364                     {
3365                         if (op2->gtOper == GT_CNS_INT)
3366                             tmpMask =
3367                                 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3368                         rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3369                         tree->gtUsedRegs = op2->gtUsedRegs;
3370                         goto RETURN_CHECK;
3371                     }
3372                     else if (op2->gtOper == GT_CNS_INT)
3373                     {
3374                         rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3375                         tree->gtUsedRegs = op1->gtUsedRegs;
3376                         goto RETURN_CHECK;
3377                     }
3378                     else if (op2->gtOper == GT_CNS_LNG)
3379                     {
3380                         regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3381 #ifdef _TARGET_X86_
3382                         // We also need one extra register to read values from
3383                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3384 #endif // _TARGET_X86_
3385                         tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3386                         goto RETURN_CHECK;
3387                     }
3388 #endif // _TARGET_XARCH_
3389                 }
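                // The constant cases above rely on x86/x64 compares taking an immediate
                // or memory operand directly, e.g. (illustrative encoding):
                //   cmp dword ptr [ebp-8], 10   ; GT_LCL_VAR vs GT_CNS_INT, no scratch reg
                // so no register needs to be predicted for the constant operand.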
3390
3391                 unsigned op1TypeSize;
3392                 unsigned op2TypeSize;
3393
3394                 op1TypeSize = genTypeSize(op1->TypeGet());
3395                 op2TypeSize = genTypeSize(op2->TypeGet());
3396
3397                 op1PredictReg = PREDICT_REG;
3398                 op2PredictReg = PREDICT_REG;
3399
3400                 if (tree->gtFlags & GTF_REVERSE_OPS)
3401                 {
3402 #ifdef _TARGET_XARCH_
3403                     if (op1TypeSize == sizeof(int))
3404                         op1PredictReg = PREDICT_NONE;
3405 #endif
3406
3407                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3408                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3409                 }
3410                 else
3411                 {
3412 #ifdef _TARGET_XARCH_
3413                     // For full DWORD compares we can have
3414                     //
3415                     //      op1 is an address mode and op2 is a register
3416                     // or
3417                     //      op1 is a register and op2 is an address mode
3418                     //
3419                     if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3420                     {
3421                         if (op2->gtOper == GT_LCL_VAR)
3422                         {
3423                             unsigned lclNum = op2->gtLclVar.gtLclNum;
3424                             varDsc          = lvaTable + lclNum;
3425                             /* Did we predict that this local will be enregistered? */
3426                             if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3427                             {
3428                                 op1PredictReg = PREDICT_ADDR;
3429                             }
3430                         }
3431                     }
3432                     // Codegen will generate cmp reg,[mem] for 4- or 8-byte types, but not for 1- or 2-byte types
3433                     if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3434                         op2PredictReg = PREDICT_ADDR;
3435 #endif // _TARGET_XARCH_
3436
3437                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3438 #ifdef _TARGET_ARM_
3439                     if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3440 #endif
3441                     {
3442                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3443                     }
3444                 }
3445
3446 #ifdef _TARGET_XARCH_
3447                 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3448                 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3449                 // if one of the registers is small and the types aren't equal.
3450
3451                 if (regMask == RBM_NONE)
3452                 {
3453                     rpPredictReg op1xPredictReg, op2xPredictReg;
3454                     GenTreePtr   op1x, op2x;
3455                     if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3456                     {
3457                         op1xPredictReg = op2PredictReg;
3458                         op2xPredictReg = op1PredictReg;
3459                         op1x           = op2;
3460                         op2x           = op1;
3461                     }
3462                     else
3463                     {
3464                         op1xPredictReg = op1PredictReg;
3465                         op2xPredictReg = op2PredictReg;
3466                         op1x           = op1;
3467                         op2x           = op2;
3468                     }
3469                     if ((op1xPredictReg < PREDICT_REG) &&  // op1 doesn't get a register (probably an indir)
3470                         (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3471                         varTypeIsSmall(op1x->TypeGet()))   // op1 is smaller than an int
3472                     {
3473                         bool needTmp = false;
3474
3475                         // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3476                         // We could predict a byteable register for op2x, but what if we don't get it?
3477                         // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3478                         // result.
3479                         if (varTypeIsByte(op1x->TypeGet()))
3480                         {
3481                             needTmp = true;
3482                         }
3483                         else
3484                         {
3485                             if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3486                             {
3487                                 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3488                                     needTmp = true;
3489                             }
3490                             else
3491                             {
3492                                 if (op1x->TypeGet() != op2x->TypeGet())
3493                                     needTmp = true;
3494                             }
3495                         }
3496                         if (needTmp)
3497                         {
3498                             regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3499                         }
3500                     }
3501                 }
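                // Example of the temp at work (a sketch; genCondSetFlags has the details):
                // comparing a TYP_BYTE indirection against an enregistered int local can't
                // be done as a mixed-width cmp, so codegen first widens the small operand:
                //   movsx eax, byte ptr [ecx]   ; sign-extend into the predicted temp
                //   cmp   eax, edx              ; then compare at int width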
3502 #endif // _TARGET_XARCH_
3503
3504                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3505                 goto RETURN_CHECK;
3506
3507             case GT_MUL:
3508
3509 #ifndef _TARGET_AMD64_
3510                 if (type == TYP_LONG)
3511                 {
3512                     assert(tree->gtIsValid64RsltMul());
3513
3514                     /* Strip out the cast nodes */
3515
3516                     noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3517                     op1 = op1->gtCast.CastOp();
3518                     op2 = op2->gtCast.CastOp();
3519 #else
3520                 if (false)
3521                 {
3522 #endif // !_TARGET_AMD64_
3523                 USE_MULT_EAX:
3524
3525 #if defined(_TARGET_X86_)
3526                     // This will be done by a 64-bit imul "imul eax, reg"
3527                     //   (i.e. EDX:EAX = EAX * reg)
3528
3529                     /* Are we supposed to evaluate op2 first? */
3530                     if (tree->gtFlags & GTF_REVERSE_OPS)
3531                     {
3532                         rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3533                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3534                     }
3535                     else
3536                     {
3537                         rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3538                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3539                     }
3540
3541                     /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3542
3543                     tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3544
3545                     /* set regMask to the set of held registers */
3546
3547                     regMask = RBM_PAIR_TMP_LO;
3548
3549                     if (type == TYP_LONG)
3550                         regMask |= RBM_PAIR_TMP_HI;
3551
3552 #elif defined(_TARGET_ARM_)
3553                     // This will be done by a 4-operand multiply
3554
3555                     // Are we supposed to evaluate op2 first?
3556                     if (tree->gtFlags & GTF_REVERSE_OPS)
3557                     {
3558                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3559                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3560                     }
3561                     else
3562                     {
3563                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3564                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3565                     }
3566
3567                     // set regMask to the set of held registers,
3568                     //  the two scratch registers we need to compute the mul result
3569
3570                     regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3571
3572                     // set gtUsedRegs to regMask and the registers needed by op1 and op2
3573
3574                     tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3575
3576 #else // !_TARGET_X86_ && !_TARGET_ARM_
3577 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3578 #endif
3579
3580                     goto RETURN_CHECK;
3581                 }
3582                 else
3583                 {
3584                     /* We take the USE_MULT_EAX path for overflow-checked unsigned or small-type multiplies */
3585                     if (tree->gtOverflow())
3586                     {
3587                         if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3588                         {
3589                             goto USE_MULT_EAX;
3590                         }
3591                     }
3592                 }
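                // Why these overflow cases take the USE_MULT_EAX path (x86 sketch): the
                // one-operand "mul" form widens the result into EDX:EAX and sets CF/OF
                // whenever the high half is non-zero, so an unsigned or small-type
                // overflow check falls out of the instruction itself.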
3593
3594                 __fallthrough;
3595
3596             case GT_OR:
3597             case GT_XOR:
3598             case GT_AND:
3599
3600             case GT_SUB:
3601             case GT_ADD:
3602                 tree->gtUsedRegs = 0;
3603
3604                 if (predictReg <= PREDICT_REG)
3605                     predictReg = PREDICT_SCRATCH_REG;
3606
3607             GENERIC_BINARY:
3608
3609                 noway_assert(op2);
3610                 if (tree->gtFlags & GTF_REVERSE_OPS)
3611                 {
3612                     op1PredictReg = PREDICT_REG;
3613 #if !CPU_LOAD_STORE_ARCH
3614                     if (genTypeSize(op1->gtType) >= sizeof(int))
3615                         op1PredictReg = PREDICT_NONE;
3616 #endif
3617                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3618                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3619                 }
3620                 else
3621                 {
3622                     op2PredictReg = PREDICT_REG;
3623 #if !CPU_LOAD_STORE_ARCH
3624                     if (genTypeSize(op2->gtType) >= sizeof(int))
3625                         op2PredictReg = PREDICT_NONE;
3626 #endif
3627                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3628 #ifdef _TARGET_ARM_
3629                     // For most ALU operations we can generate a single instruction that encodes
3630                     // a small immediate integer constant value.  (except for multiply)
3631                     //
3632                     if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3633                     {
3634                         ssize_t ival = op2->gtIntCon.gtIconVal;
3635                         if (codeGen->validImmForAlu(ival))
3636                         {
3637                             op2PredictReg = PREDICT_NONE;
3638                         }
3639                         else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3640                                  ((oper == GT_ADD) || (oper == GT_SUB)))
3641                         {
3642                             op2PredictReg = PREDICT_NONE;
3643                         }
3644                     }
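                    // For instance (ARM modified-immediate rules, roughly):
                    // "add r0, r0, #100" encodes directly because 100 fits the
                    // rotated-immediate form, while "and r0, r0, #0x12345678" does not,
                    // so that constant would still be predicted into a register below.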
3645                     if (op2PredictReg == PREDICT_NONE)
3646                     {
3647                         op2->gtUsedRegs = RBM_NONE;
3648                     }
3649                     else
3650 #endif
3651                     {
3652                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3653                     }
3654                 }
3655                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3656
3657 #if CPU_HAS_BYTE_REGS
3658                 /* We have special register requirements for byte operations */
3659
3660                 if (varTypeIsByte(tree->TypeGet()))
3661                 {
3662                     /* For 8-bit arithmetic, one operand has to be in a
3663                        byte-addressable register, and the other has to be
3664                        in a byte-addressable reg or in memory. Assume it's in a reg */
3665
3666                     regMaskTP regByteMask = 0;
3667                     regMaskTP op1ByteMask = op1->gtUsedRegs;
3668
3669                     if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3670                     {
3671                         // Pick a Byte register to use for op1
3672                         regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3673                         op1ByteMask = regByteMask;
3674                     }
3675
3676                     if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3677                     {
3678                         // Pick a Byte register to use for op2, avoiding the one used by op1
3679                         regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3680                     }
3681
3682                     if (regByteMask)
3683                     {
3684                         tree->gtUsedRegs |= regByteMask;
3685                         regMask = regByteMask;
3686                     }
3687                 }
3688 #endif
3689                 goto RETURN_CHECK;
3690
3691             case GT_DIV:
3692             case GT_MOD:
3693
3694             case GT_UDIV:
3695             case GT_UMOD:
3696
3697                 /* non-integer division is handled in the generic way */
3698                 if (!varTypeIsIntegral(type))
3699                 {
3700                     tree->gtUsedRegs = 0;
3701                     if (predictReg <= PREDICT_REG)
3702                         predictReg = PREDICT_SCRATCH_REG;
3703                     goto GENERIC_BINARY;
3704                 }
3705
3706 #ifndef _TARGET_64BIT_
3707
3708                 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3709                 {
3710                     /* Special case:  a mod with an int op2 is done inline using idiv or div
3711                        to avoid a costly call to the helper */
3712
3713                     noway_assert((op2->gtOper == GT_CNS_LNG) &&
3714                                  (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3715
3716 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3717                     if (tree->gtFlags & GTF_REVERSE_OPS)
3718                     {
3719                         tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3720                                                       rsvdRegs | op1->gtRsvdRegs);
3721                         tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3722                     }
3723                     else
3724                     {
3725                         tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3726                         tmpMask |=
3727                             rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3728                     }
3729                     regMask = RBM_PAIR_TMP;
3730 #else // !_TARGET_X86_ && !_TARGET_ARM_
3731 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3732 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3733
3734                     tree->gtUsedRegs =
3735                         (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3736                                        rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3737
3738                     goto RETURN_CHECK;
3739                 }
3740 #endif // _TARGET_64BIT_
3741
3742                 /* there is no divide-by-immediate instruction, so an integer constant
3743                  * divisor that is not a power of two must be forced into a register
3744                  */
3745
3746                 if (op2->OperKind() & GTK_CONST)
3747                 {
3748                     ssize_t ival = op2->gtIntConCommon.IconValue();
3749
3750                     /* Is the divisor a power of 2 ? */
3751
3752                     if (ival > 0 && genMaxOneBit(size_t(ival)))
3753                     {
3754                         goto GENERIC_UNARY;
3755                     }
3756                     else
3757                         op2PredictReg = PREDICT_SCRATCH_REG;
3758                 }
3759                 else
3760                 {
3761                     /* A non-constant divisor must also be enregistered */
3762                     op2PredictReg = PREDICT_REG;
3763                 }
3764
3765                 regMaskTP trashedMask;
3766                 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3767                 regMaskTP op1ExcludeMask;
3768                 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3769                 regMaskTP op2ExcludeMask;
3770                 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3771
3772 #ifdef _TARGET_XARCH_
3773                 /*  Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3774                  *  we can safely allow the "b" value to die. Unfortunately, if we simply
3775                  *  mark the node "b" as using EDX, this will not work if "b" is a register
3776                  *  variable that dies with this particular reference. Thus, if we want to
3777                  *  avoid this situation (where we would have to spill the variable from
3778                  *  EDX to someplace else), we need to explicitly mark the interference
3779                  *  of the variable at this point.
3780                  */
3781
3782                 if (op2->gtOper == GT_LCL_VAR)
3783                 {
3784                     unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3785                     varDsc          = lvaTable + lclNum;
3786                     if (varDsc->lvTracked)
3787                     {
3788 #ifdef DEBUG
3789                         if (verbose)
3790                         {
3791                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3792                                 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3793                                        varDsc->lvVarIndex);
3794                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3795                                 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3796                                        varDsc->lvVarIndex);
3797                         }
3798 #endif
3799                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3800                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3801                     }
3802                 }
3803
3804                 /* set the held register based on opcode */
3805                 if (oper == GT_DIV || oper == GT_UDIV)
3806                     regMask = RBM_EAX;
3807                 else
3808                     regMask    = RBM_EDX;
3809                 trashedMask    = (RBM_EAX | RBM_EDX);
3810                 op1ExcludeMask = 0;
3811                 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3812
3813 #endif // _TARGET_XARCH_
3814
3815 #ifdef _TARGET_ARM_
3816                 trashedMask    = RBM_NONE;
3817                 op1ExcludeMask = RBM_NONE;
3818                 op2ExcludeMask = RBM_NONE;
3819 #endif
3820
3821                 /* set the lvPref reg if possible */
3822                 GenTreePtr dest;
3823                 /*
3824                  *  Walking the gtNext link twice from here should get us back
3825                  *  to our parent node, if this is a simple assignment tree.
3826                  */
3827                 dest = tree->gtNext;
3828                 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3829                     dest->gtNext->gtOp.gtOp2 == tree)
3830                 {
3831                     varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3832                     varDsc->addPrefReg(regMask, this);
3833                 }
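                // Sketch of the tree shape this walk matches (linear gtNext order):
                //   tree (GT_DIV)  ->  GT_LCL_VAR 'dest'  ->  GT_ASG (with gtOp2 == tree)
                // i.e. on x86, for "dest = a / b" we prefer allocating 'dest' in EAX
                // (EDX for GT_MOD) so the division result needs no extra move.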
3834 #ifdef _TARGET_XARCH_
3835                 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3836 #else
3837                 op1PredictReg        = PREDICT_SCRATCH_REG;
3838 #endif
3839
3840                 /* are we supposed to evaluate op2 first? */
3841                 if (tree->gtFlags & GTF_REVERSE_OPS)
3842                 {
3843                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3844                                                   rsvdRegs | op1->gtRsvdRegs);
3845                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3846                 }
3847                 else
3848                 {
3849                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3850                                                   rsvdRegs | op2->gtRsvdRegs);
3851                     rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3852                 }
3853 #ifdef _TARGET_ARM_
3854                 regMask = tmpMask;
3855 #endif
3856                 /* record the registers trashed by this node (EAX and EDX on x86) */
3857                 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3858
3859                 goto RETURN_CHECK;
3860
3861             case GT_LSH:
3862             case GT_RSH:
3863             case GT_RSZ:
3864
3865                 if (predictReg <= PREDICT_REG)
3866                     predictReg = PREDICT_SCRATCH_REG;
3867
3868 #ifndef _TARGET_64BIT_
3869                 if (type == TYP_LONG)
3870                 {
3871                     if (op2->IsCnsIntOrI())
3872                     {
3873                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3874                         // no register used by op2
3875                         op2->gtUsedRegs  = 0;
3876                         tree->gtUsedRegs = op1->gtUsedRegs;
3877                     }
3878                     else
3879                     {
3880                         // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3881                         tmpMask = lockedRegs;
3882                         tmpMask &= ~RBM_LNGARG_0;
3883                         tmpMask &= ~RBM_SHIFT_LNG;
3884
3885                         // op2 goes to RBM_SHIFT_LNG, op1 to the RBM_LNGARG_0 pair
3886                         if (tree->gtFlags & GTF_REVERSE_OPS)
3887                         {
3888                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3889                             tmpMask |= RBM_SHIFT_LNG;
3890                             // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
3891                             // Fix 383843 X86/ARM ILGEN
3892                             rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3893                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3894                         }
3895                         else
3896                         {
3897                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3898                             tmpMask |= RBM_LNGARG_0;
3899                             // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3900                             // Fix 383839 ARM ILGEN
3901                             rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3902                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3903                         }
3904                         regMask = RBM_LNGRET; // function return registers
3905                         op1->gtUsedRegs |= RBM_LNGARG_0;
3906                         op2->gtUsedRegs |= RBM_SHIFT_LNG;
3907
3908                         tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3909
3910                         // We are using a helper function to do the shift:
3911                         //
3912                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3913                     }
3914                 }
3915                 else
3916 #endif // _TARGET_64BIT_
3917                 {
3918 #ifdef _TARGET_XARCH_
3919                     if (!op2->IsCnsIntOrI())
3920                         predictReg = PREDICT_NOT_REG_ECX;
3921 #endif
3922
3923                 HANDLE_SHIFT_COUNT:
3924                     // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3925
3926                     regMaskTP tmpRsvdRegs;
3927
3928                     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3929                     {
3930                         regMask     = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3931                         rsvdRegs    = RBM_LASTUSE;
3932                         tmpRsvdRegs = RBM_NONE;
3933                     }
3934                     else
3935                     {
3936                         regMask = RBM_NONE;
3937                         // Special case op1 of a constant
3938                         if (op1->IsCnsIntOrI())
3939                             tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3940                                                        // System.Xml.Schema.BitSet:Get(int):bool
3941                         else
3942                             tmpRsvdRegs = op1->gtRsvdRegs;
3943                     }
3944
3945                     op2Mask = RBM_NONE;
3946                     if (!op2->IsCnsIntOrI())
3947                     {
3948                         if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3949                         {
3950                             op2PredictReg = PREDICT_REG_SHIFT;
3951                         }
3952                         else
3953                         {
3954                             op2PredictReg = PREDICT_REG;
3955                         }
3956
3957                         /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3958                         op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3959
3960                         // If our target arch has a REG_SHIFT register then
3961                         //     we set the PrefReg when we have a LclVar for op2
3962                         //     we add an interference with REG_SHIFT for any other LclVars alive at op2
3963                         if (REG_SHIFT != REG_NA)
3964                         {
3965                             VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3966
3967                             while (op2->gtOper == GT_COMMA)
3968                             {
3969                                 op2 = op2->gtOp.gtOp2;
3970                             }
3971
3972                             if (op2->gtOper == GT_LCL_VAR)
3973                             {
3974                                 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3975                                 varDsc->setPrefReg(REG_SHIFT, this);
3976                                 if (varDsc->lvTracked)
3977                                 {
3978                                     VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3979                                 }
3980                             }
3981
3982                             // Ensure that we have a register interference with the LclVars in the tree's live set,
3983                             // excluding the LclVar that was used for the shift amount as it is read-only
3984                             // and can be kept alive through the shift operation
3985                             //
3986                             rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3987                             // In case op2Mask doesn't contain the required shift register,
3988                             // we will or it in now.
3989                             op2Mask |= RBM_SHIFT;
3990                         }
3991                     }
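                    // Concretely (x86 flavor of the rule above): REG_SHIFT is ECX, since
                    // variable shift counts must be in CL ("shl eax, cl"). Preferring ECX
                    // for op2's LclVar and recording an ECX interference for the other
                    // live variables keeps them from being displaced at the shift.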
3992
3993                     if (tree->gtFlags & GTF_REVERSE_OPS)
3994                     {
3995                         assert(regMask == RBM_NONE);
3996                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3997                     }
3998
3999 #if CPU_HAS_BYTE_REGS
4000                     if (varTypeIsByte(type))
4001                     {
4002                         // Fix 383789 X86 ILGEN
4003                         // Fix 383813 X86 ILGEN
4004                         // Fix 383828 X86 ILGEN
4005                         if (op1->gtOper == GT_LCL_VAR)
4006                         {
4007                             varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4008                             if (varDsc->lvTracked)
4009                             {
4010                                 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4011
4012                                 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4013                                 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4014                             }
4015                         }
4016                         if ((regMask & RBM_BYTE_REGS) == 0)
4017                         {
4018                             // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4019                             // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4020                             //
4021                             regMask |=
4022                                 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4023                         }
4024                     }
4025 #endif
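                         // A minimal sketch of why this restriction exists (assuming x86):
                         //     mov al, byte ptr [mem]    ; legal: EAX has the byte form AL
                         // but ESI/EDI/EBP have no 8-bit forms at all, so byte-typed values
                         // must be predicted into EAX/EBX/ECX/EDX (RBM_BYTE_REGS) and
                         // RBM_NON_BYTE_REGS must be excluded from the pick above.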
4026                     tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4027                 }
4028
4029                 goto RETURN_CHECK;
4030
4031             case GT_COMMA:
4032                 if (tree->gtFlags & GTF_REVERSE_OPS)
4033                 {
4034                     if (predictReg == PREDICT_NONE)
4035                     {
4036                         predictReg = PREDICT_REG;
4037                     }
4038                     else if (rpHasVarIndexForPredict(predictReg))
4039                     {
4040                         /* Don't propagate the tgt reg use into a GT_COMMA */
4041                         predictReg = PREDICT_SCRATCH_REG;
4042                     }
4043
4044                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4045                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4046                 }
4047                 else
4048                 {
4049                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4050
4051                     /* CodeGen will enregister the op2 side of a GT_COMMA */
4052                     if (predictReg == PREDICT_NONE)
4053                     {
4054                         predictReg = PREDICT_REG;
4055                     }
4056                     else if (rpHasVarIndexForPredict(predictReg))
4057                     {
4058                         /* Don't propagate the tgt reg use into a GT_COMMA */
4059                         predictReg = PREDICT_SCRATCH_REG;
4060                     }
4061
4062                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4063                 }
4064                 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4065                 //
4066                 tree->gtUsedRegs = op2->gtUsedRegs;
4067                 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4068                 {
4069                     LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4070
4071                     if (op2VarDsc->lvTracked)
4072                     {
4073                         VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4074                         rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4075                     }
4076                 }
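                     // Concrete example (hypothetical tree): for COMMA(ASG(tmp, call), LCL_VAR tmp),
                     // the parent consumes only the LCL_VAR's register, which is why gtUsedRegs
                     // above copies op2's registers instead of also accumulating op1's.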
4077                 goto RETURN_CHECK;
4078
4079             case GT_QMARK:
4080             {
4081                 noway_assert(op1 != NULL && op2 != NULL);
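                 // Shape sketch (a reading aid, not new behavior): GT_QMARK(<cond>, GT_COLON)
                 // models "cond ? thenVal : elseVal"; op1 is the condition, and op2 is the
                 // GT_COLON whose <then> and <else> subtrees are predicted below.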
4082
4083                 /*
4084                  *  If the gtUsedRegs conflicts with lockedRegs
4085                  *  then we are going to have to spill some registers
4086                  *  into the non-trashed register set to keep them alive
4087                  */
4088                 unsigned spillCnt;
4089                 spillCnt = 0;
4090                 regMaskTP spillRegs;
4091                 spillRegs = lockedRegs & tree->gtUsedRegs;
4092
4093                 while (spillRegs)
4094                 {
4095                     /* Find the next register that needs to be spilled */
4096                     tmpMask = genFindLowestBit(spillRegs);
4097
4098 #ifdef DEBUG
4099                     if (verbose)
4100                     {
4101                         printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4102                         gtDispTree(tree, 0, NULL, true);
4103                     }
4104 #endif
4105                     /* In Codegen it will typically introduce a spill temp here */
4106                     /* rather than relocating the register to a non trashed reg */
4107                     rpPredictSpillCnt++;
4108                     spillCnt++;
4109
4110                     /* Remove it from the spillRegs and lockedRegs */
4111                     spillRegs &= ~tmpMask;
4112                     lockedRegs &= ~tmpMask;
4113                 }
4114                 {
4115                     VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4116
4117                     /* Evaluate the <cond> subtree */
4118                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4119                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4120                     tree->gtUsedRegs = op1->gtUsedRegs;
4121
4122                     noway_assert(op2->gtOper == GT_COLON);
4123                     if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4124                     {
4125                         // Don't try to target the register specified in predictReg when we have complex subtrees
4126                         //
4127                         predictReg = PREDICT_SCRATCH_REG;
4128                     }
4129                     GenTreePtr elseTree = op2->AsColon()->ElseNode();
4130                     GenTreePtr thenTree = op2->AsColon()->ThenNode();
4131
4132                     noway_assert(thenTree != NULL && elseTree != NULL);
4133
4134                     // Update compCurLife to only those vars live on the <then> subtree
4135
4136                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4137
4138                     if (type == TYP_VOID)
4139                     {
4140                         /* Evaluate the <then> subtree */
4141                         rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4142                         regMask    = RBM_NONE;
4143                         predictReg = PREDICT_NONE;
4144                     }
4145                     else
4146                     {
4147                         // A mask to use to force the predictor to choose low registers (to reduce code size)
4148                         regMaskTP avoidRegs = RBM_NONE;
4149 #ifdef _TARGET_ARM_
4150                         avoidRegs = (RBM_R12 | RBM_LR);
4151 #endif
4152                         if (predictReg <= PREDICT_REG)
4153                             predictReg = PREDICT_SCRATCH_REG;
4154
4155                         /* Evaluate the <then> subtree */
4156                         regMask =
4157                             rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4158
4159                         if (regMask)
4160                         {
4161                             rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4162                             if (op1PredictReg != PREDICT_NONE)
4163                                 predictReg = op1PredictReg;
4164                         }
4165                     }
4166
4167                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4168
4169                     /* Evaluate the <else> subtree */
4170                     // First record the post-then liveness, and reset the current liveness to the else
4171                     // branch liveness.
4172                     CLANG_FORMAT_COMMENT_ANCHOR;
4173
4174 #ifdef DEBUG
4175                     VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4176 #endif
4177
4178                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4179
4180                     rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4181                     tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4182
4183                     // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4184                     // They each have only one successor, which they share.  Their live-out sets must equal the
4185                     // live-in set of this virtual successor block, and thus must be the same.  We can assert
4186                     // that equality here.
4187                     assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4188
4189                     if (spillCnt > 0)
4190                     {
4191                         regMaskTP reloadMask = RBM_NONE;
4192
4193                         while (spillCnt)
4194                         {
4195                             regMaskTP reloadReg;
4196
4197                             /* Get an extra register to hold it */
4198                             reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4199 #ifdef DEBUG
4200                             if (verbose)
4201                             {
4202                                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4203                                 gtDispTree(tree, 0, NULL, true);
4204                             }
4205 #endif
4206                             reloadMask |= reloadReg;
4207
4208                             spillCnt--;
4209                         }
4210
4211                         /* update the gtUsedRegs mask */
4212                         tree->gtUsedRegs |= reloadMask;
4213                     }
4214                 }
4215
4216                 goto RETURN_CHECK;
4217             }
4218             case GT_RETURN:
4219                 tree->gtUsedRegs = RBM_NONE;
4220                 regMask          = RBM_NONE;
4221
4222                 /* Is there a return value? */
4223                 if (op1 != NULL)
4224                 {
4225 #if FEATURE_FP_REGALLOC
4226                     if (varTypeIsFloating(type))
4227                     {
4228                         predictReg = PREDICT_FLTRET;
4229                         if (type == TYP_FLOAT)
4230                             regMask = RBM_FLOATRET;
4231                         else
4232                             regMask = RBM_DOUBLERET;
4233                     }
4234                     else
4235 #endif
4236                         if (isRegPairType(type))
4237                     {
4238                         predictReg = PREDICT_LNGRET;
4239                         regMask    = RBM_LNGRET;
4240                     }
4241                     else
4242                     {
4243                         predictReg = PREDICT_INTRET;
4244                         regMask    = RBM_INTRET;
4245                     }
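                         // For example (x86 sketch): a TYP_LONG return is predicted into the
                         // EDX:EAX pair (RBM_LNGRET), while a TYP_INT return lands in EAX
                         // (RBM_INTRET).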
4246                     if (info.compCallUnmanaged)
4247                     {
4248                         lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4249                     }
4250                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4251                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4252                 }
4253
4254 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4255                 // When on ARM under a profiler, emitting the Leave callback requires RBM_PROFILER_RET_USED.
4256                 // We could optimize the register set based on int/long or no return value, but to
4257                 // keep it simple we mark the entire RBM_PROFILER_RET_USED set as used regs here.
4258                 if (compIsProfilerHookNeeded())
4259                 {
4260                     tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4261                 }
4262
4263 #endif
4264                 goto RETURN_CHECK;
4265
4266             case GT_RETFILT:
4267                 if (op1 != NULL)
4268                 {
4269                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4270                     regMask          = genReturnRegForTree(tree);
4271                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4272                     goto RETURN_CHECK;
4273                 }
4274                 tree->gtUsedRegs = 0;
4275                 regMask          = 0;
4276
4277                 goto RETURN_CHECK;
4278
4279             case GT_JTRUE:
4280                 /* This must be a test of a relational operator */
4281
4282                 noway_assert(op1->OperIsCompare());
4283
4284                 /* Only condition code set by this operation */
4285
4286                 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4287
4288                 tree->gtUsedRegs = op1->gtUsedRegs;
4289                 regMask          = 0;
4290
4291                 goto RETURN_CHECK;
4292
4293             case GT_SWITCH:
4294                 noway_assert(type <= TYP_INT);
4295                 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4296 #ifdef _TARGET_ARM_
4297                 {
4298                     regMask          = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4299                     unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4300                     if (jumpCnt > 2)
4301                     {
4302                         // Table based switch requires an extra register for the table base
4303                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4304                     }
4305                     tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4306                 }
4307 #else  // !_TARGET_ARM_
4308                 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4309                 tree->gtUsedRegs = op1->gtUsedRegs;
4310 #endif // _TARGET_ARM_
4311                 regMask = 0;
4312                 goto RETURN_CHECK;
4313
4314             case GT_CKFINITE:
4315                 if (predictReg <= PREDICT_REG)
4316                     predictReg = PREDICT_SCRATCH_REG;
4317
4318                 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4319                 // Need a reg to load exponent into
4320                 regMask          = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4321                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4322                 goto RETURN_CHECK;
4323
4324             case GT_LCLHEAP:
4325                 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4326                 op2Mask = 0;
4327
4328 #ifdef _TARGET_ARM_
4329                 if (info.compInitMem)
4330                 {
4331                     // We zero out two registers in the ARM codegen path
4332                     op2Mask |=
4333                         rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4334                 }
4335 #endif
4336
4337                 op1->gtUsedRegs |= (regMaskSmall)regMask;
4338                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4339
4340                 // The result will be put in the reg we picked for the size
4341                 // regMask = <already set as we want it to be>
4342
4343                 goto RETURN_CHECK;
4344
4345             case GT_OBJ:
4346             {
4347 #ifdef _TARGET_ARM_
4348                 if (predictReg <= PREDICT_REG)
4349                     predictReg = PREDICT_SCRATCH_REG;
4350
4351                 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4352                                                           // registers (to reduce code size)
4353                 regMask = RBM_NONE;
4354                 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4355 #endif
4356
4357                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4358                 {
4359                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4360                 }
4361
4362 #ifdef _TARGET_ARM_
4363                 unsigned  objSize   = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4364                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4365                 // If it has one bit set, and that's an arg reg...
4366                 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4367                 {
4368                     // We are passing the 'obj' in the argument registers
4369                     //
4370                     regNumber rn = genRegNumFromMask(preferReg);
4371
4372                     //  Add the registers used to pass the 'obj' to regMask.
4373                     for (unsigned i = 0; i < objSize / 4; i++)
4374                     {
4375                         if (rn == MAX_REG_ARG)
4376                             break;
4377                         // Otherwise...
4378                         regMask |= genRegMask(rn);
4379                         rn = genRegArgNext(rn);
4380                     }
4381                 }
4382                 else
4383                 {
4384                     // We are passing the 'obj' in the outgoing arg space
4385                     // We will need one register to load into unless the 'obj' size is 4 or less.
4386                     //
4387                     if (objSize > 4)
4388                     {
4389                         regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4390                     }
4391                 }
4392                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4393                 goto RETURN_CHECK;
4394 #else  // !_TARGET_ARM
4395                 goto GENERIC_UNARY;
4396 #endif // _TARGET_ARM_
4397             }
4398
4399             case GT_MKREFANY:
4400             {
4401 #ifdef _TARGET_ARM_
4402                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4403                 regMask             = RBM_NONE;
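                 // Note: ((preferReg - 1) & preferReg) == 0 below tests that preferReg has at
                 // most one bit set -- the same check genMaxOneBit performs in the GT_OBJ case
                 // above.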
4404                 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4405                 {
4406                     // A MKREFANY takes up two registers.
4407                     regNumber rn = genRegNumFromMask(preferReg);
4408                     regMask      = RBM_NONE;
4409                     if (rn < MAX_REG_ARG)
4410                     {
4411                         regMask |= genRegMask(rn);
4412                         rn = genRegArgNext(rn);
4413                         if (rn < MAX_REG_ARG)
4414                             regMask |= genRegMask(rn);
4415                     }
4416                 }
4417                 if (regMask != RBM_NONE)
4418                 {
4419                     // Condensation of GENERIC_BINARY path.
4420                     assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4421                     op2PredictReg        = PREDICT_REG;
4422                     regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4423                     rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4424                     regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4425                     tree->gtUsedRegs = (regMaskSmall)regMask;
4426                     goto RETURN_CHECK;
4427                 }
4428                 tree->gtUsedRegs = op1->gtUsedRegs;
4429 #endif // _TARGET_ARM_
4430                 goto GENERIC_BINARY;
4431             }
4432
4433             case GT_BOX:
4434                 goto GENERIC_UNARY;
4435
4436             case GT_LOCKADD:
4437                 goto GENERIC_BINARY;
4438
4439             case GT_XADD:
4440             case GT_XCHG:
4441                 // Ensure we can write to op2.  op2 will hold the output.
4442                 if (predictReg < PREDICT_SCRATCH_REG)
4443                     predictReg = PREDICT_SCRATCH_REG;
4444
4445                 if (tree->gtFlags & GTF_REVERSE_OPS)
4446                 {
4447                     op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4448                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4449                 }
4450                 else
4451                 {
4452                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4453                     op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4454                 }
4455                 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4456                 goto RETURN_CHECK;
4457
4458             case GT_ARR_LENGTH:
4459                 goto GENERIC_UNARY;
4460
4461             case GT_INIT_VAL:
4462                 // This unary operator simply passes through the value from its child (much like GT_NOP)
4463                 // and thus won't need a scratch register.
4464                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4465                 tree->gtUsedRegs = op1->gtUsedRegs;
4466                 goto RETURN_CHECK;
4467
4468             default:
4469 #ifdef DEBUG
4470                 gtDispTree(tree);
4471 #endif
4472                 noway_assert(!"unexpected simple operator in reg use prediction");
4473                 break;
4474         }
4475     }
4476
4477     /* See what kind of a special operator we have here */
4478
4479     switch (oper)
4480     {
4481         GenTreePtr      args;
4482         GenTreeArgList* list;
4483         regMaskTP       keepMask;
4484         unsigned        regArgsNum;
4485         int             regIndex;
4486         regMaskTP       regArgMask;
4487         regMaskTP       curArgMask;
4488
4489         case GT_CALL:
4490
4491         {
4492
4493             /* initialize so we can just or in various bits */
4494             tree->gtUsedRegs = RBM_NONE;
4495
4496 #if GTF_CALL_REG_SAVE
4497             /*
4498              *  Unless the GTF_CALL_REG_SAVE flag is set,
4499              *  we can't preserve the RBM_CALLEE_TRASH registers.
4500              *  (likewise we can't preserve the return registers)
4501              *  So we remove them from the lockedRegs set and
4502              *  record any of them in the keepMask
4503              */
4504
4505             if (tree->gtFlags & GTF_CALL_REG_SAVE)
4506             {
4507                 regMaskTP trashMask = genReturnRegForTree(tree);
4508
4509                 keepMask = lockedRegs & trashMask;
4510                 lockedRegs &= ~trashMask;
4511             }
4512             else
4513 #endif
4514             {
4515                 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4516                 lockedRegs &= ~RBM_CALLEE_TRASH;
4517             }
4518
4519             regArgsNum = 0;
4520             regIndex   = 0;
4521
4522             /* Is there an object pointer? */
4523             if (tree->gtCall.gtCallObjp)
4524             {
4525                 /* Evaluate the instance pointer first */
4526
4527                 args = tree->gtCall.gtCallObjp;
4528
4529                 /* the objPtr always goes to an integer register (through temp or directly) */
4530                 noway_assert(regArgsNum == 0);
4531                 regArgsNum++;
4532
4533                 /* Must be passed in a register */
4534
4535                 noway_assert(args->gtFlags & GTF_LATE_ARG);
4536
4537                 /* Must be either a deferred reg arg node or a GT_ASG node */
4538
4539                 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4540                              args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4541
4542                 if (!args->IsArgPlaceHolderNode())
4543                 {
4544                     rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4545                 }
4546             }
4547             VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4548             VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4549
4550             /* process argument list */
4551             for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4552             {
4553                 args = list->Current();
4554
4555                 if (args->gtFlags & GTF_LATE_ARG)
4556                 {
4557                     /* Must be either a Placeholder/NOP node or a GT_ASG node */
4558
4559                     noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4560                                  args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4561
4562                     if (!args->IsArgPlaceHolderNode())
4563                     {
4564                         rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4565                     }
4566
4567                     regArgsNum++;
4568                 }
4569                 else
4570                 {
4571 #ifdef FEATURE_FIXED_OUT_ARGS
4572                     // We'll store this argument into the outgoing argument area
4573                     // It needs to be in a register to be stored.
4574                     //
4575                     predictReg = PREDICT_REG;
4576
4577 #else // !FEATURE_FIXED_OUT_ARGS
4578                     // We'll generate a push for this argument
4579                     //
4580                     predictReg = PREDICT_NONE;
4581                     if (varTypeIsSmall(args->TypeGet()))
4582                     {
4583                         /* We may need to sign or zero extend a small type using a register */
4584                         predictReg = PREDICT_SCRATCH_REG;
4585                     }
4586 #endif
4587
4588                     rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4589                 }
4590                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4591                 tree->gtUsedRegs |= args->gtUsedRegs;
4592             }
4593
4594             /* Is there a late argument list? */
4595
4596             regIndex   = 0;
4597             regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4598             args       = NULL;
4599
4600             /* process the late argument list */
4601             for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4602             {
4603                 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4604                 LclVarDsc* promotedStructLocal = NULL;
4605
4606                 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4607                 tmpMask    = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4608
4609                 assert(list->OperIsList());
4610
4611                 args = list->Current();
4612                 list = list->Rest();
4613
4614                 assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
4615
4616                 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4617                 assert(curArgTabEntry);
4618
4619                 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4620                 unsigned  numSlots =
4621                     curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4622
4623                 rpPredictReg argPredictReg;
4624                 regMaskTP    avoidReg = RBM_NONE;
4625
4626                 if (regNum != REG_STK)
4627                 {
4628                     argPredictReg = rpGetPredictForReg(regNum);
4629                     curArgMask |= genRegMask(regNum);
4630                 }
4631                 else
4632                 {
4633                     assert(numSlots > 0);
4634                     argPredictReg = PREDICT_NONE;
4635 #ifdef _TARGET_ARM_
4636                     // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4637                     avoidReg = (RBM_R12 | RBM_LR);
4638 #endif
4639                 }
4640
4641 #ifdef _TARGET_ARM_
4642                 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4643                 //
4644                 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4645                 {
4646                     // 64-bit longs and doubles require 2 consecutive argument registers
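                         // For example (ARM sketch): a TYP_LONG whose first half is assigned
                         // R2 also occupies R3, which is why REG_NEXT(regNum) is added below.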
4647                     curArgMask |= genRegMask(REG_NEXT(regNum));
4648                 }
4649                 else if (args->TypeGet() == TYP_STRUCT)
4650                 {
4651                     GenTreePtr argx       = args;
4652                     GenTreePtr lclVarTree = NULL;
4653
4654                     /* The GT_OBJ may be a child of a GT_COMMA */
4655                     while (argx->gtOper == GT_COMMA)
4656                     {
4657                         argx = argx->gtOp.gtOp2;
4658                     }
4659                     unsigned originalSize = 0;
4660
4661                     if (argx->gtOper == GT_OBJ)
4662                     {
4663                         originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4664
4665                         // Is it the address of a promoted struct local?
4666                         if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4667                         {
4668                             lclVarTree        = argx->gtObj.gtOp1->gtOp.gtOp1;
4669                             LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4670                             if (varDsc->lvPromoted)
4671                                 promotedStructLocal = varDsc;
4672                         }
4673                     }
4674                     else if (argx->gtOper == GT_LCL_VAR)
4675                     {
4676                         varDsc       = lvaTable + argx->gtLclVarCommon.gtLclNum;
4677                         originalSize = varDsc->lvSize();
4678
4679                         // Is it a promoted struct local?
4680                         if (varDsc->lvPromoted)
4681                             promotedStructLocal = varDsc;
4682                     }
4683                     else if (argx->gtOper == GT_MKREFANY)
4684                     {
4685                         originalSize = 2 * TARGET_POINTER_SIZE;
4686                     }
4687                     else
4688                     {
4689                         noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4690                     }
4691
4692                     // We only pass arguments differently if it is a struct local that is "independently" promoted,
4693                     // which allows the field locals to be independently enregistered.
4694                     if (promotedStructLocal != NULL)
4695                     {
4696                         if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4697                             promotedStructLocal = NULL;
4698                     }
4699
4700                     unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
4701
4702                     // Are we passing a TYP_STRUCT in multiple integer registers?
4703                     // if so set up curArgMask to reflect this
4704                     // Also slots is updated to reflect the number of outgoing arg slots that we will write
4705                     if (regNum != REG_STK)
4706                     {
4707                         regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4708                         assert(genIsValidReg(regNum));
4709                         regNumber nextReg = REG_NEXT(regNum);
4710                         slots--;
4711                         while (slots > 0 && nextReg <= regLast)
4712                         {
4713                             curArgMask |= genRegMask(nextReg);
4714                             nextReg = REG_NEXT(nextReg);
4715                             slots--;
4716                         }
4717                     }
4718
4719                     if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4720                     {
4721                         // All or a portion of this struct will be placed in the argument registers indicated by
4722                         // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4723                         // that the second arg to be evaluated interferes with the reg for the first, the third with
4724                         // the regs for the first and second, etc. But since we always place the stack slots before
4725                         // placing the register slots, we do not add interferences for any part of the struct that gets
4726                         // passed on the stack.
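                             // E.g. (sketch): if field 0 is placed in R0 and field 1 in R1, then
                             // when field 1 is processed below, prevArgMask == RBM_R0, and an
                             // interference is recorded between R0 and field 1's LclVar, so
                             // materializing field 1 cannot disturb the already-placed R0 value.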
4727
4728                         argPredictReg =
4729                             PREDICT_NONE; // We will target the individual fields into registers, but not the whole struct
4730                         regMaskTP prevArgMask = RBM_NONE;
4731                         for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4732                         {
4733                             LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4734                             if (fieldVarDsc->lvTracked)
4735                             {
4736                                 assert(lclVarTree != NULL);
4737                                 if (prevArgMask != RBM_NONE)
4738                                 {
4739                                     rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4740                                                                      DEBUGARG("fieldVar/argReg"));
4741                                 }
4742                             }
4743                             // Now see how many registers this uses up.
4744                             unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4745                             unsigned nextAfterLastRegOffset =
4746                                 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4747                                 TARGET_POINTER_SIZE;
4748                             unsigned nextAfterLastArgRegOffset =
4749                                 min(nextAfterLastRegOffset,
4750                                     genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4751
4752                             for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4753                                  regOffset++)
4754                             {
4755                                 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4756                             }
4757
4758                             if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4759                             {
4760                                 break;
4761                             }
4762
4763                             if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4764                             {
4765                                 // Add the argument register used here as a preferred register for this fieldVarDsc
4766                                 //
4767                                 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4768                                 fieldVarDsc->setPrefReg(firstRegUsed, this);
4769                             }
4770                         }
4771                         compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4772                     }
4773
4774                     // If slots is greater than zero then part or all of this TYP_STRUCT
4775                     // argument is passed in the outgoing argument area (except for HFA register args).
4776                     //
4777                     if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4778                     {
4779                         // We will need a register to address the TYP_STRUCT
4780                         // Note that we can use an argument register in curArgMask as in
4781                         // codegen we pass the stack portion of the argument before we
4782                         // set up the register part.
4783                         //
4784
4785                         // Force the predictor to choose a LOW_REG here to reduce code bloat
4786                         avoidReg = (RBM_R12 | RBM_LR);
4787
4788                         assert(tmpMask == RBM_NONE);
4789                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4790
4791                         // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4792                         // arg area
4793                         if (slots > 1)
4794                         {
4795                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4796                                                         lockedRegs | regArgMask | tmpMask | avoidReg);
4797                         }
4798                     }
4799                 } // (args->TypeGet() == TYP_STRUCT)
4800 #endif            // _TARGET_ARM_
4801
4802                 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4803                 // as we have already calculated the correct tmpMask and curArgMask values and
4804                 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4805                 //
4806                 if (promotedStructLocal == NULL)
4807                 {
4808                     /* Target the appropriate argument register */
4809                     tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4810                 }
4811
4812                 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4813                 // for the duration of the OBJ.
4814                 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4815                 {
4816                     GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4817                     assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4818                     compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4819                 }
4820
4821                 regArgMask |= curArgMask;
4822                 args->gtUsedRegs |= (tmpMask | regArgMask);
4823                 tree->gtUsedRegs |= args->gtUsedRegs;
4824                 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4825
4826                 if (args->gtUsedRegs != RBM_NONE)
4827                 {
4828                     // Add register interference with the set of registers used or in use when we evaluated
4829                     // the current arg, with whatever is alive after the current arg
4830                     //
4831                     rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4832                 }
4833                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4834             }
4835             assert(list == NULL);
4836
4837 #ifdef LEGACY_BACKEND
4838 #if CPU_LOAD_STORE_ARCH
4839 #ifdef FEATURE_READYTORUN_COMPILER
4840             if (tree->gtCall.IsR2RRelativeIndir())
4841             {
4842                 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4843             }
4844 #endif // FEATURE_READYTORUN_COMPILER
4845 #endif // CPU_LOAD_STORE_ARCH
4846 #endif // LEGACY_BACKEND
4847
4848             regMaskTP callAddrMask;
4849             callAddrMask = RBM_NONE;
4850 #if CPU_LOAD_STORE_ARCH
4851             predictReg = PREDICT_SCRATCH_REG;
4852 #else
4853             predictReg       = PREDICT_NONE;
4854 #endif
4855
4856             switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4857             {
4858                 case GTF_CALL_VIRT_STUB:
4859
4860                     // We only want to record an interference between the virtual stub
4861                     // param reg and anything that's live AFTER the call, but we've not
4862                     // yet processed the indirect target.  So add virtualStubParamInfo.regMask
4863                     // yet processed the indirect target.  So add virtualStubParamInfo->GetRegMask()
4864                     interferingRegs |= virtualStubParamInfo->GetRegMask();
4865 #ifdef DEBUG
4866                     if (verbose)
4867                         printf("Adding interference with Virtual Stub Param\n");
4868 #endif
4869                     codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4870
4871                     if (tree->gtCall.gtCallType == CT_INDIRECT)
4872                     {
4873                         predictReg = virtualStubParamInfo->GetPredict();
4874                     }
4875                     break;
4876
4877                 case GTF_CALL_VIRT_VTABLE:
4878                     predictReg = PREDICT_SCRATCH_REG;
4879                     break;
4880
4881                 case GTF_CALL_NONVIRT:
4882                     predictReg = PREDICT_SCRATCH_REG;
4883                     break;
4884             }
4885
4886             if (tree->gtCall.gtCallType == CT_INDIRECT)
4887             {
4888 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4889                 if (tree->gtCall.gtCallCookie)
4890                 {
4891                     codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4892
4893                     callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4894                                                         lockedRegs | regArgMask, RBM_LASTUSE);
4895
4896                     // Just in case we predict some other registers, force interference with our two special
4897                     // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4898                     callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4899
4900                     predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4901                 }
4902 #endif
4903                 callAddrMask |=
4904                     rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4905             }
4906             else if (predictReg != PREDICT_NONE)
4907             {
4908                 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4909             }
4910
4911             if (tree->gtFlags & GTF_CALL_UNMANAGED)
4912             {
4913                 // Need a register for tcbReg
4914                 callAddrMask |=
4915                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4916 #if CPU_LOAD_STORE_ARCH
4917                 // Need an extra register for tmpReg
4918                 callAddrMask |=
4919                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4920 #endif
4921             }
4922
4923             tree->gtUsedRegs |= callAddrMask;
4924
4925             /* After the call, restore the original value of lockedRegs */
4926             lockedRegs |= keepMask;
4927
4928             /* set the return register */
4929             regMask = genReturnRegForTree(tree);
4930
4931             if (regMask & rsvdRegs)
4932             {
4933                 // We will need to relocate the return register value
4934                 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4935 #if FEATURE_FP_REGALLOC
4936                 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4937 #endif
4938                 regMask = RBM_NONE;
4939
4940                 if (intRegMask)
4941                 {
4942                     if (intRegMask == RBM_INTRET)
4943                     {
4944                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4945                     }
4946                     else if (intRegMask == RBM_LNGRET)
4947                     {
4948                         regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4949                     }
4950                     else
4951                     {
4952                         noway_assert(!"unexpected return regMask");
4953                     }
4954                 }
4955
4956 #if FEATURE_FP_REGALLOC
4957                 if (floatRegMask)
4958                 {
4959                     if (floatRegMask == RBM_FLOATRET)
4960                     {
4961                         regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4962                     }
4963                     else if (floatRegMask == RBM_DOUBLERET)
4964                     {
4965                         regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4966                     }
4967                     else // HFA return case
4968                     {
4969                         for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4970                         {
4971                             regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4972                         }
4973                     }
4974                 }
4975 #endif
4976             }
4977
4978             /* the return registers (if any) are killed */
4979             tree->gtUsedRegs |= regMask;
4980
4981 #if GTF_CALL_REG_SAVE
4982             if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4983 #endif
4984             {
4985                 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4986                 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4987             }
4988         }
4989
4990 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4991             // Mark required registers for emitting tailcall profiler callback as used
4992             if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4993             {
4994                 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4995             }
4996 #endif
4997             break;
4998
4999         case GT_ARR_ELEM:
5000
5001             // Figure out which registers can't be touched
5002             unsigned dim;
5003             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5004                 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5005
5006             regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5007
5008             regMaskTP dimsMask;
5009             dimsMask = 0;
5010
5011 #if CPU_LOAD_STORE_ARCH
5012             // We need a register to load the bounds of the MD array
5013             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5014 #endif
5015
5016             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5017             {
5018                 /* We need scratch registers to compute index-lower_bound.
5019                    Also, gtArrInds[0]'s register will be used as the second
5020                    addressability register (besides gtArrObj's) */
5021
5022                 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5023                                                         lockedRegs | regMask | dimsMask, rsvdRegs);
5024                 if (dim == 0)
5025                     regMask |= dimMask;
5026
5027                 dimsMask |= dimMask;
5028             }
5029 #ifdef _TARGET_XARCH_
5030             // INS_imul doesn't have an immediate constant.
5031             if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5032                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5033 #endif
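             // Worked example (sketch): for a rank-2 access a[i, j], gtArrObj takes one
             // register and each index gets a scratch register to compute (index - lower_bound);
             // dimsMask accumulates those per-dimension registers into gtUsedRegs below.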
5034             tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5035             break;
5036
5037         case GT_CMPXCHG:
5038         {
5039 #ifdef _TARGET_XARCH_
5040             rsvdRegs |= RBM_EAX;
5041 #endif
5042             if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5043             {
5044                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5045             }
5046             else
5047             {
5048                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5049             }
5050             op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5051
5052 #ifdef _TARGET_XARCH_
5053             rsvdRegs &= ~RBM_EAX;
5054             tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5055                                           rsvdRegs | regMask | op2Mask);
5056             tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5057             predictReg       = PREDICT_REG_EAX; // When this is done the result is always in EAX.
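             // Underlying constraint (x86 sketch): CMPXCHG compares EAX with the destination
             // and, on a mismatch, loads the destination's value back into EAX, so the
             // comparand is forced into EAX above and EAX is included in gtUsedRegs.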
5058 #else
5059             tmpMask          = 0;
5060             tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5061 #endif
5062         }
5063         break;
5064
5065         case GT_ARR_BOUNDS_CHECK:
5066         {
5067             regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5068             regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5069             rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5070
5071             tree->gtUsedRegs =
5072                 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5073         }
5074         break;
5075
5076         default:
5077             NO_WAY("unexpected special operator in reg use prediction");
5078             break;
5079     }
5080
5081 RETURN_CHECK:
5082
5083 #ifdef DEBUG
5084     /* make sure we set them to something reasonable */
5085     if (tree->gtUsedRegs & RBM_ILLEGAL)
5086         noway_assert(!"used regs not set properly in reg use prediction");
5087
5088     if (regMask & RBM_ILLEGAL)
5089         noway_assert(!"return value not set properly in reg use prediction");
5090
5091 #endif
5092
5093     /*
5094      *  If the gtUsedRegs conflicts with lockedRegs
5095      *  then we are going to have to spill some registers
5096      *  into the non-trashed register set to keep them alive
5097      */
5098     regMaskTP spillMask;
5099     spillMask = tree->gtUsedRegs & lockedRegs;
5100
5101     if (spillMask)
5102     {
5103         while (spillMask)
5104         {
5105             /* Find the next register that needs to be spilled */
5106             tmpMask = genFindLowestBit(spillMask);
5107
5108 #ifdef DEBUG
5109             if (verbose)
5110             {
5111                 printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5112                 gtDispTree(tree, 0, NULL, true);
5113                 if ((tmpMask & regMask) == 0)
5114                 {
5115                     printf("Predict reload of   %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5116                     gtDispTree(tree, 0, NULL, true);
5117                 }
5118             }
5119 #endif
5120             /* In Codegen it will typically introduce a spill temp here */
5121             /* rather than relocating the register to a non trashed reg */
5122             rpPredictSpillCnt++;
5123
5124             /* Remove it from the spillMask */
5125             spillMask &= ~tmpMask;
5126         }
5127     }
5128
5129     /*
5130      *  If the return registers in regMask conflicts with the lockedRegs
5131      *  then we allocate extra registers for the reload of the conflicting
5132      *  registers.
5133      *
5134      *  Set spillMask to the set of locked registers that have to be reloaded here.
5135      *  reloadMask is set to the extra registers that are used to reload
5136      *  the spilled lockedRegs.
5137      */
5138
5139     noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5140     spillMask = lockedRegs & regMask;
5141
5142     if (spillMask)
5143     {
5144         /* Remove the spillMask from regMask */
5145         regMask &= ~spillMask;
5146
5147         regMaskTP reloadMask = RBM_NONE;
5148         while (spillMask)
5149         {
5150             /* Get an extra register to hold it */
5151             regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5152 #ifdef DEBUG
5153             if (verbose)
5154             {
5155                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5156                 gtDispTree(tree, 0, NULL, true);
5157             }
5158 #endif
5159             reloadMask |= reloadReg;
5160
5161             /* Remove it from the spillMask */
5162             spillMask &= ~genFindLowestBit(spillMask);
5163         }
5164
5165         /* Update regMask to use the reloadMask */
5166         regMask |= reloadMask;
5167
5168         /* update the gtUsedRegs mask */
5169         tree->gtUsedRegs |= (regMaskSmall)regMask;
5170     }
5171
5172     regMaskTP regUse = tree->gtUsedRegs;
5173     regUse |= interferingRegs;
5174
5175     if (!VarSetOps::IsEmpty(this, compCurLife))
5176     {
5177         // Add interference between the current set of live variables and
5178         //  the set of temporary registers needed to evaluate the subtree
5179         if (regUse)
5180         {
5181             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5182         }
5183     }
5184
5185     if (rpAsgVarNum != -1)
5186     {
5187         // Add interference between the registers used (if any)
5188         // and the assignment target variable
5189         if (regUse)
5190         {
5191             rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5192         }
5193
5194         // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5195         // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5196         // to the set of currently live variables. This new interference will prevent us
5197         // from using the register value used here to enregister a different live variable.
5198         //
5199         if (!VarSetOps::IsEmpty(this, compCurLife))
5200         {
5201             rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5202         }
5203     }
5204
5205     /* Do we need to restore the oldLastUseVars value? */
5206     if (restoreLastUseVars)
5207     {
5208         /*  If we used a GT_ASG targeted register then we need to add
5209          *  a variable interference between any new last use variables
5210          *  and the GT_ASG targeted register
5211          */
5212         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5213         {
5214             rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5215                                              DEBUGARG("asgn tgt last use conflict"));
5216         }
5217         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5218     }
5219
5220     return regMask;
5221 }
5222 #ifdef _PREFAST_
5223 #pragma warning(pop)
5224 #endif
5225
5226 #endif // LEGACY_BACKEND
5227
5228 /****************************************************************************/
5229 /* Returns true when we must create an EBP frame.
5230    This is used to force most managed methods to have EBP-based frames,
5231    which allows the ETW kernel stackwalker to walk the stacks of managed code;
5232    this lets the kernel perform lightweight profiling.
5233  */
5234 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5235 {
5236     bool result = false;
5237 #ifdef DEBUG
5238     const char* reason = nullptr;
5239 #endif
5240
5241 #if ETW_EBP_FRAMED
5242     if (!result && (opts.MinOpts() || opts.compDbgCode))
5243     {
5244         INDEBUG(reason = "Debug Code");
5245         result = true;
5246     }
5247     if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5248     {
5249         INDEBUG(reason = "IL Code Size");
5250         result = true;
5251     }
5252     if (!result && (fgBBcount > 3))
5253     {
5254         INDEBUG(reason = "BasicBlock Count");
5255         result = true;
5256     }
5257     if (!result && fgHasLoops)
5258     {
5259         INDEBUG(reason = "Method has Loops");
5260         result = true;
5261     }
5262     if (!result && (optCallCount >= 2))
5263     {
5264         INDEBUG(reason = "Call Count");
5265         result = true;
5266     }
5267     if (!result && (optIndirectCallCount >= 1))
5268     {
5269         INDEBUG(reason = "Indirect Call");
5270         result = true;
5271     }
5272 #endif // ETW_EBP_FRAMED
5273
5274     // The VM always wants to identify the containing frame of an InlinedCallFrame
5275     // via the frame register, never the stack register, so we need a frame.
5276     if (!result && (optNativeCallCount != 0))
5277     {
5278         INDEBUG(reason = "Uses PInvoke");
5279         result = true;
5280     }
5281
5282 #ifdef _TARGET_ARM64_
5283     // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5284     // pointer frames.
5285     if (!result)
5286     {
5287         INDEBUG(reason = "Temporary ARM64 force frame pointer");
5288         result = true;
5289     }
5290 #endif // _TARGET_ARM64_
5291
5292 #ifdef DEBUG
5293     if ((result == true) && (wbReason != nullptr))
5294     {
5295         *wbReason = reason;
5296     }
5297 #endif
5298
5299     return result;
5300 }
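// Editorial illustration (hypothetical numbers): under ETW_EBP_FRAMED, a
// method whose ILCodeSize is below DEFAULT_MAX_INLINE_SIZE but which has
// fgBBcount == 5 returns true here with reason "BasicBlock Count"; only a
// small single-block method with no loops, at most one direct call, no
// indirect calls, and no P/Invoke can still end up with an ESP-based frame.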
5301
5302 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5303
5304 /*****************************************************************************
5305  *
5306  *  Predict which variables will be assigned to registers.
5307  *  This is x86 specific, only predicts the integer registers, and
5308  *  must be conservative: any variable that is predicted to be enregistered
5309  *  must end up being enregistered.
5310  *
5311  *  rpPredictTreeRegUse takes advantage of the LCL_VARs that are
5312  *  predicted to be enregistered to minimize calls to rpPredictRegPick.
5313  *
5314  */
5315
5316 #ifdef _PREFAST_
5317 #pragma warning(push)
5318 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5319 #endif
5320 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5321 {
5322     unsigned regInx;
5323
5324     if (rpPasses <= rpPassesPessimize)
5325     {
5326         // Assume that we won't have to reverse EBP enregistration
5327         rpReverseEBPenreg = false;
5328
5329         // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5330         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5331             rpFrameType = FT_EBP_FRAME;
5332         else
5333             rpFrameType = FT_ESP_FRAME;
5334     }
5335
5336 #if !ETW_EBP_FRAMED
5337     // If we are using FPBASE as the frame register, we cannot also use it for
5338     // a local var
5339     if (rpFrameType == FT_EBP_FRAME)
5340     {
5341         regAvail &= ~RBM_FPBASE;
5342     }
5343 #endif // !ETW_EBP_FRAMED
5344
5345     rpStkPredict        = 0;
5346     rpPredictAssignMask = regAvail;
5347
5348     raSetupArgMasks(&codeGen->intRegState);
5349 #if !FEATURE_STACK_FP_X87
5350     raSetupArgMasks(&codeGen->floatRegState);
5351 #endif
5352
5353     // If there is a secret stub param, it is also live in
5354     if (info.compPublishStubParam)
5355     {
5356         codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5357     }
5358
5359     if (regAvail == RBM_NONE)
5360     {
5361         unsigned   lclNum;
5362         LclVarDsc* varDsc;
5363
5364         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5365         {
5366 #if FEATURE_STACK_FP_X87
5367             if (!varDsc->IsFloatRegType())
5368 #endif
5369             {
5370                 varDsc->lvRegNum = REG_STK;
5371                 if (isRegPairType(varDsc->lvType))
5372                     varDsc->lvOtherReg = REG_STK;
5373             }
5374         }
5375     }
5376
5377 #ifdef DEBUG
5378     if (verbose)
5379     {
5380         printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5381         printf("\n        Available registers = ");
5382         dspRegMask(regAvail);
5383         printf("\n");
5384     }
5385 #endif
5386
5387     if (regAvail == RBM_NONE)
5388     {
5389         return RBM_NONE;
5390     }
5391
5392     /* We cannot change the lvVarIndexes at this point, so we can    */
5393     /* only re-order the existing set of tracked variables, which    */
5394     /* will change the order in which we select the locals for       */
5395     /* enregistering.                                                 */
5396
5397     assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5398
5399     // Should not be set unless optimizing
5400     noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5401
5402     if (lvaSortAgain)
5403         lvaSortOnly();
5404
5405 #ifdef DEBUG
5406     fgDebugCheckBBlist();
5407 #endif
5408
5409     /* Initialize the weighted count of variables that could have */
5410     /* been enregistered but weren't */
5411     unsigned refCntStk    = 0; // sum of     ref counts for all stack based variables
5412     unsigned refCntEBP    = 0; // sum of     ref counts for EBP enregistered variables
5413     unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5414 #if DOUBLE_ALIGN
5415     unsigned refCntStkParam;  // sum of     ref counts for all stack based parameters
5416     unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5417
5418 #if FEATURE_STACK_FP_X87
5419     refCntStkParam  = raCntStkParamDblStackFP;
5420     refCntWtdStkDbl = raCntWtdStkDblStackFP;
5421     refCntStk       = raCntStkStackFP;
5422 #else
5423     refCntStkParam  = 0;
5424     refCntWtdStkDbl = 0;
5425     refCntStk       = 0;
5426 #endif // FEATURE_STACK_FP_X87
5427
5428 #endif // DOUBLE_ALIGN
5429
5430     /* Set of registers used to enregister variables in the prediction */
5431     regMaskTP regUsed = RBM_NONE;
5432
5433     /*-------------------------------------------------------------------------
5434      *
5435      *  Predict/Assign the enregistered locals in ref-count order
5436      *
5437      */
5438
5439     VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5440
5441     unsigned FPRegVarLiveInCnt;
5442     FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5443
5444     LclVarDsc* varDsc;
5445     for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5446     {
5447         bool notWorthy = false;
5448
5449         unsigned  varIndex;
5450         bool      isDouble;
5451         regMaskTP regAvailForType;
5452         var_types regType;
5453         regMaskTP avoidReg;
5454         unsigned  customVarOrderSize;
5455         regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5456         bool      firstHalf;
5457         regNumber saveOtherReg;
5458
5459         varDsc = lvaRefSorted[sortNum];
5460
5461 #if FEATURE_STACK_FP_X87
5462         if (varTypeIsFloating(varDsc->TypeGet()))
5463         {
5464 #ifdef DEBUG
5465             if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5466             {
5467                 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5468                 // be en-registered.
5469                 noway_assert(!varDsc->lvRegister);
5470             }
5471 #endif
5472             continue;
5473         }
5474 #endif
5475
5476         /* Check the set of invariant things that would prevent enregistration */
5477
5478         /* Ignore the variable if it's not tracked */
5479         if (!varDsc->lvTracked)
5480             goto CANT_REG;
5481
5482         /* Get hold of the index and the interference mask for the variable */
5483         varIndex = varDsc->lvVarIndex;
5484
5485         // Remove 'varIndex' from unprocessedVars
5486         VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5487
5488         // Skip the variable if it's marked as DoNotEnregister.
5489
5490         if (varDsc->lvDoNotEnregister)
5491             goto CANT_REG;
5492
5493         /* TODO: For now if we have JMP all register args go to stack
5494          * TODO: Later consider extending the life of the argument or make a copy of it */
5495
5496         if (compJmpOpUsed && varDsc->lvIsRegArg)
5497             goto CANT_REG;
5498
5499         /* Skip the variable if the ref count is zero */
5500
5501         if (varDsc->lvRefCnt == 0)
5502             goto CANT_REG;
5503
5504         /* Ignore a field of a PROMOTION_TYPE_DEPENDENT promoted struct */
5505
5506         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5507         {
5508             goto CANT_REG;
5509         }
5510
5511         /* Is the unweighted ref count too low to be interesting? */
5512
5513         if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5514             (varDsc->lvRefCnt <= 1))
5515         {
5516             /* Sometimes it's useful to enregister a variable with only one use */
5517             /*   arguments referenced in loops are one example */
5518
5519             if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5520                 goto OK_TO_ENREGISTER;
5521
5522             /* If the variable has a preferred register set it may be useful to put it there */
5523             if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5524                 goto OK_TO_ENREGISTER;
5525
5526             /* Keep going; the table is sorted by "weighted" ref count */
5527             goto CANT_REG;
5528         }
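        // Editorial example (assuming loop-weighted ref counts scale with
        // BB_UNITY_WEIGHT): a parameter referenced just once, but inside a
        // loop, has lvRefCnt == 1 while its lvRefCntWtd exceeds
        // BB_UNITY_WEIGHT, so the test above sends it to OK_TO_ENREGISTER
        // instead of rejecting it for its low unweighted count.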
5529
5530     OK_TO_ENREGISTER:
5531
5532         if (varTypeIsFloating(varDsc->TypeGet()))
5533         {
5534             regType         = varDsc->TypeGet();
5535             regAvailForType = regAvail & RBM_ALLFLOAT;
5536         }
5537         else
5538         {
5539             regType         = TYP_INT;
5540             regAvailForType = regAvail & RBM_ALLINT;
5541         }
5542
5543 #ifdef _TARGET_ARM_
5544         isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5545
5546         if (isDouble)
5547         {
5548             regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5549         }
5550 #endif
5551
5552         /* If we don't have any registers available then skip the enregistration attempt */
5553         if (regAvailForType == RBM_NONE)
5554             goto NO_REG;
5555
5556         // On the pessimize passes don't even try to enregister LONGS
5557         if (isRegPairType(varDsc->lvType))
5558         {
5559             if (rpPasses > rpPassesPessimize)
5560                 goto NO_REG;
5561             else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5562                 goto NO_REG;
5563         }
5564
5565         // Set of registers to avoid when performing register allocation
5566         avoidReg = RBM_NONE;
5567
5568         if (!varDsc->lvIsRegArg)
5569         {
5570             /* For local variables, avoid the incoming argument
5571              *  registers, but only if this variable actually
5572              *  interferes with those arguments */
5573
5574             if (raAvoidArgRegMask != 0)
5575             {
5576                 LclVarDsc* argDsc;
5577                 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5578
5579                 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5580                 {
5581                     if (!argDsc->lvIsRegArg)
5582                         continue;
5583
5584                     bool      isFloat  = argDsc->IsFloatRegType();
5585                     regNumber inArgReg = argDsc->lvArgReg;
5586                     regMaskTP inArgBit = genRegMask(inArgReg);
5587
5588                     // Is this inArgReg in the raAvoidArgRegMask set?
5589
5590                     if (!(raAvoidArgRegMask & inArgBit))
5591                         continue;
5592
5593                     noway_assert(argDsc->lvIsParam);
5594                     noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5595
5596                     unsigned locVarIndex = varDsc->lvVarIndex;
5597                     unsigned argVarIndex = argDsc->lvVarIndex;
5598
5599                     /* Does this variable interfere with the arg variable ? */
5600                     if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5601                     {
5602                         noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5603                         /* Yes, so try to avoid the incoming arg reg */
5604                         avoidReg |= inArgBit;
5605                     }
5606                     else
5607                     {
5608                         noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5609                     }
5610                 }
5611             }
5612         }
5613
5614         // Now we will try to predict which register the variable
5615         // could be enregistered in
5616
5617         customVarOrderSize = MAX_VAR_ORDER_SIZE;
5618
5619         raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5620
5621         firstHalf    = false;
5622         saveOtherReg = DUMMY_INIT(REG_NA);
5623
5624         for (regInx = 0; regInx < customVarOrderSize; regInx++)
5625         {
5626             regNumber regNum  = customVarOrder[regInx];
5627             regMaskTP regBits = genRegMask(regNum);
5628
5629             /* Skip this register if it isn't available */
5630             if ((regAvailForType & regBits) == 0)
5631                 continue;
5632
5633             /* Skip this register if it interferes with the variable */
5634
5635             if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5636                 continue;
5637
5638             if (varTypeIsFloating(regType))
5639             {
5640 #ifdef _TARGET_ARM_
5641                 if (isDouble)
5642                 {
5643                     regNumber regNext = REG_NEXT(regNum);
5644                     regBits |= genRegMask(regNext);
5645
5646                     /* Skip if regNext interferes with the variable */
5647                     if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5648                         continue;
5649                 }
5650 #endif
5651             }
5652
5653             bool firstUseOfReg     = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5654             bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5655             bool calleeSavedReg    = ((regBits & RBM_CALLEE_SAVED) != 0);
5656
5657             /* Skip this register if the weighted ref count is less than two
5658                and we are considering an unused callee-saved register */
5659
5660             if (lessThanTwoRefWtd && // less than two references (weighted)
5661                 firstUseOfReg &&     // first use of this register
5662                 calleeSavedReg)      // callee saved register
5663             {
5664                 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5665
5666                 // psc is an abbreviation for possibleSameColor
5667                 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5668
5669                 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5670                 unsigned        pscIndex = 0;
5671                 while (pscIndexIter.NextElem(&pscIndex))
5672                 {
5673                     LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5674                     totalRefCntWtd += pscVar->lvRefCntWtd;
5675                     if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5676                         break;
5677                 }
5678
5679                 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5680                 {
5681                     notWorthy = true;
5682                     continue; // not worth spilling a callee saved register
5683                 }
5684                 // Otherwise we will spill this callee-saved register,
5685                 // because its uses, when combined with the uses of
5686                 // other yet-to-be-processed candidates, exceed our threshold.
5688             }
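            // Editorial worked example, assuming BB_UNITY_WEIGHT == 100 (the
            // value this JIT has historically used): a candidate with
            // lvRefCntWtd == 150 is below the 2 * BB_UNITY_WEIGHT == 200
            // threshold on its own, but a single non-interfering unprocessed
            // variable with lvRefCntWtd == 75 raises totalRefCntWtd to 225,
            // so dirtying the unused callee-saved register is judged to pay
            // for itself.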
5689
5690             /* Looks good - mark the variable as living in the register */
5691
5692             if (isRegPairType(varDsc->lvType))
5693             {
5694                 if (firstHalf == false)
5695                 {
5696                     /* Enregister the first half of the long */
5697                     varDsc->lvRegNum   = regNum;
5698                     saveOtherReg       = varDsc->lvOtherReg;
5699                     varDsc->lvOtherReg = REG_STK;
5700                     firstHalf          = true;
5701                 }
5702                 else
5703                 {
5704                     /* Ensure 'well-formed' register pairs */
5705                     /* (those returned by gen[Pick|Grab]RegPair) */
5706
5707                     if (regNum < varDsc->lvRegNum)
5708                     {
5709                         varDsc->lvOtherReg = varDsc->lvRegNum;
5710                         varDsc->lvRegNum   = regNum;
5711                     }
5712                     else
5713                     {
5714                         varDsc->lvOtherReg = regNum;
5715                     }
5716                     firstHalf = false;
5717                 }
5718             }
5719             else
5720             {
5721                 varDsc->lvRegNum = regNum;
5722 #ifdef _TARGET_ARM_
5723                 if (isDouble)
5724                 {
5725                     varDsc->lvOtherReg = REG_NEXT(regNum);
5726                 }
5727 #endif
5728             }
5729
5730             if (regNum == REG_FPBASE)
5731             {
5732                 refCntEBP += varDsc->lvRefCnt;
5733                 refCntWtdEBP += varDsc->lvRefCntWtd;
5734 #if DOUBLE_ALIGN
5735                 if (varDsc->lvIsParam)
5736                 {
5737                     refCntStkParam += varDsc->lvRefCnt;
5738                 }
5739 #endif
5740             }
5741
5742             /* Record this register in the regUsed set */
5743             regUsed |= regBits;
5744
5745             /* The register is now ineligible for all interfering variables */
5746
5747             VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5748
5749 #ifdef _TARGET_ARM_
5750             if (isDouble)
5751             {
5752                 regNumber       secondHalf = REG_NEXT(regNum);
5753                 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5754                 unsigned        intfIndex = 0;
5755                 while (iter.NextElem(&intfIndex))
5756                 {
5757                     VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5758                 }
5759             }
5760 #endif
5761
5762             /* If a register argument, remove its incoming register
5763              * from the "avoid" list */
5764
5765             if (varDsc->lvIsRegArg)
5766             {
5767                 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5768 #ifdef _TARGET_ARM_
5769                 if (isDouble)
5770                 {
5771                     raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5772                 }
5773 #endif
5774             }
5775
5776             /* A variable of TYP_LONG can take two registers */
5777             if (firstHalf)
5778                 continue;
5779
5780             // Since we have successfully enregistered this variable it is
5781             // now time to move on and consider the next variable
5782             goto ENREG_VAR;
5783         }
5784
5785         if (firstHalf)
5786         {
5787             noway_assert(isRegPairType(varDsc->lvType));
5788
5789             /* This TYP_LONG is partially enregistered */
5790
5791             noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5792
5793             if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5794             {
5795                 rpLostEnreg = true;
5796             }
5797
5798             raAddToStkPredict(varDsc->lvRefCntWtd);
5799             goto ENREG_VAR;
5800         }
5801
5802     NO_REG:;
5803         if (varDsc->lvDependReg)
5804         {
5805             rpLostEnreg = true;
5806         }
5807
5808         if (!notWorthy)
5809         {
5810             /* Weighted count of variables that could have been enregistered but weren't */
5811             raAddToStkPredict(varDsc->lvRefCntWtd);
5812
5813             if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5814                 raAddToStkPredict(varDsc->lvRefCntWtd);
5815         }
5816
5817     CANT_REG:;
5818         varDsc->lvRegister = false;
5819
5820         varDsc->lvRegNum = REG_STK;
5821         if (isRegPairType(varDsc->lvType))
5822             varDsc->lvOtherReg = REG_STK;
5823
5824         /* unweighted count of variables that were not enregistered */
5825
5826         refCntStk += varDsc->lvRefCnt;
5827
5828 #if DOUBLE_ALIGN
5829         if (varDsc->lvIsParam)
5830         {
5831             refCntStkParam += varDsc->lvRefCnt;
5832         }
5833         else
5834         {
5835             /* Is it a stack-based double? */
5836             /* Note that double params are excluded since they cannot be double-aligned */
5837             if (varDsc->lvType == TYP_DOUBLE)
5838             {
5839                 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5840             }
5841         }
5842 #endif
5843 #ifdef DEBUG
5844         if (verbose)
5845         {
5846             printf("; ");
5847             gtDispLclVar((unsigned)(varDsc - lvaTable));
5848             if (varDsc->lvTracked)
5849                 printf("T%02u", varDsc->lvVarIndex);
5850             else
5851                 printf("   ");
5852             printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5853             if (varDsc->lvDoNotEnregister)
5854                 printf(", do-not-enregister");
5855             printf("\n");
5856         }
5857 #endif
5858         continue;
5859
5860     ENREG_VAR:;
5861
5862         varDsc->lvRegister = true;
5863
5864         // Record the fact that we enregistered a stack arg when tail call is used.
5865         if (compJmpOpUsed && !varDsc->lvIsRegArg)
5866         {
5867             rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5868             if (isRegPairType(varDsc->lvType))
5869             {
5870                 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5871             }
5872         }
5873
5874 #ifdef DEBUG
5875         if (verbose)
5876         {
5877             printf("; ");
5878             gtDispLclVar((unsigned)(varDsc - lvaTable));
5879             printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5880                    refCntWtd2str(varDsc->lvRefCntWtd));
5881             varDsc->PrintVarReg();
5882 #ifdef _TARGET_ARM_
5883             if (isDouble)
5884             {
5885                 printf(":%s", getRegName(varDsc->lvOtherReg));
5886             }
5887 #endif
5888             printf("\n");
5889         }
5890 #endif
5891     }
5892
5893 #if ETW_EBP_FRAMED
5894     noway_assert(refCntEBP == 0);
5895 #endif
5896
5897 #ifdef DEBUG
5898     if (verbose)
5899     {
5900         if (refCntStk > 0)
5901             printf("; refCntStk       = %u\n", refCntStk);
5902         if (refCntEBP > 0)
5903             printf("; refCntEBP       = %u\n", refCntEBP);
5904         if (refCntWtdEBP > 0)
5905             printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5906 #if DOUBLE_ALIGN
5907         if (refCntStkParam > 0)
5908             printf("; refCntStkParam  = %u\n", refCntStkParam);
5909         if (refCntWtdStkDbl > 0)
5910             printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5911 #endif
5912     }
5913 #endif
5914
5915     /* Determine how the EBP register should be used */
5916     CLANG_FORMAT_COMMENT_ANCHOR;
5917
5918 #if DOUBLE_ALIGN
5919
5920     if (!codeGen->isFramePointerRequired())
5921     {
5922         noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5923
5924         /*
5925             First let us decide if we should use EBP to create a
5926             double-aligned frame, instead of enregistering variables
5927         */
5928
5929         if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5930         {
5931             rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5932             goto REVERSE_EBP_ENREG;
5933         }
5934
5935         if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5936         {
5937             if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5938             {
5939                 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5940                 goto REVERSE_EBP_ENREG;
5941             }
5942         }
5943     }
5944
5945 #endif // DOUBLE_ALIGN
5946
5947     if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5948     {
5949 #ifdef _TARGET_XARCH_
5950 // clang-format off
5951         /*  If we are using EBP to enregister variables then
5952             will we actually save bytes by setting up an EBP frame?
5953
5954             Each stack reference is an extra byte of code if we use
5955             an ESP frame.
5956
5957             Here we measure the savings that we get by using EBP to
5958             enregister variables vs. the cost in code size that we
5959             pay when using an ESP based frame.
5960
5961             We pay one byte of code for each refCntStk
5962             but we save one byte (or more) for each refCntEBP.
5963
5964             Our savings are the elimination of a stack memory read/write.
5965             We use the loop weighted value of
5966                refCntWtdEBP * mem_access_weight (0, 3, 6)
5967             to represent this savings.
5968          */
5969
5970         // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5971         // to set up an EBP frame in the prolog and epilog
5972         #define EBP_FRAME_SETUP_SIZE  5
5973         // clang-format on
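        // Editorial worked example with hypothetical counts: refCntStk == 40
        // and refCntEBP == 10 give bytesSaved == 40 - (10 + 5) == 25. With the
        // default mem_access_weight of 3 and refCntWtdEBP == 600 (assuming
        // BB_UNITY_WEIGHT == 100), the weighted savings are 600 * 3 / 100 ==
        // 18; since 25 > 18, enregistering in EBP is judged not worthwhile,
        // and we fall to FT_EBP_FRAME and REVERSE_EBP_ENREG below.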
5974
5975         if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5976         {
5977             unsigned bytesSaved        = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5978             unsigned mem_access_weight = 3;
5979
5980             if (compCodeOpt() == SMALL_CODE)
5981                 mem_access_weight = 0;
5982             else if (compCodeOpt() == FAST_CODE)
5983                 mem_access_weight *= 2;
5984
5985             if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5986             {
5987                 /* It would not be a good idea to use EBP in our predictions */
5988                 CLANG_FORMAT_COMMENT_ANCHOR;
5989 #ifdef DEBUG
5990                 if (verbose && (refCntEBP > 0))
5991                     printf("; Predicting that it's not worth using EBP to enregister variables\n");
5992 #endif
5993                 rpFrameType = FT_EBP_FRAME;
5994                 goto REVERSE_EBP_ENREG;
5995             }
5996         }
5997 #endif // _TARGET_XARCH_
5998
5999         if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6000         {
6001 #ifdef DEBUG
6002             const char* reason;
6003 #endif
6004             if (rpMustCreateEBPCalled == false)
6005             {
6006                 rpMustCreateEBPCalled = true;
6007                 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6008                 {
6009 #ifdef DEBUG
6010                     if (verbose)
6011                         printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6012 #endif
6013                     codeGen->setFrameRequired(true);
6014
6015                     rpFrameType = FT_EBP_FRAME;
6016                     goto REVERSE_EBP_ENREG;
6017                 }
6018             }
6019         }
6020     }
6021
6022     goto EXIT;
6023
6024 REVERSE_EBP_ENREG:
6025
6026     noway_assert(rpFrameType != FT_ESP_FRAME);
6027
6028     rpReverseEBPenreg = true;
6029
6030 #if !ETW_EBP_FRAMED
6031     if (refCntEBP > 0)
6032     {
6033         noway_assert(regUsed & RBM_FPBASE);
6034
6035         regUsed &= ~RBM_FPBASE;
6036
6037         /* variables that were enregistered in EBP become stack based variables */
6038         raAddToStkPredict(refCntWtdEBP);
6039
6040         unsigned lclNum;
6041
6042         /* We're going to have to undo some predicted enregistered variables */
6043         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6044         {
6045             /* Is this a register variable? */
6046             if (varDsc->lvRegNum != REG_STK)
6047             {
6048                 if (isRegPairType(varDsc->lvType))
6049                 {
6050                     /* Only one can be EBP */
6051                     if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6052                     {
6053                         if (varDsc->lvRegNum == REG_FPBASE)
6054                             varDsc->lvRegNum = varDsc->lvOtherReg;
6055
6056                         varDsc->lvOtherReg = REG_STK;
6057
6058                         if (varDsc->lvRegNum == REG_STK)
6059                             varDsc->lvRegister = false;
6060
6061                         if (varDsc->lvDependReg)
6062                             rpLostEnreg = true;
6063 #ifdef DEBUG
6064                         if (verbose)
6065                             goto DUMP_MSG;
6066 #endif
6067                     }
6068                 }
6069                 else
6070                 {
6071                     if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6072                     {
6073                         varDsc->lvRegNum = REG_STK;
6074
6075                         varDsc->lvRegister = false;
6076
6077                         if (varDsc->lvDependReg)
6078                             rpLostEnreg = true;
6079 #ifdef DEBUG
6080                         if (verbose)
6081                         {
6082                         DUMP_MSG:
6083                             printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6084                                    varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6085                                    (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6086                         }
6087 #endif
6088                     }
6089                 }
6090             }
6091         }
6092     }
6093 #endif // ETW_EBP_FRAMED
6094
6095 EXIT:;
6096
6097     unsigned lclNum;
6098     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6099     {
6100         /* Clear the lvDependReg flag for next iteration of the predictor */
6101         varDsc->lvDependReg = false;
6102
6103         // If we set rpLostEnreg and this is the first pessimize pass
6104         // then reverse the enreg of all TYP_LONG
6105         if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6106         {
6107             varDsc->lvRegNum   = REG_STK;
6108             varDsc->lvOtherReg = REG_STK;
6109         }
6110     }
6111
6112 #ifdef DEBUG
6113     if (verbose && raNewBlocks)
6114     {
6115         printf("\nAdded FP register killing blocks:\n");
6116         fgDispBasicBlocks();
6117         printf("\n");
6118     }
6119 #endif
6120     noway_assert(rpFrameType != FT_NOT_SET);
6121
6122     /* return the set of registers used to enregister variables */
6123     return regUsed;
6124 }
6125 #ifdef _PREFAST_
6126 #pragma warning(pop)
6127 #endif
6128
6129 /*****************************************************************************
6130  *
6131  *  Predict register use for every tree in the function. Note that we do this
6132  *  at different times (not to mention in a totally different way) for x86 vs
6133  *  RISC targets.
6134  */
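// Editorial sketch of the fixed-point iteration implemented below (a summary
// of this function, not additional behavior):
//
//     regAvail = callee-saved subset of allAcceptableRegs;
//     while (true)
//     {
//         regUsed = rpPredictAssignRegVars(regAvail); // color from raLclRegIntf[]
//         if (prediction is stable and no longer improving)
//             break;                                  // see ALL_DONE
//         rebuild raLclRegIntf[] by walking every statement
//             through rpPredictTreeRegUse();
//         regAvail = allAcceptableRegs (minus EBP when a frame is required);
//         after rpPassesPessimize passes, also OR in last pass's interference;
//     }
//     rpUseRecordedPredictionIfBetter(); // revert if the last pass regressed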
6135 void Compiler::rpPredictRegUse()
6136 {
6137 #ifdef DEBUG
6138     if (verbose)
6139         raDumpVarIntf();
6140 #endif
6141
6142     // We might want to adjust the ref counts based on interference
6143     raAdjustVarIntf();
6144
6145     regMaskTP allAcceptableRegs = RBM_ALLINT;
6146
6147 #if FEATURE_FP_REGALLOC
6148     allAcceptableRegs |= raConfigRestrictMaskFP();
6149 #endif
6150
6151     allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6152
6153     /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6154        to acdHelper(). This is done implicitly, without creating a GT_CALL
6155        node. Hence, this interference is handled implicitly by
6156        restricting the registers used for enregistering variables */
6157
6158     if (opts.compDbgCode)
6159     {
6160         allAcceptableRegs &= RBM_CALLEE_SAVED;
6161     }
6162
6163     /* Compute the initial regmask to use for the first pass */
6164     regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6165     regMaskTP regUsed;
6166
6167 #if CPU_USES_BLOCK_MOVE
6168     /* If we might need to generate a rep mov instruction */
6169     /* remove ESI and EDI */
6170     if (compBlkOpUsed)
6171         regAvail &= ~(RBM_ESI | RBM_EDI);
6172 #endif
6173
6174 #ifdef _TARGET_X86_
6175     /* If we are using longs then we remove ESI to allow */
6176     /* ESI:EBX to be saved across a call */
6177     if (compLongUsed)
6178         regAvail &= ~(RBM_ESI);
6179 #endif
6180
6181 #ifdef _TARGET_ARM_
6182     // For the first register allocation pass we don't want to color using r4
6183     // as we want to allow it to be used to color the internal temps instead
6184     // when r0,r1,r2,r3 are all in use.
6185     //
6186     regAvail &= ~(RBM_R4);
6187 #endif
6188
6189 #if ETW_EBP_FRAMED
6190     // We never have EBP available when ETW_EBP_FRAMED is set
6191     regAvail &= ~RBM_FPBASE;
6192 #else
6193     /* If a frame pointer is required then we remove EBP */
6194     if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6195         regAvail &= ~RBM_FPBASE;
6196 #endif
6197
6198 #ifdef DEBUG
6199     BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6200     if (fJitNoRegLoc)
6201         regAvail = RBM_NONE;
6202 #endif
6203
6204     if ((opts.compFlags & CLFLG_REGVAR) == 0)
6205         regAvail = RBM_NONE;
6206
6207 #if FEATURE_STACK_FP_X87
6208     VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6209     VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6210
6211     // Calculate the set of all tracked FP/non-FP variables
6212     //  into optAllFloatVars and optAllNonFPvars
6213
6214     unsigned   lclNum;
6215     LclVarDsc* varDsc;
6216
6217     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6218     {
6219         /* Ignore the variable if it's not tracked */
6220
6221         if (!varDsc->lvTracked)
6222             continue;
6223
6224         /* Get hold of the index and the interference mask for the variable */
6225
6226         unsigned varNum = varDsc->lvVarIndex;
6227
6228         /* add to the set of all tracked FP/non-FP variables */
6229
6230         if (varDsc->IsFloatRegType())
6231             VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6232         else
6233             VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6234     }
6235 #endif
6236
6237     for (unsigned i = 0; i < REG_COUNT; i++)
6238     {
6239         VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6240     }
6241     for (unsigned i = 0; i < lvaTrackedCount; i++)
6242     {
6243         VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6244     }
6245
6246     raNewBlocks          = false;
6247     rpPredictAssignAgain = false;
6248     rpPasses             = 0;
6249
6250     bool      mustPredict   = true;
6251     unsigned  stmtNum       = 0;
6252     unsigned  oldStkPredict = DUMMY_INIT(~0);
6253     VARSET_TP oldLclRegIntf[REG_COUNT];
6254
6255     for (unsigned i = 0; i < REG_COUNT; i++)
6256     {
6257         VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6258     }
6259
6260     while (true)
6261     {
6262         /* Assign registers to variables using the variable/register interference
6263            graph (raLclRegIntf[]) calculated in the previous pass */
6264         regUsed = rpPredictAssignRegVars(regAvail);
6265
6266         mustPredict |= rpLostEnreg;
6267
6268 #ifdef _TARGET_ARM_
6269         // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6270         if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6271             !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6272         {
6273             // We can release our reservation on R10 and use it to color registers
6274             codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6275             allAcceptableRegs |= RBM_OPT_RSVD;
6276         }
6277 #endif
6278
6279         /* Is our new prediction good enough?? */
6280         if (!mustPredict)
6281         {
6282             /* For small methods (at most 12 stmts), we add an     */
6283             /*   extra pass if we are predicting the use of some   */
6284             /*   of the callee-saved registers.                    */
6285             /* This fixes RAID perf bug 43440 VB Ackerman function */
6286
6287             if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6288             {
6289                 goto EXTRA_PASS;
6290             }
6291
6292             /* If every variable was fully enregistered then we're done */
6293             if (rpStkPredict == 0)
6294                 goto ALL_DONE;
6295
6296             // This was a successful prediction.  Record it, in case it turns out to be the best one.
6297             rpRecordPrediction();
6298
6299             if (rpPasses > 1)
6300             {
6301                 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6302
6303                 // Be careful about overflow
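                // (editorial example: with 32-bit unsigned arithmetic,
                // rpStkPredict == 0x90000000 would wrap to 0x20000000 when
                // doubled; the (x * 2 < x) test below catches that and clamps
                // the bound to ULONG_MAX instead)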
6304                 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6305                 if (oldStkPredict < highStkPredict)
6306                     goto ALL_DONE;
6307
6308                 if (rpStkPredict < rpPasses * 8)
6309                     goto ALL_DONE;
6310
6311                 if (rpPasses >= (rpPassesMax - 1))
6312                     goto ALL_DONE;
6313             }
6314
6315         EXTRA_PASS:
6316             /* We will do another pass */;
6317         }
6318
6319 #ifdef DEBUG
6320         if (JitConfig.JitAssertOnMaxRAPasses())
6321         {
6322             noway_assert(rpPasses < rpPassesMax &&
6323                          "This may not be a bug, but the dev team should look and see what is happening");
6324         }
6325 #endif
6326
6327         // The "64" here was previously "VARSET_SZ". It is unclear why this limit was ever tied to
6328         // the (max) size of a VARSET; that constant has since been eliminated, so the literal 64 is
6329         // kept to preserve the old behavior. We hope to phase out this code anyway.
6330         if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6331         {
6332             NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6333         }
6334
6335 #ifdef DEBUG
6336         if (verbose)
6337         {
6338             if (rpPasses > 0)
6339             {
6340                 if (rpLostEnreg)
6341                     printf("\n; Another pass due to rpLostEnreg");
6342                 if (rpAddedVarIntf)
6343                     printf("\n; Another pass due to rpAddedVarIntf");
6344                 if ((rpPasses == 1) && rpPredictAssignAgain)
6345                     printf("\n; Another pass due to rpPredictAssignAgain");
6346             }
6347             printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6348         }
6349 #endif
6350
6351         /*  Zero the variable/register interference graph */
6352         for (unsigned i = 0; i < REG_COUNT; i++)
6353         {
6354             VarSetOps::ClearD(this, raLclRegIntf[i]);
6355         }
6356
6357         // If there are PInvoke calls and compLvFrameListRoot is enregistered,
6358         // it must not be in a register trashed by the callee.
6359         if (info.compLvFrameListRoot != BAD_VAR_NUM)
6360         {
6361             assert(!opts.ShouldUsePInvokeHelpers());
6362             noway_assert(info.compLvFrameListRoot < lvaCount);
6363
6364             LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6365
6366             if (pinvokeVarDsc->lvTracked)
6367             {
6368                 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6369                                                       DEBUGARG("compLvFrameListRoot"));
6370
6371                 // We would prefer to have this enregistered in the PINVOKE_TCB register
6372                 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6373             }
6374
6375             // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6376             // worst case).  Make sure that the return value compiler temp that we create for the single
6377             // return block knows about this interference.
6378             if (genReturnLocal != BAD_VAR_NUM)
6379             {
6380                 noway_assert(genReturnBB);
6381                 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6382                 if (localTmp->lvTracked)
6383                 {
6384                     rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6385                                     VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6386                 }
6387             }
6388         }
6389
6390 #ifdef _TARGET_ARM_
6391         if (compFloatingPointUsed)
6392         {
6393             bool hasMustInitFloat = false;
6394
6395             // If we have any must-init floating-point LclVars then we will add register
6396             // interferences between the arguments and RBM_SCRATCH. This is so that if we
6397             // need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog(),
6398             // we won't have homed any arguments into REG_SCRATCH.
6399
6400             unsigned   lclNum;
6401             LclVarDsc* varDsc;
6402
6403             for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6404             {
6405                 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6406                 {
6407                     hasMustInitFloat = true;
6408                     break;
6409                 }
6410             }
6411
6412             if (hasMustInitFloat)
6413             {
6414                 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6415                 {
6416                     // If it is an incoming argument that is tracked and not floating-point
6417                     if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6418                     {
6419                         rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6420                                                          DEBUGARG("arg home with must-init fp"));
6421                     }
6422                 }
6423             }
6424         }
6425 #endif
6426
6427         stmtNum        = 0;
6428         rpAddedVarIntf = false;
6429         rpLostEnreg    = false;
6430
6431         /* Walk the basic blocks and predict reg use for each tree */
6432
6433         for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6434         {
6435             GenTreePtr stmt;
6436             compCurBB       = block;
6437             compCurLifeTree = NULL;
6438             VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6441
6442             for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6443             {
6444                 noway_assert(stmt->gtOper == GT_STMT);
6445
6446                 rpPredictSpillCnt = 0;
6447                 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6448                 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6449
6450                 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6451                 stmtNum++;
6452 #ifdef DEBUG
6453                 if (verbose)
6454                 {
6455                     printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6456                     gtDispTree(tree);
6457                     printf("\n");
6458                 }
6459 #endif
6460                 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6461
6462                 noway_assert(rpAsgVarNum == -1);
6463
6464                 if (rpPredictSpillCnt > tmpIntSpillMax)
6465                     tmpIntSpillMax = rpPredictSpillCnt;
6466             }
6467         }
6468         rpPasses++;
6469
6470         /* Decide whether we need to set mustPredict */
6471         mustPredict = false;
6472
6473 #ifdef _TARGET_ARM_
6474         // The spill count may now be high enough that we need to reserve r10. If so, reserve it,
6475         // and if r10 was used by the last prediction, throw that prediction out and repredict.
6476         if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6477         {
6478             codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6479             allAcceptableRegs &= ~RBM_OPT_RSVD;
6480             if ((regUsed & RBM_OPT_RSVD) != 0)
6481             {
6482                 mustPredict              = true;
6483                 rpBestRecordedPrediction = nullptr;
6484             }
6485         }
6486 #endif
6487
6488         if (rpAddedVarIntf)
6489         {
6490             mustPredict = true;
6491 #ifdef DEBUG
6492             if (verbose)
6493                 raDumpVarIntf();
6494 #endif
6495         }
6496
6497         if (rpPasses == 1)
6498         {
6499             if ((opts.compFlags & CLFLG_REGVAR) == 0)
6500                 goto ALL_DONE;
6501
6502             if (rpPredictAssignAgain)
6503                 mustPredict = true;
6504 #ifdef DEBUG
6505             if (fJitNoRegLoc)
6506                 goto ALL_DONE;
6507 #endif
6508         }
6509
6510         /* Calculate the new value to use for regAvail */
6511
6512         regAvail = allAcceptableRegs;
6513
6514         /* If a frame pointer is required then we remove EBP */
6515         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6516             regAvail &= ~RBM_FPBASE;
6517
6518 #if ETW_EBP_FRAMED
6519         // We never have EBP available when ETW_EBP_FRAMED is set
6520         regAvail &= ~RBM_FPBASE;
6521 #endif
6522
6523         // If we have done n-passes then we must continue to pessimize the
6524         // interference graph by or-ing the interferences from the previous pass
6525
6526         if (rpPasses > rpPassesPessimize)
6527         {
6528             for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6529                 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6530
6531             /* If we reverse an EBP enregistration then keep it that way */
6532             if (rpReverseEBPenreg)
6533                 regAvail &= ~RBM_FPBASE;
6534         }
6535
6536 #ifdef DEBUG
6537         if (verbose)
6538             raDumpRegIntf();
6539 #endif
6540
6541         /*  Save the old variable/register interference graph */
6542         for (unsigned i = 0; i < REG_COUNT; i++)
6543         {
6544             VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6545         }
6546         oldStkPredict = rpStkPredict;
6547     } // end of while (true)
6548
6549 ALL_DONE:;
6550
6551     // If we recorded a better feasible allocation than we ended up with, go back to using it.
6552     rpUseRecordedPredictionIfBetter();
6553
6554 #if DOUBLE_ALIGN
6555     codeGen->setDoubleAlign(false);
6556 #endif
6557
6558     switch (rpFrameType)
6559     {
6560         default:
6561             noway_assert(!"rpFrameType not set correctly!");
6562             break;
6563         case FT_ESP_FRAME:
6564             noway_assert(!codeGen->isFramePointerRequired());
6565             noway_assert(!codeGen->isFrameRequired());
6566             codeGen->setFramePointerUsed(false);
6567             break;
6568         case FT_EBP_FRAME:
6569             noway_assert((regUsed & RBM_FPBASE) == 0);
6570             codeGen->setFramePointerUsed(true);
6571             break;
6572 #if DOUBLE_ALIGN
6573         case FT_DOUBLE_ALIGN_FRAME:
6574             noway_assert((regUsed & RBM_FPBASE) == 0);
6575             noway_assert(!codeGen->isFramePointerRequired());
6576             codeGen->setFramePointerUsed(false);
6577             codeGen->setDoubleAlign(true);
6578             break;
6579 #endif
6580     }
6581
6582     /* Record the set of registers that we need */
6583     codeGen->regSet.rsClearRegsModified();
6584     if (regUsed != RBM_NONE)
6585     {
6586         codeGen->regSet.rsSetRegsModified(regUsed);
6587     }
6588
6589     /* We need genFullPtrRegMap if:
6590      * The method is fully interruptible, or
6591      * We are generating an EBP-less frame (for stack-pointer deltas)
6592      */
6593
6594     genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6595
6596     raMarkStkVars();
6597 #ifdef DEBUG
6598     if (verbose)
6599     {
6600         printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6601         printf("  rpStkPredict was %u\n", rpStkPredict);
6602     }
6603 #endif
6604     rpRegAllocDone = true;
6605 }
6606
6607 #endif // LEGACY_BACKEND
6608
6609 /*****************************************************************************
6610  *
6611  *  Mark all variables as to whether they live on the stack frame
6612  *  (part or whole), and if so what the base is (FP or SP).
6613  */
6614
6615 void Compiler::raMarkStkVars()
6616 {
6617     unsigned   lclNum;
6618     LclVarDsc* varDsc;
6619
6620     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6621     {
6622         // For RyuJIT, lvOnFrame is set by LSRA, except in the zero-ref case, which is handled below.
6623         CLANG_FORMAT_COMMENT_ANCHOR;
6624
6625 #ifdef LEGACY_BACKEND
6626         varDsc->lvOnFrame = false;
6627 #endif // LEGACY_BACKEND
6628
6629         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6630         {
6631             noway_assert(!varDsc->lvRegister);
6632             goto ON_STK;
6633         }
6634
6635         /* Fully enregistered variables don't need any frame space */
6636
6637         if (varDsc->lvRegister)
6638         {
6639             if (!isRegPairType(varDsc->TypeGet()))
6640             {
6641                 goto NOT_STK;
6642             }
6643
6644             /* For "large" variables make sure both halves are enregistered */
6645
6646             if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6647             {
6648                 goto NOT_STK;
6649             }
6650         }
6651         /* Unused variables typically don't get any frame space */
6652         else if (varDsc->lvRefCnt == 0)
6653         {
6654             bool needSlot = false;
6655
6656             bool stkFixedArgInVarArgs =
6657                 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6658
6659             // If its address has been exposed, ignore lvRefCnt. However, exclude
6660             // fixed arguments in a varargs method, as lvOnFrame shouldn't be set
6661             // for them: we don't want to explicitly report them to the GC.
6662
6663             if (!stkFixedArgInVarArgs)
6664             {
6665                 needSlot |= varDsc->lvAddrExposed;
6666             }
6667
6668 #if FEATURE_FIXED_OUT_ARGS
6669
6670             /* Is this the dummy variable representing GT_LCLBLK ? */
6671             needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6672
6673 #endif // FEATURE_FIXED_OUT_ARGS
6674
6675 #ifdef DEBUG
6676             /* For debugging, note that we have to reserve space even for
6677                unused variables if they are ever in scope. However, this is not
6678                an issue as fgExtendDbgLifetimes() adds an initialization and
6679                variables in scope will not have a zero ref-cnt.
6680              */
6681             if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6682             {
6683                 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6684                 {
6685                     noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6686                 }
6687             }
6688 #endif
6689             /*
6690               For Debug Code, we have to reserve space even if the variable is never
6691               in scope. We will also need to initialize it if it is a GC var.
6692               So we set lvMustInit and artificially bump up the ref-cnt.
6693              */
6694
6695             if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6696             {
6697                 needSlot |= true;
6698
6699                 if (lvaTypeIsGC(lclNum))
6700                 {
6701                     varDsc->lvRefCnt = 1;
6702                 }
6703
6704                 if (!varDsc->lvIsParam)
6705                 {
6706                     varDsc->lvMustInit = true;
6707                 }
6708             }
6709
6710 #ifndef LEGACY_BACKEND
6711             varDsc->lvOnFrame = needSlot;
6712 #endif // !LEGACY_BACKEND
6713             if (!needSlot)
6714             {
6715                 /* Clear the lvMustInit flag in case it is set */
6716                 varDsc->lvMustInit = false;
6717
6718                 goto NOT_STK;
6719             }
6720         }
6721
6722 #ifndef LEGACY_BACKEND
6723         if (!varDsc->lvOnFrame)
6724         {
6725             goto NOT_STK;
6726         }
6727 #endif // !LEGACY_BACKEND
6728
6729     ON_STK:
6730         /* The variable (or part of it) lives on the stack frame */
6731
6732         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6733 #if FEATURE_FIXED_OUT_ARGS
6734         noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6735 #else  // FEATURE_FIXED_OUT_ARGS
6736         noway_assert(lvaLclSize(lclNum) != 0);
6737 #endif // FEATURE_FIXED_OUT_ARGS
6738
6739         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6740                                   // stack frame
6741
6742     NOT_STK:;
6743         varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6744
6745 #if DOUBLE_ALIGN
6746
6747         if (codeGen->doDoubleAlign())
6748         {
6749             noway_assert(codeGen->isFramePointerUsed() == false);
6750
6751             /* All arguments are off of EBP with double-aligned frames */
6752
6753             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6754             {
6755                 varDsc->lvFramePointerBased = true;
6756             }
6757         }
6758
6759 #endif
6760
6761         /* Some basic checks */
6762
6763         // It must be in a register, on frame, or have zero references.
6764
6765         noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6766
6767 #ifndef LEGACY_BACKEND
6768         // We can't have both lvRegister and lvOnFrame for RyuJIT
6769         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6770 #else  // LEGACY_BACKEND
6771
6772         /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6773         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6774                      (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6775 #endif // LEGACY_BACKEND
6776
6777 #ifdef DEBUG
6778
6779         // For varargs functions, there should be no direct references to
6780         // parameter variables except for 'this' (because these were morphed
6781         // in the importer), the 'arglist' parameter (which is not a GC
6782         // pointer), and the return buffer argument (if we are returning a
6783         // struct).
6784         // This is important because we don't want to try to report them
6785         // to the GC, as the frame offsets in these local variables would
6786         // not be correct.
6787
6788         if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6789         {
6790             if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6791             {
6792                 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6793             }
6794         }
6795 #endif
6796     }
6797 }
6798
6799 #ifdef LEGACY_BACKEND
6800 void Compiler::rpRecordPrediction()
6801 {
6802     if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6803     {
6804         if (rpBestRecordedPrediction == NULL)
6805         {
6806             rpBestRecordedPrediction =
6807                 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
6808         }
6809         for (unsigned k = 0; k < lvaCount; k++)
6810         {
6811             rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6812             rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
6813             rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
6814         }
6815         rpBestRecordedStkPredict = rpStkPredict;
6816         JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6817     }
6818 }
6819
6820 void Compiler::rpUseRecordedPredictionIfBetter()
6821 {
6822     JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6823             rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
6824     if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6825     {
6826         JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6827                 rpBestRecordedStkPredict);
6828
6829         for (unsigned k = 0; k < lvaCount; k++)
6830         {
6831             lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6832             lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6833             lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6834         }
6835     }
6836 }
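// Editorial illustration of the record/revert pair above, with hypothetical
// pass numbers: if pass 3 reaches rpStkPredict == 40, rpRecordPrediction()
// snapshots every lvRegister/lvRegNum/lvOtherReg; if a later pessimizing pass
// finishes with rpStkPredict == 65, rpUseRecordedPredictionIfBetter() copies
// the pass-3 assignments back into lvaTable.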
6837 #endif // LEGACY_BACKEND