Merge pull request #14853 from sdmaclea/PR-ARM64-SIMD-applyCalleeSaveHeuristics
[platform/upstream/coreclr.git] / src / jit / regalloc.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                           RegAlloc                                        XX
9 XX                                                                           XX
10 XX  Does the register allocation and puts the remaining lclVars on the stack XX
11 XX                                                                           XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14 */
15
16 #include "jitpch.h"
17 #ifdef _MSC_VER
18 #pragma hdrstop
19 #endif
20 #include "regalloc.h"
21
22 #if FEATURE_FP_REGALLOC
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
24 {
25     DWORD val = JitConfig.JitRegisterFP();
26
27     return (enumConfigRegisterFP)(val & 0x3);
28 }
29 #endif // FEATURE_FP_REGALLOC
30
31 regMaskTP Compiler::raConfigRestrictMaskFP()
32 {
33     regMaskTP result = RBM_NONE;
34
35 #if FEATURE_FP_REGALLOC
36     switch (raConfigRegisterFP())
37     {
38         case CONFIG_REGISTER_FP_NONE:
39             result = RBM_NONE;
40             break;
41         case CONFIG_REGISTER_FP_CALLEE_TRASH:
42             result = RBM_FLT_CALLEE_TRASH;
43             break;
44         case CONFIG_REGISTER_FP_CALLEE_SAVED:
45             result = RBM_FLT_CALLEE_SAVED;
46             break;
47         case CONFIG_REGISTER_FP_FULL:
48             result = RBM_ALLFLOAT;
49             break;
50     }
51 #endif
52
53     return result;
54 }
55
56 #if DOUBLE_ALIGN
57 DWORD Compiler::getCanDoubleAlign()
58 {
59 #ifdef DEBUG
60     if (compStressCompile(STRESS_DBL_ALN, 20))
61         return MUST_DOUBLE_ALIGN;
62
63     return JitConfig.JitDoubleAlign();
64 #else
65     return DEFAULT_DOUBLE_ALIGN;
66 #endif
67 }
68
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
71 //
72 // Arguments:
73 //    refCntStk       - sum of     ref counts for all stack based variables
74 //    refCntEBP       - sum of     ref counts for EBP enregistered variables
75 //    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
76 //    refCntStkParam  - sum of     ref counts for all stack based parameters
77 //    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 //                      with double fields).
79 //
80 // Return Value:
81 //    Returns true if this method estimates that a double-aligned frame would be beneficial
82 //
83 // Notes:
84 //    The impact of a double-aligned frame is computed as follows:
85 //    - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 //    - We pay a byte of code for each non-parameter stack reference.
87 //    - We save the misalignment penalty and possible cache-line crossing penalty.
88 //      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 //    - We pay 7 extra bytes for:
90 //        MOV EBP,ESP,
91 //        LEA ESP,[EBP-offset]
92 //        AND ESP,-8 to double align ESP
93 //    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
94 //
95 //    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 //    Otherwise, we compare the weighted ref count of ebp-enregistered variables aginst double the
97 //    ref count for double-aligned values.
98 //
99 bool Compiler::shouldDoubleAlign(
100     unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
101 {
102     bool           doDoubleAlign        = false;
103     const unsigned DBL_ALIGN_SETUP_SIZE = 7;
104
105     unsigned bytesUsed         = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
106     unsigned misaligned_weight = 4;
107
108     if (compCodeOpt() == Compiler::SMALL_CODE)
109         misaligned_weight = 0;
110
111     if (compCodeOpt() == Compiler::FAST_CODE)
112         misaligned_weight *= 4;
113
114     JITDUMP("\nDouble alignment:\n");
115     JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116     JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117     JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
118
119     if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
120     {
121         JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
122     }
123     else if (refCntWtdEBP > refCntWtdStkDbl * 2)
124     {
125         // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126         // not double aligned.
127         // Here are the numbers that make this not double-aligned.
128         //     refCntWtdStkDbl = 0x164
129         //     refCntWtdEBP    = 0x1a4
130         // We think we do need to change the heuristic to be in favor of double-align.
131
132         JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
133     }
134     else
135     {
136         // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137         JITDUMP("    Predicting to create a double-aligned frame\n");
138         doDoubleAlign = true;
139     }
140     return doDoubleAlign;
141 }
142 #endif // DOUBLE_ALIGN
143
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
145
146 void Compiler::raInit()
147 {
148 #if FEATURE_STACK_FP_X87
149     /* We have not assigned any FP variables to registers yet */
150
151     VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
152 #endif
153     codeGen->intRegState.rsIsFloat   = false;
154     codeGen->floatRegState.rsIsFloat = true;
155
156     rpReverseEBPenreg = false;
157     rpAsgVarNum       = -1;
158     rpPassesMax       = 6;
159     rpPassesPessimize = rpPassesMax - 3;
160     if (opts.compDbgCode)
161     {
162         rpPassesMax++;
163     }
164     rpStkPredict            = (unsigned)-1;
165     rpFrameType             = FT_NOT_SET;
166     rpLostEnreg             = false;
167     rpMustCreateEBPCalled   = false;
168     rpRegAllocDone          = false;
169     rpMaskPInvokeEpilogIntf = RBM_NONE;
170
171     rpPredictMap[PREDICT_NONE] = RBM_NONE;
172     rpPredictMap[PREDICT_ADDR] = RBM_NONE;
173
174 #if FEATURE_FP_REGALLOC
175     rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
176     rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
177 #else
178     rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
179     rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
180 #endif
181
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
184
185 #if defined(_TARGET_ARM_)
186
187     rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188     rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
189     rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;
190
191 #elif defined(_TARGET_AMD64_)
192
193     rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194     rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195     rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
196
197 #elif defined(_TARGET_X86_)
198
199     rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200     rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201     rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
202     rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203     rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
204
205 #endif
206
207     rpBestRecordedPrediction = NULL;
208 }
209
210 /*****************************************************************************
211  *
212  *  The following table(s) determines the order in which registers are considered
213  *  for variables to live in
214  */
215
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
217 {
218 #if FEATURE_FP_REGALLOC
219     if (varTypeIsFloating(regType))
220     {
221         static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
222         const unsigned         raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]);
223
224         if (wbVarOrderSize != NULL)
225             *wbVarOrderSize = raRegVarOrderFltSize;
226
227         return &raRegVarOrderFlt[0];
228     }
229     else
230 #endif
231     {
232         static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
233         const unsigned         raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]);
234
235         if (wbVarOrderSize != NULL)
236             *wbVarOrderSize = raRegVarOrderSize;
237
238         return &raRegVarOrder[0];
239     }
240 }
241
242 #ifdef DEBUG
243
244 /*****************************************************************************
245  *
246  *  Dump out the variable interference graph
247  *
248  */
249
250 void Compiler::raDumpVarIntf()
251 {
252     unsigned   lclNum;
253     LclVarDsc* varDsc;
254
255     printf("Var. interference graph for %s\n", info.compFullName);
256
257     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
258     {
259         /* Ignore the variable if it's not tracked */
260
261         if (!varDsc->lvTracked)
262             continue;
263
264         /* Get hold of the index and the interference mask for the variable */
265         unsigned varIndex = varDsc->lvVarIndex;
266
267         printf("  V%02u,T%02u and ", lclNum, varIndex);
268
269         unsigned refIndex;
270
271         for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
272         {
273             if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274                 printf("T%02u ", refIndex);
275             else
276                 printf("    ");
277         }
278
279         printf("\n");
280     }
281
282     printf("\n");
283 }
284
285 /*****************************************************************************
286  *
287  *  Dump out the register interference graph
288  *
289  */
290 void Compiler::raDumpRegIntf()
291 {
292     printf("Reg. interference graph for %s\n", info.compFullName);
293
294     unsigned   lclNum;
295     LclVarDsc* varDsc;
296
297     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
298     {
299         unsigned varNum;
300
301         /* Ignore the variable if it's not tracked */
302
303         if (!varDsc->lvTracked)
304             continue;
305
306         /* Get hold of the index and the interference mask for the variable */
307
308         varNum = varDsc->lvVarIndex;
309
310         printf("  V%02u,T%02u and ", lclNum, varNum);
311
312         if (varDsc->IsFloatRegType())
313         {
314 #if !FEATURE_STACK_FP_X87
315             for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
316             {
317                 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318                     printf("%3s ", getRegName(regNum, true));
319                 else
320                     printf("    ");
321             }
322 #endif
323         }
324         else
325         {
326             for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
327             {
328                 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329                     printf("%3s ", getRegName(regNum));
330                 else
331                     printf("    ");
332             }
333         }
334
335         printf("\n");
336     }
337
338     printf("\n");
339 }
340 #endif // DEBUG
341
342 /*****************************************************************************
343  *
344  * We'll adjust the ref counts based on interference
345  *
346  */
347
348 void Compiler::raAdjustVarIntf()
349 {
350     // This method was not correct and has been disabled.
351     return;
352 }
353
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
357  */
358
359 inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
360 {
361     var_types type = tree->TypeGet();
362
363     if (type == TYP_STRUCT && IsHfa(tree))
364     {
365         int retSlots = GetHfaCount(tree);
366         return ((1 << retSlots) - 1) << REG_FLOATRET;
367     }
368
369     const static regMaskTP returnMap[TYP_COUNT] = {
370         RBM_ILLEGAL,   // TYP_UNDEF,
371         RBM_NONE,      // TYP_VOID,
372         RBM_INTRET,    // TYP_BOOL,
373         RBM_INTRET,    // TYP_CHAR,
374         RBM_INTRET,    // TYP_BYTE,
375         RBM_INTRET,    // TYP_UBYTE,
376         RBM_INTRET,    // TYP_SHORT,
377         RBM_INTRET,    // TYP_USHORT,
378         RBM_INTRET,    // TYP_INT,
379         RBM_INTRET,    // TYP_UINT,
380         RBM_LNGRET,    // TYP_LONG,
381         RBM_LNGRET,    // TYP_ULONG,
382         RBM_FLOATRET,  // TYP_FLOAT,
383         RBM_DOUBLERET, // TYP_DOUBLE,
384         RBM_INTRET,    // TYP_REF,
385         RBM_INTRET,    // TYP_BYREF,
386         RBM_INTRET,    // TYP_ARRAY,
387         RBM_ILLEGAL,   // TYP_STRUCT,
388         RBM_ILLEGAL,   // TYP_BLK,
389         RBM_ILLEGAL,   // TYP_LCLBLK,
390         RBM_ILLEGAL,   // TYP_PTR,
391         RBM_ILLEGAL,   // TYP_FNC,
392         RBM_ILLEGAL,   // TYP_UNKNOWN,
393     };
394
395     assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0]));
396     assert(returnMap[TYP_LONG] == RBM_LNGRET);
397     assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
398     assert(returnMap[TYP_REF] == RBM_INTRET);
399     assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
400
401     regMaskTP result = returnMap[type];
402     assert(result != RBM_ILLEGAL);
403     return result;
404 }
405
406 /*****************************************************************************/
407
408 /****************************************************************************/
409
410 #ifdef DEBUG
411
412 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
413 {
414     unsigned   lclNum;
415     LclVarDsc* varDsc;
416
417     for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
418     {
419         if (!varDsc->lvTracked)
420             continue;
421
422         if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
423             continue;
424
425         if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
426             printf("V%02u ", lclNum);
427     }
428 }
429
430 #endif
431
432 /*****************************************************************************/
433 #ifdef DEBUG
434 /*****************************************************************************
435  *
436  *  Debugging helpers - display variables liveness info.
437  */
438
439 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
440 {
441     do
442     {
443         printf("BB%02u: ", beg->bbNum);
444
445         printf(" in  = [ ");
446         dispLifeSet(comp, mask, beg->bbLiveIn);
447         printf("] ,");
448
449         printf(" out = [ ");
450         dispLifeSet(comp, mask, beg->bbLiveOut);
451         printf("]");
452
453         if (beg->bbFlags & BBF_VISITED)
454             printf(" inner=%u", beg->bbFPinVars);
455
456         printf("\n");
457
458         beg = beg->bbNext;
459         if (!beg)
460             return;
461     } while (beg != end);
462 }
463
464 #if FEATURE_STACK_FP_X87
465 void Compiler::raDispFPlifeInfo()
466 {
467     BasicBlock* block;
468
469     for (block = fgFirstBB; block; block = block->bbNext)
470     {
471         GenTreePtr stmt;
472
473         printf("BB%02u: in  = [ ", block->bbNum);
474         dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
475         printf("]\n\n");
476
477         VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
478         for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
479         {
480             GenTreePtr tree;
481
482             noway_assert(stmt->gtOper == GT_STMT);
483
484             for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
485             {
486                 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
487
488                 dispLifeSet(this, optAllFloatVars, life);
489                 printf("   ");
490                 gtDispTree(tree, 0, NULL, true);
491             }
492
493             printf("\n");
494         }
495
496         printf("BB%02u: out = [ ", block->bbNum);
497         dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
498         printf("]\n\n");
499     }
500 }
501 #endif // FEATURE_STACK_FP_X87
502 /*****************************************************************************/
503 #endif // DEBUG
504 /*****************************************************************************/
505
506 /*****************************************************************************/
507
508 void Compiler::raSetRegVarOrder(
509     var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
510 {
511     unsigned         normalVarOrderSize;
512     const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
513     unsigned         index;
514     unsigned         listIndex = 0;
515     regMaskTP        usedReg   = avoidReg;
516
517     noway_assert(*customVarOrderSize >= normalVarOrderSize);
518
519     if (prefReg)
520     {
521         /* First place the preferred registers at the start of customVarOrder */
522
523         regMaskTP regBit;
524         regNumber regNum;
525
526         for (index = 0; index < normalVarOrderSize; index++)
527         {
528             regNum = normalVarOrder[index];
529             regBit = genRegMask(regNum);
530
531             if (usedReg & regBit)
532                 continue;
533
534             if (prefReg & regBit)
535             {
536                 usedReg |= regBit;
537                 noway_assert(listIndex < normalVarOrderSize);
538                 customVarOrder[listIndex++] = regNum;
539                 prefReg -= regBit;
540                 if (prefReg == 0)
541                     break;
542             }
543         }
544
545 #if CPU_HAS_BYTE_REGS
546         /* Then if byteable registers are preferred place them */
547
548         if (prefReg & RBM_BYTE_REG_FLAG)
549         {
550             for (index = 0; index < normalVarOrderSize; index++)
551             {
552                 regNum = normalVarOrder[index];
553                 regBit = genRegMask(regNum);
554
555                 if (usedReg & regBit)
556                     continue;
557
558                 if (RBM_BYTE_REGS & regBit)
559                 {
560                     usedReg |= regBit;
561                     noway_assert(listIndex < normalVarOrderSize);
562                     customVarOrder[listIndex++] = regNum;
563                 }
564             }
565         }
566
567 #endif // CPU_HAS_BYTE_REGS
568     }
569
570     /* Now place all the non-preferred registers */
571
572     for (index = 0; index < normalVarOrderSize; index++)
573     {
574         regNumber regNum = normalVarOrder[index];
575         regMaskTP regBit = genRegMask(regNum);
576
577         if (usedReg & regBit)
578             continue;
579
580         usedReg |= regBit;
581         noway_assert(listIndex < normalVarOrderSize);
582         customVarOrder[listIndex++] = regNum;
583     }
584
585     if (avoidReg)
586     {
587         /* Now place the "avoid" registers */
588
589         for (index = 0; index < normalVarOrderSize; index++)
590         {
591             regNumber regNum = normalVarOrder[index];
592             regMaskTP regBit = genRegMask(regNum);
593
594             if (avoidReg & regBit)
595             {
596                 noway_assert(listIndex < normalVarOrderSize);
597                 customVarOrder[listIndex++] = regNum;
598                 avoidReg -= regBit;
599                 if (avoidReg == 0)
600                     break;
601             }
602         }
603     }
604
605     *customVarOrderSize = listIndex;
606     noway_assert(listIndex == normalVarOrderSize);
607 }
608
609 /*****************************************************************************
610  *
611  *  Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
612  */
613
614 void Compiler::raSetupArgMasks(RegState* regState)
615 {
616     /* Determine the registers holding incoming register arguments */
617     /*  and setup raAvoidArgRegMask to the set of registers that we  */
618     /*  may want to avoid when enregistering the locals.            */
619
620     regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
621     raAvoidArgRegMask                  = RBM_NONE;
622
623     LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
624
625     for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
626     {
627         noway_assert(argDsc->lvIsParam);
628
629         // Is it a register argument ?
630         if (!argDsc->lvIsRegArg)
631             continue;
632
633         // only process args that apply to the current register file
634         if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
635         {
636             continue;
637         }
638
639         // Is it dead on entry ??
640         // In certain cases such as when compJmpOpUsed is true,
641         // or when we have a generic type context arg that we must report
642         // then the arguments have to be kept alive throughout the prolog.
643         // So we have to consider it as live on entry.
644         //
645         bool keepArgAlive = compJmpOpUsed;
646         if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
647             ((lvaTable + info.compTypeCtxtArg) == argDsc))
648         {
649             keepArgAlive = true;
650         }
651
652         if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
653         {
654             continue;
655         }
656
657         // The code to set the regState for each arg is outlined for shared use
658         // by linear scan
659         regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
660
661         // Do we need to try to avoid this incoming arg registers?
662
663         // If it's not tracked, don't do the stuff below.
664         if (!argDsc->lvTracked)
665             continue;
666
667         // If the incoming arg is used after a call it is live accross
668         //  a call and will have to be allocated to a caller saved
669         //  register anyway (a very common case).
670         //
671         // In this case it is pointless to ask that the higher ref count
672         //  locals to avoid using the incoming arg register
673
674         unsigned argVarIndex = argDsc->lvVarIndex;
675
676         /* Does the incoming register and the arg variable interfere? */
677
678         if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
679         {
680             // No they do not interfere,
681             //  so we add inArgReg to raAvoidArgRegMask
682
683             raAvoidArgRegMask |= genRegMask(inArgReg);
684         }
685 #ifdef _TARGET_ARM_
686         if (argDsc->lvType == TYP_DOUBLE)
687         {
688             // Avoid the double register argument pair for register allocation.
689             if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
690             {
691                 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
692             }
693         }
694 #endif
695     }
696 }
697
698 #endif // LEGACY_BACKEND
699
700 // The code to set the regState for each arg is outlined for shared use
701 // by linear scan. (It is not shared for System V AMD64 platform.)
702 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
703 {
704     regNumber inArgReg  = argDsc->lvArgReg;
705     regMaskTP inArgMask = genRegMask(inArgReg);
706
707     if (regState->rsIsFloat)
708     {
709         noway_assert(inArgMask & RBM_FLTARG_REGS);
710     }
711     else //  regState is for the integer registers
712     {
713         // This might be the fixed return buffer register argument (on ARM64)
714         // We check and allow inArgReg to be theFixedRetBuffReg
715         if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
716         {
717             // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
718             noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
719             // We should have recorded the variable number for the return buffer arg
720             noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
721         }
722         else // we have a regular arg
723         {
724             noway_assert(inArgMask & RBM_ARG_REGS);
725         }
726     }
727
728     regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
729
730 #ifdef _TARGET_ARM_
731     if (argDsc->lvType == TYP_DOUBLE)
732     {
733         if (info.compIsVarArgs || opts.compUseSoftFP)
734         {
735             assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
736             assert(!regState->rsIsFloat);
737         }
738         else
739         {
740             assert(regState->rsIsFloat);
741             assert(emitter::isDoubleReg(inArgReg));
742         }
743         regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
744     }
745     else if (argDsc->lvType == TYP_LONG)
746     {
747         assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
748         assert(!regState->rsIsFloat);
749         regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
750     }
751 #endif // _TARGET_ARM_
752
753 #if FEATURE_MULTIREG_ARGS
754     if (argDsc->lvType == TYP_STRUCT)
755     {
756         if (argDsc->lvIsHfaRegArg())
757         {
758             assert(regState->rsIsFloat);
759             unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
760             for (unsigned i = 1; i < cSlots; i++)
761             {
762                 assert(inArgReg + i <= LAST_FP_ARGREG);
763                 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
764             }
765         }
766         else
767         {
768             unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
769             for (unsigned i = 1; i < cSlots; i++)
770             {
771                 regNumber nextArgReg = (regNumber)(inArgReg + i);
772                 if (nextArgReg > REG_ARG_LAST)
773                 {
774                     break;
775                 }
776                 assert(regState->rsIsFloat == false);
777                 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
778             }
779         }
780     }
781 #endif // FEATURE_MULTIREG_ARGS
782
783     return inArgReg;
784 }
785
786 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
787
788 /*****************************************************************************
789  *
790  *  Assign variables to live in registers, etc.
791  */
792
793 void Compiler::raAssignVars()
794 {
795 #ifdef DEBUG
796     if (verbose)
797         printf("*************** In raAssignVars()\n");
798 #endif
799     /* We need to keep track of which registers we ever touch */
800
801     codeGen->regSet.rsClearRegsModified();
802
803 #if FEATURE_STACK_FP_X87
804     // FP register allocation
805     raEnregisterVarsStackFP();
806     raGenerateFPRefCounts();
807 #endif
808
809     /* Predict registers used by code generation */
810     rpPredictRegUse(); // New reg predictor/allocator
811
812     // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
813     // so that the gc tracking logic and lvMustInit logic will ignore them.
814
815     unsigned   lclNum;
816     LclVarDsc* varDsc;
817
818     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
819     {
820         if (varDsc->lvType != TYP_STRUCT)
821             continue;
822
823         if (!varDsc->lvPromoted)
824             continue;
825
826         if (varDsc->lvIsParam)
827             continue;
828
829         if (varDsc->lvRefCnt > 0)
830             continue;
831
832 #ifdef DEBUG
833         if (verbose)
834         {
835             printf("Mark unused struct local V%02u\n", lclNum);
836         }
837
838         lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
839
840         if (promotionType == PROMOTION_TYPE_DEPENDENT)
841         {
842             // This should only happen when all its field locals are unused as well.
843
844             for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
845                  varNum++)
846             {
847                 noway_assert(lvaTable[varNum].lvRefCnt == 0);
848             }
849         }
850         else
851         {
852             noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
853         }
854
855         varDsc->lvUnusedStruct = 1;
856 #endif
857
858         // Change such struct locals to ints
859
860         varDsc->lvType = TYP_INT; // Bash to a non-gc type.
861         noway_assert(!varDsc->lvTracked);
862         noway_assert(!varDsc->lvRegister);
863         varDsc->lvOnFrame  = false; // Force it not to be onstack.
864         varDsc->lvMustInit = false; // Force not to init it.
865         varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
866     }
867 }
868
869 /*****************************************************************************/
870 /*****************************************************************************/
871
872 /*****************************************************************************
873  *
874  *   Given a regNumber return the correct predictReg enum value
875  */
876
877 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
878 {
879     return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
880 }
881
882 /*****************************************************************************
883  *
884  *   Given a varIndex return the correct predictReg enum value
885  */
886
887 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
888 {
889     return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
890 }
891
892 /*****************************************************************************
893  *
894  *   Given a rpPredictReg return the correct varNumber value
895  */
896
897 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
898 {
899     return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
900 }
901
902 /*****************************************************************************
903  *
904  *   Given a rpPredictReg return true if it specifies a Txx register
905  */
906
907 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
908 {
909     if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
910         return true;
911     else
912         return false;
913 }
914
915 /*****************************************************************************
916  *
917  *   Given a regmask return the correct predictReg enum value
918  */
919
920 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
921 {
922     rpPredictReg result = PREDICT_NONE;
923     if (regmask != 0) /* Check if regmask has zero bits set */
924     {
925         if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
926         {
927             DWORD reg = 0;
928             assert(FitsIn<DWORD>(regmask));
929             BitScanForward(&reg, (DWORD)regmask);
930             return rpGetPredictForReg((regNumber)reg);
931         }
932
933 #if defined(_TARGET_ARM_)
934         /* It has multiple bits set */
935         else if (regmask == (RBM_R0 | RBM_R1))
936         {
937             result = PREDICT_PAIR_R0R1;
938         }
939         else if (regmask == (RBM_R2 | RBM_R3))
940         {
941             result = PREDICT_PAIR_R2R3;
942         }
943 #elif defined(_TARGET_X86_)
944         /* It has multiple bits set */
945         else if (regmask == (RBM_EAX | RBM_EDX))
946         {
947             result = PREDICT_PAIR_EAXEDX;
948         }
949         else if (regmask == (RBM_ECX | RBM_EBX))
950         {
951             result = PREDICT_PAIR_ECXEBX;
952         }
953 #endif
954         else /* It doesn't match anything */
955         {
956             result = PREDICT_NONE;
957             assert(!"unreachable");
958             NO_WAY("bad regpair");
959         }
960     }
961     return result;
962 }
963
964 /*****************************************************************************
965  *
966  *  Record a variable to register(s) interference
967  */
968
969 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
970
971 {
972     bool addedIntf = false;
973
974     if (regMask != 0)
975     {
976         for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
977         {
978             regMaskTP regBit = genRegMask(regNum);
979
980             if (regMask & regBit)
981             {
982                 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
983                 if (!VarSetOps::IsEmpty(this, newIntf))
984                 {
985 #ifdef DEBUG
986                     if (verbose)
987                     {
988                         VarSetOps::Iter newIntfIter(this, newIntf);
989                         unsigned        varNum = 0;
990                         while (newIntfIter.NextElem(&varNum))
991                         {
992                             unsigned   lclNum = lvaTrackedToVarNum[varNum];
993                             LclVarDsc* varDsc = &lvaTable[varNum];
994 #if FEATURE_FP_REGALLOC
995                             // Only print the useful interferences
996                             // i.e. floating point LclVar interference with floating point registers
997                             //         or integer LclVar interference with general purpose registers
998                             if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
999 #endif
1000                             {
1001                                 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
1002                                        getRegName(regNum), msg);
1003                             }
1004                         }
1005                     }
1006 #endif
1007                     addedIntf = true;
1008                     VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1009                 }
1010
1011                 regMask -= regBit;
1012                 if (regMask == 0)
1013                     break;
1014             }
1015         }
1016     }
1017     return addedIntf;
1018 }
1019
1020 /*****************************************************************************
1021  *
1022  *  Record a new variable to variable(s) interference
1023  */
1024
1025 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
1026 {
1027     noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1028     noway_assert(!VarSetOps::IsEmpty(this, intfVar));
1029
1030     VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1031     VarSetOps::AddElemD(this, oneVar, varNum);
1032
1033     bool newIntf = fgMarkIntf(intfVar, oneVar);
1034
1035     if (newIntf)
1036         rpAddedVarIntf = true;
1037
1038 #ifdef DEBUG
1039     if (verbose && newIntf)
1040     {
1041         for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1042         {
1043             if (VarSetOps::IsMember(this, intfVar, oneNum))
1044             {
1045                 unsigned lclNum = lvaTrackedToVarNum[varNum];
1046                 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1047                 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1048                        oneNum, msg);
1049             }
1050         }
1051     }
1052 #endif
1053
1054     return newIntf;
1055 }
1056
1057 /*****************************************************************************
1058  *
1059  *   Determine preferred register mask for a given predictReg value
1060  */
1061
1062 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
1063 {
1064     if (rpHasVarIndexForPredict(predictReg))
1065         predictReg = PREDICT_REG;
1066
1067     noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0]));
1068     noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1069
1070     regMaskTP regAvailForType = rpPredictMap[predictReg];
1071     if (varTypeIsFloating(type))
1072     {
1073         regAvailForType &= RBM_ALLFLOAT;
1074     }
1075     else
1076     {
1077         regAvailForType &= RBM_ALLINT;
1078     }
1079 #ifdef _TARGET_ARM_
1080     if (type == TYP_DOUBLE)
1081     {
1082         if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1083         {
1084             // Fix 388433 ARM JitStress WP7
1085             if ((regAvailForType & RBM_DBL_REGS) != 0)
1086             {
1087                 regAvailForType |= (regAvailForType << 1);
1088             }
1089             else
1090             {
1091                 regAvailForType = RBM_NONE;
1092             }
1093         }
1094     }
1095 #endif
1096     return regAvailForType;
1097 }
1098
1099 /*****************************************************************************
1100  *
1101  *  Predict register choice for a type.
1102  *
1103  *  Adds the predicted registers to rsModifiedRegsMask.
1104  */
1105 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1106 {
1107     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1108     regNumber regNum;
1109     regMaskTP regBits;
1110
1111     // Add any reserved register to the lockedRegs
1112     lockedRegs |= codeGen->regSet.rsMaskResvd;
1113
1114     /* Clear out the lockedRegs from preferReg */
1115     preferReg &= ~lockedRegs;
1116
1117     if (rpAsgVarNum != -1)
1118     {
1119         noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
1120
1121         /* Don't pick the register used by rpAsgVarNum either */
1122         LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1123         noway_assert(tgtVar->lvRegNum != REG_STK);
1124
1125         preferReg &= ~genRegMask(tgtVar->lvRegNum);
1126     }
1127
1128     switch (type)
1129     {
1130         case TYP_BOOL:
1131         case TYP_BYTE:
1132         case TYP_UBYTE:
1133         case TYP_SHORT:
1134         case TYP_CHAR:
1135         case TYP_INT:
1136         case TYP_UINT:
1137         case TYP_REF:
1138         case TYP_BYREF:
1139 #ifdef _TARGET_AMD64_
1140         case TYP_LONG:
1141 #endif // _TARGET_AMD64_
1142
1143             // expand preferReg to all non-locked registers if no bits set
1144             preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1145
1146             if (preferReg == 0) // no bits set?
1147             {
1148                 // Add one predefined spill choice register if no bits set.
1149                 // (The jit will introduce one spill temp)
1150                 preferReg |= RBM_SPILL_CHOICE;
1151                 rpPredictSpillCnt++;
1152
1153 #ifdef DEBUG
1154                 if (verbose)
1155                     printf("Predict one spill temp\n");
1156 #endif
1157             }
1158
1159             if (preferReg != 0)
1160             {
1161                 /* Iterate the registers in the order specified by rpRegTmpOrder */
1162
1163                 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1164                 {
1165                     regNum  = rpRegTmpOrder[index];
1166                     regBits = genRegMask(regNum);
1167
1168                     if ((preferReg & regBits) == regBits)
1169                     {
1170                         goto RET;
1171                     }
1172                 }
1173             }
1174             /* Otherwise we have allocated all registers, so do nothing */
1175             break;
1176
1177 #ifndef _TARGET_AMD64_
1178         case TYP_LONG:
1179
1180             if ((preferReg == 0) ||                   // no bits set?
1181                 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1182             {
1183                 // expand preferReg to all non-locked registers
1184                 preferReg = RBM_ALLINT & ~lockedRegs;
1185             }
1186
1187             if (preferReg == 0) // no bits set?
1188             {
1189                 // Add EAX:EDX to the registers
1190                 // (The jit will introduce two spill temps)
1191                 preferReg = RBM_PAIR_TMP;
1192                 rpPredictSpillCnt += 2;
1193 #ifdef DEBUG
1194                 if (verbose)
1195                     printf("Predict two spill temps\n");
1196 #endif
1197             }
1198             else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
1199             {
1200                 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1201                 {
1202                     // Add EAX to the registers
1203                     // (The jit will introduce one spill temp)
1204                     preferReg |= RBM_PAIR_TMP_LO;
1205                 }
1206                 else
1207                 {
1208                     // Add EDX to the registers
1209                     // (The jit will introduce one spill temp)
1210                     preferReg |= RBM_PAIR_TMP_HI;
1211                 }
1212                 rpPredictSpillCnt++;
1213 #ifdef DEBUG
1214                 if (verbose)
1215                     printf("Predict one spill temp\n");
1216 #endif
1217             }
1218
1219             regPairNo regPair;
1220             regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1221             if (regPair != REG_PAIR_NONE)
1222             {
1223                 regBits = genRegPairMask(regPair);
1224                 goto RET;
1225             }
1226
1227             /* Otherwise we have allocated all registers, so do nothing */
1228             break;
1229 #endif // _TARGET_AMD64_
1230
1231 #ifdef _TARGET_ARM_
1232         case TYP_STRUCT:
1233 #endif
1234
1235         case TYP_FLOAT:
1236         case TYP_DOUBLE:
1237
1238 #if FEATURE_FP_REGALLOC
1239             regMaskTP restrictMask;
1240             restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
1241             assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1242
1243             // expand preferReg to all available non-locked registers if no bits set
1244             preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1245             regMaskTP preferDouble;
1246             preferDouble = preferReg & (preferReg >> 1);
1247
1248             if ((preferReg == 0) // no bits set?
1249 #ifdef _TARGET_ARM_
1250                 || ((type == TYP_DOUBLE) &&
1251                     ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1252 #endif
1253                 )
1254             {
1255                 // Add one predefined spill choice register if no bits set.
1256                 // (The jit will introduce one spill temp)
1257                 preferReg |= RBM_SPILL_CHOICE_FLT;
1258                 rpPredictSpillCnt++;
1259
1260 #ifdef DEBUG
1261                 if (verbose)
1262                     printf("Predict one spill temp (float)\n");
1263 #endif
1264             }
1265
1266             assert(preferReg != 0);
1267
1268             /* Iterate the registers in the order specified by raRegFltTmpOrder */
1269
1270             for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1271             {
1272                 regNum  = raRegFltTmpOrder[index];
1273                 regBits = genRegMask(regNum);
1274
1275                 if (varTypeIsFloating(type))
1276                 {
1277 #ifdef _TARGET_ARM_
1278                     if (type == TYP_DOUBLE)
1279                     {
1280                         if ((regBits & RBM_DBL_REGS) == 0)
1281                         {
1282                             continue; // We must restrict the set to the double registers
1283                         }
1284                         else
1285                         {
1286                             // TYP_DOUBLE use two consecutive registers
1287                             regBits |= genRegMask(REG_NEXT(regNum));
1288                         }
1289                     }
1290 #endif
1291                     // See if COMPlus_JitRegisterFP is restricting this FP register
1292                     //
1293                     if ((restrictMask & regBits) != regBits)
1294                         continue;
1295                 }
1296
1297                 if ((preferReg & regBits) == regBits)
1298                 {
1299                     goto RET;
1300                 }
1301             }
1302             /* Otherwise we have allocated all registers, so do nothing */
1303             break;
1304
1305 #else // !FEATURE_FP_REGALLOC
1306
1307             return RBM_NONE;
1308
1309 #endif
1310
1311         default:
1312             noway_assert(!"unexpected type in reg use prediction");
1313     }
1314
1315     /* Abnormal return */
1316     noway_assert(!"Ran out of registers in rpPredictRegPick");
1317     return RBM_NONE;
1318
1319 RET:
1320     /*
1321      *  If during the first prediction we need to allocate
1322      *  one of the registers that we used for coloring locals
1323      *  then flag this by setting rpPredictAssignAgain.
1324      *  We will have to go back and repredict the registers
1325      */
1326     if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1327         rpPredictAssignAgain = true;
1328
1329     // Add a register interference to each of the last use variables
1330     if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1331     {
1332         VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1333         VarSetOps::Assign(this, lastUse, rpLastUseVars);
1334         VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1335         VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1336         // While we still have any lastUse or inPlaceUse bits
1337         VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1338
1339         VARSET_TP       varAsSet(VarSetOps::MakeEmpty(this));
1340         VarSetOps::Iter iter(this, useUnion);
1341         unsigned        varNum = 0;
1342         while (iter.NextElem(&varNum))
1343         {
1344             // We'll need this for one of the calls...
1345             VarSetOps::ClearD(this, varAsSet);
1346             VarSetOps::AddElemD(this, varAsSet, varNum);
1347
1348             // If this varBit and lastUse?
1349             if (VarSetOps::IsMember(this, lastUse, varNum))
1350             {
1351                 // Record a register to variable interference
1352                 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1353             }
1354
1355             // If this varBit and inPlaceUse?
1356             if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1357             {
1358                 // Record a register to variable interference
1359                 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
1360             }
1361         }
1362     }
1363     codeGen->regSet.rsSetRegsModified(regBits);
1364
1365     return regBits;
1366 }
1367
1368 /*****************************************************************************
1369  *
1370  *  Predict integer register use for generating an address mode for a tree,
1371  *  by setting tree->gtUsedRegs to all registers used by this tree and its
1372  *  children.
1373  *    tree       - is the child of a GT_IND node
1374  *    type       - the type of the GT_IND node (floating point/integer)
1375  *    lockedRegs - are the registers which are currently held by
1376  *                 a previously evaluated node.
1377  *    rsvdRegs   - registers which should not be allocated because they will
1378  *                 be needed to evaluate a node in the future
1379  *               - Also if rsvdRegs has the RBM_LASTUSE bit set then
1380  *                 the rpLastUseVars set should be saved and restored
1381  *                 so that we don't add any new variables to rpLastUseVars
1382  *    lenCSE     - is non-NULL only when we have a lenCSE expression
1383  *
1384  *  Return the scratch registers to be held by this tree. (one or two registers
1385  *  to form an address expression)
1386  */
1387
1388 regMaskTP Compiler::rpPredictAddressMode(
1389     GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1390 {
1391     GenTreePtr op1;
1392     GenTreePtr op2;
1393     GenTreePtr opTemp;
1394     genTreeOps oper = tree->OperGet();
1395     regMaskTP  op1Mask;
1396     regMaskTP  op2Mask;
1397     regMaskTP  regMask;
1398     ssize_t    sh;
1399     ssize_t    cns = 0;
1400     bool       rev;
1401     bool       hasTwoAddConst     = false;
1402     bool       restoreLastUseVars = false;
1403     VARSET_TP  oldLastUseVars(VarSetOps::MakeEmpty(this));
1404
1405     /* do we need to save and restore the rpLastUseVars set ? */
1406     if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1407     {
1408         restoreLastUseVars = true;
1409         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1410     }
1411     rsvdRegs &= ~RBM_LASTUSE;
1412
1413     /* if not an add, then just force it to a register */
1414
1415     if (oper != GT_ADD)
1416     {
1417         if (oper == GT_ARR_ELEM)
1418         {
1419             regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1420             goto DONE;
1421         }
1422         else
1423         {
1424             goto NO_ADDR_EXPR;
1425         }
1426     }
1427
1428     op1 = tree->gtOp.gtOp1;
1429     op2 = tree->gtOp.gtOp2;
1430     rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1431
1432     /* look for (x + y) + icon address mode */
1433
1434     if (op2->OperGet() == GT_CNS_INT)
1435     {
1436         cns = op2->gtIntCon.gtIconVal;
1437
1438         /* if not an add, then just force op1 into a register */
1439         if (op1->OperGet() != GT_ADD)
1440             goto ONE_ADDR_EXPR;
1441
1442         hasTwoAddConst = true;
1443
1444         /* Record the 'rev' flag, reverse evaluation order */
1445         rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1446
1447         op2 = op1->gtOp.gtOp2;
1448         op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1449     }
1450
1451     /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1452
1453     sh = 0;
1454     if (op2->OperGet() == GT_LSH)
1455     {
1456         if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1457         {
1458             sh     = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1459             opTemp = op2->gtOp.gtOp1;
1460         }
1461         else
1462         {
1463             opTemp = NULL;
1464         }
1465     }
1466     else
1467     {
1468         opTemp = op2;
1469     }
1470
1471     if (opTemp != NULL)
1472     {
1473         if (opTemp->OperGet() == GT_NOP)
1474         {
1475             opTemp = opTemp->gtOp.gtOp1;
1476         }
1477
1478         // Is this a const operand?
1479         if (opTemp->OperGet() == GT_CNS_INT)
1480         {
1481             // Compute the new cns value that Codegen will end up using
1482             cns += (opTemp->gtIntCon.gtIconVal << sh);
1483
1484             goto ONE_ADDR_EXPR;
1485         }
1486     }
1487
1488     /* Check for LSH in op1 slot */
1489
1490     if (op1->OperGet() != GT_LSH)
1491         goto TWO_ADDR_EXPR;
1492
1493     opTemp = op1->gtOp.gtOp2;
1494
1495     if (opTemp->OperGet() != GT_CNS_INT)
1496         goto TWO_ADDR_EXPR;
1497
1498     sh = opTemp->gtIntCon.gtIconVal;
1499
1500     /* Check for LSH of 0, special case */
1501     if (sh == 0)
1502         goto TWO_ADDR_EXPR;
1503
1504 #if defined(_TARGET_XARCH_)
1505
1506     /* Check for LSH of 1 2 or 3 */
1507     if (sh > 3)
1508         goto TWO_ADDR_EXPR;
1509
1510 #elif defined(_TARGET_ARM_)
1511
1512     /* Check for LSH of 1 to 30 */
1513     if (sh > 30)
1514         goto TWO_ADDR_EXPR;
1515
1516 #else
1517
1518     goto TWO_ADDR_EXPR;
1519
1520 #endif
1521
1522     /* Matched a leftShift by 'sh' subtree, move op1 down */
1523     op1 = op1->gtOp.gtOp1;
1524
1525 TWO_ADDR_EXPR:
1526
1527     /* Now we have to evaluate op1 and op2 into registers */
1528
1529     /* Evaluate op1 and op2 in the correct order */
1530     if (rev)
1531     {
1532         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1533         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1534     }
1535     else
1536     {
1537         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1538         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1539     }
1540
1541     /*  If op1 and op2 must be spilled and reloaded then
1542      *  op1 and op2 might be reloaded into the same register
1543      *  This can only happen when all the registers are lockedRegs
1544      */
1545     if ((op1Mask == op2Mask) && (op1Mask != 0))
1546     {
1547         /* We'll need to grab a different register for op2 */
1548         op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1549     }
1550
1551 #ifdef _TARGET_ARM_
1552     // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1553     //      [op2 + op1<<sh + cns]
1554     // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1555     //
1556     if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1557     {
1558         op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1559     }
1560
1561     //
1562     // On the ARM we will need at least one scratch register for trees that have this form:
1563     //     [op1 + op2 + cns] or  [op1 + op2<<sh + cns]
1564     // or for a float/double or long when we have both op1 and op2
1565     // or when we have an 'cns' that is too large for the ld/st instruction
1566     //
1567     if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1568     {
1569         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1570     }
1571
1572     //
1573     // If we create a CSE that immediately dies then we may need to add an additional register interference
1574     // so we don't color the CSE into R3
1575     //
1576     if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1577     {
1578         opTemp = op2->gtOp.gtOp2;
1579         if (opTemp->OperGet() == GT_LCL_VAR)
1580         {
1581             unsigned   varNum = opTemp->gtLclVar.gtLclNum;
1582             LclVarDsc* varDsc = &lvaTable[varNum];
1583
1584             if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1585             {
1586                 rpRecordRegIntf(RBM_TMP_0,
1587                                 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1588             }
1589         }
1590     }
1591 #endif
1592
1593     regMask          = (op1Mask | op2Mask);
1594     tree->gtUsedRegs = (regMaskSmall)regMask;
1595     goto DONE;
1596
1597 ONE_ADDR_EXPR:
1598
1599     /* now we have to evaluate op1 into a register */
1600
1601     op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1602     op2Mask = RBM_NONE;
1603
1604 #ifdef _TARGET_ARM_
1605     //
1606     // On the ARM we will need another scratch register when we have an 'cns' that is too large for the ld/st
1607     // instruction
1608     //
1609     if (!codeGen->validDispForLdSt(cns, type))
1610     {
1611         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1612     }
1613 #endif
1614
1615     regMask          = (op1Mask | op2Mask);
1616     tree->gtUsedRegs = (regMaskSmall)regMask;
1617     goto DONE;
1618
1619 NO_ADDR_EXPR:
1620
1621 #if !CPU_LOAD_STORE_ARCH
1622     if (oper == GT_CNS_INT)
1623     {
1624         /* Indirect of a constant does not require a register */
1625         regMask = RBM_NONE;
1626     }
1627     else
1628 #endif
1629     {
1630         /* now we have to evaluate tree into a register */
1631         regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1632     }
1633
1634 DONE:
1635     regMaskTP regUse = tree->gtUsedRegs;
1636
1637     if (!VarSetOps::IsEmpty(this, compCurLife))
1638     {
1639         // Add interference between the current set of life variables and
1640         //  the set of temporary registers need to evaluate the sub tree
1641         if (regUse)
1642         {
1643             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1644         }
1645     }
1646
1647     /* Do we need to resore the oldLastUseVars value */
1648     if (restoreLastUseVars)
1649     {
1650         /*
1651          *  If we used a GT_ASG targeted register then we need to add
1652          *  a variable interference between any new last use variables
1653          *  and the GT_ASG targeted register
1654          */
1655         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1656         {
1657             rpRecordVarIntf(rpAsgVarNum,
1658                             VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1659         }
1660         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1661     }
1662
1663     return regMask;
1664 }
1665
1666 /*****************************************************************************
1667  *
1668  *
1669  */
1670
1671 void Compiler::rpPredictRefAssign(unsigned lclNum)
1672 {
1673     LclVarDsc* varDsc = lvaTable + lclNum;
1674
1675     varDsc->lvRefAssign = 1;
1676
1677 #if NOGC_WRITE_BARRIERS
1678 #ifdef DEBUG
1679     if (verbose)
1680     {
1681         if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1682             printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1683                    varDsc->lvVarIndex);
1684     }
1685 #endif
1686
1687     /* Make sure that write barrier pointer variables never land in EDX */
1688     VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1689 #endif // NOGC_WRITE_BARRIERS
1690 }
1691
1692 /*****************************************************************************
1693  *
1694  * Predict the internal temp physical register usage for a block assignment tree,
1695  * by setting tree->gtUsedRegs.
1696  * Records the internal temp physical register usage for this tree.
1697  * Returns a mask of interfering registers for this tree.
1698  *
1699  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1700  * to the set of scratch registers needed when evaluating the tree.
1701  * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1702  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1703  * predict additional internal temp physical registers to spill into.
1704  *
1705  *    tree       - is the child of a GT_IND node
1706  *    predictReg - what type of register does the tree need
1707  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1708  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1709  *    rsvdRegs   - registers which should not be allocated because they will
1710  *                 be needed to evaluate a node in the future
1711  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1712  *                 the rpLastUseVars set should be saved and restored
1713  *                 so that we don't add any new variables to rpLastUseVars.
1714  */
1715 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr   tree,
1716                                           rpPredictReg predictReg,
1717                                           regMaskTP    lockedRegs,
1718                                           regMaskTP    rsvdRegs)
1719 {
1720     regMaskTP regMask         = RBM_NONE;
1721     regMaskTP interferingRegs = RBM_NONE;
1722
1723     bool        hasGCpointer  = false;
1724     bool        dstIsOnStack  = false;
1725     bool        useMemHelper  = false;
1726     bool        useBarriers   = false;
1727     GenTreeBlk* dst           = tree->gtGetOp1()->AsBlk();
1728     GenTreePtr  dstAddr       = dst->Addr();
1729     GenTreePtr  srcAddrOrFill = tree->gtGetOp2IfPresent();
1730
1731     size_t blkSize = dst->gtBlkSize;
1732
1733     hasGCpointer = (dst->HasGCPtr());
1734
1735     bool isCopyBlk = tree->OperIsCopyBlkOp();
1736     bool isCopyObj = isCopyBlk && hasGCpointer;
1737     bool isInitBlk = tree->OperIsInitBlkOp();
1738
1739     if (isCopyBlk)
1740     {
1741         assert(srcAddrOrFill->OperIsIndir());
1742         srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1743     }
1744     else
1745     {
1746         // For initBlk, we don't need to worry about the GC pointers.
1747         hasGCpointer = false;
1748     }
1749
1750     if (blkSize != 0)
1751     {
1752         if (isCopyObj)
1753         {
1754             dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1755         }
1756
1757         if (isInitBlk)
1758         {
1759             if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1760             {
1761                 useMemHelper = true;
1762             }
1763         }
1764     }
1765     else
1766     {
1767         useMemHelper = true;
1768     }
1769
1770     if (hasGCpointer && !dstIsOnStack)
1771     {
1772         useBarriers = true;
1773     }
1774
1775 #ifdef _TARGET_ARM_
1776     //
1777     // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths.
1778     //
1779     if (!useMemHelper && !useBarriers)
1780     {
1781         bool     useLoop        = false;
1782         unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1783
1784         // A mask to use to force the predictor to choose low registers (to reduce code size)
1785         regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1786
1787         // Allow the src and dst to be used in place, unless we use a loop, in which
1788         // case we will need scratch registers as we will be writing to them.
1789         rpPredictReg srcAndDstPredict = PREDICT_REG;
1790
1791         // Will we be using a loop to implement this INITBLK/COPYBLK?
1792         if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1793         {
1794             useLoop          = true;
1795             avoidReg         = RBM_NONE;
1796             srcAndDstPredict = PREDICT_SCRATCH_REG;
1797         }
1798
1799         if (tree->gtFlags & GTF_REVERSE_OPS)
1800         {
1801             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1802                                            dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1803             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1804         }
1805         else
1806         {
1807             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1808                                            srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1809             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1810         }
1811
1812         // We need at least one scratch register for a copyBlk
1813         if (isCopyBlk)
1814         {
1815             // Pick a low register to reduce the code size
1816             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1817         }
1818
1819         if (useLoop)
1820         {
1821             if (isCopyBlk)
1822             {
1823                 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1824                 // Pick another low register to reduce the code size
1825                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1826             }
1827
1828             // We need a loop index register
1829             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1830         }
1831
1832         tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1833
1834         return interferingRegs;
1835     }
1836 #endif
1837     // What order should the Dest, Val/Src, and Size be calculated
1838     GenTreePtr opsPtr[3];
1839     regMaskTP  regsPtr[3];
1840
1841 #if defined(_TARGET_XARCH_)
1842     fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1843
1844     // We're going to use these, might as well make them available now
1845
1846     codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1847     if (isCopyBlk)
1848         codeGen->regSet.rsSetRegsModified(RBM_ESI);
1849
1850 #elif defined(_TARGET_ARM_)
1851
1852     if (useMemHelper)
1853     {
1854         // For all other cases that involve non-constants, we just call memcpy/memset
1855         // JIT helpers
1856         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1857         interferingRegs |= RBM_CALLEE_TRASH;
1858 #ifdef DEBUG
1859         if (verbose)
1860             printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1861 #endif
1862     }
1863     else // useBarriers
1864     {
1865         assert(useBarriers);
1866         assert(isCopyBlk);
1867
1868         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1869
1870         // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1871         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1872 #ifdef DEBUG
1873         if (verbose)
1874             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1875 #endif
1876     }
1877 #else // !_TARGET_X86_ && !_TARGET_ARM_
1878 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1879 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1880     regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1881     regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1882                                    opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1883     regMask |= regsPtr[0];
1884     opsPtr[0]->gtUsedRegs |= regsPtr[0];
1885     rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1886
1887     regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1888                                    opsPtr2RsvdRegs | RBM_LASTUSE);
1889     regMask |= regsPtr[1];
1890     opsPtr[1]->gtUsedRegs |= regsPtr[1];
1891     rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1892
1893     regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1894     if (opsPtr[2] == nullptr)
1895     {
1896         // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1897         // Note that it is quite possible that no register is required, but this preserves
1898         // former behavior.
1899         regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1900         rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1901     }
1902     else
1903     {
1904         regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1905         opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1906     }
1907     regMask |= opsPtr2UsedRegs;
1908
1909     tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1910     return interferingRegs;
1911 }
1912
1913 /*****************************************************************************
1914  *
1915  * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1916  * Returns a regMask with the internal temp physical register usage for this tree.
1917  *
1918  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1919  * to the set of scratch registers needed when evaluating the tree.
1920  * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1921  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1922  * predict additional internal temp physical registers to spill into.
1923  *
1924  *    tree       - is the child of a GT_IND node
1925  *    predictReg - what type of register does the tree need
1926  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1927  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1928  *    rsvdRegs   - registers which should not be allocated because they will
1929  *                 be needed to evaluate a node in the future
1930  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1931  *                 the rpLastUseVars set should be saved and restored
1932  *                 so that we don't add any new variables to rpLastUseVars.
1933  */
1934
1935 #pragma warning(disable : 4701)
1936
1937 #ifdef _PREFAST_
1938 #pragma warning(push)
1939 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1940 #endif
1941 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr   tree,
1942                                         rpPredictReg predictReg,
1943                                         regMaskTP    lockedRegs,
1944                                         regMaskTP    rsvdRegs)
1945 {
1946     regMaskTP    regMask = DUMMY_INIT(RBM_ILLEGAL);
1947     regMaskTP    op2Mask;
1948     regMaskTP    tmpMask;
1949     rpPredictReg op1PredictReg;
1950     rpPredictReg op2PredictReg;
1951     LclVarDsc*   varDsc = NULL;
1952     VARSET_TP    oldLastUseVars(VarSetOps::UninitVal());
1953
1954     VARSET_TP varBits(VarSetOps::UninitVal());
1955     VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1956
1957     bool      restoreLastUseVars = false;
1958     regMaskTP interferingRegs    = RBM_NONE;
1959
1960 #ifdef DEBUG
1961     // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1962     noway_assert(tree);
1963     noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1964     noway_assert(RBM_ILLEGAL);
1965     noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1966     /* impossible values, to make sure that we set them */
1967     tree->gtUsedRegs = RBM_ILLEGAL;
1968 #endif
1969
1970     /* Figure out what kind of a node we have */
1971
1972     genTreeOps oper = tree->OperGet();
1973     var_types  type = tree->TypeGet();
1974     unsigned   kind = tree->OperKind();
1975
1976     // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1977     genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1978     if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1979         predictReg = PREDICT_NONE;
1980     else if (rpHasVarIndexForPredict(predictReg))
1981     {
1982         // The only place where predictReg is set to a var is in the PURE
1983         // assignment case where varIndex is the var being assigned to.
1984         // We need to check whether the variable is used between here and
1985         // its redefinition.
1986         unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1987         unsigned lclNum   = lvaTrackedToVarNum[varIndex];
1988         bool     found    = false;
1989         for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1990         {
1991             if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1992             {
1993                 // Is this the pure assignment?
1994                 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1995                 {
1996                     predictReg = PREDICT_SCRATCH_REG;
1997                 }
1998                 found = true;
1999                 break;
2000             }
2001         }
2002         assert(found);
2003     }
2004
2005     if (rsvdRegs & RBM_LASTUSE)
2006     {
2007         restoreLastUseVars = true;
2008         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2009         rsvdRegs &= ~RBM_LASTUSE;
2010     }
2011
2012     /* Is this a constant or leaf node? */
2013
2014     if (kind & (GTK_CONST | GTK_LEAF))
2015     {
2016         bool      lastUse   = false;
2017         regMaskTP enregMask = RBM_NONE;
2018
2019         switch (oper)
2020         {
2021 #ifdef _TARGET_ARM_
2022             case GT_CNS_DBL:
2023                 // Codegen for floating point constants on the ARM is currently
2024                 // movw/movt    rT1, <lo32 bits>
2025                 // movw/movt    rT2, <hi32 bits>
2026                 //  vmov.i2d    dT0, rT1,rT2
2027                 //
2028                 // For TYP_FLOAT one integer register is required
2029                 //
2030                 // These integer register(s) immediately die
2031                 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2032                 if (type == TYP_DOUBLE)
2033                 {
2034                     // For TYP_DOUBLE a second integer register is required
2035                     //
2036                     tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2037                 }
2038
2039                 // We also need a floating point register that we keep
2040                 //
2041                 if (predictReg == PREDICT_NONE)
2042                     predictReg = PREDICT_SCRATCH_REG;
2043
2044                 regMask          = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2045                 tree->gtUsedRegs = regMask | tmpMask;
2046                 goto RETURN_CHECK;
2047 #endif
2048
2049             case GT_CNS_INT:
2050             case GT_CNS_LNG:
2051
2052                 if (rpHasVarIndexForPredict(predictReg))
2053                 {
2054                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2055                     rpAsgVarNum       = tgtIndex;
2056
2057                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2058                     predictReg = PREDICT_NONE;
2059
2060                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2061                     tgtVar->lvDependReg = true;
2062
2063                     if (type == TYP_LONG)
2064                     {
2065                         assert(oper == GT_CNS_LNG);
2066
2067                         if (tgtVar->lvOtherReg == REG_STK)
2068                         {
2069                             // Well we do need one register for a partially enregistered
2070                             type       = TYP_INT;
2071                             predictReg = PREDICT_SCRATCH_REG;
2072                         }
2073                     }
2074                 }
2075                 else
2076                 {
2077 #if !CPU_LOAD_STORE_ARCH
2078                     /* If the constant is a handle then it will need to have a relocation
2079                        applied to it.  It will need to be loaded into a register.
2080                        But never throw away an existing hint.
2081                        */
2082                     if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2083 #endif
2084                     {
2085                         if (predictReg == PREDICT_NONE)
2086                             predictReg = PREDICT_SCRATCH_REG;
2087                     }
2088                 }
2089                 break;
2090
2091             case GT_NO_OP:
2092                 break;
2093
2094             case GT_CLS_VAR:
2095                 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2096                     (genTypeSize(type) < sizeof(int)))
2097                 {
2098                     predictReg = PREDICT_SCRATCH_REG;
2099                 }
2100 #ifdef _TARGET_ARM_
2101                 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2102                 //
2103                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2104                 {
2105                     // These integer register(s) immediately die
2106                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2107                     // Two integer registers are required for a TYP_DOUBLE
2108                     if (type == TYP_DOUBLE)
2109                         tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2110                 }
2111                 // We need a temp register in some cases of loads/stores to a class var
2112                 if (predictReg == PREDICT_NONE)
2113                 {
2114                     predictReg = PREDICT_SCRATCH_REG;
2115                 }
2116 #endif
2117                 if (rpHasVarIndexForPredict(predictReg))
2118                 {
2119                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2120                     rpAsgVarNum       = tgtIndex;
2121
2122                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2123                     predictReg = PREDICT_NONE;
2124
2125                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2126                     tgtVar->lvDependReg = true;
2127
2128                     if (type == TYP_LONG)
2129                     {
2130                         if (tgtVar->lvOtherReg == REG_STK)
2131                         {
2132                             // Well we do need one register for a partially enregistered
2133                             type       = TYP_INT;
2134                             predictReg = PREDICT_SCRATCH_REG;
2135                         }
2136                     }
2137                 }
2138                 break;
2139
2140             case GT_LCL_FLD:
2141 #ifdef _TARGET_ARM_
2142                 // Check for a misalignment on a Floating Point field
2143                 //
2144                 if (varTypeIsFloating(type))
2145                 {
2146                     if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2147                     {
2148                         // These integer register(s) immediately die
2149                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2150                         // Two integer registers are required for a TYP_DOUBLE
2151                         if (type == TYP_DOUBLE)
2152                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2153                     }
2154                 }
2155 #endif
2156                 __fallthrough;
2157
2158             case GT_LCL_VAR:
2159             case GT_REG_VAR:
2160
2161                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2162
2163                 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2164                 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2165                 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2166
2167 #if FEATURE_STACK_FP_X87
2168                 // If it's a floating point var, there's nothing to do
2169                 if (varTypeIsFloating(type))
2170                 {
2171                     tree->gtUsedRegs = RBM_NONE;
2172                     regMask          = RBM_NONE;
2173                     goto RETURN_CHECK;
2174                 }
2175 #endif
2176
2177                 // If the variable is already a register variable, no need to go further.
2178                 if (oper == GT_REG_VAR)
2179                     break;
2180
2181                 /* Apply the type of predictReg to the LCL_VAR */
2182
2183                 if (predictReg == PREDICT_REG)
2184                 {
2185                 PREDICT_REG_COMMON:
2186                     if (varDsc->lvRegNum == REG_STK)
2187                         break;
2188
2189                     goto GRAB_COUNT;
2190                 }
2191                 else if (predictReg == PREDICT_SCRATCH_REG)
2192                 {
2193                     noway_assert(predictReg == PREDICT_SCRATCH_REG);
2194
2195                     /* Is this the last use of a local var?   */
2196                     if (lastUse)
2197                     {
2198                         if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2199                             goto PREDICT_REG_COMMON;
2200                     }
2201                 }
2202                 else if (rpHasVarIndexForPredict(predictReg))
2203                 {
2204                     /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2205                     {
2206                         unsigned   tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2207                         LclVarDsc* tgtVar    = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2208                         VarSetOps::MakeSingleton(this, tgtIndex1);
2209
2210                         noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2211                         noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2212 #ifndef _TARGET_AMD64_
2213                         // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2214                         // so this assert is meaningless
2215                         noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2216 #endif // !_TARGET_AMD64_
2217
2218                         if (varDsc->lvTracked)
2219                         {
2220                             unsigned srcIndex;
2221                             srcIndex = varDsc->lvVarIndex;
2222
2223                             // If this register has it's last use here then we will prefer
2224                             // to color to the same register as tgtVar.
2225                             if (lastUse)
2226                             {
2227                                 /*
2228                                  *  Add an entry in the lvaVarPref graph to indicate
2229                                  *  that it would be worthwhile to color these two variables
2230                                  *  into the same physical register.
2231                                  *  This will help us avoid having an extra copy instruction
2232                                  */
2233                                 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2234                                 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2235                             }
2236
2237                             // Add a variable interference from srcIndex to each of the last use variables
2238                             if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2239                             {
2240                                 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2241                             }
2242                         }
2243                         rpAsgVarNum = tgtIndex1;
2244
2245                         /* We will rely on the target enregistered variable from the GT_ASG */
2246                         varDsc = tgtVar;
2247                     }
2248                 GRAB_COUNT:
2249                     unsigned grabCount;
2250                     grabCount = 0;
2251
2252                     if (genIsValidFloatReg(varDsc->lvRegNum))
2253                     {
2254                         enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2255                     }
2256                     else
2257                     {
2258                         enregMask = genRegMask(varDsc->lvRegNum);
2259                     }
2260
2261 #ifdef _TARGET_ARM_
2262                     if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2263                     {
2264                         // We need to compute the intermediate value using a TYP_DOUBLE
2265                         // but we storing the result in a TYP_SINGLE enregistered variable
2266                         //
2267                         grabCount++;
2268                     }
2269                     else
2270 #endif
2271                     {
2272                         /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2273                         if (enregMask & (rsvdRegs | lockedRegs))
2274                         {
2275                             grabCount++;
2276                         }
2277 #ifndef _TARGET_64BIT_
2278                         if (type == TYP_LONG)
2279                         {
2280                             if (varDsc->lvOtherReg != REG_STK)
2281                             {
2282                                 tmpMask = genRegMask(varDsc->lvOtherReg);
2283                                 enregMask |= tmpMask;
2284
2285                                 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2286                                 if (tmpMask & (rsvdRegs | lockedRegs))
2287                                     grabCount++;
2288                             }
2289                             else // lvOtherReg == REG_STK
2290                             {
2291                                 grabCount++;
2292                             }
2293                         }
2294 #endif // _TARGET_64BIT_
2295                     }
2296
2297                     varDsc->lvDependReg = true;
2298
2299                     if (grabCount == 0)
2300                     {
2301                         /* Does not need a register */
2302                         predictReg = PREDICT_NONE;
2303                         // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2304                         VarSetOps::UnionD(this, rpUseInPlace, varBits);
2305                     }
2306                     else // (grabCount > 0)
2307                     {
2308 #ifndef _TARGET_64BIT_
2309                         /* For TYP_LONG and we only need one register then change the type to TYP_INT */
2310                         if ((type == TYP_LONG) && (grabCount == 1))
2311                         {
2312                             /* We will need to pick one register */
2313                             type = TYP_INT;
2314                             // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2315                             VarSetOps::UnionD(this, rpUseInPlace, varBits);
2316                         }
2317                         noway_assert((type == TYP_DOUBLE) ||
2318                                      (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2319 #else  // !_TARGET_64BIT_
2320                         noway_assert(grabCount == 1);
2321 #endif // !_TARGET_64BIT_
2322                     }
2323                 }
2324                 else if (type == TYP_STRUCT)
2325                 {
2326 #ifdef _TARGET_ARM_
2327                     // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2328                     //        predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2329                     //        As a workaround we just bash it to PREDICT_NONE here
2330                     //
2331                     if (predictReg != PREDICT_NONE)
2332                         predictReg = PREDICT_NONE;
2333 #endif
2334                     // Currently predictReg is saying that we will not need any scratch registers
2335                     noway_assert(predictReg == PREDICT_NONE);
2336
2337                     /* We may need to sign or zero extend a small type when pushing a struct */
2338                     if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2339                     {
2340                         for (unsigned varNum = varDsc->lvFieldLclStart;
2341                              varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2342                         {
2343                             LclVarDsc* fldVar = lvaTable + varNum;
2344
2345                             if (fldVar->lvStackAligned())
2346                             {
2347                                 // When we are stack aligned Codegen will just use
2348                                 // a push instruction and thus doesn't need any register
2349                                 // since we can push both a register or a stack frame location
2350                                 continue;
2351                             }
2352
2353                             if (varTypeIsByte(fldVar->TypeGet()))
2354                             {
2355                                 // We will need to reserve one byteable register,
2356                                 //
2357                                 type       = TYP_BYTE;
2358                                 predictReg = PREDICT_SCRATCH_REG;
2359 #if CPU_HAS_BYTE_REGS
2360                                 // It is best to enregister this fldVar in a byteable register
2361                                 //
2362                                 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2363 #endif
2364                             }
2365                             else if (varTypeIsShort(fldVar->TypeGet()))
2366                             {
2367                                 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2368                                 // If fldVar is not enregistered then we will need a scratch register
2369                                 //
2370                                 if (!isEnregistered)
2371                                 {
2372                                     // We will need either an int register or a byte register
2373                                     // If we are not requesting a byte register we will request an int register
2374                                     //
2375                                     if (type != TYP_BYTE)
2376                                         type   = TYP_INT;
2377                                     predictReg = PREDICT_SCRATCH_REG;
2378                                 }
2379                             }
2380                         }
2381                     }
2382                 }
2383                 else
2384                 {
2385                     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2386                     if (preferReg != 0)
2387                     {
2388                         if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2389                         {
2390                             varDsc->addPrefReg(preferReg, this);
2391                         }
2392                     }
2393                 }
2394                 break; /* end of case GT_LCL_VAR */
2395
2396             case GT_JMP:
2397                 tree->gtUsedRegs = RBM_NONE;
2398                 regMask          = RBM_NONE;
2399
2400 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2401                 // Mark the registers required to emit a tailcall profiler callback
2402                 if (compIsProfilerHookNeeded())
2403                 {
2404                     tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2405                 }
2406 #endif
2407                 goto RETURN_CHECK;
2408
2409             default:
2410                 break;
2411         } /* end of switch (oper) */
2412
2413         /* If we don't need to evaluate to register, regmask is the empty set */
2414         /* Otherwise we grab a temp for the local variable                    */
2415
2416         if (predictReg == PREDICT_NONE)
2417             regMask = RBM_NONE;
2418         else
2419         {
2420             regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2421
2422             if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2423             {
2424                 /* We need to sign or zero extend a small type when pushing a struct */
2425                 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2426
2427                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2428                 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2429
2430                 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2431                      varNum++)
2432                 {
2433                     LclVarDsc* fldVar = lvaTable + varNum;
2434                     if (fldVar->lvTracked)
2435                     {
2436                         VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2437                         rpRecordRegIntf(regMask, fldBit DEBUGARG(
2438                                                      "need scratch register when pushing a small field of a struct"));
2439                     }
2440                 }
2441             }
2442         }
2443
2444         /* Update the set of lastUse variables that we encountered so far */
2445         if (lastUse)
2446         {
2447             VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2448             VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2449
2450             /*
2451              *  Add interference from any previously locked temps into this last use variable.
2452              */
2453             if (lockedRegs)
2454             {
2455                 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2456             }
2457             /*
2458              *  Add interference from any reserved temps into this last use variable.
2459              */
2460             if (rsvdRegs)
2461             {
2462                 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2463             }
2464             /*
2465              *  For partially enregistered longs add an interference with the
2466              *  register return by rpPredictRegPick
2467              */
2468             if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2469             {
2470                 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2471             }
2472         }
2473
2474         tree->gtUsedRegs = (regMaskSmall)regMask;
2475         goto RETURN_CHECK;
2476     }
2477
2478     /* Is it a 'simple' unary/binary operator? */
2479
2480     if (kind & GTK_SMPOP)
2481     {
2482         GenTreePtr op1 = tree->gtOp.gtOp1;
2483         GenTreePtr op2 = tree->gtGetOp2IfPresent();
2484
2485         GenTreePtr opsPtr[3];
2486         regMaskTP  regsPtr[3];
2487
2488         VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2489
2490         switch (oper)
2491         {
2492             case GT_ASG:
2493
2494                 /* Is the value being assigned into a LCL_VAR? */
2495                 if (op1->gtOper == GT_LCL_VAR)
2496                 {
2497                     varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2498
2499                     /* Are we assigning a LCL_VAR the result of a call? */
2500                     if (op2->gtOper == GT_CALL)
2501                     {
2502                         /* Set a preferred register for the LCL_VAR */
2503                         if (isRegPairType(varDsc->TypeGet()))
2504                             varDsc->addPrefReg(RBM_LNGRET, this);
2505                         else if (!varTypeIsFloating(varDsc->TypeGet()))
2506                             varDsc->addPrefReg(RBM_INTRET, this);
2507 #ifdef _TARGET_AMD64_
2508                         else
2509                             varDsc->addPrefReg(RBM_FLOATRET, this);
2510 #endif
2511                         /*
2512                          *  When assigning the result of a call we don't
2513                          *  bother trying to target the right side of the
2514                          *  assignment, since we have a fixed calling convention.
2515                          */
2516                     }
2517                     else if (varDsc->lvTracked)
2518                     {
2519                         // We interfere with uses in place
2520                         if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2521                         {
2522                             rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2523                         }
2524
2525                         // Did we predict that this local will be fully enregistered?
2526                         // and the assignment type is the same as the expression type?
2527                         // and it is dead on the right side of the assignment?
2528                         // and we current have no other rpAsgVarNum active?
2529                         //
2530                         if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2531                             (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2532                         {
2533                             //
2534                             //  Yes, we should try to target the right side (op2) of this
2535                             //  assignment into the (enregistered) tracked variable.
2536                             //
2537
2538                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2539                             op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2540
2541                             // Remember that this is a new use in place
2542
2543                             // We've added "new UseInPlace"; remove from the global set.
2544                             VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2545
2546                             //  Note that later when we walk down to the leaf node for op2
2547                             //  if we decide to actually use the register for the 'varDsc'
2548                             //  to enregister the operand, the we will set rpAsgVarNum to
2549                             //  varDsc->lvVarIndex, by extracting this value using
2550                             //  rpGetVarIndexForPredict()
2551                             //
2552                             //  Also we reset rpAsgVarNum back to -1 after we have finished
2553                             //  predicting the current GT_ASG node
2554                             //
2555                             goto ASG_COMMON;
2556                         }
2557                     }
2558                 }
2559                 else if (tree->OperIsBlkOp())
2560                 {
2561                     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2562                     regMask = 0;
2563                     goto RETURN_CHECK;
2564                 }
2565                 __fallthrough;
2566
2567             case GT_CHS:
2568
2569             case GT_ASG_OR:
2570             case GT_ASG_XOR:
2571             case GT_ASG_AND:
2572             case GT_ASG_SUB:
2573             case GT_ASG_ADD:
2574             case GT_ASG_MUL:
2575             case GT_ASG_DIV:
2576             case GT_ASG_UDIV:
2577
2578                 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2579                 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2580                 {
2581                     /* Is the value being assigned into an enregistered LCL_VAR? */
2582                     /* For debug code we only allow a simple op2 to be assigned */
2583                     if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2584                     {
2585                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2586                         /* Did we predict that this local will be enregistered? */
2587                         if (varDsc->lvRegNum != REG_STK)
2588                         {
2589                             /* Yes, we can use "reg <op>= addr" */
2590
2591                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2592                             op2PredictReg = PREDICT_NONE;
2593
2594                             goto ASG_COMMON;
2595                         }
2596                     }
2597                 }
2598
2599 #if CPU_LOAD_STORE_ARCH
2600                 if (oper != GT_ASG)
2601                 {
2602                     op1PredictReg = PREDICT_REG;
2603                     op2PredictReg = PREDICT_REG;
2604                 }
2605                 else
2606 #endif
2607                 {
2608                     /*
2609                      *  Otherwise, initialize the normal forcing of operands:
2610                      *   "addr <op>= reg"
2611                      */
2612                     op1PredictReg = PREDICT_ADDR;
2613                     op2PredictReg = PREDICT_REG;
2614                 }
2615
2616             ASG_COMMON:
2617
2618 #if !CPU_LOAD_STORE_ARCH
2619                 if (op2PredictReg != PREDICT_NONE)
2620                 {
2621                     /* Is the value being assigned a simple one? */
2622                     if (rpCanAsgOperWithoutReg(op2, false))
2623                         op2PredictReg = PREDICT_NONE;
2624                 }
2625 #endif
2626
2627                 bool simpleAssignment;
2628                 simpleAssignment = false;
2629
2630                 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2631                 {
2632                     // Add a variable interference from the assign target
2633                     // to each of the last use variables
2634                     if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2635                     {
2636                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2637
2638                         if (varDsc->lvTracked)
2639                         {
2640                             unsigned varIndex = varDsc->lvVarIndex;
2641
2642                             rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2643                         }
2644                     }
2645
2646                     /*  Record whether this tree is a simple assignment to a local */
2647
2648                     simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2649                 }
2650
2651                 bool requireByteReg;
2652                 requireByteReg = false;
2653
2654 #if CPU_HAS_BYTE_REGS
2655                 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2656
2657                 if (varTypeIsByte(type) &&
2658                     ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2659
2660                 {
2661                     // Byte-assignments typically need a byte register
2662                     requireByteReg = true;
2663
2664                     if (op1->gtOper == GT_LCL_VAR)
2665                     {
2666                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2667
2668                         // Did we predict that this local will be enregistered?
2669                         if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2670                         {
2671                             // We don't require a byte register when op1 is an enregistered local */
2672                             requireByteReg = false;
2673                         }
2674
2675                         // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2676                         if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2677                         {
2678                             // We should try to put op1 in an byte register
2679                             varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2680                         }
2681                     }
2682                 }
2683 #endif
2684
2685                 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2686
2687                 bool isWriteBarrierAsgNode;
2688                 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2689 #ifdef DEBUG
2690                 GCInfo::WriteBarrierForm wbf;
2691                 if (isWriteBarrierAsgNode)
2692                     wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2693                 else
2694                     wbf = GCInfo::WBF_NoBarrier;
2695 #endif // DEBUG
2696
2697                 regMaskTP wbaLockedRegs;
2698                 wbaLockedRegs = lockedRegs;
2699                 if (isWriteBarrierAsgNode)
2700                 {
2701 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2702 #ifdef DEBUG
2703                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2704                     {
2705 #endif // DEBUG
2706                         wbaLockedRegs |= RBM_WRITE_BARRIER;
2707                         op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2708                         assert(REG_WRITE_BARRIER == REG_EDX);
2709                         op1PredictReg = PREDICT_REG_EDX;
2710 #ifdef DEBUG
2711                     }
2712                     else
2713 #endif // DEBUG
2714 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2715
2716 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2717                     {
2718 #ifdef _TARGET_X86_
2719                         op1PredictReg = PREDICT_REG_ECX;
2720                         op2PredictReg = PREDICT_REG_EDX;
2721 #elif defined(_TARGET_ARM_)
2722                         op1PredictReg = PREDICT_REG_R0;
2723                         op2PredictReg = PREDICT_REG_R1;
2724
2725                         // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2726                         if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2727                         {
2728                             op1 = op1->gtOp.gtOp1;
2729                         }
2730 #else // !_TARGET_X86_ && !_TARGET_ARM_
2731 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2732 #endif
2733                     }
2734 #endif
2735                 }
2736
2737                 /*  Are we supposed to evaluate RHS first? */
2738
2739                 if (tree->gtFlags & GTF_REVERSE_OPS)
2740                 {
2741                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2742
2743 #if CPU_HAS_BYTE_REGS
2744                     // Should we insure that op2 gets evaluated into a byte register?
2745                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2746                     {
2747                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2748                         // and we can't select one that is already reserved (i.e. lockedRegs)
2749                         //
2750                         op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2751                         op2->gtUsedRegs |= op2Mask;
2752
2753                         // No longer a simple assignment because we're using extra registers and might
2754                         // have interference between op1 and op2.  See DevDiv #136681
2755                         simpleAssignment = false;
2756                     }
2757 #endif
2758                     /*
2759                      *  For a simple assignment we don't want the op2Mask to be
2760                      *  marked as interferring with the LCL_VAR, since it is likely
2761                      *  that we will want to enregister the LCL_VAR in exactly
2762                      *  the register that is used to compute op2
2763                      */
2764                     tmpMask = lockedRegs;
2765
2766                     if (!simpleAssignment)
2767                         tmpMask |= op2Mask;
2768
2769                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2770
2771                     // Did we relax the register prediction for op1 and op2 above ?
2772                     // - because we are depending upon op1 being enregistered
2773                     //
2774                     if ((op1PredictReg == PREDICT_NONE) &&
2775                         ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2776                     {
2777                         /* We must be assigning into an enregistered LCL_VAR */
2778                         noway_assert(op1->gtOper == GT_LCL_VAR);
2779                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2780                         noway_assert(varDsc->lvRegNum != REG_STK);
2781
2782                         /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2783                         varDsc->lvDependReg = true;
2784                     }
2785                 }
2786                 else
2787                 {
2788                     // For the case of simpleAssignments op2 should always be evaluated first
2789                     noway_assert(!simpleAssignment);
2790
2791                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2792                     if (isWriteBarrierAsgNode)
2793                     {
2794                         wbaLockedRegs |= op1->gtUsedRegs;
2795                     }
2796                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2797
2798 #if CPU_HAS_BYTE_REGS
2799                     // Should we insure that op2 gets evaluated into a byte register?
2800                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2801                     {
2802                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2803                         // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2804                         //
2805                         op2Mask |=
2806                             rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2807                         op2->gtUsedRegs |= op2Mask;
2808                     }
2809 #endif
2810                 }
2811
2812                 if (rpHasVarIndexForPredict(op2PredictReg))
2813                 {
2814                     rpAsgVarNum = -1;
2815                 }
2816
2817                 if (isWriteBarrierAsgNode)
2818                 {
2819 #if NOGC_WRITE_BARRIERS
2820 #ifdef DEBUG
2821                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2822                     {
2823 #endif // DEBUG
2824
2825                         /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2826                            passed to the write-barrier call in REG_WRITE_BARRIER */
2827
2828                         regMask = op2Mask;
2829
2830                         if (op1->gtOper == GT_IND)
2831                         {
2832                             GenTreePtr rv1, rv2;
2833                             unsigned   mul, cns;
2834                             bool       rev;
2835
2836                             /* Special handling of indirect assigns for write barrier */
2837
2838                             bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2839                                                                   &mul, &cns);
2840
2841                             /* Check address mode for enregisterable locals */
2842
2843                             if (yes)
2844                             {
2845                                 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2846                                 {
2847                                     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2848                                 }
2849                                 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2850                                 {
2851                                     rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2852                                 }
2853                             }
2854                         }
2855
2856                         if (op2->gtOper == GT_LCL_VAR)
2857                         {
2858                             rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2859                         }
2860
2861                         // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2862                         if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2863                         {
2864                             rpRecordRegIntf(RBM_WRITE_BARRIER,
2865                                             rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2866                         }
2867                         tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2868 #ifdef DEBUG
2869                     }
2870                     else
2871 #endif // DEBUG
2872 #endif // NOGC_WRITE_BARRIERS
2873
2874 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2875                     {
2876 #ifdef _TARGET_ARM_
2877 #ifdef DEBUG
2878                         if (verbose)
2879                             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2880 #endif
2881                         //
2882                         // For the ARM target we have an optimized JIT Helper
2883                         // that only trashes a subset of the callee saved registers
2884                         //
2885
2886                         // NOTE: Adding it to the gtUsedRegs will cause the interference to
2887                         // be added appropriately
2888
2889                         // the RBM_CALLEE_TRASH_NOGC set is killed.  We will record this in interferingRegs
2890                         // instead of gtUsedRegs, because the latter will be modified later, but we need
2891                         // to remember to add the interference.
2892
2893                         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2894
2895                         op1->gtUsedRegs |= RBM_R0;
2896                         op2->gtUsedRegs |= RBM_R1;
2897 #else // _TARGET_ARM_
2898
2899 #ifdef DEBUG
2900                         if (verbose)
2901                             printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2902 #endif
2903                         // We have to call a normal JIT helper to perform the Write Barrier Assignment
2904                         // It will trash the callee saved registers
2905
2906                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2907 #endif // _TARGET_ARM_
2908                     }
2909 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2910                 }
2911
2912                 if (simpleAssignment)
2913                 {
2914                     /*
2915                      *  Consider a simple assignment to a local:
2916                      *
2917                      *   lcl = expr;
2918                      *
2919                      *  Since the "=" node is visited after the variable
2920                      *  is marked live (assuming it's live after the
2921                      *  assignment), we don't want to use the register
2922                      *  use mask of the "=" node but rather that of the
2923                      *  variable itself.
2924                      */
2925                     tree->gtUsedRegs = op1->gtUsedRegs;
2926                 }
2927                 else
2928                 {
2929                     tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2930                 }
2931                 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2932                 goto RETURN_CHECK;
2933
2934             case GT_ASG_LSH:
2935             case GT_ASG_RSH:
2936             case GT_ASG_RSZ:
2937                 /* assigning shift operators */
2938
2939                 noway_assert(type != TYP_LONG);
2940
2941 #if CPU_LOAD_STORE_ARCH
2942                 predictReg = PREDICT_ADDR;
2943 #else
2944                 predictReg = PREDICT_NONE;
2945 #endif
2946
2947                 /* shift count is handled same as ordinary shift */
2948                 goto HANDLE_SHIFT_COUNT;
2949
2950             case GT_ADDR:
2951                 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2952
2953                 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2954                 {
2955                     // We need a scratch register for the LEA instruction
2956                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2957                 }
2958
2959                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2960                 goto RETURN_CHECK;
2961
2962             case GT_CAST:
2963
2964                 /* Cannot cast to VOID */
2965                 noway_assert(type != TYP_VOID);
2966
2967                 /* cast to long is special */
2968                 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2969                 {
2970                     noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2971 #if CPU_LONG_USES_REGPAIR
2972                     rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2973
2974                     if (rpHasVarIndexForPredict(predictReg))
2975                     {
2976                         unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2977                         rpAsgVarNum       = tgtIndex;
2978
2979                         // We don't need any register as we plan on writing to the rpAsgVarNum register
2980                         predictReg = PREDICT_NONE;
2981
2982                         LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2983                         tgtVar->lvDependReg = true;
2984
2985                         if (tgtVar->lvOtherReg != REG_STK)
2986                         {
2987                             predictRegHi = PREDICT_NONE;
2988                         }
2989                     }
2990                     else
2991 #endif
2992                         if (predictReg == PREDICT_NONE)
2993                     {
2994                         predictReg = PREDICT_SCRATCH_REG;
2995                     }
2996 #ifdef _TARGET_ARM_
2997                     // If we are widening an int into a long using a targeted register pair we
2998                     // should retarget so that the low part get loaded into the appropriate register
2999                     else if (predictReg == PREDICT_PAIR_R0R1)
3000                     {
3001                         predictReg   = PREDICT_REG_R0;
3002                         predictRegHi = PREDICT_REG_R1;
3003                     }
3004                     else if (predictReg == PREDICT_PAIR_R2R3)
3005                     {
3006                         predictReg   = PREDICT_REG_R2;
3007                         predictRegHi = PREDICT_REG_R3;
3008                     }
3009 #endif
3010 #ifdef _TARGET_X86_
3011                     // If we are widening an int into a long using a targeted register pair we
3012                     // should retarget so that the low part get loaded into the appropriate register
3013                     else if (predictReg == PREDICT_PAIR_EAXEDX)
3014                     {
3015                         predictReg   = PREDICT_REG_EAX;
3016                         predictRegHi = PREDICT_REG_EDX;
3017                     }
3018                     else if (predictReg == PREDICT_PAIR_ECXEBX)
3019                     {
3020                         predictReg   = PREDICT_REG_ECX;
3021                         predictRegHi = PREDICT_REG_EBX;
3022                     }
3023 #endif
3024
3025                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3026
3027 #if CPU_LONG_USES_REGPAIR
3028                     if (predictRegHi != PREDICT_NONE)
3029                     {
3030                         // Now get one more reg for the upper part
3031                         regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3032                     }
3033 #endif
3034                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3035                     goto RETURN_CHECK;
3036                 }
3037
3038                 /* cast from long is special - it frees a register */
3039                 if (type <= TYP_INT // nice.  this presumably is intended to mean "signed int and shorter types"
3040                     && op1->gtType == TYP_LONG)
3041                 {
3042                     if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3043                         predictReg = PREDICT_REG;
3044
3045                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3046
3047                     // If we have 2 or more regs, free one of them
3048                     if (!genMaxOneBit(regMask))
3049                     {
3050                         /* Clear the 2nd lowest bit in regMask */
3051                         /* First set tmpMask to the lowest bit in regMask */
3052                         tmpMask = genFindLowestBit(regMask);
3053                         /* Next find the second lowest bit in regMask */
3054                         tmpMask = genFindLowestBit(regMask & ~tmpMask);
3055                         /* Clear this bit from regmask */
3056                         regMask &= ~tmpMask;
3057                     }
3058                     tree->gtUsedRegs = op1->gtUsedRegs;
3059                     goto RETURN_CHECK;
3060                 }
3061
3062 #if CPU_HAS_BYTE_REGS
3063                 /* cast from signed-byte is special - it uses byteable registers */
3064                 if (type == TYP_INT)
3065                 {
3066                     var_types smallType;
3067
3068                     if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3069                         smallType = tree->gtCast.CastOp()->TypeGet();
3070                     else
3071                         smallType = tree->gtCast.gtCastType;
3072
3073                     if (smallType == TYP_BYTE)
3074                     {
3075                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3076
3077                         if ((regMask & RBM_BYTE_REGS) == 0)
3078                             regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3079
3080                         tree->gtUsedRegs = (regMaskSmall)regMask;
3081                         goto RETURN_CHECK;
3082                     }
3083                 }
3084 #endif
3085
3086 #if FEATURE_STACK_FP_X87
3087                 /* cast to float/double is special */
3088                 if (varTypeIsFloating(type))
3089                 {
3090                     switch (op1->TypeGet())
3091                     {
3092                         /* uses fild, so don't need to be loaded to reg */
3093                         case TYP_INT:
3094                         case TYP_LONG:
3095                             rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3096                             tree->gtUsedRegs = op1->gtUsedRegs;
3097                             regMask          = 0;
3098                             goto RETURN_CHECK;
3099                         default:
3100                             break;
3101                     }
3102                 }
3103
3104                 /* Casting from integral type to floating type is special */
3105                 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3106                 {
3107                     if (opts.compCanUseSSE2)
3108                     {
3109                         // predict for SSE2 based casting
3110                         if (predictReg <= PREDICT_REG)
3111                             predictReg = PREDICT_SCRATCH_REG;
3112                         regMask        = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3113
3114                         // Get one more int reg to hold cast result
3115                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3116                         tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3117                         goto RETURN_CHECK;
3118                     }
3119                 }
3120 #endif
3121
3122 #if FEATURE_FP_REGALLOC
3123                 // Are we casting between int to float or float to int
3124                 // Fix 388428 ARM JitStress WP7
3125                 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3126                 {
3127                     // op1 needs to go into a register
3128                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3129
3130 #ifdef _TARGET_ARM_
3131                     if (varTypeIsFloating(op1->TypeGet()))
3132                     {
3133                         // We also need a fp scratch register for the convert operation
3134                         regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3135                                                     PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3136                     }
3137 #endif
3138                     // We also need a register to hold the result
3139                     regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3140                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3141                     goto RETURN_CHECK;
3142                 }
3143 #endif
3144
3145                 /* otherwise must load op1 into a register */
3146                 goto GENERIC_UNARY;
3147
3148             case GT_INTRINSIC:
3149
3150 #ifdef _TARGET_XARCH_
3151                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3152                 {
3153                     // This is a special case to handle the following
3154                     // optimization: conv.i4(round.d(d)) -> round.i(d)
3155                     // if flowgraph 3186
3156
3157                     if (predictReg <= PREDICT_REG)
3158                         predictReg = PREDICT_SCRATCH_REG;
3159
3160                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3161
3162                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3163
3164                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3165                     goto RETURN_CHECK;
3166                 }
3167 #endif
3168                 __fallthrough;
3169
3170             case GT_NEG:
3171 #ifdef _TARGET_ARM_
3172                 if (tree->TypeGet() == TYP_LONG)
3173                 {
3174                     // On ARM this consumes an extra register for the '0' value
3175                     if (predictReg <= PREDICT_REG)
3176                         predictReg = PREDICT_SCRATCH_REG;
3177
3178                     regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3179
3180                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3181
3182                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3183                     goto RETURN_CHECK;
3184                 }
3185 #endif // _TARGET_ARM_
3186
3187                 __fallthrough;
3188
3189             case GT_NOT:
3190             // these unary operators will write new values
3191             // and thus will need a scratch register
3192             GENERIC_UNARY:
3193                 /* generic unary operators */
3194
3195                 if (predictReg <= PREDICT_REG)
3196                     predictReg = PREDICT_SCRATCH_REG;
3197
3198                 __fallthrough;
3199
3200             case GT_NOP:
3201                 // these unary operators do not write new values
3202                 // and thus won't need a scratch register
3203                 CLANG_FORMAT_COMMENT_ANCHOR;
3204
3205 #if OPT_BOOL_OPS
3206                 if (!op1)
3207                 {
3208                     tree->gtUsedRegs = 0;
3209                     regMask          = 0;
3210                     goto RETURN_CHECK;
3211                 }
3212 #endif
3213                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3214                 tree->gtUsedRegs = op1->gtUsedRegs;
3215                 goto RETURN_CHECK;
3216
3217             case GT_IND:
3218             case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3219             {
3220                 bool      intoReg = true;
3221                 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3222
3223                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3224                 {
3225                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3226                 }
3227
3228                 if (predictReg == PREDICT_ADDR)
3229                 {
3230                     intoReg = false;
3231                 }
3232                 else if (predictReg == PREDICT_NONE)
3233                 {
3234                     if (type != TYP_LONG)
3235                     {
3236                         intoReg = false;
3237                     }
3238                     else
3239                     {
3240                         predictReg = PREDICT_REG;
3241                     }
3242                 }
3243
3244                 /* forcing to register? */
3245                 if (intoReg && (type != TYP_LONG))
3246                 {
3247                     rsvdRegs |= RBM_LASTUSE;
3248                 }
3249
3250                 GenTreePtr lenCSE;
3251                 lenCSE = NULL;
3252
3253                 /* check for address mode */
3254                 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3255                 tmpMask = RBM_NONE;
3256
3257 #if CPU_LOAD_STORE_ARCH
3258                 // We may need a scratch register for loading a long
3259                 if (type == TYP_LONG)
3260                 {
3261                     /* This scratch register immediately dies */
3262                     tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3263                 }
3264 #endif // CPU_LOAD_STORE_ARCH
3265
3266 #ifdef _TARGET_ARM_
3267                 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3268                 //
3269                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3270                 {
3271                     /* These integer register(s) immediately die */
3272                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3273                     // Two integer registers are required for a TYP_DOUBLE
3274                     if (type == TYP_DOUBLE)
3275                         tmpMask |=
3276                             rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3277                 }
3278 #endif
3279
3280                 /* forcing to register? */
3281                 if (intoReg)
3282                 {
3283                     regMaskTP lockedMask = lockedRegs | rsvdRegs;
3284                     tmpMask |= regMask;
3285
3286                     // We will compute a new regMask that holds the register(s)
3287                     // that we will load the indirection into.
3288                     //
3289                     CLANG_FORMAT_COMMENT_ANCHOR;
3290
3291 #ifndef _TARGET_64BIT_
3292                     if (type == TYP_LONG)
3293                     {
3294                         // We need to use multiple load instructions here:
3295                         // For the first register we can not choose
3296                         // any registers that are being used in place or
3297                         // any register in the current regMask
3298                         //
3299                         regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3300
3301                         // For the second register we can choose a register that was
3302                         // used in place or any register in the old now overwritten regMask
3303                         // but not the same register that we picked above in 'regMask'
3304                         //
3305                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3306                         regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3307                     }
3308                     else
3309 #endif
3310                     {
3311                         // We will use one load instruction here:
3312                         // The load target register can be a register that was used in place
3313                         // or one of the register from the orginal regMask.
3314                         //
3315                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3316                         regMask = rpPredictRegPick(type, predictReg, lockedMask);
3317                     }
3318                 }
3319                 else if (predictReg != PREDICT_ADDR)
3320                 {
3321                     /* Unless the caller specified PREDICT_ADDR   */
3322                     /* we don't return the temp registers used    */
3323                     /* to form the address                        */
3324                     regMask = RBM_NONE;
3325                 }
3326             }
3327
3328                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3329
3330                 goto RETURN_CHECK;
3331
3332             case GT_EQ:
3333             case GT_NE:
3334             case GT_LT:
3335             case GT_LE:
3336             case GT_GE:
3337             case GT_GT:
3338
3339 #ifdef _TARGET_X86_
3340                 /* Floating point comparison uses EAX for flags */
3341                 if (varTypeIsFloating(op1->TypeGet()))
3342                 {
3343                     regMask = RBM_EAX;
3344                 }
3345                 else
3346 #endif
3347                     if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3348                 {
3349                     // Some comparisons are converted to ?:
3350                     noway_assert(!fgMorphRelopToQmark(op1));
3351
3352                     if (predictReg <= PREDICT_REG)
3353                         predictReg = PREDICT_SCRATCH_REG;
3354
3355                     // The set instructions need a byte register
3356                     regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3357                 }
3358                 else
3359                 {
3360                     regMask = RBM_NONE;
3361 #ifdef _TARGET_XARCH_
3362                     tmpMask = RBM_NONE;
3363                     // Optimize the compare with a constant cases for xarch
3364                     if (op1->gtOper == GT_CNS_INT)
3365                     {
3366                         if (op2->gtOper == GT_CNS_INT)
3367                             tmpMask =
3368                                 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3369                         rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3370                         tree->gtUsedRegs = op2->gtUsedRegs;
3371                         goto RETURN_CHECK;
3372                     }
3373                     else if (op2->gtOper == GT_CNS_INT)
3374                     {
3375                         rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3376                         tree->gtUsedRegs = op1->gtUsedRegs;
3377                         goto RETURN_CHECK;
3378                     }
3379                     else if (op2->gtOper == GT_CNS_LNG)
3380                     {
3381                         regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3382 #ifdef _TARGET_X86_
3383                         // We also need one extra register to read values from
3384                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3385 #endif // _TARGET_X86_
3386                         tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3387                         goto RETURN_CHECK;
3388                     }
3389 #endif // _TARGET_XARCH_
3390                 }
3391
3392                 unsigned op1TypeSize;
3393                 unsigned op2TypeSize;
3394
3395                 op1TypeSize = genTypeSize(op1->TypeGet());
3396                 op2TypeSize = genTypeSize(op2->TypeGet());
3397
3398                 op1PredictReg = PREDICT_REG;
3399                 op2PredictReg = PREDICT_REG;
3400
3401                 if (tree->gtFlags & GTF_REVERSE_OPS)
3402                 {
3403 #ifdef _TARGET_XARCH_
3404                     if (op1TypeSize == sizeof(int))
3405                         op1PredictReg = PREDICT_NONE;
3406 #endif
3407
3408                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3409                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3410                 }
3411                 else
3412                 {
3413 #ifdef _TARGET_XARCH_
3414                     // For full DWORD compares we can have
3415                     //
3416                     //      op1 is an address mode and op2 is a register
3417                     // or
3418                     //      op1 is a register and op2 is an address mode
3419                     //
3420                     if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3421                     {
3422                         if (op2->gtOper == GT_LCL_VAR)
3423                         {
3424                             unsigned lclNum = op2->gtLclVar.gtLclNum;
3425                             varDsc          = lvaTable + lclNum;
3426                             /* Did we predict that this local will be enregistered? */
3427                             if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3428                             {
3429                                 op1PredictReg = PREDICT_ADDR;
3430                             }
3431                         }
3432                     }
3433                     // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3434                     if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3435                         op2PredictReg = PREDICT_ADDR;
3436 #endif // _TARGET_XARCH_
3437
3438                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3439 #ifdef _TARGET_ARM_
3440                     if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3441 #endif
3442                     {
3443                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3444                     }
3445                 }
3446
3447 #ifdef _TARGET_XARCH_
3448                 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3449                 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3450                 // if one of the registers is small and the types aren't equal.
3451
3452                 if (regMask == RBM_NONE)
3453                 {
3454                     rpPredictReg op1xPredictReg, op2xPredictReg;
3455                     GenTreePtr   op1x, op2x;
3456                     if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3457                     {
3458                         op1xPredictReg = op2PredictReg;
3459                         op2xPredictReg = op1PredictReg;
3460                         op1x           = op2;
3461                         op2x           = op1;
3462                     }
3463                     else
3464                     {
3465                         op1xPredictReg = op1PredictReg;
3466                         op2xPredictReg = op2PredictReg;
3467                         op1x           = op1;
3468                         op2x           = op2;
3469                     }
3470                     if ((op1xPredictReg < PREDICT_REG) &&  // op1 doesn't get a register (probably an indir)
3471                         (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3472                         varTypeIsSmall(op1x->TypeGet()))   // op1 is smaller than an int
3473                     {
3474                         bool needTmp = false;
3475
3476                         // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3477                         // We could predict a byteable register for op2x, but what if we don't get it?
3478                         // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3479                         // result.
3480                         if (varTypeIsByte(op1x->TypeGet()))
3481                         {
3482                             needTmp = true;
3483                         }
3484                         else
3485                         {
3486                             if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3487                             {
3488                                 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3489                                     needTmp = true;
3490                             }
3491                             else
3492                             {
3493                                 if (op1x->TypeGet() != op2x->TypeGet())
3494                                     needTmp = true;
3495                             }
3496                         }
3497                         if (needTmp)
3498                         {
3499                             regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3500                         }
3501                     }
3502                 }
3503 #endif // _TARGET_XARCH_
3504
3505                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3506                 goto RETURN_CHECK;
3507
3508             case GT_MUL:
3509
3510 #ifndef _TARGET_AMD64_
3511                 if (type == TYP_LONG)
3512                 {
3513                     assert(tree->gtIsValid64RsltMul());
3514
3515                     /* Strip out the cast nodes */
3516
3517                     noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3518                     op1 = op1->gtCast.CastOp();
3519                     op2 = op2->gtCast.CastOp();
3520 #else
3521                 if (false)
3522                 {
3523 #endif // !_TARGET_AMD64_
3524                 USE_MULT_EAX:
3525
3526 #if defined(_TARGET_X86_)
3527                     // This will done by a 64-bit imul "imul eax, reg"
3528                     //   (i.e. EDX:EAX = EAX * reg)
3529
3530                     /* Are we supposed to evaluate op2 first? */
3531                     if (tree->gtFlags & GTF_REVERSE_OPS)
3532                     {
3533                         rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3534                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3535                     }
3536                     else
3537                     {
3538                         rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3539                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3540                     }
3541
3542                     /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3543
3544                     tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3545
3546                     /* set regMask to the set of held registers */
3547
3548                     regMask = RBM_PAIR_TMP_LO;
3549
3550                     if (type == TYP_LONG)
3551                         regMask |= RBM_PAIR_TMP_HI;
3552
3553 #elif defined(_TARGET_ARM_)
3554                     // This will done by a 4 operand multiply
3555
3556                     // Are we supposed to evaluate op2 first?
3557                     if (tree->gtFlags & GTF_REVERSE_OPS)
3558                     {
3559                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3560                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3561                     }
3562                     else
3563                     {
3564                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3565                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3566                     }
3567
3568                     // set regMask to the set of held registers,
3569                     //  the two scratch register we need to compute the mul result
3570
3571                     regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3572
3573                     // set gtUsedRegs toregMask and the registers needed by op1 and op2
3574
3575                     tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3576
3577 #else // !_TARGET_X86_ && !_TARGET_ARM_
3578 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3579 #endif
3580
3581                     goto RETURN_CHECK;
3582                 }
3583                 else
3584                 {
3585                     /* We use imulEAX for most unsigned multiply operations */
3586                     if (tree->gtOverflow())
3587                     {
3588                         if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3589                         {
3590                             goto USE_MULT_EAX;
3591                         }
3592                     }
3593                 }
3594
3595                 __fallthrough;
3596
3597             case GT_OR:
3598             case GT_XOR:
3599             case GT_AND:
3600
3601             case GT_SUB:
3602             case GT_ADD:
3603                 tree->gtUsedRegs = 0;
3604
3605                 if (predictReg <= PREDICT_REG)
3606                     predictReg = PREDICT_SCRATCH_REG;
3607
3608             GENERIC_BINARY:
3609
3610                 noway_assert(op2);
3611                 if (tree->gtFlags & GTF_REVERSE_OPS)
3612                 {
3613                     op1PredictReg = PREDICT_REG;
3614 #if !CPU_LOAD_STORE_ARCH
3615                     if (genTypeSize(op1->gtType) >= sizeof(int))
3616                         op1PredictReg = PREDICT_NONE;
3617 #endif
3618                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3619                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3620                 }
3621                 else
3622                 {
3623                     op2PredictReg = PREDICT_REG;
3624 #if !CPU_LOAD_STORE_ARCH
3625                     if (genTypeSize(op2->gtType) >= sizeof(int))
3626                         op2PredictReg = PREDICT_NONE;
3627 #endif
3628                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3629 #ifdef _TARGET_ARM_
3630                     // For most ALU operations we can generate a single instruction that encodes
3631                     // a small immediate integer constant value.  (except for multiply)
3632                     //
3633                     if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3634                     {
3635                         ssize_t ival = op2->gtIntCon.gtIconVal;
3636                         if (codeGen->validImmForAlu(ival))
3637                         {
3638                             op2PredictReg = PREDICT_NONE;
3639                         }
3640                         else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3641                                  ((oper == GT_ADD) || (oper == GT_SUB)))
3642                         {
3643                             op2PredictReg = PREDICT_NONE;
3644                         }
3645                     }
3646                     if (op2PredictReg == PREDICT_NONE)
3647                     {
3648                         op2->gtUsedRegs = RBM_NONE;
3649                     }
3650                     else
3651 #endif
3652                     {
3653                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3654                     }
3655                 }
3656                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3657
3658 #if CPU_HAS_BYTE_REGS
3659                 /* We have special register requirements for byte operations */
3660
3661                 if (varTypeIsByte(tree->TypeGet()))
3662                 {
3663                     /* For 8 bit arithmetic, one operands has to be in a
3664                        byte-addressable register, and the other has to be
3665                        in a byte-addrble reg or in memory. Assume its in a reg */
3666
3667                     regMaskTP regByteMask = 0;
3668                     regMaskTP op1ByteMask = op1->gtUsedRegs;
3669
3670                     if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3671                     {
3672                         // Pick a Byte register to use for op1
3673                         regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3674                         op1ByteMask = regByteMask;
3675                     }
3676
3677                     if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3678                     {
3679                         // Pick a Byte register to use for op2, avoiding the one used by op1
3680                         regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3681                     }
3682
3683                     if (regByteMask)
3684                     {
3685                         tree->gtUsedRegs |= regByteMask;
3686                         regMask = regByteMask;
3687                     }
3688                 }
3689 #endif
3690                 goto RETURN_CHECK;
3691
3692             case GT_DIV:
3693             case GT_MOD:
3694
3695             case GT_UDIV:
3696             case GT_UMOD:
3697
3698                 /* non-integer division handled in generic way */
3699                 if (!varTypeIsIntegral(type))
3700                 {
3701                     tree->gtUsedRegs = 0;
3702                     if (predictReg <= PREDICT_REG)
3703                         predictReg = PREDICT_SCRATCH_REG;
3704                     goto GENERIC_BINARY;
3705                 }
3706
3707 #ifndef _TARGET_64BIT_
3708
3709                 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3710                 {
3711                     /* Special case:  a mod with an int op2 is done inline using idiv or div
3712                        to avoid a costly call to the helper */
3713
3714                     noway_assert((op2->gtOper == GT_CNS_LNG) &&
3715                                  (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3716
3717 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3718                     if (tree->gtFlags & GTF_REVERSE_OPS)
3719                     {
3720                         tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3721                                                       rsvdRegs | op1->gtRsvdRegs);
3722                         tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3723                     }
3724                     else
3725                     {
3726                         tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3727                         tmpMask |=
3728                             rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3729                     }
3730                     regMask = RBM_PAIR_TMP;
3731 #else // !_TARGET_X86_ && !_TARGET_ARM_
3732 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3733 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3734
3735                     tree->gtUsedRegs =
3736                         (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3737                                        rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3738
3739                     goto RETURN_CHECK;
3740                 }
3741 #endif // _TARGET_64BIT_
3742
3743                 /* no divide immediate, so force integer constant which is not
3744                  * a power of two to register
3745                  */
3746
3747                 if (op2->OperKind() & GTK_CONST)
3748                 {
3749                     ssize_t ival = op2->gtIntConCommon.IconValue();
3750
3751                     /* Is the divisor a power of 2 ? */
3752
3753                     if (ival > 0 && genMaxOneBit(size_t(ival)))
3754                     {
3755                         goto GENERIC_UNARY;
3756                     }
3757                     else
3758                         op2PredictReg = PREDICT_SCRATCH_REG;
3759                 }
3760                 else
3761                 {
3762                     /* Non integer constant also must be enregistered */
3763                     op2PredictReg = PREDICT_REG;
3764                 }
3765
3766                 regMaskTP trashedMask;
3767                 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3768                 regMaskTP op1ExcludeMask;
3769                 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3770                 regMaskTP op2ExcludeMask;
3771                 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3772
3773 #ifdef _TARGET_XARCH_
3774                 /*  Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3775                  *  we can safely allow the "b" value to die. Unfortunately, if we simply
3776                  *  mark the node "b" as using EDX, this will not work if "b" is a register
3777                  *  variable that dies with this particular reference. Thus, if we want to
3778                  *  avoid this situation (where we would have to spill the variable from
3779                  *  EDX to someplace else), we need to explicitly mark the interference
3780                  *  of the variable at this point.
3781                  */
3782
3783                 if (op2->gtOper == GT_LCL_VAR)
3784                 {
3785                     unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3786                     varDsc          = lvaTable + lclNum;
3787                     if (varDsc->lvTracked)
3788                     {
3789 #ifdef DEBUG
3790                         if (verbose)
3791                         {
3792                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3793                                 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3794                                        varDsc->lvVarIndex);
3795                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3796                                 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3797                                        varDsc->lvVarIndex);
3798                         }
3799 #endif
3800                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3801                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3802                     }
3803                 }
3804
3805                 /* set the held register based on opcode */
3806                 if (oper == GT_DIV || oper == GT_UDIV)
3807                     regMask = RBM_EAX;
3808                 else
3809                     regMask    = RBM_EDX;
3810                 trashedMask    = (RBM_EAX | RBM_EDX);
3811                 op1ExcludeMask = 0;
3812                 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3813
3814 #endif // _TARGET_XARCH_
3815
3816 #ifdef _TARGET_ARM_
3817                 trashedMask    = RBM_NONE;
3818                 op1ExcludeMask = RBM_NONE;
3819                 op2ExcludeMask = RBM_NONE;
3820 #endif
3821
3822                 /* set the lvPref reg if possible */
3823                 GenTreePtr dest;
3824                 /*
3825                  *  Walking the gtNext link twice from here should get us back
3826                  *  to our parent node, if this is an simple assignment tree.
3827                  */
3828                 dest = tree->gtNext;
3829                 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3830                     dest->gtNext->gtOp.gtOp2 == tree)
3831                 {
3832                     varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3833                     varDsc->addPrefReg(regMask, this);
3834                 }
3835 #ifdef _TARGET_XARCH_
3836                 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3837 #else
3838                 op1PredictReg        = PREDICT_SCRATCH_REG;
3839 #endif
3840
3841                 /* are we supposed to evaluate op2 first? */
3842                 if (tree->gtFlags & GTF_REVERSE_OPS)
3843                 {
3844                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3845                                                   rsvdRegs | op1->gtRsvdRegs);
3846                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3847                 }
3848                 else
3849                 {
3850                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3851                                                   rsvdRegs | op2->gtRsvdRegs);
3852                     rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3853                 }
3854 #ifdef _TARGET_ARM_
3855                 regMask = tmpMask;
3856 #endif
3857                 /* grab EAX, EDX for this tree node */
3858                 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3859
3860                 goto RETURN_CHECK;
3861
3862             case GT_LSH:
3863             case GT_RSH:
3864             case GT_RSZ:
3865
3866                 if (predictReg <= PREDICT_REG)
3867                     predictReg = PREDICT_SCRATCH_REG;
3868
3869 #ifndef _TARGET_64BIT_
3870                 if (type == TYP_LONG)
3871                 {
3872                     if (op2->IsCnsIntOrI())
3873                     {
3874                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3875                         // no register used by op2
3876                         op2->gtUsedRegs  = 0;
3877                         tree->gtUsedRegs = op1->gtUsedRegs;
3878                     }
3879                     else
3880                     {
3881                         // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3882                         tmpMask = lockedRegs;
3883                         tmpMask &= ~RBM_LNGARG_0;
3884                         tmpMask &= ~RBM_SHIFT_LNG;
3885
3886                         // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3887                         if (tree->gtFlags & GTF_REVERSE_OPS)
3888                         {
3889                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3890                             tmpMask |= RBM_SHIFT_LNG;
3891                             // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife
3892                             // Fix 383843 X86/ARM ILGEN
3893                             rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3894                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3895                         }
3896                         else
3897                         {
3898                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3899                             tmpMask |= RBM_LNGARG_0;
3900                             // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3901                             // Fix 383839 ARM ILGEN
3902                             rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3903                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3904                         }
3905                         regMask = RBM_LNGRET; // function return registers
3906                         op1->gtUsedRegs |= RBM_LNGARG_0;
3907                         op2->gtUsedRegs |= RBM_SHIFT_LNG;
3908
3909                         tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3910
3911                         // We are using a helper function to do shift:
3912                         //
3913                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3914                     }
3915                 }
3916                 else
3917 #endif // _TARGET_64BIT_
3918                 {
3919 #ifdef _TARGET_XARCH_
3920                     if (!op2->IsCnsIntOrI())
3921                         predictReg = PREDICT_NOT_REG_ECX;
3922 #endif
3923
3924                 HANDLE_SHIFT_COUNT:
3925                     // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3926
3927                     regMaskTP tmpRsvdRegs;
3928
3929                     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3930                     {
3931                         regMask     = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3932                         rsvdRegs    = RBM_LASTUSE;
3933                         tmpRsvdRegs = RBM_NONE;
3934                     }
3935                     else
3936                     {
3937                         regMask = RBM_NONE;
3938                         // Special case op1 of a constant
3939                         if (op1->IsCnsIntOrI())
3940                             tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3941                                                        // System.Xml.Schema.BitSet:Get(int):bool
3942                         else
3943                             tmpRsvdRegs = op1->gtRsvdRegs;
3944                     }
3945
3946                     op2Mask = RBM_NONE;
3947                     if (!op2->IsCnsIntOrI())
3948                     {
3949                         if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3950                         {
3951                             op2PredictReg = PREDICT_REG_SHIFT;
3952                         }
3953                         else
3954                         {
3955                             op2PredictReg = PREDICT_REG;
3956                         }
3957
3958                         /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3959                         op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3960
3961                         // If our target arch has a REG_SHIFT register then
3962                         //     we set the PrefReg when we have a LclVar for op2
3963                         //     we add an interference with REG_SHIFT for any other LclVars alive at op2
3964                         if (REG_SHIFT != REG_NA)
3965                         {
3966                             VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3967
3968                             while (op2->gtOper == GT_COMMA)
3969                             {
3970                                 op2 = op2->gtOp.gtOp2;
3971                             }
3972
3973                             if (op2->gtOper == GT_LCL_VAR)
3974                             {
3975                                 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3976                                 varDsc->setPrefReg(REG_SHIFT, this);
3977                                 if (varDsc->lvTracked)
3978                                 {
3979                                     VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3980                                 }
3981                             }
3982
3983                             // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3984                             // excluding the LclVar that was used for the shift amount as it is read-only
3985                             // and can be kept alive through the shift operation
3986                             //
3987                             rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3988                             // In case op2Mask doesn't contain the required shift register,
3989                             // we will or it in now.
3990                             op2Mask |= RBM_SHIFT;
3991                         }
3992                     }
3993
3994                     if (tree->gtFlags & GTF_REVERSE_OPS)
3995                     {
3996                         assert(regMask == RBM_NONE);
3997                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3998                     }
3999
4000 #if CPU_HAS_BYTE_REGS
4001                     if (varTypeIsByte(type))
4002                     {
4003                         // Fix 383789 X86 ILGEN
4004                         // Fix 383813 X86 ILGEN
4005                         // Fix 383828 X86 ILGEN
4006                         if (op1->gtOper == GT_LCL_VAR)
4007                         {
4008                             varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4009                             if (varDsc->lvTracked)
4010                             {
4011                                 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4012
4013                                 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4014                                 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4015                             }
4016                         }
4017                         if ((regMask & RBM_BYTE_REGS) == 0)
4018                         {
4019                             // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4020                             // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4021                             //
4022                             regMask |=
4023                                 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4024                         }
4025                     }
4026 #endif
4027                     tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4028                 }
4029
4030                 goto RETURN_CHECK;
4031
4032             case GT_COMMA:
4033                 if (tree->gtFlags & GTF_REVERSE_OPS)
4034                 {
4035                     if (predictReg == PREDICT_NONE)
4036                     {
4037                         predictReg = PREDICT_REG;
4038                     }
4039                     else if (rpHasVarIndexForPredict(predictReg))
4040                     {
4041                         /* Don't propagate the use of tgt reg use in a GT_COMMA */
4042                         predictReg = PREDICT_SCRATCH_REG;
4043                     }
4044
4045                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4046                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4047                 }
4048                 else
4049                 {
4050                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4051
4052                     /* CodeGen will enregister the op2 side of a GT_COMMA */
4053                     if (predictReg == PREDICT_NONE)
4054                     {
4055                         predictReg = PREDICT_REG;
4056                     }
4057                     else if (rpHasVarIndexForPredict(predictReg))
4058                     {
4059                         /* Don't propagate the use of tgt reg use in a GT_COMMA */
4060                         predictReg = PREDICT_SCRATCH_REG;
4061                     }
4062
4063                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4064                 }
4065                 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4066                 //
4067                 tree->gtUsedRegs = op2->gtUsedRegs;
4068                 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4069                 {
4070                     LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4071
4072                     if (op2VarDsc->lvTracked)
4073                     {
4074                         VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4075                         rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4076                     }
4077                 }
4078                 goto RETURN_CHECK;
4079
4080             case GT_QMARK:
4081             {
4082                 noway_assert(op1 != NULL && op2 != NULL);
4083
4084                 /*
4085                  *  If the gtUsedRegs conflicts with lockedRegs
4086                  *  then we going to have to spill some registers
4087                  *  into the non-trashed register set to keep it alive
4088                  */
4089                 unsigned spillCnt;
4090                 spillCnt = 0;
4091                 regMaskTP spillRegs;
4092                 spillRegs = lockedRegs & tree->gtUsedRegs;
4093
4094                 while (spillRegs)
4095                 {
4096                     /* Find the next register that needs to be spilled */
4097                     tmpMask = genFindLowestBit(spillRegs);
4098
4099 #ifdef DEBUG
4100                     if (verbose)
4101                     {
4102                         printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4103                         gtDispTree(tree, 0, NULL, true);
4104                     }
4105 #endif
4106                     /* In Codegen it will typically introduce a spill temp here */
4107                     /* rather than relocating the register to a non trashed reg */
4108                     rpPredictSpillCnt++;
4109                     spillCnt++;
4110
4111                     /* Remove it from the spillRegs and lockedRegs*/
4112                     spillRegs &= ~tmpMask;
4113                     lockedRegs &= ~tmpMask;
4114                 }
4115                 {
4116                     VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4117
4118                     /* Evaluate the <cond> subtree */
4119                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4120                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4121                     tree->gtUsedRegs = op1->gtUsedRegs;
4122
4123                     noway_assert(op2->gtOper == GT_COLON);
4124                     if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4125                     {
4126                         // Don't try to target the register specified in predictReg when we have complex subtrees
4127                         //
4128                         predictReg = PREDICT_SCRATCH_REG;
4129                     }
4130                     GenTreePtr elseTree = op2->AsColon()->ElseNode();
4131                     GenTreePtr thenTree = op2->AsColon()->ThenNode();
4132
4133                     noway_assert(thenTree != NULL && elseTree != NULL);
4134
4135                     // Update compCurLife to only those vars live on the <then> subtree
4136
4137                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4138
4139                     if (type == TYP_VOID)
4140                     {
4141                         /* Evaluate the <then> subtree */
4142                         rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4143                         regMask    = RBM_NONE;
4144                         predictReg = PREDICT_NONE;
4145                     }
4146                     else
4147                     {
4148                         // A mask to use to force the predictor to choose low registers (to reduce code size)
4149                         regMaskTP avoidRegs = RBM_NONE;
4150 #ifdef _TARGET_ARM_
4151                         avoidRegs = (RBM_R12 | RBM_LR);
4152 #endif
4153                         if (predictReg <= PREDICT_REG)
4154                             predictReg = PREDICT_SCRATCH_REG;
4155
4156                         /* Evaluate the <then> subtree */
4157                         regMask =
4158                             rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4159
4160                         if (regMask)
4161                         {
4162                             rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4163                             if (op1PredictReg != PREDICT_NONE)
4164                                 predictReg = op1PredictReg;
4165                         }
4166                     }
4167
4168                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4169
4170                     /* Evaluate the <else> subtree */
4171                     // First record the post-then liveness, and reset the current liveness to the else
4172                     // branch liveness.
4173                     CLANG_FORMAT_COMMENT_ANCHOR;
4174
4175 #ifdef DEBUG
4176                     VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4177 #endif
4178
4179                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4180
4181                     rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4182                     tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4183
4184                     // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4185                     // They each have only one successor, which they share.  Their live-out sets must equal the
4186                     // live-in set of this virtual successor block, and thus must be the same.  We can assert
4187                     // that equality here.
4188                     assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4189
4190                     if (spillCnt > 0)
4191                     {
4192                         regMaskTP reloadMask = RBM_NONE;
4193
4194                         while (spillCnt)
4195                         {
4196                             regMaskTP reloadReg;
4197
4198                             /* Get an extra register to hold it */
4199                             reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4200 #ifdef DEBUG
4201                             if (verbose)
4202                             {
4203                                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4204                                 gtDispTree(tree, 0, NULL, true);
4205                             }
4206 #endif
4207                             reloadMask |= reloadReg;
4208
4209                             spillCnt--;
4210                         }
4211
4212                         /* update the gtUsedRegs mask */
4213                         tree->gtUsedRegs |= reloadMask;
4214                     }
4215                 }
4216
4217                 goto RETURN_CHECK;
4218             }
4219             case GT_RETURN:
4220                 tree->gtUsedRegs = RBM_NONE;
4221                 regMask          = RBM_NONE;
4222
4223                 /* Is there a return value? */
4224                 if (op1 != NULL)
4225                 {
4226 #if FEATURE_FP_REGALLOC
4227                     if (varTypeIsFloating(type))
4228                     {
4229                         predictReg = PREDICT_FLTRET;
4230                         if (type == TYP_FLOAT)
4231                             regMask = RBM_FLOATRET;
4232                         else
4233                             regMask = RBM_DOUBLERET;
4234                     }
4235                     else
4236 #endif
4237                         if (isRegPairType(type))
4238                     {
4239                         predictReg = PREDICT_LNGRET;
4240                         regMask    = RBM_LNGRET;
4241                     }
4242                     else
4243                     {
4244                         predictReg = PREDICT_INTRET;
4245                         regMask    = RBM_INTRET;
4246                     }
4247                     if (info.compCallUnmanaged)
4248                     {
4249                         lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4250                     }
4251                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4252                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4253                 }
4254
4255 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4256                 // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED.
4257                 // We could optimize on registers based on int/long or no return value.  But to
4258                 // keep it simple we will mark entire RBM_PROFILER_RETURN_USED as used regs here.
4259                 if (compIsProfilerHookNeeded())
4260                 {
4261                     tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4262                 }
4263
4264 #endif
4265                 goto RETURN_CHECK;
4266
4267             case GT_RETFILT:
4268                 if (op1 != NULL)
4269                 {
4270                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4271                     regMask          = genReturnRegForTree(tree);
4272                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4273                     goto RETURN_CHECK;
4274                 }
4275                 tree->gtUsedRegs = 0;
4276                 regMask          = 0;
4277
4278                 goto RETURN_CHECK;
4279
4280             case GT_JTRUE:
4281                 /* This must be a test of a relational operator */
4282
4283                 noway_assert(op1->OperIsCompare());
4284
4285                 /* Only condition code set by this operation */
4286
4287                 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4288
4289                 tree->gtUsedRegs = op1->gtUsedRegs;
4290                 regMask          = 0;
4291
4292                 goto RETURN_CHECK;
4293
4294             case GT_SWITCH:
4295                 noway_assert(type <= TYP_INT);
4296                 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4297 #ifdef _TARGET_ARM_
4298                 {
4299                     regMask          = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4300                     unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4301                     if (jumpCnt > 2)
4302                     {
4303                         // Table based switch requires an extra register for the table base
4304                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4305                     }
4306                     tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4307                 }
4308 #else  // !_TARGET_ARM_
4309                 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4310                 tree->gtUsedRegs = op1->gtUsedRegs;
4311 #endif // _TARGET_ARM_
4312                 regMask = 0;
4313                 goto RETURN_CHECK;
4314
4315             case GT_CKFINITE:
4316                 if (predictReg <= PREDICT_REG)
4317                     predictReg = PREDICT_SCRATCH_REG;
4318
4319                 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4320                 // Need a reg to load exponent into
4321                 regMask          = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4322                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4323                 goto RETURN_CHECK;
4324
4325             case GT_LCLHEAP:
4326                 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4327                 op2Mask = 0;
4328
4329 #ifdef _TARGET_ARM_
4330                 if (info.compInitMem)
4331                 {
4332                     // We zero out two registers in the ARM codegen path
4333                     op2Mask |=
4334                         rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4335                 }
4336 #endif
4337
4338                 op1->gtUsedRegs |= (regMaskSmall)regMask;
4339                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4340
4341                 // The result will be put in the reg we picked for the size
4342                 // regMask = <already set as we want it to be>
4343
4344                 goto RETURN_CHECK;
4345
4346             case GT_OBJ:
4347             {
4348 #ifdef _TARGET_ARM_
4349                 if (predictReg <= PREDICT_REG)
4350                     predictReg = PREDICT_SCRATCH_REG;
4351
4352                 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4353                                                           // registers (to reduce code size)
4354                 regMask = RBM_NONE;
4355                 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4356 #endif
4357
4358                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4359                 {
4360                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4361                 }
4362
4363 #ifdef _TARGET_ARM_
4364                 unsigned  objSize   = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4365                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4366                 // If it has one bit set, and that's an arg reg...
4367                 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4368                 {
4369                     // We are passing the 'obj' in the argument registers
4370                     //
4371                     regNumber rn = genRegNumFromMask(preferReg);
4372
4373                     //  Add the registers used to pass the 'obj' to regMask.
4374                     for (unsigned i = 0; i < objSize / 4; i++)
4375                     {
4376                         if (rn == MAX_REG_ARG)
4377                             break;
4378                         // Otherwise...
4379                         regMask |= genRegMask(rn);
4380                         rn = genRegArgNext(rn);
4381                     }
4382                 }
4383                 else
4384                 {
4385                     // We are passing the 'obj' in the outgoing arg space
4386                     // We will need one register to load into unless the 'obj' size is 4 or less.
4387                     //
4388                     if (objSize > 4)
4389                     {
4390                         regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4391                     }
4392                 }
4393                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4394                 goto RETURN_CHECK;
4395 #else  // !_TARGET_ARM
4396                 goto GENERIC_UNARY;
4397 #endif // _TARGET_ARM_
4398             }
4399
4400             case GT_MKREFANY:
4401             {
4402 #ifdef _TARGET_ARM_
4403                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4404                 regMask             = RBM_NONE;
4405                 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4406                 {
4407                     // A MKREFANY takes up two registers.
4408                     regNumber rn = genRegNumFromMask(preferReg);
4409                     regMask      = RBM_NONE;
4410                     if (rn < MAX_REG_ARG)
4411                     {
4412                         regMask |= genRegMask(rn);
4413                         rn = genRegArgNext(rn);
4414                         if (rn < MAX_REG_ARG)
4415                             regMask |= genRegMask(rn);
4416                     }
4417                 }
4418                 if (regMask != RBM_NONE)
4419                 {
4420                     // Condensation of GENERIC_BINARY path.
4421                     assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4422                     op2PredictReg        = PREDICT_REG;
4423                     regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4424                     rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4425                     regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4426                     tree->gtUsedRegs = (regMaskSmall)regMask;
4427                     goto RETURN_CHECK;
4428                 }
4429                 tree->gtUsedRegs = op1->gtUsedRegs;
4430 #endif // _TARGET_ARM_
4431                 goto GENERIC_BINARY;
4432             }
4433
4434             case GT_BOX:
4435                 goto GENERIC_UNARY;
4436
4437             case GT_LOCKADD:
4438                 goto GENERIC_BINARY;
4439
4440             case GT_XADD:
4441             case GT_XCHG:
4442                 // Ensure we can write to op2.  op2 will hold the output.
4443                 if (predictReg < PREDICT_SCRATCH_REG)
4444                     predictReg = PREDICT_SCRATCH_REG;
4445
4446                 if (tree->gtFlags & GTF_REVERSE_OPS)
4447                 {
4448                     op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4449                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4450                 }
4451                 else
4452                 {
4453                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4454                     op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4455                 }
4456                 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4457                 goto RETURN_CHECK;
4458
4459             case GT_ARR_LENGTH:
4460                 goto GENERIC_UNARY;
4461
4462             case GT_INIT_VAL:
4463                 // This unary operator simply passes through the value from its child (much like GT_NOP)
4464                 // and thus won't need a scratch register.
4465                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4466                 tree->gtUsedRegs = op1->gtUsedRegs;
4467                 goto RETURN_CHECK;
4468
4469             default:
4470 #ifdef DEBUG
4471                 gtDispTree(tree);
4472 #endif
4473                 noway_assert(!"unexpected simple operator in reg use prediction");
4474                 break;
4475         }
4476     }
4477
4478     /* See what kind of a special operator we have here */
4479
4480     switch (oper)
4481     {
4482         GenTreePtr      args;
4483         GenTreeArgList* list;
4484         regMaskTP       keepMask;
4485         unsigned        regArgsNum;
4486         int             regIndex;
4487         regMaskTP       regArgMask;
4488         regMaskTP       curArgMask;
4489
4490         case GT_CALL:
4491
4492         {
4493
4494             /* initialize so we can just or in various bits */
4495             tree->gtUsedRegs = RBM_NONE;
4496
4497 #if GTF_CALL_REG_SAVE
4498             /*
4499              *  Unless the GTF_CALL_REG_SAVE flag is set,
4500              *  we can't preserve the RBM_CALLEE_TRASH registers.
4501              *  (likewise we can't preserve the return registers)
4502              *  So we remove them from the lockedRegs set and
4503              *  record any of them in the keepMask
4504              */
4505
4506             if (tree->gtFlags & GTF_CALL_REG_SAVE)
4507             {
4508                 regMaskTP trashMask = genReturnRegForTree(tree);
4509
4510                 keepMask = lockedRegs & trashMask;
4511                 lockedRegs &= ~trashMask;
4512             }
4513             else
4514 #endif
4515             {
4516                 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4517                 lockedRegs &= ~RBM_CALLEE_TRASH;
4518             }
4519
4520             regArgsNum = 0;
4521             regIndex   = 0;
4522
4523             /* Is there an object pointer? */
4524             if (tree->gtCall.gtCallObjp)
4525             {
4526                 /* Evaluate the instance pointer first */
4527
4528                 args = tree->gtCall.gtCallObjp;
4529
4530                 /* the objPtr always goes to an integer register (through temp or directly) */
4531                 noway_assert(regArgsNum == 0);
4532                 regArgsNum++;
4533
4534                 /* Must be passed in a register */
4535
4536                 noway_assert(args->gtFlags & GTF_LATE_ARG);
4537
4538                 /* Must be either a deferred reg arg node or a GT_ASG node */
4539
4540                 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4541                              args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4542
4543                 if (!args->IsArgPlaceHolderNode())
4544                 {
4545                     rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4546                 }
4547             }
4548             VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4549             VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4550
4551             /* process argument list */
4552             for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4553             {
4554                 args = list->Current();
4555
4556                 if (args->gtFlags & GTF_LATE_ARG)
4557                 {
4558                     /* Must be either a Placeholder/NOP node or a GT_ASG node */
4559
4560                     noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4561                                  args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4562
4563                     if (!args->IsArgPlaceHolderNode())
4564                     {
4565                         rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4566                     }
4567
4568                     regArgsNum++;
4569                 }
4570                 else
4571                 {
4572 #ifdef FEATURE_FIXED_OUT_ARGS
4573                     // We'll store this argument into the outgoing argument area
4574                     // It needs to be in a register to be stored.
4575                     //
4576                     predictReg = PREDICT_REG;
4577
4578 #else // !FEATURE_FIXED_OUT_ARGS
4579                     // We'll generate a push for this argument
4580                     //
4581                     predictReg = PREDICT_NONE;
4582                     if (varTypeIsSmall(args->TypeGet()))
4583                     {
4584                         /* We may need to sign or zero extend a small type using a register */
4585                         predictReg = PREDICT_SCRATCH_REG;
4586                     }
4587 #endif
4588
4589                     rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4590                 }
4591                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4592                 tree->gtUsedRegs |= args->gtUsedRegs;
4593             }
4594
4595             /* Is there a late argument list */
4596
4597             regIndex   = 0;
4598             regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4599             args       = NULL;
4600
4601             /* process the late argument list */
4602             for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4603             {
4604                 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4605                 LclVarDsc* promotedStructLocal = NULL;
4606
4607                 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4608                 tmpMask    = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4609
4610                 assert(list->OperIsList());
4611
4612                 args = list->Current();
4613                 list = list->Rest();
4614
4615                 assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
4616
4617                 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4618                 assert(curArgTabEntry);
4619
4620                 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4621                 unsigned  numSlots =
4622                     curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4623
4624                 rpPredictReg argPredictReg;
4625                 regMaskTP    avoidReg = RBM_NONE;
4626
4627                 if (regNum != REG_STK)
4628                 {
4629                     argPredictReg = rpGetPredictForReg(regNum);
4630                     curArgMask |= genRegMask(regNum);
4631                 }
4632                 else
4633                 {
4634                     assert(numSlots > 0);
4635                     argPredictReg = PREDICT_NONE;
4636 #ifdef _TARGET_ARM_
4637                     // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4638                     avoidReg = (RBM_R12 | RBM_LR);
4639 #endif
4640                 }
4641
4642 #ifdef _TARGET_ARM_
4643                 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4644                 //
4645                 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4646                 {
4647                     // 64-bit longs and doubles require 2 consecutive argument registers
4648                     curArgMask |= genRegMask(REG_NEXT(regNum));
4649                 }
4650                 else if (args->TypeGet() == TYP_STRUCT)
4651                 {
4652                     GenTreePtr argx       = args;
4653                     GenTreePtr lclVarTree = NULL;
4654
4655                     /* The GT_OBJ may be be a child of a GT_COMMA */
4656                     while (argx->gtOper == GT_COMMA)
4657                     {
4658                         argx = argx->gtOp.gtOp2;
4659                     }
4660                     unsigned originalSize = 0;
4661
4662                     if (argx->gtOper == GT_OBJ)
4663                     {
4664                         originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4665
4666                         // Is it the address of a promoted struct local?
4667                         if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4668                         {
4669                             lclVarTree        = argx->gtObj.gtOp1->gtOp.gtOp1;
4670                             LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4671                             if (varDsc->lvPromoted)
4672                                 promotedStructLocal = varDsc;
4673                         }
4674                     }
4675                     else if (argx->gtOper == GT_LCL_VAR)
4676                     {
4677                         varDsc       = lvaTable + argx->gtLclVarCommon.gtLclNum;
4678                         originalSize = varDsc->lvSize();
4679
4680                         // Is it a promoted struct local?
4681                         if (varDsc->lvPromoted)
4682                             promotedStructLocal = varDsc;
4683                     }
4684                     else if (argx->gtOper == GT_MKREFANY)
4685                     {
4686                         originalSize = 2 * TARGET_POINTER_SIZE;
4687                     }
4688                     else
4689                     {
4690                         noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4691                     }
4692
4693                     // We only pass arguments differently if it a struct local "independently" promoted, which
4694                     // allows the field locals can be independently enregistered.
4695                     if (promotedStructLocal != NULL)
4696                     {
4697                         if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4698                             promotedStructLocal = NULL;
4699                     }
4700
4701                     unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
4702
4703                     // Are we passing a TYP_STRUCT in multiple integer registers?
4704                     // if so set up curArgMask to reflect this
4705                     // Also slots is updated to reflect the number of outgoing arg slots that we will write
4706                     if (regNum != REG_STK)
4707                     {
4708                         regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4709                         assert(genIsValidReg(regNum));
4710                         regNumber nextReg = REG_NEXT(regNum);
4711                         slots--;
4712                         while (slots > 0 && nextReg <= regLast)
4713                         {
4714                             curArgMask |= genRegMask(nextReg);
4715                             nextReg = REG_NEXT(nextReg);
4716                             slots--;
4717                         }
4718                     }
4719
4720                     if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4721                     {
4722                         // All or a portion of this struct will be placed in the argument registers indicated by
4723                         // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4724                         // that the second arg to be evaluated interferes with the reg for the first, the third with
4725                         // the regs for the first and second, etc. But since we always place the stack slots before
4726                         // placing the register slots we do not add inteferences for any part of the struct that gets
4727                         // passed on the stack.
4728
4729                         argPredictReg =
4730                             PREDICT_NONE; // We will target the indivual fields into registers but not the whole struct
4731                         regMaskTP prevArgMask = RBM_NONE;
4732                         for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4733                         {
4734                             LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4735                             if (fieldVarDsc->lvTracked)
4736                             {
4737                                 assert(lclVarTree != NULL);
4738                                 if (prevArgMask != RBM_NONE)
4739                                 {
4740                                     rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4741                                                                      DEBUGARG("fieldVar/argReg"));
4742                                 }
4743                             }
4744                             // Now see many registers this uses up.
4745                             unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4746                             unsigned nextAfterLastRegOffset =
4747                                 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4748                                 TARGET_POINTER_SIZE;
4749                             unsigned nextAfterLastArgRegOffset =
4750                                 min(nextAfterLastRegOffset,
4751                                     genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4752
4753                             for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4754                                  regOffset++)
4755                             {
4756                                 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4757                             }
4758
4759                             if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4760                             {
4761                                 break;
4762                             }
4763
4764                             if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4765                             {
4766                                 // Add the argument register used here as a preferred register for this fieldVarDsc
4767                                 //
4768                                 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4769                                 fieldVarDsc->setPrefReg(firstRegUsed, this);
4770                             }
4771                         }
4772                         compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4773                     }
4774
4775                     // If slots is greater than zero then part or all of this TYP_STRUCT
4776                     // argument is passed in the outgoing argument area. (except HFA arg)
4777                     //
4778                     if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4779                     {
4780                         // We will need a register to address the TYP_STRUCT
4781                         // Note that we can use an argument register in curArgMask as in
4782                         // codegen we pass the stack portion of the argument before we
4783                         // setup the register part.
4784                         //
4785
4786                         // Force the predictor to choose a LOW_REG here to reduce code bloat
4787                         avoidReg = (RBM_R12 | RBM_LR);
4788
4789                         assert(tmpMask == RBM_NONE);
4790                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4791
4792                         // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4793                         // arg area
4794                         if (slots > 1)
4795                         {
4796                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4797                                                         lockedRegs | regArgMask | tmpMask | avoidReg);
4798                         }
4799                     }
4800                 } // (args->TypeGet() == TYP_STRUCT)
4801 #endif            // _TARGET_ARM_
4802
4803                 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4804                 // as we have already calculated the correct tmpMask and curArgMask values and
4805                 // by calling rpPredictTreeRegUse we would just add unnecessary register inteferences.
4806                 //
4807                 if (promotedStructLocal == NULL)
4808                 {
4809                     /* Target the appropriate argument register */
4810                     tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4811                 }
4812
4813                 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4814                 // for the duration of the OBJ.
4815                 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4816                 {
4817                     GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4818                     assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4819                     compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4820                 }
4821
4822                 regArgMask |= curArgMask;
4823                 args->gtUsedRegs |= (tmpMask | regArgMask);
4824                 tree->gtUsedRegs |= args->gtUsedRegs;
4825                 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4826
4827                 if (args->gtUsedRegs != RBM_NONE)
4828                 {
4829                     // Add register interference with the set of registers used or in use when we evaluated
4830                     // the current arg, with whatever is alive after the current arg
4831                     //
4832                     rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4833                 }
4834                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4835             }
4836             assert(list == NULL);
4837
4838 #ifdef LEGACY_BACKEND
4839 #if CPU_LOAD_STORE_ARCH
4840 #ifdef FEATURE_READYTORUN_COMPILER
4841             if (tree->gtCall.IsR2RRelativeIndir())
4842             {
4843                 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4844             }
4845 #endif // FEATURE_READYTORUN_COMPILER
4846 #endif // CPU_LOAD_STORE_ARCH
4847 #endif // LEGACY_BACKEND
4848
4849             regMaskTP callAddrMask;
4850             callAddrMask = RBM_NONE;
4851 #if CPU_LOAD_STORE_ARCH
4852             predictReg = PREDICT_SCRATCH_REG;
4853 #else
4854             predictReg       = PREDICT_NONE;
4855 #endif
4856
4857             switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4858             {
4859                 case GTF_CALL_VIRT_STUB:
4860
4861                     // We only want to record an interference between the virtual stub
4862                     // param reg and anything that's live AFTER the call, but we've not
4863                     // yet processed the indirect target.  So add virtualStubParamInfo.regMask
4864                     // to interferingRegs.
4865                     interferingRegs |= virtualStubParamInfo->GetRegMask();
4866 #ifdef DEBUG
4867                     if (verbose)
4868                         printf("Adding interference with Virtual Stub Param\n");
4869 #endif
4870                     codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4871
4872                     if (tree->gtCall.gtCallType == CT_INDIRECT)
4873                     {
4874                         predictReg = virtualStubParamInfo->GetPredict();
4875                     }
4876                     break;
4877
4878                 case GTF_CALL_VIRT_VTABLE:
4879                     predictReg = PREDICT_SCRATCH_REG;
4880                     break;
4881
4882                 case GTF_CALL_NONVIRT:
4883                     predictReg = PREDICT_SCRATCH_REG;
4884                     break;
4885             }
4886
4887             if (tree->gtCall.gtCallType == CT_INDIRECT)
4888             {
4889 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4890                 if (tree->gtCall.gtCallCookie)
4891                 {
4892                     codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4893
4894                     callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4895                                                         lockedRegs | regArgMask, RBM_LASTUSE);
4896
4897                     // Just in case we predict some other registers, force interference with our two special
4898                     // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4899                     callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4900
4901                     predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4902                 }
4903 #endif
4904                 callAddrMask |=
4905                     rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4906             }
4907             else if (predictReg != PREDICT_NONE)
4908             {
4909                 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4910             }
4911
4912             if (tree->gtFlags & GTF_CALL_UNMANAGED)
4913             {
4914                 // Need a register for tcbReg
4915                 callAddrMask |=
4916                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4917 #if CPU_LOAD_STORE_ARCH
4918                 // Need an extra register for tmpReg
4919                 callAddrMask |=
4920                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4921 #endif
4922             }
4923
4924             tree->gtUsedRegs |= callAddrMask;
4925
4926             /* After the call restore the orginal value of lockedRegs */
4927             lockedRegs |= keepMask;
4928
4929             /* set the return register */
4930             regMask = genReturnRegForTree(tree);
4931
4932             if (regMask & rsvdRegs)
4933             {
4934                 // We will need to relocate the return register value
4935                 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4936 #if FEATURE_FP_REGALLOC
4937                 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4938 #endif
4939                 regMask = RBM_NONE;
4940
4941                 if (intRegMask)
4942                 {
4943                     if (intRegMask == RBM_INTRET)
4944                     {
4945                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4946                     }
4947                     else if (intRegMask == RBM_LNGRET)
4948                     {
4949                         regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4950                     }
4951                     else
4952                     {
4953                         noway_assert(!"unexpected return regMask");
4954                     }
4955                 }
4956
4957 #if FEATURE_FP_REGALLOC
4958                 if (floatRegMask)
4959                 {
4960                     if (floatRegMask == RBM_FLOATRET)
4961                     {
4962                         regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4963                     }
4964                     else if (floatRegMask == RBM_DOUBLERET)
4965                     {
4966                         regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4967                     }
4968                     else // HFA return case
4969                     {
4970                         for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4971                         {
4972                             regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4973                         }
4974                     }
4975                 }
4976 #endif
4977             }
4978
4979             /* the return registers (if any) are killed */
4980             tree->gtUsedRegs |= regMask;
4981
4982 #if GTF_CALL_REG_SAVE
4983             if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4984 #endif
4985             {
4986                 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4987                 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4988             }
4989         }
4990
4991 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4992             // Mark required registers for emitting tailcall profiler callback as used
4993             if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4994             {
4995                 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4996             }
4997 #endif
4998             break;
4999
5000         case GT_ARR_ELEM:
5001
5002             // Figure out which registers can't be touched
5003             unsigned dim;
5004             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5005                 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5006
5007             regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5008
5009             regMaskTP dimsMask;
5010             dimsMask = 0;
5011
5012 #if CPU_LOAD_STORE_ARCH
5013             // We need a register to load the bounds of the MD array
5014             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5015 #endif
5016
5017             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5018             {
5019                 /* We need scratch registers to compute index-lower_bound.
5020                    Also, gtArrInds[0]'s register will be used as the second
5021                    addressability register (besides gtArrObj's) */
5022
5023                 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5024                                                         lockedRegs | regMask | dimsMask, rsvdRegs);
5025                 if (dim == 0)
5026                     regMask |= dimMask;
5027
5028                 dimsMask |= dimMask;
5029             }
5030 #ifdef _TARGET_XARCH_
5031             // INS_imul doesnt have an immediate constant.
5032             if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5033                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5034 #endif
5035             tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5036             break;
5037
5038         case GT_CMPXCHG:
5039         {
5040 #ifdef _TARGET_XARCH_
5041             rsvdRegs |= RBM_EAX;
5042 #endif
5043             if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5044             {
5045                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5046             }
5047             else
5048             {
5049                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5050             }
5051             op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5052
5053 #ifdef _TARGET_XARCH_
5054             rsvdRegs &= ~RBM_EAX;
5055             tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5056                                           rsvdRegs | regMask | op2Mask);
5057             tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5058             predictReg       = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5059 #else
5060             tmpMask          = 0;
5061             tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5062 #endif
5063         }
5064         break;
5065
5066         case GT_ARR_BOUNDS_CHECK:
5067         {
5068             regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5069             regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5070             rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5071
5072             tree->gtUsedRegs =
5073                 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5074         }
5075         break;
5076
5077         default:
5078             NO_WAY("unexpected special operator in reg use prediction");
5079             break;
5080     }
5081
5082 RETURN_CHECK:
5083
5084 #ifdef DEBUG
5085     /* make sure we set them to something reasonable */
5086     if (tree->gtUsedRegs & RBM_ILLEGAL)
5087         noway_assert(!"used regs not set properly in reg use prediction");
5088
5089     if (regMask & RBM_ILLEGAL)
5090         noway_assert(!"return value not set propery in reg use prediction");
5091
5092 #endif
5093
5094     /*
5095      *  If the gtUsedRegs conflicts with lockedRegs
5096      *  then we going to have to spill some registers
5097      *  into the non-trashed register set to keep it alive
5098      */
5099     regMaskTP spillMask;
5100     spillMask = tree->gtUsedRegs & lockedRegs;
5101
5102     if (spillMask)
5103     {
5104         while (spillMask)
5105         {
5106             /* Find the next register that needs to be spilled */
5107             tmpMask = genFindLowestBit(spillMask);
5108
5109 #ifdef DEBUG
5110             if (verbose)
5111             {
5112                 printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5113                 gtDispTree(tree, 0, NULL, true);
5114                 if ((tmpMask & regMask) == 0)
5115                 {
5116                     printf("Predict reload of   %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5117                     gtDispTree(tree, 0, NULL, true);
5118                 }
5119             }
5120 #endif
5121             /* In Codegen it will typically introduce a spill temp here */
5122             /* rather than relocating the register to a non trashed reg */
5123             rpPredictSpillCnt++;
5124
5125             /* Remove it from the spillMask */
5126             spillMask &= ~tmpMask;
5127         }
5128     }
5129
5130     /*
5131      *  If the return registers in regMask conflicts with the lockedRegs
5132      *  then we allocate extra registers for the reload of the conflicting
5133      *  registers.
5134      *
5135      *  Set spillMask to the set of locked registers that have to be reloaded here.
5136      *  reloadMask is set to the extra registers that are used to reload
5137      *  the spilled lockedRegs.
5138      */
5139
5140     noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5141     spillMask = lockedRegs & regMask;
5142
5143     if (spillMask)
5144     {
5145         /* Remove the spillMask from regMask */
5146         regMask &= ~spillMask;
5147
5148         regMaskTP reloadMask = RBM_NONE;
5149         while (spillMask)
5150         {
5151             /* Get an extra register to hold it */
5152             regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5153 #ifdef DEBUG
5154             if (verbose)
5155             {
5156                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5157                 gtDispTree(tree, 0, NULL, true);
5158             }
5159 #endif
5160             reloadMask |= reloadReg;
5161
5162             /* Remove it from the spillMask */
5163             spillMask &= ~genFindLowestBit(spillMask);
5164         }
5165
5166         /* Update regMask to use the reloadMask */
5167         regMask |= reloadMask;
5168
5169         /* update the gtUsedRegs mask */
5170         tree->gtUsedRegs |= (regMaskSmall)regMask;
5171     }
5172
5173     regMaskTP regUse = tree->gtUsedRegs;
5174     regUse |= interferingRegs;
5175
5176     if (!VarSetOps::IsEmpty(this, compCurLife))
5177     {
5178         // Add interference between the current set of live variables and
5179         //  the set of temporary registers need to evaluate the sub tree
5180         if (regUse)
5181         {
5182             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5183         }
5184     }
5185
5186     if (rpAsgVarNum != -1)
5187     {
5188         // Add interference between the registers used (if any)
5189         // and the assignment target variable
5190         if (regUse)
5191         {
5192             rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5193         }
5194
5195         // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5196         // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5197         // to the set of currently live variables. This new interference will prevent us
5198         // from using the register value used here for enregistering different live variable
5199         //
5200         if (!VarSetOps::IsEmpty(this, compCurLife))
5201         {
5202             rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5203         }
5204     }
5205
5206     /* Do we need to resore the oldLastUseVars value */
5207     if (restoreLastUseVars)
5208     {
5209         /*  If we used a GT_ASG targeted register then we need to add
5210          *  a variable interference between any new last use variables
5211          *  and the GT_ASG targeted register
5212          */
5213         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5214         {
5215             rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5216                                              DEBUGARG("asgn tgt last use conflict"));
5217         }
5218         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5219     }
5220
5221     return regMask;
5222 }
5223 #ifdef _PREFAST_
5224 #pragma warning(pop)
5225 #endif
5226
5227 #endif // LEGACY_BACKEND
5228
5229 /****************************************************************************/
5230 /* Returns true when we must create an EBP frame
5231    This is used to force most managed methods to have EBP based frames
5232    which allows the ETW kernel stackwalker to walk the stacks of managed code
5233    this allows the kernel to perform light weight profiling
5234  */
5235 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5236 {
5237     bool result = false;
5238 #ifdef DEBUG
5239     const char* reason = nullptr;
5240 #endif
5241
5242 #if ETW_EBP_FRAMED
5243     if (!result && (opts.MinOpts() || opts.compDbgCode))
5244     {
5245         INDEBUG(reason = "Debug Code");
5246         result = true;
5247     }
5248     if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5249     {
5250         INDEBUG(reason = "IL Code Size");
5251         result = true;
5252     }
5253     if (!result && (fgBBcount > 3))
5254     {
5255         INDEBUG(reason = "BasicBlock Count");
5256         result = true;
5257     }
5258     if (!result && fgHasLoops)
5259     {
5260         INDEBUG(reason = "Method has Loops");
5261         result = true;
5262     }
5263     if (!result && (optCallCount >= 2))
5264     {
5265         INDEBUG(reason = "Call Count");
5266         result = true;
5267     }
5268     if (!result && (optIndirectCallCount >= 1))
5269     {
5270         INDEBUG(reason = "Indirect Call");
5271         result = true;
5272     }
5273 #endif // ETW_EBP_FRAMED
5274
5275     // VM wants to identify the containing frame of an InlinedCallFrame always
5276     // via the frame register never the stack register so we need a frame.
5277     if (!result && (optNativeCallCount != 0))
5278     {
5279         INDEBUG(reason = "Uses PInvoke");
5280         result = true;
5281     }
5282
5283 #ifdef _TARGET_ARM64_
5284     // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5285     // pointer frames.
5286     if (!result)
5287     {
5288         INDEBUG(reason = "Temporary ARM64 force frame pointer");
5289         result = true;
5290     }
5291 #endif // _TARGET_ARM64_
5292
5293 #ifdef DEBUG
5294     if ((result == true) && (wbReason != nullptr))
5295     {
5296         *wbReason = reason;
5297     }
5298 #endif
5299
5300     return result;
5301 }
5302
5303 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5304
5305 /*****************************************************************************
5306  *
5307  *  Predict which variables will be assigned to registers
5308  *  This is x86 specific and only predicts the integer registers and
5309  *  must be conservative, any register that is predicted to be enregister
5310  *  must end up being enregistered.
5311  *
5312  *  The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5313  *  predicted to be enregistered to minimize calls to rpPredictRegPick.
5314  *
5315  */
5316
5317 #ifdef _PREFAST_
5318 #pragma warning(push)
5319 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5320 #endif
5321 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5322 {
5323     unsigned regInx;
5324
5325     if (rpPasses <= rpPassesPessimize)
5326     {
5327         // Assume that we won't have to reverse EBP enregistration
5328         rpReverseEBPenreg = false;
5329
5330         // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5331         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5332             rpFrameType = FT_EBP_FRAME;
5333         else
5334             rpFrameType = FT_ESP_FRAME;
5335     }
5336
5337 #if !ETW_EBP_FRAMED
5338     // If we are using FPBASE as the frame register, we cannot also use it for
5339     // a local var
5340     if (rpFrameType == FT_EBP_FRAME)
5341     {
5342         regAvail &= ~RBM_FPBASE;
5343     }
5344 #endif // !ETW_EBP_FRAMED
5345
5346     rpStkPredict        = 0;
5347     rpPredictAssignMask = regAvail;
5348
5349     raSetupArgMasks(&codeGen->intRegState);
5350 #if !FEATURE_STACK_FP_X87
5351     raSetupArgMasks(&codeGen->floatRegState);
5352 #endif
5353
5354     // If there is a secret stub param, it is also live in
5355     if (info.compPublishStubParam)
5356     {
5357         codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5358     }
5359
5360     if (regAvail == RBM_NONE)
5361     {
5362         unsigned   lclNum;
5363         LclVarDsc* varDsc;
5364
5365         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5366         {
5367 #if FEATURE_STACK_FP_X87
5368             if (!varDsc->IsFloatRegType())
5369 #endif
5370             {
5371                 varDsc->lvRegNum = REG_STK;
5372                 if (isRegPairType(varDsc->lvType))
5373                     varDsc->lvOtherReg = REG_STK;
5374             }
5375         }
5376     }
5377
5378 #ifdef DEBUG
5379     if (verbose)
5380     {
5381         printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5382         printf("\n        Available registers = ");
5383         dspRegMask(regAvail);
5384         printf("\n");
5385     }
5386 #endif
5387
5388     if (regAvail == RBM_NONE)
5389     {
5390         return RBM_NONE;
5391     }
5392
5393     /* We cannot change the lvVarIndexes at this point, so we  */
5394     /* can only re-order the existing set of tracked variables */
5395     /* Which will change the order in which we select the      */
5396     /* locals for enregistering.                               */
5397
5398     assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5399
5400     // Should not be set unless optimizing
5401     noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5402
5403     if (lvaSortAgain)
5404         lvaSortOnly();
5405
5406 #ifdef DEBUG
5407     fgDebugCheckBBlist();
5408 #endif
5409
5410     /* Initialize the weighted count of variables that could have */
5411     /* been enregistered but weren't */
5412     unsigned refCntStk    = 0; // sum of     ref counts for all stack based variables
5413     unsigned refCntEBP    = 0; // sum of     ref counts for EBP enregistered variables
5414     unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5415 #if DOUBLE_ALIGN
5416     unsigned refCntStkParam;  // sum of     ref counts for all stack based parameters
5417     unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5418
5419 #if FEATURE_STACK_FP_X87
5420     refCntStkParam  = raCntStkParamDblStackFP;
5421     refCntWtdStkDbl = raCntWtdStkDblStackFP;
5422     refCntStk       = raCntStkStackFP;
5423 #else
5424     refCntStkParam  = 0;
5425     refCntWtdStkDbl = 0;
5426     refCntStk       = 0;
5427 #endif // FEATURE_STACK_FP_X87
5428
5429 #endif // DOUBLE_ALIGN
5430
5431     /* Set of registers used to enregister variables in the predition */
5432     regMaskTP regUsed = RBM_NONE;
5433
5434     /*-------------------------------------------------------------------------
5435      *
5436      *  Predict/Assign the enregistered locals in ref-count order
5437      *
5438      */
5439
5440     VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5441
5442     unsigned FPRegVarLiveInCnt;
5443     FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5444
5445     LclVarDsc* varDsc;
5446     for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5447     {
5448         bool notWorthy = false;
5449
5450         unsigned  varIndex;
5451         bool      isDouble;
5452         regMaskTP regAvailForType;
5453         var_types regType;
5454         regMaskTP avoidReg;
5455         unsigned  customVarOrderSize;
5456         regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5457         bool      firstHalf;
5458         regNumber saveOtherReg;
5459
5460         varDsc = lvaRefSorted[sortNum];
5461
5462 #if FEATURE_STACK_FP_X87
5463         if (varTypeIsFloating(varDsc->TypeGet()))
5464         {
5465 #ifdef DEBUG
5466             if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5467             {
5468                 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5469                 // be en-registered.
5470                 noway_assert(!varDsc->lvRegister);
5471             }
5472 #endif
5473             continue;
5474         }
5475 #endif
5476
5477         /* Check the set of invariant things that would prevent enregistration */
5478
5479         /* Ignore the variable if it's not tracked */
5480         if (!varDsc->lvTracked)
5481             goto CANT_REG;
5482
5483         /* Get hold of the index and the interference mask for the variable */
5484         varIndex = varDsc->lvVarIndex;
5485
5486         // Remove 'varIndex' from unprocessedVars
5487         VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5488
5489         // Skip the variable if it's marked as DoNotEnregister.
5490
5491         if (varDsc->lvDoNotEnregister)
5492             goto CANT_REG;
5493
5494         /* TODO: For now if we have JMP all register args go to stack
5495          * TODO: Later consider extending the life of the argument or make a copy of it */
5496
5497         if (compJmpOpUsed && varDsc->lvIsRegArg)
5498             goto CANT_REG;
5499
5500         /* Skip the variable if the ref count is zero */
5501
5502         if (varDsc->lvRefCnt == 0)
5503             goto CANT_REG;
5504
5505         /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5506
5507         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5508         {
5509             goto CANT_REG;
5510         }
5511
5512         /* Is the unweighted ref count too low to be interesting? */
5513
5514         if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5515             (varDsc->lvRefCnt <= 1))
5516         {
5517             /* Sometimes it's useful to enregister a variable with only one use */
5518             /*   arguments referenced in loops are one example */
5519
5520             if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5521                 goto OK_TO_ENREGISTER;
5522
5523             /* If the variable has a preferred register set it may be useful to put it there */
5524             if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5525                 goto OK_TO_ENREGISTER;
5526
5527             /* Keep going; the table is sorted by "weighted" ref count */
5528             goto CANT_REG;
5529         }
5530
5531     OK_TO_ENREGISTER:
5532
5533         if (varTypeIsFloating(varDsc->TypeGet()))
5534         {
5535             regType         = varDsc->TypeGet();
5536             regAvailForType = regAvail & RBM_ALLFLOAT;
5537         }
5538         else
5539         {
5540             regType         = TYP_INT;
5541             regAvailForType = regAvail & RBM_ALLINT;
5542         }
5543
5544 #ifdef _TARGET_ARM_
5545         isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5546
5547         if (isDouble)
5548         {
5549             regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5550         }
5551 #endif
5552
5553         /* If we don't have any registers available then skip the enregistration attempt */
5554         if (regAvailForType == RBM_NONE)
5555             goto NO_REG;
5556
5557         // On the pessimize passes don't even try to enregister LONGS
5558         if (isRegPairType(varDsc->lvType))
5559         {
5560             if (rpPasses > rpPassesPessimize)
5561                 goto NO_REG;
5562             else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5563                 goto NO_REG;
5564         }
5565
5566         // Set of registers to avoid when performing register allocation
5567         avoidReg = RBM_NONE;
5568
5569         if (!varDsc->lvIsRegArg)
5570         {
5571             /* For local variables,
5572              *  avoid the incoming arguments,
5573              *  but only if you conflict with them */
5574
5575             if (raAvoidArgRegMask != 0)
5576             {
5577                 LclVarDsc* argDsc;
5578                 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5579
5580                 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5581                 {
5582                     if (!argDsc->lvIsRegArg)
5583                         continue;
5584
5585                     bool      isFloat  = argDsc->IsFloatRegType();
5586                     regNumber inArgReg = argDsc->lvArgReg;
5587                     regMaskTP inArgBit = genRegMask(inArgReg);
5588
5589                     // Is this inArgReg in the raAvoidArgRegMask set?
5590
5591                     if (!(raAvoidArgRegMask & inArgBit))
5592                         continue;
5593
5594                     noway_assert(argDsc->lvIsParam);
5595                     noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5596
5597                     unsigned locVarIndex = varDsc->lvVarIndex;
5598                     unsigned argVarIndex = argDsc->lvVarIndex;
5599
5600                     /* Does this variable interfere with the arg variable ? */
5601                     if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5602                     {
5603                         noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5604                         /* Yes, so try to avoid the incoming arg reg */
5605                         avoidReg |= inArgBit;
5606                     }
5607                     else
5608                     {
5609                         noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5610                     }
5611                 }
5612             }
5613         }
5614
5615         // Now we will try to predict which register the variable
5616         // could  be enregistered in
5617
5618         customVarOrderSize = MAX_VAR_ORDER_SIZE;
5619
5620         raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5621
5622         firstHalf    = false;
5623         saveOtherReg = DUMMY_INIT(REG_NA);
5624
5625         for (regInx = 0; regInx < customVarOrderSize; regInx++)
5626         {
5627             regNumber regNum  = customVarOrder[regInx];
5628             regMaskTP regBits = genRegMask(regNum);
5629
5630             /* Skip this register if it isn't available */
5631             if ((regAvailForType & regBits) == 0)
5632                 continue;
5633
5634             /* Skip this register if it interferes with the variable */
5635
5636             if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5637                 continue;
5638
5639             if (varTypeIsFloating(regType))
5640             {
5641 #ifdef _TARGET_ARM_
5642                 if (isDouble)
5643                 {
5644                     regNumber regNext = REG_NEXT(regNum);
5645                     regBits |= genRegMask(regNext);
5646
5647                     /* Skip if regNext interferes with the variable */
5648                     if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5649                         continue;
5650                 }
5651 #endif
5652             }
5653
5654             bool firstUseOfReg     = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5655             bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5656             bool calleeSavedReg    = ((regBits & RBM_CALLEE_SAVED) != 0);
5657
5658             /* Skip this register if the weighted ref count is less than two
5659                and we are considering a unused callee saved register */
5660
5661             if (lessThanTwoRefWtd && // less than two references (weighted)
5662                 firstUseOfReg &&     // first use of this register
5663                 calleeSavedReg)      // callee saved register
5664             {
5665                 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5666
5667                 // psc is abbeviation for possibleSameColor
5668                 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5669
5670                 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5671                 unsigned        pscIndex = 0;
5672                 while (pscIndexIter.NextElem(&pscIndex))
5673                 {
5674                     LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5675                     totalRefCntWtd += pscVar->lvRefCntWtd;
5676                     if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5677                         break;
5678                 }
5679
5680                 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5681                 {
5682                     notWorthy = true;
5683                     continue; // not worth spilling a callee saved register
5684                 }
5685                 // otherwise we will spill this callee saved registers,
5686                 // because its uses when combined with the uses of
5687                 // other yet to be processed candidates exceed our threshold.
5688                 // totalRefCntWtd = totalRefCntWtd;
5689             }
5690
5691             /* Looks good - mark the variable as living in the register */
5692
5693             if (isRegPairType(varDsc->lvType))
5694             {
5695                 if (firstHalf == false)
5696                 {
5697                     /* Enregister the first half of the long */
5698                     varDsc->lvRegNum   = regNum;
5699                     saveOtherReg       = varDsc->lvOtherReg;
5700                     varDsc->lvOtherReg = REG_STK;
5701                     firstHalf          = true;
5702                 }
5703                 else
5704                 {
5705                     /* Ensure 'well-formed' register pairs */
5706                     /* (those returned by gen[Pick|Grab]RegPair) */
5707
5708                     if (regNum < varDsc->lvRegNum)
5709                     {
5710                         varDsc->lvOtherReg = varDsc->lvRegNum;
5711                         varDsc->lvRegNum   = regNum;
5712                     }
5713                     else
5714                     {
5715                         varDsc->lvOtherReg = regNum;
5716                     }
5717                     firstHalf = false;
5718                 }
5719             }
5720             else
5721             {
5722                 varDsc->lvRegNum = regNum;
5723 #ifdef _TARGET_ARM_
5724                 if (isDouble)
5725                 {
5726                     varDsc->lvOtherReg = REG_NEXT(regNum);
5727                 }
5728 #endif
5729             }
5730
5731             if (regNum == REG_FPBASE)
5732             {
5733                 refCntEBP += varDsc->lvRefCnt;
5734                 refCntWtdEBP += varDsc->lvRefCntWtd;
5735 #if DOUBLE_ALIGN
5736                 if (varDsc->lvIsParam)
5737                 {
5738                     refCntStkParam += varDsc->lvRefCnt;
5739                 }
5740 #endif
5741             }
5742
5743             /* Record this register in the regUsed set */
5744             regUsed |= regBits;
5745
5746             /* The register is now ineligible for all interfering variables */
5747
5748             VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5749
5750 #ifdef _TARGET_ARM_
5751             if (isDouble)
5752             {
5753                 regNumber       secondHalf = REG_NEXT(regNum);
5754                 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5755                 unsigned        intfIndex = 0;
5756                 while (iter.NextElem(&intfIndex))
5757                 {
5758                     VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5759                 }
5760             }
5761 #endif
5762
5763             /* If a register argument, remove its incoming register
5764              * from the "avoid" list */
5765
5766             if (varDsc->lvIsRegArg)
5767             {
5768                 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5769 #ifdef _TARGET_ARM_
5770                 if (isDouble)
5771                 {
5772                     raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5773                 }
5774 #endif
5775             }
5776
5777             /* A variable of TYP_LONG can take two registers */
5778             if (firstHalf)
5779                 continue;
5780
5781             // Since we have successfully enregistered this variable it is
5782             // now time to move on and consider the next variable
5783             goto ENREG_VAR;
5784         }
5785
5786         if (firstHalf)
5787         {
5788             noway_assert(isRegPairType(varDsc->lvType));
5789
5790             /* This TYP_LONG is partially enregistered */
5791
5792             noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5793
5794             if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5795             {
5796                 rpLostEnreg = true;
5797             }
5798
5799             raAddToStkPredict(varDsc->lvRefCntWtd);
5800             goto ENREG_VAR;
5801         }
5802
5803     NO_REG:;
5804         if (varDsc->lvDependReg)
5805         {
5806             rpLostEnreg = true;
5807         }
5808
5809         if (!notWorthy)
5810         {
5811             /* Weighted count of variables that could have been enregistered but weren't */
5812             raAddToStkPredict(varDsc->lvRefCntWtd);
5813
5814             if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5815                 raAddToStkPredict(varDsc->lvRefCntWtd);
5816         }
5817
5818     CANT_REG:;
5819         varDsc->lvRegister = false;
5820
5821         varDsc->lvRegNum = REG_STK;
5822         if (isRegPairType(varDsc->lvType))
5823             varDsc->lvOtherReg = REG_STK;
5824
5825         /* unweighted count of variables that were not enregistered */
5826
5827         refCntStk += varDsc->lvRefCnt;
5828
5829 #if DOUBLE_ALIGN
5830         if (varDsc->lvIsParam)
5831         {
5832             refCntStkParam += varDsc->lvRefCnt;
5833         }
5834         else
5835         {
5836             /* Is it a stack based double? */
5837             /* Note that double params are excluded since they can not be double aligned */
5838             if (varDsc->lvType == TYP_DOUBLE)
5839             {
5840                 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5841             }
5842         }
5843 #endif
5844 #ifdef DEBUG
5845         if (verbose)
5846         {
5847             printf("; ");
5848             gtDispLclVar((unsigned)(varDsc - lvaTable));
5849             if (varDsc->lvTracked)
5850                 printf("T%02u", varDsc->lvVarIndex);
5851             else
5852                 printf("   ");
5853             printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5854             if (varDsc->lvDoNotEnregister)
5855                 printf(", do-not-enregister");
5856             printf("\n");
5857         }
5858 #endif
5859         continue;
5860
5861     ENREG_VAR:;
5862
5863         varDsc->lvRegister = true;
5864
5865         // Record the fact that we enregistered a stack arg when tail call is used.
5866         if (compJmpOpUsed && !varDsc->lvIsRegArg)
5867         {
5868             rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5869             if (isRegPairType(varDsc->lvType))
5870             {
5871                 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5872             }
5873         }
5874
5875 #ifdef DEBUG
5876         if (verbose)
5877         {
5878             printf("; ");
5879             gtDispLclVar((unsigned)(varDsc - lvaTable));
5880             printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5881                    refCntWtd2str(varDsc->lvRefCntWtd));
5882             varDsc->PrintVarReg();
5883 #ifdef _TARGET_ARM_
5884             if (isDouble)
5885             {
5886                 printf(":%s", getRegName(varDsc->lvOtherReg));
5887             }
5888 #endif
5889             printf("\n");
5890         }
5891 #endif
5892     }
5893
5894 #if ETW_EBP_FRAMED
5895     noway_assert(refCntEBP == 0);
5896 #endif
5897
5898 #ifdef DEBUG
5899     if (verbose)
5900     {
5901         if (refCntStk > 0)
5902             printf("; refCntStk       = %u\n", refCntStk);
5903         if (refCntEBP > 0)
5904             printf("; refCntEBP       = %u\n", refCntEBP);
5905         if (refCntWtdEBP > 0)
5906             printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5907 #if DOUBLE_ALIGN
5908         if (refCntStkParam > 0)
5909             printf("; refCntStkParam  = %u\n", refCntStkParam);
5910         if (refCntWtdStkDbl > 0)
5911             printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5912 #endif
5913     }
5914 #endif
5915
5916     /* Determine how the EBP register should be used */
5917     CLANG_FORMAT_COMMENT_ANCHOR;
5918
5919 #if DOUBLE_ALIGN
5920
5921     if (!codeGen->isFramePointerRequired())
5922     {
5923         noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5924
5925         /*
5926             First let us decide if we should use EBP to create a
5927             double-aligned frame, instead of enregistering variables
5928         */
5929
5930         if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5931         {
5932             rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5933             goto REVERSE_EBP_ENREG;
5934         }
5935
5936         if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5937         {
5938             if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5939             {
5940                 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5941                 goto REVERSE_EBP_ENREG;
5942             }
5943         }
5944     }
5945
5946 #endif // DOUBLE_ALIGN
5947
5948     if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5949     {
5950 #ifdef _TARGET_XARCH_
5951 // clang-format off
5952         /*  If we are using EBP to enregister variables then
5953             will we actually save bytes by setting up an EBP frame?
5954
5955             Each stack reference is an extra byte of code if we use
5956             an ESP frame.
5957
5958             Here we measure the savings that we get by using EBP to
5959             enregister variables vs. the cost in code size that we
5960             pay when using an ESP based frame.
5961
5962             We pay one byte of code for each refCntStk
5963             but we save one byte (or more) for each refCntEBP.
5964
5965             Our savings are the elimination of a stack memory read/write.
5966             We use the loop weighted value of
5967                refCntWtdEBP * mem_access_weight (0, 3, 6)
5968             to represent this savings.
5969          */
5970
5971         // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5972         // to set up an EBP frame in the prolog and epilog
5973         #define EBP_FRAME_SETUP_SIZE  5
5974         // clang-format on
5975
5976         if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5977         {
5978             unsigned bytesSaved        = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5979             unsigned mem_access_weight = 3;
5980
5981             if (compCodeOpt() == SMALL_CODE)
5982                 mem_access_weight = 0;
5983             else if (compCodeOpt() == FAST_CODE)
5984                 mem_access_weight *= 2;
5985
5986             if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5987             {
5988                 /* It's not be a good idea to use EBP in our predictions */
5989                 CLANG_FORMAT_COMMENT_ANCHOR;
5990 #ifdef DEBUG
5991                 if (verbose && (refCntEBP > 0))
5992                     printf("; Predicting that it's not worth using EBP to enregister variables\n");
5993 #endif
5994                 rpFrameType = FT_EBP_FRAME;
5995                 goto REVERSE_EBP_ENREG;
5996             }
5997         }
5998 #endif // _TARGET_XARCH_
5999
6000         if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6001         {
6002 #ifdef DEBUG
6003             const char* reason;
6004 #endif
6005             if (rpMustCreateEBPCalled == false)
6006             {
6007                 rpMustCreateEBPCalled = true;
6008                 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6009                 {
6010 #ifdef DEBUG
6011                     if (verbose)
6012                         printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6013 #endif
6014                     codeGen->setFrameRequired(true);
6015
6016                     rpFrameType = FT_EBP_FRAME;
6017                     goto REVERSE_EBP_ENREG;
6018                 }
6019             }
6020         }
6021     }
6022
6023     goto EXIT;
6024
6025 REVERSE_EBP_ENREG:
6026
6027     noway_assert(rpFrameType != FT_ESP_FRAME);
6028
6029     rpReverseEBPenreg = true;
6030
6031 #if !ETW_EBP_FRAMED
6032     if (refCntEBP > 0)
6033     {
6034         noway_assert(regUsed & RBM_FPBASE);
6035
6036         regUsed &= ~RBM_FPBASE;
6037
6038         /* variables that were enregistered in EBP become stack based variables */
6039         raAddToStkPredict(refCntWtdEBP);
6040
6041         unsigned lclNum;
6042
6043         /* We're going to have to undo some predicted enregistered variables */
6044         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6045         {
6046             /* Is this a register variable? */
6047             if (varDsc->lvRegNum != REG_STK)
6048             {
6049                 if (isRegPairType(varDsc->lvType))
6050                 {
6051                     /* Only one can be EBP */
6052                     if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6053                     {
6054                         if (varDsc->lvRegNum == REG_FPBASE)
6055                             varDsc->lvRegNum = varDsc->lvOtherReg;
6056
6057                         varDsc->lvOtherReg = REG_STK;
6058
6059                         if (varDsc->lvRegNum == REG_STK)
6060                             varDsc->lvRegister = false;
6061
6062                         if (varDsc->lvDependReg)
6063                             rpLostEnreg = true;
6064 #ifdef DEBUG
6065                         if (verbose)
6066                             goto DUMP_MSG;
6067 #endif
6068                     }
6069                 }
6070                 else
6071                 {
6072                     if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6073                     {
6074                         varDsc->lvRegNum = REG_STK;
6075
6076                         varDsc->lvRegister = false;
6077
6078                         if (varDsc->lvDependReg)
6079                             rpLostEnreg = true;
6080 #ifdef DEBUG
6081                         if (verbose)
6082                         {
6083                         DUMP_MSG:
6084                             printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6085                                    varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6086                                    (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6087                         }
6088 #endif
6089                     }
6090                 }
6091             }
6092         }
6093     }
6094 #endif // ETW_EBP_FRAMED
6095
6096 EXIT:;
6097
6098     unsigned lclNum;
6099     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6100     {
6101         /* Clear the lvDependReg flag for next iteration of the predictor */
6102         varDsc->lvDependReg = false;
6103
6104         // If we set rpLostEnreg and this is the first pessimize pass
6105         // then reverse the enreg of all TYP_LONG
6106         if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6107         {
6108             varDsc->lvRegNum   = REG_STK;
6109             varDsc->lvOtherReg = REG_STK;
6110         }
6111     }
6112
6113 #ifdef DEBUG
6114     if (verbose && raNewBlocks)
6115     {
6116         printf("\nAdded FP register killing blocks:\n");
6117         fgDispBasicBlocks();
6118         printf("\n");
6119     }
6120 #endif
6121     noway_assert(rpFrameType != FT_NOT_SET);
6122
6123     /* return the set of registers used to enregister variables */
6124     return regUsed;
6125 }
6126 #ifdef _PREFAST_
6127 #pragma warning(pop)
6128 #endif
6129
6130 /*****************************************************************************
6131  *
6132  *  Predict register use for every tree in the function. Note that we do this
6133  *  at different times (not to mention in a totally different way) for x86 vs
6134  *  RISC targets.
6135  */
6136 void Compiler::rpPredictRegUse()
6137 {
6138 #ifdef DEBUG
6139     if (verbose)
6140         raDumpVarIntf();
6141 #endif
6142
6143     // We might want to adjust the ref counts based on interference
6144     raAdjustVarIntf();
6145
6146     regMaskTP allAcceptableRegs = RBM_ALLINT;
6147
6148 #if FEATURE_FP_REGALLOC
6149     allAcceptableRegs |= raConfigRestrictMaskFP();
6150 #endif
6151
6152     allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6153
6154     /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6155        to acdHelper(). This is done implicitly, without creating a GT_CALL
6156        node. Hence, this interference is be handled implicitly by
6157        restricting the registers used for enregistering variables */
6158
6159     if (opts.compDbgCode)
6160     {
6161         allAcceptableRegs &= RBM_CALLEE_SAVED;
6162     }
6163
6164     /* Compute the initial regmask to use for the first pass */
6165     regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6166     regMaskTP regUsed;
6167
6168 #if CPU_USES_BLOCK_MOVE
6169     /* If we might need to generate a rep mov instruction */
6170     /* remove ESI and EDI */
6171     if (compBlkOpUsed)
6172         regAvail &= ~(RBM_ESI | RBM_EDI);
6173 #endif
6174
6175 #ifdef _TARGET_X86_
6176     /* If we using longs then we remove ESI to allow */
6177     /* ESI:EBX to be saved accross a call */
6178     if (compLongUsed)
6179         regAvail &= ~(RBM_ESI);
6180 #endif
6181
6182 #ifdef _TARGET_ARM_
6183     // For the first register allocation pass we don't want to color using r4
6184     // as we want to allow it to be used to color the internal temps instead
6185     // when r0,r1,r2,r3 are all in use.
6186     //
6187     regAvail &= ~(RBM_R4);
6188 #endif
6189
6190 #if ETW_EBP_FRAMED
6191     // We never have EBP available when ETW_EBP_FRAME is defined
6192     regAvail &= ~RBM_FPBASE;
6193 #else
6194     /* If a frame pointer is required then we remove EBP */
6195     if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6196         regAvail &= ~RBM_FPBASE;
6197 #endif
6198
6199 #ifdef DEBUG
6200     BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6201     if (fJitNoRegLoc)
6202         regAvail = RBM_NONE;
6203 #endif
6204
6205     if ((opts.compFlags & CLFLG_REGVAR) == 0)
6206         regAvail = RBM_NONE;
6207
6208 #if FEATURE_STACK_FP_X87
6209     VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6210     VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6211
6212     // Calculate the set of all tracked FP/non-FP variables
6213     //  into optAllFloatVars and optAllNonFPvars
6214
6215     unsigned   lclNum;
6216     LclVarDsc* varDsc;
6217
6218     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6219     {
6220         /* Ignore the variable if it's not tracked */
6221
6222         if (!varDsc->lvTracked)
6223             continue;
6224
6225         /* Get hold of the index and the interference mask for the variable */
6226
6227         unsigned varNum = varDsc->lvVarIndex;
6228
6229         /* add to the set of all tracked FP/non-FP variables */
6230
6231         if (varDsc->IsFloatRegType())
6232             VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6233         else
6234             VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6235     }
6236 #endif
6237
6238     for (unsigned i = 0; i < REG_COUNT; i++)
6239     {
6240         VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6241     }
6242     for (unsigned i = 0; i < lvaTrackedCount; i++)
6243     {
6244         VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6245     }
6246
6247     raNewBlocks          = false;
6248     rpPredictAssignAgain = false;
6249     rpPasses             = 0;
6250
6251     bool      mustPredict   = true;
6252     unsigned  stmtNum       = 0;
6253     unsigned  oldStkPredict = DUMMY_INIT(~0);
6254     VARSET_TP oldLclRegIntf[REG_COUNT];
6255
6256     for (unsigned i = 0; i < REG_COUNT; i++)
6257     {
6258         VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6259     }
6260
6261     while (true)
6262     {
6263         /* Assign registers to variables using the variable/register interference
6264            graph (raLclRegIntf[]) calculated in the previous pass */
6265         regUsed = rpPredictAssignRegVars(regAvail);
6266
6267         mustPredict |= rpLostEnreg;
6268
6269 #ifdef _TARGET_ARM_
6270         // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6271         if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6272             !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6273         {
6274             // We can release our reservation on R10 and use it to color registers
6275             codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6276             allAcceptableRegs |= RBM_OPT_RSVD;
6277         }
6278 #endif
6279
6280         /* Is our new prediction good enough?? */
6281         if (!mustPredict)
6282         {
6283             /* For small methods (less than 12 stmts), we add a    */
6284             /*   extra pass if we are predicting the use of some   */
6285             /*   of the caller saved registers.                    */
6286             /* This fixes RAID perf bug 43440 VB Ackerman function */
6287
6288             if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6289             {
6290                 goto EXTRA_PASS;
6291             }
6292
6293             /* If every variable was fully enregistered then we're done */
6294             if (rpStkPredict == 0)
6295                 goto ALL_DONE;
6296
6297             // This was a successful prediction.  Record it, in case it turns out to be the best one.
6298             rpRecordPrediction();
6299
6300             if (rpPasses > 1)
6301             {
6302                 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6303
6304                 // Be careful about overflow
6305                 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6306                 if (oldStkPredict < highStkPredict)
6307                     goto ALL_DONE;
6308
6309                 if (rpStkPredict < rpPasses * 8)
6310                     goto ALL_DONE;
6311
6312                 if (rpPasses >= (rpPassesMax - 1))
6313                     goto ALL_DONE;
6314             }
6315
6316         EXTRA_PASS:
6317             /* We will do another pass */;
6318         }
6319
6320 #ifdef DEBUG
6321         if (JitConfig.JitAssertOnMaxRAPasses())
6322         {
6323             noway_assert(rpPasses < rpPassesMax &&
6324                          "This may not a bug, but dev team should look and see what is happening");
6325         }
6326 #endif
6327
6328         // The "64" here had been "VARSET_SZ".  It is unclear why this number is connected with
6329         // the (max) size of a VARSET.  We've eliminated this constant, so I left this as a constant.  We hope
6330         // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6331         if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6332         {
6333             NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6334         }
6335
6336 #ifdef DEBUG
6337         if (verbose)
6338         {
6339             if (rpPasses > 0)
6340             {
6341                 if (rpLostEnreg)
6342                     printf("\n; Another pass due to rpLostEnreg");
6343                 if (rpAddedVarIntf)
6344                     printf("\n; Another pass due to rpAddedVarIntf");
6345                 if ((rpPasses == 1) && rpPredictAssignAgain)
6346                     printf("\n; Another pass due to rpPredictAssignAgain");
6347             }
6348             printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6349         }
6350 #endif
6351
6352         /*  Zero the variable/register interference graph */
6353         for (unsigned i = 0; i < REG_COUNT; i++)
6354         {
6355             VarSetOps::ClearD(this, raLclRegIntf[i]);
6356         }
6357
6358         // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6359         // it must not be in a register trashed by the callee
6360         if (info.compLvFrameListRoot != BAD_VAR_NUM)
6361         {
6362             assert(!opts.ShouldUsePInvokeHelpers());
6363             noway_assert(info.compLvFrameListRoot < lvaCount);
6364
6365             LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6366
6367             if (pinvokeVarDsc->lvTracked)
6368             {
6369                 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6370                                                       DEBUGARG("compLvFrameListRoot"));
6371
6372                 // We would prefer to have this be enregister in the PINVOKE_TCB register
6373                 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6374             }
6375
6376             // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6377             // worst case).  Make sure that the return value compiler temp that we create for the single
6378             // return block knows about this interference.
6379             if (genReturnLocal != BAD_VAR_NUM)
6380             {
6381                 noway_assert(genReturnBB);
6382                 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6383                 if (localTmp->lvTracked)
6384                 {
6385                     rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6386                                     VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6387                 }
6388             }
6389         }
6390
6391 #ifdef _TARGET_ARM_
6392         if (compFloatingPointUsed)
6393         {
6394             bool hasMustInitFloat = false;
6395
6396             // if we have any must-init floating point LclVars then we will add register interferences
6397             // for the arguments with RBM_SCRATCH
6398             // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6399             // we won't home the arguments into REG_SCRATCH
6400
6401             unsigned   lclNum;
6402             LclVarDsc* varDsc;
6403
6404             for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6405             {
6406                 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6407                 {
6408                     hasMustInitFloat = true;
6409                     break;
6410                 }
6411             }
6412
6413             if (hasMustInitFloat)
6414             {
6415                 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6416                 {
6417                     // If is an incoming argument, that is tracked and not floating-point
6418                     if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6419                     {
6420                         rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6421                                                          DEBUGARG("arg home with must-init fp"));
6422                     }
6423                 }
6424             }
6425         }
6426 #endif
6427
6428         stmtNum        = 0;
6429         rpAddedVarIntf = false;
6430         rpLostEnreg    = false;
6431
6432         /* Walk the basic blocks and predict reg use for each tree */
6433
6434         for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6435         {
6436             GenTreePtr stmt;
6437             compCurBB       = block;
6438             compCurLifeTree = NULL;
6439             VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6440
6441             compCurBB = block;
6442
6443             for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6444             {
6445                 noway_assert(stmt->gtOper == GT_STMT);
6446
6447                 rpPredictSpillCnt = 0;
6448                 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6449                 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6450
6451                 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6452                 stmtNum++;
6453 #ifdef DEBUG
6454                 if (verbose && 1)
6455                 {
6456                     printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6457                     gtDispTree(tree);
6458                     printf("\n");
6459                 }
6460 #endif
6461                 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6462
6463                 noway_assert(rpAsgVarNum == -1);
6464
6465                 if (rpPredictSpillCnt > tmpIntSpillMax)
6466                     tmpIntSpillMax = rpPredictSpillCnt;
6467             }
6468         }
6469         rpPasses++;
6470
6471         /* Decide whether we need to set mustPredict */
6472         mustPredict = false;
6473
6474 #ifdef _TARGET_ARM_
6475         // The spill count may be now high enough that we now need to reserve r10. If this is the case, we'll need to
6476         // reserve r10, and if it was used, throw out the last prediction and repredict.
6477         if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6478         {
6479             codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6480             allAcceptableRegs &= ~RBM_OPT_RSVD;
6481             if ((regUsed & RBM_OPT_RSVD) != 0)
6482             {
6483                 mustPredict              = true;
6484                 rpBestRecordedPrediction = nullptr;
6485             }
6486         }
6487 #endif
6488
6489         if (rpAddedVarIntf)
6490         {
6491             mustPredict = true;
6492 #ifdef DEBUG
6493             if (verbose)
6494                 raDumpVarIntf();
6495 #endif
6496         }
6497
6498         if (rpPasses == 1)
6499         {
6500             if ((opts.compFlags & CLFLG_REGVAR) == 0)
6501                 goto ALL_DONE;
6502
6503             if (rpPredictAssignAgain)
6504                 mustPredict = true;
6505 #ifdef DEBUG
6506             if (fJitNoRegLoc)
6507                 goto ALL_DONE;
6508 #endif
6509         }
6510
6511         /* Calculate the new value to use for regAvail */
6512
6513         regAvail = allAcceptableRegs;
6514
6515         /* If a frame pointer is required then we remove EBP */
6516         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6517             regAvail &= ~RBM_FPBASE;
6518
6519 #if ETW_EBP_FRAMED
6520         // We never have EBP available when ETW_EBP_FRAME is defined
6521         regAvail &= ~RBM_FPBASE;
6522 #endif
6523
6524         // If we have done n-passes then we must continue to pessimize the
6525         // interference graph by or-ing the interferences from the previous pass
6526
6527         if (rpPasses > rpPassesPessimize)
6528         {
6529             for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6530                 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6531
6532             /* If we reverse an EBP enregistration then keep it that way */
6533             if (rpReverseEBPenreg)
6534                 regAvail &= ~RBM_FPBASE;
6535         }
6536
6537 #ifdef DEBUG
6538         if (verbose)
6539             raDumpRegIntf();
6540 #endif
6541
6542         /*  Save the old variable/register interference graph */
6543         for (unsigned i = 0; i < REG_COUNT; i++)
6544         {
6545             VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6546         }
6547         oldStkPredict = rpStkPredict;
6548     } // end of while (true)
6549
6550 ALL_DONE:;
6551
6552     // If we recorded a better feasible allocation than we ended up with, go back to using it.
6553     rpUseRecordedPredictionIfBetter();
6554
6555 #if DOUBLE_ALIGN
6556     codeGen->setDoubleAlign(false);
6557 #endif
6558
6559     switch (rpFrameType)
6560     {
6561         default:
6562             noway_assert(!"rpFrameType not set correctly!");
6563             break;
6564         case FT_ESP_FRAME:
6565             noway_assert(!codeGen->isFramePointerRequired());
6566             noway_assert(!codeGen->isFrameRequired());
6567             codeGen->setFramePointerUsed(false);
6568             break;
6569         case FT_EBP_FRAME:
6570             noway_assert((regUsed & RBM_FPBASE) == 0);
6571             codeGen->setFramePointerUsed(true);
6572             break;
6573 #if DOUBLE_ALIGN
6574         case FT_DOUBLE_ALIGN_FRAME:
6575             noway_assert((regUsed & RBM_FPBASE) == 0);
6576             noway_assert(!codeGen->isFramePointerRequired());
6577             codeGen->setFramePointerUsed(false);
6578             codeGen->setDoubleAlign(true);
6579             break;
6580 #endif
6581     }
6582
6583     /* Record the set of registers that we need */
6584     codeGen->regSet.rsClearRegsModified();
6585     if (regUsed != RBM_NONE)
6586     {
6587         codeGen->regSet.rsSetRegsModified(regUsed);
6588     }
6589
6590     /* We need genFullPtrRegMap if :
6591      * The method is fully interruptible, or
6592      * We are generating an EBP-less frame (for stack-pointer deltas)
6593      */
6594
6595     genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6596
6597     raMarkStkVars();
6598 #ifdef DEBUG
6599     if (verbose)
6600     {
6601         printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6602         printf("  rpStkPredict was %u\n", rpStkPredict);
6603     }
6604 #endif
6605     rpRegAllocDone = true;
6606 }
6607
6608 #endif // LEGACY_BACKEND
6609
6610 /*****************************************************************************
6611  *
6612  *  Mark all variables as to whether they live on the stack frame
6613  *  (part or whole), and if so what the base is (FP or SP).
6614  */
6615
6616 void Compiler::raMarkStkVars()
6617 {
6618     unsigned   lclNum;
6619     LclVarDsc* varDsc;
6620
6621     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6622     {
6623         // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6624         CLANG_FORMAT_COMMENT_ANCHOR;
6625
6626 #ifdef LEGACY_BACKEND
6627         varDsc->lvOnFrame = false;
6628 #endif // LEGACY_BACKEND
6629
6630         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6631         {
6632             noway_assert(!varDsc->lvRegister);
6633             goto ON_STK;
6634         }
6635
6636         /* Fully enregistered variables don't need any frame space */
6637
6638         if (varDsc->lvRegister)
6639         {
6640             if (!isRegPairType(varDsc->TypeGet()))
6641             {
6642                 goto NOT_STK;
6643             }
6644
6645             /* For "large" variables make sure both halves are enregistered */
6646
6647             if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6648             {
6649                 goto NOT_STK;
6650             }
6651         }
6652         /* Unused variables typically don't get any frame space */
6653         else if (varDsc->lvRefCnt == 0)
6654         {
6655             bool needSlot = false;
6656
6657             bool stkFixedArgInVarArgs =
6658                 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6659
6660             // If its address has been exposed, ignore lvRefCnt. However, exclude
6661             // fixed arguments in varargs method as lvOnFrame shouldn't be set
6662             // for them as we don't want to explicitly report them to GC.
6663
6664             if (!stkFixedArgInVarArgs)
6665             {
6666                 needSlot |= varDsc->lvAddrExposed;
6667             }
6668
6669 #if FEATURE_FIXED_OUT_ARGS
6670
6671             /* Is this the dummy variable representing GT_LCLBLK ? */
6672             needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6673
6674 #endif // FEATURE_FIXED_OUT_ARGS
6675
6676 #ifdef DEBUG
6677             /* For debugging, note that we have to reserve space even for
6678                unused variables if they are ever in scope. However, this is not
6679                an issue as fgExtendDbgLifetimes() adds an initialization and
6680                variables in scope will not have a zero ref-cnt.
6681              */
6682             if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6683             {
6684                 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6685                 {
6686                     noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6687                 }
6688             }
6689 #endif
6690             /*
6691               For Debug Code, we have to reserve space even if the variable is never
6692               in scope. We will also need to initialize it if it is a GC var.
6693               So we set lvMustInit and artifically bump up the ref-cnt.
6694              */
6695
6696             if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6697             {
6698                 needSlot |= true;
6699
6700                 if (lvaTypeIsGC(lclNum))
6701                 {
6702                     varDsc->lvRefCnt = 1;
6703                 }
6704
6705                 if (!varDsc->lvIsParam)
6706                 {
6707                     varDsc->lvMustInit = true;
6708                 }
6709             }
6710
6711 #ifndef LEGACY_BACKEND
6712             varDsc->lvOnFrame = needSlot;
6713 #endif // !LEGACY_BACKEND
6714             if (!needSlot)
6715             {
6716                 /* Clear the lvMustInit flag in case it is set */
6717                 varDsc->lvMustInit = false;
6718
6719                 goto NOT_STK;
6720             }
6721         }
6722
6723 #ifndef LEGACY_BACKEND
6724         if (!varDsc->lvOnFrame)
6725         {
6726             goto NOT_STK;
6727         }
6728 #endif // !LEGACY_BACKEND
6729
6730     ON_STK:
6731         /* The variable (or part of it) lives on the stack frame */
6732
6733         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6734 #if FEATURE_FIXED_OUT_ARGS
6735         noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6736 #else  // FEATURE_FIXED_OUT_ARGS
6737         noway_assert(lvaLclSize(lclNum) != 0);
6738 #endif // FEATURE_FIXED_OUT_ARGS
6739
6740         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6741                                   // stack frame
6742
6743     NOT_STK:;
6744         varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6745
6746 #if DOUBLE_ALIGN
6747
6748         if (codeGen->doDoubleAlign())
6749         {
6750             noway_assert(codeGen->isFramePointerUsed() == false);
6751
6752             /* All arguments are off of EBP with double-aligned frames */
6753
6754             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6755             {
6756                 varDsc->lvFramePointerBased = true;
6757             }
6758         }
6759
6760 #endif
6761
6762         /* Some basic checks */
6763
6764         // It must be in a register, on frame, or have zero references.
6765
6766         noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6767
6768 #ifndef LEGACY_BACKEND
6769         // We can't have both lvRegister and lvOnFrame for RyuJIT
6770         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6771 #else  // LEGACY_BACKEND
6772
6773         /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6774         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6775                      (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6776 #endif // LEGACY_BACKEND
6777
6778 #ifdef DEBUG
6779
6780         // For varargs functions, there should be no direct references to
6781         // parameter variables except for 'this' (because these were morphed
6782         // in the importer) and the 'arglist' parameter (which is not a GC
6783         // pointer). and the return buffer argument (if we are returning a
6784         // struct).
6785         // This is important because we don't want to try to report them
6786         // to the GC, as the frame offsets in these local varables would
6787         // not be correct.
6788
6789         if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6790         {
6791             if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6792             {
6793                 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6794             }
6795         }
6796 #endif
6797     }
6798 }
6799
6800 #ifdef LEGACY_BACKEND
6801 void Compiler::rpRecordPrediction()
6802 {
6803     if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6804     {
6805         if (rpBestRecordedPrediction == NULL)
6806         {
6807             rpBestRecordedPrediction =
6808                 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
6809         }
6810         for (unsigned k = 0; k < lvaCount; k++)
6811         {
6812             rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6813             rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
6814             rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
6815         }
6816         rpBestRecordedStkPredict = rpStkPredict;
6817         JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6818     }
6819 }
6820
6821 void Compiler::rpUseRecordedPredictionIfBetter()
6822 {
6823     JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6824             rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
6825     if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6826     {
6827         JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6828                 rpBestRecordedStkPredict);
6829
6830         for (unsigned k = 0; k < lvaCount; k++)
6831         {
6832             lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6833             lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6834             lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6835         }
6836     }
6837 }
6838 #endif // LEGACY_BACKEND