// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           RegAlloc                                        XX
XX                                                                           XX
XX  Does the register allocation and puts the remaining lclVars on the stack XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "regalloc.h"

#if FEATURE_FP_REGALLOC
Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
{
    DWORD val = JitConfig.JitRegisterFP();

    return (enumConfigRegisterFP)(val & 0x3);
}
#endif // FEATURE_FP_REGALLOC

regMaskTP Compiler::raConfigRestrictMaskFP()
{
    regMaskTP result = RBM_NONE;

#if FEATURE_FP_REGALLOC
    switch (raConfigRegisterFP())
    {
        case CONFIG_REGISTER_FP_NONE:
            result = RBM_NONE;
            break;
        case CONFIG_REGISTER_FP_CALLEE_TRASH:
            result = RBM_FLT_CALLEE_TRASH;
            break;
        case CONFIG_REGISTER_FP_CALLEE_SAVED:
            result = RBM_FLT_CALLEE_SAVED;
            break;
        case CONFIG_REGISTER_FP_FULL:
            result = RBM_ALLFLOAT;
            break;
    }
#endif

    return result;
}

#if DOUBLE_ALIGN
DWORD Compiler::getCanDoubleAlign()
{
#ifdef DEBUG
    if (compStressCompile(STRESS_DBL_ALN, 20))
        return MUST_DOUBLE_ALIGN;

    return JitConfig.JitDoubleAlign();
#else
    return DEFAULT_DOUBLE_ALIGN;
#endif
}

//------------------------------------------------------------------------
// shouldDoubleAlign: Determine whether to double-align the frame
//
// Arguments:
//    refCntStk       - sum of     ref counts for all stack based variables
//    refCntEBP       - sum of     ref counts for EBP enregistered variables
//    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
//    refCntStkParam  - sum of     ref counts for all stack based parameters
//    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
//                      with double fields).
//
// Return Value:
//    Returns true if this method estimates that a double-aligned frame would be beneficial
//
// Notes:
//    The impact of a double-aligned frame is computed as follows:
//    - We save a byte of code for each parameter reference (they are frame-pointer relative)
//    - We pay a byte of code for each non-parameter stack reference.
//    - We save the misalignment penalty and possible cache-line crossing penalty.
//      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
//    - We pay 7 extra bytes for:
//        MOV EBP,ESP,
//        LEA ESP,[EBP-offset]
//        AND ESP,-8 to double align ESP
//    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
//
//    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
//    Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
//    ref count for double-aligned values.
//
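//    Worked example (illustrative numbers only, not taken from any particular method):
//    with refCntStk = 10, refCntEBP = 5, refCntStkParam = 3 and the 7 byte setup cost,
//    bytesUsed = 10 + 5 - 3 + 7 = 19. Under FAST_CODE the misalignment weight is 16, so
//    we predict not to double-align whenever 19 > (refCntWtdStkDbl * 16) / BB_UNITY_WEIGHT,
//    i.e. unless the weighted double references outweigh the extra code bytes.
//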
bool Compiler::shouldDoubleAlign(
    unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
{
    bool           doDoubleAlign        = false;
    const unsigned DBL_ALIGN_SETUP_SIZE = 7;

    unsigned bytesUsed         = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
    unsigned misaligned_weight = 4;

    if (compCodeOpt() == Compiler::SMALL_CODE)
        misaligned_weight = 0;

    if (compCodeOpt() == Compiler::FAST_CODE)
        misaligned_weight *= 4;

    JITDUMP("\nDouble alignment:\n");
    JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
    JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
    JITDUMP("  Sum of weighted ref counts for stack based doubles: %i\n", refCntWtdStkDbl);

    if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
    {
        JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
    }
    else if (refCntWtdEBP > refCntWtdStkDbl * 2)
    {
        // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
        // not double aligned.
        // Here are the numbers that make this not double-aligned.
        //     refCntWtdStkDbl = 0x164
        //     refCntWtdEBP    = 0x1a4
        // We think we do need to change the heuristic to be in favor of double-align.

        JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
    }
    else
    {
        // OK, we passed all of the benefit tests, so we'll predict a double aligned frame.
        JITDUMP("    Predicting to create a double-aligned frame\n");
        doDoubleAlign = true;
    }
    return doDoubleAlign;
}
#endif // DOUBLE_ALIGN

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

void Compiler::raInit()
{
#if FEATURE_STACK_FP_X87
    /* We have not assigned any FP variables to registers yet */

    VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
#endif
    codeGen->intRegState.rsIsFloat   = false;
    codeGen->floatRegState.rsIsFloat = true;

    rpReverseEBPenreg = false;
    rpAsgVarNum       = -1;
    rpPassesMax       = 6;
    rpPassesPessimize = rpPassesMax - 3;
    if (opts.compDbgCode)
    {
        rpPassesMax++;
    }
    rpStkPredict            = (unsigned)-1;
    rpFrameType             = FT_NOT_SET;
    rpLostEnreg             = false;
    rpMustCreateEBPCalled   = false;
    rpRegAllocDone          = false;
    rpMaskPInvokeEpilogIntf = RBM_NONE;

    rpPredictMap[PREDICT_NONE] = RBM_NONE;
    rpPredictMap[PREDICT_ADDR] = RBM_NONE;

#if FEATURE_FP_REGALLOC
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
#else
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
#endif

#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
#include "register.h"

#if defined(_TARGET_ARM_)

    rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
    rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
    rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;

#elif defined(_TARGET_AMD64_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;

#elif defined(_TARGET_X86_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
    rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
    rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;

#endif

    rpBestRecordedPrediction = NULL;
}

/*****************************************************************************
 *
 *  The following table(s) determine the order in which registers are considered
 *  for variables to live in
 */

const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
{
#if FEATURE_FP_REGALLOC
    if (varTypeIsFloating(regType))
    {
        static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
        const unsigned         raRegVarOrderFltSize = _countof(raRegVarOrderFlt);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderFltSize;

        return &raRegVarOrderFlt[0];
    }
    else
#endif
    {
        static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
        const unsigned         raRegVarOrderSize = _countof(raRegVarOrder);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderSize;

        return &raRegVarOrder[0];
    }
}
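
// Usage sketch (illustrative only, not a call site from this file): callers retrieve
// the order and walk it when choosing a register for a tracked variable, e.g.
//
//     unsigned         orderSize;
//     const regNumber* order = raGetRegVarOrder(TYP_INT, &orderSize);
//     for (unsigned i = 0; i < orderSize; i++)
//     {
//         // consider order[i] as the next candidate register...
//     }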

#ifdef DEBUG

/*****************************************************************************
 *
 *  Dump out the variable interference graph
 *
 */

void Compiler::raDumpVarIntf()
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    printf("Var. interference graph for %s\n", info.compFullName);

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */
        unsigned varIndex = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varIndex);

        unsigned refIndex;

        for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
        {
            if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
                printf("T%02u ", refIndex);
            else
                printf("    ");
        }

        printf("\n");
    }

    printf("\n");
}

/*****************************************************************************
 *
 *  Dump out the register interference graph
 *
 */
void Compiler::raDumpRegIntf()
{
    printf("Reg. interference graph for %s\n", info.compFullName);

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        unsigned varNum;

        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */

        varNum = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varNum);

        if (varDsc->IsFloatRegType())
        {
#if !FEATURE_STACK_FP_X87
            for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum, true));
                else
                    printf("    ");
            }
#endif
        }
        else
        {
            for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum));
                else
                    printf("    ");
            }
        }

        printf("\n");
    }

    printf("\n");
}
#endif // DEBUG

/*****************************************************************************
 *
 *  We'll adjust the ref counts based on interference
 *
 */

void Compiler::raAdjustVarIntf()
{
    // This method was not correct and has been disabled.
    return;
}

/*****************************************************************************/
/*****************************************************************************/
/* Determine the register mask for a call/return value of the given type.
 */

inline regMaskTP Compiler::genReturnRegForTree(GenTree* tree)
{
    var_types type = tree->TypeGet();

    if (varTypeIsStruct(type) && IsHfa(tree))
    {
        int retSlots = GetHfaCount(tree);
        return ((1 << retSlots) - 1) << REG_FLOATRET;
    }

    const static regMaskTP returnMap[TYP_COUNT] = {
        RBM_ILLEGAL,   // TYP_UNDEF,
        RBM_NONE,      // TYP_VOID,
        RBM_INTRET,    // TYP_BOOL,
        RBM_INTRET,    // TYP_BYTE,
        RBM_INTRET,    // TYP_UBYTE,
        RBM_INTRET,    // TYP_SHORT,
        RBM_INTRET,    // TYP_USHORT,
        RBM_INTRET,    // TYP_INT,
        RBM_INTRET,    // TYP_UINT,
        RBM_LNGRET,    // TYP_LONG,
        RBM_LNGRET,    // TYP_ULONG,
        RBM_FLOATRET,  // TYP_FLOAT,
        RBM_DOUBLERET, // TYP_DOUBLE,
        RBM_INTRET,    // TYP_REF,
        RBM_INTRET,    // TYP_BYREF,
        RBM_ILLEGAL,   // TYP_STRUCT,
        RBM_ILLEGAL,   // TYP_BLK,
        RBM_ILLEGAL,   // TYP_LCLBLK,
        RBM_ILLEGAL,   // TYP_UNKNOWN,
    };

    assert((unsigned)type < _countof(returnMap));
    assert(returnMap[TYP_LONG] == RBM_LNGRET);
    assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
    assert(returnMap[TYP_REF] == RBM_INTRET);
    assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);

    regMaskTP result = returnMap[type];
    assert(result != RBM_ILLEGAL);
    return result;
}
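
// Illustrative note (values assumed, not from any particular method): for a 3-slot HFA
// return, retSlots = 3 and the mask is ((1 << 3) - 1) << REG_FLOATRET, i.e. the three
// consecutive floating point return registers starting at REG_FLOATRET.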

/*****************************************************************************/

/****************************************************************************/

#ifdef DEBUG

static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
    {
        if (!varDsc->lvTracked)
            continue;

        if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
            continue;

        if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
            printf("V%02u ", lclNum);
    }
}

#endif

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************
 *
 *  Debugging helpers - display variable liveness info.
 */

void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
{
    do
    {
        printf("BB%02u: ", beg->bbNum);

        printf(" in  = [ ");
        dispLifeSet(comp, mask, beg->bbLiveIn);
        printf("] ,");

        printf(" out = [ ");
        dispLifeSet(comp, mask, beg->bbLiveOut);
        printf("]");

        if (beg->bbFlags & BBF_VISITED)
            printf(" inner=%u", beg->bbFPinVars);

        printf("\n");

        beg = beg->bbNext;
        if (!beg)
            return;
    } while (beg != end);
}

#if FEATURE_STACK_FP_X87
void Compiler::raDispFPlifeInfo()
{
    BasicBlock* block;

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        GenTree* stmt;

        printf("BB%02u: in  = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
        printf("]\n\n");

        VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
        for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            GenTree* tree;

            noway_assert(stmt->gtOper == GT_STMT);

            for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
            {
                VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));

                dispLifeSet(this, optAllFloatVars, life);
                printf("   ");
                gtDispTree(tree, 0, NULL, true);
            }

            printf("\n");
        }

        printf("BB%02u: out = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
        printf("]\n\n");
    }
}
#endif // FEATURE_STACK_FP_X87
/*****************************************************************************/
#endif // DEBUG
/*****************************************************************************/

/*****************************************************************************/
void Compiler::raSetRegVarOrder(
    var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
{
    unsigned         normalVarOrderSize;
    const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
    unsigned         index;
    unsigned         listIndex = 0;
    regMaskTP        usedReg   = avoidReg;

    noway_assert(*customVarOrderSize >= normalVarOrderSize);

    if (prefReg)
    {
        /* First place the preferred registers at the start of customVarOrder */

        regMaskTP regBit;
        regNumber regNum;

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNum = normalVarOrder[index];
            regBit = genRegMask(regNum);

            if (usedReg & regBit)
                continue;

            if (prefReg & regBit)
            {
                usedReg |= regBit;
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                prefReg -= regBit;
                if (prefReg == 0)
                    break;
            }
        }

#if CPU_HAS_BYTE_REGS
        /* Then, if byteable registers are preferred, place them */

        if (prefReg & RBM_BYTE_REG_FLAG)
        {
            for (index = 0; index < normalVarOrderSize; index++)
            {
                regNum = normalVarOrder[index];
                regBit = genRegMask(regNum);

                if (usedReg & regBit)
                    continue;

                if (RBM_BYTE_REGS & regBit)
                {
                    usedReg |= regBit;
                    noway_assert(listIndex < normalVarOrderSize);
                    customVarOrder[listIndex++] = regNum;
                }
            }
        }

#endif // CPU_HAS_BYTE_REGS
    }

    /* Now place all the non-preferred registers */

    for (index = 0; index < normalVarOrderSize; index++)
    {
        regNumber regNum = normalVarOrder[index];
        regMaskTP regBit = genRegMask(regNum);

        if (usedReg & regBit)
            continue;

        usedReg |= regBit;
        noway_assert(listIndex < normalVarOrderSize);
        customVarOrder[listIndex++] = regNum;
    }

    if (avoidReg)
    {
        /* Now place the "avoid" registers */

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNumber regNum = normalVarOrder[index];
            regMaskTP regBit = genRegMask(regNum);

            if (avoidReg & regBit)
            {
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                avoidReg -= regBit;
                if (avoidReg == 0)
                    break;
            }
        }
    }

    *customVarOrderSize = listIndex;
    noway_assert(listIndex == normalVarOrderSize);
}
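
// Illustrative example (register names assumed for x86, not derived from this file):
// given a normal order of { EAX, EDX, ECX, EBX }, prefReg = RBM_ECX and
// avoidReg = RBM_EBX, the custom order becomes { ECX, EAX, EDX, EBX } --
// preferred registers first, then the remaining ones, with "avoid" registers last.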

/*****************************************************************************
 *
 *  Set up the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
 */

void Compiler::raSetupArgMasks(RegState* regState)
{
    /* Determine the registers holding incoming register arguments */
    /*  and set up raAvoidArgRegMask to the set of registers that we */
    /*  may want to avoid when enregistering the locals.             */

    regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    raAvoidArgRegMask                  = RBM_NONE;

    LclVarDsc* argsEnd = lvaTable + info.compArgsCount;

    for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
    {
        noway_assert(argDsc->lvIsParam);

        // Is it a register argument?
        if (!argDsc->lvIsRegArg)
            continue;

        // only process args that apply to the current register file
        if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
        {
            continue;
        }

        // Is it dead on entry?
        // In certain cases such as when compJmpOpUsed is true,
        // or when we have a generic type context arg that we must report,
        // the arguments have to be kept alive throughout the prolog.
        // So we have to consider them as live on entry.
        //
        bool keepArgAlive = compJmpOpUsed;
        if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
            ((lvaTable + info.compTypeCtxtArg) == argDsc))
        {
            keepArgAlive = true;
        }

        if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
        {
            continue;
        }

        // The code to set the regState for each arg is outlined for shared use
        // by linear scan
        regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);

        // Do we need to try to avoid this incoming arg register?

        // If it's not tracked, don't do the stuff below.
        if (!argDsc->lvTracked)
            continue;

        // If the incoming arg is used after a call it is live across
        //  a call and will have to be allocated to a caller saved
        //  register anyway (a very common case).
        //
        // In this case it is pointless to ask the higher ref count
        //  locals to avoid using the incoming arg register.

        unsigned argVarIndex = argDsc->lvVarIndex;

        /* Do the incoming register and the arg variable interfere? */

        if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
        {
            // No, they do not interfere,
            //  so we add inArgReg to raAvoidArgRegMask

            raAvoidArgRegMask |= genRegMask(inArgReg);
        }
#ifdef _TARGET_ARM_
        if (argDsc->lvType == TYP_DOUBLE)
        {
            // Avoid the double register argument pair for register allocation.
            if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
            {
                raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
            }
        }
#endif
    }
}

#endif // LEGACY_BACKEND

// The code to set the regState for each arg is outlined for shared use
// by linear scan. (It is not shared for the System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
{
    regNumber inArgReg  = argDsc->lvArgReg;
    regMaskTP inArgMask = genRegMask(inArgReg);

    if (regState->rsIsFloat)
    {
        noway_assert(inArgMask & RBM_FLTARG_REGS);
    }
    else //  regState is for the integer registers
    {
        // This might be the fixed return buffer register argument (on ARM64)
        // We check and allow inArgReg to be theFixedRetBuffReg
        if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
        {
            // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
            noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
            // We should have recorded the variable number for the return buffer arg
            noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
        }
        else // we have a regular arg
        {
            noway_assert(inArgMask & RBM_ARG_REGS);
        }
    }

    regState->rsCalleeRegArgMaskLiveIn |= inArgMask;

#ifdef _TARGET_ARM_
    if (argDsc->lvType == TYP_DOUBLE)
    {
        if (info.compIsVarArgs || opts.compUseSoftFP)
        {
            assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
            assert(!regState->rsIsFloat);
        }
        else
        {
            assert(regState->rsIsFloat);
            assert(emitter::isDoubleReg(inArgReg));
        }
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
    else if (argDsc->lvType == TYP_LONG)
    {
        assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
        assert(!regState->rsIsFloat);
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
#endif // _TARGET_ARM_

#if FEATURE_MULTIREG_ARGS
    if (varTypeIsStruct(argDsc->lvType))
    {
        if (argDsc->lvIsHfaRegArg())
        {
            assert(regState->rsIsFloat);
            unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
            for (unsigned i = 1; i < cSlots; i++)
            {
                assert(inArgReg + i <= LAST_FP_ARGREG);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
            }
        }
        else
        {
            unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
            for (unsigned i = 1; i < cSlots; i++)
            {
                regNumber nextArgReg = (regNumber)(inArgReg + i);
                if (nextArgReg > REG_ARG_LAST)
                {
                    break;
                }
                assert(regState->rsIsFloat == false);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
            }
        }
    }
#endif // FEATURE_MULTIREG_ARGS

    return inArgReg;
}
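
// Illustrative example (ARM, hypothetical values): a TYP_DOUBLE argument passed under
// soft-FP or varargs in the integer pair starting at R0 marks both halves live-in, i.e.
// rsCalleeRegArgMaskLiveIn |= genRegMask(REG_R0) | genRegMask(REG_R1); a two-slot
// struct starting at the first argument register similarly marks the following one.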

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

/*****************************************************************************
 *
 *  Assign variables to live in registers, etc.
 */

void Compiler::raAssignVars()
{
#ifdef DEBUG
    if (verbose)
        printf("*************** In raAssignVars()\n");
#endif
    /* We need to keep track of which registers we ever touch */

    codeGen->regSet.rsClearRegsModified();

#if FEATURE_STACK_FP_X87
    // FP register allocation
    raEnregisterVarsStackFP();
    raGenerateFPRefCounts();
#endif

    /* Predict registers used by code generation */
    rpPredictRegUse(); // New reg predictor/allocator

    // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
    // so that the gc tracking logic and lvMustInit logic will ignore them.

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        if (varDsc->lvType != TYP_STRUCT)
            continue;

        if (!varDsc->lvPromoted)
            continue;

        if (varDsc->lvIsParam)
            continue;

        if (varDsc->lvRefCnt > 0)
            continue;

#ifdef DEBUG
        if (verbose)
        {
            printf("Mark unused struct local V%02u\n", lclNum);
        }

        lvaPromotionType promotionType = lvaGetPromotionType(varDsc);

        if (promotionType == PROMOTION_TYPE_DEPENDENT)
        {
            // This should only happen when all its field locals are unused as well.

            for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
                 varNum++)
            {
                noway_assert(lvaTable[varNum].lvRefCnt == 0);
                lvaTable[varNum].lvIsStructField = false;
            }
        }
        else
        {
            noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
        }

        varDsc->lvUnusedStruct = 1;
#endif

        // Change such struct locals to ints

        varDsc->lvType = TYP_INT; // Bash to a non-gc type.
        noway_assert(!varDsc->lvTracked);
        noway_assert(!varDsc->lvRegister);
        varDsc->lvOnFrame  = false; // Force it not to be on the stack.
        varDsc->lvMustInit = false; // Force it not to be initialized.
        varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
    }
}

/*****************************************************************************/
/*****************************************************************************/

/*****************************************************************************
 *
 *   Given a regNumber return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForReg(regNumber reg)
{
    return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
}

/*****************************************************************************
 *
 *   Given a varIndex return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
{
    return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return the correct varNumber value
 */

inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
{
    return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return true if it specifies a Txx register
 */

inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
{
    if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
        return true;
    else
        return false;
}

/*****************************************************************************
 *
 *   Given a regmask return the correct predictReg enum value
 */

static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
{
    rpPredictReg result = PREDICT_NONE;
    if (regmask != 0) /* Check if regmask has any bits set */
    {
        if (((regmask - 1) & regmask) == 0) /* Check if regmask has exactly one bit set */
        {
            DWORD reg = 0;
            assert(FitsIn<DWORD>(regmask));
            BitScanForward(&reg, (DWORD)regmask);
            return rpGetPredictForReg((regNumber)reg);
        }

#if defined(_TARGET_ARM_)
        /* It has multiple bits set */
        else if (regmask == (RBM_R0 | RBM_R1))
        {
            result = PREDICT_PAIR_R0R1;
        }
        else if (regmask == (RBM_R2 | RBM_R3))
        {
            result = PREDICT_PAIR_R2R3;
        }
#elif defined(_TARGET_X86_)
        /* It has multiple bits set */
        else if (regmask == (RBM_EAX | RBM_EDX))
        {
            result = PREDICT_PAIR_EAXEDX;
        }
        else if (regmask == (RBM_ECX | RBM_EBX))
        {
            result = PREDICT_PAIR_ECXEBX;
        }
#endif
        else /* It doesn't match anything */
        {
            result = PREDICT_NONE;
            assert(!"unreachable");
            NO_WAY("bad regpair");
        }
    }
    return result;
}
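
// Note on the single-bit test above (standard bit trick): (mask - 1) & mask clears the
// lowest set bit, so the result is zero exactly when the mask is a power of two. For
// example (illustrative values), mask = 0x8 gives 0x7 & 0x8 == 0, while mask = 0xC gives
// 0xB & 0xC == 0x8, which is non-zero, so we fall through to the register-pair checks.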

/*****************************************************************************
 *
 *  Record a variable to register(s) interference
 */

bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))

{
    bool addedIntf = false;

    if (regMask != 0)
    {
        for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
        {
            regMaskTP regBit = genRegMask(regNum);

            if (regMask & regBit)
            {
                VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
                if (!VarSetOps::IsEmpty(this, newIntf))
                {
#ifdef DEBUG
                    if (verbose)
                    {
                        VarSetOps::Iter newIntfIter(this, newIntf);
                        unsigned        varNum = 0;
                        while (newIntfIter.NextElem(&varNum))
                        {
                            unsigned   lclNum = lvaTrackedToVarNum[varNum];
                            LclVarDsc* varDsc = &lvaTable[lclNum];
#if FEATURE_FP_REGALLOC
                            // Only print the useful interferences
                            // i.e. floating point LclVar interference with floating point registers
                            //         or integer LclVar interference with general purpose registers
                            if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
#endif
                            {
                                printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
                                       getRegName(regNum), msg);
                            }
                        }
                    }
#endif
                    addedIntf = true;
                    VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
                }

                regMask -= regBit;
                if (regMask == 0)
                    break;
            }
        }
    }
    return addedIntf;
}

/*****************************************************************************
 *
 *  Record a new variable to variable(s) interference
 */

bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
{
    noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
    noway_assert(!VarSetOps::IsEmpty(this, intfVar));

    VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
    VarSetOps::AddElemD(this, oneVar, varNum);

    bool newIntf = fgMarkIntf(intfVar, oneVar);

    if (newIntf)
        rpAddedVarIntf = true;

#ifdef DEBUG
    if (verbose && newIntf)
    {
        for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
        {
            if (VarSetOps::IsMember(this, intfVar, oneNum))
            {
                unsigned lclNum = lvaTrackedToVarNum[varNum];
                unsigned lclOne = lvaTrackedToVarNum[oneNum];
                printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
                       oneNum, msg);
            }
        }
    }
#endif

    return newIntf;
}

/*****************************************************************************
 *
 *   Determine preferred register mask for a given predictReg value
 */

inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
{
    if (rpHasVarIndexForPredict(predictReg))
        predictReg = PREDICT_REG;

    noway_assert((unsigned)predictReg < _countof(rpPredictMap));
    noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);

    regMaskTP regAvailForType = rpPredictMap[predictReg];
    if (varTypeIsFloating(type))
    {
        regAvailForType &= RBM_ALLFLOAT;
    }
    else
    {
        regAvailForType &= RBM_ALLINT;
    }
#ifdef _TARGET_ARM_
    if (type == TYP_DOUBLE)
    {
        if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
        {
            // Fix 388433 ARM JitStress WP7
            if ((regAvailForType & RBM_DBL_REGS) != 0)
            {
                regAvailForType |= (regAvailForType << 1);
            }
            else
            {
                regAvailForType = RBM_NONE;
            }
        }
    }
#endif
    return regAvailForType;
}
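
// Illustrative note (ARM, hypothetical mask values): a TYP_DOUBLE prediction for F0
// starts with regAvailForType = RBM_F0; the shift regAvailForType |= (regAvailForType << 1)
// widens it to RBM_F0 | RBM_F1 so that both halves of the double register pair are
// included in the resulting mask.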

/*****************************************************************************
 *
 *  Predict register choice for a type.
 *
 *  Adds the predicted registers to rsModifiedRegsMask.
 */
regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
{
    regMaskTP preferReg = rpPredictRegMask(predictReg, type);
    regNumber regNum;
    regMaskTP regBits;

    // Add any reserved register to the lockedRegs
    lockedRegs |= codeGen->regSet.rsMaskResvd;

    /* Clear out the lockedRegs from preferReg */
    preferReg &= ~lockedRegs;

    if (rpAsgVarNum != -1)
    {
        noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));

        /* Don't pick the register used by rpAsgVarNum either */
        LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
        noway_assert(tgtVar->lvRegNum != REG_STK);

        preferReg &= ~genRegMask(tgtVar->lvRegNum);
    }

    switch (type)
    {
        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_UBYTE:
        case TYP_SHORT:
        case TYP_USHORT:
        case TYP_INT:
        case TYP_UINT:
        case TYP_REF:
        case TYP_BYREF:
#ifdef _TARGET_AMD64_
        case TYP_LONG:
#endif // _TARGET_AMD64_

            // expand preferReg to all non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);

            if (preferReg == 0) // no bits set?
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            if (preferReg != 0)
            {
                /* Iterate the registers in the order specified by rpRegTmpOrder */

                for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
                {
                    regNum  = rpRegTmpOrder[index];
                    regBits = genRegMask(regNum);

                    if ((preferReg & regBits) == regBits)
                    {
                        goto RET;
                    }
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#ifndef _TARGET_AMD64_
        case TYP_LONG:

            if ((preferReg == 0) ||                   // no bits set?
                ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
            {
                // expand preferReg to all non-locked registers
                preferReg = RBM_ALLINT & ~lockedRegs;
            }

            if (preferReg == 0) // no bits set?
            {
                // Add EAX:EDX to the registers
                // (The jit will introduce two spill temps)
                preferReg = RBM_PAIR_TMP;
                rpPredictSpillCnt += 2;
#ifdef DEBUG
                if (verbose)
                    printf("Predict two spill temps\n");
#endif
            }
            else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
            {
                if ((preferReg & RBM_PAIR_TMP_LO) == 0)
                {
                    // Add EAX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_LO;
                }
                else
                {
                    // Add EDX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_HI;
                }
                rpPredictSpillCnt++;
#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            regPairNo regPair;
            regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
            if (regPair != REG_PAIR_NONE)
            {
                regBits = genRegPairMask(regPair);
                goto RET;
            }

            /* Otherwise we have allocated all registers, so do nothing */
            break;
#endif // _TARGET_AMD64_

#ifdef _TARGET_ARM_
        case TYP_STRUCT:
#endif

        case TYP_FLOAT:
        case TYP_DOUBLE:

#if FEATURE_FP_REGALLOC
            regMaskTP restrictMask;
            restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
            assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);

            // expand preferReg to all available non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
            regMaskTP preferDouble;
            preferDouble = preferReg & (preferReg >> 1);

            if ((preferReg == 0) // no bits set?
#ifdef _TARGET_ARM_
                || ((type == TYP_DOUBLE) &&
                    ((preferReg & (preferReg >> 1)) == 0)) // or no two consecutive bits set for TYP_DOUBLE
#endif
                )
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE_FLT;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp (float)\n");
#endif
            }

            assert(preferReg != 0);

            /* Iterate the registers in the order specified by raRegFltTmpOrder */

            for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
            {
                regNum  = raRegFltTmpOrder[index];
                regBits = genRegMask(regNum);

                if (varTypeIsFloating(type))
                {
#ifdef _TARGET_ARM_
                    if (type == TYP_DOUBLE)
                    {
                        if ((regBits & RBM_DBL_REGS) == 0)
                        {
                            continue; // We must restrict the set to the double registers
                        }
                        else
                        {
                            // TYP_DOUBLE uses two consecutive registers
                            regBits |= genRegMask(REG_NEXT(regNum));
                        }
                    }
#endif
                    // See if COMPlus_JitRegisterFP is restricting this FP register
                    //
                    if ((restrictMask & regBits) != regBits)
                        continue;
                }

                if ((preferReg & regBits) == regBits)
                {
                    goto RET;
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#else // !FEATURE_FP_REGALLOC

            return RBM_NONE;

#endif

        default:
            noway_assert(!"unexpected type in reg use prediction");
    }

    /* Abnormal return */
    noway_assert(!"Ran out of registers in rpPredictRegPick");
    return RBM_NONE;

RET:
    /*
     *  If during the first prediction we need to allocate
     *  one of the registers that we used for coloring locals,
     *  then flag this by setting rpPredictAssignAgain.
     *  We will have to go back and repredict the registers.
     */
    if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
        rpPredictAssignAgain = true;

    // Add a register interference to each of the last use variables
    if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
    {
        VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, lastUse, rpLastUseVars);
        VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
        // While we still have any lastUse or inPlaceUse bits
        VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));

        VARSET_TP       varAsSet(VarSetOps::MakeEmpty(this));
        VarSetOps::Iter iter(this, useUnion);
        unsigned        varNum = 0;
        while (iter.NextElem(&varNum))
        {
            // We'll need this for one of the calls...
            VarSetOps::ClearD(this, varAsSet);
            VarSetOps::AddElemD(this, varAsSet, varNum);

            // Is this varBit a lastUse?
            if (VarSetOps::IsMember(this, lastUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
            }

            // Is this varBit an inPlaceUse?
            if (VarSetOps::IsMember(this, inPlaceUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
            }
        }
    }
    codeGen->regSet.rsSetRegsModified(regBits);

    return regBits;
}
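
// Illustrative walk-through (hypothetical masks): asking for an integer scratch register
// with lockedRegs = RBM_EAX | RBM_ECX first strips the locked registers from preferReg;
// the loop over rpRegTmpOrder then returns the first remaining candidate, say RBM_EDX.
// If every register is locked, RBM_SPILL_CHOICE is added and a spill temp is predicted.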
1364
1365 /*****************************************************************************
1366  *
1367  *  Predict integer register use for generating an address mode for a tree,
1368  *  by setting tree->gtUsedRegs to all registers used by this tree and its
1369  *  children.
1370  *    tree       - is the child of a GT_IND node
1371  *    type       - the type of the GT_IND node (floating point/integer)
1372  *    lockedRegs - are the registers which are currently held by
1373  *                 a previously evaluated node.
1374  *    rsvdRegs   - registers which should not be allocated because they will
1375  *                 be needed to evaluate a node in the future
1376  *               - Also if rsvdRegs has the RBM_LASTUSE bit set then
1377  *                 the rpLastUseVars set should be saved and restored
1378  *                 so that we don't add any new variables to rpLastUseVars
1379  *    lenCSE     - is non-NULL only when we have a lenCSE expression
1380  *
1381  *  Return the scratch registers to be held by this tree. (one or two registers
1382  *  to form an address expression)
1383  */
1384
1385 regMaskTP Compiler::rpPredictAddressMode(
1386     GenTree* tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTree* lenCSE)
1387 {
1388     GenTree*   op1;
1389     GenTree*   op2;
1390     GenTree*   opTemp;
1391     genTreeOps oper = tree->OperGet();
1392     regMaskTP  op1Mask;
1393     regMaskTP  op2Mask;
1394     regMaskTP  regMask;
1395     ssize_t    sh;
1396     ssize_t    cns = 0;
1397     bool       rev;
1398     bool       hasTwoAddConst     = false;
1399     bool       restoreLastUseVars = false;
1400     VARSET_TP  oldLastUseVars(VarSetOps::MakeEmpty(this));
1401
1402     /* do we need to save and restore the rpLastUseVars set ? */
1403     if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1404     {
1405         restoreLastUseVars = true;
1406         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1407     }
1408     rsvdRegs &= ~RBM_LASTUSE;
1409
1410     /* if not an add, then just force it to a register */
1411
1412     if (oper != GT_ADD)
1413     {
1414         if (oper == GT_ARR_ELEM)
1415         {
1416             regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1417             goto DONE;
1418         }
1419         else
1420         {
1421             goto NO_ADDR_EXPR;
1422         }
1423     }
1424
1425     op1 = tree->gtOp.gtOp1;
1426     op2 = tree->gtOp.gtOp2;
1427     rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1428
1429     /* look for (x + y) + icon address mode */
1430
1431     if (op2->OperGet() == GT_CNS_INT)
1432     {
1433         cns = op2->gtIntCon.gtIconVal;
1434
1435         /* if not an add, then just force op1 into a register */
1436         if (op1->OperGet() != GT_ADD)
1437             goto ONE_ADDR_EXPR;
1438
1439         hasTwoAddConst = true;
1440
1441         /* Record the 'rev' flag, reverse evaluation order */
1442         rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1443
1444         op2 = op1->gtOp.gtOp2;
1445         op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1446     }
1447
1448     /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1449
1450     sh = 0;
1451     if (op2->OperGet() == GT_LSH)
1452     {
1453         if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1454         {
1455             sh     = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1456             opTemp = op2->gtOp.gtOp1;
1457         }
1458         else
1459         {
1460             opTemp = NULL;
1461         }
1462     }
1463     else
1464     {
1465         opTemp = op2;
1466     }
1467
1468     if (opTemp != NULL)
1469     {
1470         if (opTemp->OperGet() == GT_NOP)
1471         {
1472             opTemp = opTemp->gtOp.gtOp1;
1473         }
1474
1475         // Is this a const operand?
1476         if (opTemp->OperGet() == GT_CNS_INT)
1477         {
1478             // Compute the new cns value that Codegen will end up using
1479             cns += (opTemp->gtIntCon.gtIconVal << sh);
1480
1481             goto ONE_ADDR_EXPR;
1482         }
1483     }
1484
1485     /* Check for LSH in op1 slot */
1486
1487     if (op1->OperGet() != GT_LSH)
1488         goto TWO_ADDR_EXPR;
1489
1490     opTemp = op1->gtOp.gtOp2;
1491
1492     if (opTemp->OperGet() != GT_CNS_INT)
1493         goto TWO_ADDR_EXPR;
1494
1495     sh = opTemp->gtIntCon.gtIconVal;
1496
1497     /* Check for LSH of 0, special case */
1498     if (sh == 0)
1499         goto TWO_ADDR_EXPR;
1500
1501 #if defined(_TARGET_XARCH_)
1502
1503     /* Check for LSH of 1 2 or 3 */
1504     if (sh > 3)
1505         goto TWO_ADDR_EXPR;
1506
1507 #elif defined(_TARGET_ARM_)
1508
1509     /* Check for LSH of 1 to 30 */
1510     if (sh > 30)
1511         goto TWO_ADDR_EXPR;
1512
1513 #else
1514
1515     goto TWO_ADDR_EXPR;
1516
1517 #endif
1518
1519     /* Matched a leftShift by 'sh' subtree, move op1 down */
1520     op1 = op1->gtOp.gtOp1;
1521
1522 TWO_ADDR_EXPR:
1523
1524     /* Now we have to evaluate op1 and op2 into registers */
1525
1526     /* Evaluate op1 and op2 in the correct order */
1527     if (rev)
1528     {
1529         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1530         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1531     }
1532     else
1533     {
1534         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1535         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1536     }
1537
1538     /*  If op1 and op2 must be spilled and reloaded then
1539      *  op1 and op2 might be reloaded into the same register
1540      *  This can only happen when all the registers are lockedRegs
1541      */
1542     if ((op1Mask == op2Mask) && (op1Mask != 0))
1543     {
1544         /* We'll need to grab a different register for op2 */
1545         op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1546     }
1547
1548 #ifdef _TARGET_ARM_
1549     // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1550     //      [op2 + op1<<sh + cns]
1551     // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1552     //
1553     if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1554     {
1555         op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1556     }
1557
1558     //
1559     // On the ARM we will need at least one scratch register for trees that have this form:
1560     //     [op1 + op2 + cns] or  [op1 + op2<<sh + cns]
1561     // or for a float/double or long when we have both op1 and op2
1562     // or when we have an 'cns' that is too large for the ld/st instruction
1563     //
1564     if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1565     {
1566         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1567     }
1568
1569     //
1570     // If we create a CSE that immediately dies then we may need to add an additional register interference
1571     // so we don't color the CSE into R3
1572     //
1573     if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1574     {
1575         opTemp = op2->gtOp.gtOp2;
1576         if (opTemp->OperGet() == GT_LCL_VAR)
1577         {
1578             unsigned   varNum = opTemp->gtLclVar.gtLclNum;
1579             LclVarDsc* varDsc = &lvaTable[varNum];
1580
1581             if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1582             {
1583                 rpRecordRegIntf(RBM_TMP_0,
1584                                 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1585             }
1586         }
1587     }
1588 #endif
1589
1590     regMask          = (op1Mask | op2Mask);
1591     tree->gtUsedRegs = (regMaskSmall)regMask;
1592     goto DONE;
1593
1594 ONE_ADDR_EXPR:
1595
1596     /* now we have to evaluate op1 into a register */
1597
1598     op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1599     op2Mask = RBM_NONE;
1600
1601 #ifdef _TARGET_ARM_
1602     //
1603     // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1604     // instruction
1605     //
1606     if (!codeGen->validDispForLdSt(cns, type))
1607     {
1608         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1609     }
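         // Illustrative only -- when 'cns' doesn't fit the ld/st displacement encoding,
         // codegen must materialize it first, roughly:
         //      movw/movt rT, #cns            ; rT = the scratch register predicted above
         //      ldr       rD, [rOp1, rT]
         // (placeholder register names, not actual predictions)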
1610 #endif
1611
1612     regMask          = (op1Mask | op2Mask);
1613     tree->gtUsedRegs = (regMaskSmall)regMask;
1614     goto DONE;
1615
1616 NO_ADDR_EXPR:
1617
1618 #if !CPU_LOAD_STORE_ARCH
1619     if (oper == GT_CNS_INT)
1620     {
1621         /* Indirect of a constant does not require a register */
1622         regMask = RBM_NONE;
1623     }
1624     else
1625 #endif
1626     {
1627         /* Now we have to evaluate tree into a register */
1628         regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1629     }
1630
1631 DONE:
1632     regMaskTP regUse = tree->gtUsedRegs;
1633
1634     if (!VarSetOps::IsEmpty(this, compCurLife))
1635     {
1636         // Add interference between the current set of live variables and
1637         //  the set of temporary registers needed to evaluate the subtree
1638         if (regUse)
1639         {
1640             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1641         }
1642     }
1643
1644     /* Do we need to restore the oldLastUseVars value? */
1645     if (restoreLastUseVars)
1646     {
1647         /*
1648          *  If we used a GT_ASG targeted register then we need to add
1649          *  a variable interference between any new last use variables
1650          *  and the GT_ASG targeted register
1651          */
1652         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1653         {
1654             rpRecordVarIntf(rpAsgVarNum,
1655                             VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1656         }
1657         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1658     }
1659
1660     return regMask;
1661 }
1662
1663 /*****************************************************************************
1664  *
1665  *  Mark the lclVar 'lclNum' as the target of a GC ref assignment
1666  */
1667
1668 void Compiler::rpPredictRefAssign(unsigned lclNum)
1669 {
1670     LclVarDsc* varDsc = lvaTable + lclNum;
1671
1672     varDsc->lvRefAssign = 1;
1673
1674 #if NOGC_WRITE_BARRIERS
1675 #ifdef DEBUG
1676     if (verbose)
1677     {
1678         if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1679             printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1680                    varDsc->lvVarIndex);
1681     }
1682 #endif
1683
1684     /* Make sure that write barrier pointer variables never land in EDX */
1685     VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1686 #endif // NOGC_WRITE_BARRIERS
1687 }
1688
1689 /*****************************************************************************
1690  *
1691  * Predict the internal temp physical register usage for a block assignment tree,
1692  * by setting tree->gtUsedRegs.
1694  * Returns a mask of interfering registers for this tree.
1695  *
1696  * This function updates regMask and assigns tree->gtUsedRegs
1697  * to the set of scratch registers needed when evaluating the tree.
1698  * Generally tree->gtUsedRegs and the return value are the same, except when the
1699  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1700  * predict additional internal temp physical registers to spill into.
1701  *
1702  *    tree       - is a block assignment (initBlk/copyBlk) node
1703  *    predictReg - what type of register does the tree need
1704  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1705  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1706  *    rsvdRegs   - registers which should not be allocated because they will
1707  *                 be needed to evaluate a node in the future
1708  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1709  *                 the rpLastUseVars set should be saved and restored
1710  *                 so that we don't add any new variables to rpLastUseVars.
1711  */
1712 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTree*     tree,
1713                                           rpPredictReg predictReg,
1714                                           regMaskTP    lockedRegs,
1715                                           regMaskTP    rsvdRegs)
1716 {
1717     regMaskTP regMask         = RBM_NONE;
1718     regMaskTP interferingRegs = RBM_NONE;
1719
1720     bool        hasGCpointer  = false;
1721     bool        dstIsOnStack  = false;
1722     bool        useMemHelper  = false;
1723     bool        useBarriers   = false;
1724     GenTreeBlk* dst           = tree->gtGetOp1()->AsBlk();
1725     GenTree*    dstAddr       = dst->Addr();
1726     GenTree*    srcAddrOrFill = tree->gtGetOp2IfPresent();
1727
1728     size_t blkSize = dst->gtBlkSize;
1729
1730     hasGCpointer = (dst->HasGCPtr());
1731
1732     bool isCopyBlk = tree->OperIsCopyBlkOp();
1733     bool isCopyObj = isCopyBlk && hasGCpointer;
1734     bool isInitBlk = tree->OperIsInitBlkOp();
1735
1736     if (isCopyBlk)
1737     {
1738         assert(srcAddrOrFill->OperIsIndir());
1739         srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1740     }
1741     else
1742     {
1743         // For initBlk, we don't need to worry about the GC pointers.
1744         hasGCpointer = false;
1745     }
1746
1747     if (blkSize != 0)
1748     {
1749         if (isCopyObj)
1750         {
1751             dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1752         }
1753
1754         if (isInitBlk)
1755         {
1756             if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1757             {
1758                 useMemHelper = true;
1759             }
1760         }
1761     }
1762     else
1763     {
1764         useMemHelper = true;
1765     }
1766
1767     if (hasGCpointer && !dstIsOnStack)
1768     {
1769         useBarriers = true;
1770     }
1771
1772 #ifdef _TARGET_ARM_
1773     //
1774     // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
1775     //
1776     if (!useMemHelper && !useBarriers)
1777     {
1778         bool     useLoop        = false;
1779         unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1780
1781         // A mask to use to force the predictor to choose low registers (to reduce code size)
1782         regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1783
1784         // Allow the src and dst to be used in place, unless we use a loop, in which
1785         // case we will need scratch registers as we will be writing to them.
1786         rpPredictReg srcAndDstPredict = PREDICT_REG;
1787
1788         // Will we be using a loop to implement this INITBLK/COPYBLK?
1789         if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1790         {
1791             useLoop          = true;
1792             avoidReg         = RBM_NONE;
1793             srcAndDstPredict = PREDICT_SCRATCH_REG;
1794         }
1795
1796         if (tree->gtFlags & GTF_REVERSE_OPS)
1797         {
1798             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1799                                            dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1800             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1801         }
1802         else
1803         {
1804             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1805                                            srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1806             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1807         }
1808
1809         // We need at least one scratch register for a copyBlk
1810         if (isCopyBlk)
1811         {
1812             // Pick a low register to reduce the code size
1813             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1814         }
1815
1816         if (useLoop)
1817         {
1818             if (isCopyBlk)
1819             {
1820                 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1821                 // Pick another low register to reduce the code size
1822                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1823             }
1824
1825             // We need a loop index register
1826             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1827         }
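             // Illustrative only -- the load two/store two loop form is roughly:
             //      LOOP: ldr  rT1, [rSrc], #4
             //            ldr  rT2, [rSrc], #4
             //            str  rT1, [rDst], #4
             //            str  rT2, [rDst], #4
             //            subs rIdx, rIdx, #1
             //            bne  LOOP
             // (placeholder register names, not actual predictions)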
1828
1829         tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1830
1831         return interferingRegs;
1832     }
1833 #endif
1834     // In what order should the Dest, Val/Src, and Size be calculated?
1835     GenTree*  opsPtr[3];
1836     regMaskTP regsPtr[3];
1837
1838 #if defined(_TARGET_XARCH_)
1839     fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1840
1841     // We're going to use these, might as well make them available now
1842
1843     codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1844     if (isCopyBlk)
1845         codeGen->regSet.rsSetRegsModified(RBM_ESI);
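         // Illustrative note: these fixed registers reflect the x86 string ops used
         // here -- "rep movsd" (ESI=src, EDI=dst, ECX=count) and "rep stosd"
         // (EAX=fill value) -- matching the "movsd dest"/"movsd src" records below.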
1846
1847 #elif defined(_TARGET_ARM_)
1848
1849     if (useMemHelper)
1850     {
1851         // For all other cases that involve non-constants, we just call memcpy/memset
1852         // JIT helpers
1853         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1854         interferingRegs |= RBM_CALLEE_TRASH;
1855 #ifdef DEBUG
1856         if (verbose)
1857             printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1858 #endif
1859     }
1860     else // useBarriers
1861     {
1862         assert(useBarriers);
1863         assert(isCopyBlk);
1864
1865         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1866
1867         // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1868         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1869 #ifdef DEBUG
1870         if (verbose)
1871             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1872 #endif
1873     }
1874 #else // !_TARGET_X86_ && !_TARGET_ARM_
1875 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1876 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1877     regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1878     regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1879                                    opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1880     regMask |= regsPtr[0];
1881     opsPtr[0]->gtUsedRegs |= regsPtr[0];
1882     rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1883
1884     regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1885                                    opsPtr2RsvdRegs | RBM_LASTUSE);
1886     regMask |= regsPtr[1];
1887     opsPtr[1]->gtUsedRegs |= regsPtr[1];
1888     rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1889
1890     regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1891     if (opsPtr[2] == nullptr)
1892     {
1893         // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1894         // Note that it is quite possible that no register is required, but this preserves
1895         // former behavior.
1896         regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1897         rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1898     }
1899     else
1900     {
1901         regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1902         opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1903     }
1904     regMask |= opsPtr2UsedRegs;
1905
1906     tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1907     return interferingRegs;
1908 }
1909
1910 /*****************************************************************************
1911  *
1912  * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1913  * Returns a regMask with the internal temp physical register usage for this tree.
1914  *
1915  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1916  * to the set of scratch registers needed when evaluating the tree.
1917  * Generally tree->gtUsedRegs and the return value are the same, except when the
1918  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1919  * predict additional internal temp physical registers to spill into.
1920  *
1921  *    tree       - is the tree whose register usage we are predicting
1922  *    predictReg - what type of register does the tree need
1923  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1924  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1925  *    rsvdRegs   - registers which should not be allocated because they will
1926  *                 be needed to evaluate a node in the future
1927  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1928  *                 the rpLastUseVars set should be saved and restored
1929  *                 so that we don't add any new variables to rpLastUseVars.
1930  */
1931
1932 #pragma warning(disable : 4701)
1933
1934 #ifdef _PREFAST_
1935 #pragma warning(push)
1936 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1937 #endif
1938 regMaskTP Compiler::rpPredictTreeRegUse(GenTree*     tree,
1939                                         rpPredictReg predictReg,
1940                                         regMaskTP    lockedRegs,
1941                                         regMaskTP    rsvdRegs)
1942 {
1943     regMaskTP    regMask = DUMMY_INIT(RBM_ILLEGAL);
1944     regMaskTP    op2Mask;
1945     regMaskTP    tmpMask;
1946     rpPredictReg op1PredictReg;
1947     rpPredictReg op2PredictReg;
1948     LclVarDsc*   varDsc = NULL;
1949     VARSET_TP    oldLastUseVars(VarSetOps::UninitVal());
1950
1951     VARSET_TP varBits(VarSetOps::UninitVal());
1952     VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1953
1954     bool      restoreLastUseVars = false;
1955     regMaskTP interferingRegs    = RBM_NONE;
1956
1957 #ifdef DEBUG
1958     // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1959     noway_assert(tree);
1960     noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1961     noway_assert(RBM_ILLEGAL);
1962     noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1963     /* impossible values, to make sure that we set them */
1964     tree->gtUsedRegs = RBM_ILLEGAL;
1965 #endif
1966
1967     /* Figure out what kind of a node we have */
1968
1969     genTreeOps oper = tree->OperGet();
1970     var_types  type = tree->TypeGet();
1971     unsigned   kind = tree->OperKind();
1972
1973     // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1974     genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1975     if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1976         predictReg = PREDICT_NONE;
1977     else if (rpHasVarIndexForPredict(predictReg))
1978     {
1979         // The only place where predictReg is set to a var is in the PURE
1980         // assignment case where varIndex is the var being assigned to.
1981         // We need to check whether the variable is used between here and
1982         // its redefinition.
1983         unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1984         unsigned lclNum   = lvaTrackedToVarNum[varIndex];
1985         bool     found    = false;
1986         for (GenTree* nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1987         {
1988             if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1989             {
1990                 // Is this the pure assignment?
1991                 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1992                 {
1993                     predictReg = PREDICT_SCRATCH_REG;
1994                 }
1995                 found = true;
1996                 break;
1997             }
1998         }
1999         assert(found);
2000     }
2001
2002     if (rsvdRegs & RBM_LASTUSE)
2003     {
2004         restoreLastUseVars = true;
2005         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2006         rsvdRegs &= ~RBM_LASTUSE;
2007     }
2008
2009     /* Is this a constant or leaf node? */
2010
2011     if (kind & (GTK_CONST | GTK_LEAF))
2012     {
2013         bool      lastUse   = false;
2014         regMaskTP enregMask = RBM_NONE;
2015
2016         switch (oper)
2017         {
2018 #ifdef _TARGET_ARM_
2019             case GT_CNS_DBL:
2020                 // Codegen for floating point constants on the ARM is currently
2021                 // movw/movt    rT1, <lo32 bits>
2022                 // movw/movt    rT2, <hi32 bits>
2023                 //  vmov.i2d    dT0, rT1,rT2
2024                 //
2025                 // For TYP_FLOAT one integer register is required
2026                 //
2027                 // These integer register(s) immediately die
2028                 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2029                 if (type == TYP_DOUBLE)
2030                 {
2031                     // For TYP_DOUBLE a second integer register is required
2032                     //
2033                     tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2034                 }
2035
2036                 // We also need a floating point register that we keep
2037                 //
2038                 if (predictReg == PREDICT_NONE)
2039                     predictReg = PREDICT_SCRATCH_REG;
2040
2041                 regMask          = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2042                 tree->gtUsedRegs = regMask | tmpMask;
2043                 goto RETURN_CHECK;
2044 #endif
2045
2046             case GT_CNS_INT:
2047             case GT_CNS_LNG:
2048
2049                 if (rpHasVarIndexForPredict(predictReg))
2050                 {
2051                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2052                     rpAsgVarNum       = tgtIndex;
2053
2054                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2055                     predictReg = PREDICT_NONE;
2056
2057                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2058                     tgtVar->lvDependReg = true;
2059
2060                     if (type == TYP_LONG)
2061                     {
2062                         assert(oper == GT_CNS_LNG);
2063
2064                         if (tgtVar->lvOtherReg == REG_STK)
2065                         {
2066                             // We do need one register for a partially enregistered long
2067                             type       = TYP_INT;
2068                             predictReg = PREDICT_SCRATCH_REG;
2069                         }
2070                     }
2071                 }
2072                 else
2073                 {
2074 #if !CPU_LOAD_STORE_ARCH
2075                     /* If the constant is a handle then it will need to have a relocation
2076                        applied to it.  It will need to be loaded into a register.
2077                        But never throw away an existing hint.
2078                        */
2079                     if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2080 #endif
2081                     {
2082                         if (predictReg == PREDICT_NONE)
2083                             predictReg = PREDICT_SCRATCH_REG;
2084                     }
2085                 }
2086                 break;
2087
2088             case GT_NO_OP:
2089                 break;
2090
2091             case GT_CLS_VAR:
2092                 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2093                     (genTypeSize(type) < sizeof(int)))
2094                 {
2095                     predictReg = PREDICT_SCRATCH_REG;
2096                 }
2097 #ifdef _TARGET_ARM_
2098                 // For unaligned loads/stores of floating point values, the value must first go through integer register(s)
2099                 //
2100                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2101                 {
2102                     // These integer register(s) immediately die
2103                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2104                     // Two integer registers are required for a TYP_DOUBLE
2105                     if (type == TYP_DOUBLE)
2106                         tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2107                 }
2108                 // We need a temp register in some cases of loads/stores to a class var
2109                 if (predictReg == PREDICT_NONE)
2110                 {
2111                     predictReg = PREDICT_SCRATCH_REG;
2112                 }
2113 #endif
2114                 if (rpHasVarIndexForPredict(predictReg))
2115                 {
2116                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2117                     rpAsgVarNum       = tgtIndex;
2118
2119                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2120                     predictReg = PREDICT_NONE;
2121
2122                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2123                     tgtVar->lvDependReg = true;
2124
2125                     if (type == TYP_LONG)
2126                     {
2127                         if (tgtVar->lvOtherReg == REG_STK)
2128                         {
2129                             // We do need one register for a partially enregistered long
2130                             type       = TYP_INT;
2131                             predictReg = PREDICT_SCRATCH_REG;
2132                         }
2133                     }
2134                 }
2135                 break;
2136
2137             case GT_LCL_FLD:
2138 #ifdef _TARGET_ARM_
2139                 // Check for a misalignment on a Floating Point field
2140                 //
2141                 if (varTypeIsFloating(type))
2142                 {
2143                     if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2144                     {
2145                         // These integer register(s) immediately die
2146                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2147                         // Two integer registers are required for a TYP_DOUBLE
2148                         if (type == TYP_DOUBLE)
2149                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2150                     }
2151                 }
2152 #endif
2153                 __fallthrough;
2154
2155             case GT_LCL_VAR:
2156             case GT_REG_VAR:
2157
2158                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2159
2160                 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2161                 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2162                 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2163
2164 #if FEATURE_STACK_FP_X87
2165                 // If it's a floating point var, there's nothing to do
2166                 if (varTypeIsFloating(type))
2167                 {
2168                     tree->gtUsedRegs = RBM_NONE;
2169                     regMask          = RBM_NONE;
2170                     goto RETURN_CHECK;
2171                 }
2172 #endif
2173
2174                 // If the variable is already a register variable, no need to go further.
2175                 if (oper == GT_REG_VAR)
2176                     break;
2177
2178                 /* Apply the type of predictReg to the LCL_VAR */
2179
2180                 if (predictReg == PREDICT_REG)
2181                 {
2182                 PREDICT_REG_COMMON:
2183                     if (varDsc->lvRegNum == REG_STK)
2184                         break;
2185
2186                     goto GRAB_COUNT;
2187                 }
2188                 else if (predictReg == PREDICT_SCRATCH_REG)
2189                 {
2190                     noway_assert(predictReg == PREDICT_SCRATCH_REG);
2191
2192                     /* Is this the last use of a local var?   */
2193                     if (lastUse)
2194                     {
2195                         if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2196                             goto PREDICT_REG_COMMON;
2197                     }
2198                 }
2199                 else if (rpHasVarIndexForPredict(predictReg))
2200                 {
2201                     /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2202                     {
2203                         unsigned   tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2204                         LclVarDsc* tgtVar    = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2205                         VarSetOps::MakeSingleton(this, tgtIndex1);
2206
2207                         noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2208                         noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2209 #ifndef _TARGET_AMD64_
2210                         // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2211                         // so this assert is meaningless
2212                         noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2213 #endif // !_TARGET_AMD64_
2214
2215                         if (varDsc->lvTracked)
2216                         {
2217                             unsigned srcIndex;
2218                             srcIndex = varDsc->lvVarIndex;
2219
2220                             // If this variable has its last use here then we will prefer
2221                             // to color it to the same register as tgtVar.
2222                             if (lastUse)
2223                             {
2224                                 /*
2225                                  *  Add an entry in the lvaVarPref graph to indicate
2226                                  *  that it would be worthwhile to color these two variables
2227                                  *  into the same physical register.
2228                                  *  This will help us avoid having an extra copy instruction
2229                                  */
2230                                 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2231                                 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2232                             }
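                                 // (Illustrative: for "tgt = src" where 'src' dies here,
                                 // coloring both into one register lets codegen elide the copy.)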
2233
2234                             // Add a variable interference from srcIndex to each of the last use variables
2235                             if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2236                             {
2237                                 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2238                             }
2239                         }
2240                         rpAsgVarNum = tgtIndex1;
2241
2242                         /* We will rely on the target enregistered variable from the GT_ASG */
2243                         varDsc = tgtVar;
2244                     }
2245                 GRAB_COUNT:
2246                     unsigned grabCount;
2247                     grabCount = 0;
2248
2249                     if (genIsValidFloatReg(varDsc->lvRegNum))
2250                     {
2251                         enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2252                     }
2253                     else
2254                     {
2255                         enregMask = genRegMask(varDsc->lvRegNum);
2256                     }
2257
2258 #ifdef _TARGET_ARM_
2259                     if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2260                     {
2261                         // We need to compute the intermediate value using a TYP_DOUBLE
2262                         // but we are storing the result in a TYP_FLOAT enregistered variable
2263                         //
2264                         grabCount++;
2265                     }
2266                     else
2267 #endif
2268                     {
2269                         /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2270                         if (enregMask & (rsvdRegs | lockedRegs))
2271                         {
2272                             grabCount++;
2273                         }
2274 #ifndef _TARGET_64BIT_
2275                         if (type == TYP_LONG)
2276                         {
2277                             if (varDsc->lvOtherReg != REG_STK)
2278                             {
2279                                 tmpMask = genRegMask(varDsc->lvOtherReg);
2280                                 enregMask |= tmpMask;
2281
2282                                 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2283                                 if (tmpMask & (rsvdRegs | lockedRegs))
2284                                     grabCount++;
2285                             }
2286                             else // lvOtherReg == REG_STK
2287                             {
2288                                 grabCount++;
2289                             }
2290                         }
2291 #endif // _TARGET_64BIT_
2292                     }
2293
2294                     varDsc->lvDependReg = true;
2295
2296                     if (grabCount == 0)
2297                     {
2298                         /* Does not need a register */
2299                         predictReg = PREDICT_NONE;
2300                         // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2301                         VarSetOps::UnionD(this, rpUseInPlace, varBits);
2302                     }
2303                     else // (grabCount > 0)
2304                     {
2305 #ifndef _TARGET_64BIT_
2306                         /* If the type is TYP_LONG and we only need one register, change the type to TYP_INT */
2307                         if ((type == TYP_LONG) && (grabCount == 1))
2308                         {
2309                             /* We will need to pick one register */
2310                             type = TYP_INT;
2311                             // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2312                             VarSetOps::UnionD(this, rpUseInPlace, varBits);
2313                         }
2314                         noway_assert((type == TYP_DOUBLE) ||
2315                                      (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2316 #else  // !_TARGET_64BIT_
2317                         noway_assert(grabCount == 1);
2318 #endif // !_TARGET_64BIT_
2319                     }
2320                 }
2321                 else if (type == TYP_STRUCT)
2322                 {
2323 #ifdef _TARGET_ARM_
2324                     // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2325                     //        predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2326                     //        As a workaround we just bash it to PREDICT_NONE here
2327                     //
2328                     if (predictReg != PREDICT_NONE)
2329                         predictReg = PREDICT_NONE;
2330 #endif
2331                     // Currently predictReg is saying that we will not need any scratch registers
2332                     noway_assert(predictReg == PREDICT_NONE);
2333
2334                     /* We may need to sign or zero extend a small type when pushing a struct */
2335                     if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2336                     {
2337                         for (unsigned varNum = varDsc->lvFieldLclStart;
2338                              varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2339                         {
2340                             LclVarDsc* fldVar = lvaTable + varNum;
2341
2342                             if (fldVar->lvStackAligned())
2343                             {
2344                                 // When we are stack aligned Codegen will just use
2345                                 // a push instruction and thus doesn't need any register,
2346                                 // since we can push either a register or a stack frame location
2347                                 continue;
2348                             }
2349
2350                             if (varTypeIsByte(fldVar->TypeGet()))
2351                             {
2352                                 // We will need to reserve one byteable register,
2353                                 //
2354                                 type       = TYP_BYTE;
2355                                 predictReg = PREDICT_SCRATCH_REG;
2356 #if CPU_HAS_BYTE_REGS
2357                                 // It is best to enregister this fldVar in a byteable register
2358                                 //
2359                                 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2360 #endif
2361                             }
2362                             else if (varTypeIsShort(fldVar->TypeGet()))
2363                             {
2364                                 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2365                                 // If fldVar is not enregistered then we will need a scratch register
2366                                 //
2367                                 if (!isEnregistered)
2368                                 {
2369                                     // We will need either an int register or a byte register
2370                                     // If we are not requesting a byte register we will request an int register
2371                                     //
2372                                     if (type != TYP_BYTE)
2373                                         type   = TYP_INT;
2374                                     predictReg = PREDICT_SCRATCH_REG;
2375                                 }
2376                             }
2377                         }
2378                     }
2379                 }
2380                 else
2381                 {
2382                     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2383                     if (preferReg != 0)
2384                     {
2385                         if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2386                         {
2387                             varDsc->addPrefReg(preferReg, this);
2388                         }
2389                     }
2390                 }
2391                 break; /* end of case GT_LCL_VAR */
2392
2393             case GT_JMP:
2394                 tree->gtUsedRegs = RBM_NONE;
2395                 regMask          = RBM_NONE;
2396
2397 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2398                 // Mark the registers required to emit a tailcall profiler callback
2399                 if (compIsProfilerHookNeeded())
2400                 {
2401                     tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2402                 }
2403 #endif
2404                 goto RETURN_CHECK;
2405
2406             default:
2407                 break;
2408         } /* end of switch (oper) */
2409
2410         /* If we don't need to evaluate to a register, regMask is the empty set */
2411         /* Otherwise we grab a temp for the local variable                    */
2412
2413         if (predictReg == PREDICT_NONE)
2414             regMask = RBM_NONE;
2415         else
2416         {
2417             regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2418
2419             if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2420             {
2421                 /* We need to sign or zero extend a small type when pushing a struct */
2422                 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2423
2424                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2425                 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2426
2427                 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2428                      varNum++)
2429                 {
2430                     LclVarDsc* fldVar = lvaTable + varNum;
2431                     if (fldVar->lvTracked)
2432                     {
2433                         VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2434                         rpRecordRegIntf(regMask, fldBit DEBUGARG(
2435                                                      "need scratch register when pushing a small field of a struct"));
2436                     }
2437                 }
2438             }
2439         }
2440
2441         /* Update the set of lastUse variables that we encountered so far */
2442         if (lastUse)
2443         {
2444             VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2445             VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2446
2447             /*
2448              *  Add interference from any previously locked temps into this last use variable.
2449              */
2450             if (lockedRegs)
2451             {
2452                 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2453             }
2454             /*
2455              *  Add interference from any reserved temps into this last use variable.
2456              */
2457             if (rsvdRegs)
2458             {
2459                 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2460             }
2461             /*
2462              *  For partially enregistered longs add an interference with the
2463              *  register returned by rpPredictRegPick
2464              */
2465             if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2466             {
2467                 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2468             }
2469         }
2470
2471         tree->gtUsedRegs = (regMaskSmall)regMask;
2472         goto RETURN_CHECK;
2473     }
2474
2475     /* Is it a 'simple' unary/binary operator? */
2476
2477     if (kind & GTK_SMPOP)
2478     {
2479         GenTree* op1 = tree->gtOp.gtOp1;
2480         GenTree* op2 = tree->gtGetOp2IfPresent();
2481
2482         GenTree*  opsPtr[3];
2483         regMaskTP regsPtr[3];
2484
2485         VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2486
2487         switch (oper)
2488         {
2489             case GT_ASG:
2490
2491                 /* Is the value being assigned into a LCL_VAR? */
2492                 if (op1->gtOper == GT_LCL_VAR)
2493                 {
2494                     varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2495
2496                     /* Are we assigning a LCL_VAR the result of a call? */
2497                     if (op2->gtOper == GT_CALL)
2498                     {
2499                         /* Set a preferred register for the LCL_VAR */
2500                         if (isRegPairType(varDsc->TypeGet()))
2501                             varDsc->addPrefReg(RBM_LNGRET, this);
2502                         else if (!varTypeIsFloating(varDsc->TypeGet()))
2503                             varDsc->addPrefReg(RBM_INTRET, this);
2504 #ifdef _TARGET_AMD64_
2505                         else
2506                             varDsc->addPrefReg(RBM_FLOATRET, this);
2507 #endif
2508                         /*
2509                          *  When assigning the result of a call we don't
2510                          *  bother trying to target the right side of the
2511                          *  assignment, since we have a fixed calling convention.
2512                          */
2513                     }
2514                     else if (varDsc->lvTracked)
2515                     {
2516                         // We interfere with uses in place
2517                         if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2518                         {
2519                             rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2520                         }
2521
2522                         // Did we predict that this local will be fully enregistered?
2523                         // and the assignment type is the same as the expression type?
2524                         // and it is dead on the right side of the assignment?
2525                         // and we currently have no other rpAsgVarNum active?
2526                         //
2527                         if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2528                             (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2529                         {
2530                             //
2531                             //  Yes, we should try to target the right side (op2) of this
2532                             //  assignment into the (enregistered) tracked variable.
2533                             //
2534
2535                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2536                             op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2537
2538                             // Remember that this is a new use in place
2539
2540                             // We've added "new UseInPlace"; remove from the global set.
2541                             VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2542
2543                             //  Note that later when we walk down to the leaf node for op2
2544                             //  if we decide to actually use the register for the 'varDsc'
2545                             //  to enregister the operand, then we will set rpAsgVarNum to
2546                             //  varDsc->lvVarIndex, by extracting this value using
2547                             //  rpGetVarIndexForPredict()
2548                             //
2549                             //  Also we reset rpAsgVarNum back to -1 after we have finished
2550                             //  predicting the current GT_ASG node
2551                             //
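                                 //  E.g. (illustrative): for "v = x + y" with 'v' enregistered
                                 //  in rV, op2 is predicted into rV and the store to 'v'
                                 //  becomes a no-op.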
2552                             goto ASG_COMMON;
2553                         }
2554                     }
2555                 }
2556                 else if (tree->OperIsBlkOp())
2557                 {
2558                     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2559                     regMask = 0;
2560                     goto RETURN_CHECK;
2561                 }
2562                 __fallthrough;
2563
2564             case GT_CHS:
2565
2566             case GT_ASG_OR:
2567             case GT_ASG_XOR:
2568             case GT_ASG_AND:
2569             case GT_ASG_SUB:
2570             case GT_ASG_ADD:
2571             case GT_ASG_MUL:
2572             case GT_ASG_DIV:
2573             case GT_ASG_UDIV:
2574
2575                 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2576                 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2577                 {
2578                     /* Is the value being assigned into an enregistered LCL_VAR? */
2579                     /* For debug code we only allow a simple op2 to be assigned */
2580                     if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2581                     {
2582                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2583                         /* Did we predict that this local will be enregistered? */
2584                         if (varDsc->lvRegNum != REG_STK)
2585                         {
2586                             /* Yes, we can use "reg <op>= addr" */
2587
2588                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2589                             op2PredictReg = PREDICT_NONE;
2590
2591                             goto ASG_COMMON;
2592                         }
2593                     }
2594                 }
2595
2596 #if CPU_LOAD_STORE_ARCH
2597                 if (oper != GT_ASG)
2598                 {
2599                     op1PredictReg = PREDICT_REG;
2600                     op2PredictReg = PREDICT_REG;
2601                 }
2602                 else
2603 #endif
2604                 {
2605                     /*
2606                      *  Otherwise, initialize the normal forcing of operands:
2607                      *   "addr <op>= reg"
2608                      */
2609                     op1PredictReg = PREDICT_ADDR;
2610                     op2PredictReg = PREDICT_REG;
2611                 }
2612
2613             ASG_COMMON:
2614
2615 #if !CPU_LOAD_STORE_ARCH
2616                 if (op2PredictReg != PREDICT_NONE)
2617                 {
2618                     /* Is the value being assigned a simple one? */
2619                     if (rpCanAsgOperWithoutReg(op2, false))
2620                         op2PredictReg = PREDICT_NONE;
2621                 }
2622 #endif
2623
2624                 bool simpleAssignment;
2625                 simpleAssignment = false;
2626
2627                 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2628                 {
2629                     // Add a variable interference from the assign target
2630                     // to each of the last use variables
2631                     if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2632                     {
2633                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2634
2635                         if (varDsc->lvTracked)
2636                         {
2637                             unsigned varIndex = varDsc->lvVarIndex;
2638
2639                             rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2640                         }
2641                     }
2642
2643                     /*  Record whether this tree is a simple assignment to a local */
2644
2645                     simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2646                 }
2647
2648                 bool requireByteReg;
2649                 requireByteReg = false;
2650
2651 #if CPU_HAS_BYTE_REGS
2652                 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2653
2654                 if (varTypeIsByte(type) &&
2655                     ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2656
2657                 {
2658                     // Byte-assignments typically need a byte register
2659                     requireByteReg = true;
2660
2661                     if (op1->gtOper == GT_LCL_VAR)
2662                     {
2663                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2664
2665                         // Did we predict that this local will be enregistered?
2666                         if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2667                         {
2668                             // We don't require a byte register when op1 is an enregistered local
2669                             requireByteReg = false;
2670                         }
2671
2672                         // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2673                         if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2674                         {
2675                             // We should try to put op1 in a byte register
2676                             varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2677                         }
2678                     }
2679                 }
2680 #endif
2681
2682                 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2683
2684                 bool isWriteBarrierAsgNode;
2685                 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2686 #ifdef DEBUG
2687                 GCInfo::WriteBarrierForm wbf;
2688                 if (isWriteBarrierAsgNode)
2689                     wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2690                 else
2691                     wbf = GCInfo::WBF_NoBarrier;
2692 #endif // DEBUG
2693
2694                 regMaskTP wbaLockedRegs;
2695                 wbaLockedRegs = lockedRegs;
2696                 if (isWriteBarrierAsgNode)
2697                 {
2698 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2699 #ifdef DEBUG
2700                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2701                     {
2702 #endif // DEBUG
2703                         wbaLockedRegs |= RBM_WRITE_BARRIER;
2704                         op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2705                         assert(REG_WRITE_BARRIER == REG_EDX);
2706                         op1PredictReg = PREDICT_REG_EDX;
2707 #ifdef DEBUG
2708                     }
2709                     else
2710 #endif // DEBUG
2711 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2712
2713 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2714                     {
2715 #ifdef _TARGET_X86_
2716                         op1PredictReg = PREDICT_REG_ECX;
2717                         op2PredictReg = PREDICT_REG_EDX;
2718 #elif defined(_TARGET_ARM_)
2719                         op1PredictReg = PREDICT_REG_R0;
2720                         op2PredictReg = PREDICT_REG_R1;
2721
2722                         // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2723                         if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2724                         {
2725                             op1 = op1->gtOp.gtOp1;
2726                         }
2727 #else // !_TARGET_X86_ && !_TARGET_ARM_
2728 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2729 #endif
2730                     }
2731 #endif
2732                 }
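                     // Illustrative summary: the x86 NOGC barrier steers the destination
                     // address into REG_WRITE_BARRIER (EDX), while the helper-based paths
                     // above pin op1/op2 to the helper's fixed argument registers
                     // (ECX/EDX on x86, R0/R1 on ARM).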
2733
2734                 /*  Are we supposed to evaluate RHS first? */
2735
2736                 if (tree->gtFlags & GTF_REVERSE_OPS)
2737                 {
2738                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2739
2740 #if CPU_HAS_BYTE_REGS
2741                     // Should we ensure that op2 gets evaluated into a byte register?
2742                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2743                     {
2744                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2745                         // and we can't select one that is already reserved (i.e. lockedRegs)
2746                         //
2747                         op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2748                         op2->gtUsedRegs |= op2Mask;
2749
2750                         // No longer a simple assignment because we're using extra registers and might
2751                         // have interference between op1 and op2.  See DevDiv #136681
2752                         simpleAssignment = false;
2753                     }
2754 #endif
2755                     /*
2756                      *  For a simple assignment we don't want the op2Mask to be
2757                      *  marked as interfering with the LCL_VAR, since it is likely
2758                      *  that we will want to enregister the LCL_VAR in exactly
2759                      *  the register that is used to compute op2
2760                      */
2761                     tmpMask = lockedRegs;
2762
2763                     if (!simpleAssignment)
2764                         tmpMask |= op2Mask;
2765
2766                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2767
2768                     // Did we relax the register prediction for op1 and op2 above ?
2769                     // - because we are depending upon op1 being enregistered
2770                     //
2771                     if ((op1PredictReg == PREDICT_NONE) &&
2772                         ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2773                     {
2774                         /* We must be assigning into an enregistered LCL_VAR */
2775                         noway_assert(op1->gtOper == GT_LCL_VAR);
2776                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2777                         noway_assert(varDsc->lvRegNum != REG_STK);
2778
2779                         /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2780                         varDsc->lvDependReg = true;
2781                     }
2782                 }
2783                 else
2784                 {
2785                     // For the case of simpleAssignments op2 should always be evaluated first
2786                     noway_assert(!simpleAssignment);
2787
2788                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2789                     if (isWriteBarrierAsgNode)
2790                     {
2791                         wbaLockedRegs |= op1->gtUsedRegs;
2792                     }
2793                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2794
2795 #if CPU_HAS_BYTE_REGS
2796                     // Should we ensure that op2 gets evaluated into a byte register?
2797                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2798                     {
2799                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2800                         // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2801                         //
2802                         op2Mask |=
2803                             rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2804                         op2->gtUsedRegs |= op2Mask;
2805                     }
2806 #endif
2807                 }
2808
2809                 if (rpHasVarIndexForPredict(op2PredictReg))
2810                 {
2811                     rpAsgVarNum = -1;
2812                 }
2813
2814                 if (isWriteBarrierAsgNode)
2815                 {
2816 #if NOGC_WRITE_BARRIERS
2817 #ifdef DEBUG
2818                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2819                     {
2820 #endif // DEBUG
2821
2822                         /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2823                            passed to the write-barrier call in REG_WRITE_BARRIER */
2824
2825                         regMask = op2Mask;
2826
2827                         if (op1->gtOper == GT_IND)
2828                         {
2829                             GenTree* rv1;
2830                             GenTree* rv2;
2831                             unsigned mul, cns;
2832                             bool     rev;
2833
2834                             /* Special handling of indirect assigns for write barrier */
2835
2836                             bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2837                                                                   &mul, &cns);
2838
2839                             /* Check address mode for enregisterable locals */
2840
2841                             if (yes)
2842                             {
2843                                 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2844                                 {
2845                                     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2846                                 }
2847                                 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2848                                 {
2849                                     rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2850                                 }
2851                             }
2852                         }
2853
2854                         if (op2->gtOper == GT_LCL_VAR)
2855                         {
2856                             rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2857                         }
2858
2859                         // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2860                         if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2861                         {
2862                             rpRecordRegIntf(RBM_WRITE_BARRIER,
2863                                             rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2864                         }
2865                         tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2866 #ifdef DEBUG
2867                     }
2868                     else
2869 #endif // DEBUG
2870 #endif // NOGC_WRITE_BARRIERS
2871
2872 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2873                     {
2874 #ifdef _TARGET_ARM_
2875 #ifdef DEBUG
2876                         if (verbose)
2877                             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2878 #endif
2879                         //
2880                         // For the ARM target we have an optimized JIT Helper
2881                         // that only trashes a subset of the callee saved registers
2882                         //
2883
2884                         // The helper kills the RBM_CALLEE_TRASH_NOGC set.  We record this in
2885                         // interferingRegs instead of gtUsedRegs, because gtUsedRegs will be
2886                         // modified later and we still need to remember to add the interference;
2887                         // recording the mask in gtUsedRegs would have added the interference
2888                         // automatically, but it would not survive those later updates.
2889
2890
2891                         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2892
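                             // The ARM no-GC write-barrier helper takes the target address in R0
                             // and the value being stored in R1: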
2893                         op1->gtUsedRegs |= RBM_R0;
2894                         op2->gtUsedRegs |= RBM_R1;
2895 #else // _TARGET_ARM_
2896
2897 #ifdef DEBUG
2898                         if (verbose)
2899                             printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2900 #endif
2901                         // We have to call a normal JIT helper to perform the Write Barrier Assignment
2902                         // It will trash the callee saved registers
2903
2904                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2905 #endif // _TARGET_ARM_
2906                     }
2907 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2908                 }
2909
2910                 if (simpleAssignment)
2911                 {
2912                     /*
2913                      *  Consider a simple assignment to a local:
2914                      *
2915                      *   lcl = expr;
2916                      *
2917                      *  Since the "=" node is visited after the variable
2918                      *  is marked live (assuming it's live after the
2919                      *  assignment), we don't want to use the register
2920                      *  use mask of the "=" node but rather that of the
2921                      *  variable itself.
2922                      */
2923                     tree->gtUsedRegs = op1->gtUsedRegs;
2924                 }
2925                 else
2926                 {
2927                     tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2928                 }
2929                 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2930                 goto RETURN_CHECK;
2931
2932             case GT_ASG_LSH:
2933             case GT_ASG_RSH:
2934             case GT_ASG_RSZ:
2935                 /* assigning shift operators */
2936
2937                 noway_assert(type != TYP_LONG);
2938
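                // On a load/store architecture the shift target must be addressed explicitly
                // (load, modify, store), so predict an address; otherwise the location can be
                // operated on in place and needs no register.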
2939 #if CPU_LOAD_STORE_ARCH
2940                 predictReg = PREDICT_ADDR;
2941 #else
2942                 predictReg = PREDICT_NONE;
2943 #endif
2944
2945                 /* the shift count is handled the same as for an ordinary shift */
2946                 goto HANDLE_SHIFT_COUNT;
2947
2948             case GT_ADDR:
2949                 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2950
2951                 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2952                 {
2953                     // We need a scratch register for the LEA instruction
2954                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2955                 }
2956
2957                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2958                 goto RETURN_CHECK;
2959
2960             case GT_CAST:
2961
2962                 /* Cannot cast to VOID */
2963                 noway_assert(type != TYP_VOID);
2964
2965                 /* cast to long is special */
2966                 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2967                 {
2968                     noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2969 #if CPU_LONG_USES_REGPAIR
2970                     rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2971
2972                     if (rpHasVarIndexForPredict(predictReg))
2973                     {
2974                         unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2975                         rpAsgVarNum       = tgtIndex;
2976
2977                         // We don't need any register as we plan on writing to the rpAsgVarNum register
2978                         predictReg = PREDICT_NONE;
2979
2980                         LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2981                         tgtVar->lvDependReg = true;
2982
2983                         if (tgtVar->lvOtherReg != REG_STK)
2984                         {
2985                             predictRegHi = PREDICT_NONE;
2986                         }
2987                     }
2988                     else
2989 #endif
2990                         if (predictReg == PREDICT_NONE)
2991                     {
2992                         predictReg = PREDICT_SCRATCH_REG;
2993                     }
2994 #ifdef _TARGET_ARM_
2995                     // If we are widening an int into a long using a targeted register pair we
2996                     // should retarget so that the low part gets loaded into the appropriate register
2997                     else if (predictReg == PREDICT_PAIR_R0R1)
2998                     {
2999                         predictReg   = PREDICT_REG_R0;
3000                         predictRegHi = PREDICT_REG_R1;
3001                     }
3002                     else if (predictReg == PREDICT_PAIR_R2R3)
3003                     {
3004                         predictReg   = PREDICT_REG_R2;
3005                         predictRegHi = PREDICT_REG_R3;
3006                     }
3007 #endif
3008 #ifdef _TARGET_X86_
3009                     // If we are widening an int into a long using a targeted register pair we
3010                     // should retarget so that the low part gets loaded into the appropriate register
3011                     else if (predictReg == PREDICT_PAIR_EAXEDX)
3012                     {
3013                         predictReg   = PREDICT_REG_EAX;
3014                         predictRegHi = PREDICT_REG_EDX;
3015                     }
3016                     else if (predictReg == PREDICT_PAIR_ECXEBX)
3017                     {
3018                         predictReg   = PREDICT_REG_ECX;
3019                         predictRegHi = PREDICT_REG_EBX;
3020                     }
3021 #endif
3022
3023                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3024
3025 #if CPU_LONG_USES_REGPAIR
3026                     if (predictRegHi != PREDICT_NONE)
3027                     {
3028                         // Now get one more reg for the upper part
3029                         regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3030                     }
3031 #endif
3032                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3033                     goto RETURN_CHECK;
3034                 }
3035
3036                 /* cast from long is special - it frees a register */
3037                 if (type <= TYP_INT // nice.  this presumably is intended to mean "signed int and shorter types"
3038                     && op1->gtType == TYP_LONG)
3039                 {
3040                     if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3041                         predictReg = PREDICT_REG;
3042
3043                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3044
3045                     // If we have 2 or more regs, free one of them
3046                     if (!genMaxOneBit(regMask))
3047                     {
3048                         /* Clear the 2nd lowest bit in regMask */
3049                         /* First set tmpMask to the lowest bit in regMask */
3050                         tmpMask = genFindLowestBit(regMask);
3051                         /* Next find the second lowest bit in regMask */
3052                         tmpMask = genFindLowestBit(regMask & ~tmpMask);
3053                         /* Clear this bit from regmask */
3054                         regMask &= ~tmpMask;
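                             /* e.g. if regMask was {EAX,EDX} it is now just {EAX}:
                                the narrowing cast only needs one of the two registers */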
3055                     }
3056                     tree->gtUsedRegs = op1->gtUsedRegs;
3057                     goto RETURN_CHECK;
3058                 }
3059
3060 #if CPU_HAS_BYTE_REGS
3061                 /* cast from signed-byte is special - it uses byteable registers */
3062                 if (type == TYP_INT)
3063                 {
3064                     var_types smallType;
3065
3066                     if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3067                         smallType = tree->gtCast.CastOp()->TypeGet();
3068                     else
3069                         smallType = tree->gtCast.gtCastType;
3070
3071                     if (smallType == TYP_BYTE)
3072                     {
3073                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3074
3075                         if ((regMask & RBM_BYTE_REGS) == 0)
3076                             regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3077
3078                         tree->gtUsedRegs = (regMaskSmall)regMask;
3079                         goto RETURN_CHECK;
3080                     }
3081                 }
3082 #endif
3083
3084 #if FEATURE_STACK_FP_X87
3085                 /* cast to float/double is special */
3086                 if (varTypeIsFloating(type))
3087                 {
3088                     switch (op1->TypeGet())
3089                     {
3090                         /* uses fild, so the operand doesn't need to be loaded into a register */
3091                         case TYP_INT:
3092                         case TYP_LONG:
3093                             rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3094                             tree->gtUsedRegs = op1->gtUsedRegs;
3095                             regMask          = 0;
3096                             goto RETURN_CHECK;
3097                         default:
3098                             break;
3099                     }
3100                 }
3101
3102                 /* Casting from floating type to integral type is special */
3103                 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3104                 {
3105                     if (opts.compCanUseSSE2)
3106                     {
3107                         // predict for SSE2 based casting
3108                         if (predictReg <= PREDICT_REG)
3109                             predictReg = PREDICT_SCRATCH_REG;
3110                         regMask        = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3111
3112                         // Get one more int reg to hold cast result
3113                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3114                         tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3115                         goto RETURN_CHECK;
3116                     }
3117                 }
3118 #endif
3119
3120 #if FEATURE_FP_REGALLOC
3121                 // Are we casting between an integer type and a floating type (in either direction)?
3122                 // Fix 388428 ARM JitStress WP7
3123                 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3124                 {
3125                     // op1 needs to go into a register
3126                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3127
3128 #ifdef _TARGET_ARM_
3129                     if (varTypeIsFloating(op1->TypeGet()))
3130                     {
3131                         // We also need a fp scratch register for the convert operation
3132                         regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3133                                                     PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3134                     }
3135 #endif
3136                     // We also need a register to hold the result
3137                     regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3138                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3139                     goto RETURN_CHECK;
3140                 }
3141 #endif
3142
3143                 /* otherwise must load op1 into a register */
3144                 goto GENERIC_UNARY;
3145
3146             case GT_INTRINSIC:
3147
3148 #ifdef _TARGET_XARCH_
3149                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3150                 {
3151                     // This is a special case to handle the following
3152                     // optimization: conv.i4(round.d(d)) -> round.i(d)
3153                     // if flowgraph 3186
3154
3155                     if (predictReg <= PREDICT_REG)
3156                         predictReg = PREDICT_SCRATCH_REG;
3157
3158                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3159
3160                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3161
3162                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3163                     goto RETURN_CHECK;
3164                 }
3165 #endif
3166                 __fallthrough;
3167
3168             case GT_NEG:
3169 #ifdef _TARGET_ARM_
3170                 if (tree->TypeGet() == TYP_LONG)
3171                 {
3172                     // On ARM this consumes an extra register for the '0' value
3173                     if (predictReg <= PREDICT_REG)
3174                         predictReg = PREDICT_SCRATCH_REG;
3175
3176                     regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3177
3178                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3179
3180                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3181                     goto RETURN_CHECK;
3182                 }
3183 #endif // _TARGET_ARM_
3184
3185                 __fallthrough;
3186
3187             case GT_NOT:
3188             // these unary operators will write new values
3189             // and thus will need a scratch register
3190             GENERIC_UNARY:
3191                 /* generic unary operators */
3192
3193                 if (predictReg <= PREDICT_REG)
3194                     predictReg = PREDICT_SCRATCH_REG;
3195
3196                 __fallthrough;
3197
3198             case GT_NOP:
3199                 // these unary operators do not write new values
3200                 // and thus won't need a scratch register
3201                 CLANG_FORMAT_COMMENT_ANCHOR;
3202
3203 #if OPT_BOOL_OPS
3204                 if (!op1)
3205                 {
3206                     tree->gtUsedRegs = 0;
3207                     regMask          = 0;
3208                     goto RETURN_CHECK;
3209                 }
3210 #endif
3211                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3212                 tree->gtUsedRegs = op1->gtUsedRegs;
3213                 goto RETURN_CHECK;
3214
3215             case GT_IND:
3216             case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3217             {
3218                 bool      intoReg = true;
3219                 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3220
3221                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3222                 {
3223                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3224                 }
3225
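                // Decide whether the value of the indirection must be loaded into a register:
                // with PREDICT_ADDR the caller only wants the address formed, and with
                // PREDICT_NONE only a TYP_LONG load still forces a register (pair).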
3226                 if (predictReg == PREDICT_ADDR)
3227                 {
3228                     intoReg = false;
3229                 }
3230                 else if (predictReg == PREDICT_NONE)
3231                 {
3232                     if (type != TYP_LONG)
3233                     {
3234                         intoReg = false;
3235                     }
3236                     else
3237                     {
3238                         predictReg = PREDICT_REG;
3239                     }
3240                 }
3241
3242                 /* forcing to register? */
3243                 if (intoReg && (type != TYP_LONG))
3244                 {
3245                     rsvdRegs |= RBM_LASTUSE;
3246                 }
3247
3248                 GenTree* lenCSE;
3249                 lenCSE = NULL;
3250
3251                 /* check for address mode */
3252                 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3253                 tmpMask = RBM_NONE;
3254
3255 #if CPU_LOAD_STORE_ARCH
3256                 // We may need a scratch register for loading a long
3257                 if (type == TYP_LONG)
3258                 {
3259                     /* This scratch register immediately dies */
3260                     tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3261                 }
3262 #endif // CPU_LOAD_STORE_ARCH
3263
3264 #ifdef _TARGET_ARM_
3265                 // Unaligned loads/stores of floating point values must first pass through integer register(s)
3266                 //
3267                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3268                 {
3269                     /* These integer register(s) immediately die */
3270                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3271                     // Two integer registers are required for a TYP_DOUBLE
3272                     if (type == TYP_DOUBLE)
3273                         tmpMask |=
3274                             rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3275                 }
3276 #endif
3277
3278                 /* forcing to register? */
3279                 if (intoReg)
3280                 {
3281                     regMaskTP lockedMask = lockedRegs | rsvdRegs;
3282                     tmpMask |= regMask;
3283
3284                     // We will compute a new regMask that holds the register(s)
3285                     // that we will load the indirection into.
3286                     //
3287                     CLANG_FORMAT_COMMENT_ANCHOR;
3288
3289 #ifndef _TARGET_64BIT_
3290                     if (type == TYP_LONG)
3291                     {
3292                         // We need to use multiple load instructions here:
3293                         // For the first register we cannot choose
3294                         // any registers that are being used in place or
3295                         // any register in the current regMask
3296                         //
3297                         regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3298
3299                         // For the second register we can choose a register that was
3300                         // used in place or any register in the old, now-overwritten regMask
3301                         // but not the same register that we picked above in 'regMask'
3302                         //
3303                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3304                         regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3305                     }
3306                     else
3307 #endif
3308                     {
3309                         // We will use one load instruction here:
3310                         // The load target register can be a register that was used in place
3311                         // or one of the registers from the original regMask.
3312                         //
3313                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3314                         regMask = rpPredictRegPick(type, predictReg, lockedMask);
3315                     }
3316                 }
3317                 else if (predictReg != PREDICT_ADDR)
3318                 {
3319                     /* Unless the caller specified PREDICT_ADDR   */
3320                     /* we don't return the temp registers used    */
3321                     /* to form the address                        */
3322                     regMask = RBM_NONE;
3323                 }
3324             }
3325
3326                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3327
3328                 goto RETURN_CHECK;
3329
3330             case GT_EQ:
3331             case GT_NE:
3332             case GT_LT:
3333             case GT_LE:
3334             case GT_GE:
3335             case GT_GT:
3336
3337 #ifdef _TARGET_X86_
3338                 /* Floating point comparison uses EAX for flags */
3339                 if (varTypeIsFloating(op1->TypeGet()))
3340                 {
3341                     regMask = RBM_EAX;
3342                 }
3343                 else
3344 #endif
3345                     if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3346                 {
3347                     // Some comparisons are converted to ?:
3348                     noway_assert(!fgMorphRelopToQmark(op1));
3349
3350                     if (predictReg <= PREDICT_REG)
3351                         predictReg = PREDICT_SCRATCH_REG;
3352
3353                     // The set instructions need a byte register
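                         // (e.g. the x86 setcc instructions can only write the low byte of
                         //  EAX, ECX, EDX, or EBX)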
3354                     regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3355                 }
3356                 else
3357                 {
3358                     regMask = RBM_NONE;
3359 #ifdef _TARGET_XARCH_
3360                     tmpMask = RBM_NONE;
3361                     // Optimize the compare-with-constant cases for xarch
3362                     if (op1->gtOper == GT_CNS_INT)
3363                     {
3364                         if (op2->gtOper == GT_CNS_INT)
3365                             tmpMask =
3366                                 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3367                         rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3368                         tree->gtUsedRegs = op2->gtUsedRegs;
3369                         goto RETURN_CHECK;
3370                     }
3371                     else if (op2->gtOper == GT_CNS_INT)
3372                     {
3373                         rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3374                         tree->gtUsedRegs = op1->gtUsedRegs;
3375                         goto RETURN_CHECK;
3376                     }
3377                     else if (op2->gtOper == GT_CNS_LNG)
3378                     {
3379                         regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3380 #ifdef _TARGET_X86_
3381                         // We also need one extra register to read values from
3382                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3383 #endif // _TARGET_X86_
3384                         tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3385                         goto RETURN_CHECK;
3386                     }
3387 #endif // _TARGET_XARCH_
3388                 }
3389
3390                 unsigned op1TypeSize;
3391                 unsigned op2TypeSize;
3392
3393                 op1TypeSize = genTypeSize(op1->TypeGet());
3394                 op2TypeSize = genTypeSize(op2->TypeGet());
3395
3396                 op1PredictReg = PREDICT_REG;
3397                 op2PredictReg = PREDICT_REG;
3398
3399                 if (tree->gtFlags & GTF_REVERSE_OPS)
3400                 {
3401 #ifdef _TARGET_XARCH_
3402                     if (op1TypeSize == sizeof(int))
3403                         op1PredictReg = PREDICT_NONE;
3404 #endif
3405
3406                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3407                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3408                 }
3409                 else
3410                 {
3411 #ifdef _TARGET_XARCH_
3412                     // For full DWORD compares we can have
3413                     //
3414                     //      op1 is an address mode and op2 is a register
3415                     // or
3416                     //      op1 is a register and op2 is an address mode
3417                     //
3418                     if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3419                     {
3420                         if (op2->gtOper == GT_LCL_VAR)
3421                         {
3422                             unsigned lclNum = op2->gtLclVar.gtLclNum;
3423                             varDsc          = lvaTable + lclNum;
3424                             /* Did we predict that this local will be enregistered? */
3425                             if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3426                             {
3427                                 op1PredictReg = PREDICT_ADDR;
3428                             }
3429                         }
3430                     }
3431                     // Codegen will generate cmp reg,[mem] for 4- or 8-byte types, but not for 1- or 2-byte types
3432                     if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3433                         op2PredictReg = PREDICT_ADDR;
3434 #endif // _TARGET_XARCH_
3435
3436                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3437 #ifdef _TARGET_ARM_
3438                     if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3439 #endif
3440                     {
3441                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3442                     }
3443                 }
3444
3445 #ifdef _TARGET_XARCH_
3446                 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3447                 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3448                 // if one of the registers is small and the types aren't equal.
3449
3450                 if (regMask == RBM_NONE)
3451                 {
3452                     rpPredictReg op1xPredictReg, op2xPredictReg;
3453                     GenTree*     op1x;
3454                     GenTree*     op2x;
3455                     if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3456                     {
3457                         op1xPredictReg = op2PredictReg;
3458                         op2xPredictReg = op1PredictReg;
3459                         op1x           = op2;
3460                         op2x           = op1;
3461                     }
3462                     else
3463                     {
3464                         op1xPredictReg = op1PredictReg;
3465                         op2xPredictReg = op2PredictReg;
3466                         op1x           = op1;
3467                         op2x           = op2;
3468                     }
3469                     if ((op1xPredictReg < PREDICT_REG) &&  // op1 doesn't get a register (probably an indir)
3470                         (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3471                         varTypeIsSmall(op1x->TypeGet()))   // op1 is smaller than an int
3472                     {
3473                         bool needTmp = false;
3474
3475                         // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3476                         // We could predict a byteable register for op2x, but what if we don't get it?
3477                         // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3478                         // result.
3479                         if (varTypeIsByte(op1x->TypeGet()))
3480                         {
3481                             needTmp = true;
3482                         }
3483                         else
3484                         {
3485                             if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3486                             {
3487                                 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3488                                     needTmp = true;
3489                             }
3490                             else
3491                             {
3492                                 if (op1x->TypeGet() != op2x->TypeGet())
3493                                     needTmp = true;
3494                             }
3495                         }
3496                         if (needTmp)
3497                         {
3498                             regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3499                         }
3500                     }
3501                 }
3502 #endif // _TARGET_XARCH_
3503
3504                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3505                 goto RETURN_CHECK;
3506
3507             case GT_MUL:
3508
3509 #ifndef _TARGET_AMD64_
3510                 if (type == TYP_LONG)
3511                 {
3512                     assert(tree->gtIsValid64RsltMul());
3513
3514                     /* Strip out the cast nodes */
3515
3516                     noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3517                     op1 = op1->gtCast.CastOp();
3518                     op2 = op2->gtCast.CastOp();
3519 #else
3520                 if (false)
3521                 {
3522 #endif // !_TARGET_AMD64_
3523                 USE_MULT_EAX:
3524
3525 #if defined(_TARGET_X86_)
3526                     // This will be done by a 64-bit imul "imul eax, reg"
3527                     //   (i.e. EDX:EAX = EAX * reg)
3528
3529                     /* Are we supposed to evaluate op2 first? */
3530                     if (tree->gtFlags & GTF_REVERSE_OPS)
3531                     {
3532                         rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3533                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3534                     }
3535                     else
3536                     {
3537                         rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3538                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3539                     }
3540
3541                     /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3542
3543                     tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3544
3545                     /* set regMask to the set of held registers */
3546
3547                     regMask = RBM_PAIR_TMP_LO;
3548
3549                     if (type == TYP_LONG)
3550                         regMask |= RBM_PAIR_TMP_HI;
3551
3552 #elif defined(_TARGET_ARM_)
3553                     // This will be done by a 4-operand multiply
3554
3555                     // Are we supposed to evaluate op2 first?
3556                     if (tree->gtFlags & GTF_REVERSE_OPS)
3557                     {
3558                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3559                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3560                     }
3561                     else
3562                     {
3563                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3564                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3565                     }
3566
3567                     // set regMask to the set of held registers,
3568                     //  the two scratch registers we need to compute the mul result
3569
3570                     regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3571
3572                     // set gtUsedRegs to regMask and the registers needed by op1 and op2
3573
3574                     tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3575
3576 #else // !_TARGET_X86_ && !_TARGET_ARM_
3577 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3578 #endif
3579
3580                     goto RETURN_CHECK;
3581                 }
3582                 else
3583                 {
3584                     /* We use imulEAX for most unsigned multiply operations */
3585                     if (tree->gtOverflow())
3586                     {
3587                         if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3588                         {
3589                             goto USE_MULT_EAX;
3590                         }
3591                     }
3592                 }
3593
3594                 __fallthrough;
3595
3596             case GT_OR:
3597             case GT_XOR:
3598             case GT_AND:
3599
3600             case GT_SUB:
3601             case GT_ADD:
3602                 tree->gtUsedRegs = 0;
3603
3604                 if (predictReg <= PREDICT_REG)
3605                     predictReg = PREDICT_SCRATCH_REG;
3606
3607             GENERIC_BINARY:
3608
3609                 noway_assert(op2);
3610                 if (tree->gtFlags & GTF_REVERSE_OPS)
3611                 {
3612                     op1PredictReg = PREDICT_REG;
3613 #if !CPU_LOAD_STORE_ARCH
3614                     if (genTypeSize(op1->gtType) >= sizeof(int))
3615                         op1PredictReg = PREDICT_NONE;
3616 #endif
3617                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3618                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3619                 }
3620                 else
3621                 {
3622                     op2PredictReg = PREDICT_REG;
3623 #if !CPU_LOAD_STORE_ARCH
3624                     if (genTypeSize(op2->gtType) >= sizeof(int))
3625                         op2PredictReg = PREDICT_NONE;
3626 #endif
3627                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3628 #ifdef _TARGET_ARM_
3629                     // For most ALU operations we can generate a single instruction that encodes
3630                     // a small immediate integer constant value.  (except for multiply)
3631                     //
3632                     if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3633                     {
3634                         ssize_t ival = op2->gtIntCon.gtIconVal;
3635                         if (codeGen->validImmForAlu(ival))
3636                         {
3637                             op2PredictReg = PREDICT_NONE;
3638                         }
3639                         else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3640                                  ((oper == GT_ADD) || (oper == GT_SUB)))
3641                         {
3642                             op2PredictReg = PREDICT_NONE;
3643                         }
3644                     }
3645                     if (op2PredictReg == PREDICT_NONE)
3646                     {
3647                         op2->gtUsedRegs = RBM_NONE;
3648                     }
3649                     else
3650 #endif
3651                     {
3652                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3653                     }
3654                 }
3655                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3656
3657 #if CPU_HAS_BYTE_REGS
3658                 /* We have special register requirements for byte operations */
3659
3660                 if (varTypeIsByte(tree->TypeGet()))
3661                 {
3662                     /* For 8 bit arithmetic, one operand has to be in a
3663                        byte-addressable register, and the other has to be in a
3664                        byte-addressable register or in memory. Assume it's in a register */
3665
3666                     regMaskTP regByteMask = 0;
3667                     regMaskTP op1ByteMask = op1->gtUsedRegs;
3668
3669                     if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3670                     {
3671                         // Pick a Byte register to use for op1
3672                         regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3673                         op1ByteMask = regByteMask;
3674                     }
3675
3676                     if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3677                     {
3678                         // Pick a Byte register to use for op2, avoiding the one used by op1
3679                         regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3680                     }
3681
3682                     if (regByteMask)
3683                     {
3684                         tree->gtUsedRegs |= regByteMask;
3685                         regMask = regByteMask;
3686                     }
3687                 }
3688 #endif
3689                 goto RETURN_CHECK;
3690
3691             case GT_DIV:
3692             case GT_MOD:
3693
3694             case GT_UDIV:
3695             case GT_UMOD:
3696
3697                 /* non-integer division handled in generic way */
3698                 if (!varTypeIsIntegral(type))
3699                 {
3700                     tree->gtUsedRegs = 0;
3701                     if (predictReg <= PREDICT_REG)
3702                         predictReg = PREDICT_SCRATCH_REG;
3703                     goto GENERIC_BINARY;
3704                 }
3705
3706 #ifndef _TARGET_64BIT_
3707
3708                 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3709                 {
3710                     /* Special case:  a mod with an int op2 is done inline using idiv or div
3711                        to avoid a costly call to the helper */
3712
3713                     noway_assert((op2->gtOper == GT_CNS_LNG) &&
3714                                  (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3715
3716 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3717                     if (tree->gtFlags & GTF_REVERSE_OPS)
3718                     {
3719                         tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3720                                                       rsvdRegs | op1->gtRsvdRegs);
3721                         tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3722                     }
3723                     else
3724                     {
3725                         tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3726                         tmpMask |=
3727                             rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3728                     }
3729                     regMask = RBM_PAIR_TMP;
3730 #else // !_TARGET_X86_ && !_TARGET_ARM_
3731 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3732 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3733
3734                     tree->gtUsedRegs =
3735                         (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3736                                        rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3737
3738                     goto RETURN_CHECK;
3739                 }
3740 #endif // _TARGET_64BIT_
3741
3742                 /* there is no divide-by-immediate instruction, so an integer constant
3743                  * divisor that is not a power of two must be forced into a register
3744                  */
3745
3746                 if (op2->OperKind() & GTK_CONST)
3747                 {
3748                     ssize_t ival = op2->gtIntConCommon.IconValue();
3749
3750                     /* Is the divisor a power of 2 ? */
3751
3752                     if (ival > 0 && genMaxOneBit(size_t(ival)))
3753                     {
3754                         goto GENERIC_UNARY;
3755                     }
3756                     else
3757                         op2PredictReg = PREDICT_SCRATCH_REG;
3758                 }
3759                 else
3760                 {
3761                     /* A non-constant divisor must also be enregistered */
3762                     op2PredictReg = PREDICT_REG;
3763                 }
3764
3765                 regMaskTP trashedMask;
3766                 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3767                 regMaskTP op1ExcludeMask;
3768                 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3769                 regMaskTP op2ExcludeMask;
3770                 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
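                // Each target fills these in below; DUMMY_INIT merely quiets
                // "potentially uninitialized" analysis in the meantime.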
3771
3772 #ifdef _TARGET_XARCH_
3773                 /*  Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3774                  *  we can safely allow the "b" value to die. Unfortunately, if we simply
3775                  *  mark the node "b" as using EDX, this will not work if "b" is a register
3776                  *  variable that dies with this particular reference. Thus, if we want to
3777                  *  avoid this situation (where we would have to spill the variable from
3778                  *  EDX to someplace else), we need to explicitly mark the interference
3779                  *  of the variable at this point.
3780                  */
3781
3782                 if (op2->gtOper == GT_LCL_VAR)
3783                 {
3784                     unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3785                     varDsc          = lvaTable + lclNum;
3786                     if (varDsc->lvTracked)
3787                     {
3788 #ifdef DEBUG
3789                         if (verbose)
3790                         {
3791                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3792                                 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3793                                        varDsc->lvVarIndex);
3794                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3795                                 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3796                                        varDsc->lvVarIndex);
3797                         }
3798 #endif
3799                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3800                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3801                     }
3802                 }
3803
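                /* x86 idiv/div implicitly use EDX:EAX: both are trashed, and the
                   divisor (op2) must be predicted away from them */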
3804                 /* set the held register based on opcode */
3805                 if (oper == GT_DIV || oper == GT_UDIV)
3806                     regMask = RBM_EAX;
3807                 else
3808                     regMask    = RBM_EDX;
3809                 trashedMask    = (RBM_EAX | RBM_EDX);
3810                 op1ExcludeMask = 0;
3811                 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3812
3813 #endif // _TARGET_XARCH_
3814
3815 #ifdef _TARGET_ARM_
3816                 trashedMask    = RBM_NONE;
3817                 op1ExcludeMask = RBM_NONE;
3818                 op2ExcludeMask = RBM_NONE;
3819 #endif
3820
3821                 /* set the lvPref reg if possible */
3822                 GenTree* dest;
3823                 /*
3824                  *  Walking the gtNext link twice from here should get us back
3825                  *  to our parent node, if this is a simple assignment tree.
3826                  */
3827                 dest = tree->gtNext;
3828                 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3829                     dest->gtNext->gtOp.gtOp2 == tree)
3830                 {
3831                     varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3832                     varDsc->addPrefReg(regMask, this);
3833                 }
3834 #ifdef _TARGET_XARCH_
3835                 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3836 #else
3837                 op1PredictReg        = PREDICT_SCRATCH_REG;
3838 #endif
3839
3840                 /* are we supposed to evaluate op2 first? */
3841                 if (tree->gtFlags & GTF_REVERSE_OPS)
3842                 {
3843                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3844                                                   rsvdRegs | op1->gtRsvdRegs);
3845                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3846                 }
3847                 else
3848                 {
3849                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3850                                                   rsvdRegs | op2->gtRsvdRegs);
3851                     rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3852                 }
3853 #ifdef _TARGET_ARM_
3854                 regMask = tmpMask;
3855 #endif
3856                 /* grab EAX, EDX for this tree node */
3857                 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3858
3859                 goto RETURN_CHECK;
3860
3861             case GT_LSH:
3862             case GT_RSH:
3863             case GT_RSZ:
3864
3865                 if (predictReg <= PREDICT_REG)
3866                     predictReg = PREDICT_SCRATCH_REG;
3867
3868 #ifndef _TARGET_64BIT_
3869                 if (type == TYP_LONG)
3870                 {
3871                     if (op2->IsCnsIntOrI())
3872                     {
3873                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3874                         // no register used by op2
3875                         op2->gtUsedRegs  = 0;
3876                         tree->gtUsedRegs = op1->gtUsedRegs;
3877                     }
3878                     else
3879                     {
3880                         // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3881                         tmpMask = lockedRegs;
3882                         tmpMask &= ~RBM_LNGARG_0;
3883                         tmpMask &= ~RBM_SHIFT_LNG;
3884
3885                         // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3886                         if (tree->gtFlags & GTF_REVERSE_OPS)
3887                         {
3888                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3889                             tmpMask |= RBM_SHIFT_LNG;
3890                             // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
3891                             // Fix 383843 X86/ARM ILGEN
3892                             rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3893                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3894                         }
3895                         else
3896                         {
3897                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3898                             tmpMask |= RBM_LNGARG_0;
3899                             // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3900                             // Fix 383839 ARM ILGEN
3901                             rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3902                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3903                         }
3904                         regMask = RBM_LNGRET; // function return registers
3905                         op1->gtUsedRegs |= RBM_LNGARG_0;
3906                         op2->gtUsedRegs |= RBM_SHIFT_LNG;
3907
3908                         tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3909
3910                         // We are using a helper function to do shift:
3911                         //
3912                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3913                     }
3914                 }
3915                 else
3916 #endif // _TARGET_64BIT_
3917                 {
3918 #ifdef _TARGET_XARCH_
3919                     if (!op2->IsCnsIntOrI())
3920                         predictReg = PREDICT_NOT_REG_ECX;
3921 #endif
3922
3923                 HANDLE_SHIFT_COUNT:
3924                     // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3925
3926                     regMaskTP tmpRsvdRegs;
3927
3928                     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3929                     {
3930                         regMask     = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3931                         rsvdRegs    = RBM_LASTUSE;
3932                         tmpRsvdRegs = RBM_NONE;
3933                     }
3934                     else
3935                     {
3936                         regMask = RBM_NONE;
3937                         // Special case: op1 is a constant
3938                         if (op1->IsCnsIntOrI())
3939                             tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3940                                                        // System.Xml.Schema.BitSet:Get(int):bool
3941                         else
3942                             tmpRsvdRegs = op1->gtRsvdRegs;
3943                     }
3944
3945                     op2Mask = RBM_NONE;
3946                     if (!op2->IsCnsIntOrI())
3947                     {
3948                         if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3949                         {
3950                             op2PredictReg = PREDICT_REG_SHIFT;
3951                         }
3952                         else
3953                         {
3954                             op2PredictReg = PREDICT_REG;
3955                         }
3956
3957                         /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3958                         op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3959
3960                         // If our target arch has a REG_SHIFT register then
3961                         //     we set the PrefReg when we have a LclVar for op2
3962                         //     and we add an interference with REG_SHIFT for any other LclVars alive at op2
3963                         if (REG_SHIFT != REG_NA)
3964                         {
3965                             VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3966
3967                             while (op2->gtOper == GT_COMMA)
3968                             {
3969                                 op2 = op2->gtOp.gtOp2;
3970                             }
3971
3972                             if (op2->gtOper == GT_LCL_VAR)
3973                             {
3974                                 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3975                                 varDsc->setPrefReg(REG_SHIFT, this);
3976                                 if (varDsc->lvTracked)
3977                                 {
3978                                     VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3979                                 }
3980                             }
3981
3982                             // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3983                             // excluding the LclVar that was used for the shift amount as it is read-only
3984                             // and can be kept alive through the shift operation
3985                             //
3986                             rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3987                             // In case op2Mask doesn't contain the required shift register,
3988                             // we will or it in now.
3989                             op2Mask |= RBM_SHIFT;
3990                         }
3991                     }
3992
3993                     if (tree->gtFlags & GTF_REVERSE_OPS)
3994                     {
3995                         assert(regMask == RBM_NONE);
3996                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3997                     }
3998
3999 #if CPU_HAS_BYTE_REGS
4000                     if (varTypeIsByte(type))
4001                     {
4002                         // Fix 383789 X86 ILGEN
4003                         // Fix 383813 X86 ILGEN
4004                         // Fix 383828 X86 ILGEN
4005                         if (op1->gtOper == GT_LCL_VAR)
4006                         {
4007                             varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4008                             if (varDsc->lvTracked)
4009                             {
4010                                 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4011
4012                                 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4013                                 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4014                             }
4015                         }
4016                         if ((regMask & RBM_BYTE_REGS) == 0)
4017                         {
4018                             // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4019                             // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4020                             //
4021                             regMask |=
4022                                 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4023                         }
4024                     }
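                    // For context (x86 fact): only EAX/ECX/EDX/EBX have byte-addressable
                    // forms (AL/CL/DL/BL) in 32-bit mode; ESI/EDI/EBP/ESP do not, and that
                    // remainder is exactly the RBM_NON_BYTE_REGS set interfered with above.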
4025 #endif
4026                     tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4027                 }
4028
4029                 goto RETURN_CHECK;
4030
4031             case GT_COMMA:
4032                 if (tree->gtFlags & GTF_REVERSE_OPS)
4033                 {
4034                     if (predictReg == PREDICT_NONE)
4035                     {
4036                         predictReg = PREDICT_REG;
4037                     }
4038                     else if (rpHasVarIndexForPredict(predictReg))
4039                     {
4040                         /* Don't propagate the target register use into a GT_COMMA */
4041                         predictReg = PREDICT_SCRATCH_REG;
4042                     }
4043
4044                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4045                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4046                 }
4047                 else
4048                 {
4049                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4050
4051                     /* CodeGen will enregister the op2 side of a GT_COMMA */
4052                     if (predictReg == PREDICT_NONE)
4053                     {
4054                         predictReg = PREDICT_REG;
4055                     }
4056                     else if (rpHasVarIndexForPredict(predictReg))
4057                     {
4058                         /* Don't propagate the target register use into a GT_COMMA */
4059                         predictReg = PREDICT_SCRATCH_REG;
4060                     }
4061
4062                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4063                 }
4064                 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4065                 //
4066                 tree->gtUsedRegs = op2->gtUsedRegs;
4067                 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4068                 {
4069                     LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4070
4071                     if (op2VarDsc->lvTracked)
4072                     {
4073                         VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4074                         rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4075                     }
4076                 }
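                // Hedged recap of the comma semantics relied on above: in a tree shaped like
                //
                //     GT_COMMA(op1, op2)   // evaluate op1 for side effects only, yield op2
                //
                // only op2's value survives, so only op2's registers flow into gtUsedRegs
                // while op1 was evaluated with RBM_LASTUSE.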
4077                 goto RETURN_CHECK;
4078
4079             case GT_QMARK:
4080             {
4081                 noway_assert(op1 != NULL && op2 != NULL);
4082
4083                 /*
4084                  *  If the gtUsedRegs conflicts with lockedRegs
4085                  *  then we are going to have to spill some registers
4086                  *  into the non-trashed register set to keep them alive
4087                  */
4088                 unsigned spillCnt;
4089                 spillCnt = 0;
4090                 regMaskTP spillRegs;
4091                 spillRegs = lockedRegs & tree->gtUsedRegs;
4092
4093                 while (spillRegs)
4094                 {
4095                     /* Find the next register that needs to be spilled */
4096                     tmpMask = genFindLowestBit(spillRegs);
4097
4098 #ifdef DEBUG
4099                     if (verbose)
4100                     {
4101                         printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4102                         gtDispTree(tree, 0, NULL, true);
4103                     }
4104 #endif
4105                     /* In Codegen it will typically introduce a spill temp here */
4106                     /* rather than relocating the register to a non trashed reg */
4107                     rpPredictSpillCnt++;
4108                     spillCnt++;
4109
4110                     /* Remove it from the spillRegs and lockedRegs*/
4111                     spillRegs &= ~tmpMask;
4112                     lockedRegs &= ~tmpMask;
4113                 }
4114                 {
4115                     VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4116
4117                     /* Evaluate the <cond> subtree */
4118                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4119                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4120                     tree->gtUsedRegs = op1->gtUsedRegs;
4121
4122                     noway_assert(op2->gtOper == GT_COLON);
4123                     if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4124                     {
4125                         // Don't try to target the register specified in predictReg when we have complex subtrees
4126                         //
4127                         predictReg = PREDICT_SCRATCH_REG;
4128                     }
4129                     GenTree* elseTree = op2->AsColon()->ElseNode();
4130                     GenTree* thenTree = op2->AsColon()->ThenNode();
4131
4132                     noway_assert(thenTree != NULL && elseTree != NULL);
4133
4134                     // Update compCurLife to only those vars live on the <then> subtree
4135
4136                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4137
4138                     if (type == TYP_VOID)
4139                     {
4140                         /* Evaluate the <then> subtree */
4141                         rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4142                         regMask    = RBM_NONE;
4143                         predictReg = PREDICT_NONE;
4144                     }
4145                     else
4146                     {
4147                         // A mask to use to force the predictor to choose low registers (to reduce code size)
4148                         regMaskTP avoidRegs = RBM_NONE;
4149 #ifdef _TARGET_ARM_
4150                         avoidRegs = (RBM_R12 | RBM_LR);
4151 #endif
4152                         if (predictReg <= PREDICT_REG)
4153                             predictReg = PREDICT_SCRATCH_REG;
4154
4155                         /* Evaluate the <then> subtree */
4156                         regMask =
4157                             rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4158
4159                         if (regMask)
4160                         {
4161                             rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4162                             if (op1PredictReg != PREDICT_NONE)
4163                                 predictReg = op1PredictReg;
4164                         }
4165                     }
4166
4167                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4168
4169                     /* Evaluate the <else> subtree */
4170                     // First record the post-then liveness, and reset the current liveness to the else
4171                     // branch liveness.
4172                     CLANG_FORMAT_COMMENT_ANCHOR;
4173
4174 #ifdef DEBUG
4175                     VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4176 #endif
4177
4178                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4179
4180                     rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4181                     tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4182
4183                     // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4184                     // They each have only one successor, which they share.  Their live-out sets must equal the
4185                     // live-in set of this virtual successor block, and thus must be the same.  We can assert
4186                     // that equality here.
4187                     assert(VarSetOps::Equal(this, compCurLife, postThenLive));
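                    // A small worked example of the diamond (hypothetical source):
                    //
                    //     x = cond ? a : b;
                    //
                    // the <then> (a) and <else> (b) arms fall through to the same point, so
                    // the variables live after the qmark must be identical along both arms,
                    // which is what the assert above checks.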
4188
4189                     if (spillCnt > 0)
4190                     {
4191                         regMaskTP reloadMask = RBM_NONE;
4192
4193                         while (spillCnt)
4194                         {
4195                             regMaskTP reloadReg;
4196
4197                             /* Get an extra register to hold it */
4198                             reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4199 #ifdef DEBUG
4200                             if (verbose)
4201                             {
4202                                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4203                                 gtDispTree(tree, 0, NULL, true);
4204                             }
4205 #endif
4206                             reloadMask |= reloadReg;
4207
4208                             spillCnt--;
4209                         }
4210
4211                         /* update the gtUsedRegs mask */
4212                         tree->gtUsedRegs |= reloadMask;
4213                     }
4214                 }
4215
4216                 goto RETURN_CHECK;
4217             }
4218             case GT_RETURN:
4219                 tree->gtUsedRegs = RBM_NONE;
4220                 regMask          = RBM_NONE;
4221
4222                 /* Is there a return value? */
4223                 if (op1 != NULL)
4224                 {
4225 #if FEATURE_FP_REGALLOC
4226                     if (varTypeIsFloating(type))
4227                     {
4228                         predictReg = PREDICT_FLTRET;
4229                         if (type == TYP_FLOAT)
4230                             regMask = RBM_FLOATRET;
4231                         else
4232                             regMask = RBM_DOUBLERET;
4233                     }
4234                     else
4235 #endif
4236                         if (isRegPairType(type))
4237                     {
4238                         predictReg = PREDICT_LNGRET;
4239                         regMask    = RBM_LNGRET;
4240                     }
4241                     else
4242                     {
4243                         predictReg = PREDICT_INTRET;
4244                         regMask    = RBM_INTRET;
4245                     }
4246                     if (info.compCallUnmanaged)
4247                     {
4248                         lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4249                     }
4250                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4251                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4252                 }
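                // For reference (conventions assumed here, stated for x86/ARM): TYP_INT
                // returns in RBM_INTRET (EAX on x86), register-pair types in the RBM_LNGRET
                // pair (EDX:EAX on x86), and with FEATURE_FP_REGALLOC float/double values in
                // RBM_FLOATRET/RBM_DOUBLERET.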
4253
4254 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4255                 // On ARM under the profiler, emitting the Leave callback needs the registers
4256                 // in RBM_PROFILER_RET_USED.  We could trim this based on the int/long/void
4257                 // return kind, but to keep it simple we mark the entire set as used here.
4258                 if (compIsProfilerHookNeeded())
4259                 {
4260                     tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4261                 }
4262
4263 #endif
4264                 goto RETURN_CHECK;
4265
4266             case GT_RETFILT:
4267                 if (op1 != NULL)
4268                 {
4269                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4270                     regMask          = genReturnRegForTree(tree);
4271                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4272                     goto RETURN_CHECK;
4273                 }
4274                 tree->gtUsedRegs = 0;
4275                 regMask          = 0;
4276
4277                 goto RETURN_CHECK;
4278
4279             case GT_JTRUE:
4280                 /* This must be a test of a relational operator */
4281
4282                 noway_assert(op1->OperIsCompare());
4283
4284                 /* Only condition code set by this operation */
4285
4286                 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4287
4288                 tree->gtUsedRegs = op1->gtUsedRegs;
4289                 regMask          = 0;
4290
4291                 goto RETURN_CHECK;
4292
4293             case GT_SWITCH:
4294                 noway_assert(type <= TYP_INT);
4295                 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4296 #ifdef _TARGET_ARM_
4297                 {
4298                     regMask          = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4299                     unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4300                     if (jumpCnt > 2)
4301                     {
4302                         // Table based switch requires an extra register for the table base
4303                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4304                     }
4305                     tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4306                 }
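                // Hedged sketch of the assumed ARM codegen shape that needs the extra
                // register:
                //
                //     adr  rTable, JumpTable            ; the scratch register picked above
                //     ldr  pc, [rTable, rIdx, lsl #2]   ; indexed branch through the table
                //
                // With only two cases a compare-and-branch is emitted instead.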
4307 #else  // !_TARGET_ARM_
4308                 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4309                 tree->gtUsedRegs = op1->gtUsedRegs;
4310 #endif // _TARGET_ARM_
4311                 regMask = 0;
4312                 goto RETURN_CHECK;
4313
4314             case GT_CKFINITE:
4315                 if (predictReg <= PREDICT_REG)
4316                     predictReg = PREDICT_SCRATCH_REG;
4317
4318                 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4319                 // Need a reg to load exponent into
4320                 regMask          = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4321                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4322                 goto RETURN_CHECK;
4323
4324             case GT_LCLHEAP:
4325                 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4326                 op2Mask = 0;
4327
4328 #ifdef _TARGET_ARM_
4329                 if (info.compInitMem)
4330                 {
4331                     // We zero out two registers in the ARM codegen path
4332                     op2Mask |=
4333                         rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4334                 }
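                // Hedged sketch of what the extra scratch register is for (the actual ARM
                // codegen may differ): a zeroed register is stored in a loop to initialize
                // the freshly allocated stack area, e.g.
                //
                //     mov   rZero, #0
                // lp: str   rZero, [rDst], #4
                //     subs  rCnt, rCnt, #4
                //     bne   lp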
4335 #endif
4336
4337                 op1->gtUsedRegs |= (regMaskSmall)regMask;
4338                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4339
4340                 // The result will be put in the reg we picked for the size
4341                 // regMask = <already set as we want it to be>
4342
4343                 goto RETURN_CHECK;
4344
4345             case GT_OBJ:
4346             {
4347 #ifdef _TARGET_ARM_
4348                 if (predictReg <= PREDICT_REG)
4349                     predictReg = PREDICT_SCRATCH_REG;
4350
4351                 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4352                                                           // registers (to reduce code size)
4353                 regMask = RBM_NONE;
4354                 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4355 #endif
4356
4357                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4358                 {
4359                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4360                 }
4361
4362 #ifdef _TARGET_ARM_
4363                 unsigned  objSize   = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4364                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4365                 // If it has one bit set, and that's an arg reg...
4366                 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4367                 {
4368                     // We are passing the 'obj' in the argument registers
4369                     //
4370                     regNumber rn = genRegNumFromMask(preferReg);
4371
4372                     //  Add the registers used to pass the 'obj' to regMask.
4373                     for (unsigned i = 0; i < objSize / 4; i++)
4374                     {
4375                         if (rn == MAX_REG_ARG)
4376                             break;
4377                         // Otherwise...
4378                         regMask |= genRegMask(rn);
4379                         rn = genRegArgNext(rn);
4380                     }
4381                 }
4382                 else
4383                 {
4384                     // We are passing the 'obj' in the outgoing arg space
4385                     // We will need one register to load into unless the 'obj' size is 4 or less.
4386                     //
4387                     if (objSize > 4)
4388                     {
4389                         regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4390                     }
4391                 }
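                // Worked example of the register-passing path above (hypothetical values):
                // a 12-byte 'obj' with preferReg == RBM_R1 claims r1, r2 and r3 before the
                // loop stops at MAX_REG_ARG, so regMask == RBM_R1 | RBM_R2 | RBM_R3. A
                // stack-passed 'obj' larger than 4 bytes instead borrows one scratch
                // register for the copy.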
4392                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4393                 goto RETURN_CHECK;
4394 #else  // !_TARGET_ARM_
4395                 goto GENERIC_UNARY;
4396 #endif // _TARGET_ARM_
4397             }
4398
4399             case GT_MKREFANY:
4400             {
4401 #ifdef _TARGET_ARM_
4402                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4403                 regMask             = RBM_NONE;
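                // Note: ((preferReg - 1) & preferReg) == 0 below is the usual bit trick for
                // "at most one bit set", i.e. the same test genMaxOneBit performs elsewhere.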
4404                 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4405                 {
4406                     // A MKREFANY takes up two registers.
4407                     regNumber rn = genRegNumFromMask(preferReg);
4408                     regMask      = RBM_NONE;
4409                     if (rn < MAX_REG_ARG)
4410                     {
4411                         regMask |= genRegMask(rn);
4412                         rn = genRegArgNext(rn);
4413                         if (rn < MAX_REG_ARG)
4414                             regMask |= genRegMask(rn);
4415                     }
4416                 }
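                // For context (runtime layout fact): a TypedReference built by GT_MKREFANY
                // is two pointer-sized fields (data address plus type handle), hence the two
                // consecutive argument registers claimed above, e.g. r0 and r1 when
                // preferReg == RBM_R0.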
4417                 if (regMask != RBM_NONE)
4418                 {
4419                     // Condensation of GENERIC_BINARY path.
4420                     assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4421                     op2PredictReg        = PREDICT_REG;
4422                     regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4423                     rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4424                     regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4425                     tree->gtUsedRegs = (regMaskSmall)regMask;
4426                     goto RETURN_CHECK;
4427                 }
4428                 tree->gtUsedRegs = op1->gtUsedRegs;
4429 #endif // _TARGET_ARM_
4430                 goto GENERIC_BINARY;
4431             }
4432
4433             case GT_BOX:
4434                 goto GENERIC_UNARY;
4435
4436             case GT_LOCKADD:
4437                 goto GENERIC_BINARY;
4438
4439             case GT_XADD:
4440             case GT_XCHG:
4441                 // Ensure we can write to op2.  op2 will hold the output.
4442                 if (predictReg < PREDICT_SCRATCH_REG)
4443                     predictReg = PREDICT_SCRATCH_REG;
4444
4445                 if (tree->gtFlags & GTF_REVERSE_OPS)
4446                 {
4447                     op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4448                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4449                 }
4450                 else
4451                 {
4452                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4453                     op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4454                 }
4455                 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
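                // Hedged x86 intuition for why op2 must be writable:
                //
                //     lock xadd [op1], op2   ; op2 receives the original memory value
                //     xchg      [op1], op2   ; op2 receives the value swapped out
                //
                // both interlocked forms overwrite the op2 register with the result.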
4456                 goto RETURN_CHECK;
4457
4458             case GT_ARR_LENGTH:
4459                 goto GENERIC_UNARY;
4460
4461             case GT_INIT_VAL:
4462                 // This unary operator simply passes through the value from its child (much like GT_NOP)
4463                 // and thus won't need a scratch register.
4464                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4465                 tree->gtUsedRegs = op1->gtUsedRegs;
4466                 goto RETURN_CHECK;
4467
4468             default:
4469 #ifdef DEBUG
4470                 gtDispTree(tree);
4471 #endif
4472                 noway_assert(!"unexpected simple operator in reg use prediction");
4473                 break;
4474         }
4475     }
4476
4477     /* See what kind of a special operator we have here */
4478
4479     switch (oper)
4480     {
4481         GenTree*        args;
4482         GenTreeArgList* list;
4483         regMaskTP       keepMask;
4484         unsigned        regArgsNum;
4485         int             regIndex;
4486         regMaskTP       regArgMask;
4487         regMaskTP       curArgMask;
4488
4489         case GT_CALL:
4490
4491         {
4492
4493             /* initialize so we can just or in various bits */
4494             tree->gtUsedRegs = RBM_NONE;
4495
4496 #if GTF_CALL_REG_SAVE
4497             /*
4498              *  Unless the GTF_CALL_REG_SAVE flag is set,
4499              *  we can't preserve the RBM_CALLEE_TRASH registers.
4500              *  (likewise we can't preserve the return registers)
4501              *  So we remove them from the lockedRegs set and
4502              *  record any of them in the keepMask
4503              */
4504
4505             if (tree->gtFlags & GTF_CALL_REG_SAVE)
4506             {
4507                 regMaskTP trashMask = genReturnRegForTree(tree);
4508
4509                 keepMask = lockedRegs & trashMask;
4510                 lockedRegs &= ~trashMask;
4511             }
4512             else
4513 #endif
4514             {
4515                 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4516                 lockedRegs &= ~RBM_CALLEE_TRASH;
4517             }
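            // For concreteness (x86, echoed by the comment near RETURN_CHECK): the
            // RBM_CALLEE_TRASH set is EAX|ECX|EDX there, so any locked register in that set
            // is remembered in keepMask and re-locked once the call has been processed.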
4518
4519             regArgsNum = 0;
4520             regIndex   = 0;
4521
4522             /* Is there an object pointer? */
4523             if (tree->gtCall.gtCallObjp)
4524             {
4525                 /* Evaluate the instance pointer first */
4526
4527                 args = tree->gtCall.gtCallObjp;
4528
4529                 /* the objPtr always goes to an integer register (through temp or directly) */
4530                 noway_assert(regArgsNum == 0);
4531                 regArgsNum++;
4532
4533                 /* Must be passed in a register */
4534
4535                 noway_assert(args->gtFlags & GTF_LATE_ARG);
4536
4537                 /* Must be either a deferred reg arg node or a GT_ASG node */
4538
4539                 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4540                              args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4541
4542                 if (!args->IsArgPlaceHolderNode())
4543                 {
4544                     rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4545                 }
4546             }
4547             VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4548             VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4549
4550             /* process argument list */
4551             for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4552             {
4553                 args = list->Current();
4554
4555                 if (args->gtFlags & GTF_LATE_ARG)
4556                 {
4557                     /* Must be either a Placeholder/NOP node or a GT_ASG node */
4558
4559                     noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4560                                  args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4561
4562                     if (!args->IsArgPlaceHolderNode())
4563                     {
4564                         rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4565                     }
4566
4567                     regArgsNum++;
4568                 }
4569                 else
4570                 {
4571 #ifdef FEATURE_FIXED_OUT_ARGS
4572                     // We'll store this argument into the outgoing argument area
4573                     // It needs to be in a register to be stored.
4574                     //
4575                     predictReg = PREDICT_REG;
4576
4577 #else // !FEATURE_FIXED_OUT_ARGS
4578                     // We'll generate a push for this argument
4579                     //
4580                     predictReg = PREDICT_NONE;
4581                     if (varTypeIsSmall(args->TypeGet()))
4582                     {
4583                         /* We may need to sign or zero extend a small type using a register */
4584                         predictReg = PREDICT_SCRATCH_REG;
4585                     }
4586 #endif
4587
4588                     rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4589                 }
4590                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4591                 tree->gtUsedRegs |= args->gtUsedRegs;
4592             }
4593
4594             /* Is there a late argument list */
4595
4596             regIndex   = 0;
4597             regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4598             args       = NULL;
4599
4600             /* process the late argument list */
4601             for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4602             {
4603                 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4604                 LclVarDsc* promotedStructLocal = NULL;
4605
4606                 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4607                 tmpMask    = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4608
4609                 assert(list->OperIsList());
4610
4611                 args = list->Current();
4612                 list = list->Rest();
4613
4614                 assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
4615
4616                 fgArgTabEntry* curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4617                 assert(curArgTabEntry);
4618
4619                 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4620                 unsigned  numSlots =
4621                     curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4622
4623                 rpPredictReg argPredictReg;
4624                 regMaskTP    avoidReg = RBM_NONE;
4625
4626                 if (regNum != REG_STK)
4627                 {
4628                     argPredictReg = rpGetPredictForReg(regNum);
4629                     curArgMask |= genRegMask(regNum);
4630                 }
4631                 else
4632                 {
4633                     assert(numSlots > 0);
4634                     argPredictReg = PREDICT_NONE;
4635 #ifdef _TARGET_ARM_
4636                     // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4637                     avoidReg = (RBM_R12 | RBM_LR);
4638 #endif
4639                 }
4640
4641 #ifdef _TARGET_ARM_
4642                 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4643                 //
4644                 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4645                 {
4646                     // 64-bit longs and doubles require 2 consecutive argument registers
4647                     curArgMask |= genRegMask(REG_NEXT(regNum));
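                    // e.g. (illustration) a TYP_LONG passed starting at r2 also claims r3,
                    // making curArgMask RBM_R2 | RBM_R3.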
4648                 }
4649                 else if (args->TypeGet() == TYP_STRUCT)
4650                 {
4651                     GenTree* argx       = args;
4652                     GenTree* lclVarTree = NULL;
4653
4654                     /* The GT_OBJ may be a child of a GT_COMMA */
4655                     while (argx->gtOper == GT_COMMA)
4656                     {
4657                         argx = argx->gtOp.gtOp2;
4658                     }
4659                     unsigned originalSize = 0;
4660
4661                     if (argx->gtOper == GT_OBJ)
4662                     {
4663                         originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4664
4665                         // Is it the address of a promoted struct local?
4666                         if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4667                         {
4668                             lclVarTree        = argx->gtObj.gtOp1->gtOp.gtOp1;
4669                             LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4670                             if (varDsc->lvPromoted)
4671                                 promotedStructLocal = varDsc;
4672                         }
4673                     }
4674                     else if (argx->gtOper == GT_LCL_VAR)
4675                     {
4676                         varDsc       = lvaTable + argx->gtLclVarCommon.gtLclNum;
4677                         originalSize = varDsc->lvSize();
4678
4679                         // Is it a promoted struct local?
4680                         if (varDsc->lvPromoted)
4681                             promotedStructLocal = varDsc;
4682                     }
4683                     else if (argx->gtOper == GT_MKREFANY)
4684                     {
4685                         originalSize = 2 * TARGET_POINTER_SIZE;
4686                     }
4687                     else
4688                     {
4689                         noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4690                     }
4691
4692                     // We only pass arguments differently if it is a struct local that is "independently"
4693                     // promoted, which allows the field locals to be independently enregistered.
4694                     if (promotedStructLocal != NULL)
4695                     {
4696                         if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4697                             promotedStructLocal = NULL;
4698                     }
4699
4700                     unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
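                    // Worked example (hypothetical size): originalSize == 10 rounds up to 12
                    // on ARM, so slots == 12 / REGSIZE_BYTES == 3 pointer-sized pieces to
                    // place in registers and/or outgoing stack slots.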
4701
4702                     // Are we passing a TYP_STRUCT in multiple integer registers?
4703                     // if so set up curArgMask to reflect this
4704                     // Also slots is updated to reflect the number of outgoing arg slots that we will write
4705                     if (regNum != REG_STK)
4706                     {
4707                         regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4708                         assert(genIsValidReg(regNum));
4709                         regNumber nextReg = REG_NEXT(regNum);
4710                         slots--;
4711                         while (slots > 0 && nextReg <= regLast)
4712                         {
4713                             curArgMask |= genRegMask(nextReg);
4714                             nextReg = REG_NEXT(nextReg);
4715                             slots--;
4716                         }
4717                     }
4718
4719                     if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4720                     {
4721                         // All or a portion of this struct will be placed in the argument registers indicated by
4722                         // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4723                         // that the second arg to be evaluated interferes with the reg for the first, the third with
4724                         // the regs for the first and second, etc. But since we always place the stack slots before
4725                         // placing the register slots we do not add interferences for any part of the struct that gets
4726                         // passed on the stack.
4727
4728                         argPredictReg =
4729                             PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
4730                         regMaskTP prevArgMask = RBM_NONE;
4731                         for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4732                         {
4733                             LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4734                             if (fieldVarDsc->lvTracked)
4735                             {
4736                                 assert(lclVarTree != NULL);
4737                                 if (prevArgMask != RBM_NONE)
4738                                 {
4739                                     rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4740                                                                      DEBUGARG("fieldVar/argReg"));
4741                                 }
4742                             }
4743                             // Now see how many registers this uses up.
4744                             unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4745                             unsigned nextAfterLastRegOffset =
4746                                 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4747                                 TARGET_POINTER_SIZE;
4748                             unsigned nextAfterLastArgRegOffset =
4749                                 min(nextAfterLastRegOffset,
4750                                     genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4751
4752                             for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4753                                  regOffset++)
4754                             {
4755                                 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4756                             }
4757
4758                             if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4759                             {
4760                                 break;
4761                             }
4762
4763                             if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4764                             {
4765                                 // Add the argument register used here as a preferred register for this fieldVarDsc
4766                                 //
4767                                 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4768                                 fieldVarDsc->setPrefReg(firstRegUsed, this);
4769                             }
4770                         }
4771                         compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4772                     }
4773
4774                     // If slots is greater than zero then part or all of this TYP_STRUCT
4775                     // argument is passed in the outgoing argument area. (except HFA arg)
4776                     //
4777                     if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4778                     {
4779                         // We will need a register to address the TYP_STRUCT
4780                         // Note that we can use an argument register in curArgMask as in
4781                         // codegen we pass the stack portion of the argument before we
4782                         // setup the register part.
4783                         //
4784
4785                         // Force the predictor to choose a LOW_REG here to reduce code bloat
4786                         avoidReg = (RBM_R12 | RBM_LR);
4787
4788                         assert(tmpMask == RBM_NONE);
4789                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4790
4791                         // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4792                         // arg area
4793                         if (slots > 1)
4794                         {
4795                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4796                                                         lockedRegs | regArgMask | tmpMask | avoidReg);
4797                         }
4798                     }
4799                 } // (args->TypeGet() == TYP_STRUCT)
4800 #endif            // _TARGET_ARM_
4801
4802                 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4803                 // as we have already calculated the correct tmpMask and curArgMask values and
4804                 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4805                 //
4806                 if (promotedStructLocal == NULL)
4807                 {
4808                     /* Target the appropriate argument register */
4809                     tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4810                 }
4811
4812                 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4813                 // for the duration of the OBJ.
4814                 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4815                 {
4816                     GenTree* lclVarTree = fgIsIndirOfAddrOfLocal(args);
4817                     assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4818                     compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4819                 }
4820
4821                 regArgMask |= curArgMask;
4822                 args->gtUsedRegs |= (tmpMask | regArgMask);
4823                 tree->gtUsedRegs |= args->gtUsedRegs;
4824                 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4825
4826                 if (args->gtUsedRegs != RBM_NONE)
4827                 {
4828                     // Add register interference with the set of registers used or in use when we evaluated
4829                     // the current arg, with whatever is alive after the current arg
4830                     //
4831                     rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4832                 }
4833                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4834             }
4835             assert(list == NULL);
4836
4837 #ifdef LEGACY_BACKEND
4838 #if CPU_LOAD_STORE_ARCH
4839 #ifdef FEATURE_READYTORUN_COMPILER
4840             if (tree->gtCall.IsR2RRelativeIndir())
4841             {
4842                 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4843             }
4844 #endif // FEATURE_READYTORUN_COMPILER
4845 #endif // CPU_LOAD_STORE_ARCH
4846 #endif // LEGACY_BACKEND
4847
4848             regMaskTP callAddrMask;
4849             callAddrMask = RBM_NONE;
4850 #if CPU_LOAD_STORE_ARCH
4851             predictReg = PREDICT_SCRATCH_REG;
4852 #else
4853             predictReg       = PREDICT_NONE;
4854 #endif
4855
4856             switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4857             {
4858                 case GTF_CALL_VIRT_STUB:
4859
4860                     // We only want to record an interference between the virtual stub
4861                     // param reg and anything that's live AFTER the call, but we've not
4862                     // yet processed the indirect target.  So add virtualStubParamInfo.regMask
4863                     // to interferingRegs.
4864                     interferingRegs |= virtualStubParamInfo->GetRegMask();
4865 #ifdef DEBUG
4866                     if (verbose)
4867                         printf("Adding interference with Virtual Stub Param\n");
4868 #endif
4869                     codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4870
4871                     if (tree->gtCall.gtCallType == CT_INDIRECT)
4872                     {
4873                         predictReg = virtualStubParamInfo->GetPredict();
4874                     }
4875                     break;
4876
4877                 case GTF_CALL_VIRT_VTABLE:
4878                     predictReg = PREDICT_SCRATCH_REG;
4879                     break;
4880
4881                 case GTF_CALL_NONVIRT:
4882                     predictReg = PREDICT_SCRATCH_REG;
4883                     break;
4884             }
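            // Background on the GTF_CALL_VIRT_STUB case above (hedged): stub dispatch
            // expects an indirection-cell address in the fixed register described by
            // virtualStubParamInfo (e.g. R4 on ARM), so that register must stay live into
            // the call and interfere with anything live across it.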
4885
4886             if (tree->gtCall.gtCallType == CT_INDIRECT)
4887             {
4888 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4889                 if (tree->gtCall.gtCallCookie)
4890                 {
4891                     codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4892
4893                     callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4894                                                         lockedRegs | regArgMask, RBM_LASTUSE);
4895
4896                     // Just in case we predict some other registers, force interference with our two special
4897                     // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4898                     callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4899
4900                     predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4901                 }
4902 #endif
4903                 callAddrMask |=
4904                     rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4905             }
4906             else if (predictReg != PREDICT_NONE)
4907             {
4908                 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4909             }
4910
4911             if (tree->gtFlags & GTF_CALL_UNMANAGED)
4912             {
4913                 // Need a register for tcbReg
4914                 callAddrMask |=
4915                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4916 #if CPU_LOAD_STORE_ARCH
4917                 // Need an extra register for tmpReg
4918                 callAddrMask |=
4919                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4920 #endif
4921             }
4922
4923             tree->gtUsedRegs |= callAddrMask;
4924
4925             /* After the call restore the original value of lockedRegs */
4926             lockedRegs |= keepMask;
4927
4928             /* set the return register */
4929             regMask = genReturnRegForTree(tree);
4930
4931             if (regMask & rsvdRegs)
4932             {
4933                 // We will need to relocate the return register value
4934                 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4935 #if FEATURE_FP_REGALLOC
4936                 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4937 #endif
4938                 regMask = RBM_NONE;
4939
4940                 if (intRegMask)
4941                 {
4942                     if (intRegMask == RBM_INTRET)
4943                     {
4944                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4945                     }
4946                     else if (intRegMask == RBM_LNGRET)
4947                     {
4948                         regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4949                     }
4950                     else
4951                     {
4952                         noway_assert(!"unexpected return regMask");
4953                     }
4954                 }
4955
4956 #if FEATURE_FP_REGALLOC
4957                 if (floatRegMask)
4958                 {
4959                     if (floatRegMask == RBM_FLOATRET)
4960                     {
4961                         regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4962                     }
4963                     else if (floatRegMask == RBM_DOUBLERET)
4964                     {
4965                         regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4966                     }
4967                     else // HFA return case
4968                     {
4969                         for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4970                         {
4971                             regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4972                         }
4973                     }
4974                 }
4975 #endif
4976             }
4977
4978             /* the return registers (if any) are killed */
4979             tree->gtUsedRegs |= regMask;
4980
4981 #if GTF_CALL_REG_SAVE
4982             if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4983 #endif
4984             {
4985                 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4986                 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4987             }
4988         }
4989
4990 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4991             // Mark required registers for emitting tailcall profiler callback as used
4992             if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4993             {
4994                 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4995             }
4996 #endif
4997             break;
4998
4999         case GT_ARR_ELEM:
5000
5001             // Figure out which registers can't be touched
5002             unsigned dim;
5003             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5004                 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5005
5006             regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5007
5008             regMaskTP dimsMask;
5009             dimsMask = 0;
5010
5011 #if CPU_LOAD_STORE_ARCH
5012             // We need a register to load the bounds of the MD array
5013             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5014 #endif
5015
5016             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5017             {
5018                 /* We need scratch registers to compute index-lower_bound.
5019                    Also, gtArrInds[0]'s register will be used as the second
5020                    addressability register (besides gtArrObj's) */
5021
5022                 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5023                                                         lockedRegs | regMask | dimsMask, rsvdRegs);
5024                 if (dim == 0)
5025                     regMask |= dimMask;
5026
5027                 dimsMask |= dimMask;
5028             }
5029 #ifdef _TARGET_XARCH_
5030             // INS_imul doesn't have an immediate constant.
5031             if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5032                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5033 #endif
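            // Rationale (x86 addressing fact): element sizes of 1, 2, 4 or 8 fold into a
            // SIB-scaled address such as lea eax, [ebx + ecx*4]; any other size requires an
            // explicit multiply, hence the extra scratch register when the scale check fails.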
5034             tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5035             break;
5036
5037         case GT_CMPXCHG:
5038         {
5039 #ifdef _TARGET_XARCH_
5040             rsvdRegs |= RBM_EAX;
5041 #endif
5042             if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5043             {
5044                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5045             }
5046             else
5047             {
5048                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5049             }
5050             op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5051
5052 #ifdef _TARGET_XARCH_
5053             rsvdRegs &= ~RBM_EAX;
5054             tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5055                                           rsvdRegs | regMask | op2Mask);
5056             tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5057             predictReg       = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5058 #else
5059             tmpMask          = 0;
5060             tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5061 #endif
5062         }
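        // Hedged x86 recap of why EAX is special above: lock cmpxchg [loc], val compares
        // EAX with [loc], stores val on a match, and always leaves the original memory
        // value in EAX; hence the comparand targets PREDICT_REG_EAX and the result lands
        // there.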
5063         break;
5064
5065         case GT_ARR_BOUNDS_CHECK:
5066         {
5067             regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5068             regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5069             rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5070
5071             tree->gtUsedRegs =
5072                 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5073         }
5074         break;
5075
5076         default:
5077             NO_WAY("unexpected special operator in reg use prediction");
5078             break;
5079     }
5080
5081 RETURN_CHECK:
5082
5083 #ifdef DEBUG
5084     /* make sure we set them to something reasonable */
5085     if (tree->gtUsedRegs & RBM_ILLEGAL)
5086         noway_assert(!"used regs not set properly in reg use prediction");
5087
5088     if (regMask & RBM_ILLEGAL)
5089         noway_assert(!"return value not set properly in reg use prediction");
5090
5091 #endif
5092
5093     /*
5094      *  If the gtUsedRegs conflicts with lockedRegs
5095      *  then we are going to have to spill some registers
5096      *  into the non-trashed register set to keep them alive
5097      */
5098     regMaskTP spillMask;
5099     spillMask = tree->gtUsedRegs & lockedRegs;
5100
5101     if (spillMask)
5102     {
5103         while (spillMask)
5104         {
5105             /* Find the next register that needs to be spilled */
5106             tmpMask = genFindLowestBit(spillMask);
5107
5108 #ifdef DEBUG
5109             if (verbose)
5110             {
5111                 printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5112                 gtDispTree(tree, 0, NULL, true);
5113                 if ((tmpMask & regMask) == 0)
5114                 {
5115                     printf("Predict reload of   %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5116                     gtDispTree(tree, 0, NULL, true);
5117                 }
5118             }
5119 #endif
5120             /* In Codegen it will typically introduce a spill temp here */
5121             /* rather than relocating the register to a non trashed reg */
5122             rpPredictSpillCnt++;
5123
5124             /* Remove it from the spillMask */
5125             spillMask &= ~tmpMask;
5126         }
5127     }
5128
5129     /*
5130      *  If the return registers in regMask conflicts with the lockedRegs
5131      *  then we allocate extra registers for the reload of the conflicting
5132      *  registers.
5133      *
5134      *  Set spillMask to the set of locked registers that have to be reloaded here.
5135      *  reloadMask is set to the extra registers that are used to reload
5136      *  the spilled lockedRegs.
5137      */
5138
5139     noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5140     spillMask = lockedRegs & regMask;
5141
5142     if (spillMask)
5143     {
5144         /* Remove the spillMask from regMask */
5145         regMask &= ~spillMask;
5146
5147         regMaskTP reloadMask = RBM_NONE;
5148         while (spillMask)
5149         {
5150             /* Get an extra register to hold it */
5151             regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5152 #ifdef DEBUG
5153             if (verbose)
5154             {
5155                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5156                 gtDispTree(tree, 0, NULL, true);
5157             }
5158 #endif
5159             reloadMask |= reloadReg;
5160
5161             /* Remove it from the spillMask */
5162             spillMask &= ~genFindLowestBit(spillMask);
5163         }
5164
5165         /* Update regMask to use the reloadMask */
5166         regMask |= reloadMask;
5167
5168         /* update the gtUsedRegs mask */
5169         tree->gtUsedRegs |= (regMaskSmall)regMask;
5170     }
5171
5172     regMaskTP regUse = tree->gtUsedRegs;
5173     regUse |= interferingRegs;
5174
5175     if (!VarSetOps::IsEmpty(this, compCurLife))
5176     {
5177         // Add interference between the current set of live variables and
5178         //  the set of temporary registers needed to evaluate the subtree
5179         if (regUse)
5180         {
5181             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5182         }
5183     }
5184
5185     if (rpAsgVarNum != -1)
5186     {
5187         // Add interference between the registers used (if any)
5188         // and the assignment target variable
5189         if (regUse)
5190         {
5191             rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5192         }
5193
5194         // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5195         // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5196         // to the set of currently live variables. This new interference will prevent us
5197         // from using the register value used here for enregistering different live variable
5198         // from using the register used here to enregister a different live variable.
5199         if (!VarSetOps::IsEmpty(this, compCurLife))
5200         {
5201             rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5202         }
5203     }
5204
5205     /* Do we need to restore the oldLastUseVars value? */
5206     if (restoreLastUseVars)
5207     {
5208         /*  If we used a GT_ASG targeted register then we need to add
5209          *  a variable interference between any new last use variables
5210          *  and the GT_ASG targeted register
5211          */
5212         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5213         {
5214             rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5215                                              DEBUGARG("asgn tgt last use conflict"));
5216         }
5217         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5218     }
5219
5220     return regMask;
5221 }
5222 #ifdef _PREFAST_
5223 #pragma warning(pop)
5224 #endif
5225
5226 #endif // LEGACY_BACKEND
5227
5228 /****************************************************************************/
5229 /* Returns true when we must create an EBP frame
5230    This is used to force most managed methods to have EBP-based frames,
5231    which allows the ETW kernel stackwalker to walk the stacks of managed code;
5232    this allows the kernel to perform lightweight profiling
5233  */
5234 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5235 {
5236     bool result = false;
5237 #ifdef DEBUG
5238     const char* reason = nullptr;
5239 #endif
5240
5241 #if ETW_EBP_FRAMED
5242     if (!result && (opts.MinOpts() || opts.compDbgCode))
5243     {
5244         INDEBUG(reason = "Debug Code");
5245         result = true;
5246     }
5247     if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5248     {
5249         INDEBUG(reason = "IL Code Size");
5250         result = true;
5251     }
5252     if (!result && (fgBBcount > 3))
5253     {
5254         INDEBUG(reason = "BasicBlock Count");
5255         result = true;
5256     }
5257     if (!result && fgHasLoops)
5258     {
5259         INDEBUG(reason = "Method has Loops");
5260         result = true;
5261     }
5262     if (!result && (optCallCount >= 2))
5263     {
5264         INDEBUG(reason = "Call Count");
5265         result = true;
5266     }
5267     if (!result && (optIndirectCallCount >= 1))
5268     {
5269         INDEBUG(reason = "Indirect Call");
5270         result = true;
5271     }
5272 #endif // ETW_EBP_FRAMED
5273
5274     // The VM always wants to identify the containing frame of an InlinedCallFrame
5275     // via the frame register, never the stack register, so we need a frame.
5276     if (!result && (optNativeCallCount != 0))
5277     {
5278         INDEBUG(reason = "Uses PInvoke");
5279         result = true;
5280     }
5281
5282 #ifdef _TARGET_ARM64_
5283     // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5284     // pointer frames.
5285     if (!result)
5286     {
5287         INDEBUG(reason = "Temporary ARM64 force frame pointer");
5288         result = true;
5289     }
5290 #endif // _TARGET_ARM64_
5291
5292 #ifdef DEBUG
5293     if ((result == true) && (wbReason != nullptr))
5294     {
5295         *wbReason = reason;
5296     }
5297 #endif
5298
5299     return result;
5300 }
5301
5302 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5303
5304 /*****************************************************************************
5305  *
5306  *  Predict which variables will be assigned to registers.
5307  *  This is x86 specific, only predicts the integer registers, and
5308  *  must be conservative: any variable that is predicted to be enregistered
5309  *  must end up being enregistered.
5310  *
5311  *  rpPredictTreeRegUse takes advantage of the LCL_VARs that are
5312  *  predicted to be enregistered to minimize calls to rpPredictRegPick.
5313  *
5314  */
5315
5316 #ifdef _PREFAST_
5317 #pragma warning(push)
5318 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5319 #endif
5320 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5321 {
5322     unsigned regInx;
5323
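         // On the early (non-pessimizing) passes we re-establish the default frame
         // assumptions below; later pessimizing passes keep the frame type chosen earlier.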
5324     if (rpPasses <= rpPassesPessimize)
5325     {
5326         // Assume that we won't have to reverse EBP enregistration
5327         rpReverseEBPenreg = false;
5328
5329         // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5330         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5331             rpFrameType = FT_EBP_FRAME;
5332         else
5333             rpFrameType = FT_ESP_FRAME;
5334     }
5335
5336 #if !ETW_EBP_FRAMED
5337     // If we are using FPBASE as the frame register, we cannot also use it for
5338     // a local var
5339     if (rpFrameType == FT_EBP_FRAME)
5340     {
5341         regAvail &= ~RBM_FPBASE;
5342     }
5343 #endif // !ETW_EBP_FRAMED
5344
5345     rpStkPredict        = 0;
5346     rpPredictAssignMask = regAvail;
5347
5348     raSetupArgMasks(&codeGen->intRegState);
5349 #if !FEATURE_STACK_FP_X87
5350     raSetupArgMasks(&codeGen->floatRegState);
5351 #endif
5352
5353     // If there is a secret stub param, it is also live-in to the method
5354     if (info.compPublishStubParam)
5355     {
5356         codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5357     }
5358
5359     if (regAvail == RBM_NONE)
5360     {
5361         unsigned   lclNum;
5362         LclVarDsc* varDsc;
5363
5364         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5365         {
5366 #if FEATURE_STACK_FP_X87
5367             if (!varDsc->IsFloatRegType())
5368 #endif
5369             {
5370                 varDsc->lvRegNum = REG_STK;
5371                 if (isRegPairType(varDsc->lvType))
5372                     varDsc->lvOtherReg = REG_STK;
5373             }
5374         }
5375     }
5376
5377 #ifdef DEBUG
5378     if (verbose)
5379     {
5380         printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5381         printf("\n        Available registers = ");
5382         dspRegMask(regAvail);
5383         printf("\n");
5384     }
5385 #endif
5386
5387     if (regAvail == RBM_NONE)
5388     {
5389         return RBM_NONE;
5390     }
5391
5392     /* We cannot change the lvVarIndexes at this point, so we  */
5393     /* can only re-order the existing set of tracked variables, */
5394     /* which will change the order in which we select the      */
5395     /* locals for enregistering.                               */
5396
5397     assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5398
5399     // Should not be set unless optimizing
5400     noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5401
5402     if (lvaSortAgain)
5403         lvaSortOnly();
5404
5405 #ifdef DEBUG
5406     fgDebugCheckBBlist();
5407 #endif
5408
5409     /* Initialize the weighted count of variables that could have */
5410     /* been enregistered but weren't */
5411     unsigned refCntStk    = 0; // sum of     ref counts for all stack based variables
5412     unsigned refCntEBP    = 0; // sum of     ref counts for EBP enregistered variables
5413     unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5414 #if DOUBLE_ALIGN
5415     unsigned refCntStkParam;  // sum of     ref counts for all stack based parameters
5416     unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5417
5418 #if FEATURE_STACK_FP_X87
5419     refCntStkParam  = raCntStkParamDblStackFP;
5420     refCntWtdStkDbl = raCntWtdStkDblStackFP;
5421     refCntStk       = raCntStkStackFP;
5422 #else
5423     refCntStkParam  = 0;
5424     refCntWtdStkDbl = 0;
5425     refCntStk       = 0;
5426 #endif // FEATURE_STACK_FP_X87
5427
5428 #endif // DOUBLE_ALIGN
5429
5430     /* Set of registers used to enregister variables in the prediction */
5431     regMaskTP regUsed = RBM_NONE;
5432
5433     /*-------------------------------------------------------------------------
5434      *
5435      *  Predict/Assign the enregistered locals in ref-count order
5436      *
5437      */
5438
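         // Every tracked variable starts out unprocessed; the sorted walk below removes
         // each one as it is visited, so this set always holds the not-yet-considered
         // candidates used in the "possible same color" estimate further down.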
5439     VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5440
5441     unsigned FPRegVarLiveInCnt;
5442     FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5443
5444     LclVarDsc* varDsc;
5445     for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5446     {
5447         bool notWorthy = false;
5448
5449         unsigned  varIndex;
5450         bool      isDouble;
5451         regMaskTP regAvailForType;
5452         var_types regType;
5453         regMaskTP avoidReg;
5454         unsigned  customVarOrderSize;
5455         regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5456         bool      firstHalf;
5457         regNumber saveOtherReg;
5458
5459         varDsc = lvaRefSorted[sortNum];
5460
5461 #if FEATURE_STACK_FP_X87
5462         if (varTypeIsFloating(varDsc->TypeGet()))
5463         {
5464 #ifdef DEBUG
5465             if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5466             {
5467                 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5468                 // be enregistered.
5469                 noway_assert(!varDsc->lvRegister);
5470             }
5471 #endif
5472             continue;
5473         }
5474 #endif
5475
5476         /* Check the set of invariant things that would prevent enregistration */
5477
5478         /* Ignore the variable if it's not tracked */
5479         if (!varDsc->lvTracked)
5480             goto CANT_REG;
5481
5482         /* Get hold of the index and the interference mask for the variable */
5483         varIndex = varDsc->lvVarIndex;
5484
5485         // Remove 'varIndex' from unprocessedVars
5486         VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5487
5488         // Skip the variable if it's marked as DoNotEnregister.
5489
5490         if (varDsc->lvDoNotEnregister)
5491             goto CANT_REG;
5492
5493         /* TODO: For now if we have JMP all register args go to stack
5494          * TODO: Later consider extending the life of the argument or make a copy of it */
5495
5496         if (compJmpOpUsed && varDsc->lvIsRegArg)
5497             goto CANT_REG;
5498
5499         /* Skip the variable if the ref count is zero */
5500
5501         if (varDsc->lvRefCnt == 0)
5502             goto CANT_REG;
5503
5504         /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5505
5506         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5507         {
5508             goto CANT_REG;
5509         }
5510
5511         /* Is the unweighted ref count too low to be interesting? */
5512
5513         if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5514             (varDsc->lvRefCnt <= 1))
5515         {
5516             /* Sometimes it's useful to enregister a variable with only one use */
5517             /*   arguments referenced in loops are one example */
5518
5519             if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5520                 goto OK_TO_ENREGISTER;
5521
5522             /* If the variable has a preferred register set it may be useful to put it there */
5523             if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5524                 goto OK_TO_ENREGISTER;
5525
5526             /* Keep going; the table is sorted by "weighted" ref count */
5527             goto CANT_REG;
5528         }
5529
5530     OK_TO_ENREGISTER:
5531
5532         if (varTypeIsFloating(varDsc->TypeGet()))
5533         {
5534             regType         = varDsc->TypeGet();
5535             regAvailForType = regAvail & RBM_ALLFLOAT;
5536         }
5537         else
5538         {
5539             regType         = TYP_INT;
5540             regAvailForType = regAvail & RBM_ALLINT;
5541         }
5542
5543 #ifdef _TARGET_ARM_
5544         isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5545
5546         if (isDouble)
5547         {
5548             regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5549         }
5550 #endif
5551
5552         /* If we don't have any registers available then skip the enregistration attempt */
5553         if (regAvailForType == RBM_NONE)
5554             goto NO_REG;
5555
5556         // On the pessimize passes don't even try to enregister LONGS
5557         if (isRegPairType(varDsc->lvType))
5558         {
5559             if (rpPasses > rpPassesPessimize)
5560                 goto NO_REG;
5561             else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5562                 goto NO_REG;
5563         }
5564
5565         // Set of registers to avoid when performing register allocation
5566         avoidReg = RBM_NONE;
5567
5568         if (!varDsc->lvIsRegArg)
5569         {
5570             /* For local variables,
5571              *  avoid the incoming arguments,
5572              *  but only if they conflict with them */
5573
5574             if (raAvoidArgRegMask != 0)
5575             {
5576                 LclVarDsc* argDsc;
5577                 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5578
5579                 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5580                 {
5581                     if (!argDsc->lvIsRegArg)
5582                         continue;
5583
5584                     bool      isFloat  = argDsc->IsFloatRegType();
5585                     regNumber inArgReg = argDsc->lvArgReg;
5586                     regMaskTP inArgBit = genRegMask(inArgReg);
5587
5588                     // Is this inArgReg in the raAvoidArgRegMask set?
5589
5590                     if (!(raAvoidArgRegMask & inArgBit))
5591                         continue;
5592
5593                     noway_assert(argDsc->lvIsParam);
5594                     noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5595
5596                     unsigned locVarIndex = varDsc->lvVarIndex;
5597                     unsigned argVarIndex = argDsc->lvVarIndex;
5598
5599                     /* Does this variable interfere with the arg variable ? */
5600                     if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5601                     {
5602                         noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5603                         /* Yes, so try to avoid the incoming arg reg */
5604                         avoidReg |= inArgBit;
5605                     }
5606                     else
5607                     {
5608                         noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5609                     }
5610                 }
5611             }
5612         }
5613
5614         // Now we will try to predict which register the variable
5615         // could be enregistered in
5616
5617         customVarOrderSize = MAX_VAR_ORDER_SIZE;
5618
5619         raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5620
5621         firstHalf    = false;
5622         saveOtherReg = DUMMY_INIT(REG_NA);
5623
5624         for (regInx = 0; regInx < customVarOrderSize; regInx++)
5625         {
5626             regNumber regNum  = customVarOrder[regInx];
5627             regMaskTP regBits = genRegMask(regNum);
5628
5629             /* Skip this register if it isn't available */
5630             if ((regAvailForType & regBits) == 0)
5631                 continue;
5632
5633             /* Skip this register if it interferes with the variable */
5634
5635             if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5636                 continue;
5637
5638             if (varTypeIsFloating(regType))
5639             {
5640 #ifdef _TARGET_ARM_
5641                 if (isDouble)
5642                 {
5643                     regNumber regNext = REG_NEXT(regNum);
5644                     regBits |= genRegMask(regNext);
5645
5646                     /* Skip if regNext interferes with the variable */
5647                     if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5648                         continue;
5649                 }
5650 #endif
5651             }
5652
5653             bool firstUseOfReg     = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5654             bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5655             bool calleeSavedReg    = ((regBits & RBM_CALLEE_SAVED) != 0);
5656
5657             /* Skip this register if the weighted ref count is less than two
5658                and we are considering an unused callee-saved register */
5659
5660             if (lessThanTwoRefWtd && // less than two references (weighted)
5661                 firstUseOfReg &&     // first use of this register
5662                 calleeSavedReg)      // callee saved register
5663             {
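                     // Claiming a previously unused callee-saved register costs a
                     // save/restore in the prolog/epilog, so it is only worthwhile if
                     // this variable, together with the not-yet-processed variables that
                     // could share the same register, has enough weighted uses.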
5664                 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5665
5666                 // psc is an abbreviation for possibleSameColor
5667                 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5668
5669                 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5670                 unsigned        pscIndex = 0;
5671                 while (pscIndexIter.NextElem(&pscIndex))
5672                 {
5673                     LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5674                     totalRefCntWtd += pscVar->lvRefCntWtd;
5675                     if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5676                         break;
5677                 }
5678
5679                 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5680                 {
5681                     notWorthy = true;
5682                     continue; // not worth spilling a callee saved register
5683                 }
5684                 // Otherwise we will spill this callee-saved register,
5685                 // because its uses, when combined with the uses of
5686                 // other yet-to-be-processed candidates, exceed our threshold.
5688             }
5689
5690             /* Looks good - mark the variable as living in the register */
5691
5692             if (isRegPairType(varDsc->lvType))
5693             {
5694                 if (firstHalf == false)
5695                 {
5696                     /* Enregister the first half of the long */
5697                     varDsc->lvRegNum   = regNum;
5698                     saveOtherReg       = varDsc->lvOtherReg;
5699                     varDsc->lvOtherReg = REG_STK;
5700                     firstHalf          = true;
5701                 }
5702                 else
5703                 {
5704                     /* Ensure 'well-formed' register pairs */
5705                     /* (those returned by gen[Pick|Grab]RegPair) */
5706
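                         // i.e. keep the lower-numbered register of the pair in lvRegNum.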
5707                     if (regNum < varDsc->lvRegNum)
5708                     {
5709                         varDsc->lvOtherReg = varDsc->lvRegNum;
5710                         varDsc->lvRegNum   = regNum;
5711                     }
5712                     else
5713                     {
5714                         varDsc->lvOtherReg = regNum;
5715                     }
5716                     firstHalf = false;
5717                 }
5718             }
5719             else
5720             {
5721                 varDsc->lvRegNum = regNum;
5722 #ifdef _TARGET_ARM_
5723                 if (isDouble)
5724                 {
5725                     varDsc->lvOtherReg = REG_NEXT(regNum);
5726                 }
5727 #endif
5728             }
5729
5730             if (regNum == REG_FPBASE)
5731             {
5732                 refCntEBP += varDsc->lvRefCnt;
5733                 refCntWtdEBP += varDsc->lvRefCntWtd;
5734 #if DOUBLE_ALIGN
5735                 if (varDsc->lvIsParam)
5736                 {
5737                     refCntStkParam += varDsc->lvRefCnt;
5738                 }
5739 #endif
5740             }
5741
5742             /* Record this register in the regUsed set */
5743             regUsed |= regBits;
5744
5745             /* The register is now ineligible for all interfering variables */
5746
5747             VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5748
5749 #ifdef _TARGET_ARM_
5750             if (isDouble)
5751             {
5752                 regNumber       secondHalf = REG_NEXT(regNum);
5753                 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5754                 unsigned        intfIndex = 0;
5755                 while (iter.NextElem(&intfIndex))
5756                 {
5757                     VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5758                 }
5759             }
5760 #endif
5761
5762             /* If a register argument, remove its incoming register
5763              * from the "avoid" list */
5764
5765             if (varDsc->lvIsRegArg)
5766             {
5767                 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5768 #ifdef _TARGET_ARM_
5769                 if (isDouble)
5770                 {
5771                     raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5772                 }
5773 #endif
5774             }
5775
5776             /* A variable of TYP_LONG can take two registers */
5777             if (firstHalf)
5778                 continue;
5779
5780             // Since we have successfully enregistered this variable it is
5781             // now time to move on and consider the next variable
5782             goto ENREG_VAR;
5783         }
5784
5785         if (firstHalf)
5786         {
5787             noway_assert(isRegPairType(varDsc->lvType));
5788
5789             /* This TYP_LONG is partially enregistered */
5790
5791             noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5792
5793             if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5794             {
5795                 rpLostEnreg = true;
5796             }
5797
5798             raAddToStkPredict(varDsc->lvRefCntWtd);
5799             goto ENREG_VAR;
5800         }
5801
5802     NO_REG:;
5803         if (varDsc->lvDependReg)
5804         {
5805             rpLostEnreg = true;
5806         }
5807
5808         if (!notWorthy)
5809         {
5810             /* Weighted count of variables that could have been enregistered but weren't */
5811             raAddToStkPredict(varDsc->lvRefCntWtd);
5812
5813             if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5814                 raAddToStkPredict(varDsc->lvRefCntWtd);
5815         }
5816
5817     CANT_REG:;
5818         varDsc->lvRegister = false;
5819
5820         varDsc->lvRegNum = REG_STK;
5821         if (isRegPairType(varDsc->lvType))
5822             varDsc->lvOtherReg = REG_STK;
5823
5824         /* unweighted count of variables that were not enregistered */
5825
5826         refCntStk += varDsc->lvRefCnt;
5827
5828 #if DOUBLE_ALIGN
5829         if (varDsc->lvIsParam)
5830         {
5831             refCntStkParam += varDsc->lvRefCnt;
5832         }
5833         else
5834         {
5835             /* Is it a stack based double? */
5836             /* Note that double params are excluded since they cannot be double-aligned */
5837             if (varDsc->lvType == TYP_DOUBLE)
5838             {
5839                 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5840             }
5841         }
5842 #endif
5843 #ifdef DEBUG
5844         if (verbose)
5845         {
5846             printf("; ");
5847             gtDispLclVar((unsigned)(varDsc - lvaTable));
5848             if (varDsc->lvTracked)
5849                 printf("T%02u", varDsc->lvVarIndex);
5850             else
5851                 printf("   ");
5852             printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5853             if (varDsc->lvDoNotEnregister)
5854                 printf(", do-not-enregister");
5855             printf("\n");
5856         }
5857 #endif
5858         continue;
5859
5860     ENREG_VAR:;
5861
5862         varDsc->lvRegister = true;
5863
5864         // Record the fact that we enregistered a stack arg when a tail call is used.
5865         if (compJmpOpUsed && !varDsc->lvIsRegArg)
5866         {
5867             rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5868             if (isRegPairType(varDsc->lvType))
5869             {
5870                 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5871             }
5872         }
5873
5874 #ifdef DEBUG
5875         if (verbose)
5876         {
5877             printf("; ");
5878             gtDispLclVar((unsigned)(varDsc - lvaTable));
5879             printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5880                    refCntWtd2str(varDsc->lvRefCntWtd));
5881             varDsc->PrintVarReg();
5882 #ifdef _TARGET_ARM_
5883             if (isDouble)
5884             {
5885                 printf(":%s", getRegName(varDsc->lvOtherReg));
5886             }
5887 #endif
5888             printf("\n");
5889         }
5890 #endif
5891     }
5892
5893 #if ETW_EBP_FRAMED
5894     noway_assert(refCntEBP == 0);
5895 #endif
5896
5897 #ifdef DEBUG
5898     if (verbose)
5899     {
5900         if (refCntStk > 0)
5901             printf("; refCntStk       = %u\n", refCntStk);
5902         if (refCntEBP > 0)
5903             printf("; refCntEBP       = %u\n", refCntEBP);
5904         if (refCntWtdEBP > 0)
5905             printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5906 #if DOUBLE_ALIGN
5907         if (refCntStkParam > 0)
5908             printf("; refCntStkParam  = %u\n", refCntStkParam);
5909         if (refCntWtdStkDbl > 0)
5910             printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5911 #endif
5912     }
5913 #endif
5914
5915     /* Determine how the EBP register should be used */
5916     CLANG_FORMAT_COMMENT_ANCHOR;
5917
5918 #if DOUBLE_ALIGN
5919
5920     if (!codeGen->isFramePointerRequired())
5921     {
5922         noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5923
5924         /*
5925             First let us decide if we should use EBP to create a
5926             double-aligned frame, instead of enregistering variables
5927         */
5928
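             // MUST_DOUBLE_ALIGN forces the double-aligned frame outright; with
             // CAN_DOUBLE_ALIGN we only choose it when stack-based doubles are actually
             // referenced and the shouldDoubleAlign() heuristic says it pays off.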
5929         if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5930         {
5931             rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5932             goto REVERSE_EBP_ENREG;
5933         }
5934
5935         if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5936         {
5937             if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5938             {
5939                 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5940                 goto REVERSE_EBP_ENREG;
5941             }
5942         }
5943     }
5944
5945 #endif // DOUBLE_ALIGN
5946
5947     if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5948     {
5949 #ifdef _TARGET_XARCH_
5950 // clang-format off
5951         /*  If we are using EBP to enregister variables, will we
5952             actually save bytes by setting up an EBP frame?
5953
5954             Each stack reference is an extra byte of code if we use
5955             an ESP frame.
5956
5957             Here we measure the savings that we get by using EBP to
5958             enregister variables vs. the cost in code size that we
5959             pay when using an ESP based frame.
5960
5961             We pay one byte of code for each refCntStk
5962             but we save one byte (or more) for each refCntEBP.
5963
5964             Our savings are the elimination of a stack memory read/write.
5965             We use the loop weighted value of
5966                refCntWtdEBP * mem_access_weight (0, 3, 6)
5967             to represent this savings.
5968          */
5969
5970         // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5971         // to set up an EBP frame in the prolog and epilog
5972         #define EBP_FRAME_SETUP_SIZE  5
5973         // clang-format on
5974
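             // For example (illustrative numbers only): refCntStk = 30 and refCntEBP = 5
             // give bytesSaved = 30 - (5 + 5) = 20 code bytes; we then give up on EBP
             // enregistration only if those 20 bytes exceed the weighted memory-access
             // savings refCntWtdEBP * mem_access_weight / BB_UNITY_WEIGHT computed below.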
5975         if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5976         {
5977             unsigned bytesSaved        = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5978             unsigned mem_access_weight = 3;
5979
5980             if (compCodeOpt() == SMALL_CODE)
5981                 mem_access_weight = 0;
5982             else if (compCodeOpt() == FAST_CODE)
5983                 mem_access_weight *= 2;
5984
5985             if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5986             {
5987                 /* It would not be a good idea to use EBP in our predictions */
5988                 CLANG_FORMAT_COMMENT_ANCHOR;
5989 #ifdef DEBUG
5990                 if (verbose && (refCntEBP > 0))
5991                     printf("; Predicting that it's not worth using EBP to enregister variables\n");
5992 #endif
5993                 rpFrameType = FT_EBP_FRAME;
5994                 goto REVERSE_EBP_ENREG;
5995             }
5996         }
5997 #endif // _TARGET_XARCH_
5998
5999         if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
6000         {
6001 #ifdef DEBUG
6002             const char* reason;
6003 #endif
6004             if (rpMustCreateEBPCalled == false)
6005             {
6006                 rpMustCreateEBPCalled = true;
6007                 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6008                 {
6009 #ifdef DEBUG
6010                     if (verbose)
6011                         printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6012 #endif
6013                     codeGen->setFrameRequired(true);
6014
6015                     rpFrameType = FT_EBP_FRAME;
6016                     goto REVERSE_EBP_ENREG;
6017                 }
6018             }
6019         }
6020     }
6021
6022     goto EXIT;
6023
6024 REVERSE_EBP_ENREG:
6025
6026     noway_assert(rpFrameType != FT_ESP_FRAME);
6027
6028     rpReverseEBPenreg = true;
6029
6030 #if !ETW_EBP_FRAMED
6031     if (refCntEBP > 0)
6032     {
6033         noway_assert(regUsed & RBM_FPBASE);
6034
6035         regUsed &= ~RBM_FPBASE;
6036
6037         /* variables that were enregistered in EBP become stack based variables */
6038         raAddToStkPredict(refCntWtdEBP);
6039
6040         unsigned lclNum;
6041
6042         /* We're going to have to undo some predicted enregistered variables */
6043         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6044         {
6045             /* Is this a register variable? */
6046             if (varDsc->lvRegNum != REG_STK)
6047             {
6048                 if (isRegPairType(varDsc->lvType))
6049                 {
6050                     /* Only one can be EBP */
6051                     if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6052                     {
6053                         if (varDsc->lvRegNum == REG_FPBASE)
6054                             varDsc->lvRegNum = varDsc->lvOtherReg;
6055
6056                         varDsc->lvOtherReg = REG_STK;
6057
6058                         if (varDsc->lvRegNum == REG_STK)
6059                             varDsc->lvRegister = false;
6060
6061                         if (varDsc->lvDependReg)
6062                             rpLostEnreg = true;
6063 #ifdef DEBUG
6064                         if (verbose)
6065                             goto DUMP_MSG;
6066 #endif
6067                     }
6068                 }
6069                 else
6070                 {
6071                     if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6072                     {
6073                         varDsc->lvRegNum = REG_STK;
6074
6075                         varDsc->lvRegister = false;
6076
6077                         if (varDsc->lvDependReg)
6078                             rpLostEnreg = true;
6079 #ifdef DEBUG
6080                         if (verbose)
6081                         {
6082                         DUMP_MSG:
6083                             printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6084                                    varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6085                                    (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6086                         }
6087 #endif
6088                     }
6089                 }
6090             }
6091         }
6092     }
6093 #endif // ETW_EBP_FRAMED
6094
6095 EXIT:;
6096
6097     unsigned lclNum;
6098     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6099     {
6100         /* Clear the lvDependReg flag for the next iteration of the predictor */
6101         varDsc->lvDependReg = false;
6102
6103         // If we set rpLostEnreg and this is the first pessimize pass
6104         // then reverse the enregistration of all TYP_LONG variables
6105         if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6106         {
6107             varDsc->lvRegNum   = REG_STK;
6108             varDsc->lvOtherReg = REG_STK;
6109         }
6110     }
6111
6112 #ifdef DEBUG
6113     if (verbose && raNewBlocks)
6114     {
6115         printf("\nAdded FP register killing blocks:\n");
6116         fgDispBasicBlocks();
6117         printf("\n");
6118     }
6119 #endif
6120     noway_assert(rpFrameType != FT_NOT_SET);
6121
6122     /* return the set of registers used to enregister variables */
6123     return regUsed;
6124 }
6125 #ifdef _PREFAST_
6126 #pragma warning(pop)
6127 #endif
6128
6129 /*****************************************************************************
6130  *
6131  *  Predict register use for every tree in the function. Note that we do this
6132  *  at different times (not to mention in a totally different way) for x86 vs
6133  *  RISC targets.
6134  */
6135 void Compiler::rpPredictRegUse()
6136 {
6137 #ifdef DEBUG
6138     if (verbose)
6139         raDumpVarIntf();
6140 #endif
6141
6142     // We might want to adjust the ref counts based on interference
6143     raAdjustVarIntf();
6144
6145     regMaskTP allAcceptableRegs = RBM_ALLINT;
6146
6147 #if FEATURE_FP_REGALLOC
6148     allAcceptableRegs |= raConfigRestrictMaskFP();
6149 #endif
6150
6151     allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6152
6153     /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6154        to acdHelper(). This is done implicitly, without creating a GT_CALL
6155        node. Hence, this interference is handled implicitly by
6156        restricting the registers used for enregistering variables */
6157
6158     if (opts.compDbgCode)
6159     {
6160         allAcceptableRegs &= RBM_CALLEE_SAVED;
6161     }
6162
6163     /* Compute the initial regmask to use for the first pass */
6164     regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6165     regMaskTP regUsed;
6166
6167 #if CPU_USES_BLOCK_MOVE
6168     /* If we might need to generate a rep mov instruction */
6169     /* remove ESI and EDI */
6170     if (compBlkOpUsed)
6171         regAvail &= ~(RBM_ESI | RBM_EDI);
6172 #endif
6173
6174 #ifdef _TARGET_X86_
6175     /* If we are using longs then we remove ESI to allow */
6176     /* ESI:EBX to be saved across a call */
6177     if (compLongUsed)
6178         regAvail &= ~(RBM_ESI);
6179 #endif
6180
6181 #ifdef _TARGET_ARM_
6182     // For the first register allocation pass we don't want to color using r4
6183     // as we want to allow it to be used to color the internal temps instead
6184     // when r0,r1,r2,r3 are all in use.
6185     //
6186     regAvail &= ~(RBM_R4);
6187 #endif
6188
6189 #if ETW_EBP_FRAMED
6190     // We never have EBP available when ETW_EBP_FRAMED is defined
6191     regAvail &= ~RBM_FPBASE;
6192 #else
6193     /* If a frame pointer is required then we remove EBP */
6194     if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6195         regAvail &= ~RBM_FPBASE;
6196 #endif
6197
6198 #ifdef DEBUG
6199     BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6200     if (fJitNoRegLoc)
6201         regAvail = RBM_NONE;
6202 #endif
6203
6204     if ((opts.compFlags & CLFLG_REGVAR) == 0)
6205         regAvail = RBM_NONE;
6206
6207 #if FEATURE_STACK_FP_X87
6208     VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6209     VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6210
6211     // Calculate the set of all tracked FP/non-FP variables
6212     //  into optAllFloatVars and optAllNonFPvars
6213
6214     unsigned   lclNum;
6215     LclVarDsc* varDsc;
6216
6217     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6218     {
6219         /* Ignore the variable if it's not tracked */
6220
6221         if (!varDsc->lvTracked)
6222             continue;
6223
6224         /* Get hold of the index and the interference mask for the variable */
6225
6226         unsigned varNum = varDsc->lvVarIndex;
6227
6228         /* add to the set of all tracked FP/non-FP variables */
6229
6230         if (varDsc->IsFloatRegType())
6231             VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6232         else
6233             VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6234     }
6235 #endif
6236
6237     for (unsigned i = 0; i < REG_COUNT; i++)
6238     {
6239         VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6240     }
6241     for (unsigned i = 0; i < lvaTrackedCount; i++)
6242     {
6243         VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6244     }
6245
6246     raNewBlocks          = false;
6247     rpPredictAssignAgain = false;
6248     rpPasses             = 0;
6249
6250     bool      mustPredict   = true;
6251     unsigned  stmtNum       = 0;
6252     unsigned  oldStkPredict = DUMMY_INIT(~0);
6253     VARSET_TP oldLclRegIntf[REG_COUNT];
6254
6255     for (unsigned i = 0; i < REG_COUNT; i++)
6256     {
6257         VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6258     }
6259
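     // Iterate: each pass assigns registers using the interference graph built
     // during the previous pass's tree walk, then re-predicts tree register use;
     // we stop once the prediction stabilizes or the pass limits below are reached.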
6260     while (true)
6261     {
6262         /* Assign registers to variables using the variable/register interference
6263            graph (raLclRegIntf[]) calculated in the previous pass */
6264         regUsed = rpPredictAssignRegVars(regAvail);
6265
6266         mustPredict |= rpLostEnreg;
6267
6268 #ifdef _TARGET_ARM_
6269         // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6270         if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6271             !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6272         {
6273             // We can release our reservation on R10 and use it to color registers
6274             codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6275             allAcceptableRegs |= RBM_OPT_RSVD;
6276         }
6277 #endif
6278
6279         /* Is our new prediction good enough? */
6280         if (!mustPredict)
6281         {
6282             /* For small methods (12 stmts or fewer), we add an   */
6283             /*   extra pass if we are predicting the use of some  */
6284             /*   of the callee-saved registers.                   */
6285             /* This fixes RAID perf bug 43440 VB Ackerman function */
6286
6287             if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6288             {
6289                 goto EXTRA_PASS;
6290             }
6291
6292             /* If every variable was fully enregistered then we're done */
6293             if (rpStkPredict == 0)
6294                 goto ALL_DONE;
6295
6296             // This was a successful prediction.  Record it, in case it turns out to be the best one.
6297             rpRecordPrediction();
6298
6299             if (rpPasses > 1)
6300             {
6301                 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6302
6303                 // Be careful about overflow
6304                 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
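                     // Stop iterating when this pass is no longer a clear win: the previous
                     // estimate is already within 2x of the current one, the remaining
                     // weighted stack traffic is small relative to the pass count, or we
                     // are about to run out of passes.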
6305                 if (oldStkPredict < highStkPredict)
6306                     goto ALL_DONE;
6307
6308                 if (rpStkPredict < rpPasses * 8)
6309                     goto ALL_DONE;
6310
6311                 if (rpPasses >= (rpPassesMax - 1))
6312                     goto ALL_DONE;
6313             }
6314
6315         EXTRA_PASS:
6316             /* We will do another pass */;
6317         }
6318
6319 #ifdef DEBUG
6320         if (JitConfig.JitAssertOnMaxRAPasses())
6321         {
6322             noway_assert(rpPasses < rpPassesMax &&
6323                          "This may not be a bug, but the dev team should look and see what is happening");
6324         }
6325 #endif
6326
6327         // The "64" here had been "VARSET_SZ". It is unclear why this number was connected with
6328         // the (max) size of a VARSET. That constant has been eliminated, so the literal is kept here;
6329         // we hope to phase out this code anyway, and this leaves the behavior the way that it was.
6330         if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6331         {
6332             NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6333         }
6334
6335 #ifdef DEBUG
6336         if (verbose)
6337         {
6338             if (rpPasses > 0)
6339             {
6340                 if (rpLostEnreg)
6341                     printf("\n; Another pass due to rpLostEnreg");
6342                 if (rpAddedVarIntf)
6343                     printf("\n; Another pass due to rpAddedVarIntf");
6344                 if ((rpPasses == 1) && rpPredictAssignAgain)
6345                     printf("\n; Another pass due to rpPredictAssignAgain");
6346             }
6347             printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6348         }
6349 #endif
6350
6351         /*  Zero the variable/register interference graph */
6352         for (unsigned i = 0; i < REG_COUNT; i++)
6353         {
6354             VarSetOps::ClearD(this, raLclRegIntf[i]);
6355         }
6356
6357         // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6358         // it must not be in a register trashed by the callee
6359         if (info.compLvFrameListRoot != BAD_VAR_NUM)
6360         {
6361             assert(!opts.ShouldUsePInvokeHelpers());
6362             noway_assert(info.compLvFrameListRoot < lvaCount);
6363
6364             LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6365
6366             if (pinvokeVarDsc->lvTracked)
6367             {
6368                 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6369                                                       DEBUGARG("compLvFrameListRoot"));
6370
6371                 // We would prefer to have this enregistered in the PINVOKE_TCB register
6372                 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6373             }
6374
6375             // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6376             // worst case).  Make sure that the return value compiler temp that we create for the single
6377             // return block knows about this interference.
6378             if (genReturnLocal != BAD_VAR_NUM)
6379             {
6380                 noway_assert(genReturnBB);
6381                 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6382                 if (localTmp->lvTracked)
6383                 {
6384                     rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6385                                     VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6386                 }
6387             }
6388         }
6389
6390 #ifdef _TARGET_ARM_
6391         if (compFloatingPointUsed)
6392         {
6393             bool hasMustInitFloat = false;
6394
6395             // If we have any must-init floating point LclVars then we will add register interferences
6396             // for the arguments with RBM_SCRATCH.
6397             // This is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog(),
6398             // we won't home the arguments into REG_SCRATCH.
6399
6400             unsigned   lclNum;
6401             LclVarDsc* varDsc;
6402
6403             for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6404             {
6405                 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6406                 {
6407                     hasMustInitFloat = true;
6408                     break;
6409                 }
6410             }
6411
6412             if (hasMustInitFloat)
6413             {
6414                 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6415                 {
6416                     // If it is an incoming argument that is tracked and not floating-point
6417                     if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6418                     {
6419                         rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6420                                                          DEBUGARG("arg home with must-init fp"));
6421                     }
6422                 }
6423             }
6424         }
6425 #endif
6426
6427         stmtNum        = 0;
6428         rpAddedVarIntf = false;
6429         rpLostEnreg    = false;
6430
6431         /* Walk the basic blocks and predict reg use for each tree */
6432
6433         for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6434         {
6435             GenTree* stmt;
6436             compCurBB       = block;
6437             compCurLifeTree = NULL;
6438             VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6441
6442             for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6443             {
6444                 noway_assert(stmt->gtOper == GT_STMT);
6445
6446                 rpPredictSpillCnt = 0;
6447                 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6448                 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6449
6450                 GenTree* tree = stmt->gtStmt.gtStmtExpr;
6451                 stmtNum++;
6452 #ifdef DEBUG
6453                 if (verbose)
6454                 {
6455                     printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6456                     gtDispTree(tree);
6457                     printf("\n");
6458                 }
6459 #endif
6460                 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6461
6462                 noway_assert(rpAsgVarNum == -1);
6463
6464                 if (rpPredictSpillCnt > tmpIntSpillMax)
6465                     tmpIntSpillMax = rpPredictSpillCnt;
6466             }
6467         }
6468         rpPasses++;
6469
6470         /* Decide whether we need to set mustPredict */
6471         mustPredict = false;
6472
6473 #ifdef _TARGET_ARM_
6474         // The spill count may now be high enough that we need to reserve r10. If this is the case,
6475         // we reserve it here, and if the last prediction used it, throw that prediction out and repredict.
6476         if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6477         {
6478             codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6479             allAcceptableRegs &= ~RBM_OPT_RSVD;
6480             if ((regUsed & RBM_OPT_RSVD) != 0)
6481             {
6482                 mustPredict              = true;
6483                 rpBestRecordedPrediction = nullptr;
6484             }
6485         }
6486 #endif
6487
6488         if (rpAddedVarIntf)
6489         {
6490             mustPredict = true;
6491 #ifdef DEBUG
6492             if (verbose)
6493                 raDumpVarIntf();
6494 #endif
6495         }
6496
6497         if (rpPasses == 1)
6498         {
6499             if ((opts.compFlags & CLFLG_REGVAR) == 0)
6500                 goto ALL_DONE;
6501
6502             if (rpPredictAssignAgain)
6503                 mustPredict = true;
6504 #ifdef DEBUG
6505             if (fJitNoRegLoc)
6506                 goto ALL_DONE;
6507 #endif
6508         }
6509
6510         /* Calculate the new value to use for regAvail */
6511
6512         regAvail = allAcceptableRegs;
6513
6514         /* If a frame pointer is required then we remove EBP */
6515         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6516             regAvail &= ~RBM_FPBASE;
6517
6518 #if ETW_EBP_FRAMED
6519         // We never have EBP available when ETW_EBP_FRAMED is defined
6520         regAvail &= ~RBM_FPBASE;
6521 #endif
6522
6523         // Once we have done more than rpPassesPessimize passes we must continue to
6524         // pessimize the interference graph by OR-ing in the interferences from the previous pass
6525
6526         if (rpPasses > rpPassesPessimize)
6527         {
6528             for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6529                 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6530
6531             /* If we reverse an EBP enregistration then keep it that way */
6532             if (rpReverseEBPenreg)
6533                 regAvail &= ~RBM_FPBASE;
6534         }
6535
6536 #ifdef DEBUG
6537         if (verbose)
6538             raDumpRegIntf();
6539 #endif
6540
6541         /*  Save the old variable/register interference graph */
6542         for (unsigned i = 0; i < REG_COUNT; i++)
6543         {
6544             VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6545         }
6546         oldStkPredict = rpStkPredict;
6547     } // end of while (true)
6548
6549 ALL_DONE:;
6550
6551     // If we recorded a better feasible allocation than we ended up with, go back to using it.
6552     rpUseRecordedPredictionIfBetter();
6553
6554 #if DOUBLE_ALIGN
6555     codeGen->setDoubleAlign(false);
6556 #endif
6557
6558     switch (rpFrameType)
6559     {
6560         default:
6561             noway_assert(!"rpFrameType not set correctly!");
6562             break;
6563         case FT_ESP_FRAME:
6564             noway_assert(!codeGen->isFramePointerRequired());
6565             noway_assert(!codeGen->isFrameRequired());
6566             codeGen->setFramePointerUsed(false);
6567             break;
6568         case FT_EBP_FRAME:
6569             noway_assert((regUsed & RBM_FPBASE) == 0);
6570             codeGen->setFramePointerUsed(true);
6571             break;
6572 #if DOUBLE_ALIGN
6573         case FT_DOUBLE_ALIGN_FRAME:
6574             noway_assert((regUsed & RBM_FPBASE) == 0);
6575             noway_assert(!codeGen->isFramePointerRequired());
6576             codeGen->setFramePointerUsed(false);
6577             codeGen->setDoubleAlign(true);
6578             break;
6579 #endif
6580     }
6581
6582     /* Record the set of registers that we need */
6583     codeGen->regSet.rsClearRegsModified();
6584     if (regUsed != RBM_NONE)
6585     {
6586         codeGen->regSet.rsSetRegsModified(regUsed);
6587     }
6588
6589     /* We need genFullPtrRegMap if:
6590      * The method is fully interruptible, or
6591      * We are generating an EBP-less frame (for stack-pointer deltas)
6592      */
6593
6594     genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6595
6596     raMarkStkVars();
6597 #ifdef DEBUG
6598     if (verbose)
6599     {
6600         printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6601         printf("  rpStkPredict was %u\n", rpStkPredict);
6602     }
6603 #endif
6604     rpRegAllocDone = true;
6605 }
6606
6607 #endif // LEGACY_BACKEND
6608
6609 /*****************************************************************************
6610  *
6611  *  Mark all variables as to whether they live on the stack frame
6612  *  (part or whole), and if so what the base is (FP or SP).
6613  */
6614
6615 void Compiler::raMarkStkVars()
6616 {
6617     unsigned   lclNum;
6618     LclVarDsc* varDsc;
6619
6620     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6621     {
6622         // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6623         CLANG_FORMAT_COMMENT_ANCHOR;
6624
6625 #ifdef LEGACY_BACKEND
6626         varDsc->lvOnFrame = false;
6627 #endif // LEGACY_BACKEND
6628
6629         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6630         {
6631             noway_assert(!varDsc->lvRegister);
6632             goto ON_STK;
6633         }
6634
6635         /* Fully enregistered variables don't need any frame space */
6636
6637         if (varDsc->lvRegister)
6638         {
6639             if (!isRegPairType(varDsc->TypeGet()))
6640             {
6641                 goto NOT_STK;
6642             }
6643
6644             /* For "large" variables make sure both halves are enregistered */
6645
6646             if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6647             {
6648                 goto NOT_STK;
6649             }
6650         }
6651         /* Unused variables typically don't get any frame space */
6652         else if (varDsc->lvRefCnt == 0)
6653         {
6654             bool needSlot = false;
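                 // A zero-ref variable can still need a frame slot: if its address is
                 // exposed, if it is the outgoing-arg-space dummy, or (for debug
                 // codegen) if it is a user-visible local; the cases are handled below.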
6655
6656             bool stkFixedArgInVarArgs =
6657                 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6658
6659             // If its address has been exposed, ignore lvRefCnt. However, exclude
6660             // fixed arguments in a varargs method, as lvOnFrame shouldn't be set
6661             // for them because we don't want to explicitly report them to the GC.
6662
6663             if (!stkFixedArgInVarArgs)
6664             {
6665                 needSlot |= varDsc->lvAddrExposed;
6666             }
6667
6668 #if FEATURE_FIXED_OUT_ARGS
6669
6670             /* Is this the dummy variable representing GT_LCLBLK ? */
6671             needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6672
6673 #endif // FEATURE_FIXED_OUT_ARGS
6674
6675 #ifdef DEBUG
6676             /* For debugging, note that we have to reserve space even for
6677                unused variables if they are ever in scope. However, this is not
6678                an issue as fgExtendDbgLifetimes() adds an initialization and
6679                variables in scope will not have a zero ref-cnt.
6680              */
6681             if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6682             {
6683                 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6684                 {
6685                     noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6686                 }
6687             }
6688 #endif
6689             /*
6690               For Debug Code, we have to reserve space even if the variable is never
6691               in scope. We will also need to initialize it if it is a GC var.
6692               So we set lvMustInit and artificially bump up the ref-cnt.
6693              */
6694
6695             if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6696             {
6697                 needSlot |= true;
6698
6699                 if (lvaTypeIsGC(lclNum))
6700                 {
6701                     varDsc->lvRefCnt = 1;
6702                 }
6703
6704                 if (!varDsc->lvIsParam)
6705                 {
6706                     varDsc->lvMustInit = true;
6707                 }
6708             }
6709
6710 #ifndef LEGACY_BACKEND
6711             varDsc->lvOnFrame = needSlot;
6712 #endif // !LEGACY_BACKEND
6713             if (!needSlot)
6714             {
6715                 /* Clear the lvMustInit flag in case it is set */
6716                 varDsc->lvMustInit = false;
6717
6718                 goto NOT_STK;
6719             }
6720         }
6721
6722 #ifndef LEGACY_BACKEND
6723         if (!varDsc->lvOnFrame)
6724         {
6725             goto NOT_STK;
6726         }
6727 #endif // !LEGACY_BACKEND
6728
6729     ON_STK:
6730         /* The variable (or part of it) lives on the stack frame */
6731
6732         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6733 #if FEATURE_FIXED_OUT_ARGS
6734         noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6735 #else  // FEATURE_FIXED_OUT_ARGS
6736         noway_assert(lvaLclSize(lclNum) != 0);
6737 #endif // FEATURE_FIXED_OUT_ARGS
6738
6739         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6740                                   // stack frame
6741
6742     NOT_STK:;
6743         varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6744
6745 #if DOUBLE_ALIGN
6746
6747         if (codeGen->doDoubleAlign())
6748         {
6749             noway_assert(codeGen->isFramePointerUsed() == false);
6750
6751             /* All arguments are off of EBP with double-aligned frames */
6752
6753             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6754             {
6755                 varDsc->lvFramePointerBased = true;
6756             }
6757         }
6758
6759 #endif
6760
6761         /* Some basic checks */
6762
6763         // It must be in a register, on frame, or have zero references.
6764
6765         noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6766
6767 #ifndef LEGACY_BACKEND
6768         // We can't have both lvRegister and lvOnFrame for RyuJIT
6769         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6770 #else  // LEGACY_BACKEND
6771
6772         /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6773         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6774                      (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6775 #endif // LEGACY_BACKEND
6776
6777 #ifdef DEBUG
6778
6779         // For varargs functions, there should be no direct references to
6780         // parameter variables except for 'this' (because these were morphed
6781         // in the importer), the 'arglist' parameter (which is not a GC
6782         // pointer), and the return buffer argument (if we are returning a
6783         // struct).
6784         // This is important because we don't want to try to report them
6785         // to the GC, as the frame offsets in these local variables would
6786         // not be correct.
6787
6788         if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6789         {
6790             if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6791             {
6792                 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6793             }
6794         }
6795 #endif
6796     }
6797 }
6798
6799 #ifdef LEGACY_BACKEND
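     // Snapshot the current per-variable register predictions if this is the best
     // (lowest weighted stack use) feasible prediction recorded so far; a later call
     // to rpUseRecordedPredictionIfBetter() restores it if subsequent passes regress.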
6800 void Compiler::rpRecordPrediction()
6801 {
6802     if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6803     {
6804         if (rpBestRecordedPrediction == NULL)
6805         {
6806             rpBestRecordedPrediction =
6807                 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
6808         }
6809         for (unsigned k = 0; k < lvaCount; k++)
6810         {
6811             rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6812             rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
6813             rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
6814         }
6815         rpBestRecordedStkPredict = rpStkPredict;
6816         JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6817     }
6818 }
6819
6820 void Compiler::rpUseRecordedPredictionIfBetter()
6821 {
6822     JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6823             rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
6824     if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6825     {
6826         JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6827                 rpBestRecordedStkPredict);
6828
6829         for (unsigned k = 0; k < lvaCount; k++)
6830         {
6831             lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6832             lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6833             lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6834         }
6835     }
6836 }
6837 #endif // LEGACY_BACKEND