// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           RegAlloc                                        XX
XX                                                                           XX
XX  Does the register allocation and puts the remaining lclVars on the stack XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "regalloc.h"

#if FEATURE_FP_REGALLOC
Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
{
    DWORD val = JitConfig.JitRegisterFP();

    return (enumConfigRegisterFP)(val & 0x3);
}
#endif // FEATURE_FP_REGALLOC

regMaskTP Compiler::raConfigRestrictMaskFP()
{
    regMaskTP result = RBM_NONE;

#if FEATURE_FP_REGALLOC
    switch (raConfigRegisterFP())
    {
        case CONFIG_REGISTER_FP_NONE:
            result = RBM_NONE;
            break;
        case CONFIG_REGISTER_FP_CALLEE_TRASH:
            result = RBM_FLT_CALLEE_TRASH;
            break;
        case CONFIG_REGISTER_FP_CALLEE_SAVED:
            result = RBM_FLT_CALLEE_SAVED;
            break;
        case CONFIG_REGISTER_FP_FULL:
            result = RBM_ALLFLOAT;
            break;
    }
#endif

    return result;
}
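
// Illustrative mapping (a sketch, assuming the enumConfigRegisterFP values
// follow declaration order 0..3): setting COMPlus_JitRegisterFP=1 would
// restrict FP register allocation to RBM_FLT_CALLEE_TRASH, while
// COMPlus_JitRegisterFP=3 permits the full RBM_ALLFLOAT set. Only the low
// two bits of the config value are consulted (val & 0x3).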

#if DOUBLE_ALIGN
DWORD Compiler::getCanDoubleAlign()
{
#ifdef DEBUG
    if (compStressCompile(STRESS_DBL_ALN, 20))
        return MUST_DOUBLE_ALIGN;

    return JitConfig.JitDoubleAlign();
#else
    return DEFAULT_DOUBLE_ALIGN;
#endif
}

//------------------------------------------------------------------------
// shouldDoubleAlign: Determine whether to double-align the frame
//
// Arguments:
//    refCntStk       - sum of     ref counts for all stack based variables
//    refCntEBP       - sum of     ref counts for EBP enregistered variables
//    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
//    refCntStkParam  - sum of     ref counts for all stack based parameters
//    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
//                      with double fields).
//
// Return Value:
//    Returns true if this method estimates that a double-aligned frame would be beneficial
//
// Notes:
//    The impact of a double-aligned frame is computed as follows:
//    - We save a byte of code for each parameter reference (they are frame-pointer relative)
//    - We pay a byte of code for each non-parameter stack reference.
//    - We save the misalignment penalty and possible cache-line crossing penalty.
//      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
//    - We pay 7 extra bytes for:
//        MOV EBP,ESP,
//        LEA ESP,[EBP-offset]
//        AND ESP,-8 to double align ESP
//    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
//
//    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
//    Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
//    ref count for double-aligned values.
//
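//    Worked example (illustrative numbers only, and assuming BB_UNITY_WEIGHT == 100):
//    with refCntStk = 100, refCntEBP = 10 and refCntStkParam = 40, the code cost is
//        bytesUsed = 100 + 10 - 40 + 7 = 77.
//    Under FAST_CODE (misaligned_weight = 16) and refCntWtdStkDbl = 500, the estimated
//    misalignment saving is (500 * 16) / 100 = 80. Since 77 <= 80, we fall through to
//    the second test, and with refCntWtdEBP = 90 <= 2 * 500 we predict a double-aligned
//    frame.
//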
bool Compiler::shouldDoubleAlign(
    unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
{
    bool           doDoubleAlign        = false;
    const unsigned DBL_ALIGN_SETUP_SIZE = 7;

    unsigned bytesUsed         = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
    unsigned misaligned_weight = 4;

    if (compCodeOpt() == Compiler::SMALL_CODE)
        misaligned_weight = 0;

    if (compCodeOpt() == Compiler::FAST_CODE)
        misaligned_weight *= 4;

    JITDUMP("\nDouble alignment:\n");
    JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
    JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
    JITDUMP("  Sum of weighted ref counts for stack based doubles: %i\n", refCntWtdStkDbl);

    if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
    {
        JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
    }
    else if (refCntWtdEBP > refCntWtdStkDbl * 2)
    {
        // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
        // not double aligned.
        // Here are the numbers that make this not double-aligned.
        //     refCntWtdStkDbl = 0x164
        //     refCntWtdEBP    = 0x1a4
        // We think we do need to change the heuristic to be in favor of double-align.

        JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
    }
    else
    {
        // OK, we passed all of the benefit tests, so we'll predict a double-aligned frame.
        JITDUMP("    Predicting to create a double-aligned frame\n");
        doDoubleAlign = true;
    }
    return doDoubleAlign;
}
#endif // DOUBLE_ALIGN

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

void Compiler::raInit()
{
#if FEATURE_STACK_FP_X87
    /* We have not assigned any FP variables to registers yet */

    VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
#endif
    codeGen->intRegState.rsIsFloat   = false;
    codeGen->floatRegState.rsIsFloat = true;

    rpReverseEBPenreg = false;
    rpAsgVarNum       = -1;
    rpPassesMax       = 6;
    rpPassesPessimize = rpPassesMax - 3;
    if (opts.compDbgCode)
    {
        rpPassesMax++;
    }
    rpStkPredict            = (unsigned)-1;
    rpFrameType             = FT_NOT_SET;
    rpLostEnreg             = false;
    rpMustCreateEBPCalled   = false;
    rpRegAllocDone          = false;
    rpMaskPInvokeEpilogIntf = RBM_NONE;

    rpPredictMap[PREDICT_NONE] = RBM_NONE;
    rpPredictMap[PREDICT_ADDR] = RBM_NONE;

#if FEATURE_FP_REGALLOC
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
#else
    rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
    rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
#endif

#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
#include "register.h"

#if defined(_TARGET_ARM_)

    rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
    rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
    rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;

#elif defined(_TARGET_AMD64_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;

#elif defined(_TARGET_X86_)

    rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
    rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
    rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
    rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
    rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;

#endif

    rpBestRecordedPrediction = NULL;
}

/*****************************************************************************
 *
 *  The following tables determine the order in which registers are considered
 *  for variables to live in.
 */

const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
{
#if FEATURE_FP_REGALLOC
    if (varTypeIsFloating(regType))
    {
        static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
        const unsigned         raRegVarOrderFltSize = _countof(raRegVarOrderFlt);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderFltSize;

        return &raRegVarOrderFlt[0];
    }
    else
#endif
    {
        static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
        const unsigned         raRegVarOrderSize = _countof(raRegVarOrder);

        if (wbVarOrderSize != NULL)
            *wbVarOrderSize = raRegVarOrderSize;

        return &raRegVarOrder[0];
    }
}
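
// Example usage (an illustrative sketch, not code from this file): a caller
// walks the preferred order for a given type and considers registers in
// sequence.
//
//     unsigned         orderSize;
//     const regNumber* order = raGetRegVarOrder(TYP_INT, &orderSize);
//     for (unsigned i = 0; i < orderSize; i++)
//     {
//         regNumber candidate = order[i]; // registers are considered in this order
//     }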

#ifdef DEBUG

/*****************************************************************************
 *
 *  Dump out the variable interference graph
 *
 */

void Compiler::raDumpVarIntf()
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    printf("Var. interference graph for %s\n", info.compFullName);

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */
        unsigned varIndex = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varIndex);

        unsigned refIndex;

        for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
        {
            if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
                printf("T%02u ", refIndex);
            else
                printf("    ");
        }

        printf("\n");
    }

    printf("\n");
}

/*****************************************************************************
 *
 *  Dump out the register interference graph
 *
 */
void Compiler::raDumpRegIntf()
{
    printf("Reg. interference graph for %s\n", info.compFullName);

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        unsigned varNum;

        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */

        varNum = varDsc->lvVarIndex;

        printf("  V%02u,T%02u and ", lclNum, varNum);

        if (varDsc->IsFloatRegType())
        {
#if !FEATURE_STACK_FP_X87
            for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum, true));
                else
                    printf("    ");
            }
#endif
        }
        else
        {
            for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
            {
                if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
                    printf("%3s ", getRegName(regNum));
                else
                    printf("    ");
            }
        }

        printf("\n");
    }

    printf("\n");
}
#endif // DEBUG

/*****************************************************************************
 *
 *  We'll adjust the ref counts based on interference
 *
 */

void Compiler::raAdjustVarIntf()
{
    // This method was not correct and has been disabled.
    return;
}

/*****************************************************************************/
/*****************************************************************************/
/* Determine the register mask for a call/return value of the given type.
 */

inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
{
    var_types type = tree->TypeGet();

    if (varTypeIsStruct(type) && IsHfa(tree))
    {
        int retSlots = GetHfaCount(tree);
        return ((1 << retSlots) - 1) << REG_FLOATRET;
    }

    const static regMaskTP returnMap[TYP_COUNT] = {
        RBM_ILLEGAL,   // TYP_UNDEF,
        RBM_NONE,      // TYP_VOID,
        RBM_INTRET,    // TYP_BOOL,
        RBM_INTRET,    // TYP_BYTE,
        RBM_INTRET,    // TYP_UBYTE,
        RBM_INTRET,    // TYP_SHORT,
        RBM_INTRET,    // TYP_USHORT,
        RBM_INTRET,    // TYP_INT,
        RBM_INTRET,    // TYP_UINT,
        RBM_LNGRET,    // TYP_LONG,
        RBM_LNGRET,    // TYP_ULONG,
        RBM_FLOATRET,  // TYP_FLOAT,
        RBM_DOUBLERET, // TYP_DOUBLE,
        RBM_INTRET,    // TYP_REF,
        RBM_INTRET,    // TYP_BYREF,
        RBM_ILLEGAL,   // TYP_STRUCT,
        RBM_ILLEGAL,   // TYP_BLK,
        RBM_ILLEGAL,   // TYP_LCLBLK,
        RBM_ILLEGAL,   // TYP_UNKNOWN,
    };

    assert((unsigned)type < _countof(returnMap));
    assert(returnMap[TYP_LONG] == RBM_LNGRET);
    assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
    assert(returnMap[TYP_REF] == RBM_INTRET);
    assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);

    regMaskTP result = returnMap[type];
    assert(result != RBM_ILLEGAL);
    return result;
}

/*****************************************************************************/

/****************************************************************************/

#ifdef DEBUG

static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
{
    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
    {
        if (!varDsc->lvTracked)
            continue;

        if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
            continue;

        if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
            printf("V%02u ", lclNum);
    }
}

#endif

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************
 *
 *  Debugging helpers - display variables liveness info.
 */

void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
{
    do
    {
        printf("BB%02u: ", beg->bbNum);

        printf(" in  = [ ");
        dispLifeSet(comp, mask, beg->bbLiveIn);
        printf("] ,");

        printf(" out = [ ");
        dispLifeSet(comp, mask, beg->bbLiveOut);
        printf("]");

        if (beg->bbFlags & BBF_VISITED)
            printf(" inner=%u", beg->bbFPinVars);

        printf("\n");

        beg = beg->bbNext;
        if (!beg)
            return;
    } while (beg != end);
}

#if FEATURE_STACK_FP_X87
void Compiler::raDispFPlifeInfo()
{
    BasicBlock* block;

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        GenTreePtr stmt;

        printf("BB%02u: in  = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
        printf("]\n\n");

        VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
        for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            GenTreePtr tree;

            noway_assert(stmt->gtOper == GT_STMT);

            for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
            {
                VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));

                dispLifeSet(this, optAllFloatVars, life);
                printf("   ");
                gtDispTree(tree, 0, NULL, true);
            }

            printf("\n");
        }

        printf("BB%02u: out = [ ", block->bbNum);
        dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
        printf("]\n\n");
    }
}
#endif // FEATURE_STACK_FP_X87
/*****************************************************************************/
#endif // DEBUG
/*****************************************************************************/

/*****************************************************************************/

void Compiler::raSetRegVarOrder(
    var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
{
    unsigned         normalVarOrderSize;
    const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
    unsigned         index;
    unsigned         listIndex = 0;
    regMaskTP        usedReg   = avoidReg;

    noway_assert(*customVarOrderSize >= normalVarOrderSize);

    if (prefReg)
    {
        /* First place the preferred registers at the start of customVarOrder */

        regMaskTP regBit;
        regNumber regNum;

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNum = normalVarOrder[index];
            regBit = genRegMask(regNum);

            if (usedReg & regBit)
                continue;

            if (prefReg & regBit)
            {
                usedReg |= regBit;
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                prefReg -= regBit;
                if (prefReg == 0)
                    break;
            }
        }

#if CPU_HAS_BYTE_REGS
        /* Then, if byteable registers are preferred, place them */

        if (prefReg & RBM_BYTE_REG_FLAG)
        {
            for (index = 0; index < normalVarOrderSize; index++)
            {
                regNum = normalVarOrder[index];
                regBit = genRegMask(regNum);

                if (usedReg & regBit)
                    continue;

                if (RBM_BYTE_REGS & regBit)
                {
                    usedReg |= regBit;
                    noway_assert(listIndex < normalVarOrderSize);
                    customVarOrder[listIndex++] = regNum;
                }
            }
        }

#endif // CPU_HAS_BYTE_REGS
    }

    /* Now place all the non-preferred registers */

    for (index = 0; index < normalVarOrderSize; index++)
    {
        regNumber regNum = normalVarOrder[index];
        regMaskTP regBit = genRegMask(regNum);

        if (usedReg & regBit)
            continue;

        usedReg |= regBit;
        noway_assert(listIndex < normalVarOrderSize);
        customVarOrder[listIndex++] = regNum;
    }

    if (avoidReg)
    {
        /* Now place the "avoid" registers */

        for (index = 0; index < normalVarOrderSize; index++)
        {
            regNumber regNum = normalVarOrder[index];
            regMaskTP regBit = genRegMask(regNum);

            if (avoidReg & regBit)
            {
                noway_assert(listIndex < normalVarOrderSize);
                customVarOrder[listIndex++] = regNum;
                avoidReg -= regBit;
                if (avoidReg == 0)
                    break;
            }
        }
    }

    *customVarOrderSize = listIndex;
    noway_assert(listIndex == normalVarOrderSize);
}
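
// Illustrative effect (a sketch with hypothetical masks): on x86, calling
// raSetRegVarOrder(TYP_INT, order, &size, RBM_ESI, RBM_EDI) would yield ESI
// first (preferred), then the remaining registers in REG_VAR_ORDER sequence,
// and EDI last (avoided). The output contains each register exactly once, as
// the trailing noway_assert(listIndex == normalVarOrderSize) checks.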

/*****************************************************************************
 *
 *  Set up raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
 */

void Compiler::raSetupArgMasks(RegState* regState)
{
    /* Determine the registers holding incoming register arguments   */
    /*  and set up raAvoidArgRegMask to the set of registers that we */
    /*  may want to avoid when enregistering the locals.             */

    regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    raAvoidArgRegMask                  = RBM_NONE;

    LclVarDsc* argsEnd = lvaTable + info.compArgsCount;

    for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
    {
        noway_assert(argDsc->lvIsParam);

        // Is it a register argument?
        if (!argDsc->lvIsRegArg)
            continue;

        // only process args that apply to the current register file
        if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
        {
            continue;
        }

        // Is it dead on entry?
        // In certain cases, such as when compJmpOpUsed is true
        // or when we have a generic type context arg that we must report,
        // the arguments have to be kept alive throughout the prolog,
        // so we have to consider them live on entry.
        //
        bool keepArgAlive = compJmpOpUsed;
        if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
            ((lvaTable + info.compTypeCtxtArg) == argDsc))
        {
            keepArgAlive = true;
        }

        if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
        {
            continue;
        }

        // The code to set the regState for each arg is outlined for shared use
        // by linear scan
        regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);

        // Do we need to try to avoid this incoming arg register?

        // If it's not tracked, don't do the stuff below.
        if (!argDsc->lvTracked)
            continue;

        // If the incoming arg is used after a call it is live across
        //  a call and will have to be allocated to a caller saved
        //  register anyway (a very common case).
        //
        // In this case it is pointless to ask the higher ref count
        //  locals to avoid using the incoming arg register

        unsigned argVarIndex = argDsc->lvVarIndex;

        /* Do the incoming register and the arg variable interfere? */

        if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
        {
            // No, they do not interfere,
            //  so we add inArgReg to raAvoidArgRegMask

            raAvoidArgRegMask |= genRegMask(inArgReg);
        }
#ifdef _TARGET_ARM_
        if (argDsc->lvType == TYP_DOUBLE)
        {
            // Avoid the double register argument pair for register allocation.
            if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
            {
                raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
            }
        }
#endif
    }
}

#endif // LEGACY_BACKEND

// The code to set the regState for each arg is outlined for shared use
// by linear scan. (It is not shared for System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
{
    regNumber inArgReg  = argDsc->lvArgReg;
    regMaskTP inArgMask = genRegMask(inArgReg);

    if (regState->rsIsFloat)
    {
        noway_assert(inArgMask & RBM_FLTARG_REGS);
    }
    else //  regState is for the integer registers
    {
        // This might be the fixed return buffer register argument (on ARM64)
        // We check and allow inArgReg to be theFixedRetBuffReg()
        if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
        {
            // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
            noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
            // We should have recorded the variable number for the return buffer arg
            noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
        }
        else // we have a regular arg
        {
            noway_assert(inArgMask & RBM_ARG_REGS);
        }
    }

    regState->rsCalleeRegArgMaskLiveIn |= inArgMask;

#ifdef _TARGET_ARM_
    if (argDsc->lvType == TYP_DOUBLE)
    {
        if (info.compIsVarArgs || opts.compUseSoftFP)
        {
            assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
            assert(!regState->rsIsFloat);
        }
        else
        {
            assert(regState->rsIsFloat);
            assert(emitter::isDoubleReg(inArgReg));
        }
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
    else if (argDsc->lvType == TYP_LONG)
    {
        assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
        assert(!regState->rsIsFloat);
        regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
    }
#endif // _TARGET_ARM_

#if FEATURE_MULTIREG_ARGS
    if (varTypeIsStruct(argDsc->lvType))
    {
        if (argDsc->lvIsHfaRegArg())
        {
            assert(regState->rsIsFloat);
            unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
            for (unsigned i = 1; i < cSlots; i++)
            {
                assert(inArgReg + i <= LAST_FP_ARGREG);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
            }
        }
        else
        {
            unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
            for (unsigned i = 1; i < cSlots; i++)
            {
                regNumber nextArgReg = (regNumber)(inArgReg + i);
                if (nextArgReg > REG_ARG_LAST)
                {
                    break;
                }
                assert(regState->rsIsFloat == false);
                regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
            }
        }
    }
#endif // FEATURE_MULTIREG_ARGS

    return inArgReg;
}
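
// Illustrative outcome (a sketch, ARM hard-float): for a TYP_DOUBLE argument
// whose lvArgReg is the first float register of its pair, the code above also
// marks the second half (inArgReg + 1) in rsCalleeRegArgMaskLiveIn, so the
// whole 64-bit value is treated as live on entry. A TYP_LONG passed in r0
// similarly adds r1 to the live-in mask.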

#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.

/*****************************************************************************
 *
 *  Assign variables to live in registers, etc.
 */

void Compiler::raAssignVars()
{
#ifdef DEBUG
    if (verbose)
        printf("*************** In raAssignVars()\n");
#endif
    /* We need to keep track of which registers we ever touch */

    codeGen->regSet.rsClearRegsModified();

#if FEATURE_STACK_FP_X87
    // FP register allocation
    raEnregisterVarsStackFP();
    raGenerateFPRefCounts();
#endif

    /* Predict registers used by code generation */
    rpPredictRegUse(); // New reg predictor/allocator

    // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
    // so that the gc tracking logic and lvMustInit logic will ignore them.

    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        if (varDsc->lvType != TYP_STRUCT)
            continue;

        if (!varDsc->lvPromoted)
            continue;

        if (varDsc->lvIsParam)
            continue;

        if (varDsc->lvRefCnt > 0)
            continue;

#ifdef DEBUG
        if (verbose)
        {
            printf("Mark unused struct local V%02u\n", lclNum);
        }

        lvaPromotionType promotionType = lvaGetPromotionType(varDsc);

        if (promotionType == PROMOTION_TYPE_DEPENDENT)
        {
            // This should only happen when all its field locals are unused as well.

            for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
                 varNum++)
            {
                noway_assert(lvaTable[varNum].lvRefCnt == 0);
            }
        }
        else
        {
            noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
        }

        varDsc->lvUnusedStruct = 1;
#endif

        // Change such struct locals to ints

        varDsc->lvType = TYP_INT; // Bash to a non-gc type.
        noway_assert(!varDsc->lvTracked);
        noway_assert(!varDsc->lvRegister);
        varDsc->lvOnFrame  = false; // Force it not to be onstack.
        varDsc->lvMustInit = false; // Force not to init it.
        varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
    }
}

/*****************************************************************************/
/*****************************************************************************/

/*****************************************************************************
 *
 *   Given a regNumber return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForReg(regNumber reg)
{
    return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
}

/*****************************************************************************
 *
 *   Given a varIndex return the correct predictReg enum value
 */

inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
{
    return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return the correct varIndex value
 */

inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
{
    return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
}

/*****************************************************************************
 *
 *   Given a rpPredictReg return true if it specifies a Txx register
 */

inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
{
    if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
        return true;
    else
        return false;
}
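
// Round-trip example (illustrative): rpGetPredictForVarIndex(5) yields the
// enum value PREDICT_REG_VAR_T00 + 5, rpHasVarIndexForPredict() returns true
// for it, and rpGetVarIndexForPredict() recovers the original index 5. All
// three helpers are simple offset conversions around PREDICT_REG_VAR_T00.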

/*****************************************************************************
 *
 *   Given a regmask return the correct predictReg enum value
 */

static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
{
    rpPredictReg result = PREDICT_NONE;
    if (regmask != 0) /* Only if regmask has some bits set */
    {
        if (((regmask - 1) & regmask) == 0) /* Check if regmask has exactly one bit set */
        {
            DWORD reg = 0;
            assert(FitsIn<DWORD>(regmask));
            BitScanForward(&reg, (DWORD)regmask);
            return rpGetPredictForReg((regNumber)reg);
        }

#if defined(_TARGET_ARM_)
        /* It has multiple bits set */
        else if (regmask == (RBM_R0 | RBM_R1))
        {
            result = PREDICT_PAIR_R0R1;
        }
        else if (regmask == (RBM_R2 | RBM_R3))
        {
            result = PREDICT_PAIR_R2R3;
        }
#elif defined(_TARGET_X86_)
        /* It has multiple bits set */
        else if (regmask == (RBM_EAX | RBM_EDX))
        {
            result = PREDICT_PAIR_EAXEDX;
        }
        else if (regmask == (RBM_ECX | RBM_EBX))
        {
            result = PREDICT_PAIR_ECXEBX;
        }
#endif
        else /* It doesn't match anything */
        {
            result = PREDICT_NONE;
            assert(!"unreachable");
            NO_WAY("bad regpair");
        }
    }
    return result;
}
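
// Examples (illustrative): rpGetPredictForMask(RBM_EAX) has a single bit set,
// so BitScanForward finds the bit index and the per-register enum value
// (PREDICT_REG_EAX) is returned. On x86, rpGetPredictForMask(RBM_EAX | RBM_EDX)
// matches the multi-bit pair case and yields PREDICT_PAIR_EAXEDX; any other
// multi-bit mask trips the NO_WAY.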

/*****************************************************************************
 *
 *  Record a variable to register(s) interference
 */

bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
{
    bool addedIntf = false;

    if (regMask != 0)
    {
        for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
        {
            regMaskTP regBit = genRegMask(regNum);

            if (regMask & regBit)
            {
                VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
                if (!VarSetOps::IsEmpty(this, newIntf))
                {
#ifdef DEBUG
                    if (verbose)
                    {
                        VarSetOps::Iter newIntfIter(this, newIntf);
                        unsigned        varNum = 0;
                        while (newIntfIter.NextElem(&varNum))
                        {
                            unsigned   lclNum = lvaTrackedToVarNum[varNum];
                            LclVarDsc* varDsc = &lvaTable[lclNum];
#if FEATURE_FP_REGALLOC
                            // Only print the useful interferences
                            // i.e. floating point LclVar interference with floating point registers
                            //         or integer LclVar interference with general purpose registers
                            if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
#endif
                            {
                                printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
                                       getRegName(regNum), msg);
                            }
                        }
                    }
#endif
                    addedIntf = true;
                    VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
                }

                regMask -= regBit;
                if (regMask == 0)
                    break;
            }
        }
    }
    return addedIntf;
}

/*****************************************************************************
 *
 *  Record a new variable to variable(s) interference
 */

bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
{
    noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
    noway_assert(!VarSetOps::IsEmpty(this, intfVar));

    VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
    VarSetOps::AddElemD(this, oneVar, varNum);

    bool newIntf = fgMarkIntf(intfVar, oneVar);

    if (newIntf)
        rpAddedVarIntf = true;

#ifdef DEBUG
    if (verbose && newIntf)
    {
        for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
        {
            if (VarSetOps::IsMember(this, intfVar, oneNum))
            {
                unsigned lclNum = lvaTrackedToVarNum[varNum];
                unsigned lclOne = lvaTrackedToVarNum[oneNum];
                printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
                       oneNum, msg);
            }
        }
    }
#endif

    return newIntf;
}

/*****************************************************************************
 *
 *   Determine the preferred register mask for a given predictReg value
 */

inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
{
    if (rpHasVarIndexForPredict(predictReg))
        predictReg = PREDICT_REG;

    noway_assert((unsigned)predictReg < _countof(rpPredictMap));
    noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);

    regMaskTP regAvailForType = rpPredictMap[predictReg];
    if (varTypeIsFloating(type))
    {
        regAvailForType &= RBM_ALLFLOAT;
    }
    else
    {
        regAvailForType &= RBM_ALLINT;
    }
#ifdef _TARGET_ARM_
    if (type == TYP_DOUBLE)
    {
        if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
        {
            // Fix 388433 ARM JitStress WP7
            if ((regAvailForType & RBM_DBL_REGS) != 0)
            {
                regAvailForType |= (regAvailForType << 1);
            }
            else
            {
                regAvailForType = RBM_NONE;
            }
        }
    }
#endif
    return regAvailForType;
}
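
// Example (illustrative): with FEATURE_FP_REGALLOC, rpPredictMap[PREDICT_SCRATCH_REG]
// is RBM_ALLINT | RBM_ALLFLOAT, so rpPredictRegMask(PREDICT_SCRATCH_REG, TYP_FLOAT)
// returns RBM_ALLFLOAT while rpPredictRegMask(PREDICT_SCRATCH_REG, TYP_INT) returns
// RBM_ALLINT. A Txx predict value is first demoted to the generic PREDICT_REG.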

/*****************************************************************************
 *
 *  Predict register choice for a type.
 *
 *  Adds the predicted registers to rsModifiedRegsMask.
 */
regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
{
    regMaskTP preferReg = rpPredictRegMask(predictReg, type);
    regNumber regNum;
    regMaskTP regBits;

    // Add any reserved registers to the lockedRegs
    lockedRegs |= codeGen->regSet.rsMaskResvd;

    /* Clear out the lockedRegs from preferReg */
    preferReg &= ~lockedRegs;

    if (rpAsgVarNum != -1)
    {
        noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));

        /* Don't pick the register used by rpAsgVarNum either */
        LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
        noway_assert(tgtVar->lvRegNum != REG_STK);

        preferReg &= ~genRegMask(tgtVar->lvRegNum);
    }

    switch (type)
    {
        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_UBYTE:
        case TYP_SHORT:
        case TYP_USHORT:
        case TYP_INT:
        case TYP_UINT:
        case TYP_REF:
        case TYP_BYREF:
#ifdef _TARGET_AMD64_
        case TYP_LONG:
#endif // _TARGET_AMD64_

            // expand preferReg to all non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);

            if (preferReg == 0) // no bits set?
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            if (preferReg != 0)
            {
                /* Iterate the registers in the order specified by rpRegTmpOrder */

                for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
                {
                    regNum  = rpRegTmpOrder[index];
                    regBits = genRegMask(regNum);

                    if ((preferReg & regBits) == regBits)
                    {
                        goto RET;
                    }
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#ifndef _TARGET_AMD64_
        case TYP_LONG:

            if ((preferReg == 0) ||                   // no bits set?
                ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
            {
                // expand preferReg to all non-locked registers
                preferReg = RBM_ALLINT & ~lockedRegs;
            }

            if (preferReg == 0) // no bits set?
            {
                // Add EAX:EDX to the registers
                // (The jit will introduce two spill temps)
                preferReg = RBM_PAIR_TMP;
                rpPredictSpillCnt += 2;
#ifdef DEBUG
                if (verbose)
                    printf("Predict two spill temps\n");
#endif
            }
            else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
            {
                if ((preferReg & RBM_PAIR_TMP_LO) == 0)
                {
                    // Add EAX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_LO;
                }
                else
                {
                    // Add EDX to the registers
                    // (The jit will introduce one spill temp)
                    preferReg |= RBM_PAIR_TMP_HI;
                }
                rpPredictSpillCnt++;
#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp\n");
#endif
            }

            regPairNo regPair;
            regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
            if (regPair != REG_PAIR_NONE)
            {
                regBits = genRegPairMask(regPair);
                goto RET;
            }

            /* Otherwise we have allocated all registers, so do nothing */
            break;
#endif // _TARGET_AMD64_

#ifdef _TARGET_ARM_
        case TYP_STRUCT:
#endif

        case TYP_FLOAT:
        case TYP_DOUBLE:

#if FEATURE_FP_REGALLOC
            regMaskTP restrictMask;
            restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
            assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);

            // expand preferReg to all available non-locked registers if no bits set
            preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
            regMaskTP preferDouble;
            preferDouble = preferReg & (preferReg >> 1);

            if ((preferReg == 0) // no bits set?
#ifdef _TARGET_ARM_
                || ((type == TYP_DOUBLE) &&
                    ((preferReg & (preferReg >> 1)) == 0)) // or no two consecutive bits set for TYP_DOUBLE
#endif
                )
            {
                // Add one predefined spill choice register if no bits set.
                // (The jit will introduce one spill temp)
                preferReg |= RBM_SPILL_CHOICE_FLT;
                rpPredictSpillCnt++;

#ifdef DEBUG
                if (verbose)
                    printf("Predict one spill temp (float)\n");
#endif
            }

            assert(preferReg != 0);

            /* Iterate the registers in the order specified by raRegFltTmpOrder */

            for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
            {
                regNum  = raRegFltTmpOrder[index];
                regBits = genRegMask(regNum);

                if (varTypeIsFloating(type))
                {
#ifdef _TARGET_ARM_
                    if (type == TYP_DOUBLE)
                    {
                        if ((regBits & RBM_DBL_REGS) == 0)
                        {
                            continue; // We must restrict the set to the double registers
                        }
                        else
                        {
                            // TYP_DOUBLE uses two consecutive registers
                            regBits |= genRegMask(REG_NEXT(regNum));
                        }
                    }
#endif
                    // See if COMPlus_JitRegisterFP is restricting this FP register
                    //
                    if ((restrictMask & regBits) != regBits)
                        continue;
                }

                if ((preferReg & regBits) == regBits)
                {
                    goto RET;
                }
            }
            /* Otherwise we have allocated all registers, so do nothing */
            break;

#else // !FEATURE_FP_REGALLOC

            return RBM_NONE;

#endif

        default:
            noway_assert(!"unexpected type in reg use prediction");
    }

    /* Abnormal return */
    noway_assert(!"Ran out of registers in rpPredictRegPick");
    return RBM_NONE;

RET:
    /*
     *  If during the first prediction we need to allocate
     *  one of the registers that we used for coloring locals
     *  then flag this by setting rpPredictAssignAgain.
     *  We will have to go back and repredict the registers.
     */
    if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
        rpPredictAssignAgain = true;

    // Add a register interference to each of the last use variables
    if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
    {
        VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, lastUse, rpLastUseVars);
        VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
        VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
        // While we still have any lastUse or inPlaceUse bits
        VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));

        VARSET_TP       varAsSet(VarSetOps::MakeEmpty(this));
        VarSetOps::Iter iter(this, useUnion);
        unsigned        varNum = 0;
        while (iter.NextElem(&varNum))
        {
            // We'll need this for one of the calls...
            VarSetOps::ClearD(this, varAsSet);
            VarSetOps::AddElemD(this, varAsSet, varNum);

            // Is this varBit a lastUse?
            if (VarSetOps::IsMember(this, lastUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
            }

            // Is this varBit an inPlaceUse?
            if (VarSetOps::IsMember(this, inPlaceUse, varNum))
            {
                // Record a register to variable interference
                rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
            }
        }
    }
    codeGen->regSet.rsSetRegsModified(regBits);

    return regBits;
}
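
// Example flow (illustrative, x86): asking for a TYP_LONG with an empty
// preferReg expands it to all unlocked int registers; if even that set is
// empty, RBM_PAIR_TMP (the EAX:EDX pair) is substituted and rpPredictSpillCnt
// is bumped by two, predicting that codegen will need two spill temps. On
// success the returned mask covers the full pair via genRegPairMask().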
1363
1364 /*****************************************************************************
1365  *
1366  *  Predict integer register use for generating an address mode for a tree,
1367  *  by setting tree->gtUsedRegs to all registers used by this tree and its
1368  *  children.
1369  *    tree       - is the child of a GT_IND node
1370  *    type       - the type of the GT_IND node (floating point/integer)
1371  *    lockedRegs - are the registers which are currently held by
1372  *                 a previously evaluated node.
1373  *    rsvdRegs   - registers which should not be allocated because they will
1374  *                 be needed to evaluate a node in the future
1375  *               - Also if rsvdRegs has the RBM_LASTUSE bit set then
1376  *                 the rpLastUseVars set should be saved and restored
1377  *                 so that we don't add any new variables to rpLastUseVars
1378  *    lenCSE     - is non-NULL only when we have a lenCSE expression
1379  *
1380  *  Return the scratch registers to be held by this tree. (one or two registers
1381  *  to form an address expression)
1382  */
1383
1384 regMaskTP Compiler::rpPredictAddressMode(
1385     GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1386 {
1387     GenTreePtr op1;
1388     GenTreePtr op2;
1389     GenTreePtr opTemp;
1390     genTreeOps oper = tree->OperGet();
1391     regMaskTP  op1Mask;
1392     regMaskTP  op2Mask;
1393     regMaskTP  regMask;
1394     ssize_t    sh;
1395     ssize_t    cns = 0;
1396     bool       rev;
1397     bool       hasTwoAddConst     = false;
1398     bool       restoreLastUseVars = false;
1399     VARSET_TP  oldLastUseVars(VarSetOps::MakeEmpty(this));
1400
1401     /* do we need to save and restore the rpLastUseVars set ? */
1402     if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1403     {
1404         restoreLastUseVars = true;
1405         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1406     }
1407     rsvdRegs &= ~RBM_LASTUSE;
1408
1409     /* if not an add, then just force it to a register */
1410
1411     if (oper != GT_ADD)
1412     {
1413         if (oper == GT_ARR_ELEM)
1414         {
1415             regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1416             goto DONE;
1417         }
1418         else
1419         {
1420             goto NO_ADDR_EXPR;
1421         }
1422     }
1423
1424     op1 = tree->gtOp.gtOp1;
1425     op2 = tree->gtOp.gtOp2;
1426     rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1427
1428     /* look for (x + y) + icon address mode */
1429
1430     if (op2->OperGet() == GT_CNS_INT)
1431     {
1432         cns = op2->gtIntCon.gtIconVal;
1433
1434         /* if not an add, then just force op1 into a register */
1435         if (op1->OperGet() != GT_ADD)
1436             goto ONE_ADDR_EXPR;
1437
1438         hasTwoAddConst = true;
1439
1440         /* Record the 'rev' flag, reverse evaluation order */
1441         rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1442
1443         op2 = op1->gtOp.gtOp2;
1444         op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1445     }
1446
1447     /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1448
1449     sh = 0;
1450     if (op2->OperGet() == GT_LSH)
1451     {
1452         if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1453         {
1454             sh     = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1455             opTemp = op2->gtOp.gtOp1;
1456         }
1457         else
1458         {
1459             opTemp = NULL;
1460         }
1461     }
1462     else
1463     {
1464         opTemp = op2;
1465     }
1466
1467     if (opTemp != NULL)
1468     {
1469         if (opTemp->OperGet() == GT_NOP)
1470         {
1471             opTemp = opTemp->gtOp.gtOp1;
1472         }
1473
1474         // Is this a const operand?
1475         if (opTemp->OperGet() == GT_CNS_INT)
1476         {
1477             // Compute the new cns value that Codegen will end up using
1478             cns += (opTemp->gtIntCon.gtIconVal << sh);
1479
1480             goto ONE_ADDR_EXPR;
1481         }
1482     }
1483
1484     /* Check for LSH in op1 slot */
1485
1486     if (op1->OperGet() != GT_LSH)
1487         goto TWO_ADDR_EXPR;
1488
1489     opTemp = op1->gtOp.gtOp2;
1490
1491     if (opTemp->OperGet() != GT_CNS_INT)
1492         goto TWO_ADDR_EXPR;
1493
1494     sh = opTemp->gtIntCon.gtIconVal;
1495
1496     /* Check for LSH of 0, special case */
1497     if (sh == 0)
1498         goto TWO_ADDR_EXPR;
1499
1500 #if defined(_TARGET_XARCH_)
1501
1502     /* Check for LSH of 1 2 or 3 */
1503     if (sh > 3)
1504         goto TWO_ADDR_EXPR;
1505
1506 #elif defined(_TARGET_ARM_)
1507
1508     /* Check for LSH of 1 to 30 */
1509     if (sh > 30)
1510         goto TWO_ADDR_EXPR;
1511
1512 #else
1513
1514     goto TWO_ADDR_EXPR;
1515
1516 #endif
1517
1518     /* Matched a leftShift by 'sh' subtree, move op1 down */
1519     op1 = op1->gtOp.gtOp1;
1520
1521 TWO_ADDR_EXPR:
1522
1523     /* Now we have to evaluate op1 and op2 into registers */
1524
1525     /* Evaluate op1 and op2 in the correct order */
1526     if (rev)
1527     {
1528         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1529         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1530     }
1531     else
1532     {
1533         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1534         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1535     }
1536
1537     /*  If op1 and op2 must be spilled and reloaded then
1538      *  op1 and op2 might be reloaded into the same register
1539      *  This can only happen when all the registers are lockedRegs
1540      */
1541     if ((op1Mask == op2Mask) && (op1Mask != 0))
1542     {
1543         /* We'll need to grab a different register for op2 */
1544         op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1545     }
1546
1547 #ifdef _TARGET_ARM_
1548     // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1549     //      [op2 + op1<<sh + cns]
1550     // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1551     //
1552     if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1553     {
1554         op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1555     }
1556
1557     //
1558     // On the ARM we will need at least one scratch register for trees that have this form:
1559     //     [op1 + op2 + cns] or  [op1 + op2<<sh + cns]
1560     // or for a float/double or long when we have both op1 and op2
1561     // or when we have an 'cns' that is too large for the ld/st instruction
1562     //
1563     if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1564     {
1565         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1566     }
1567
1568     //
1569     // If we create a CSE that immediately dies then we may need to add an additional register interference
1570     // so we don't color the CSE into R3
1571     //
1572     if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1573     {
1574         opTemp = op2->gtOp.gtOp2;
1575         if (opTemp->OperGet() == GT_LCL_VAR)
1576         {
1577             unsigned   varNum = opTemp->gtLclVar.gtLclNum;
1578             LclVarDsc* varDsc = &lvaTable[varNum];
1579
1580             if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1581             {
1582                 rpRecordRegIntf(RBM_TMP_0,
1583                                 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1584             }
1585         }
1586     }
1587 #endif
1588
1589     regMask          = (op1Mask | op2Mask);
1590     tree->gtUsedRegs = (regMaskSmall)regMask;
1591     goto DONE;
1592
1593 ONE_ADDR_EXPR:
1594
1595     /* now we have to evaluate op1 into a register */
1596
1597     op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1598     op2Mask = RBM_NONE;
1599
1600 #ifdef _TARGET_ARM_
1601     //
1602     // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
1603     // instruction
1604     //
1605     if (!codeGen->validDispForLdSt(cns, type))
1606     {
1607         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1608     }
1609 #endif
1610
1611     regMask          = (op1Mask | op2Mask);
1612     tree->gtUsedRegs = (regMaskSmall)regMask;
1613     goto DONE;
1614
1615 NO_ADDR_EXPR:
1616
1617 #if !CPU_LOAD_STORE_ARCH
1618     if (oper == GT_CNS_INT)
1619     {
1620         /* Indirect of a constant does not require a register */
1621         regMask = RBM_NONE;
1622     }
1623     else
1624 #endif
1625     {
1626         /* now we have to evaluate tree into a register */
1627         regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1628     }
1629
1630 DONE:
1631     regMaskTP regUse = tree->gtUsedRegs;
1632
1633     if (!VarSetOps::IsEmpty(this, compCurLife))
1634     {
1635         // Add interference between the current set of live variables and
1636         //  the set of temporary registers needed to evaluate the subtree
1637         if (regUse)
1638         {
1639             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1640         }
1641     }
1642
1643     /* Do we need to restore the oldLastUseVars value? */
1644     if (restoreLastUseVars)
1645     {
1646         /*
1647          *  If we used a GT_ASG targeted register then we need to add
1648          *  a variable interference between any new last use variables
1649          *  and the GT_ASG targeted register
1650          */
1651         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1652         {
1653             rpRecordVarIntf(rpAsgVarNum,
1654                             VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1655         }
1656         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1657     }
1658
1659     return regMask;
1660 }
1661
1662 /*****************************************************************************
1663  *  Record that the local 'lclNum' is the target of a GC ref assignment; with
1664  *  NOGC write barriers, also keep it out of the write barrier register (EDX).
1665  */
1666
1667 void Compiler::rpPredictRefAssign(unsigned lclNum)
1668 {
1669     LclVarDsc* varDsc = lvaTable + lclNum;
1670
1671     varDsc->lvRefAssign = 1;
1672
1673 #if NOGC_WRITE_BARRIERS
1674 #ifdef DEBUG
1675     if (verbose)
1676     {
1677         if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1678             printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1679                    varDsc->lvVarIndex);
1680     }
1681 #endif
1682
1683     /* Make sure that write barrier pointer variables never land in EDX */
1684     VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1685 #endif // NOGC_WRITE_BARRIERS
1686 }
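// Usage elsewhere in this file: rpPredictTreeRegUse calls rpPredictRefAssign for
// the base/index locals of a write-barrier'd indirect assignment and for an op2
// GT_LCL_VAR, e.g.
//     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
// so that those GC pointer locals never land in the write barrier register.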
1687
1688 /*****************************************************************************
1689  *
1690  * Predict the internal temp physical register usage for a block assignment tree
1691  * (initBlk, copyBlk or copyObj), by setting tree->gtUsedRegs to the set of
1692  * scratch registers needed when evaluating the tree.
1693  * Returns a mask of interfering registers for this tree: registers that the
1694  * block operation will trash behind the allocator's back, such as the
1695  * argument and trash registers of the memcpy/memset or write-barrier helper
1696  * calls that codegen may use to implement it, so that the caller can record
1697  * the interference. Note that, unlike rpPredictTreeRegUse below, the return
1698  * value is not the scratch-register mask; the scratch registers are recorded
1699  * only in tree->gtUsedRegs.
1700  *
1701  *    tree       - a block assignment node
1702  *    predictReg - what type of register does the tree need
1703  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1704  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1705  *    rsvdRegs   - registers which should not be allocated because they will
1706  *                 be needed to evaluate a node in the future
1707  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1708  *                 the rpLastUseVars set should be saved and restored
1709  *                 so that we don't add any new variables to rpLastUseVars.
1710  */
1711 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr   tree,
1712                                           rpPredictReg predictReg,
1713                                           regMaskTP    lockedRegs,
1714                                           regMaskTP    rsvdRegs)
1715 {
1716     regMaskTP regMask         = RBM_NONE;
1717     regMaskTP interferingRegs = RBM_NONE;
1718
1719     bool        hasGCpointer  = false;
1720     bool        dstIsOnStack  = false;
1721     bool        useMemHelper  = false;
1722     bool        useBarriers   = false;
1723     GenTreeBlk* dst           = tree->gtGetOp1()->AsBlk();
1724     GenTreePtr  dstAddr       = dst->Addr();
1725     GenTreePtr  srcAddrOrFill = tree->gtGetOp2IfPresent();
1726
1727     size_t blkSize = dst->gtBlkSize;
1728
1729     hasGCpointer = (dst->HasGCPtr());
1730
1731     bool isCopyBlk = tree->OperIsCopyBlkOp();
1732     bool isCopyObj = isCopyBlk && hasGCpointer;
1733     bool isInitBlk = tree->OperIsInitBlkOp();
1734
1735     if (isCopyBlk)
1736     {
1737         assert(srcAddrOrFill->OperIsIndir());
1738         srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1739     }
1740     else
1741     {
1742         // For initBlk, we don't need to worry about the GC pointers.
1743         hasGCpointer = false;
1744     }
1745
1746     if (blkSize != 0)
1747     {
1748         if (isCopyObj)
1749         {
1750             dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1751         }
1752
1753         if (isInitBlk)
1754         {
1755             if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1756             {
1757                 useMemHelper = true;
1758             }
1759         }
1760     }
1761     else
1762     {
1763         useMemHelper = true;
1764     }
1765
1766     if (hasGCpointer && !dstIsOnStack)
1767     {
1768         useBarriers = true;
1769     }
1770
1771 #ifdef _TARGET_ARM_
1772     //
1773     // On ARM, COPYBLK & INITBLK get special treatment for constant lengths.
1774     //
1775     if (!useMemHelper && !useBarriers)
1776     {
1777         bool     useLoop        = false;
1778         unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1779
1780         // A mask to use to force the predictor to choose low registers (to reduce code size)
1781         regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1782
1783         // Allow the src and dst to be used in place, unless we use a loop, in which
1784         // case we will need scratch registers as we will be writing to them.
1785         rpPredictReg srcAndDstPredict = PREDICT_REG;
1786
1787         // Will we be using a loop to implement this INITBLK/COPYBLK?
1788         if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1789         {
1790             useLoop          = true;
1791             avoidReg         = RBM_NONE;
1792             srcAndDstPredict = PREDICT_SCRATCH_REG;
1793         }
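        // Worked example (illustrative arithmetic): on ARM, TARGET_POINTER_SIZE
        // is 4, so a 64-byte constant-size copyBlk has fullStoreCount == 16 and
        // is implemented as a loop, while a 16-byte one (fullStoreCount == 4)
        // stays unrolled and may use the src/dst registers in place.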
1794
1795         if (tree->gtFlags & GTF_REVERSE_OPS)
1796         {
1797             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1798                                            dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1799             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1800         }
1801         else
1802         {
1803             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1804                                            srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1805             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1806         }
1807
1808         // We need at least one scratch register for a copyBlk
1809         if (isCopyBlk)
1810         {
1811             // Pick a low register to reduce the code size
1812             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1813         }
1814
1815         if (useLoop)
1816         {
1817             if (isCopyBlk)
1818             {
1819                 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1820                 // Pick another low register to reduce the code size
1821                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1822             }
1823
1824             // We need a loop index register
1825             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1826         }
1827
1828         tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1829
1830         return interferingRegs;
1831     }
1832 #endif
1833     // Determine the order in which the Dest, Val/Src, and Size should be calculated
1834     GenTreePtr opsPtr[3];
1835     regMaskTP  regsPtr[3];
1836
1837 #if defined(_TARGET_XARCH_)
1838     fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1839
1840     // We're going to use these, might as well make them available now
1841
1842     codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1843     if (isCopyBlk)
1844         codeGen->regSet.rsSetRegsModified(RBM_ESI);
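    // The fixed registers chosen above match the x86 string instructions these
    // block ops are presumably lowered to: "rep movsd" uses EDI/ESI/ECX and
    // "rep stosd" uses EDI/EAX/ECX (see the "movsd dest"/"movsd src"
    // interference tags below).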
1845
1846 #elif defined(_TARGET_ARM_)
1847
1848     if (useMemHelper)
1849     {
1850         // For all other cases that involve non-constants, we just call memcpy/memset
1851         // JIT helpers
1852         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1853         interferingRegs |= RBM_CALLEE_TRASH;
1854 #ifdef DEBUG
1855         if (verbose)
1856             printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1857 #endif
1858     }
1859     else // useBarriers
1860     {
1861         assert(useBarriers);
1862         assert(isCopyBlk);
1863
1864         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1865
1866         // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1867         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1868 #ifdef DEBUG
1869         if (verbose)
1870             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1871 #endif
1872     }
1873 #else // !_TARGET_X86_ && !_TARGET_ARM_
1874 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1875 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1876     regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1877     regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1878                                    opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1879     regMask |= regsPtr[0];
1880     opsPtr[0]->gtUsedRegs |= regsPtr[0];
1881     rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1882
1883     regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1884                                    opsPtr2RsvdRegs | RBM_LASTUSE);
1885     regMask |= regsPtr[1];
1886     opsPtr[1]->gtUsedRegs |= regsPtr[1];
1887     rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1888
1889     regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1890     if (opsPtr[2] == nullptr)
1891     {
1892         // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1893         // Note that it is quite possible that no register is required, but this preserves
1894         // former behavior.
1895         regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1896         rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1897     }
1898     else
1899     {
1900         regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1901         opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1902     }
1903     regMask |= opsPtr2UsedRegs;
1904
1905     tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1906     return interferingRegs;
1907 }
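// A sketch of the intended call pattern (as used from rpPredictTreeRegUse below):
//
//     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
//
// The caller accumulates the returned interference mask separately from
// tree->gtUsedRegs, since helper-trashed registers must keep interfering even
// after gtUsedRegs is rewritten later in the prediction.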
1908
1909 /*****************************************************************************
1910  *
1911  * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1912  * Returns a regMask with the internal temp physical register usage for this tree.
1913  *
1914  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1915  * to the set of scratch registers needed when evaluating the tree.
1916  * Generally tree->gtUsedRegs and the return value regMask are the same, except when the
1917  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1918  * predict additional internal temp physical registers to spill into.
1919  *
1920  *    tree       - the tree whose register usage is being predicted
1921  *    predictReg - what type of register does the tree need
1922  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1923  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1924  *    rsvdRegs   - registers which should not be allocated because they will
1925  *                 be needed to evaluate a node in the future
1926  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1927  *                 the rpLastUseVars set should be saved and restored
1928  *                 so that we don't add any new variables to rpLastUseVars.
1929  */
1930
1931 #pragma warning(disable : 4701)
1932
1933 #ifdef _PREFAST_
1934 #pragma warning(push)
1935 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1936 #endif
1937 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr   tree,
1938                                         rpPredictReg predictReg,
1939                                         regMaskTP    lockedRegs,
1940                                         regMaskTP    rsvdRegs)
1941 {
1942     regMaskTP    regMask = DUMMY_INIT(RBM_ILLEGAL);
1943     regMaskTP    op2Mask;
1944     regMaskTP    tmpMask;
1945     rpPredictReg op1PredictReg;
1946     rpPredictReg op2PredictReg;
1947     LclVarDsc*   varDsc = NULL;
1948     VARSET_TP    oldLastUseVars(VarSetOps::UninitVal());
1949
1950     VARSET_TP varBits(VarSetOps::UninitVal());
1951     VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1952
1953     bool      restoreLastUseVars = false;
1954     regMaskTP interferingRegs    = RBM_NONE;
1955
1956 #ifdef DEBUG
1957     // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1958     noway_assert(tree);
1959     noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1960     noway_assert(RBM_ILLEGAL);
1961     noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1962     /* impossible values, to make sure that we set them */
1963     tree->gtUsedRegs = RBM_ILLEGAL;
1964 #endif
1965
1966     /* Figure out what kind of a node we have */
1967
1968     genTreeOps oper = tree->OperGet();
1969     var_types  type = tree->TypeGet();
1970     unsigned   kind = tree->OperKind();
1971
1972     // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1973     genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1974     if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1975         predictReg = PREDICT_NONE;
1976     else if (rpHasVarIndexForPredict(predictReg))
1977     {
1978         // The only place where predictReg is set to a var is in the PURE
1979         // assignment case where varIndex is the var being assigned to.
1980         // We need to check whether the variable is used between here and
1981         // its redefinition.
1982         unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1983         unsigned lclNum   = lvaTrackedToVarNum[varIndex];
1984         bool     found    = false;
1985         for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1986         {
1987             if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1988             {
1989                 // Is this the pure assignment?
1990                 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1991                 {
1992                     predictReg = PREDICT_SCRATCH_REG;
1993                 }
1994                 found = true;
1995                 break;
1996             }
1997         }
1998         assert(found);
1999     }
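    // Illustrative shapes (assumed): for a pure assignment "x = y" the forward
    // scan hits the GTF_VAR_DEF node for x first and the var-targeted prediction
    // is kept; for "x = f(x)" a plain use of x appears before the redefinition,
    // so the prediction is demoted to PREDICT_SCRATCH_REG above.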
2000
2001     if (rsvdRegs & RBM_LASTUSE)
2002     {
2003         restoreLastUseVars = true;
2004         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2005         rsvdRegs &= ~RBM_LASTUSE;
2006     }
2007
2008     /* Is this a constant or leaf node? */
2009
2010     if (kind & (GTK_CONST | GTK_LEAF))
2011     {
2012         bool      lastUse   = false;
2013         regMaskTP enregMask = RBM_NONE;
2014
2015         switch (oper)
2016         {
2017 #ifdef _TARGET_ARM_
2018             case GT_CNS_DBL:
2019                 // Codegen for floating point constants on the ARM is currently
2020                 // movw/movt    rT1, <lo32 bits>
2021                 // movw/movt    rT2, <hi32 bits>
2022                 //  vmov.i2d    dT0, rT1,rT2
2023                 //
2024                 // For TYP_FLOAT one integer register is required
2025                 //
2026                 // These integer register(s) immediately die
2027                 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2028                 if (type == TYP_DOUBLE)
2029                 {
2030                     // For TYP_DOUBLE a second integer register is required
2031                     //
2032                     tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2033                 }
2034
2035                 // We also need a floating point register that we keep
2036                 //
2037                 if (predictReg == PREDICT_NONE)
2038                     predictReg = PREDICT_SCRATCH_REG;
2039
2040                 regMask          = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2041                 tree->gtUsedRegs = regMask | tmpMask;
2042                 goto RETURN_CHECK;
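                // Worked example (illustrative): materializing the double 1.0
                // (IEEE-754 bits 0x3FF00000:0x00000000) predicts two integer
                // temps for the movw/movt pairs, which die immediately, plus the
                // one floating point register in regMask that holds the result.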
2043 #endif
2044
2045             case GT_CNS_INT:
2046             case GT_CNS_LNG:
2047
2048                 if (rpHasVarIndexForPredict(predictReg))
2049                 {
2050                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2051                     rpAsgVarNum       = tgtIndex;
2052
2053                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2054                     predictReg = PREDICT_NONE;
2055
2056                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2057                     tgtVar->lvDependReg = true;
2058
2059                     if (type == TYP_LONG)
2060                     {
2061                         assert(oper == GT_CNS_LNG);
2062
2063                         if (tgtVar->lvOtherReg == REG_STK)
2064                         {
2065                             // We do need one register for a partially enregistered long
2066                             type       = TYP_INT;
2067                             predictReg = PREDICT_SCRATCH_REG;
2068                         }
2069                     }
2070                 }
2071                 else
2072                 {
2073 #if !CPU_LOAD_STORE_ARCH
2074                     /* If the constant is a handle then it will need to have a relocation
2075                        applied to it.  It will need to be loaded into a register.
2076                        But never throw away an existing hint.
2077                        */
2078                     if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2079 #endif
2080                     {
2081                         if (predictReg == PREDICT_NONE)
2082                             predictReg = PREDICT_SCRATCH_REG;
2083                     }
2084                 }
2085                 break;
2086
2087             case GT_NO_OP:
2088                 break;
2089
2090             case GT_CLS_VAR:
2091                 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2092                     (genTypeSize(type) < sizeof(int)))
2093                 {
2094                     predictReg = PREDICT_SCRATCH_REG;
2095                 }
2096 #ifdef _TARGET_ARM_
2097                 // Unaligned loads/stores of floating point values must go through integer register(s) first
2098                 //
2099                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2100                 {
2101                     // These integer register(s) immediately die
2102                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2103                     // Two integer registers are required for a TYP_DOUBLE
2104                     if (type == TYP_DOUBLE)
2105                         tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2106                 }
2107                 // We need a temp register in some cases of loads/stores to a class var
2108                 if (predictReg == PREDICT_NONE)
2109                 {
2110                     predictReg = PREDICT_SCRATCH_REG;
2111                 }
2112 #endif
2113                 if (rpHasVarIndexForPredict(predictReg))
2114                 {
2115                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2116                     rpAsgVarNum       = tgtIndex;
2117
2118                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2119                     predictReg = PREDICT_NONE;
2120
2121                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2122                     tgtVar->lvDependReg = true;
2123
2124                     if (type == TYP_LONG)
2125                     {
2126                         if (tgtVar->lvOtherReg == REG_STK)
2127                         {
2128                             // We do need one register for a partially enregistered long
2129                             type       = TYP_INT;
2130                             predictReg = PREDICT_SCRATCH_REG;
2131                         }
2132                     }
2133                 }
2134                 break;
2135
2136             case GT_LCL_FLD:
2137 #ifdef _TARGET_ARM_
2138                 // Check for a misalignment on a Floating Point field
2139                 //
2140                 if (varTypeIsFloating(type))
2141                 {
2142                     if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2143                     {
2144                         // These integer register(s) immediately die
2145                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2146                         // Two integer registers are required for a TYP_DOUBLE
2147                         if (type == TYP_DOUBLE)
2148                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2149                     }
2150                 }
2151 #endif
2152                 __fallthrough;
2153
2154             case GT_LCL_VAR:
2155             case GT_REG_VAR:
2156
2157                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2158
2159                 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2160                 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2161                 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2162
2163 #if FEATURE_STACK_FP_X87
2164                 // If it's a floating point var, there's nothing to do
2165                 if (varTypeIsFloating(type))
2166                 {
2167                     tree->gtUsedRegs = RBM_NONE;
2168                     regMask          = RBM_NONE;
2169                     goto RETURN_CHECK;
2170                 }
2171 #endif
2172
2173                 // If the variable is already a register variable, no need to go further.
2174                 if (oper == GT_REG_VAR)
2175                     break;
2176
2177                 /* Apply the type of predictReg to the LCL_VAR */
2178
2179                 if (predictReg == PREDICT_REG)
2180                 {
2181                 PREDICT_REG_COMMON:
2182                     if (varDsc->lvRegNum == REG_STK)
2183                         break;
2184
2185                     goto GRAB_COUNT;
2186                 }
2187                 else if (predictReg == PREDICT_SCRATCH_REG)
2188                 {
2189                     noway_assert(predictReg == PREDICT_SCRATCH_REG);
2190
2191                     /* Is this the last use of a local var?   */
2192                     if (lastUse)
2193                     {
2194                         if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2195                             goto PREDICT_REG_COMMON;
2196                     }
2197                 }
2198                 else if (rpHasVarIndexForPredict(predictReg))
2199                 {
2200                     /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2201                     {
2202                         unsigned   tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2203                         LclVarDsc* tgtVar    = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2204                         VarSetOps::MakeSingleton(this, tgtIndex1);
2205
2206                         noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2207                         noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2208 #ifndef _TARGET_AMD64_
2209                         // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2210                         // so this assert is meaningless
2211                         noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2212 #endif // !_TARGET_AMD64_
2213
2214                         if (varDsc->lvTracked)
2215                         {
2216                             unsigned srcIndex;
2217                             srcIndex = varDsc->lvVarIndex;
2218
2219                             // If this variable has its last use here then we will prefer
2220                             // to color it to the same register as tgtVar.
2221                             if (lastUse)
2222                             {
2223                                 /*
2224                                  *  Add an entry in the lvaVarPref graph to indicate
2225                                  *  that it would be worthwhile to color these two variables
2226                                  *  into the same physical register.
2227                                  *  This will help us avoid having an extra copy instruction
2228                                  */
2229                                 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2230                                 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2231                             }
2232
2233                             // Add a variable interference from srcIndex to each of the last use variables
2234                             if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2235                             {
2236                                 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2237                             }
2238                         }
2239                         rpAsgVarNum = tgtIndex1;
2240
2241                         /* We will rely on the target enregistered variable from the GT_ASG */
2242                         varDsc = tgtVar;
2243                     }
2244                 GRAB_COUNT:
2245                     unsigned grabCount;
2246                     grabCount = 0;
2247
2248                     if (genIsValidFloatReg(varDsc->lvRegNum))
2249                     {
2250                         enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2251                     }
2252                     else
2253                     {
2254                         enregMask = genRegMask(varDsc->lvRegNum);
2255                     }
2256
2257 #ifdef _TARGET_ARM_
2258                     if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2259                     {
2260                         // We need to compute the intermediate value using a TYP_DOUBLE,
2261                         // but we are storing the result in a TYP_FLOAT enregistered variable
2262                         //
2263                         grabCount++;
2264                     }
2265                     else
2266 #endif
2267                     {
2268                         /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2269                         if (enregMask & (rsvdRegs | lockedRegs))
2270                         {
2271                             grabCount++;
2272                         }
2273 #ifndef _TARGET_64BIT_
2274                         if (type == TYP_LONG)
2275                         {
2276                             if (varDsc->lvOtherReg != REG_STK)
2277                             {
2278                                 tmpMask = genRegMask(varDsc->lvOtherReg);
2279                                 enregMask |= tmpMask;
2280
2281                                 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2282                                 if (tmpMask & (rsvdRegs | lockedRegs))
2283                                     grabCount++;
2284                             }
2285                             else // lvOtherReg == REG_STK
2286                             {
2287                                 grabCount++;
2288                             }
2289                         }
2290 #endif // _TARGET_64BIT_
2291                     }
2292
2293                     varDsc->lvDependReg = true;
2294
2295                     if (grabCount == 0)
2296                     {
2297                         /* Does not need a register */
2298                         predictReg = PREDICT_NONE;
2299                         // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2300                         VarSetOps::UnionD(this, rpUseInPlace, varBits);
2301                     }
2302                     else // (grabCount > 0)
2303                     {
2304 #ifndef _TARGET_64BIT_
2305                         /* For a TYP_LONG, if we only need one register, change the type to TYP_INT */
2306                         if ((type == TYP_LONG) && (grabCount == 1))
2307                         {
2308                             /* We will need to pick one register */
2309                             type = TYP_INT;
2310                             // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2311                             VarSetOps::UnionD(this, rpUseInPlace, varBits);
2312                         }
2313                         noway_assert((type == TYP_DOUBLE) ||
2314                                      (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2315 #else  // !_TARGET_64BIT_
2316                         noway_assert(grabCount == 1);
2317 #endif // !_TARGET_64BIT_
2318                     }
2319                 }
2320                 else if (type == TYP_STRUCT)
2321                 {
2322 #ifdef _TARGET_ARM_
2323                     // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2324                     //        predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2325                     //        As a workaround we just bash it to PREDICT_NONE here
2326                     //
2327                     if (predictReg != PREDICT_NONE)
2328                         predictReg = PREDICT_NONE;
2329 #endif
2330                     // Currently predictReg is saying that we will not need any scratch registers
2331                     noway_assert(predictReg == PREDICT_NONE);
2332
2333                     /* We may need to sign or zero extend a small type when pushing a struct */
2334                     if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2335                     {
2336                         for (unsigned varNum = varDsc->lvFieldLclStart;
2337                              varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2338                         {
2339                             LclVarDsc* fldVar = lvaTable + varNum;
2340
2341                             if (fldVar->lvStackAligned())
2342                             {
2343                                 // When we are stack aligned Codegen will just use
2344                                 // a push instruction and thus doesn't need any register
2345                                 // since we can push either a register or a stack frame location
2346                                 continue;
2347                             }
2348
2349                             if (varTypeIsByte(fldVar->TypeGet()))
2350                             {
2351                                 // We will need to reserve one byteable register,
2352                                 //
2353                                 type       = TYP_BYTE;
2354                                 predictReg = PREDICT_SCRATCH_REG;
2355 #if CPU_HAS_BYTE_REGS
2356                                 // It is best to enregister this fldVar in a byteable register
2357                                 //
2358                                 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2359 #endif
2360                             }
2361                             else if (varTypeIsShort(fldVar->TypeGet()))
2362                             {
2363                                 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2364                                 // If fldVar is not enregistered then we will need a scratch register
2365                                 //
2366                                 if (!isEnregistered)
2367                                 {
2368                                     // We will need either an int register or a byte register
2369                                     // If we are not requesting a byte register we will request an int register
2370                                     //
2371                                     if (type != TYP_BYTE)
2372                                         type   = TYP_INT;
2373                                     predictReg = PREDICT_SCRATCH_REG;
2374                                 }
2375                             }
2376                         }
2377                     }
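                    // For example (a hypothetical promoted struct): a field of
                    // type short that is neither stack-aligned nor enregistered
                    // forces type = TYP_INT / PREDICT_SCRATCH_REG, so a scratch
                    // register is reserved below to widen the field for the push.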
2378                 }
2379                 else
2380                 {
2381                     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2382                     if (preferReg != 0)
2383                     {
2384                         if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2385                         {
2386                             varDsc->addPrefReg(preferReg, this);
2387                         }
2388                     }
2389                 }
2390                 break; /* end of case GT_LCL_VAR */
2391
2392             case GT_JMP:
2393                 tree->gtUsedRegs = RBM_NONE;
2394                 regMask          = RBM_NONE;
2395
2396 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2397                 // Mark the registers required to emit a tailcall profiler callback
2398                 if (compIsProfilerHookNeeded())
2399                 {
2400                     tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2401                 }
2402 #endif
2403                 goto RETURN_CHECK;
2404
2405             default:
2406                 break;
2407         } /* end of switch (oper) */
2408
2409         /* If we don't need to evaluate into a register, regMask is the empty set */
2410         /* Otherwise we grab a temp for the local variable                    */
2411
2412         if (predictReg == PREDICT_NONE)
2413             regMask = RBM_NONE;
2414         else
2415         {
2416             regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2417
2418             if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2419             {
2420                 /* We need to sign or zero extend a small type when pushing a struct */
2421                 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2422
2423                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2424                 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2425
2426                 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2427                      varNum++)
2428                 {
2429                     LclVarDsc* fldVar = lvaTable + varNum;
2430                     if (fldVar->lvTracked)
2431                     {
2432                         VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2433                         rpRecordRegIntf(regMask, fldBit DEBUGARG(
2434                                                      "need scratch register when pushing a small field of a struct"));
2435                     }
2436                 }
2437             }
2438         }
2439
2440         /* Update the set of lastUse variables that we encountered so far */
2441         if (lastUse)
2442         {
2443             VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2444             VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2445
2446             /*
2447              *  Add interference from any previously locked temps into this last use variable.
2448              */
2449             if (lockedRegs)
2450             {
2451                 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2452             }
2453             /*
2454              *  Add interference from any reserved temps into this last use variable.
2455              */
2456             if (rsvdRegs)
2457             {
2458                 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2459             }
2460             /*
2461              *  For partially enregistered longs add an interference with the
2462              *  register returned by rpPredictRegPick
2463              */
2464             if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2465             {
2466                 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2467             }
2468         }
2469
2470         tree->gtUsedRegs = (regMaskSmall)regMask;
2471         goto RETURN_CHECK;
2472     }
2473
2474     /* Is it a 'simple' unary/binary operator? */
2475
2476     if (kind & GTK_SMPOP)
2477     {
2478         GenTreePtr op1 = tree->gtOp.gtOp1;
2479         GenTreePtr op2 = tree->gtGetOp2IfPresent();
2480
2481         GenTreePtr opsPtr[3];
2482         regMaskTP  regsPtr[3];
2483
2484         VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2485
2486         switch (oper)
2487         {
2488             case GT_ASG:
2489
2490                 /* Is the value being assigned into a LCL_VAR? */
2491                 if (op1->gtOper == GT_LCL_VAR)
2492                 {
2493                     varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2494
2495                     /* Are we assigning a LCL_VAR the result of a call? */
2496                     if (op2->gtOper == GT_CALL)
2497                     {
2498                         /* Set a preferred register for the LCL_VAR */
2499                         if (isRegPairType(varDsc->TypeGet()))
2500                             varDsc->addPrefReg(RBM_LNGRET, this);
2501                         else if (!varTypeIsFloating(varDsc->TypeGet()))
2502                             varDsc->addPrefReg(RBM_INTRET, this);
2503 #ifdef _TARGET_AMD64_
2504                         else
2505                             varDsc->addPrefReg(RBM_FLOATRET, this);
2506 #endif
2507                         /*
2508                          *  When assigning the result of a call we don't
2509                          *  bother trying to target the right side of the
2510                          *  assignment, since we have a fixed calling convention.
2511                          */
2512                     }
2513                     else if (varDsc->lvTracked)
2514                     {
2515                         // We interfere with uses in place
2516                         if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2517                         {
2518                             rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2519                         }
2520
2521                         // Did we predict that this local will be fully enregistered?
2522                         // and the assignment type is the same as the expression type?
2523                         // and it is dead on the right side of the assignment?
2524                         // and we currently have no other rpAsgVarNum active?
2525                         //
2526                         if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2527                             (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2528                         {
2529                             //
2530                             //  Yes, we should try to target the right side (op2) of this
2531                             //  assignment into the (enregistered) tracked variable.
2532                             //
2533
2534                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2535                             op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2536
2537                             // Remember that this is a new use in place
2538
2539                             // We've added "new UseInPlace"; remove from the global set.
2540                             VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2541
2542                             //  Note that later when we walk down to the leaf node for op2
2543                             //  if we decide to actually use the register for the 'varDsc'
2544                             //  to enregister the operand, then we will set rpAsgVarNum to
2545                             //  varDsc->lvVarIndex, by extracting this value using
2546                             //  rpGetVarIndexForPredict()
2547                             //
2548                             //  Also we reset rpAsgVarNum back to -1 after we have finished
2549                             //  predicting the current GT_ASG node
2550                             //
2551                             goto ASG_COMMON;
2552                         }
2553                     }
2554                 }
2555                 else if (tree->OperIsBlkOp())
2556                 {
2557                     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2558                     regMask = 0;
2559                     goto RETURN_CHECK;
2560                 }
2561                 __fallthrough;
2562
2563             case GT_CHS:
2564
2565             case GT_ASG_OR:
2566             case GT_ASG_XOR:
2567             case GT_ASG_AND:
2568             case GT_ASG_SUB:
2569             case GT_ASG_ADD:
2570             case GT_ASG_MUL:
2571             case GT_ASG_DIV:
2572             case GT_ASG_UDIV:
2573
2574                 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2575                 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2576                 {
2577                     /* Is the value being assigned into an enregistered LCL_VAR? */
2578                     /* For debug code we only allow a simple op2 to be assigned */
2579                     if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2580                     {
2581                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2582                         /* Did we predict that this local will be enregistered? */
2583                         if (varDsc->lvRegNum != REG_STK)
2584                         {
2585                             /* Yes, we can use "reg <op>= addr" */
2586
2587                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2588                             op2PredictReg = PREDICT_NONE;
2589
2590                             goto ASG_COMMON;
2591                         }
2592                     }
2593                 }
2594
2595 #if CPU_LOAD_STORE_ARCH
2596                 if (oper != GT_ASG)
2597                 {
2598                     op1PredictReg = PREDICT_REG;
2599                     op2PredictReg = PREDICT_REG;
2600                 }
2601                 else
2602 #endif
2603                 {
2604                     /*
2605                      *  Otherwise, initialize the normal forcing of operands:
2606                      *   "addr <op>= reg"
2607                      */
2608                     op1PredictReg = PREDICT_ADDR;
2609                     op2PredictReg = PREDICT_REG;
2610                 }
2611
2612             ASG_COMMON:
2613
2614 #if !CPU_LOAD_STORE_ARCH
2615                 if (op2PredictReg != PREDICT_NONE)
2616                 {
2617                     /* Is the value being assigned a simple one? */
2618                     if (rpCanAsgOperWithoutReg(op2, false))
2619                         op2PredictReg = PREDICT_NONE;
2620                 }
2621 #endif
2622
2623                 bool simpleAssignment;
2624                 simpleAssignment = false;
2625
2626                 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2627                 {
2628                     // Add a variable interference from the assign target
2629                     // to each of the last use variables
2630                     if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2631                     {
2632                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2633
2634                         if (varDsc->lvTracked)
2635                         {
2636                             unsigned varIndex = varDsc->lvVarIndex;
2637
2638                             rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2639                         }
2640                     }
2641
2642                     /*  Record whether this tree is a simple assignment to a local */
2643
2644                     simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2645                 }
2646
2647                 bool requireByteReg;
2648                 requireByteReg = false;
2649
2650 #if CPU_HAS_BYTE_REGS
2651                 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2652
2653                 if (varTypeIsByte(type) &&
2654                     ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2655
2656                 {
2657                     // Byte-assignments typically need a byte register
2658                     requireByteReg = true;
2659
2660                     if (op1->gtOper == GT_LCL_VAR)
2661                     {
2662                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2663
2664                         // Did we predict that this local will be enregistered?
2665                         if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2666                         {
2667                             // We don't require a byte register when op1 is an enregistered local
2668                             requireByteReg = false;
2669                         }
2670
2671                         // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2672                         if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2673                         {
2674                             // We should try to put op1 in a byte register
2675                             varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2676                         }
2677                     }
2678                 }
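                // For example (illustrative): x86 byte stores can only encode
                // AL/BL/CL/DL, so when "byteLocal = expr" leaves op1 in memory,
                // op2 is forced below into one of EAX/EBX/ECX/EDX by excluding
                // RBM_NON_BYTE_REGS from the pick.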
2679 #endif
2680
2681                 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2682
2683                 bool isWriteBarrierAsgNode;
2684                 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2685 #ifdef DEBUG
2686                 GCInfo::WriteBarrierForm wbf;
2687                 if (isWriteBarrierAsgNode)
2688                     wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2689                 else
2690                     wbf = GCInfo::WBF_NoBarrier;
2691 #endif // DEBUG
2692
2693                 regMaskTP wbaLockedRegs;
2694                 wbaLockedRegs = lockedRegs;
2695                 if (isWriteBarrierAsgNode)
2696                 {
2697 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2698 #ifdef DEBUG
2699                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2700                     {
2701 #endif // DEBUG
2702                         wbaLockedRegs |= RBM_WRITE_BARRIER;
2703                         op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2704                         assert(REG_WRITE_BARRIER == REG_EDX);
2705                         op1PredictReg = PREDICT_REG_EDX;
2706 #ifdef DEBUG
2707                     }
2708                     else
2709 #endif // DEBUG
2710 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2711
2712 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2713                     {
2714 #ifdef _TARGET_X86_
2715                         op1PredictReg = PREDICT_REG_ECX;
2716                         op2PredictReg = PREDICT_REG_EDX;
2717 #elif defined(_TARGET_ARM_)
2718                         op1PredictReg = PREDICT_REG_R0;
2719                         op2PredictReg = PREDICT_REG_R1;
2720
2721                         // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2722                         if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2723                         {
2724                             op1 = op1->gtOp.gtOp1;
2725                         }
2726 #else // !_TARGET_X86_ && !_TARGET_ARM_
2727 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2728 #endif
2729                     }
2730 #endif
2731                 }
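                // Summary of the targeting above: with NOGC write barriers on x86
                // the destination pointer is steered to EDX (REG_WRITE_BARRIER);
                // otherwise the address/value pair is steered to ECX/EDX on x86
                // and to R0/R1 on ARM to match the write-barrier helper's
                // (assumed) argument registers.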
2732
2733                 /*  Are we supposed to evaluate RHS first? */
2734
2735                 if (tree->gtFlags & GTF_REVERSE_OPS)
2736                 {
2737                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2738
2739 #if CPU_HAS_BYTE_REGS
2740                     // Should we ensure that op2 gets evaluated into a byte register?
2741                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2742                     {
2743                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2744                         // and we can't select one that is already reserved (i.e. lockedRegs)
2745                         //
2746                         op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2747                         op2->gtUsedRegs |= op2Mask;
2748
2749                         // No longer a simple assignment because we're using extra registers and might
2750                         // have interference between op1 and op2.  See DevDiv #136681
2751                         simpleAssignment = false;
2752                     }
2753 #endif
2754                     /*
2755                      *  For a simple assignment we don't want the op2Mask to be
2756                      *  marked as interfering with the LCL_VAR, since it is likely
2757                      *  that we will want to enregister the LCL_VAR in exactly
2758                      *  the register that is used to compute op2
2759                      */
2760                     tmpMask = lockedRegs;
2761
2762                     if (!simpleAssignment)
2763                         tmpMask |= op2Mask;
2764
2765                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2766
2767                     // Did we relax the register prediction for op1 and op2 above ?
2768                     // - because we are depending upon op1 being enregistered
2769                     //
2770                     if ((op1PredictReg == PREDICT_NONE) &&
2771                         ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2772                     {
2773                         /* We must be assigning into an enregistered LCL_VAR */
2774                         noway_assert(op1->gtOper == GT_LCL_VAR);
2775                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2776                         noway_assert(varDsc->lvRegNum != REG_STK);
2777
2778                         /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2779                         varDsc->lvDependReg = true;
2780                     }
2781                 }
2782                 else
2783                 {
2784                     // For simple assignments, op2 should always be evaluated first
2785                     noway_assert(!simpleAssignment);
2786
2787                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2788                     if (isWriteBarrierAsgNode)
2789                     {
2790                         wbaLockedRegs |= op1->gtUsedRegs;
2791                     }
2792                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2793
2794 #if CPU_HAS_BYTE_REGS
2795                     // Should we ensure that op2 gets evaluated into a byte register?
2796                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2797                     {
2798                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2799                         // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2800                         //
2801                         op2Mask |=
2802                             rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2803                         op2->gtUsedRegs |= op2Mask;
2804                     }
2805 #endif
2806                 }
2807
2808                 if (rpHasVarIndexForPredict(op2PredictReg))
2809                 {
2810                     rpAsgVarNum = -1;
2811                 }
2812
2813                 if (isWriteBarrierAsgNode)
2814                 {
2815 #if NOGC_WRITE_BARRIERS
2816 #ifdef DEBUG
2817                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2818                     {
2819 #endif // DEBUG
2820
2821                         /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2822                            passed to the write-barrier call in REG_WRITE_BARRIER */
2823
2824                         regMask = op2Mask;
2825
2826                         if (op1->gtOper == GT_IND)
2827                         {
2828                             GenTreePtr rv1, rv2;
2829                             unsigned   mul, cns;
2830                             bool       rev;
2831
2832                             /* Special handling of indirect assigns for write barrier */
2833
2834                             bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2835                                                                   &mul, &cns);
2836
2837                             /* Check address mode for enregisterable locals */
2838
2839                             if (yes)
2840                             {
2841                                 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2842                                 {
2843                                     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2844                                 }
2845                                 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2846                                 {
2847                                     rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2848                                 }
2849                             }
2850                         }
2851
2852                         if (op2->gtOper == GT_LCL_VAR)
2853                         {
2854                             rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2855                         }
2856
2857                         // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2858                         if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2859                         {
2860                             rpRecordRegIntf(RBM_WRITE_BARRIER,
2861                                             rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2862                         }
2863                         tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2864 #ifdef DEBUG
2865                     }
2866                     else
2867 #endif // DEBUG
2868 #endif // NOGC_WRITE_BARRIERS
2869
2870 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2871                     {
2872 #ifdef _TARGET_ARM_
2873 #ifdef DEBUG
2874                         if (verbose)
2875                             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2876 #endif
2877                         //
2878                         // For the ARM target we have an optimized JIT Helper
2879                         // that only trashes a subset of the callee-trash registers
2880                         //
2881
2882                         // The helper kills the RBM_CALLEE_TRASH_NOGC set. Normally, adding these
2883                         // registers to gtUsedRegs would cause the interference to be added
2884                         // appropriately; here, however, we record the kill in interferingRegs
2885                         // instead of gtUsedRegs, because the latter will be modified later,
2886                         // and we need to remember to add the interference.
2887                         //
2888
2889                         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2890
2891                         op1->gtUsedRegs |= RBM_R0;
2892                         op2->gtUsedRegs |= RBM_R1;
2893 #else // _TARGET_ARM_
2894
2895 #ifdef DEBUG
2896                         if (verbose)
2897                             printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2898 #endif
2899                         // We have to call a normal JIT helper to perform the Write Barrier Assignment
2900                         // It will trash the callee-trash (i.e. caller-saved) registers
2901
2902                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2903 #endif // _TARGET_ARM_
2904                     }
2905 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2906                 }
2907
2908                 if (simpleAssignment)
2909                 {
2910                     /*
2911                      *  Consider a simple assignment to a local:
2912                      *
2913                      *   lcl = expr;
2914                      *
2915                      *  Since the "=" node is visited after the variable
2916                      *  is marked live (assuming it's live after the
2917                      *  assignment), we don't want to use the register
2918                      *  use mask of the "=" node but rather that of the
2919                      *  variable itself.
2920                      */
2921                     tree->gtUsedRegs = op1->gtUsedRegs;
2922                 }
2923                 else
2924                 {
2925                     tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2926                 }
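                // Restore rpUseInPlace to the state captured at the start of this assignment
                // (startAsgUseInPlaceVars); the save/restore pattern suggests that in-place
                // uses recorded while predicting op1/op2 are local to the assignment itself.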
2927                 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2928                 goto RETURN_CHECK;
2929
2930             case GT_ASG_LSH:
2931             case GT_ASG_RSH:
2932             case GT_ASG_RSZ:
2933                 /* assigning shift operators */
2934
2935                 noway_assert(type != TYP_LONG);
2936
2937 #if CPU_LOAD_STORE_ARCH
2938                 predictReg = PREDICT_ADDR;
2939 #else
2940                 predictReg = PREDICT_NONE;
2941 #endif
2942
2943                 /* the shift count is handled the same as an ordinary shift */
2944                 goto HANDLE_SHIFT_COUNT;
2945
2946             case GT_ADDR:
2947                 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2948
2949                 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2950                 {
2951                     // We need a scratch register for the LEA instruction
2952                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2953                 }
2954
2955                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2956                 goto RETURN_CHECK;
2957
2958             case GT_CAST:
2959
2960                 /* Cannot cast to VOID */
2961                 noway_assert(type != TYP_VOID);
2962
2963                 /* cast to long is special */
2964                 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2965                 {
2966                     noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2967 #if CPU_LONG_USES_REGPAIR
2968                     rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2969
2970                     if (rpHasVarIndexForPredict(predictReg))
2971                     {
2972                         unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2973                         rpAsgVarNum       = tgtIndex;
2974
2975                         // We don't need any register as we plan on writing to the rpAsgVarNum register
2976                         predictReg = PREDICT_NONE;
2977
2978                         LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2979                         tgtVar->lvDependReg = true;
2980
2981                         if (tgtVar->lvOtherReg != REG_STK)
2982                         {
2983                             predictRegHi = PREDICT_NONE;
2984                         }
2985                     }
2986                     else
2987 #endif
2988                         if (predictReg == PREDICT_NONE)
2989                     {
2990                         predictReg = PREDICT_SCRATCH_REG;
2991                     }
2992 #ifdef _TARGET_ARM_
2993                     // If we are widening an int into a long using a targeted register pair, we
2994                     // should retarget so that the low part gets loaded into the appropriate register
2995                     else if (predictReg == PREDICT_PAIR_R0R1)
2996                     {
2997                         predictReg   = PREDICT_REG_R0;
2998                         predictRegHi = PREDICT_REG_R1;
2999                     }
3000                     else if (predictReg == PREDICT_PAIR_R2R3)
3001                     {
3002                         predictReg   = PREDICT_REG_R2;
3003                         predictRegHi = PREDICT_REG_R3;
3004                     }
3005 #endif
3006 #ifdef _TARGET_X86_
3007                     // If we are widening an int into a long using a targeted register pair, we
3008                     // should retarget so that the low part gets loaded into the appropriate register
3009                     else if (predictReg == PREDICT_PAIR_EAXEDX)
3010                     {
3011                         predictReg   = PREDICT_REG_EAX;
3012                         predictRegHi = PREDICT_REG_EDX;
3013                     }
3014                     else if (predictReg == PREDICT_PAIR_ECXEBX)
3015                     {
3016                         predictReg   = PREDICT_REG_ECX;
3017                         predictRegHi = PREDICT_REG_EBX;
3018                     }
3019 #endif
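                    // For example (an x86 sketch): a PREDICT_PAIR_EAXEDX request for the long
                    // result is split so that the int source is computed directly into EAX,
                    // and EDX is then picked separately below for the high half.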
3020
3021                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3022
3023 #if CPU_LONG_USES_REGPAIR
3024                     if (predictRegHi != PREDICT_NONE)
3025                     {
3026                         // Now get one more reg for the upper part
3027                         regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3028                     }
3029 #endif
3030                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3031                     goto RETURN_CHECK;
3032                 }
3033
3034                 /* cast from long is special - it frees a register */
3035                 if (type <= TYP_INT // nice.  this presumably is intended to mean "signed int and shorter types"
3036                     && op1->gtType == TYP_LONG)
3037                 {
3038                     if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3039                         predictReg = PREDICT_REG;
3040
3041                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3042
3043                     // If we have 2 or more regs, free one of them
3044                     if (!genMaxOneBit(regMask))
3045                     {
3046                         /* Clear the 2nd lowest bit in regMask */
3047                         /* First set tmpMask to the lowest bit in regMask */
3048                         tmpMask = genFindLowestBit(regMask);
3049                         /* Next find the second lowest bit in regMask */
3050                         tmpMask = genFindLowestBit(regMask & ~tmpMask);
3051                         /* Clear this bit from regMask */
3052                         regMask &= ~tmpMask;
3053                     }
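                    // Worked example (a sketch): if regMask is 0x06 (two registers), the first
                    // genFindLowestBit yields 0x02, the second yields 0x04, and clearing that
                    // bit leaves regMask == 0x02, i.e. a single register.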
3054                     tree->gtUsedRegs = op1->gtUsedRegs;
3055                     goto RETURN_CHECK;
3056                 }
3057
3058 #if CPU_HAS_BYTE_REGS
3059                 /* cast from signed-byte is special - it uses byteable registers */
3060                 if (type == TYP_INT)
3061                 {
3062                     var_types smallType;
3063
3064                     if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3065                         smallType = tree->gtCast.CastOp()->TypeGet();
3066                     else
3067                         smallType = tree->gtCast.gtCastType;
3068
3069                     if (smallType == TYP_BYTE)
3070                     {
3071                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3072
3073                         if ((regMask & RBM_BYTE_REGS) == 0)
3074                             regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3075
3076                         tree->gtUsedRegs = (regMaskSmall)regMask;
3077                         goto RETURN_CHECK;
3078                     }
3079                 }
3080 #endif
3081
3082 #if FEATURE_STACK_FP_X87
3083                 /* cast to float/double is special */
3084                 if (varTypeIsFloating(type))
3085                 {
3086                     switch (op1->TypeGet())
3087                     {
3088                         /* uses fild, so the operand doesn't need to be loaded into a reg */
3089                         case TYP_INT:
3090                         case TYP_LONG:
3091                             rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3092                             tree->gtUsedRegs = op1->gtUsedRegs;
3093                             regMask          = 0;
3094                             goto RETURN_CHECK;
3095                         default:
3096                             break;
3097                     }
3098                 }
3099
3100                 /* Casting from floating type to integral type is special */
3101                 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3102                 {
3103                     if (opts.compCanUseSSE2)
3104                     {
3105                         // predict for SSE2 based casting
3106                         if (predictReg <= PREDICT_REG)
3107                             predictReg = PREDICT_SCRATCH_REG;
3108                         regMask        = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3109
3110                         // Get one more int reg to hold cast result
3111                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3112                         tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3113                         goto RETURN_CHECK;
3114                     }
3115                 }
3116 #endif
3117
3118 #if FEATURE_FP_REGALLOC
3119                 // Are we casting from int to float or from float to int?
3120                 // Fix 388428 ARM JitStress WP7
3121                 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3122                 {
3123                     // op1 needs to go into a register
3124                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3125
3126 #ifdef _TARGET_ARM_
3127                     if (varTypeIsFloating(op1->TypeGet()))
3128                     {
3129                         // We also need a fp scratch register for the convert operation
3130                         regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3131                                                     PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3132                     }
3133 #endif
3134                     // We also need a register to hold the result
3135                     regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3136                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3137                     goto RETURN_CHECK;
3138                 }
3139 #endif
3140
3141                 /* otherwise must load op1 into a register */
3142                 goto GENERIC_UNARY;
3143
3144             case GT_INTRINSIC:
3145
3146 #ifdef _TARGET_XARCH_
3147                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3148                 {
3149                     // This is a special case to handle the following
3150                     // optimization: conv.i4(round.d(d)) -> round.i(d)
3151                     // if flowgraph 3186
3152
3153                     if (predictReg <= PREDICT_REG)
3154                         predictReg = PREDICT_SCRATCH_REG;
3155
3156                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3157
3158                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3159
3160                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3161                     goto RETURN_CHECK;
3162                 }
3163 #endif
3164                 __fallthrough;
3165
3166             case GT_NEG:
3167 #ifdef _TARGET_ARM_
3168                 if (tree->TypeGet() == TYP_LONG)
3169                 {
3170                     // On ARM this consumes an extra register for the '0' value
3171                     if (predictReg <= PREDICT_REG)
3172                         predictReg = PREDICT_SCRATCH_REG;
3173
3174                     regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3175
3176                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3177
3178                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3179                     goto RETURN_CHECK;
3180                 }
3181 #endif // _TARGET_ARM_
3182
3183                 __fallthrough;
3184
3185             case GT_NOT:
3186             // these unary operators will write new values
3187             // and thus will need a scratch register
3188             GENERIC_UNARY:
3189                 /* generic unary operators */
3190
3191                 if (predictReg <= PREDICT_REG)
3192                     predictReg = PREDICT_SCRATCH_REG;
3193
3194                 __fallthrough;
3195
3196             case GT_NOP:
3197                 // these unary operators do not write new values
3198                 // and thus won't need a scratch register
3199                 CLANG_FORMAT_COMMENT_ANCHOR;
3200
3201 #if OPT_BOOL_OPS
3202                 if (!op1)
3203                 {
3204                     tree->gtUsedRegs = 0;
3205                     regMask          = 0;
3206                     goto RETURN_CHECK;
3207                 }
3208 #endif
3209                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3210                 tree->gtUsedRegs = op1->gtUsedRegs;
3211                 goto RETURN_CHECK;
3212
3213             case GT_IND:
3214             case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3215             {
3216                 bool      intoReg = true;
3217                 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3218
3219                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3220                 {
3221                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3222                 }
3223
3224                 if (predictReg == PREDICT_ADDR)
3225                 {
3226                     intoReg = false;
3227                 }
3228                 else if (predictReg == PREDICT_NONE)
3229                 {
3230                     if (type != TYP_LONG)
3231                     {
3232                         intoReg = false;
3233                     }
3234                     else
3235                     {
3236                         predictReg = PREDICT_REG;
3237                     }
3238                 }
3239
3240                 /* forcing to register? */
3241                 if (intoReg && (type != TYP_LONG))
3242                 {
3243                     rsvdRegs |= RBM_LASTUSE;
3244                 }
3245
3246                 GenTreePtr lenCSE;
3247                 lenCSE = NULL;
3248
3249                 /* check for address mode */
3250                 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3251                 tmpMask = RBM_NONE;
3252
3253 #if CPU_LOAD_STORE_ARCH
3254                 // We may need a scratch register for loading a long
3255                 if (type == TYP_LONG)
3256                 {
3257                     /* This scratch register immediately dies */
3258                     tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3259                 }
3260 #endif // CPU_LOAD_STORE_ARCH
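                // (A sketch of the reasoning: on a load/store architecture a TYP_LONG
                // indirection takes two load instructions, and this extra register keeps the
                // address alive across them, dying as soon as the second load completes.)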
3261
3262 #ifdef _TARGET_ARM_
3263                 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3264                 //
3265                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3266                 {
3267                     /* These integer register(s) immediately die */
3268                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3269                     // Two integer registers are required for a TYP_DOUBLE
3270                     if (type == TYP_DOUBLE)
3271                         tmpMask |=
3272                             rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3273                 }
3274 #endif
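                // For example (an assumption about the ARM sequence): an unaligned TYP_DOUBLE
                // load is performed as two 32-bit integer loads followed by a move into the
                // double register, which is why two integer temps are picked and die at once.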
3275
3276                 /* forcing to register? */
3277                 if (intoReg)
3278                 {
3279                     regMaskTP lockedMask = lockedRegs | rsvdRegs;
3280                     tmpMask |= regMask;
3281
3282                     // We will compute a new regMask that holds the register(s)
3283                     // that we will load the indirection into.
3284                     //
3285                     CLANG_FORMAT_COMMENT_ANCHOR;
3286
3287 #ifndef _TARGET_64BIT_
3288                     if (type == TYP_LONG)
3289                     {
3290                         // We need to use multiple load instructions here:
3291                         // For the first register we cannot choose
3292                         // any registers that are being used in place or
3293                         // any register in the current regMask
3294                         //
3295                         regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3296
3297                         // For the second register we can choose a register that was
3298                         // used in place or any register in the old, now-overwritten regMask
3299                         // but not the same register that we picked above in 'regMask'
3300                         //
3301                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3302                         regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3303                     }
3304                     else
3305 #endif
3306                     {
3307                         // We will use one load instruction here:
3308                         // The load target register can be a register that was used in place
3309                         // or one of the registers from the original regMask.
3310                         //
3311                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3312                         regMask = rpPredictRegPick(type, predictReg, lockedMask);
3313                     }
3314                 }
3315                 else if (predictReg != PREDICT_ADDR)
3316                 {
3317                     /* Unless the caller specified PREDICT_ADDR   */
3318                     /* we don't return the temp registers used    */
3319                     /* to form the address                        */
3320                     regMask = RBM_NONE;
3321                 }
3322             }
3323
3324                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3325
3326                 goto RETURN_CHECK;
3327
3328             case GT_EQ:
3329             case GT_NE:
3330             case GT_LT:
3331             case GT_LE:
3332             case GT_GE:
3333             case GT_GT:
3334
3335 #ifdef _TARGET_X86_
3336                 /* Floating point comparison uses EAX for flags */
3337                 if (varTypeIsFloating(op1->TypeGet()))
3338                 {
3339                     regMask = RBM_EAX;
3340                 }
3341                 else
3342 #endif
3343                     if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3344                 {
3345                     // Some comparisons are converted to ?:
3346                     noway_assert(!fgMorphRelopToQmark(op1));
3347
3348                     if (predictReg <= PREDICT_REG)
3349                         predictReg = PREDICT_SCRATCH_REG;
3350
3351                     // The set instructions need a byte register
3352                     regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3353                 }
3354                 else
3355                 {
3356                     regMask = RBM_NONE;
3357 #ifdef _TARGET_XARCH_
3358                     tmpMask = RBM_NONE;
3359                     // Optimize the compare with a constant cases for xarch
3360                     if (op1->gtOper == GT_CNS_INT)
3361                     {
3362                         if (op2->gtOper == GT_CNS_INT)
3363                             tmpMask =
3364                                 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3365                         rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3366                         tree->gtUsedRegs = op2->gtUsedRegs;
3367                         goto RETURN_CHECK;
3368                     }
3369                     else if (op2->gtOper == GT_CNS_INT)
3370                     {
3371                         rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3372                         tree->gtUsedRegs = op1->gtUsedRegs;
3373                         goto RETURN_CHECK;
3374                     }
3375                     else if (op2->gtOper == GT_CNS_LNG)
3376                     {
3377                         regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3378 #ifdef _TARGET_X86_
3379                         // We also need one extra register to read values from
3380                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3381 #endif // _TARGET_X86_
3382                         tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3383                         goto RETURN_CHECK;
3384                     }
3385 #endif // _TARGET_XARCH_
3386                 }
3387
3388                 unsigned op1TypeSize;
3389                 unsigned op2TypeSize;
3390
3391                 op1TypeSize = genTypeSize(op1->TypeGet());
3392                 op2TypeSize = genTypeSize(op2->TypeGet());
3393
3394                 op1PredictReg = PREDICT_REG;
3395                 op2PredictReg = PREDICT_REG;
3396
3397                 if (tree->gtFlags & GTF_REVERSE_OPS)
3398                 {
3399 #ifdef _TARGET_XARCH_
3400                     if (op1TypeSize == sizeof(int))
3401                         op1PredictReg = PREDICT_NONE;
3402 #endif
3403
3404                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3405                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3406                 }
3407                 else
3408                 {
3409 #ifdef _TARGET_XARCH_
3410                     // For full DWORD compares we can have
3411                     //
3412                     //      op1 is an address mode and op2 is a register
3413                     // or
3414                     //      op1 is a register and op2 is an address mode
3415                     //
3416                     if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3417                     {
3418                         if (op2->gtOper == GT_LCL_VAR)
3419                         {
3420                             unsigned lclNum = op2->gtLclVar.gtLclNum;
3421                             varDsc          = lvaTable + lclNum;
3422                             /* Did we predict that this local will be enregistered? */
3423                             if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3424                             {
3425                                 op1PredictReg = PREDICT_ADDR;
3426                             }
3427                         }
3428                     }
3429                     // Codegen will generate cmp reg,[mem] for 4- or 8-byte types, but not for 1- or 2-byte types
3430                     if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3431                         op2PredictReg = PREDICT_ADDR;
3432 #endif // _TARGET_XARCH_
3433
3434                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3435 #ifdef _TARGET_ARM_
3436                     if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3437 #endif
3438                     {
3439                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3440                     }
3441                 }
3442
3443 #ifdef _TARGET_XARCH_
3444                 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3445                 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3446                 // if one of the registers is small and the types aren't equal.
3447
3448                 if (regMask == RBM_NONE)
3449                 {
3450                     rpPredictReg op1xPredictReg, op2xPredictReg;
3451                     GenTreePtr   op1x, op2x;
3452                     if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3453                     {
3454                         op1xPredictReg = op2PredictReg;
3455                         op2xPredictReg = op1PredictReg;
3456                         op1x           = op2;
3457                         op2x           = op1;
3458                     }
3459                     else
3460                     {
3461                         op1xPredictReg = op1PredictReg;
3462                         op2xPredictReg = op2PredictReg;
3463                         op1x           = op1;
3464                         op2x           = op2;
3465                     }
3466                     if ((op1xPredictReg < PREDICT_REG) &&  // op1 doesn't get a register (probably an indir)
3467                         (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3468                         varTypeIsSmall(op1x->TypeGet()))   // op1 is smaller than an int
3469                     {
3470                         bool needTmp = false;
3471
3472                         // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3473                         // We could predict a byteable register for op2x, but what if we don't get it?
3474                         // So, be conservative and always ask for a temp. There are a couple of small CQ losses as a
3475                         // result.
3476                         if (varTypeIsByte(op1x->TypeGet()))
3477                         {
3478                             needTmp = true;
3479                         }
3480                         else
3481                         {
3482                             if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3483                             {
3484                                 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3485                                     needTmp = true;
3486                             }
3487                             else
3488                             {
3489                                 if (op1x->TypeGet() != op2x->TypeGet())
3490                                     needTmp = true;
3491                             }
3492                         }
3493                         if (needTmp)
3494                         {
3495                             regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3496                         }
3497                     }
3498                 }
3499 #endif // _TARGET_XARCH_
3500
3501                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3502                 goto RETURN_CHECK;
3503
3504             case GT_MUL:
3505
3506 #ifndef _TARGET_AMD64_
3507                 if (type == TYP_LONG)
3508                 {
3509                     assert(tree->gtIsValid64RsltMul());
3510
3511                     /* Strip out the cast nodes */
3512
3513                     noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3514                     op1 = op1->gtCast.CastOp();
3515                     op2 = op2->gtCast.CastOp();
3516 #else
3517                 if (false)
3518                 {
3519 #endif // !_TARGET_AMD64_
3520                 USE_MULT_EAX:
3521
3522 #if defined(_TARGET_X86_)
3523                     // This will be done by a 64-bit imul "imul eax, reg"
3524                     //   (i.e. EDX:EAX = EAX * reg)
3525
3526                     /* Are we supposed to evaluate op2 first? */
3527                     if (tree->gtFlags & GTF_REVERSE_OPS)
3528                     {
3529                         rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3530                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3531                     }
3532                     else
3533                     {
3534                         rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3535                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3536                     }
3537
3538                     /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3539
3540                     tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3541
3542                     /* set regMask to the set of held registers */
3543
3544                     regMask = RBM_PAIR_TMP_LO;
3545
3546                     if (type == TYP_LONG)
3547                         regMask |= RBM_PAIR_TMP_HI;
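                    // i.e. (assuming the usual x86 pair mapping of RBM_PAIR_TMP to EDX:EAX)
                    // a TYP_INT result is held in EAX alone, while a TYP_LONG result
                    // occupies the EDX:EAX pair.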
3548
3549 #elif defined(_TARGET_ARM_)
3550                     // This will be done by a 4-operand multiply
3551
3552                     // Are we supposed to evaluate op2 first?
3553                     if (tree->gtFlags & GTF_REVERSE_OPS)
3554                     {
3555                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3556                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3557                     }
3558                     else
3559                     {
3560                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3561                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3562                     }
3563
3564                     // set regMask to the set of held registers,
3565                     //  the two scratch registers we need to compute the mul result
3566
3567                     regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3568
3569                     // set gtUsedRegs to regMask and the registers needed by op1 and op2
3570
3571                     tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3572
3573 #else // !_TARGET_X86_ && !_TARGET_ARM_
3574 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3575 #endif
3576
3577                     goto RETURN_CHECK;
3578                 }
3579                 else
3580                 {
3581                     /* We use imulEAX for most unsigned multiply operations */
3582                     if (tree->gtOverflow())
3583                     {
3584                         if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3585                         {
3586                             goto USE_MULT_EAX;
3587                         }
3588                     }
3589                 }
3590
3591                 __fallthrough;
3592
3593             case GT_OR:
3594             case GT_XOR:
3595             case GT_AND:
3596
3597             case GT_SUB:
3598             case GT_ADD:
3599                 tree->gtUsedRegs = 0;
3600
3601                 if (predictReg <= PREDICT_REG)
3602                     predictReg = PREDICT_SCRATCH_REG;
3603
3604             GENERIC_BINARY:
3605
3606                 noway_assert(op2);
3607                 if (tree->gtFlags & GTF_REVERSE_OPS)
3608                 {
3609                     op1PredictReg = PREDICT_REG;
3610 #if !CPU_LOAD_STORE_ARCH
3611                     if (genTypeSize(op1->gtType) >= sizeof(int))
3612                         op1PredictReg = PREDICT_NONE;
3613 #endif
3614                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3615                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3616                 }
3617                 else
3618                 {
3619                     op2PredictReg = PREDICT_REG;
3620 #if !CPU_LOAD_STORE_ARCH
3621                     if (genTypeSize(op2->gtType) >= sizeof(int))
3622                         op2PredictReg = PREDICT_NONE;
3623 #endif
3624                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3625 #ifdef _TARGET_ARM_
3626                     // For most ALU operations we can generate a single instruction that encodes
3627                     // a small immediate integer constant value.  (except for multiply)
3628                     //
3629                     if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3630                     {
3631                         ssize_t ival = op2->gtIntCon.gtIconVal;
3632                         if (codeGen->validImmForAlu(ival))
3633                         {
3634                             op2PredictReg = PREDICT_NONE;
3635                         }
3636                         else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3637                                  ((oper == GT_ADD) || (oper == GT_SUB)))
3638                         {
3639                             op2PredictReg = PREDICT_NONE;
3640                         }
3641                     }
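                    // For example (a sketch): on ARM, "add r0, r1, #10" encodes the constant
                    // directly in the instruction, so op2 needs no register at all; whether a
                    // given constant is encodable is exactly what validImmForAlu and
                    // validImmForAdd test above.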
3642                     if (op2PredictReg == PREDICT_NONE)
3643                     {
3644                         op2->gtUsedRegs = RBM_NONE;
3645                     }
3646                     else
3647 #endif
3648                     {
3649                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3650                     }
3651                 }
3652                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3653
3654 #if CPU_HAS_BYTE_REGS
3655                 /* We have special register requirements for byte operations */
3656
3657                 if (varTypeIsByte(tree->TypeGet()))
3658                 {
3659                     /* For 8 bit arithmetic, one operand has to be in a
3660                        byte-addressable register, and the other has to be
3661                        in a byte-addressable reg or in memory. Assume it's in a reg */
3662
3663                     regMaskTP regByteMask = 0;
3664                     regMaskTP op1ByteMask = op1->gtUsedRegs;
3665
3666                     if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3667                     {
3668                         // Pick a Byte register to use for op1
3669                         regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3670                         op1ByteMask = regByteMask;
3671                     }
3672
3673                     if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3674                     {
3675                         // Pick a Byte register to use for op2, avoiding the one used by op1
3676                         regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3677                     }
3678
3679                     if (regByteMask)
3680                     {
3681                         tree->gtUsedRegs |= regByteMask;
3682                         regMask = regByteMask;
3683                     }
3684                 }
3685 #endif
3686                 goto RETURN_CHECK;
3687
3688             case GT_DIV:
3689             case GT_MOD:
3690
3691             case GT_UDIV:
3692             case GT_UMOD:
3693
3694                 /* non-integer division handled in generic way */
3695                 if (!varTypeIsIntegral(type))
3696                 {
3697                     tree->gtUsedRegs = 0;
3698                     if (predictReg <= PREDICT_REG)
3699                         predictReg = PREDICT_SCRATCH_REG;
3700                     goto GENERIC_BINARY;
3701                 }
3702
3703 #ifndef _TARGET_64BIT_
3704
3705                 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3706                 {
3707                     /* Special case:  a long mod with an int-sized constant op2 is done inline
3708                        using idiv or div to avoid a costly call to the helper */
3709
3710                     noway_assert((op2->gtOper == GT_CNS_LNG) &&
3711                                  (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3712
3713 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3714                     if (tree->gtFlags & GTF_REVERSE_OPS)
3715                     {
3716                         tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3717                                                       rsvdRegs | op1->gtRsvdRegs);
3718                         tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3719                     }
3720                     else
3721                     {
3722                         tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3723                         tmpMask |=
3724                             rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3725                     }
3726                     regMask = RBM_PAIR_TMP;
3727 #else // !_TARGET_X86_ && !_TARGET_ARM_
3728 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3729 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3730
3731                     tree->gtUsedRegs =
3732                         (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3733                                        rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3734
3735                     goto RETURN_CHECK;
3736                 }
3737 #endif // _TARGET_64BIT_
3738
3739                 /* there is no divide-by-immediate instruction, so an integer constant
3740                  * divisor that is not a power of two must be forced into a register
3741                  */
3742
3743                 if (op2->OperKind() & GTK_CONST)
3744                 {
3745                     ssize_t ival = op2->gtIntConCommon.IconValue();
3746
3747                     /* Is the divisor a power of 2 ? */
3748
3749                     if (ival > 0 && genMaxOneBit(size_t(ival)))
3750                     {
3751                         goto GENERIC_UNARY;
3752                     }
3753                     else
3754                         op2PredictReg = PREDICT_SCRATCH_REG;
3755                 }
3756                 else
3757                 {
3758                     /* A non-constant divisor must also be enregistered */
3759                     op2PredictReg = PREDICT_REG;
3760                 }
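                // For example (a sketch): "x / 8" can be lowered to shifts plus fix-ups, so
                // the constant needs no register and op1 is treated like a unary operand via
                // GENERIC_UNARY above; any other divisor ends up enregistered.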
3761
3762                 regMaskTP trashedMask;
3763                 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3764                 regMaskTP op1ExcludeMask;
3765                 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3766                 regMaskTP op2ExcludeMask;
3767                 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3768
3769 #ifdef _TARGET_XARCH_
3770                 /*  Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3771                  *  we can safely allow the "b" value to die. Unfortunately, if we simply
3772                  *  mark the node "b" as using EDX, this will not work if "b" is a register
3773                  *  variable that dies with this particular reference. Thus, if we want to
3774                  *  avoid this situation (where we would have to spill the variable from
3775                  *  EDX to someplace else), we need to explicitly mark the interference
3776                  *  of the variable at this point.
3777                  */
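                // For example (a sketch): in "x = a / b", if "b" happened to live in EDX and
                // die at this use, the CDQ would clobber it before the divide executes; the
                // interferences recorded below keep such variables out of EAX and EDX.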
3778
3779                 if (op2->gtOper == GT_LCL_VAR)
3780                 {
3781                     unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3782                     varDsc          = lvaTable + lclNum;
3783                     if (varDsc->lvTracked)
3784                     {
3785 #ifdef DEBUG
3786                         if (verbose)
3787                         {
3788                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3789                                 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3790                                        varDsc->lvVarIndex);
3791                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3792                                 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3793                                        varDsc->lvVarIndex);
3794                         }
3795 #endif
3796                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3797                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3798                     }
3799                 }
3800
3801                 /* set the held register based on opcode */
3802                 if (oper == GT_DIV || oper == GT_UDIV)
3803                     regMask = RBM_EAX;
3804                 else
3805                     regMask    = RBM_EDX;
3806                 trashedMask    = (RBM_EAX | RBM_EDX);
3807                 op1ExcludeMask = 0;
3808                 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3809
3810 #endif // _TARGET_XARCH_
3811
3812 #ifdef _TARGET_ARM_
3813                 trashedMask    = RBM_NONE;
3814                 op1ExcludeMask = RBM_NONE;
3815                 op2ExcludeMask = RBM_NONE;
3816 #endif
3817
3818                 /* set the lvPref reg if possible */
3819                 GenTreePtr dest;
3820                 /*
3821                  *  Walking the gtNext link twice from here should get us back
3822                  *  to our parent node, if this is a simple assignment tree.
3823                  */
3824                 dest = tree->gtNext;
3825                 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3826                     dest->gtNext->gtOp.gtOp2 == tree)
3827                 {
3828                     varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3829                     varDsc->addPrefReg(regMask, this);
3830                 }
3831 #ifdef _TARGET_XARCH_
3832                 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3833 #else
3834                 op1PredictReg        = PREDICT_SCRATCH_REG;
3835 #endif
3836
3837                 /* are we supposed to evaluate op2 first? */
3838                 if (tree->gtFlags & GTF_REVERSE_OPS)
3839                 {
3840                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3841                                                   rsvdRegs | op1->gtRsvdRegs);
3842                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3843                 }
3844                 else
3845                 {
3846                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3847                                                   rsvdRegs | op2->gtRsvdRegs);
3848                     rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3849                 }
3850 #ifdef _TARGET_ARM_
3851                 regMask = tmpMask;
3852 #endif
3853                 /* grab EAX, EDX for this tree node */
3854                 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3855
3856                 goto RETURN_CHECK;
3857
3858             case GT_LSH:
3859             case GT_RSH:
3860             case GT_RSZ:
3861
3862                 if (predictReg <= PREDICT_REG)
3863                     predictReg = PREDICT_SCRATCH_REG;
3864
3865 #ifndef _TARGET_64BIT_
3866                 if (type == TYP_LONG)
3867                 {
3868                     if (op2->IsCnsIntOrI())
3869                     {
3870                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3871                         // no register used by op2
3872                         op2->gtUsedRegs  = 0;
3873                         tree->gtUsedRegs = op1->gtUsedRegs;
3874                     }
3875                     else
3876                     {
3877                         // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3878                         tmpMask = lockedRegs;
3879                         tmpMask &= ~RBM_LNGARG_0;
3880                         tmpMask &= ~RBM_SHIFT_LNG;
3881
3882                         // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3883                         if (tree->gtFlags & GTF_REVERSE_OPS)
3884                         {
3885                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3886                             tmpMask |= RBM_SHIFT_LNG;
3887                             // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
3888                             // Fix 383843 X86/ARM ILGEN
3889                             rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3890                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3891                         }
3892                         else
3893                         {
3894                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3895                             tmpMask |= RBM_LNGARG_0;
3896                             // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3897                             // Fix 383839 ARM ILGEN
3898                             rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3899                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3900                         }
3901                         regMask = RBM_LNGRET; // function return registers
3902                         op1->gtUsedRegs |= RBM_LNGARG_0;
3903                         op2->gtUsedRegs |= RBM_SHIFT_LNG;
3904
3905                         tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3906
3907                         // We are using a helper function to do the shift:
3908                         //
3909                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3910                     }
3911                 }
3912                 else
3913 #endif // _TARGET_64BIT_
3914                 {
3915 #ifdef _TARGET_XARCH_
3916                     if (!op2->IsCnsIntOrI())
3917                         predictReg = PREDICT_NOT_REG_ECX;
3918 #endif
3919
3920                 HANDLE_SHIFT_COUNT:
3921                     // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3922
3923                     regMaskTP tmpRsvdRegs;
3924
3925                     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3926                     {
3927                         regMask     = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3928                         rsvdRegs    = RBM_LASTUSE;
3929                         tmpRsvdRegs = RBM_NONE;
3930                     }
3931                     else
3932                     {
3933                         regMask = RBM_NONE;
3934                         // Special case: op1 is a constant
3935                         if (op1->IsCnsIntOrI())
3936                             tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3937                                                        // System.Xml.Schema.BitSet:Get(int):bool
3938                         else
3939                             tmpRsvdRegs = op1->gtRsvdRegs;
3940                     }
3941
3942                     op2Mask = RBM_NONE;
3943                     if (!op2->IsCnsIntOrI())
3944                     {
3945                         if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3946                         {
3947                             op2PredictReg = PREDICT_REG_SHIFT;
3948                         }
3949                         else
3950                         {
3951                             op2PredictReg = PREDICT_REG;
3952                         }
3953
3954                         /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3955                         op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3956
3957                         // If our target arch has a REG_SHIFT register, then:
3958                         //     we set the PrefReg when we have a LclVar for op2, and
3959                         //     we add an interference with REG_SHIFT for any other LclVars alive at op2
3960                         if (REG_SHIFT != REG_NA)
3961                         {
3962                             VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3963
3964                             while (op2->gtOper == GT_COMMA)
3965                             {
3966                                 op2 = op2->gtOp.gtOp2;
3967                             }
3968
3969                             if (op2->gtOper == GT_LCL_VAR)
3970                             {
3971                                 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3972                                 varDsc->setPrefReg(REG_SHIFT, this);
3973                                 if (varDsc->lvTracked)
3974                                 {
3975                                     VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3976                                 }
3977                             }
3978
3979                             // Ensure that we have a register interference with the LclVars in the tree's live set,
3980                             // excluding the LclVar that was used for the shift amount as it is read-only
3981                             // and can be kept alive through the shift operation
3982                             //
3983                             rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3984                             // In case op2Mask doesn't contain the required shift register,
3985                             // we will OR it in now.
3986                             op2Mask |= RBM_SHIFT;
3987                         }
3988                     }
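                    // Concretely (an x86 sketch): REG_SHIFT is ECX there, so a variable shift
                    // count is steered into ECX, and every other local live across the shift
                    // is marked as interfering with ECX.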
3989
3990                     if (tree->gtFlags & GTF_REVERSE_OPS)
3991                     {
3992                         assert(regMask == RBM_NONE);
3993                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3994                     }
3995
3996 #if CPU_HAS_BYTE_REGS
3997                     if (varTypeIsByte(type))
3998                     {
3999                         // Fix 383789 X86 ILGEN
4000                         // Fix 383813 X86 ILGEN
4001                         // Fix 383828 X86 ILGEN
4002                         if (op1->gtOper == GT_LCL_VAR)
4003                         {
4004                             varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4005                             if (varDsc->lvTracked)
4006                             {
4007                                 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4008
4009                                 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4010                                 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4011                             }
4012                         }
4013                         if ((regMask & RBM_BYTE_REGS) == 0)
4014                         {
4015                             // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4016                             // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4017                             //
4018                             regMask |=
4019                                 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4020                         }
4021                     }
4022 #endif
4023                     tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4024                 }
4025
4026                 goto RETURN_CHECK;
4027
4028             case GT_COMMA:
4029                 if (tree->gtFlags & GTF_REVERSE_OPS)
4030                 {
4031                     if (predictReg == PREDICT_NONE)
4032                     {
4033                         predictReg = PREDICT_REG;
4034                     }
4035                     else if (rpHasVarIndexForPredict(predictReg))
4036                     {
4037                         /* Don't propagate the target register use into a GT_COMMA */
4038                         predictReg = PREDICT_SCRATCH_REG;
4039                     }
4040
4041                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4042                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4043                 }
4044                 else
4045                 {
4046                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4047
4048                     /* CodeGen will enregister the op2 side of a GT_COMMA */
4049                     if (predictReg == PREDICT_NONE)
4050                     {
4051                         predictReg = PREDICT_REG;
4052                     }
4053                     else if (rpHasVarIndexForPredict(predictReg))
4054                     {
4055                         /* Don't propagate the target register use into a GT_COMMA */
4056                         predictReg = PREDICT_SCRATCH_REG;
4057                     }
4058
4059                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4060                 }
4061                 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4062                 //
4063                 tree->gtUsedRegs = op2->gtUsedRegs;
4064                 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4065                 {
4066                     LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4067
4068                     if (op2VarDsc->lvTracked)
4069                     {
4070                         VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4071                         rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4072                     }
4073                 }
4074                 goto RETURN_CHECK;
4075
4076             case GT_QMARK:
4077             {
4078                 noway_assert(op1 != NULL && op2 != NULL);
4079
4080                 /*
4081                  *  If gtUsedRegs conflicts with lockedRegs,
4082                  *  then we are going to have to spill some registers
4083                  *  into the non-trashed register set to keep them alive
4084                  */
4085                 unsigned spillCnt;
4086                 spillCnt = 0;
4087                 regMaskTP spillRegs;
4088                 spillRegs = lockedRegs & tree->gtUsedRegs;
4089
4090                 while (spillRegs)
4091                 {
4092                     /* Find the next register that needs to be spilled */
4093                     tmpMask = genFindLowestBit(spillRegs);
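                    // genFindLowestBit isolates the lowest set bit of a mask
                    // (e.g. 0b0110 -> 0b0010), yielding a single-register mask.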
4094
4095 #ifdef DEBUG
4096                     if (verbose)
4097                     {
4098                         printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4099                         gtDispTree(tree, 0, NULL, true);
4100                     }
4101 #endif
4102                     /* In Codegen it will typically introduce a spill temp here */
4103                     /* rather than relocating the register to a non trashed reg */
4104                     rpPredictSpillCnt++;
4105                     spillCnt++;
4106
4107                     /* Remove it from the spillRegs and lockedRegs*/
4108                     spillRegs &= ~tmpMask;
4109                     lockedRegs &= ~tmpMask;
4110                 }
4111                 {
4112                     VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4113
4114                     /* Evaluate the <cond> subtree */
4115                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4116                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4117                     tree->gtUsedRegs = op1->gtUsedRegs;
4118
4119                     noway_assert(op2->gtOper == GT_COLON);
4120                     if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4121                     {
4122                         // Don't try to target the register specified in predictReg when we have complex subtrees
4123                         //
4124                         predictReg = PREDICT_SCRATCH_REG;
4125                     }
4126                     GenTreePtr elseTree = op2->AsColon()->ElseNode();
4127                     GenTreePtr thenTree = op2->AsColon()->ThenNode();
4128
4129                     noway_assert(thenTree != NULL && elseTree != NULL);
4130
4131                     // Update compCurLife to only those vars live on the <then> subtree
4132
4133                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4134
4135                     if (type == TYP_VOID)
4136                     {
4137                         /* Evaluate the <then> subtree */
4138                         rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4139                         regMask    = RBM_NONE;
4140                         predictReg = PREDICT_NONE;
4141                     }
4142                     else
4143                     {
4144                         // A mask to use to force the predictor to choose low registers (to reduce code size)
4145                         regMaskTP avoidRegs = RBM_NONE;
4146 #ifdef _TARGET_ARM_
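                        // R12 and LR generally require 32-bit Thumb-2 encodings, so steering
                        // the predictor away from them favors the smaller 16-bit encodings.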
4147                         avoidRegs = (RBM_R12 | RBM_LR);
4148 #endif
4149                         if (predictReg <= PREDICT_REG)
4150                             predictReg = PREDICT_SCRATCH_REG;
4151
4152                         /* Evaluate the <then> subtree */
4153                         regMask =
4154                             rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4155
4156                         if (regMask)
4157                         {
4158                             rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4159                             if (op1PredictReg != PREDICT_NONE)
4160                                 predictReg = op1PredictReg;
4161                         }
4162                     }
4163
4164                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4165
4166                     /* Evaluate the <else> subtree */
4167                     // First record the post-then liveness, and reset the current liveness to the else
4168                     // branch liveness.
4169                     CLANG_FORMAT_COMMENT_ANCHOR;
4170
4171 #ifdef DEBUG
4172                     VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4173 #endif
4174
4175                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4176
4177                     rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4178                     tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4179
4180                     // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4181                     // They each have only one successor, which they share.  Their live-out sets must equal the
4182                     // live-in set of this virtual successor block, and thus must be the same.  We can assert
4183                     // that equality here.
4184                     assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4185
4186                     if (spillCnt > 0)
4187                     {
4188                         regMaskTP reloadMask = RBM_NONE;
4189
4190                         while (spillCnt)
4191                         {
4192                             regMaskTP reloadReg;
4193
4194                             /* Get an extra register to hold it */
4195                             reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4196 #ifdef DEBUG
4197                             if (verbose)
4198                             {
4199                                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4200                                 gtDispTree(tree, 0, NULL, true);
4201                             }
4202 #endif
4203                             reloadMask |= reloadReg;
4204
4205                             spillCnt--;
4206                         }
4207
4208                         /* update the gtUsedRegs mask */
4209                         tree->gtUsedRegs |= reloadMask;
4210                     }
4211                 }
4212
4213                 goto RETURN_CHECK;
4214             }
4215             case GT_RETURN:
4216                 tree->gtUsedRegs = RBM_NONE;
4217                 regMask          = RBM_NONE;
4218
4219                 /* Is there a return value? */
4220                 if (op1 != NULL)
4221                 {
4222 #if FEATURE_FP_REGALLOC
4223                     if (varTypeIsFloating(type))
4224                     {
4225                         predictReg = PREDICT_FLTRET;
4226                         if (type == TYP_FLOAT)
4227                             regMask = RBM_FLOATRET;
4228                         else
4229                             regMask = RBM_DOUBLERET;
4230                     }
4231                     else
4232 #endif
4233                         if (isRegPairType(type))
4234                     {
4235                         predictReg = PREDICT_LNGRET;
4236                         regMask    = RBM_LNGRET;
4237                     }
4238                     else
4239                     {
4240                         predictReg = PREDICT_INTRET;
4241                         regMask    = RBM_INTRET;
4242                     }
4243                     if (info.compCallUnmanaged)
4244                     {
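                        // The PInvoke TCB and frame registers must stay live while the
                        // return value is evaluated, so treat them as locked here.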
4245                         lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4246                     }
4247                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4248                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4249                 }
4250
4251 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4252                 // On ARM under a profiler, emitting the Leave callback requires RBM_PROFILER_RET_USED.
4253                 // We could trim this set based on whether the return value is int/long or void, but to
4254                 // keep it simple we mark the entire RBM_PROFILER_RET_USED set as used regs here.
4255                 if (compIsProfilerHookNeeded())
4256                 {
4257                     tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4258                 }
4259
4260 #endif
4261                 goto RETURN_CHECK;
4262
4263             case GT_RETFILT:
4264                 if (op1 != NULL)
4265                 {
4266                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4267                     regMask          = genReturnRegForTree(tree);
4268                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4269                     goto RETURN_CHECK;
4270                 }
4271                 tree->gtUsedRegs = 0;
4272                 regMask          = 0;
4273
4274                 goto RETURN_CHECK;
4275
4276             case GT_JTRUE:
4277                 /* This must be a test of a relational operator */
4278
4279                 noway_assert(op1->OperIsCompare());
4280
4281                 /* Only condition code set by this operation */
4282
4283                 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4284
4285                 tree->gtUsedRegs = op1->gtUsedRegs;
4286                 regMask          = 0;
4287
4288                 goto RETURN_CHECK;
4289
4290             case GT_SWITCH:
4291                 noway_assert(type <= TYP_INT);
4292                 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4293 #ifdef _TARGET_ARM_
4294                 {
4295                     regMask          = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4296                     unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4297                     if (jumpCnt > 2)
4298                     {
4299                         // Table based switch requires an extra register for the table base
4300                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4301                     }
4302                     tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4303                 }
4304 #else  // !_TARGET_ARM_
4305                 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4306                 tree->gtUsedRegs = op1->gtUsedRegs;
4307 #endif // _TARGET_ARM_
4308                 regMask = 0;
4309                 goto RETURN_CHECK;
4310
4311             case GT_CKFINITE:
4312                 if (predictReg <= PREDICT_REG)
4313                     predictReg = PREDICT_SCRATCH_REG;
4314
4315                 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4316                 // Need a reg to load exponent into
4317                 regMask          = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4318                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4319                 goto RETURN_CHECK;
4320
4321             case GT_LCLHEAP:
4322                 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4323                 op2Mask = 0;
4324
4325 #ifdef _TARGET_ARM_
4326                 if (info.compInitMem)
4327                 {
4328                     // We zero out two registers in the ARM codegen path
4329                     op2Mask |=
4330                         rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4331                 }
4332 #endif
4333
4334                 op1->gtUsedRegs |= (regMaskSmall)regMask;
4335                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4336
4337                 // The result will be put in the reg we picked for the size
4338                 // regMask = <already set as we want it to be>
4339
4340                 goto RETURN_CHECK;
4341
4342             case GT_OBJ:
4343             {
4344 #ifdef _TARGET_ARM_
4345                 if (predictReg <= PREDICT_REG)
4346                     predictReg = PREDICT_SCRATCH_REG;
4347
4348                 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4349                                                           // registers (to reduce code size)
4350                 regMask = RBM_NONE;
4351                 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4352 #endif
4353
4354                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4355                 {
4356                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4357                 }
4358
4359 #ifdef _TARGET_ARM_
4360                 unsigned  objSize   = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4361                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4362                 // If it has one bit set, and that's an arg reg...
4363                 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4364                 {
4365                     // We are passing the 'obj' in the argument registers
4366                     //
4367                     regNumber rn = genRegNumFromMask(preferReg);
4368
4369                     //  Add the registers used to pass the 'obj' to regMask.
4370                     for (unsigned i = 0; i < objSize / 4; i++)
4371                     {
4372                         if (rn == MAX_REG_ARG)
4373                             break;
4374                         // Otherwise...
4375                         regMask |= genRegMask(rn);
4376                         rn = genRegArgNext(rn);
4377                     }
4378                 }
4379                 else
4380                 {
4381                     // We are passing the 'obj' in the outgoing arg space
4382                     // We will need one register to load into unless the 'obj' size is 4 or less.
4383                     //
4384                     if (objSize > 4)
4385                     {
4386                         regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4387                     }
4388                 }
4389                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4390                 goto RETURN_CHECK;
4391 #else  // !_TARGET_ARM_
4392                 goto GENERIC_UNARY;
4393 #endif // _TARGET_ARM_
4394             }
4395
4396             case GT_MKREFANY:
4397             {
4398 #ifdef _TARGET_ARM_
4399                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4400                 regMask             = RBM_NONE;
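                // ((preferReg - 1) & preferReg) == 0 holds exactly when preferReg has
                // at most one bit set, i.e. when it names a single register.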
4401                 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4402                 {
4403                     // A MKREFANY takes up two registers.
4404                     regNumber rn = genRegNumFromMask(preferReg);
4405                     regMask      = RBM_NONE;
4406                     if (rn < MAX_REG_ARG)
4407                     {
4408                         regMask |= genRegMask(rn);
4409                         rn = genRegArgNext(rn);
4410                         if (rn < MAX_REG_ARG)
4411                             regMask |= genRegMask(rn);
4412                     }
4413                 }
4414                 if (regMask != RBM_NONE)
4415                 {
4416                     // Condensation of GENERIC_BINARY path.
4417                     assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4418                     op2PredictReg        = PREDICT_REG;
4419                     regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4420                     rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4421                     regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4422                     tree->gtUsedRegs = (regMaskSmall)regMask;
4423                     goto RETURN_CHECK;
4424                 }
4425                 tree->gtUsedRegs = op1->gtUsedRegs;
4426 #endif // _TARGET_ARM_
4427                 goto GENERIC_BINARY;
4428             }
4429
4430             case GT_BOX:
4431                 goto GENERIC_UNARY;
4432
4433             case GT_LOCKADD:
4434                 goto GENERIC_BINARY;
4435
4436             case GT_XADD:
4437             case GT_XCHG:
4438                 // Ensure we can write to op2.  op2 will hold the output.
4439                 if (predictReg < PREDICT_SCRATCH_REG)
4440                     predictReg = PREDICT_SCRATCH_REG;
4441
4442                 if (tree->gtFlags & GTF_REVERSE_OPS)
4443                 {
4444                     op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4445                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4446                 }
4447                 else
4448                 {
4449                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4450                     op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4451                 }
4452                 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4453                 goto RETURN_CHECK;
4454
4455             case GT_ARR_LENGTH:
4456                 goto GENERIC_UNARY;
4457
4458             case GT_INIT_VAL:
4459                 // This unary operator simply passes through the value from its child (much like GT_NOP)
4460                 // and thus won't need a scratch register.
4461                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4462                 tree->gtUsedRegs = op1->gtUsedRegs;
4463                 goto RETURN_CHECK;
4464
4465             default:
4466 #ifdef DEBUG
4467                 gtDispTree(tree);
4468 #endif
4469                 noway_assert(!"unexpected simple operator in reg use prediction");
4470                 break;
4471         }
4472     }
4473
4474     /* See what kind of a special operator we have here */
4475
4476     switch (oper)
4477     {
4478         GenTreePtr      args;
4479         GenTreeArgList* list;
4480         regMaskTP       keepMask;
4481         unsigned        regArgsNum;
4482         int             regIndex;
4483         regMaskTP       regArgMask;
4484         regMaskTP       curArgMask;
4485
4486         case GT_CALL:
4487
4488         {
4489
4490             /* initialize so we can just OR in various bits */
4491             tree->gtUsedRegs = RBM_NONE;
4492
4493 #if GTF_CALL_REG_SAVE
4494             /*
4495              *  Unless the GTF_CALL_REG_SAVE flag is set,
4496              *  we can't preserve the RBM_CALLEE_TRASH registers.
4497              *  (likewise we can't preserve the return registers)
4498              *  So we remove them from the lockedRegs set and
4499              *  record any of them in the keepMask
4500              */
4501
4502             if (tree->gtFlags & GTF_CALL_REG_SAVE)
4503             {
4504                 regMaskTP trashMask = genReturnRegForTree(tree);
4505
4506                 keepMask = lockedRegs & trashMask;
4507                 lockedRegs &= ~trashMask;
4508             }
4509             else
4510 #endif
4511             {
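                // Calls trash RBM_CALLEE_TRASH, so those registers cannot stay locked
                // across the call; remember them in keepMask and re-lock them afterwards.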
4512                 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4513                 lockedRegs &= ~RBM_CALLEE_TRASH;
4514             }
4515
4516             regArgsNum = 0;
4517             regIndex   = 0;
4518
4519             /* Is there an object pointer? */
4520             if (tree->gtCall.gtCallObjp)
4521             {
4522                 /* Evaluate the instance pointer first */
4523
4524                 args = tree->gtCall.gtCallObjp;
4525
4526                 /* the objPtr always goes to an integer register (through temp or directly) */
4527                 noway_assert(regArgsNum == 0);
4528                 regArgsNum++;
4529
4530                 /* Must be passed in a register */
4531
4532                 noway_assert(args->gtFlags & GTF_LATE_ARG);
4533
4534                 /* Must be either a deferred reg arg node or a GT_ASG node */
4535
4536                 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4537                              args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4538
4539                 if (!args->IsArgPlaceHolderNode())
4540                 {
4541                     rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4542                 }
4543             }
4544             VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4545             VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
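            // Snapshot rpUseInPlace so it can be restored after each argument is processed.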
4546
4547             /* process argument list */
4548             for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4549             {
4550                 args = list->Current();
4551
4552                 if (args->gtFlags & GTF_LATE_ARG)
4553                 {
4554                     /* Must be either a Placeholder/NOP node or a GT_ASG node */
4555
4556                     noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4557                                  args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4558
4559                     if (!args->IsArgPlaceHolderNode())
4560                     {
4561                         rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4562                     }
4563
4564                     regArgsNum++;
4565                 }
4566                 else
4567                 {
4568 #ifdef FEATURE_FIXED_OUT_ARGS
4569                     // We'll store this argument into the outgoing argument area
4570                     // It needs to be in a register to be stored.
4571                     //
4572                     predictReg = PREDICT_REG;
4573
4574 #else // !FEATURE_FIXED_OUT_ARGS
4575                     // We'll generate a push for this argument
4576                     //
4577                     predictReg = PREDICT_NONE;
4578                     if (varTypeIsSmall(args->TypeGet()))
4579                     {
4580                         /* We may need to sign or zero extend a small type using a register */
4581                         predictReg = PREDICT_SCRATCH_REG;
4582                     }
4583 #endif
4584
4585                     rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4586                 }
4587                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4588                 tree->gtUsedRegs |= args->gtUsedRegs;
4589             }
4590
4591             /* Is there a late argument list */
4592
4593             regIndex   = 0;
4594             regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4595             args       = NULL;
4596
4597             /* process the late argument list */
4598             for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4599             {
4600                 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4601                 LclVarDsc* promotedStructLocal = NULL;
4602
4603                 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4604                 tmpMask    = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4605
4606                 assert(list->OperIsList());
4607
4608                 args = list->Current();
4609                 list = list->Rest();
4610
4611                 assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
4612
4613                 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4614                 assert(curArgTabEntry);
4615
4616                 regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
4617                 unsigned  numSlots =
4618                     curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4619
4620                 rpPredictReg argPredictReg;
4621                 regMaskTP    avoidReg = RBM_NONE;
4622
4623                 if (regNum != REG_STK)
4624                 {
4625                     argPredictReg = rpGetPredictForReg(regNum);
4626                     curArgMask |= genRegMask(regNum);
4627                 }
4628                 else
4629                 {
4630                     assert(numSlots > 0);
4631                     argPredictReg = PREDICT_NONE;
4632 #ifdef _TARGET_ARM_
4633                     // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4634                     avoidReg = (RBM_R12 | RBM_LR);
4635 #endif
4636                 }
4637
4638 #ifdef _TARGET_ARM_
4639                 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4640                 //
4641                 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4642                 {
4643                     // 64-bit longs and doubles require 2 consecutive argument registers
4644                     curArgMask |= genRegMask(REG_NEXT(regNum));
4645                 }
4646                 else if (args->TypeGet() == TYP_STRUCT)
4647                 {
4648                     GenTreePtr argx       = args;
4649                     GenTreePtr lclVarTree = NULL;
4650
4651                     /* The GT_OBJ may be a child of a GT_COMMA */
4652                     while (argx->gtOper == GT_COMMA)
4653                     {
4654                         argx = argx->gtOp.gtOp2;
4655                     }
4656                     unsigned originalSize = 0;
4657
4658                     if (argx->gtOper == GT_OBJ)
4659                     {
4660                         originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4661
4662                         // Is it the address of a promoted struct local?
4663                         if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4664                         {
4665                             lclVarTree        = argx->gtObj.gtOp1->gtOp.gtOp1;
4666                             LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4667                             if (varDsc->lvPromoted)
4668                                 promotedStructLocal = varDsc;
4669                         }
4670                     }
4671                     else if (argx->gtOper == GT_LCL_VAR)
4672                     {
4673                         varDsc       = lvaTable + argx->gtLclVarCommon.gtLclNum;
4674                         originalSize = varDsc->lvSize();
4675
4676                         // Is it a promoted struct local?
4677                         if (varDsc->lvPromoted)
4678                             promotedStructLocal = varDsc;
4679                     }
4680                     else if (argx->gtOper == GT_MKREFANY)
4681                     {
4682                         originalSize = 2 * TARGET_POINTER_SIZE;
4683                     }
4684                     else
4685                     {
4686                         noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4687                     }
4688
4689                     // We only pass arguments differently if the argument is a struct local that is "independently"
4690                     // promoted, which allows its field locals to be independently enregistered.
4691                     if (promotedStructLocal != NULL)
4692                     {
4693                         if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4694                             promotedStructLocal = NULL;
4695                     }
4696
4697                     unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
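                    // Round the struct size up to a whole number of pointer-sized slots;
                    // each slot is passed in one register or one outgoing stack slot.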
4698
4699                     // Are we passing a TYP_STRUCT in multiple integer registers?
4700                     // If so, set up curArgMask to reflect this.
4701                     // Also slots is updated to reflect the number of outgoing arg slots that we will write
4702                     if (regNum != REG_STK)
4703                     {
4704                         regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
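                        // HFA arguments are passed in floating-point registers, so the
                        // last usable argument register differs from the integer case.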
4705                         assert(genIsValidReg(regNum));
4706                         regNumber nextReg = REG_NEXT(regNum);
4707                         slots--;
4708                         while (slots > 0 && nextReg <= regLast)
4709                         {
4710                             curArgMask |= genRegMask(nextReg);
4711                             nextReg = REG_NEXT(nextReg);
4712                             slots--;
4713                         }
4714                     }
4715
4716                     if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4717                     {
4718                         // All or a portion of this struct will be placed in the argument registers indicated by
4719                         // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4720                         // that the second arg to be evaluated interferes with the reg for the first, the third with
4721                         // the regs for the first and second, etc. But since we always place the stack slots before
4722                         // placing the register slots we do not add interferences for any part of the struct that gets
4723                         // passed on the stack.
4724
4725                         argPredictReg =
4726                             PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
4727                         regMaskTP prevArgMask = RBM_NONE;
4728                         for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4729                         {
4730                             LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4731                             if (fieldVarDsc->lvTracked)
4732                             {
4733                                 assert(lclVarTree != NULL);
4734                                 if (prevArgMask != RBM_NONE)
4735                                 {
4736                                     rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4737                                                                      DEBUGARG("fieldVar/argReg"));
4738                                 }
4739                             }
4740                             // Now see how many registers this uses up.
4741                             unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4742                             unsigned nextAfterLastRegOffset =
4743                                 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4744                                 TARGET_POINTER_SIZE;
4745                             unsigned nextAfterLastArgRegOffset =
4746                                 min(nextAfterLastRegOffset,
4747                                     genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
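                            // Clamp so that we don't walk past the last argument register
                            // when marking prevArgMask below.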
4748
4749                             for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4750                                  regOffset++)
4751                             {
4752                                 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4753                             }
4754
4755                             if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4756                             {
4757                                 break;
4758                             }
4759
4760                             if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4761                             {
4762                                 // Add the argument register used here as a preferred register for this fieldVarDsc
4763                                 //
4764                                 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4765                                 fieldVarDsc->setPrefReg(firstRegUsed, this);
4766                             }
4767                         }
4768                         compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4769                     }
4770
4771                     // If slots is greater than zero then part or all of this TYP_STRUCT
4772                     // argument is passed in the outgoing argument area (except for an HFA arg).
4773                     //
4774                     if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4775                     {
4776                         // We will need a register to address the TYP_STRUCT
4777                         // Note that we can use an argument register in curArgMask as in
4778                         // codegen we pass the stack portion of the argument before we
4779                         // setup the register part.
4780                         //
4781
4782                         // Force the predictor to choose a LOW_REG here to reduce code bloat
4783                         avoidReg = (RBM_R12 | RBM_LR);
4784
4785                         assert(tmpMask == RBM_NONE);
4786                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4787
4788                         // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4789                         // arg area
4790                         if (slots > 1)
4791                         {
4792                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4793                                                         lockedRegs | regArgMask | tmpMask | avoidReg);
4794                         }
4795                     }
4796                 } // (args->TypeGet() == TYP_STRUCT)
4797 #endif            // _TARGET_ARM_
4798
4799                 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4800                 // as we have already calculated the correct tmpMask and curArgMask values and
4801                 // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
4802                 //
4803                 if (promotedStructLocal == NULL)
4804                 {
4805                     /* Target the appropriate argument register */
4806                     tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4807                 }
4808
4809                 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4810                 // for the duration of the OBJ.
4811                 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4812                 {
4813                     GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4814                     assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4815                     compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4816                 }
4817
4818                 regArgMask |= curArgMask;
4819                 args->gtUsedRegs |= (tmpMask | regArgMask);
4820                 tree->gtUsedRegs |= args->gtUsedRegs;
4821                 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4822
4823                 if (args->gtUsedRegs != RBM_NONE)
4824                 {
4825                     // Add register interference with the set of registers used or in use when we evaluated
4826                     // the current arg, with whatever is alive after the current arg
4827                     //
4828                     rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4829                 }
4830                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4831             }
4832             assert(list == NULL);
4833
4834 #ifdef LEGACY_BACKEND
4835 #if CPU_LOAD_STORE_ARCH
4836 #ifdef FEATURE_READYTORUN_COMPILER
4837             if (tree->gtCall.IsR2RRelativeIndir())
4838             {
4839                 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4840             }
4841 #endif // FEATURE_READYTORUN_COMPILER
4842 #endif // CPU_LOAD_STORE_ARCH
4843 #endif // LEGACY_BACKEND
4844
4845             regMaskTP callAddrMask;
4846             callAddrMask = RBM_NONE;
4847 #if CPU_LOAD_STORE_ARCH
4848             predictReg = PREDICT_SCRATCH_REG;
4849 #else
4850             predictReg       = PREDICT_NONE;
4851 #endif
4852
4853             switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4854             {
4855                 case GTF_CALL_VIRT_STUB:
4856
4857                     // We only want to record an interference between the virtual stub
4858                     // param reg and anything that's live AFTER the call, but we've not
4859                     // yet processed the indirect target.  So add the virtual stub param's
4860                     // register mask to interferingRegs.
4861                     interferingRegs |= virtualStubParamInfo->GetRegMask();
4862 #ifdef DEBUG
4863                     if (verbose)
4864                         printf("Adding interference with Virtual Stub Param\n");
4865 #endif
4866                     codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4867
4868                     if (tree->gtCall.gtCallType == CT_INDIRECT)
4869                     {
4870                         predictReg = virtualStubParamInfo->GetPredict();
4871                     }
4872                     break;
4873
4874                 case GTF_CALL_VIRT_VTABLE:
4875                     predictReg = PREDICT_SCRATCH_REG;
4876                     break;
4877
4878                 case GTF_CALL_NONVIRT:
4879                     predictReg = PREDICT_SCRATCH_REG;
4880                     break;
4881             }
4882
4883             if (tree->gtCall.gtCallType == CT_INDIRECT)
4884             {
4885 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4886                 if (tree->gtCall.gtCallCookie)
4887                 {
4888                     codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4889
4890                     callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4891                                                         lockedRegs | regArgMask, RBM_LASTUSE);
4892
4893                     // Just in case we predict some other registers, force interference with our two special
4894                     // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4895                     callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4896
4897                     predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4898                 }
4899 #endif
4900                 callAddrMask |=
4901                     rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4902             }
4903             else if (predictReg != PREDICT_NONE)
4904             {
4905                 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4906             }
4907
4908             if (tree->gtFlags & GTF_CALL_UNMANAGED)
4909             {
4910                 // Need a register for tcbReg
4911                 callAddrMask |=
4912                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4913 #if CPU_LOAD_STORE_ARCH
4914                 // Need an extra register for tmpReg
4915                 callAddrMask |=
4916                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4917 #endif
4918             }
4919
4920             tree->gtUsedRegs |= callAddrMask;
4921
4922             /* After the call restore the original value of lockedRegs */
4923             lockedRegs |= keepMask;
4924
4925             /* set the return register */
4926             regMask = genReturnRegForTree(tree);
4927
4928             if (regMask & rsvdRegs)
4929             {
4930                 // We will need to relocate the return register value
4931                 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4932 #if FEATURE_FP_REGALLOC
4933                 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4934 #endif
4935                 regMask = RBM_NONE;
4936
4937                 if (intRegMask)
4938                 {
4939                     if (intRegMask == RBM_INTRET)
4940                     {
4941                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4942                     }
4943                     else if (intRegMask == RBM_LNGRET)
4944                     {
4945                         regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4946                     }
4947                     else
4948                     {
4949                         noway_assert(!"unexpected return regMask");
4950                     }
4951                 }
4952
4953 #if FEATURE_FP_REGALLOC
4954                 if (floatRegMask)
4955                 {
4956                     if (floatRegMask == RBM_FLOATRET)
4957                     {
4958                         regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4959                     }
4960                     else if (floatRegMask == RBM_DOUBLERET)
4961                     {
4962                         regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4963                     }
4964                     else // HFA return case
4965                     {
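                        // An HFA is returned in multiple consecutive float registers;
                        // reserve one scratch float register per register in the mask.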
4966                         for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4967                         {
4968                             regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4969                         }
4970                     }
4971                 }
4972 #endif
4973             }
4974
4975             /* the return registers (if any) are killed */
4976             tree->gtUsedRegs |= regMask;
4977
4978 #if GTF_CALL_REG_SAVE
4979             if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4980 #endif
4981             {
4982                 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4983                 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4984             }
4985         }
4986
4987 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4988             // Mark required registers for emitting tailcall profiler callback as used
4989             if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4990             {
4991                 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4992             }
4993 #endif
4994             break;
4995
4996         case GT_ARR_ELEM:
4997
4998             // Figure out which registers can't be touched
4999             unsigned dim;
5000             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5001                 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5002
5003             regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5004
5005             regMaskTP dimsMask;
5006             dimsMask = 0;
5007
5008 #if CPU_LOAD_STORE_ARCH
5009             // We need a register to load the bounds of the MD array
5010             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5011 #endif
5012
5013             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5014             {
5015                 /* We need scratch registers to compute index-lower_bound.
5016                    Also, gtArrInds[0]'s register will be used as the second
5017                    addressability register (besides gtArrObj's) */
5018
5019                 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5020                                                         lockedRegs | regMask | dimsMask, rsvdRegs);
5021                 if (dim == 0)
5022                     regMask |= dimMask;
5023
5024                 dimsMask |= dimMask;
5025             }
5026 #ifdef _TARGET_XARCH_
5027             // INS_imul doesn't have an immediate constant.
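            // jitIsScaleIndexMul is true for element sizes that can be encoded as an
            // addressing-mode scale (1, 2, 4 or 8); other sizes need a scratch register
            // to hold the multiplier.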
5028             if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5029                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5030 #endif
5031             tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5032             break;
5033
5034         case GT_CMPXCHG:
5035         {
5036 #ifdef _TARGET_XARCH_
5037             rsvdRegs |= RBM_EAX;
5038 #endif
5039             if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5040             {
5041                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5042             }
5043             else
5044             {
5045                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5046             }
5047             op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5048
5049 #ifdef _TARGET_XARCH_
5050             rsvdRegs &= ~RBM_EAX;
5051             tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5052                                           rsvdRegs | regMask | op2Mask);
5053             tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5054             predictReg       = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5055 #else
5056             tmpMask          = 0;
5057             tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5058 #endif
5059         }
5060         break;
5061
5062         case GT_ARR_BOUNDS_CHECK:
5063         {
5064             regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5065             regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5066             rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5067
5068             tree->gtUsedRegs =
5069                 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5070         }
5071         break;
5072
5073         default:
5074             NO_WAY("unexpected special operator in reg use prediction");
5075             break;
5076     }
5077
5078 RETURN_CHECK:
5079
5080 #ifdef DEBUG
5081     /* make sure we set them to something reasonable */
5082     if (tree->gtUsedRegs & RBM_ILLEGAL)
5083         noway_assert(!"used regs not set properly in reg use prediction");
5084
5085     if (regMask & RBM_ILLEGAL)
5086         noway_assert(!"return value not set properly in reg use prediction");
5087
5088 #endif
5089
5090     /*
5091      *  If gtUsedRegs conflicts with lockedRegs,
5092      *  then we are going to have to spill some registers
5093      *  into the non-trashed register set to keep them alive
5094      */
5095     regMaskTP spillMask;
5096     spillMask = tree->gtUsedRegs & lockedRegs;
5097
5098     if (spillMask)
5099     {
5100         while (spillMask)
5101         {
5102             /* Find the next register that needs to be spilled */
5103             tmpMask = genFindLowestBit(spillMask);
5104
5105 #ifdef DEBUG
5106             if (verbose)
5107             {
5108                 printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5109                 gtDispTree(tree, 0, NULL, true);
5110                 if ((tmpMask & regMask) == 0)
5111                 {
5112                     printf("Predict reload of   %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5113                     gtDispTree(tree, 0, NULL, true);
5114                 }
5115             }
5116 #endif
5117             /* In Codegen it will typically introduce a spill temp here */
5118             /* rather than relocating the register to a non trashed reg */
5119             rpPredictSpillCnt++;
5120
5121             /* Remove it from the spillMask */
5122             spillMask &= ~tmpMask;
5123         }
5124     }
5125
5126     /*
5127      *  If the return registers in regMask conflict with the lockedRegs
5128      *  then we allocate extra registers for the reload of the conflicting
5129      *  registers.
5130      *
5131      *  Set spillMask to the set of locked registers that have to be reloaded here.
5132      *  reloadMask is set to the extra registers that are used to reload
5133      *  the spilled lockedRegs.
5134      */
5135
5136     noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5137     spillMask = lockedRegs & regMask;
5138
5139     if (spillMask)
5140     {
5141         /* Remove the spillMask from regMask */
5142         regMask &= ~spillMask;
5143
5144         regMaskTP reloadMask = RBM_NONE;
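        // reloadMask accumulates the extra registers picked to hold the reloaded
        // values; each pick must avoid lockedRegs, regMask and earlier picks.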
5145         while (spillMask)
5146         {
5147             /* Get an extra register to hold it */
5148             regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5149 #ifdef DEBUG
5150             if (verbose)
5151             {
5152                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5153                 gtDispTree(tree, 0, NULL, true);
5154             }
5155 #endif
5156             reloadMask |= reloadReg;
5157
5158             /* Remove it from the spillMask */
5159             spillMask &= ~genFindLowestBit(spillMask);
5160         }
5161
5162         /* Update regMask to use the reloadMask */
5163         regMask |= reloadMask;
5164
5165         /* update the gtUsedRegs mask */
5166         tree->gtUsedRegs |= (regMaskSmall)regMask;
5167     }
5168
5169     regMaskTP regUse = tree->gtUsedRegs;
5170     regUse |= interferingRegs;
5171
5172     if (!VarSetOps::IsEmpty(this, compCurLife))
5173     {
5174         // Add interference between the current set of live variables and
5175         //  the set of temporary registers needed to evaluate the subtree
5176         if (regUse)
5177         {
5178             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5179         }
5180     }
5181
5182     if (rpAsgVarNum != -1)
5183     {
5184         // Add interference between the registers used (if any)
5185         // and the assignment target variable
5186         if (regUse)
5187         {
5188             rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5189         }
5190
5191         // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5192         // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5193         // to the set of currently live variables. This new interference will prevent us
5194         // from using the register value used here for enregistering a different live variable
5195         //
5196         if (!VarSetOps::IsEmpty(this, compCurLife))
5197         {
5198             rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5199         }
5200     }
5201
5202     /* Do we need to restore the oldLastUseVars value? */
5203     if (restoreLastUseVars)
5204     {
5205         /*  If we used a GT_ASG targeted register then we need to add
5206          *  a variable interference between any new last use variables
5207          *  and the GT_ASG targeted register
5208          */
5209         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5210         {
5211             rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5212                                              DEBUGARG("asgn tgt last use conflict"));
5213         }
5214         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5215     }
5216
5217     return regMask;
5218 }
5219 #ifdef _PREFAST_
5220 #pragma warning(pop)
5221 #endif
5222
5223 #endif // LEGACY_BACKEND
5224
5225 /****************************************************************************/
5226 /* Returns true when we must create an EBP frame.
5227    This is used to force most managed methods to have EBP-based frames,
5228    which allows the ETW kernel stackwalker to walk the stacks of managed code,
5229    and thus allows the kernel to perform lightweight profiling.
5230  */
5231 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5232 {
5233     bool result = false;
5234 #ifdef DEBUG
5235     const char* reason = nullptr;
5236 #endif
5237
5238 #if ETW_EBP_FRAMED
5239     if (!result && (opts.MinOpts() || opts.compDbgCode))
5240     {
5241         INDEBUG(reason = "Debug Code");
5242         result = true;
5243     }
5244     if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5245     {
5246         INDEBUG(reason = "IL Code Size");
5247         result = true;
5248     }
5249     if (!result && (fgBBcount > 3))
5250     {
5251         INDEBUG(reason = "BasicBlock Count");
5252         result = true;
5253     }
5254     if (!result && fgHasLoops)
5255     {
5256         INDEBUG(reason = "Method has Loops");
5257         result = true;
5258     }
5259     if (!result && (optCallCount >= 2))
5260     {
5261         INDEBUG(reason = "Call Count");
5262         result = true;
5263     }
5264     if (!result && (optIndirectCallCount >= 1))
5265     {
5266         INDEBUG(reason = "Indirect Call");
5267         result = true;
5268     }
5269 #endif // ETW_EBP_FRAMED
5270
5271     // The VM always wants to identify the containing frame of an InlinedCallFrame
5272     // via the frame register, never the stack register, so we need a frame.
5273     if (!result && (optNativeCallCount != 0))
5274     {
5275         INDEBUG(reason = "Uses PInvoke");
5276         result = true;
5277     }
5278
5279 #ifdef _TARGET_ARM64_
5280     // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5281     // pointer frames.
5282     if (!result)
5283     {
5284         INDEBUG(reason = "Temporary ARM64 force frame pointer");
5285         result = true;
5286     }
5287 #endif // _TARGET_ARM64_
5288
5289 #ifdef DEBUG
5290     if ((result == true) && (wbReason != nullptr))
5291     {
5292         *wbReason = reason;
5293     }
5294 #endif
5295
5296     return result;
5297 }
5298
5299 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5300
5301 /*****************************************************************************
5302  *
5303  *  Predict which variables will be assigned to registers.
5304  *  This is x86 specific, only predicts the integer registers, and
5305  *  must be conservative: any variable that is predicted to be enregistered
5306  *  must end up being enregistered.
5307  *
5308  *  rpPredictTreeRegUse takes advantage of the LCL_VARs that are
5309  *  predicted to be enregistered to minimize calls to rpPredictRegPick.
5310  *
5311  */
5312
5313 #ifdef _PREFAST_
5314 #pragma warning(push)
5315 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5316 #endif
5317 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5318 {
5319     unsigned regInx;
5320
5321     if (rpPasses <= rpPassesPessimize)
5322     {
5323         // Assume that we won't have to reverse EBP enregistration
5324         rpReverseEBPenreg = false;
5325
5326         // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5327         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5328             rpFrameType = FT_EBP_FRAME;
5329         else
5330             rpFrameType = FT_ESP_FRAME;
5331     }
5332
5333 #if !ETW_EBP_FRAMED
5334     // If we are using FPBASE as the frame register, we cannot also use it for
5335     // a local var
5336     if (rpFrameType == FT_EBP_FRAME)
5337     {
5338         regAvail &= ~RBM_FPBASE;
5339     }
5340 #endif // !ETW_EBP_FRAMED
5341
5342     rpStkPredict        = 0;
5343     rpPredictAssignMask = regAvail;
5344
5345     raSetupArgMasks(&codeGen->intRegState);
5346 #if !FEATURE_STACK_FP_X87
5347     raSetupArgMasks(&codeGen->floatRegState);
5348 #endif
5349
5350     // If there is a secret stub param, its register is also live on entry
5351     if (info.compPublishStubParam)
5352     {
5353         codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5354     }
5355
5356     if (regAvail == RBM_NONE)
5357     {
5358         unsigned   lclNum;
5359         LclVarDsc* varDsc;
5360
5361         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5362         {
5363 #if FEATURE_STACK_FP_X87
5364             if (!varDsc->IsFloatRegType())
5365 #endif
5366             {
5367                 varDsc->lvRegNum = REG_STK;
5368                 if (isRegPairType(varDsc->lvType))
5369                     varDsc->lvOtherReg = REG_STK;
5370             }
5371         }
5372     }
5373
5374 #ifdef DEBUG
5375     if (verbose)
5376     {
5377         printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5378         printf("\n        Available registers = ");
5379         dspRegMask(regAvail);
5380         printf("\n");
5381     }
5382 #endif
5383
5384     if (regAvail == RBM_NONE)
5385     {
5386         return RBM_NONE;
5387     }
5388
5389     /* We cannot change the lvVarIndexes at this point, so we   */
5390     /* can only re-order the existing set of tracked variables, */
5391     /* which will change the order in which we select the       */
5392     /* locals for enregistering.                                 */
5393
5394     assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5395
5396     // Should not be set unless optimizing
5397     noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5398
5399     if (lvaSortAgain)
5400         lvaSortOnly();
5401
5402 #ifdef DEBUG
5403     fgDebugCheckBBlist();
5404 #endif
5405
5406     /* Initialize the weighted count of variables that could have */
5407     /* been enregistered but weren't */
5408     unsigned refCntStk    = 0; // sum of     ref counts for all stack based variables
5409     unsigned refCntEBP    = 0; // sum of     ref counts for EBP enregistered variables
5410     unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5411 #if DOUBLE_ALIGN
5412     unsigned refCntStkParam;  // sum of     ref counts for all stack based parameters
5413     unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5414
5415 #if FEATURE_STACK_FP_X87
5416     refCntStkParam  = raCntStkParamDblStackFP;
5417     refCntWtdStkDbl = raCntWtdStkDblStackFP;
5418     refCntStk       = raCntStkStackFP;
5419 #else
5420     refCntStkParam  = 0;
5421     refCntWtdStkDbl = 0;
5422     refCntStk       = 0;
5423 #endif // FEATURE_STACK_FP_X87
5424
5425 #endif // DOUBLE_ALIGN
5426
5427     /* Set of registers used to enregister variables in the prediction */
5428     regMaskTP regUsed = RBM_NONE;
5429
5430     /*-------------------------------------------------------------------------
5431      *
5432      *  Predict/Assign the enregistered locals in ref-count order
5433      *
5434      */
5435
5436     VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5437
5438     unsigned FPRegVarLiveInCnt;
5439     FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5440
5441     LclVarDsc* varDsc;
5442     for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5443     {
5444         bool notWorthy = false;
5445
5446         unsigned  varIndex;
5447         bool      isDouble;
5448         regMaskTP regAvailForType;
5449         var_types regType;
5450         regMaskTP avoidReg;
5451         unsigned  customVarOrderSize;
5452         regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5453         bool      firstHalf;
5454         regNumber saveOtherReg;
5455
5456         varDsc = lvaRefSorted[sortNum];
5457
5458 #if FEATURE_STACK_FP_X87
5459         if (varTypeIsFloating(varDsc->TypeGet()))
5460         {
5461 #ifdef DEBUG
5462             if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5463             {
5464                 // A field local of a PROMOTION_TYPE_DEPENDENT struct should not
5465                 // be enregistered.
5466                 noway_assert(!varDsc->lvRegister);
5467             }
5468 #endif
5469             continue;
5470         }
5471 #endif
5472
5473         /* Check the set of invariant things that would prevent enregistration */
5474
5475         /* Ignore the variable if it's not tracked */
5476         if (!varDsc->lvTracked)
5477             goto CANT_REG;
5478
5479         /* Get hold of the index and the interference mask for the variable */
5480         varIndex = varDsc->lvVarIndex;
5481
5482         // Remove 'varIndex' from unprocessedVars
5483         VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5484
5485         // Skip the variable if it's marked as DoNotEnregister.
5486
5487         if (varDsc->lvDoNotEnregister)
5488             goto CANT_REG;
5489
5490         /* TODO: For now, if we have a JMP, all register args go to the stack.
5491          * TODO: Later, consider extending the life of the argument or making a copy of it */
5492
5493         if (compJmpOpUsed && varDsc->lvIsRegArg)
5494             goto CANT_REG;
5495
5496         /* Skip the variable if the ref count is zero */
5497
5498         if (varDsc->lvRefCnt == 0)
5499             goto CANT_REG;
5500
5501         /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5502
5503         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5504         {
5505             goto CANT_REG;
5506         }
5507
5508         /* Is the unweighted ref count too low to be interesting? */
5509
5510         if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5511             (varDsc->lvRefCnt <= 1))
5512         {
5513             /* Sometimes it's useful to enregister a variable with only one use */
5514             /*   arguments referenced in loops are one example */
5515
5516             if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5517                 goto OK_TO_ENREGISTER;
5518
5519             /* If the variable has a preferred register set it may be useful to put it there */
5520             if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5521                 goto OK_TO_ENREGISTER;
5522
5523             /* Keep going; the table is sorted by "weighted" ref count */
5524             goto CANT_REG;
5525         }
5526
5527     OK_TO_ENREGISTER:
5528
5529         if (varTypeIsFloating(varDsc->TypeGet()))
5530         {
5531             regType         = varDsc->TypeGet();
5532             regAvailForType = regAvail & RBM_ALLFLOAT;
5533         }
5534         else
5535         {
5536             regType         = TYP_INT;
5537             regAvailForType = regAvail & RBM_ALLINT;
5538         }
5539
5540 #ifdef _TARGET_ARM_
5541         isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5542
5543         if (isDouble)
5544         {
5545             regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5546         }
5547 #endif
5548
5549         /* If we don't have any registers available then skip the enregistration attempt */
5550         if (regAvailForType == RBM_NONE)
5551             goto NO_REG;
5552
5553         // On the pessimize passes don't even try to enregister LONGS
5554         if (isRegPairType(varDsc->lvType))
5555         {
5556             if (rpPasses > rpPassesPessimize)
5557                 goto NO_REG;
5558             else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5559                 goto NO_REG;
5560         }
5561
5562         // Set of registers to avoid when performing register allocation
5563         avoidReg = RBM_NONE;
5564
5565         if (!varDsc->lvIsRegArg)
5566         {
5567             /* For local variables,
5568              *  avoid the incoming argument registers,
5569              *  but only if this variable conflicts with those arguments */
5570
5571             if (raAvoidArgRegMask != 0)
5572             {
5573                 LclVarDsc* argDsc;
5574                 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5575
5576                 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5577                 {
5578                     if (!argDsc->lvIsRegArg)
5579                         continue;
5580
5581                     bool      isFloat  = argDsc->IsFloatRegType();
5582                     regNumber inArgReg = argDsc->lvArgReg;
5583                     regMaskTP inArgBit = genRegMask(inArgReg);
5584
5585                     // Is this inArgReg in the raAvoidArgRegMask set?
5586
5587                     if (!(raAvoidArgRegMask & inArgBit))
5588                         continue;
5589
5590                     noway_assert(argDsc->lvIsParam);
5591                     noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5592
5593                     unsigned locVarIndex = varDsc->lvVarIndex;
5594                     unsigned argVarIndex = argDsc->lvVarIndex;
5595
5596                     /* Does this variable interfere with the arg variable ? */
5597                     if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5598                     {
5599                         noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5600                         /* Yes, so try to avoid the incoming arg reg */
5601                         avoidReg |= inArgBit;
5602                     }
5603                     else
5604                     {
5605                         noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5606                     }
5607                 }
5608             }
5609         }
5610
5611         // Now we will try to predict which register the variable
5612         // could be enregistered in
5613
5614         customVarOrderSize = MAX_VAR_ORDER_SIZE;
5615
5616         raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
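             // (Sketch of the contract as used here, not a full specification:
             // raSetRegVarOrder fills customVarOrder with the registers suitable for
             // regType in preference order, trying any lvPrefReg hints early and
             // pushing avoidReg entries toward the end; the loop below then takes the
             // first register that is both available and interference-free.)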
5617
5618         firstHalf    = false;
5619         saveOtherReg = DUMMY_INIT(REG_NA);
5620
5621         for (regInx = 0; regInx < customVarOrderSize; regInx++)
5622         {
5623             regNumber regNum  = customVarOrder[regInx];
5624             regMaskTP regBits = genRegMask(regNum);
5625
5626             /* Skip this register if it isn't available */
5627             if ((regAvailForType & regBits) == 0)
5628                 continue;
5629
5630             /* Skip this register if it interferes with the variable */
5631
5632             if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5633                 continue;
5634
5635             if (varTypeIsFloating(regType))
5636             {
5637 #ifdef _TARGET_ARM_
5638                 if (isDouble)
5639                 {
5640                     regNumber regNext = REG_NEXT(regNum);
5641                     regBits |= genRegMask(regNext);
5642
5643                     /* Skip if regNext interferes with the variable */
5644                     if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5645                         continue;
5646                 }
5647 #endif
5648             }
5649
5650             bool firstUseOfReg     = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5651             bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5652             bool calleeSavedReg    = ((regBits & RBM_CALLEE_SAVED) != 0);
5653
5654             /* Skip this register if the weighted ref count is less than two
5655                and we are considering an unused callee-saved register */
5656
5657             if (lessThanTwoRefWtd && // less than two references (weighted)
5658                 firstUseOfReg &&     // first use of this register
5659                 calleeSavedReg)      // callee saved register
5660             {
5661                 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5662
5663                 // psc is an abbreviation for possibleSameColor
5664                 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5665
5666                 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5667                 unsigned        pscIndex = 0;
5668                 while (pscIndexIter.NextElem(&pscIndex))
5669                 {
5670                     LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5671                     totalRefCntWtd += pscVar->lvRefCntWtd;
5672                     if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5673                         break;
5674                 }
5675
5676                 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5677                 {
5678                     notWorthy = true;
5679                     continue; // not worth spilling a callee saved register
5680                 }
5681                 // otherwise we will spill this callee saved registers,
5682                 // otherwise we will spill this callee-saved register,
5683                 // because its uses, when combined with the uses of
5684                 // other yet-to-be-processed candidates, exceed our threshold.
5686
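                 // Illustrative numbers (hypothetical, not taken from a real method):
                 // suppose varDsc->lvRefCntWtd == BB_UNITY_WEIGHT and the only unprocessed,
                 // non-interfering candidate adds BB_UNITY_WEIGHT / 2. The combined weight of
                 // 1.5 * BB_UNITY_WEIGHT stays at or below the 2 * BB_UNITY_WEIGHT threshold,
                 // so dirtying an untouched callee-saved register (an extra save/restore in
                 // the prolog/epilog) is judged not worth it and we try the next register.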
5687             /* Looks good - mark the variable as living in the register */
5688
5689             if (isRegPairType(varDsc->lvType))
5690             {
5691                 if (firstHalf == false)
5692                 {
5693                     /* Enregister the first half of the long */
5694                     varDsc->lvRegNum   = regNum;
5695                     saveOtherReg       = varDsc->lvOtherReg;
5696                     varDsc->lvOtherReg = REG_STK;
5697                     firstHalf          = true;
5698                 }
5699                 else
5700                 {
5701                     /* Ensure 'well-formed' register pairs */
5702                     /* (those returned by gen[Pick|Grab]RegPair) */
5703
5704                     if (regNum < varDsc->lvRegNum)
5705                     {
5706                         varDsc->lvOtherReg = varDsc->lvRegNum;
5707                         varDsc->lvRegNum   = regNum;
5708                     }
5709                     else
5710                     {
5711                         varDsc->lvOtherReg = regNum;
5712                     }
5713                     firstHalf = false;
5714                 }
5715             }
5716             else
5717             {
5718                 varDsc->lvRegNum = regNum;
5719 #ifdef _TARGET_ARM_
5720                 if (isDouble)
5721                 {
5722                     varDsc->lvOtherReg = REG_NEXT(regNum);
5723                 }
5724 #endif
5725             }
5726
5727             if (regNum == REG_FPBASE)
5728             {
5729                 refCntEBP += varDsc->lvRefCnt;
5730                 refCntWtdEBP += varDsc->lvRefCntWtd;
5731 #if DOUBLE_ALIGN
5732                 if (varDsc->lvIsParam)
5733                 {
5734                     refCntStkParam += varDsc->lvRefCnt;
5735                 }
5736 #endif
5737             }
5738
5739             /* Record this register in the regUsed set */
5740             regUsed |= regBits;
5741
5742             /* The register is now ineligible for all interfering variables */
5743
5744             VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5745
5746 #ifdef _TARGET_ARM_
5747             if (isDouble)
5748             {
5749                 regNumber       secondHalf = REG_NEXT(regNum);
5750                 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5751                 unsigned        intfIndex = 0;
5752                 while (iter.NextElem(&intfIndex))
5753                 {
5754                     VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5755                 }
5756             }
5757 #endif
5758
5759             /* If a register argument, remove its incoming register
5760              * from the "avoid" list */
5761
5762             if (varDsc->lvIsRegArg)
5763             {
5764                 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5765 #ifdef _TARGET_ARM_
5766                 if (isDouble)
5767                 {
5768                     raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5769                 }
5770 #endif
5771             }
5772
5773             /* A variable of TYP_LONG can take two registers */
5774             if (firstHalf)
5775                 continue;
5776
5777             // Since we have successfully enregistered this variable it is
5778             // now time to move on and consider the next variable
5779             goto ENREG_VAR;
5780         }
5781
5782         if (firstHalf)
5783         {
5784             noway_assert(isRegPairType(varDsc->lvType));
5785
5786             /* This TYP_LONG is partially enregistered */
5787
5788             noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5789
5790             if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5791             {
5792                 rpLostEnreg = true;
5793             }
5794
5795             raAddToStkPredict(varDsc->lvRefCntWtd);
5796             goto ENREG_VAR;
5797         }
5798
5799     NO_REG:;
5800         if (varDsc->lvDependReg)
5801         {
5802             rpLostEnreg = true;
5803         }
5804
5805         if (!notWorthy)
5806         {
5807             /* Weighted count of variables that could have been enregistered but weren't */
5808             raAddToStkPredict(varDsc->lvRefCntWtd);
5809
5810             if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5811                 raAddToStkPredict(varDsc->lvRefCntWtd);
5812         }
5813
5814     CANT_REG:;
5815         varDsc->lvRegister = false;
5816
5817         varDsc->lvRegNum = REG_STK;
5818         if (isRegPairType(varDsc->lvType))
5819             varDsc->lvOtherReg = REG_STK;
5820
5821         /* unweighted count of variables that were not enregistered */
5822
5823         refCntStk += varDsc->lvRefCnt;
5824
5825 #if DOUBLE_ALIGN
5826         if (varDsc->lvIsParam)
5827         {
5828             refCntStkParam += varDsc->lvRefCnt;
5829         }
5830         else
5831         {
5832             /* Is it a stack based double? */
5833             /* Note that double params are excluded since they cannot be double-aligned */
5834             if (varDsc->lvType == TYP_DOUBLE)
5835             {
5836                 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5837             }
5838         }
5839 #endif
5840 #ifdef DEBUG
5841         if (verbose)
5842         {
5843             printf("; ");
5844             gtDispLclVar((unsigned)(varDsc - lvaTable));
5845             if (varDsc->lvTracked)
5846                 printf("T%02u", varDsc->lvVarIndex);
5847             else
5848                 printf("   ");
5849             printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5850             if (varDsc->lvDoNotEnregister)
5851                 printf(", do-not-enregister");
5852             printf("\n");
5853         }
5854 #endif
5855         continue;
5856
5857     ENREG_VAR:;
5858
5859         varDsc->lvRegister = true;
5860
5861         // Record the fact that we enregistered a stack arg when tail call is used.
5862         if (compJmpOpUsed && !varDsc->lvIsRegArg)
5863         {
5864             rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5865             if (isRegPairType(varDsc->lvType))
5866             {
5867                 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5868             }
5869         }
5870
5871 #ifdef DEBUG
5872         if (verbose)
5873         {
5874             printf("; ");
5875             gtDispLclVar((unsigned)(varDsc - lvaTable));
5876             printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5877                    refCntWtd2str(varDsc->lvRefCntWtd));
5878             varDsc->PrintVarReg();
5879 #ifdef _TARGET_ARM_
5880             if (isDouble)
5881             {
5882                 printf(":%s", getRegName(varDsc->lvOtherReg));
5883             }
5884 #endif
5885             printf("\n");
5886         }
5887 #endif
5888     }
5889
5890 #if ETW_EBP_FRAMED
5891     noway_assert(refCntEBP == 0);
5892 #endif
5893
5894 #ifdef DEBUG
5895     if (verbose)
5896     {
5897         if (refCntStk > 0)
5898             printf("; refCntStk       = %u\n", refCntStk);
5899         if (refCntEBP > 0)
5900             printf("; refCntEBP       = %u\n", refCntEBP);
5901         if (refCntWtdEBP > 0)
5902             printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5903 #if DOUBLE_ALIGN
5904         if (refCntStkParam > 0)
5905             printf("; refCntStkParam  = %u\n", refCntStkParam);
5906         if (refCntWtdStkDbl > 0)
5907             printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5908 #endif
5909     }
5910 #endif
5911
5912     /* Determine how the EBP register should be used */
5913     CLANG_FORMAT_COMMENT_ANCHOR;
5914
5915 #if DOUBLE_ALIGN
5916
5917     if (!codeGen->isFramePointerRequired())
5918     {
5919         noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5920
5921         /*
5922             First let us decide if we should use EBP to create a
5923             double-aligned frame, instead of enregistering variables
5924         */
5925
5926         if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5927         {
5928             rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5929             goto REVERSE_EBP_ENREG;
5930         }
5931
5932         if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5933         {
5934             if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5935             {
5936                 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5937                 goto REVERSE_EBP_ENREG;
5938             }
5939         }
5940     }
5941
5942 #endif // DOUBLE_ALIGN
5943
5944     if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5945     {
5946 #ifdef _TARGET_XARCH_
5947 // clang-format off
5948         /*  If we are using EBP to enregister variables then
5949             will we actually save bytes by setting up an EBP frame?
5950
5951             Each stack reference is an extra byte of code if we use
5952             an ESP frame.
5953
5954             Here we measure the savings that we get by using EBP to
5955             enregister variables vs. the cost in code size that we
5956             pay when using an ESP based frame.
5957
5958             We pay one byte of code for each refCntStk
5959             but we save one byte (or more) for each refCntEBP.
5960
5961             Our savings are the elimination of a stack memory read/write.
5962             We use the loop weighted value of
5963                refCntWtdEBP * mem_access_weight (0, 3, 6)
5964             to represent this savings.
5965          */
5966
5967         // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5968         // to set up an EBP frame in the prolog and epilog
5969         #define EBP_FRAME_SETUP_SIZE  5
5970         // clang-format on
5971
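             // Illustrative arithmetic (hypothetical counts, not from a real method):
             // with refCntStk = 30, refCntEBP = 10, refCntWtdEBP = 4 * BB_UNITY_WEIGHT,
             // and the default mem_access_weight of 3:
             //   bytesSaved       = 30 - (10 + EBP_FRAME_SETUP_SIZE) = 15
             //   weighted savings = (4 * BB_UNITY_WEIGHT * 3) / BB_UNITY_WEIGHT = 12
             // Since 15 > 12, an ESP frame is predicted to be cheaper, so we take the
             // REVERSE_EBP_ENREG path and use EBP as a frame pointer instead.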
5972         if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5973         {
5974             unsigned bytesSaved        = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5975             unsigned mem_access_weight = 3;
5976
5977             if (compCodeOpt() == SMALL_CODE)
5978                 mem_access_weight = 0;
5979             else if (compCodeOpt() == FAST_CODE)
5980                 mem_access_weight *= 2;
5981
5982             if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5983             {
5984                 /* It would not be a good idea to use EBP in our predictions */
5985                 CLANG_FORMAT_COMMENT_ANCHOR;
5986 #ifdef DEBUG
5987                 if (verbose && (refCntEBP > 0))
5988                     printf("; Predicting that it's not worth using EBP to enregister variables\n");
5989 #endif
5990                 rpFrameType = FT_EBP_FRAME;
5991                 goto REVERSE_EBP_ENREG;
5992             }
5993         }
5994 #endif // _TARGET_XARCH_
5995
5996         if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
5997         {
5998 #ifdef DEBUG
5999             const char* reason;
6000 #endif
6001             if (rpMustCreateEBPCalled == false)
6002             {
6003                 rpMustCreateEBPCalled = true;
6004                 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6005                 {
6006 #ifdef DEBUG
6007                     if (verbose)
6008                         printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6009 #endif
6010                     codeGen->setFrameRequired(true);
6011
6012                     rpFrameType = FT_EBP_FRAME;
6013                     goto REVERSE_EBP_ENREG;
6014                 }
6015             }
6016         }
6017     }
6018
6019     goto EXIT;
6020
6021 REVERSE_EBP_ENREG:
6022
6023     noway_assert(rpFrameType != FT_ESP_FRAME);
6024
6025     rpReverseEBPenreg = true;
6026
6027 #if !ETW_EBP_FRAMED
6028     if (refCntEBP > 0)
6029     {
6030         noway_assert(regUsed & RBM_FPBASE);
6031
6032         regUsed &= ~RBM_FPBASE;
6033
6034         /* variables that were enregistered in EBP become stack based variables */
6035         raAddToStkPredict(refCntWtdEBP);
6036
6037         unsigned lclNum;
6038
6039         /* We're going to have to undo some predicted enregistered variables */
6040         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6041         {
6042             /* Is this a register variable? */
6043             if (varDsc->lvRegNum != REG_STK)
6044             {
6045                 if (isRegPairType(varDsc->lvType))
6046                 {
6047                     /* Only one can be EBP */
6048                     if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6049                     {
6050                         if (varDsc->lvRegNum == REG_FPBASE)
6051                             varDsc->lvRegNum = varDsc->lvOtherReg;
6052
6053                         varDsc->lvOtherReg = REG_STK;
6054
6055                         if (varDsc->lvRegNum == REG_STK)
6056                             varDsc->lvRegister = false;
6057
6058                         if (varDsc->lvDependReg)
6059                             rpLostEnreg = true;
6060 #ifdef DEBUG
6061                         if (verbose)
6062                             goto DUMP_MSG;
6063 #endif
6064                     }
6065                 }
6066                 else
6067                 {
6068                     if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6069                     {
6070                         varDsc->lvRegNum = REG_STK;
6071
6072                         varDsc->lvRegister = false;
6073
6074                         if (varDsc->lvDependReg)
6075                             rpLostEnreg = true;
6076 #ifdef DEBUG
6077                         if (verbose)
6078                         {
6079                         DUMP_MSG:
6080                             printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6081                                    varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6082                                    (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6083                         }
6084 #endif
6085                     }
6086                 }
6087             }
6088         }
6089     }
6090 #endif // ETW_EBP_FRAMED
6091
6092 EXIT:;
6093
6094     unsigned lclNum;
6095     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6096     {
6097         /* Clear the lvDependReg flag for next iteration of the predictor */
6098         varDsc->lvDependReg = false;
6099
6100         // If we set rpLostEnreg and this is the first pessimize pass
6101         // then reverse the enreg of all TYP_LONG
6102         if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6103         {
6104             varDsc->lvRegNum   = REG_STK;
6105             varDsc->lvOtherReg = REG_STK;
6106         }
6107     }
6108
6109 #ifdef DEBUG
6110     if (verbose && raNewBlocks)
6111     {
6112         printf("\nAdded FP register killing blocks:\n");
6113         fgDispBasicBlocks();
6114         printf("\n");
6115     }
6116 #endif
6117     noway_assert(rpFrameType != FT_NOT_SET);
6118
6119     /* return the set of registers used to enregister variables */
6120     return regUsed;
6121 }
6122 #ifdef _PREFAST_
6123 #pragma warning(pop)
6124 #endif
6125
6126 /*****************************************************************************
6127  *
6128  *  Predict register use for every tree in the function. Note that we do this
6129  *  at different times (not to mention in a totally different way) for x86 vs
6130  *  RISC targets.
6131  */
6132 void Compiler::rpPredictRegUse()
6133 {
6134 #ifdef DEBUG
6135     if (verbose)
6136         raDumpVarIntf();
6137 #endif
6138
6139     // We might want to adjust the ref counts based on interference
6140     raAdjustVarIntf();
6141
6142     regMaskTP allAcceptableRegs = RBM_ALLINT;
6143
6144 #if FEATURE_FP_REGALLOC
6145     allAcceptableRegs |= raConfigRestrictMaskFP();
6146 #endif
6147
6148     allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6149
6150     /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6151        to acdHelper(). This is done implicitly, without creating a GT_CALL
6152        node. Hence, this interference is handled implicitly by
6153        restricting the registers used for enregistering variables */
6154
6155     if (opts.compDbgCode)
6156     {
6157         allAcceptableRegs &= RBM_CALLEE_SAVED;
6158     }
6159
6160     /* Compute the initial regmask to use for the first pass */
6161     regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6162     regMaskTP regUsed;
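     // (Starting the first pass with only the callee-saved registers is, presumably,
     // so that initial enregistration predictions are not immediately invalidated
     // across calls; later passes widen regAvail back to allAcceptableRegs.)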
6163
6164 #if CPU_USES_BLOCK_MOVE
6165     /* If we might need to generate a rep mov instruction */
6166     /* remove ESI and EDI */
6167     if (compBlkOpUsed)
6168         regAvail &= ~(RBM_ESI | RBM_EDI);
6169 #endif
6170
6171 #ifdef _TARGET_X86_
6172     /* If we are using longs then we remove ESI to allow */
6173     /* ESI:EBX to be saved across a call */
6174     if (compLongUsed)
6175         regAvail &= ~(RBM_ESI);
6176 #endif
6177
6178 #ifdef _TARGET_ARM_
6179     // For the first register allocation pass we don't want to color using r4
6180     // as we want to allow it to be used to color the internal temps instead
6181     // when r0,r1,r2,r3 are all in use.
6182     //
6183     regAvail &= ~(RBM_R4);
6184 #endif
6185
6186 #if ETW_EBP_FRAMED
6187     // We never have EBP available when ETW_EBP_FRAMED is defined
6188     regAvail &= ~RBM_FPBASE;
6189 #else
6190     /* If a frame pointer is required then we remove EBP */
6191     if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6192         regAvail &= ~RBM_FPBASE;
6193 #endif
6194
6195 #ifdef DEBUG
6196     BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6197     if (fJitNoRegLoc)
6198         regAvail = RBM_NONE;
6199 #endif
6200
6201     if ((opts.compFlags & CLFLG_REGVAR) == 0)
6202         regAvail = RBM_NONE;
6203
6204 #if FEATURE_STACK_FP_X87
6205     VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6206     VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6207
6208     // Calculate the set of all tracked FP/non-FP variables
6209     //  into optAllFloatVars and optAllNonFPvars
6210
6211     unsigned   lclNum;
6212     LclVarDsc* varDsc;
6213
6214     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6215     {
6216         /* Ignore the variable if it's not tracked */
6217
6218         if (!varDsc->lvTracked)
6219             continue;
6220
6221         /* Get hold of the index and the interference mask for the variable */
6222
6223         unsigned varNum = varDsc->lvVarIndex;
6224
6225         /* add to the set of all tracked FP/non-FP variables */
6226
6227         if (varDsc->IsFloatRegType())
6228             VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6229         else
6230             VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6231     }
6232 #endif
6233
6234     for (unsigned i = 0; i < REG_COUNT; i++)
6235     {
6236         VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6237     }
6238     for (unsigned i = 0; i < lvaTrackedCount; i++)
6239     {
6240         VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6241     }
6242
6243     raNewBlocks          = false;
6244     rpPredictAssignAgain = false;
6245     rpPasses             = 0;
6246
6247     bool      mustPredict   = true;
6248     unsigned  stmtNum       = 0;
6249     unsigned  oldStkPredict = DUMMY_INIT(~0);
6250     VARSET_TP oldLclRegIntf[REG_COUNT];
6251
6252     for (unsigned i = 0; i < REG_COUNT; i++)
6253     {
6254         VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6255     }
6256
6257     while (true)
6258     {
6259         /* Assign registers to variables using the variable/register interference
6260            graph (raLclRegIntf[]) calculated in the previous pass */
6261         regUsed = rpPredictAssignRegVars(regAvail);
6262
6263         mustPredict |= rpLostEnreg;
6264
6265 #ifdef _TARGET_ARM_
6266         // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6267         if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6268             !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6269         {
6270             // We can release our reservation on R10 and use it to color registers
6271             codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6272             allAcceptableRegs |= RBM_OPT_RSVD;
6273         }
6274 #endif
6275
6276         /* Is our new prediction good enough?? */
6277         if (!mustPredict)
6278         {
6279             /* For small methods (less than 12 stmts), we add an   */
6280             /*   extra pass if we are predicting the use of some   */
6281             /*   of the callee-saved registers.                    */
6282             /* This fixes RAID perf bug 43440 VB Ackerman function */
6283
6284             if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6285             {
6286                 goto EXTRA_PASS;
6287             }
6288
6289             /* If every variable was fully enregistered then we're done */
6290             if (rpStkPredict == 0)
6291                 goto ALL_DONE;
6292
6293             // This was a successful prediction.  Record it, in case it turns out to be the best one.
6294             rpRecordPrediction();
6295
6296             if (rpPasses > 1)
6297             {
6298                 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6299
6300                 // Be careful about overflow
6301                 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6302                 if (oldStkPredict < highStkPredict)
6303                     goto ALL_DONE;
6304
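                     // Illustrative check (hypothetical values): with oldStkPredict == 100
                     // and rpStkPredict == 60, highStkPredict is 120; since 100 < 120 the
                     // new pass has not at least halved the predicted stack cost, so we
                     // accept the current prediction and stop iterating.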
6305                 if (rpStkPredict < rpPasses * 8)
6306                     goto ALL_DONE;
6307
6308                 if (rpPasses >= (rpPassesMax - 1))
6309                     goto ALL_DONE;
6310             }
6311
6312         EXTRA_PASS:
6313             /* We will do another pass */;
6314         }
6315
6316 #ifdef DEBUG
6317         if (JitConfig.JitAssertOnMaxRAPasses())
6318         {
6319             noway_assert(rpPasses < rpPassesMax &&
6320                          "This may not be a bug, but the dev team should look and see what is happening");
6321         }
6322 #endif
6323
6324         // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with
6325         // the (max) size of a VARSET. We've eliminated that constant, so the value is left here as a literal.
6326         // We hope that we're phasing out this code anyway, and this leaves the behavior the way that it was.
6327         if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6328         {
6329             NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6330         }
6331
6332 #ifdef DEBUG
6333         if (verbose)
6334         {
6335             if (rpPasses > 0)
6336             {
6337                 if (rpLostEnreg)
6338                     printf("\n; Another pass due to rpLostEnreg");
6339                 if (rpAddedVarIntf)
6340                     printf("\n; Another pass due to rpAddedVarIntf");
6341                 if ((rpPasses == 1) && rpPredictAssignAgain)
6342                     printf("\n; Another pass due to rpPredictAssignAgain");
6343             }
6344             printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6345         }
6346 #endif
6347
6348         /*  Zero the variable/register interference graph */
6349         for (unsigned i = 0; i < REG_COUNT; i++)
6350         {
6351             VarSetOps::ClearD(this, raLclRegIntf[i]);
6352         }
6353
6354         // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6355         // it must not be in a register trashed by the callee
6356         if (info.compLvFrameListRoot != BAD_VAR_NUM)
6357         {
6358             assert(!opts.ShouldUsePInvokeHelpers());
6359             noway_assert(info.compLvFrameListRoot < lvaCount);
6360
6361             LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6362
6363             if (pinvokeVarDsc->lvTracked)
6364             {
6365                 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6366                                                       DEBUGARG("compLvFrameListRoot"));
6367
6368                 // We would prefer to have this enregistered in the PINVOKE_TCB register
6369                 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6370             }
6371
6372             // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6373             // worst case).  Make sure that the return value compiler temp that we create for the single
6374             // return block knows about this interference.
6375             if (genReturnLocal != BAD_VAR_NUM)
6376             {
6377                 noway_assert(genReturnBB);
6378                 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6379                 if (localTmp->lvTracked)
6380                 {
6381                     rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6382                                     VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6383                 }
6384             }
6385         }
6386
6387 #ifdef _TARGET_ARM_
6388         if (compFloatingPointUsed)
6389         {
6390             bool hasMustInitFloat = false;
6391
6392             // If we have any must-init floating point LclVars then we will add register interferences
6393             // for the arguments with RBM_SCRATCH.
6394             // This is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog(),
6395             // we won't home the arguments into REG_SCRATCH.
6396
6397             unsigned   lclNum;
6398             LclVarDsc* varDsc;
6399
6400             for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6401             {
6402                 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6403                 {
6404                     hasMustInitFloat = true;
6405                     break;
6406                 }
6407             }
6408
6409             if (hasMustInitFloat)
6410             {
6411                 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6412                 {
6413                     // If is an incoming argument, that is tracked and not floating-point
6414                     // If it is an incoming argument that is tracked and not floating-point
6415                     {
6416                         rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6417                                                          DEBUGARG("arg home with must-init fp"));
6418                     }
6419                 }
6420             }
6421         }
6422 #endif
6423
6424         stmtNum        = 0;
6425         rpAddedVarIntf = false;
6426         rpLostEnreg    = false;
6427
6428         /* Walk the basic blocks and predict reg use for each tree */
6429
6430         for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6431         {
6432             GenTreePtr stmt;
6433             compCurBB       = block;
6434             compCurLifeTree = NULL;
6435             VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6438
6439             for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6440             {
6441                 noway_assert(stmt->gtOper == GT_STMT);
6442
6443                 rpPredictSpillCnt = 0;
6444                 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6445                 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6446
6447                 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6448                 stmtNum++;
6449 #ifdef DEBUG
6450                 if (verbose)
6451                 {
6452                     printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6453                     gtDispTree(tree);
6454                     printf("\n");
6455                 }
6456 #endif
6457                 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6458
6459                 noway_assert(rpAsgVarNum == -1);
6460
6461                 if (rpPredictSpillCnt > tmpIntSpillMax)
6462                     tmpIntSpillMax = rpPredictSpillCnt;
6463             }
6464         }
6465         rpPasses++;
6466
6467         /* Decide whether we need to set mustPredict */
6468         mustPredict = false;
6469
6470 #ifdef _TARGET_ARM_
6471         // The spill count may now be high enough that we need to reserve r10. If this is the case,
6472         // we reserve r10 here, and if it had been used for coloring, throw out the last prediction and repredict.
6473         if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6474         {
6475             codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6476             allAcceptableRegs &= ~RBM_OPT_RSVD;
6477             if ((regUsed & RBM_OPT_RSVD) != 0)
6478             {
6479                 mustPredict              = true;
6480                 rpBestRecordedPrediction = nullptr;
6481             }
6482         }
6483 #endif
6484
6485         if (rpAddedVarIntf)
6486         {
6487             mustPredict = true;
6488 #ifdef DEBUG
6489             if (verbose)
6490                 raDumpVarIntf();
6491 #endif
6492         }
6493
6494         if (rpPasses == 1)
6495         {
6496             if ((opts.compFlags & CLFLG_REGVAR) == 0)
6497                 goto ALL_DONE;
6498
6499             if (rpPredictAssignAgain)
6500                 mustPredict = true;
6501 #ifdef DEBUG
6502             if (fJitNoRegLoc)
6503                 goto ALL_DONE;
6504 #endif
6505         }
6506
6507         /* Calculate the new value to use for regAvail */
6508
6509         regAvail = allAcceptableRegs;
6510
6511         /* If a frame pointer is required then we remove EBP */
6512         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6513             regAvail &= ~RBM_FPBASE;
6514
6515 #if ETW_EBP_FRAMED
6516         // We never have EBP available when ETW_EBP_FRAMED is defined
6517         regAvail &= ~RBM_FPBASE;
6518 #endif
6519
6520         // If we have done more than rpPassesPessimize passes then we must continue to
6521         // pessimize the interference graph by OR-ing in the interferences from the previous pass
6522
6523         if (rpPasses > rpPassesPessimize)
6524         {
6525             for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6526                 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6527
6528             /* If we reverse an EBP enregistration then keep it that way */
6529             if (rpReverseEBPenreg)
6530                 regAvail &= ~RBM_FPBASE;
6531         }
6532
6533 #ifdef DEBUG
6534         if (verbose)
6535             raDumpRegIntf();
6536 #endif
6537
6538         /*  Save the old variable/register interference graph */
6539         for (unsigned i = 0; i < REG_COUNT; i++)
6540         {
6541             VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6542         }
6543         oldStkPredict = rpStkPredict;
6544     } // end of while (true)
6545
6546 ALL_DONE:;
6547
6548     // If we recorded a better feasible allocation than we ended up with, go back to using it.
6549     rpUseRecordedPredictionIfBetter();
6550
6551 #if DOUBLE_ALIGN
6552     codeGen->setDoubleAlign(false);
6553 #endif
6554
6555     switch (rpFrameType)
6556     {
6557         default:
6558             noway_assert(!"rpFrameType not set correctly!");
6559             break;
6560         case FT_ESP_FRAME:
6561             noway_assert(!codeGen->isFramePointerRequired());
6562             noway_assert(!codeGen->isFrameRequired());
6563             codeGen->setFramePointerUsed(false);
6564             break;
6565         case FT_EBP_FRAME:
6566             noway_assert((regUsed & RBM_FPBASE) == 0);
6567             codeGen->setFramePointerUsed(true);
6568             break;
6569 #if DOUBLE_ALIGN
6570         case FT_DOUBLE_ALIGN_FRAME:
6571             noway_assert((regUsed & RBM_FPBASE) == 0);
6572             noway_assert(!codeGen->isFramePointerRequired());
6573             codeGen->setFramePointerUsed(false);
6574             codeGen->setDoubleAlign(true);
6575             break;
6576 #endif
6577     }
6578
6579     /* Record the set of registers that we need */
6580     codeGen->regSet.rsClearRegsModified();
6581     if (regUsed != RBM_NONE)
6582     {
6583         codeGen->regSet.rsSetRegsModified(regUsed);
6584     }
6585
6586     /* We need genFullPtrRegMap if:
6587      * The method is fully interruptible, or
6588      * We are generating an EBP-less frame (for stack-pointer deltas)
6589      */
6590
6591     genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6592
6593     raMarkStkVars();
6594 #ifdef DEBUG
6595     if (verbose)
6596     {
6597         printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6598         printf("  rpStkPredict was %u\n", rpStkPredict);
6599     }
6600 #endif
6601     rpRegAllocDone = true;
6602 }
6603
6604 #endif // LEGACY_BACKEND
6605
6606 /*****************************************************************************
6607  *
6608  *  Mark all variables as to whether they live on the stack frame
6609  *  (part or whole), and if so what the base is (FP or SP).
6610  */
6611
6612 void Compiler::raMarkStkVars()
6613 {
6614     unsigned   lclNum;
6615     LclVarDsc* varDsc;
6616
6617     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6618     {
6619         // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6620         CLANG_FORMAT_COMMENT_ANCHOR;
6621
6622 #ifdef LEGACY_BACKEND
6623         varDsc->lvOnFrame = false;
6624 #endif // LEGACY_BACKEND
6625
6626         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6627         {
6628             noway_assert(!varDsc->lvRegister);
6629             goto ON_STK;
6630         }
6631
6632         /* Fully enregistered variables don't need any frame space */
6633
6634         if (varDsc->lvRegister)
6635         {
6636             if (!isRegPairType(varDsc->TypeGet()))
6637             {
6638                 goto NOT_STK;
6639             }
6640
6641             /* For "large" variables make sure both halves are enregistered */
6642
6643             if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6644             {
6645                 goto NOT_STK;
6646             }
6647         }
6648         /* Unused variables typically don't get any frame space */
6649         else if (varDsc->lvRefCnt == 0)
6650         {
6651             bool needSlot = false;
6652
6653             bool stkFixedArgInVarArgs =
6654                 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6655
6656             // If its address has been exposed, ignore lvRefCnt. However, exclude
6657             // fixed arguments in varargs methods, as lvOnFrame shouldn't be set
6658             // for them because we don't want to explicitly report them to the GC.
6659
6660             if (!stkFixedArgInVarArgs)
6661             {
6662                 needSlot |= varDsc->lvAddrExposed;
6663             }
6664
6665 #if FEATURE_FIXED_OUT_ARGS
6666
6667             /* Is this the dummy variable representing GT_LCLBLK ? */
6668             needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6669
6670 #endif // FEATURE_FIXED_OUT_ARGS
6671
6672 #ifdef DEBUG
6673             /* For debugging, note that we have to reserve space even for
6674                unused variables if they are ever in scope. However, this is not
6675                an issue as fgExtendDbgLifetimes() adds an initialization and
6676                variables in scope will not have a zero ref-cnt.
6677              */
6678             if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6679             {
6680                 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6681                 {
6682                     noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6683                 }
6684             }
6685 #endif
6686             /*
6687               For Debug Code, we have to reserve space even if the variable is never
6688               in scope. We will also need to initialize it if it is a GC var.
6689               So we set lvMustInit and artificially bump up the ref-cnt.
6690              */
6691
6692             if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6693             {
6694                 needSlot |= true;
6695
6696                 if (lvaTypeIsGC(lclNum))
6697                 {
6698                     varDsc->lvRefCnt = 1;
6699                 }
6700
6701                 if (!varDsc->lvIsParam)
6702                 {
6703                     varDsc->lvMustInit = true;
6704                 }
6705             }
6706
6707 #ifndef LEGACY_BACKEND
6708             varDsc->lvOnFrame = needSlot;
6709 #endif // !LEGACY_BACKEND
6710             if (!needSlot)
6711             {
6712                 /* Clear the lvMustInit flag in case it is set */
6713                 varDsc->lvMustInit = false;
6714
6715                 goto NOT_STK;
6716             }
6717         }
6718
6719 #ifndef LEGACY_BACKEND
6720         if (!varDsc->lvOnFrame)
6721         {
6722             goto NOT_STK;
6723         }
6724 #endif // !LEGACY_BACKEND
6725
6726     ON_STK:
6727         /* The variable (or part of it) lives on the stack frame */
6728
6729         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6730 #if FEATURE_FIXED_OUT_ARGS
6731         noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6732 #else  // FEATURE_FIXED_OUT_ARGS
6733         noway_assert(lvaLclSize(lclNum) != 0);
6734 #endif // FEATURE_FIXED_OUT_ARGS
6735
6736         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6737                                   // stack frame
6738
6739     NOT_STK:;
6740         varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6741
6742 #if DOUBLE_ALIGN
6743
6744         if (codeGen->doDoubleAlign())
6745         {
6746             noway_assert(codeGen->isFramePointerUsed() == false);
6747
6748             /* All arguments are off of EBP with double-aligned frames */
6749
6750             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6751             {
6752                 varDsc->lvFramePointerBased = true;
6753             }
6754         }
6755
6756 #endif
6757
6758         /* Some basic checks */
6759
6760         // It must be in a register, on frame, or have zero references.
6761
6762         noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6763
6764 #ifndef LEGACY_BACKEND
6765         // We can't have both lvRegister and lvOnFrame for RyuJIT
6766         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6767 #else  // LEGACY_BACKEND
6768
6769         /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6770         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6771                      (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6772 #endif // LEGACY_BACKEND
6773
6774 #ifdef DEBUG
6775
6776         // For varargs functions, there should be no direct references to
6777         // parameter variables except for 'this' (because these were morphed
6778         // in the importer) and the 'arglist' parameter (which is not a GC
6779         // pointer), and the return buffer argument (if we are returning a
6780         // struct).
6781         // This is important because we don't want to try to report them
6782         // to the GC, as the frame offsets in these local variables would
6783         // not be correct.
6784
6785         if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6786         {
6787             if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6788             {
6789                 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6790             }
6791         }
6792 #endif
6793     }
6794 }
6795
6796 #ifdef LEGACY_BACKEND
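     // Note: rpRecordPrediction and rpUseRecordedPredictionIfBetter act as a simple
     // checkpoint/rollback pair: each feasible pass snapshots the per-local register
     // assignments together with rpStkPredict, and after the prediction loop we restore
     // that snapshot if the final pass ended with a worse (higher) weighted stack use
     // count.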
6797 void Compiler::rpRecordPrediction()
6798 {
6799     if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6800     {
6801         if (rpBestRecordedPrediction == NULL)
6802         {
6803             rpBestRecordedPrediction =
6804                 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
6805         }
6806         for (unsigned k = 0; k < lvaCount; k++)
6807         {
6808             rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6809             rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
6810             rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
6811         }
6812         rpBestRecordedStkPredict = rpStkPredict;
6813         JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6814     }
6815 }
6816
6817 void Compiler::rpUseRecordedPredictionIfBetter()
6818 {
6819     JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6820             rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
6821     if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6822     {
6823         JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6824                 rpBestRecordedStkPredict);
6825
6826         for (unsigned k = 0; k < lvaCount; k++)
6827         {
6828             lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6829             lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6830             lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6831         }
6832     }
6833 }
6834 #endif // LEGACY_BACKEND