Merge pull request #16005 from CarolEidt/Fix15848
[platform/upstream/coreclr.git] / src / jit / regalloc.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                           RegAlloc                                        XX
9 XX                                                                           XX
10 XX  Does the register allocation and puts the remaining lclVars on the stack XX
11 XX                                                                           XX
12 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14 */
15
16 #include "jitpch.h"
17 #ifdef _MSC_VER
18 #pragma hdrstop
19 #endif
20 #include "regalloc.h"
21
22 #if FEATURE_FP_REGALLOC
23 Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
24 {
25     DWORD val = JitConfig.JitRegisterFP();
26
27     return (enumConfigRegisterFP)(val & 0x3);
28 }
29 #endif // FEATURE_FP_REGALLOC
30
31 regMaskTP Compiler::raConfigRestrictMaskFP()
32 {
33     regMaskTP result = RBM_NONE;
34
35 #if FEATURE_FP_REGALLOC
36     switch (raConfigRegisterFP())
37     {
38         case CONFIG_REGISTER_FP_NONE:
39             result = RBM_NONE;
40             break;
41         case CONFIG_REGISTER_FP_CALLEE_TRASH:
42             result = RBM_FLT_CALLEE_TRASH;
43             break;
44         case CONFIG_REGISTER_FP_CALLEE_SAVED:
45             result = RBM_FLT_CALLEE_SAVED;
46             break;
47         case CONFIG_REGISTER_FP_FULL:
48             result = RBM_ALLFLOAT;
49             break;
50     }
51 #endif
52
53     return result;
54 }
55
56 #if DOUBLE_ALIGN
57 DWORD Compiler::getCanDoubleAlign()
58 {
59 #ifdef DEBUG
60     if (compStressCompile(STRESS_DBL_ALN, 20))
61         return MUST_DOUBLE_ALIGN;
62
63     return JitConfig.JitDoubleAlign();
64 #else
65     return DEFAULT_DOUBLE_ALIGN;
66 #endif
67 }
68
69 //------------------------------------------------------------------------
70 // shouldDoubleAlign: Determine whether to double-align the frame
71 //
72 // Arguments:
73 //    refCntStk       - sum of     ref counts for all stack based variables
74 //    refCntEBP       - sum of     ref counts for EBP enregistered variables
75 //    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
76 //    refCntStkParam  - sum of     ref counts for all stack based parameters
77 //    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
78 //                      with double fields).
79 //
80 // Return Value:
81 //    Returns true if this method estimates that a double-aligned frame would be beneficial
82 //
83 // Notes:
84 //    The impact of a double-aligned frame is computed as follows:
85 //    - We save a byte of code for each parameter reference (they are frame-pointer relative)
86 //    - We pay a byte of code for each non-parameter stack reference.
87 //    - We save the misalignment penalty and possible cache-line crossing penalty.
88 //      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
89 //    - We pay 7 extra bytes for:
90 //        MOV EBP,ESP,
91 //        LEA ESP,[EBP-offset]
92 //        AND ESP,-8 to double align ESP
93 //    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
94 //
95 //    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
96 //    Otherwise, we compare the weighted ref count of ebp-enregistered variables aginst double the
97 //    ref count for double-aligned values.
98 //
99 bool Compiler::shouldDoubleAlign(
100     unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
101 {
102     bool           doDoubleAlign        = false;
103     const unsigned DBL_ALIGN_SETUP_SIZE = 7;
104
105     unsigned bytesUsed         = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
106     unsigned misaligned_weight = 4;
107
108     if (compCodeOpt() == Compiler::SMALL_CODE)
109         misaligned_weight = 0;
110
111     if (compCodeOpt() == Compiler::FAST_CODE)
112         misaligned_weight *= 4;
113
114     JITDUMP("\nDouble alignment:\n");
115     JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
116     JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
117     JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
118
119     if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
120     {
121         JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
122     }
123     else if (refCntWtdEBP > refCntWtdStkDbl * 2)
124     {
125         // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
126         // not double aligned.
127         // Here are the numbers that make this not double-aligned.
128         //     refCntWtdStkDbl = 0x164
129         //     refCntWtdEBP    = 0x1a4
130         // We think we do need to change the heuristic to be in favor of double-align.
131
132         JITDUMP("    Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
133     }
134     else
135     {
136         // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
137         JITDUMP("    Predicting to create a double-aligned frame\n");
138         doDoubleAlign = true;
139     }
140     return doDoubleAlign;
141 }
142 #endif // DOUBLE_ALIGN
143
144 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
145
146 void Compiler::raInit()
147 {
148 #if FEATURE_STACK_FP_X87
149     /* We have not assigned any FP variables to registers yet */
150
151     VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
152 #endif
153     codeGen->intRegState.rsIsFloat   = false;
154     codeGen->floatRegState.rsIsFloat = true;
155
156     rpReverseEBPenreg = false;
157     rpAsgVarNum       = -1;
158     rpPassesMax       = 6;
159     rpPassesPessimize = rpPassesMax - 3;
160     if (opts.compDbgCode)
161     {
162         rpPassesMax++;
163     }
164     rpStkPredict            = (unsigned)-1;
165     rpFrameType             = FT_NOT_SET;
166     rpLostEnreg             = false;
167     rpMustCreateEBPCalled   = false;
168     rpRegAllocDone          = false;
169     rpMaskPInvokeEpilogIntf = RBM_NONE;
170
171     rpPredictMap[PREDICT_NONE] = RBM_NONE;
172     rpPredictMap[PREDICT_ADDR] = RBM_NONE;
173
174 #if FEATURE_FP_REGALLOC
175     rpPredictMap[PREDICT_REG]         = RBM_ALLINT | RBM_ALLFLOAT;
176     rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
177 #else
178     rpPredictMap[PREDICT_REG]         = RBM_ALLINT;
179     rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
180 #endif
181
182 #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
183 #include "register.h"
184
185 #if defined(_TARGET_ARM_)
186
187     rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
188     rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
189     rpPredictMap[PREDICT_REG_SP]    = RBM_ILLEGAL;
190
191 #elif defined(_TARGET_AMD64_)
192
193     rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
194     rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
195     rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
196
197 #elif defined(_TARGET_X86_)
198
199     rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
200     rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
201     rpPredictMap[PREDICT_REG_ESP]     = RBM_ILLEGAL;
202     rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
203     rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
204
205 #endif
206
207     rpBestRecordedPrediction = NULL;
208 }
209
210 /*****************************************************************************
211  *
212  *  The following table(s) determines the order in which registers are considered
213  *  for variables to live in
214  */
215
216 const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
217 {
218 #if FEATURE_FP_REGALLOC
219     if (varTypeIsFloating(regType))
220     {
221         static const regNumber raRegVarOrderFlt[]   = {REG_VAR_ORDER_FLT};
222         const unsigned         raRegVarOrderFltSize = _countof(raRegVarOrderFlt);
223
224         if (wbVarOrderSize != NULL)
225             *wbVarOrderSize = raRegVarOrderFltSize;
226
227         return &raRegVarOrderFlt[0];
228     }
229     else
230 #endif
231     {
232         static const regNumber raRegVarOrder[]   = {REG_VAR_ORDER};
233         const unsigned         raRegVarOrderSize = _countof(raRegVarOrder);
234
235         if (wbVarOrderSize != NULL)
236             *wbVarOrderSize = raRegVarOrderSize;
237
238         return &raRegVarOrder[0];
239     }
240 }
241
242 #ifdef DEBUG
243
244 /*****************************************************************************
245  *
246  *  Dump out the variable interference graph
247  *
248  */
249
250 void Compiler::raDumpVarIntf()
251 {
252     unsigned   lclNum;
253     LclVarDsc* varDsc;
254
255     printf("Var. interference graph for %s\n", info.compFullName);
256
257     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
258     {
259         /* Ignore the variable if it's not tracked */
260
261         if (!varDsc->lvTracked)
262             continue;
263
264         /* Get hold of the index and the interference mask for the variable */
265         unsigned varIndex = varDsc->lvVarIndex;
266
267         printf("  V%02u,T%02u and ", lclNum, varIndex);
268
269         unsigned refIndex;
270
271         for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
272         {
273             if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
274                 printf("T%02u ", refIndex);
275             else
276                 printf("    ");
277         }
278
279         printf("\n");
280     }
281
282     printf("\n");
283 }
284
285 /*****************************************************************************
286  *
287  *  Dump out the register interference graph
288  *
289  */
290 void Compiler::raDumpRegIntf()
291 {
292     printf("Reg. interference graph for %s\n", info.compFullName);
293
294     unsigned   lclNum;
295     LclVarDsc* varDsc;
296
297     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
298     {
299         unsigned varNum;
300
301         /* Ignore the variable if it's not tracked */
302
303         if (!varDsc->lvTracked)
304             continue;
305
306         /* Get hold of the index and the interference mask for the variable */
307
308         varNum = varDsc->lvVarIndex;
309
310         printf("  V%02u,T%02u and ", lclNum, varNum);
311
312         if (varDsc->IsFloatRegType())
313         {
314 #if !FEATURE_STACK_FP_X87
315             for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
316             {
317                 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
318                     printf("%3s ", getRegName(regNum, true));
319                 else
320                     printf("    ");
321             }
322 #endif
323         }
324         else
325         {
326             for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
327             {
328                 if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
329                     printf("%3s ", getRegName(regNum));
330                 else
331                     printf("    ");
332             }
333         }
334
335         printf("\n");
336     }
337
338     printf("\n");
339 }
340 #endif // DEBUG
341
342 /*****************************************************************************
343  *
344  * We'll adjust the ref counts based on interference
345  *
346  */
347
348 void Compiler::raAdjustVarIntf()
349 {
350     // This method was not correct and has been disabled.
351     return;
352 }
353
354 /*****************************************************************************/
355 /*****************************************************************************/
356 /* Determine register mask for a call/return from type.
357  */
358
359 inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
360 {
361     var_types type = tree->TypeGet();
362
363     if (varTypeIsStruct(type) && IsHfa(tree))
364     {
365         int retSlots = GetHfaCount(tree);
366         return ((1 << retSlots) - 1) << REG_FLOATRET;
367     }
368
369     const static regMaskTP returnMap[TYP_COUNT] = {
370         RBM_ILLEGAL,   // TYP_UNDEF,
371         RBM_NONE,      // TYP_VOID,
372         RBM_INTRET,    // TYP_BOOL,
373         RBM_INTRET,    // TYP_BYTE,
374         RBM_INTRET,    // TYP_UBYTE,
375         RBM_INTRET,    // TYP_SHORT,
376         RBM_INTRET,    // TYP_USHORT,
377         RBM_INTRET,    // TYP_INT,
378         RBM_INTRET,    // TYP_UINT,
379         RBM_LNGRET,    // TYP_LONG,
380         RBM_LNGRET,    // TYP_ULONG,
381         RBM_FLOATRET,  // TYP_FLOAT,
382         RBM_DOUBLERET, // TYP_DOUBLE,
383         RBM_INTRET,    // TYP_REF,
384         RBM_INTRET,    // TYP_BYREF,
385         RBM_ILLEGAL,   // TYP_STRUCT,
386         RBM_ILLEGAL,   // TYP_BLK,
387         RBM_ILLEGAL,   // TYP_LCLBLK,
388         RBM_ILLEGAL,   // TYP_UNKNOWN,
389     };
390
391     assert((unsigned)type < _countof(returnMap));
392     assert(returnMap[TYP_LONG] == RBM_LNGRET);
393     assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
394     assert(returnMap[TYP_REF] == RBM_INTRET);
395     assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
396
397     regMaskTP result = returnMap[type];
398     assert(result != RBM_ILLEGAL);
399     return result;
400 }
401
402 /*****************************************************************************/
403
404 /****************************************************************************/
405
406 #ifdef DEBUG
407
408 static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
409 {
410     unsigned   lclNum;
411     LclVarDsc* varDsc;
412
413     for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
414     {
415         if (!varDsc->lvTracked)
416             continue;
417
418         if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
419             continue;
420
421         if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
422             printf("V%02u ", lclNum);
423     }
424 }
425
426 #endif
427
428 /*****************************************************************************/
429 #ifdef DEBUG
430 /*****************************************************************************
431  *
432  *  Debugging helpers - display variables liveness info.
433  */
434
435 void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
436 {
437     do
438     {
439         printf("BB%02u: ", beg->bbNum);
440
441         printf(" in  = [ ");
442         dispLifeSet(comp, mask, beg->bbLiveIn);
443         printf("] ,");
444
445         printf(" out = [ ");
446         dispLifeSet(comp, mask, beg->bbLiveOut);
447         printf("]");
448
449         if (beg->bbFlags & BBF_VISITED)
450             printf(" inner=%u", beg->bbFPinVars);
451
452         printf("\n");
453
454         beg = beg->bbNext;
455         if (!beg)
456             return;
457     } while (beg != end);
458 }
459
460 #if FEATURE_STACK_FP_X87
461 void Compiler::raDispFPlifeInfo()
462 {
463     BasicBlock* block;
464
465     for (block = fgFirstBB; block; block = block->bbNext)
466     {
467         GenTreePtr stmt;
468
469         printf("BB%02u: in  = [ ", block->bbNum);
470         dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
471         printf("]\n\n");
472
473         VARSET_TP life(VarSetOps::MakeCopy(this, block->bbLiveIn));
474         for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
475         {
476             GenTreePtr tree;
477
478             noway_assert(stmt->gtOper == GT_STMT);
479
480             for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
481             {
482                 VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
483
484                 dispLifeSet(this, optAllFloatVars, life);
485                 printf("   ");
486                 gtDispTree(tree, 0, NULL, true);
487             }
488
489             printf("\n");
490         }
491
492         printf("BB%02u: out = [ ", block->bbNum);
493         dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
494         printf("]\n\n");
495     }
496 }
497 #endif // FEATURE_STACK_FP_X87
498 /*****************************************************************************/
499 #endif // DEBUG
500 /*****************************************************************************/
501
502 /*****************************************************************************/
503
504 void Compiler::raSetRegVarOrder(
505     var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
506 {
507     unsigned         normalVarOrderSize;
508     const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
509     unsigned         index;
510     unsigned         listIndex = 0;
511     regMaskTP        usedReg   = avoidReg;
512
513     noway_assert(*customVarOrderSize >= normalVarOrderSize);
514
515     if (prefReg)
516     {
517         /* First place the preferred registers at the start of customVarOrder */
518
519         regMaskTP regBit;
520         regNumber regNum;
521
522         for (index = 0; index < normalVarOrderSize; index++)
523         {
524             regNum = normalVarOrder[index];
525             regBit = genRegMask(regNum);
526
527             if (usedReg & regBit)
528                 continue;
529
530             if (prefReg & regBit)
531             {
532                 usedReg |= regBit;
533                 noway_assert(listIndex < normalVarOrderSize);
534                 customVarOrder[listIndex++] = regNum;
535                 prefReg -= regBit;
536                 if (prefReg == 0)
537                     break;
538             }
539         }
540
541 #if CPU_HAS_BYTE_REGS
542         /* Then if byteable registers are preferred place them */
543
544         if (prefReg & RBM_BYTE_REG_FLAG)
545         {
546             for (index = 0; index < normalVarOrderSize; index++)
547             {
548                 regNum = normalVarOrder[index];
549                 regBit = genRegMask(regNum);
550
551                 if (usedReg & regBit)
552                     continue;
553
554                 if (RBM_BYTE_REGS & regBit)
555                 {
556                     usedReg |= regBit;
557                     noway_assert(listIndex < normalVarOrderSize);
558                     customVarOrder[listIndex++] = regNum;
559                 }
560             }
561         }
562
563 #endif // CPU_HAS_BYTE_REGS
564     }
565
566     /* Now place all the non-preferred registers */
567
568     for (index = 0; index < normalVarOrderSize; index++)
569     {
570         regNumber regNum = normalVarOrder[index];
571         regMaskTP regBit = genRegMask(regNum);
572
573         if (usedReg & regBit)
574             continue;
575
576         usedReg |= regBit;
577         noway_assert(listIndex < normalVarOrderSize);
578         customVarOrder[listIndex++] = regNum;
579     }
580
581     if (avoidReg)
582     {
583         /* Now place the "avoid" registers */
584
585         for (index = 0; index < normalVarOrderSize; index++)
586         {
587             regNumber regNum = normalVarOrder[index];
588             regMaskTP regBit = genRegMask(regNum);
589
590             if (avoidReg & regBit)
591             {
592                 noway_assert(listIndex < normalVarOrderSize);
593                 customVarOrder[listIndex++] = regNum;
594                 avoidReg -= regBit;
595                 if (avoidReg == 0)
596                     break;
597             }
598         }
599     }
600
601     *customVarOrderSize = listIndex;
602     noway_assert(listIndex == normalVarOrderSize);
603 }
604
605 /*****************************************************************************
606  *
607  *  Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
608  */
609
610 void Compiler::raSetupArgMasks(RegState* regState)
611 {
612     /* Determine the registers holding incoming register arguments */
613     /*  and setup raAvoidArgRegMask to the set of registers that we  */
614     /*  may want to avoid when enregistering the locals.            */
615
616     regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
617     raAvoidArgRegMask                  = RBM_NONE;
618
619     LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
620
621     for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
622     {
623         noway_assert(argDsc->lvIsParam);
624
625         // Is it a register argument ?
626         if (!argDsc->lvIsRegArg)
627             continue;
628
629         // only process args that apply to the current register file
630         if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
631         {
632             continue;
633         }
634
635         // Is it dead on entry ??
636         // In certain cases such as when compJmpOpUsed is true,
637         // or when we have a generic type context arg that we must report
638         // then the arguments have to be kept alive throughout the prolog.
639         // So we have to consider it as live on entry.
640         //
641         bool keepArgAlive = compJmpOpUsed;
642         if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
643             ((lvaTable + info.compTypeCtxtArg) == argDsc))
644         {
645             keepArgAlive = true;
646         }
647
648         if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
649         {
650             continue;
651         }
652
653         // The code to set the regState for each arg is outlined for shared use
654         // by linear scan
655         regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
656
657         // Do we need to try to avoid this incoming arg registers?
658
659         // If it's not tracked, don't do the stuff below.
660         if (!argDsc->lvTracked)
661             continue;
662
663         // If the incoming arg is used after a call it is live accross
664         //  a call and will have to be allocated to a caller saved
665         //  register anyway (a very common case).
666         //
667         // In this case it is pointless to ask that the higher ref count
668         //  locals to avoid using the incoming arg register
669
670         unsigned argVarIndex = argDsc->lvVarIndex;
671
672         /* Does the incoming register and the arg variable interfere? */
673
674         if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
675         {
676             // No they do not interfere,
677             //  so we add inArgReg to raAvoidArgRegMask
678
679             raAvoidArgRegMask |= genRegMask(inArgReg);
680         }
681 #ifdef _TARGET_ARM_
682         if (argDsc->lvType == TYP_DOUBLE)
683         {
684             // Avoid the double register argument pair for register allocation.
685             if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
686             {
687                 raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
688             }
689         }
690 #endif
691     }
692 }
693
694 #endif // LEGACY_BACKEND
695
696 // The code to set the regState for each arg is outlined for shared use
697 // by linear scan. (It is not shared for System V AMD64 platform.)
698 regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
699 {
700     regNumber inArgReg  = argDsc->lvArgReg;
701     regMaskTP inArgMask = genRegMask(inArgReg);
702
703     if (regState->rsIsFloat)
704     {
705         noway_assert(inArgMask & RBM_FLTARG_REGS);
706     }
707     else //  regState is for the integer registers
708     {
709         // This might be the fixed return buffer register argument (on ARM64)
710         // We check and allow inArgReg to be theFixedRetBuffReg
711         if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
712         {
713             // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
714             noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
715             // We should have recorded the variable number for the return buffer arg
716             noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
717         }
718         else // we have a regular arg
719         {
720             noway_assert(inArgMask & RBM_ARG_REGS);
721         }
722     }
723
724     regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
725
726 #ifdef _TARGET_ARM_
727     if (argDsc->lvType == TYP_DOUBLE)
728     {
729         if (info.compIsVarArgs || opts.compUseSoftFP)
730         {
731             assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
732             assert(!regState->rsIsFloat);
733         }
734         else
735         {
736             assert(regState->rsIsFloat);
737             assert(emitter::isDoubleReg(inArgReg));
738         }
739         regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
740     }
741     else if (argDsc->lvType == TYP_LONG)
742     {
743         assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
744         assert(!regState->rsIsFloat);
745         regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
746     }
747 #endif // _TARGET_ARM_
748
749 #if FEATURE_MULTIREG_ARGS
750     if (varTypeIsStruct(argDsc->lvType))
751     {
752         if (argDsc->lvIsHfaRegArg())
753         {
754             assert(regState->rsIsFloat);
755             unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
756             for (unsigned i = 1; i < cSlots; i++)
757             {
758                 assert(inArgReg + i <= LAST_FP_ARGREG);
759                 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
760             }
761         }
762         else
763         {
764             unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
765             for (unsigned i = 1; i < cSlots; i++)
766             {
767                 regNumber nextArgReg = (regNumber)(inArgReg + i);
768                 if (nextArgReg > REG_ARG_LAST)
769                 {
770                     break;
771                 }
772                 assert(regState->rsIsFloat == false);
773                 regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
774             }
775         }
776     }
777 #endif // FEATURE_MULTIREG_ARGS
778
779     return inArgReg;
780 }
781
782 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
783
784 /*****************************************************************************
785  *
786  *  Assign variables to live in registers, etc.
787  */
788
789 void Compiler::raAssignVars()
790 {
791 #ifdef DEBUG
792     if (verbose)
793         printf("*************** In raAssignVars()\n");
794 #endif
795     /* We need to keep track of which registers we ever touch */
796
797     codeGen->regSet.rsClearRegsModified();
798
799 #if FEATURE_STACK_FP_X87
800     // FP register allocation
801     raEnregisterVarsStackFP();
802     raGenerateFPRefCounts();
803 #endif
804
805     /* Predict registers used by code generation */
806     rpPredictRegUse(); // New reg predictor/allocator
807
808     // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
809     // so that the gc tracking logic and lvMustInit logic will ignore them.
810
811     unsigned   lclNum;
812     LclVarDsc* varDsc;
813
814     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
815     {
816         if (varDsc->lvType != TYP_STRUCT)
817             continue;
818
819         if (!varDsc->lvPromoted)
820             continue;
821
822         if (varDsc->lvIsParam)
823             continue;
824
825         if (varDsc->lvRefCnt > 0)
826             continue;
827
828 #ifdef DEBUG
829         if (verbose)
830         {
831             printf("Mark unused struct local V%02u\n", lclNum);
832         }
833
834         lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
835
836         if (promotionType == PROMOTION_TYPE_DEPENDENT)
837         {
838             // This should only happen when all its field locals are unused as well.
839
840             for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
841                  varNum++)
842             {
843                 noway_assert(lvaTable[varNum].lvRefCnt == 0);
844                 lvaTable[varNum].lvIsStructField = false;
845             }
846         }
847         else
848         {
849             noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
850         }
851
852         varDsc->lvUnusedStruct = 1;
853 #endif
854
855         // Change such struct locals to ints
856
857         varDsc->lvType = TYP_INT; // Bash to a non-gc type.
858         noway_assert(!varDsc->lvTracked);
859         noway_assert(!varDsc->lvRegister);
860         varDsc->lvOnFrame  = false; // Force it not to be onstack.
861         varDsc->lvMustInit = false; // Force not to init it.
862         varDsc->lvStkOffs  = 0;     // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
863     }
864 }
865
866 /*****************************************************************************/
867 /*****************************************************************************/
868
869 /*****************************************************************************
870  *
871  *   Given a regNumber return the correct predictReg enum value
872  */
873
874 inline static rpPredictReg rpGetPredictForReg(regNumber reg)
875 {
876     return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
877 }
878
879 /*****************************************************************************
880  *
881  *   Given a varIndex return the correct predictReg enum value
882  */
883
884 inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
885 {
886     return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
887 }
888
889 /*****************************************************************************
890  *
891  *   Given a rpPredictReg return the correct varNumber value
892  */
893
894 inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
895 {
896     return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
897 }
898
899 /*****************************************************************************
900  *
901  *   Given a rpPredictReg return true if it specifies a Txx register
902  */
903
904 inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
905 {
906     if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
907         return true;
908     else
909         return false;
910 }
911
912 /*****************************************************************************
913  *
914  *   Given a regmask return the correct predictReg enum value
915  */
916
917 static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
918 {
919     rpPredictReg result = PREDICT_NONE;
920     if (regmask != 0) /* Check if regmask has zero bits set */
921     {
922         if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
923         {
924             DWORD reg = 0;
925             assert(FitsIn<DWORD>(regmask));
926             BitScanForward(&reg, (DWORD)regmask);
927             return rpGetPredictForReg((regNumber)reg);
928         }
929
930 #if defined(_TARGET_ARM_)
931         /* It has multiple bits set */
932         else if (regmask == (RBM_R0 | RBM_R1))
933         {
934             result = PREDICT_PAIR_R0R1;
935         }
936         else if (regmask == (RBM_R2 | RBM_R3))
937         {
938             result = PREDICT_PAIR_R2R3;
939         }
940 #elif defined(_TARGET_X86_)
941         /* It has multiple bits set */
942         else if (regmask == (RBM_EAX | RBM_EDX))
943         {
944             result = PREDICT_PAIR_EAXEDX;
945         }
946         else if (regmask == (RBM_ECX | RBM_EBX))
947         {
948             result = PREDICT_PAIR_ECXEBX;
949         }
950 #endif
951         else /* It doesn't match anything */
952         {
953             result = PREDICT_NONE;
954             assert(!"unreachable");
955             NO_WAY("bad regpair");
956         }
957     }
958     return result;
959 }
960
961 /*****************************************************************************
962  *
963  *  Record a variable to register(s) interference
964  */
965
966 bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
967
968 {
969     bool addedIntf = false;
970
971     if (regMask != 0)
972     {
973         for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
974         {
975             regMaskTP regBit = genRegMask(regNum);
976
977             if (regMask & regBit)
978             {
979                 VARSET_TP newIntf(VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
980                 if (!VarSetOps::IsEmpty(this, newIntf))
981                 {
982 #ifdef DEBUG
983                     if (verbose)
984                     {
985                         VarSetOps::Iter newIntfIter(this, newIntf);
986                         unsigned        varNum = 0;
987                         while (newIntfIter.NextElem(&varNum))
988                         {
989                             unsigned   lclNum = lvaTrackedToVarNum[varNum];
990                             LclVarDsc* varDsc = &lvaTable[varNum];
991 #if FEATURE_FP_REGALLOC
992                             // Only print the useful interferences
993                             // i.e. floating point LclVar interference with floating point registers
994                             //         or integer LclVar interference with general purpose registers
995                             if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
996 #endif
997                             {
998                                 printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
999                                        getRegName(regNum), msg);
1000                             }
1001                         }
1002                     }
1003 #endif
1004                     addedIntf = true;
1005                     VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
1006                 }
1007
1008                 regMask -= regBit;
1009                 if (regMask == 0)
1010                     break;
1011             }
1012         }
1013     }
1014     return addedIntf;
1015 }
1016
1017 /*****************************************************************************
1018  *
1019  *  Record a new variable to variable(s) interference
1020  */
1021
1022 bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
1023 {
1024     noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
1025     noway_assert(!VarSetOps::IsEmpty(this, intfVar));
1026
1027     VARSET_TP oneVar(VarSetOps::MakeEmpty(this));
1028     VarSetOps::AddElemD(this, oneVar, varNum);
1029
1030     bool newIntf = fgMarkIntf(intfVar, oneVar);
1031
1032     if (newIntf)
1033         rpAddedVarIntf = true;
1034
1035 #ifdef DEBUG
1036     if (verbose && newIntf)
1037     {
1038         for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
1039         {
1040             if (VarSetOps::IsMember(this, intfVar, oneNum))
1041             {
1042                 unsigned lclNum = lvaTrackedToVarNum[varNum];
1043                 unsigned lclOne = lvaTrackedToVarNum[oneNum];
1044                 printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
1045                        oneNum, msg);
1046             }
1047         }
1048     }
1049 #endif
1050
1051     return newIntf;
1052 }
1053
1054 /*****************************************************************************
1055  *
1056  *   Determine preferred register mask for a given predictReg value
1057  */
1058
1059 inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
1060 {
1061     if (rpHasVarIndexForPredict(predictReg))
1062         predictReg = PREDICT_REG;
1063
1064     noway_assert((unsigned)predictReg < _countof(rpPredictMap));
1065     noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
1066
1067     regMaskTP regAvailForType = rpPredictMap[predictReg];
1068     if (varTypeIsFloating(type))
1069     {
1070         regAvailForType &= RBM_ALLFLOAT;
1071     }
1072     else
1073     {
1074         regAvailForType &= RBM_ALLINT;
1075     }
1076 #ifdef _TARGET_ARM_
1077     if (type == TYP_DOUBLE)
1078     {
1079         if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
1080         {
1081             // Fix 388433 ARM JitStress WP7
1082             if ((regAvailForType & RBM_DBL_REGS) != 0)
1083             {
1084                 regAvailForType |= (regAvailForType << 1);
1085             }
1086             else
1087             {
1088                 regAvailForType = RBM_NONE;
1089             }
1090         }
1091     }
1092 #endif
1093     return regAvailForType;
1094 }
1095
1096 /*****************************************************************************
1097  *
1098  *  Predict register choice for a type.
1099  *
1100  *  Adds the predicted registers to rsModifiedRegsMask.
1101  */
1102 regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
1103 {
1104     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
1105     regNumber regNum;
1106     regMaskTP regBits;
1107
1108     // Add any reserved register to the lockedRegs
1109     lockedRegs |= codeGen->regSet.rsMaskResvd;
1110
1111     /* Clear out the lockedRegs from preferReg */
1112     preferReg &= ~lockedRegs;
1113
1114     if (rpAsgVarNum != -1)
1115     {
1116         noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
1117
1118         /* Don't pick the register used by rpAsgVarNum either */
1119         LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
1120         noway_assert(tgtVar->lvRegNum != REG_STK);
1121
1122         preferReg &= ~genRegMask(tgtVar->lvRegNum);
1123     }
1124
1125     switch (type)
1126     {
1127         case TYP_BOOL:
1128         case TYP_BYTE:
1129         case TYP_UBYTE:
1130         case TYP_SHORT:
1131         case TYP_USHORT:
1132         case TYP_INT:
1133         case TYP_UINT:
1134         case TYP_REF:
1135         case TYP_BYREF:
1136 #ifdef _TARGET_AMD64_
1137         case TYP_LONG:
1138 #endif // _TARGET_AMD64_
1139
1140             // expand preferReg to all non-locked registers if no bits set
1141             preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
1142
1143             if (preferReg == 0) // no bits set?
1144             {
1145                 // Add one predefined spill choice register if no bits set.
1146                 // (The jit will introduce one spill temp)
1147                 preferReg |= RBM_SPILL_CHOICE;
1148                 rpPredictSpillCnt++;
1149
1150 #ifdef DEBUG
1151                 if (verbose)
1152                     printf("Predict one spill temp\n");
1153 #endif
1154             }
1155
1156             if (preferReg != 0)
1157             {
1158                 /* Iterate the registers in the order specified by rpRegTmpOrder */
1159
1160                 for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
1161                 {
1162                     regNum  = rpRegTmpOrder[index];
1163                     regBits = genRegMask(regNum);
1164
1165                     if ((preferReg & regBits) == regBits)
1166                     {
1167                         goto RET;
1168                     }
1169                 }
1170             }
1171             /* Otherwise we have allocated all registers, so do nothing */
1172             break;
1173
1174 #ifndef _TARGET_AMD64_
1175         case TYP_LONG:
1176
1177             if ((preferReg == 0) ||                   // no bits set?
1178                 ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
1179             {
1180                 // expand preferReg to all non-locked registers
1181                 preferReg = RBM_ALLINT & ~lockedRegs;
1182             }
1183
1184             if (preferReg == 0) // no bits set?
1185             {
1186                 // Add EAX:EDX to the registers
1187                 // (The jit will introduce two spill temps)
1188                 preferReg = RBM_PAIR_TMP;
1189                 rpPredictSpillCnt += 2;
1190 #ifdef DEBUG
1191                 if (verbose)
1192                     printf("Predict two spill temps\n");
1193 #endif
1194             }
1195             else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
1196             {
1197                 if ((preferReg & RBM_PAIR_TMP_LO) == 0)
1198                 {
1199                     // Add EAX to the registers
1200                     // (The jit will introduce one spill temp)
1201                     preferReg |= RBM_PAIR_TMP_LO;
1202                 }
1203                 else
1204                 {
1205                     // Add EDX to the registers
1206                     // (The jit will introduce one spill temp)
1207                     preferReg |= RBM_PAIR_TMP_HI;
1208                 }
1209                 rpPredictSpillCnt++;
1210 #ifdef DEBUG
1211                 if (verbose)
1212                     printf("Predict one spill temp\n");
1213 #endif
1214             }
1215
1216             regPairNo regPair;
1217             regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
1218             if (regPair != REG_PAIR_NONE)
1219             {
1220                 regBits = genRegPairMask(regPair);
1221                 goto RET;
1222             }
1223
1224             /* Otherwise we have allocated all registers, so do nothing */
1225             break;
1226 #endif // _TARGET_AMD64_
1227
1228 #ifdef _TARGET_ARM_
1229         case TYP_STRUCT:
1230 #endif
1231
1232         case TYP_FLOAT:
1233         case TYP_DOUBLE:
1234
1235 #if FEATURE_FP_REGALLOC
1236             regMaskTP restrictMask;
1237             restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
1238             assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
1239
1240             // expand preferReg to all available non-locked registers if no bits set
1241             preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
1242             regMaskTP preferDouble;
1243             preferDouble = preferReg & (preferReg >> 1);
1244
1245             if ((preferReg == 0) // no bits set?
1246 #ifdef _TARGET_ARM_
1247                 || ((type == TYP_DOUBLE) &&
1248                     ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
1249 #endif
1250                 )
1251             {
1252                 // Add one predefined spill choice register if no bits set.
1253                 // (The jit will introduce one spill temp)
1254                 preferReg |= RBM_SPILL_CHOICE_FLT;
1255                 rpPredictSpillCnt++;
1256
1257 #ifdef DEBUG
1258                 if (verbose)
1259                     printf("Predict one spill temp (float)\n");
1260 #endif
1261             }
1262
1263             assert(preferReg != 0);
1264
1265             /* Iterate the registers in the order specified by raRegFltTmpOrder */
1266
1267             for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
1268             {
1269                 regNum  = raRegFltTmpOrder[index];
1270                 regBits = genRegMask(regNum);
1271
1272                 if (varTypeIsFloating(type))
1273                 {
1274 #ifdef _TARGET_ARM_
1275                     if (type == TYP_DOUBLE)
1276                     {
1277                         if ((regBits & RBM_DBL_REGS) == 0)
1278                         {
1279                             continue; // We must restrict the set to the double registers
1280                         }
1281                         else
1282                         {
1283                             // TYP_DOUBLE use two consecutive registers
1284                             regBits |= genRegMask(REG_NEXT(regNum));
1285                         }
1286                     }
1287 #endif
1288                     // See if COMPlus_JitRegisterFP is restricting this FP register
1289                     //
1290                     if ((restrictMask & regBits) != regBits)
1291                         continue;
1292                 }
1293
1294                 if ((preferReg & regBits) == regBits)
1295                 {
1296                     goto RET;
1297                 }
1298             }
1299             /* Otherwise we have allocated all registers, so do nothing */
1300             break;
1301
1302 #else // !FEATURE_FP_REGALLOC
1303
1304             return RBM_NONE;
1305
1306 #endif
1307
1308         default:
1309             noway_assert(!"unexpected type in reg use prediction");
1310     }
1311
1312     /* Abnormal return */
1313     noway_assert(!"Ran out of registers in rpPredictRegPick");
1314     return RBM_NONE;
1315
1316 RET:
1317     /*
1318      *  If during the first prediction we need to allocate
1319      *  one of the registers that we used for coloring locals
1320      *  then flag this by setting rpPredictAssignAgain.
1321      *  We will have to go back and repredict the registers
1322      */
1323     if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
1324         rpPredictAssignAgain = true;
1325
1326     // Add a register interference to each of the last use variables
1327     if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
1328     {
1329         VARSET_TP lastUse(VarSetOps::MakeEmpty(this));
1330         VarSetOps::Assign(this, lastUse, rpLastUseVars);
1331         VARSET_TP inPlaceUse(VarSetOps::MakeEmpty(this));
1332         VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
1333         // While we still have any lastUse or inPlaceUse bits
1334         VARSET_TP useUnion(VarSetOps::Union(this, lastUse, inPlaceUse));
1335
1336         VARSET_TP       varAsSet(VarSetOps::MakeEmpty(this));
1337         VarSetOps::Iter iter(this, useUnion);
1338         unsigned        varNum = 0;
1339         while (iter.NextElem(&varNum))
1340         {
1341             // We'll need this for one of the calls...
1342             VarSetOps::ClearD(this, varAsSet);
1343             VarSetOps::AddElemD(this, varAsSet, varNum);
1344
1345             // If this varBit and lastUse?
1346             if (VarSetOps::IsMember(this, lastUse, varNum))
1347             {
1348                 // Record a register to variable interference
1349                 rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
1350             }
1351
1352             // If this varBit and inPlaceUse?
1353             if (VarSetOps::IsMember(this, inPlaceUse, varNum))
1354             {
1355                 // Record a register to variable interference
1356                 rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
1357             }
1358         }
1359     }
1360     codeGen->regSet.rsSetRegsModified(regBits);
1361
1362     return regBits;
1363 }
1364
1365 /*****************************************************************************
1366  *
1367  *  Predict integer register use for generating an address mode for a tree,
1368  *  by setting tree->gtUsedRegs to all registers used by this tree and its
1369  *  children.
1370  *    tree       - is the child of a GT_IND node
1371  *    type       - the type of the GT_IND node (floating point/integer)
1372  *    lockedRegs - are the registers which are currently held by
1373  *                 a previously evaluated node.
1374  *    rsvdRegs   - registers which should not be allocated because they will
1375  *                 be needed to evaluate a node in the future
1376  *               - Also if rsvdRegs has the RBM_LASTUSE bit set then
1377  *                 the rpLastUseVars set should be saved and restored
1378  *                 so that we don't add any new variables to rpLastUseVars
1379  *    lenCSE     - is non-NULL only when we have a lenCSE expression
1380  *
1381  *  Return the scratch registers to be held by this tree. (one or two registers
1382  *  to form an address expression)
1383  */
1384
1385 regMaskTP Compiler::rpPredictAddressMode(
1386     GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
1387 {
1388     GenTreePtr op1;
1389     GenTreePtr op2;
1390     GenTreePtr opTemp;
1391     genTreeOps oper = tree->OperGet();
1392     regMaskTP  op1Mask;
1393     regMaskTP  op2Mask;
1394     regMaskTP  regMask;
1395     ssize_t    sh;
1396     ssize_t    cns = 0;
1397     bool       rev;
1398     bool       hasTwoAddConst     = false;
1399     bool       restoreLastUseVars = false;
1400     VARSET_TP  oldLastUseVars(VarSetOps::MakeEmpty(this));
1401
1402     /* do we need to save and restore the rpLastUseVars set ? */
1403     if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
1404     {
1405         restoreLastUseVars = true;
1406         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
1407     }
1408     rsvdRegs &= ~RBM_LASTUSE;
1409
1410     /* if not an add, then just force it to a register */
1411
1412     if (oper != GT_ADD)
1413     {
1414         if (oper == GT_ARR_ELEM)
1415         {
1416             regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
1417             goto DONE;
1418         }
1419         else
1420         {
1421             goto NO_ADDR_EXPR;
1422         }
1423     }
1424
1425     op1 = tree->gtOp.gtOp1;
1426     op2 = tree->gtOp.gtOp2;
1427     rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
1428
1429     /* look for (x + y) + icon address mode */
1430
1431     if (op2->OperGet() == GT_CNS_INT)
1432     {
1433         cns = op2->gtIntCon.gtIconVal;
1434
1435         /* if not an add, then just force op1 into a register */
1436         if (op1->OperGet() != GT_ADD)
1437             goto ONE_ADDR_EXPR;
1438
1439         hasTwoAddConst = true;
1440
1441         /* Record the 'rev' flag, reverse evaluation order */
1442         rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
1443
1444         op2 = op1->gtOp.gtOp2;
1445         op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
1446     }
1447
1448     /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
1449
1450     sh = 0;
1451     if (op2->OperGet() == GT_LSH)
1452     {
1453         if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
1454         {
1455             sh     = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1456             opTemp = op2->gtOp.gtOp1;
1457         }
1458         else
1459         {
1460             opTemp = NULL;
1461         }
1462     }
1463     else
1464     {
1465         opTemp = op2;
1466     }
1467
1468     if (opTemp != NULL)
1469     {
1470         if (opTemp->OperGet() == GT_NOP)
1471         {
1472             opTemp = opTemp->gtOp.gtOp1;
1473         }
1474
1475         // Is this a const operand?
1476         if (opTemp->OperGet() == GT_CNS_INT)
1477         {
1478             // Compute the new cns value that Codegen will end up using
1479             cns += (opTemp->gtIntCon.gtIconVal << sh);
1480
1481             goto ONE_ADDR_EXPR;
1482         }
1483     }
1484
1485     /* Check for LSH in op1 slot */
1486
1487     if (op1->OperGet() != GT_LSH)
1488         goto TWO_ADDR_EXPR;
1489
1490     opTemp = op1->gtOp.gtOp2;
1491
1492     if (opTemp->OperGet() != GT_CNS_INT)
1493         goto TWO_ADDR_EXPR;
1494
1495     sh = opTemp->gtIntCon.gtIconVal;
1496
1497     /* Check for LSH of 0, special case */
1498     if (sh == 0)
1499         goto TWO_ADDR_EXPR;
1500
1501 #if defined(_TARGET_XARCH_)
1502
1503     /* Check for LSH of 1 2 or 3 */
1504     if (sh > 3)
1505         goto TWO_ADDR_EXPR;
1506
1507 #elif defined(_TARGET_ARM_)
1508
1509     /* Check for LSH of 1 to 30 */
1510     if (sh > 30)
1511         goto TWO_ADDR_EXPR;
1512
1513 #else
1514
1515     goto TWO_ADDR_EXPR;
1516
1517 #endif
1518
1519     /* Matched a leftShift by 'sh' subtree, move op1 down */
1520     op1 = op1->gtOp.gtOp1;
1521
1522 TWO_ADDR_EXPR:
1523
1524     /* Now we have to evaluate op1 and op2 into registers */
1525
1526     /* Evaluate op1 and op2 in the correct order */
1527     if (rev)
1528     {
1529         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
1530         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
1531     }
1532     else
1533     {
1534         op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
1535         op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
1536     }
1537
1538     /*  If op1 and op2 must be spilled and reloaded then
1539      *  op1 and op2 might be reloaded into the same register
1540      *  This can only happen when all the registers are lockedRegs
1541      */
1542     if ((op1Mask == op2Mask) && (op1Mask != 0))
1543     {
1544         /* We'll need to grab a different register for op2 */
1545         op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
1546     }
1547
1548 #ifdef _TARGET_ARM_
1549     // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
1550     //      [op2 + op1<<sh + cns]
1551     // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
1552     //
1553     if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
1554     {
1555         op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1556     }
1557
1558     //
1559     // On the ARM we will need at least one scratch register for trees that have this form:
1560     //     [op1 + op2 + cns] or  [op1 + op2<<sh + cns]
1561     // or for a float/double or long when we have both op1 and op2
1562     // or when we have an 'cns' that is too large for the ld/st instruction
1563     //
1564     if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
1565     {
1566         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1567     }
1568
1569     //
1570     // If we create a CSE that immediately dies then we may need to add an additional register interference
1571     // so we don't color the CSE into R3
1572     //
1573     if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
1574     {
1575         opTemp = op2->gtOp.gtOp2;
1576         if (opTemp->OperGet() == GT_LCL_VAR)
1577         {
1578             unsigned   varNum = opTemp->gtLclVar.gtLclNum;
1579             LclVarDsc* varDsc = &lvaTable[varNum];
1580
1581             if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
1582             {
1583                 rpRecordRegIntf(RBM_TMP_0,
1584                                 VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
1585             }
1586         }
1587     }
1588 #endif
1589
1590     regMask          = (op1Mask | op2Mask);
1591     tree->gtUsedRegs = (regMaskSmall)regMask;
1592     goto DONE;
1593
1594 ONE_ADDR_EXPR:
1595
1596     /* now we have to evaluate op1 into a register */
1597
1598     op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
1599     op2Mask = RBM_NONE;
1600
1601 #ifdef _TARGET_ARM_
1602     //
1603     // On the ARM we will need another scratch register when we have an 'cns' that is too large for the ld/st
1604     // instruction
1605     //
1606     if (!codeGen->validDispForLdSt(cns, type))
1607     {
1608         op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
1609     }
1610 #endif
1611
1612     regMask          = (op1Mask | op2Mask);
1613     tree->gtUsedRegs = (regMaskSmall)regMask;
1614     goto DONE;
1615
1616 NO_ADDR_EXPR:
1617
1618 #if !CPU_LOAD_STORE_ARCH
1619     if (oper == GT_CNS_INT)
1620     {
1621         /* Indirect of a constant does not require a register */
1622         regMask = RBM_NONE;
1623     }
1624     else
1625 #endif
1626     {
1627         /* now we have to evaluate tree into a register */
1628         regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
1629     }
1630
1631 DONE:
1632     regMaskTP regUse = tree->gtUsedRegs;
1633
1634     if (!VarSetOps::IsEmpty(this, compCurLife))
1635     {
1636         // Add interference between the current set of life variables and
1637         //  the set of temporary registers need to evaluate the sub tree
1638         if (regUse)
1639         {
1640             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
1641         }
1642     }
1643
1644     /* Do we need to resore the oldLastUseVars value */
1645     if (restoreLastUseVars)
1646     {
1647         /*
1648          *  If we used a GT_ASG targeted register then we need to add
1649          *  a variable interference between any new last use variables
1650          *  and the GT_ASG targeted register
1651          */
1652         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
1653         {
1654             rpRecordVarIntf(rpAsgVarNum,
1655                             VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
1656         }
1657         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
1658     }
1659
1660     return regMask;
1661 }
1662
1663 /*****************************************************************************
1664  *
1665  *
1666  */
1667
1668 void Compiler::rpPredictRefAssign(unsigned lclNum)
1669 {
1670     LclVarDsc* varDsc = lvaTable + lclNum;
1671
1672     varDsc->lvRefAssign = 1;
1673
1674 #if NOGC_WRITE_BARRIERS
1675 #ifdef DEBUG
1676     if (verbose)
1677     {
1678         if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
1679             printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
1680                    varDsc->lvVarIndex);
1681     }
1682 #endif
1683
1684     /* Make sure that write barrier pointer variables never land in EDX */
1685     VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
1686 #endif // NOGC_WRITE_BARRIERS
1687 }
1688
1689 /*****************************************************************************
1690  *
1691  * Predict the internal temp physical register usage for a block assignment tree,
1692  * by setting tree->gtUsedRegs.
1693  * Records the internal temp physical register usage for this tree.
1694  * Returns a mask of interfering registers for this tree.
1695  *
1696  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1697  * to the set of scratch registers needed when evaluating the tree.
1698  * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1699  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1700  * predict additional internal temp physical registers to spill into.
1701  *
1702  *    tree       - is the child of a GT_IND node
1703  *    predictReg - what type of register does the tree need
1704  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1705  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1706  *    rsvdRegs   - registers which should not be allocated because they will
1707  *                 be needed to evaluate a node in the future
1708  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1709  *                 the rpLastUseVars set should be saved and restored
1710  *                 so that we don't add any new variables to rpLastUseVars.
1711  */
1712 regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr   tree,
1713                                           rpPredictReg predictReg,
1714                                           regMaskTP    lockedRegs,
1715                                           regMaskTP    rsvdRegs)
1716 {
1717     regMaskTP regMask         = RBM_NONE;
1718     regMaskTP interferingRegs = RBM_NONE;
1719
1720     bool        hasGCpointer  = false;
1721     bool        dstIsOnStack  = false;
1722     bool        useMemHelper  = false;
1723     bool        useBarriers   = false;
1724     GenTreeBlk* dst           = tree->gtGetOp1()->AsBlk();
1725     GenTreePtr  dstAddr       = dst->Addr();
1726     GenTreePtr  srcAddrOrFill = tree->gtGetOp2IfPresent();
1727
1728     size_t blkSize = dst->gtBlkSize;
1729
1730     hasGCpointer = (dst->HasGCPtr());
1731
1732     bool isCopyBlk = tree->OperIsCopyBlkOp();
1733     bool isCopyObj = isCopyBlk && hasGCpointer;
1734     bool isInitBlk = tree->OperIsInitBlkOp();
1735
1736     if (isCopyBlk)
1737     {
1738         assert(srcAddrOrFill->OperIsIndir());
1739         srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
1740     }
1741     else
1742     {
1743         // For initBlk, we don't need to worry about the GC pointers.
1744         hasGCpointer = false;
1745     }
1746
1747     if (blkSize != 0)
1748     {
1749         if (isCopyObj)
1750         {
1751             dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
1752         }
1753
1754         if (isInitBlk)
1755         {
1756             if (srcAddrOrFill->OperGet() != GT_CNS_INT)
1757             {
1758                 useMemHelper = true;
1759             }
1760         }
1761     }
1762     else
1763     {
1764         useMemHelper = true;
1765     }
1766
1767     if (hasGCpointer && !dstIsOnStack)
1768     {
1769         useBarriers = true;
1770     }
1771
1772 #ifdef _TARGET_ARM_
1773     //
1774     // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths.
1775     //
1776     if (!useMemHelper && !useBarriers)
1777     {
1778         bool     useLoop        = false;
1779         unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
1780
1781         // A mask to use to force the predictor to choose low registers (to reduce code size)
1782         regMaskTP avoidReg = (RBM_R12 | RBM_LR);
1783
1784         // Allow the src and dst to be used in place, unless we use a loop, in which
1785         // case we will need scratch registers as we will be writing to them.
1786         rpPredictReg srcAndDstPredict = PREDICT_REG;
1787
1788         // Will we be using a loop to implement this INITBLK/COPYBLK?
1789         if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
1790         {
1791             useLoop          = true;
1792             avoidReg         = RBM_NONE;
1793             srcAndDstPredict = PREDICT_SCRATCH_REG;
1794         }
1795
1796         if (tree->gtFlags & GTF_REVERSE_OPS)
1797         {
1798             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
1799                                            dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1800             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1801         }
1802         else
1803         {
1804             regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
1805                                            srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
1806             regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
1807         }
1808
1809         // We need at least one scratch register for a copyBlk
1810         if (isCopyBlk)
1811         {
1812             // Pick a low register to reduce the code size
1813             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1814         }
1815
1816         if (useLoop)
1817         {
1818             if (isCopyBlk)
1819             {
1820                 // We need a second temp register for a copyBlk (our code gen is load two/store two)
1821                 // Pick another low register to reduce the code size
1822                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
1823             }
1824
1825             // We need a loop index register
1826             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
1827         }
1828
1829         tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
1830
1831         return interferingRegs;
1832     }
1833 #endif
1834     // What order should the Dest, Val/Src, and Size be calculated
1835     GenTreePtr opsPtr[3];
1836     regMaskTP  regsPtr[3];
1837
1838 #if defined(_TARGET_XARCH_)
1839     fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
1840
1841     // We're going to use these, might as well make them available now
1842
1843     codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
1844     if (isCopyBlk)
1845         codeGen->regSet.rsSetRegsModified(RBM_ESI);
1846
1847 #elif defined(_TARGET_ARM_)
1848
1849     if (useMemHelper)
1850     {
1851         // For all other cases that involve non-constants, we just call memcpy/memset
1852         // JIT helpers
1853         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
1854         interferingRegs |= RBM_CALLEE_TRASH;
1855 #ifdef DEBUG
1856         if (verbose)
1857             printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
1858 #endif
1859     }
1860     else // useBarriers
1861     {
1862         assert(useBarriers);
1863         assert(isCopyBlk);
1864
1865         fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
1866
1867         // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
1868         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
1869 #ifdef DEBUG
1870         if (verbose)
1871             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
1872 #endif
1873     }
1874 #else // !_TARGET_X86_ && !_TARGET_ARM_
1875 #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
1876 #endif // !_TARGET_X86_ && !_TARGET_ARM_
1877     regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
1878     regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
1879                                    opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
1880     regMask |= regsPtr[0];
1881     opsPtr[0]->gtUsedRegs |= regsPtr[0];
1882     rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
1883
1884     regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
1885                                    opsPtr2RsvdRegs | RBM_LASTUSE);
1886     regMask |= regsPtr[1];
1887     opsPtr[1]->gtUsedRegs |= regsPtr[1];
1888     rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
1889
1890     regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
1891     if (opsPtr[2] == nullptr)
1892     {
1893         // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
1894         // Note that it is quite possible that no register is required, but this preserves
1895         // former behavior.
1896         regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
1897         rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
1898     }
1899     else
1900     {
1901         regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
1902         opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
1903     }
1904     regMask |= opsPtr2UsedRegs;
1905
1906     tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
1907     return interferingRegs;
1908 }
1909
1910 /*****************************************************************************
1911  *
1912  * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
1913  * Returns a regMask with the internal temp physical register usage for this tree.
1914  *
1915  * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
1916  * to the set of scratch registers needed when evaluating the tree.
1917  * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
1918  * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
1919  * predict additional internal temp physical registers to spill into.
1920  *
1921  *    tree       - is the child of a GT_IND node
1922  *    predictReg - what type of register does the tree need
1923  *    lockedRegs - are the registers which are currently held by a previously evaluated node.
1924  *                 Don't modify lockedRegs as it is used at the end to compute a spill mask.
1925  *    rsvdRegs   - registers which should not be allocated because they will
1926  *                 be needed to evaluate a node in the future
1927  *               - Also, if rsvdRegs has the RBM_LASTUSE bit set then
1928  *                 the rpLastUseVars set should be saved and restored
1929  *                 so that we don't add any new variables to rpLastUseVars.
1930  */
1931
1932 #pragma warning(disable : 4701)
1933
1934 #ifdef _PREFAST_
1935 #pragma warning(push)
1936 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
1937 #endif
1938 regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr   tree,
1939                                         rpPredictReg predictReg,
1940                                         regMaskTP    lockedRegs,
1941                                         regMaskTP    rsvdRegs)
1942 {
1943     regMaskTP    regMask = DUMMY_INIT(RBM_ILLEGAL);
1944     regMaskTP    op2Mask;
1945     regMaskTP    tmpMask;
1946     rpPredictReg op1PredictReg;
1947     rpPredictReg op2PredictReg;
1948     LclVarDsc*   varDsc = NULL;
1949     VARSET_TP    oldLastUseVars(VarSetOps::UninitVal());
1950
1951     VARSET_TP varBits(VarSetOps::UninitVal());
1952     VARSET_TP lastUseVarBits(VarSetOps::MakeEmpty(this));
1953
1954     bool      restoreLastUseVars = false;
1955     regMaskTP interferingRegs    = RBM_NONE;
1956
1957 #ifdef DEBUG
1958     // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
1959     noway_assert(tree);
1960     noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
1961     noway_assert(RBM_ILLEGAL);
1962     noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
1963     /* impossible values, to make sure that we set them */
1964     tree->gtUsedRegs = RBM_ILLEGAL;
1965 #endif
1966
1967     /* Figure out what kind of a node we have */
1968
1969     genTreeOps oper = tree->OperGet();
1970     var_types  type = tree->TypeGet();
1971     unsigned   kind = tree->OperKind();
1972
1973     // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
1974     genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
1975     if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
1976         predictReg = PREDICT_NONE;
1977     else if (rpHasVarIndexForPredict(predictReg))
1978     {
1979         // The only place where predictReg is set to a var is in the PURE
1980         // assignment case where varIndex is the var being assigned to.
1981         // We need to check whether the variable is used between here and
1982         // its redefinition.
1983         unsigned varIndex = rpGetVarIndexForPredict(predictReg);
1984         unsigned lclNum   = lvaTrackedToVarNum[varIndex];
1985         bool     found    = false;
1986         for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
1987         {
1988             if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
1989             {
1990                 // Is this the pure assignment?
1991                 if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
1992                 {
1993                     predictReg = PREDICT_SCRATCH_REG;
1994                 }
1995                 found = true;
1996                 break;
1997             }
1998         }
1999         assert(found);
2000     }
2001
2002     if (rsvdRegs & RBM_LASTUSE)
2003     {
2004         restoreLastUseVars = true;
2005         VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
2006         rsvdRegs &= ~RBM_LASTUSE;
2007     }
2008
2009     /* Is this a constant or leaf node? */
2010
2011     if (kind & (GTK_CONST | GTK_LEAF))
2012     {
2013         bool      lastUse   = false;
2014         regMaskTP enregMask = RBM_NONE;
2015
2016         switch (oper)
2017         {
2018 #ifdef _TARGET_ARM_
2019             case GT_CNS_DBL:
2020                 // Codegen for floating point constants on the ARM is currently
2021                 // movw/movt    rT1, <lo32 bits>
2022                 // movw/movt    rT2, <hi32 bits>
2023                 //  vmov.i2d    dT0, rT1,rT2
2024                 //
2025                 // For TYP_FLOAT one integer register is required
2026                 //
2027                 // These integer register(s) immediately die
2028                 tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2029                 if (type == TYP_DOUBLE)
2030                 {
2031                     // For TYP_DOUBLE a second integer register is required
2032                     //
2033                     tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2034                 }
2035
2036                 // We also need a floating point register that we keep
2037                 //
2038                 if (predictReg == PREDICT_NONE)
2039                     predictReg = PREDICT_SCRATCH_REG;
2040
2041                 regMask          = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
2042                 tree->gtUsedRegs = regMask | tmpMask;
2043                 goto RETURN_CHECK;
2044 #endif
2045
2046             case GT_CNS_INT:
2047             case GT_CNS_LNG:
2048
2049                 if (rpHasVarIndexForPredict(predictReg))
2050                 {
2051                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2052                     rpAsgVarNum       = tgtIndex;
2053
2054                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2055                     predictReg = PREDICT_NONE;
2056
2057                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2058                     tgtVar->lvDependReg = true;
2059
2060                     if (type == TYP_LONG)
2061                     {
2062                         assert(oper == GT_CNS_LNG);
2063
2064                         if (tgtVar->lvOtherReg == REG_STK)
2065                         {
2066                             // Well we do need one register for a partially enregistered
2067                             type       = TYP_INT;
2068                             predictReg = PREDICT_SCRATCH_REG;
2069                         }
2070                     }
2071                 }
2072                 else
2073                 {
2074 #if !CPU_LOAD_STORE_ARCH
2075                     /* If the constant is a handle then it will need to have a relocation
2076                        applied to it.  It will need to be loaded into a register.
2077                        But never throw away an existing hint.
2078                        */
2079                     if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
2080 #endif
2081                     {
2082                         if (predictReg == PREDICT_NONE)
2083                             predictReg = PREDICT_SCRATCH_REG;
2084                     }
2085                 }
2086                 break;
2087
2088             case GT_NO_OP:
2089                 break;
2090
2091             case GT_CLS_VAR:
2092                 if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
2093                     (genTypeSize(type) < sizeof(int)))
2094                 {
2095                     predictReg = PREDICT_SCRATCH_REG;
2096                 }
2097 #ifdef _TARGET_ARM_
2098                 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
2099                 //
2100                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
2101                 {
2102                     // These integer register(s) immediately die
2103                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2104                     // Two integer registers are required for a TYP_DOUBLE
2105                     if (type == TYP_DOUBLE)
2106                         tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2107                 }
2108                 // We need a temp register in some cases of loads/stores to a class var
2109                 if (predictReg == PREDICT_NONE)
2110                 {
2111                     predictReg = PREDICT_SCRATCH_REG;
2112                 }
2113 #endif
2114                 if (rpHasVarIndexForPredict(predictReg))
2115                 {
2116                     unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2117                     rpAsgVarNum       = tgtIndex;
2118
2119                     // We don't need any register as we plan on writing to the rpAsgVarNum register
2120                     predictReg = PREDICT_NONE;
2121
2122                     LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2123                     tgtVar->lvDependReg = true;
2124
2125                     if (type == TYP_LONG)
2126                     {
2127                         if (tgtVar->lvOtherReg == REG_STK)
2128                         {
2129                             // Well we do need one register for a partially enregistered
2130                             type       = TYP_INT;
2131                             predictReg = PREDICT_SCRATCH_REG;
2132                         }
2133                     }
2134                 }
2135                 break;
2136
2137             case GT_LCL_FLD:
2138 #ifdef _TARGET_ARM_
2139                 // Check for a misalignment on a Floating Point field
2140                 //
2141                 if (varTypeIsFloating(type))
2142                 {
2143                     if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
2144                     {
2145                         // These integer register(s) immediately die
2146                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
2147                         // Two integer registers are required for a TYP_DOUBLE
2148                         if (type == TYP_DOUBLE)
2149                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
2150                     }
2151                 }
2152 #endif
2153                 __fallthrough;
2154
2155             case GT_LCL_VAR:
2156             case GT_REG_VAR:
2157
2158                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2159
2160                 VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
2161                 compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
2162                 lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
2163
2164 #if FEATURE_STACK_FP_X87
2165                 // If it's a floating point var, there's nothing to do
2166                 if (varTypeIsFloating(type))
2167                 {
2168                     tree->gtUsedRegs = RBM_NONE;
2169                     regMask          = RBM_NONE;
2170                     goto RETURN_CHECK;
2171                 }
2172 #endif
2173
2174                 // If the variable is already a register variable, no need to go further.
2175                 if (oper == GT_REG_VAR)
2176                     break;
2177
2178                 /* Apply the type of predictReg to the LCL_VAR */
2179
2180                 if (predictReg == PREDICT_REG)
2181                 {
2182                 PREDICT_REG_COMMON:
2183                     if (varDsc->lvRegNum == REG_STK)
2184                         break;
2185
2186                     goto GRAB_COUNT;
2187                 }
2188                 else if (predictReg == PREDICT_SCRATCH_REG)
2189                 {
2190                     noway_assert(predictReg == PREDICT_SCRATCH_REG);
2191
2192                     /* Is this the last use of a local var?   */
2193                     if (lastUse)
2194                     {
2195                         if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
2196                             goto PREDICT_REG_COMMON;
2197                     }
2198                 }
2199                 else if (rpHasVarIndexForPredict(predictReg))
2200                 {
2201                     /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
2202                     {
2203                         unsigned   tgtIndex1 = rpGetVarIndexForPredict(predictReg);
2204                         LclVarDsc* tgtVar    = lvaTable + lvaTrackedToVarNum[tgtIndex1];
2205                         VarSetOps::MakeSingleton(this, tgtIndex1);
2206
2207                         noway_assert(tgtVar->lvVarIndex == tgtIndex1);
2208                         noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
2209 #ifndef _TARGET_AMD64_
2210                         // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
2211                         // so this assert is meaningless
2212                         noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
2213 #endif // !_TARGET_AMD64_
2214
2215                         if (varDsc->lvTracked)
2216                         {
2217                             unsigned srcIndex;
2218                             srcIndex = varDsc->lvVarIndex;
2219
2220                             // If this register has it's last use here then we will prefer
2221                             // to color to the same register as tgtVar.
2222                             if (lastUse)
2223                             {
2224                                 /*
2225                                  *  Add an entry in the lvaVarPref graph to indicate
2226                                  *  that it would be worthwhile to color these two variables
2227                                  *  into the same physical register.
2228                                  *  This will help us avoid having an extra copy instruction
2229                                  */
2230                                 VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
2231                                 VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
2232                             }
2233
2234                             // Add a variable interference from srcIndex to each of the last use variables
2235                             if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2236                             {
2237                                 rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
2238                             }
2239                         }
2240                         rpAsgVarNum = tgtIndex1;
2241
2242                         /* We will rely on the target enregistered variable from the GT_ASG */
2243                         varDsc = tgtVar;
2244                     }
2245                 GRAB_COUNT:
2246                     unsigned grabCount;
2247                     grabCount = 0;
2248
2249                     if (genIsValidFloatReg(varDsc->lvRegNum))
2250                     {
2251                         enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
2252                     }
2253                     else
2254                     {
2255                         enregMask = genRegMask(varDsc->lvRegNum);
2256                     }
2257
2258 #ifdef _TARGET_ARM_
2259                     if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
2260                     {
2261                         // We need to compute the intermediate value using a TYP_DOUBLE
2262                         // but we storing the result in a TYP_SINGLE enregistered variable
2263                         //
2264                         grabCount++;
2265                     }
2266                     else
2267 #endif
2268                     {
2269                         /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2270                         if (enregMask & (rsvdRegs | lockedRegs))
2271                         {
2272                             grabCount++;
2273                         }
2274 #ifndef _TARGET_64BIT_
2275                         if (type == TYP_LONG)
2276                         {
2277                             if (varDsc->lvOtherReg != REG_STK)
2278                             {
2279                                 tmpMask = genRegMask(varDsc->lvOtherReg);
2280                                 enregMask |= tmpMask;
2281
2282                                 /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
2283                                 if (tmpMask & (rsvdRegs | lockedRegs))
2284                                     grabCount++;
2285                             }
2286                             else // lvOtherReg == REG_STK
2287                             {
2288                                 grabCount++;
2289                             }
2290                         }
2291 #endif // _TARGET_64BIT_
2292                     }
2293
2294                     varDsc->lvDependReg = true;
2295
2296                     if (grabCount == 0)
2297                     {
2298                         /* Does not need a register */
2299                         predictReg = PREDICT_NONE;
2300                         // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2301                         VarSetOps::UnionD(this, rpUseInPlace, varBits);
2302                     }
2303                     else // (grabCount > 0)
2304                     {
2305 #ifndef _TARGET_64BIT_
2306                         /* For TYP_LONG and we only need one register then change the type to TYP_INT */
2307                         if ((type == TYP_LONG) && (grabCount == 1))
2308                         {
2309                             /* We will need to pick one register */
2310                             type = TYP_INT;
2311                             // noway_assert(!VarSetOps::IsEmpty(this, varBits));
2312                             VarSetOps::UnionD(this, rpUseInPlace, varBits);
2313                         }
2314                         noway_assert((type == TYP_DOUBLE) ||
2315                                      (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
2316 #else  // !_TARGET_64BIT_
2317                         noway_assert(grabCount == 1);
2318 #endif // !_TARGET_64BIT_
2319                     }
2320                 }
2321                 else if (type == TYP_STRUCT)
2322                 {
2323 #ifdef _TARGET_ARM_
2324                     // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
2325                     //        predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
2326                     //        As a workaround we just bash it to PREDICT_NONE here
2327                     //
2328                     if (predictReg != PREDICT_NONE)
2329                         predictReg = PREDICT_NONE;
2330 #endif
2331                     // Currently predictReg is saying that we will not need any scratch registers
2332                     noway_assert(predictReg == PREDICT_NONE);
2333
2334                     /* We may need to sign or zero extend a small type when pushing a struct */
2335                     if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
2336                     {
2337                         for (unsigned varNum = varDsc->lvFieldLclStart;
2338                              varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
2339                         {
2340                             LclVarDsc* fldVar = lvaTable + varNum;
2341
2342                             if (fldVar->lvStackAligned())
2343                             {
2344                                 // When we are stack aligned Codegen will just use
2345                                 // a push instruction and thus doesn't need any register
2346                                 // since we can push both a register or a stack frame location
2347                                 continue;
2348                             }
2349
2350                             if (varTypeIsByte(fldVar->TypeGet()))
2351                             {
2352                                 // We will need to reserve one byteable register,
2353                                 //
2354                                 type       = TYP_BYTE;
2355                                 predictReg = PREDICT_SCRATCH_REG;
2356 #if CPU_HAS_BYTE_REGS
2357                                 // It is best to enregister this fldVar in a byteable register
2358                                 //
2359                                 fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
2360 #endif
2361                             }
2362                             else if (varTypeIsShort(fldVar->TypeGet()))
2363                             {
2364                                 bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
2365                                 // If fldVar is not enregistered then we will need a scratch register
2366                                 //
2367                                 if (!isEnregistered)
2368                                 {
2369                                     // We will need either an int register or a byte register
2370                                     // If we are not requesting a byte register we will request an int register
2371                                     //
2372                                     if (type != TYP_BYTE)
2373                                         type   = TYP_INT;
2374                                     predictReg = PREDICT_SCRATCH_REG;
2375                                 }
2376                             }
2377                         }
2378                     }
2379                 }
2380                 else
2381                 {
2382                     regMaskTP preferReg = rpPredictRegMask(predictReg, type);
2383                     if (preferReg != 0)
2384                     {
2385                         if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
2386                         {
2387                             varDsc->addPrefReg(preferReg, this);
2388                         }
2389                     }
2390                 }
2391                 break; /* end of case GT_LCL_VAR */
2392
2393             case GT_JMP:
2394                 tree->gtUsedRegs = RBM_NONE;
2395                 regMask          = RBM_NONE;
2396
2397 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
2398                 // Mark the registers required to emit a tailcall profiler callback
2399                 if (compIsProfilerHookNeeded())
2400                 {
2401                     tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
2402                 }
2403 #endif
2404                 goto RETURN_CHECK;
2405
2406             default:
2407                 break;
2408         } /* end of switch (oper) */
2409
2410         /* If we don't need to evaluate to register, regmask is the empty set */
2411         /* Otherwise we grab a temp for the local variable                    */
2412
2413         if (predictReg == PREDICT_NONE)
2414             regMask = RBM_NONE;
2415         else
2416         {
2417             regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
2418
2419             if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
2420             {
2421                 /* We need to sign or zero extend a small type when pushing a struct */
2422                 noway_assert((type == TYP_INT) || (type == TYP_BYTE));
2423
2424                 varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
2425                 noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
2426
2427                 for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
2428                      varNum++)
2429                 {
2430                     LclVarDsc* fldVar = lvaTable + varNum;
2431                     if (fldVar->lvTracked)
2432                     {
2433                         VARSET_TP fldBit(VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
2434                         rpRecordRegIntf(regMask, fldBit DEBUGARG(
2435                                                      "need scratch register when pushing a small field of a struct"));
2436                     }
2437                 }
2438             }
2439         }
2440
2441         /* Update the set of lastUse variables that we encountered so far */
2442         if (lastUse)
2443         {
2444             VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
2445             VARSET_TP varAsSet(VarSetOps::MakeCopy(this, lastUseVarBits));
2446
2447             /*
2448              *  Add interference from any previously locked temps into this last use variable.
2449              */
2450             if (lockedRegs)
2451             {
2452                 rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
2453             }
2454             /*
2455              *  Add interference from any reserved temps into this last use variable.
2456              */
2457             if (rsvdRegs)
2458             {
2459                 rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
2460             }
2461             /*
2462              *  For partially enregistered longs add an interference with the
2463              *  register return by rpPredictRegPick
2464              */
2465             if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
2466             {
2467                 rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
2468             }
2469         }
2470
2471         tree->gtUsedRegs = (regMaskSmall)regMask;
2472         goto RETURN_CHECK;
2473     }
2474
2475     /* Is it a 'simple' unary/binary operator? */
2476
2477     if (kind & GTK_SMPOP)
2478     {
2479         GenTreePtr op1 = tree->gtOp.gtOp1;
2480         GenTreePtr op2 = tree->gtGetOp2IfPresent();
2481
2482         GenTreePtr opsPtr[3];
2483         regMaskTP  regsPtr[3];
2484
2485         VARSET_TP startAsgUseInPlaceVars(VarSetOps::UninitVal());
2486
2487         switch (oper)
2488         {
2489             case GT_ASG:
2490
2491                 /* Is the value being assigned into a LCL_VAR? */
2492                 if (op1->gtOper == GT_LCL_VAR)
2493                 {
2494                     varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2495
2496                     /* Are we assigning a LCL_VAR the result of a call? */
2497                     if (op2->gtOper == GT_CALL)
2498                     {
2499                         /* Set a preferred register for the LCL_VAR */
2500                         if (isRegPairType(varDsc->TypeGet()))
2501                             varDsc->addPrefReg(RBM_LNGRET, this);
2502                         else if (!varTypeIsFloating(varDsc->TypeGet()))
2503                             varDsc->addPrefReg(RBM_INTRET, this);
2504 #ifdef _TARGET_AMD64_
2505                         else
2506                             varDsc->addPrefReg(RBM_FLOATRET, this);
2507 #endif
2508                         /*
2509                          *  When assigning the result of a call we don't
2510                          *  bother trying to target the right side of the
2511                          *  assignment, since we have a fixed calling convention.
2512                          */
2513                     }
2514                     else if (varDsc->lvTracked)
2515                     {
2516                         // We interfere with uses in place
2517                         if (!VarSetOps::IsEmpty(this, rpUseInPlace))
2518                         {
2519                             rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
2520                         }
2521
2522                         // Did we predict that this local will be fully enregistered?
2523                         // and the assignment type is the same as the expression type?
2524                         // and it is dead on the right side of the assignment?
2525                         // and we current have no other rpAsgVarNum active?
2526                         //
2527                         if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
2528                             (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
2529                         {
2530                             //
2531                             //  Yes, we should try to target the right side (op2) of this
2532                             //  assignment into the (enregistered) tracked variable.
2533                             //
2534
2535                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2536                             op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
2537
2538                             // Remember that this is a new use in place
2539
2540                             // We've added "new UseInPlace"; remove from the global set.
2541                             VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
2542
2543                             //  Note that later when we walk down to the leaf node for op2
2544                             //  if we decide to actually use the register for the 'varDsc'
2545                             //  to enregister the operand, the we will set rpAsgVarNum to
2546                             //  varDsc->lvVarIndex, by extracting this value using
2547                             //  rpGetVarIndexForPredict()
2548                             //
2549                             //  Also we reset rpAsgVarNum back to -1 after we have finished
2550                             //  predicting the current GT_ASG node
2551                             //
2552                             goto ASG_COMMON;
2553                         }
2554                     }
2555                 }
2556                 else if (tree->OperIsBlkOp())
2557                 {
2558                     interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
2559                     regMask = 0;
2560                     goto RETURN_CHECK;
2561                 }
2562                 __fallthrough;
2563
2564             case GT_CHS:
2565
2566             case GT_ASG_OR:
2567             case GT_ASG_XOR:
2568             case GT_ASG_AND:
2569             case GT_ASG_SUB:
2570             case GT_ASG_ADD:
2571             case GT_ASG_MUL:
2572             case GT_ASG_DIV:
2573             case GT_ASG_UDIV:
2574
2575                 /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
2576                 if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
2577                 {
2578                     /* Is the value being assigned into an enregistered LCL_VAR? */
2579                     /* For debug code we only allow a simple op2 to be assigned */
2580                     if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
2581                     {
2582                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2583                         /* Did we predict that this local will be enregistered? */
2584                         if (varDsc->lvRegNum != REG_STK)
2585                         {
2586                             /* Yes, we can use "reg <op>= addr" */
2587
2588                             op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
2589                             op2PredictReg = PREDICT_NONE;
2590
2591                             goto ASG_COMMON;
2592                         }
2593                     }
2594                 }
2595
2596 #if CPU_LOAD_STORE_ARCH
2597                 if (oper != GT_ASG)
2598                 {
2599                     op1PredictReg = PREDICT_REG;
2600                     op2PredictReg = PREDICT_REG;
2601                 }
2602                 else
2603 #endif
2604                 {
2605                     /*
2606                      *  Otherwise, initialize the normal forcing of operands:
2607                      *   "addr <op>= reg"
2608                      */
2609                     op1PredictReg = PREDICT_ADDR;
2610                     op2PredictReg = PREDICT_REG;
2611                 }
2612
2613             ASG_COMMON:
2614
2615 #if !CPU_LOAD_STORE_ARCH
2616                 if (op2PredictReg != PREDICT_NONE)
2617                 {
2618                     /* Is the value being assigned a simple one? */
2619                     if (rpCanAsgOperWithoutReg(op2, false))
2620                         op2PredictReg = PREDICT_NONE;
2621                 }
2622 #endif
2623
2624                 bool simpleAssignment;
2625                 simpleAssignment = false;
2626
2627                 if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
2628                 {
2629                     // Add a variable interference from the assign target
2630                     // to each of the last use variables
2631                     if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2632                     {
2633                         varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
2634
2635                         if (varDsc->lvTracked)
2636                         {
2637                             unsigned varIndex = varDsc->lvVarIndex;
2638
2639                             rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
2640                         }
2641                     }
2642
2643                     /*  Record whether this tree is a simple assignment to a local */
2644
2645                     simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
2646                 }
2647
2648                 bool requireByteReg;
2649                 requireByteReg = false;
2650
2651 #if CPU_HAS_BYTE_REGS
2652                 /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
2653
2654                 if (varTypeIsByte(type) &&
2655                     ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
2656
2657                 {
2658                     // Byte-assignments typically need a byte register
2659                     requireByteReg = true;
2660
2661                     if (op1->gtOper == GT_LCL_VAR)
2662                     {
2663                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2664
2665                         // Did we predict that this local will be enregistered?
2666                         if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
2667                         {
2668                             // We don't require a byte register when op1 is an enregistered local */
2669                             requireByteReg = false;
2670                         }
2671
2672                         // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
2673                         if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
2674                         {
2675                             // We should try to put op1 in an byte register
2676                             varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
2677                         }
2678                     }
2679                 }
2680 #endif
2681
2682                 VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
2683
2684                 bool isWriteBarrierAsgNode;
2685                 isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
2686 #ifdef DEBUG
2687                 GCInfo::WriteBarrierForm wbf;
2688                 if (isWriteBarrierAsgNode)
2689                     wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
2690                 else
2691                     wbf = GCInfo::WBF_NoBarrier;
2692 #endif // DEBUG
2693
2694                 regMaskTP wbaLockedRegs;
2695                 wbaLockedRegs = lockedRegs;
2696                 if (isWriteBarrierAsgNode)
2697                 {
2698 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2699 #ifdef DEBUG
2700                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2701                     {
2702 #endif // DEBUG
2703                         wbaLockedRegs |= RBM_WRITE_BARRIER;
2704                         op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
2705                         assert(REG_WRITE_BARRIER == REG_EDX);
2706                         op1PredictReg = PREDICT_REG_EDX;
2707 #ifdef DEBUG
2708                     }
2709                     else
2710 #endif // DEBUG
2711 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
2712
2713 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
2714                     {
2715 #ifdef _TARGET_X86_
2716                         op1PredictReg = PREDICT_REG_ECX;
2717                         op2PredictReg = PREDICT_REG_EDX;
2718 #elif defined(_TARGET_ARM_)
2719                         op1PredictReg = PREDICT_REG_R0;
2720                         op2PredictReg = PREDICT_REG_R1;
2721
2722                         // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
2723                         if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
2724                         {
2725                             op1 = op1->gtOp.gtOp1;
2726                         }
2727 #else // !_TARGET_X86_ && !_TARGET_ARM_
2728 #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
2729 #endif
2730                     }
2731 #endif
2732                 }
2733
2734                 /*  Are we supposed to evaluate RHS first? */
2735
2736                 if (tree->gtFlags & GTF_REVERSE_OPS)
2737                 {
2738                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
2739
2740 #if CPU_HAS_BYTE_REGS
2741                     // Should we insure that op2 gets evaluated into a byte register?
2742                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2743                     {
2744                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2745                         // and we can't select one that is already reserved (i.e. lockedRegs)
2746                         //
2747                         op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
2748                         op2->gtUsedRegs |= op2Mask;
2749
2750                         // No longer a simple assignment because we're using extra registers and might
2751                         // have interference between op1 and op2.  See DevDiv #136681
2752                         simpleAssignment = false;
2753                     }
2754 #endif
2755                     /*
2756                      *  For a simple assignment we don't want the op2Mask to be
2757                      *  marked as interferring with the LCL_VAR, since it is likely
2758                      *  that we will want to enregister the LCL_VAR in exactly
2759                      *  the register that is used to compute op2
2760                      */
2761                     tmpMask = lockedRegs;
2762
2763                     if (!simpleAssignment)
2764                         tmpMask |= op2Mask;
2765
2766                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
2767
2768                     // Did we relax the register prediction for op1 and op2 above ?
2769                     // - because we are depending upon op1 being enregistered
2770                     //
2771                     if ((op1PredictReg == PREDICT_NONE) &&
2772                         ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
2773                     {
2774                         /* We must be assigning into an enregistered LCL_VAR */
2775                         noway_assert(op1->gtOper == GT_LCL_VAR);
2776                         varDsc = lvaTable + op1->gtLclVar.gtLclNum;
2777                         noway_assert(varDsc->lvRegNum != REG_STK);
2778
2779                         /* We need to set lvDependReg, in case we lose the enregistration of op1 */
2780                         varDsc->lvDependReg = true;
2781                     }
2782                 }
2783                 else
2784                 {
2785                     // For the case of simpleAssignments op2 should always be evaluated first
2786                     noway_assert(!simpleAssignment);
2787
2788                     regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
2789                     if (isWriteBarrierAsgNode)
2790                     {
2791                         wbaLockedRegs |= op1->gtUsedRegs;
2792                     }
2793                     op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
2794
2795 #if CPU_HAS_BYTE_REGS
2796                     // Should we insure that op2 gets evaluated into a byte register?
2797                     if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
2798                     {
2799                         // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
2800                         // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
2801                         //
2802                         op2Mask |=
2803                             rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
2804                         op2->gtUsedRegs |= op2Mask;
2805                     }
2806 #endif
2807                 }
2808
2809                 if (rpHasVarIndexForPredict(op2PredictReg))
2810                 {
2811                     rpAsgVarNum = -1;
2812                 }
2813
2814                 if (isWriteBarrierAsgNode)
2815                 {
2816 #if NOGC_WRITE_BARRIERS
2817 #ifdef DEBUG
2818                     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
2819                     {
2820 #endif // DEBUG
2821
2822                         /* Steer computation away from REG_WRITE_BARRIER as the pointer is
2823                            passed to the write-barrier call in REG_WRITE_BARRIER */
2824
2825                         regMask = op2Mask;
2826
2827                         if (op1->gtOper == GT_IND)
2828                         {
2829                             GenTreePtr rv1, rv2;
2830                             unsigned   mul, cns;
2831                             bool       rev;
2832
2833                             /* Special handling of indirect assigns for write barrier */
2834
2835                             bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
2836                                                                   &mul, &cns);
2837
2838                             /* Check address mode for enregisterable locals */
2839
2840                             if (yes)
2841                             {
2842                                 if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
2843                                 {
2844                                     rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
2845                                 }
2846                                 if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
2847                                 {
2848                                     rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
2849                                 }
2850                             }
2851                         }
2852
2853                         if (op2->gtOper == GT_LCL_VAR)
2854                         {
2855                             rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
2856                         }
2857
2858                         // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
2859                         if (!VarSetOps::IsEmpty(this, rpLastUseVars))
2860                         {
2861                             rpRecordRegIntf(RBM_WRITE_BARRIER,
2862                                             rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
2863                         }
2864                         tree->gtUsedRegs |= RBM_WRITE_BARRIER;
2865 #ifdef DEBUG
2866                     }
2867                     else
2868 #endif // DEBUG
2869 #endif // NOGC_WRITE_BARRIERS
2870
2871 #if defined(DEBUG) || !NOGC_WRITE_BARRIERS
2872                     {
2873 #ifdef _TARGET_ARM_
2874 #ifdef DEBUG
2875                         if (verbose)
2876                             printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
2877 #endif
2878                         //
2879                         // For the ARM target we have an optimized JIT Helper
2880                         // that only trashes a subset of the callee saved registers
2881                         //
2882
2883                         // NOTE: Adding it to the gtUsedRegs will cause the interference to
2884                         // be added appropriately
2885
2886                         // the RBM_CALLEE_TRASH_NOGC set is killed.  We will record this in interferingRegs
2887                         // instead of gtUsedRegs, because the latter will be modified later, but we need
2888                         // to remember to add the interference.
2889
2890                         interferingRegs |= RBM_CALLEE_TRASH_NOGC;
2891
2892                         op1->gtUsedRegs |= RBM_R0;
2893                         op2->gtUsedRegs |= RBM_R1;
2894 #else // _TARGET_ARM_
2895
2896 #ifdef DEBUG
2897                         if (verbose)
2898                             printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
2899 #endif
2900                         // We have to call a normal JIT helper to perform the Write Barrier Assignment
2901                         // It will trash the callee saved registers
2902
2903                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
2904 #endif // _TARGET_ARM_
2905                     }
2906 #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
2907                 }
2908
2909                 if (simpleAssignment)
2910                 {
2911                     /*
2912                      *  Consider a simple assignment to a local:
2913                      *
2914                      *   lcl = expr;
2915                      *
2916                      *  Since the "=" node is visited after the variable
2917                      *  is marked live (assuming it's live after the
2918                      *  assignment), we don't want to use the register
2919                      *  use mask of the "=" node but rather that of the
2920                      *  variable itself.
2921                      */
2922                     tree->gtUsedRegs = op1->gtUsedRegs;
2923                 }
2924                 else
2925                 {
2926                     tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
2927                 }
2928                 VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
2929                 goto RETURN_CHECK;
2930
2931             case GT_ASG_LSH:
2932             case GT_ASG_RSH:
2933             case GT_ASG_RSZ:
2934                 /* assigning shift operators */
2935
2936                 noway_assert(type != TYP_LONG);
2937
2938 #if CPU_LOAD_STORE_ARCH
2939                 predictReg = PREDICT_ADDR;
2940 #else
2941                 predictReg = PREDICT_NONE;
2942 #endif
2943
2944                 /* shift count is handled same as ordinary shift */
2945                 goto HANDLE_SHIFT_COUNT;
2946
2947             case GT_ADDR:
2948                 regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
2949
2950                 if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
2951                 {
2952                     // We need a scratch register for the LEA instruction
2953                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
2954                 }
2955
2956                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
2957                 goto RETURN_CHECK;
2958
2959             case GT_CAST:
2960
2961                 /* Cannot cast to VOID */
2962                 noway_assert(type != TYP_VOID);
2963
2964                 /* cast to long is special */
2965                 if (type == TYP_LONG && op1->gtType <= TYP_INT)
2966                 {
2967                     noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
2968 #if CPU_LONG_USES_REGPAIR
2969                     rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
2970
2971                     if (rpHasVarIndexForPredict(predictReg))
2972                     {
2973                         unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
2974                         rpAsgVarNum       = tgtIndex;
2975
2976                         // We don't need any register as we plan on writing to the rpAsgVarNum register
2977                         predictReg = PREDICT_NONE;
2978
2979                         LclVarDsc* tgtVar   = lvaTable + lvaTrackedToVarNum[tgtIndex];
2980                         tgtVar->lvDependReg = true;
2981
2982                         if (tgtVar->lvOtherReg != REG_STK)
2983                         {
2984                             predictRegHi = PREDICT_NONE;
2985                         }
2986                     }
2987                     else
2988 #endif
2989                         if (predictReg == PREDICT_NONE)
2990                     {
2991                         predictReg = PREDICT_SCRATCH_REG;
2992                     }
2993 #ifdef _TARGET_ARM_
2994                     // If we are widening an int into a long using a targeted register pair we
2995                     // should retarget so that the low part get loaded into the appropriate register
2996                     else if (predictReg == PREDICT_PAIR_R0R1)
2997                     {
2998                         predictReg   = PREDICT_REG_R0;
2999                         predictRegHi = PREDICT_REG_R1;
3000                     }
3001                     else if (predictReg == PREDICT_PAIR_R2R3)
3002                     {
3003                         predictReg   = PREDICT_REG_R2;
3004                         predictRegHi = PREDICT_REG_R3;
3005                     }
3006 #endif
3007 #ifdef _TARGET_X86_
3008                     // If we are widening an int into a long using a targeted register pair we
3009                     // should retarget so that the low part get loaded into the appropriate register
3010                     else if (predictReg == PREDICT_PAIR_EAXEDX)
3011                     {
3012                         predictReg   = PREDICT_REG_EAX;
3013                         predictRegHi = PREDICT_REG_EDX;
3014                     }
3015                     else if (predictReg == PREDICT_PAIR_ECXEBX)
3016                     {
3017                         predictReg   = PREDICT_REG_ECX;
3018                         predictRegHi = PREDICT_REG_EBX;
3019                     }
3020 #endif
3021
3022                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3023
3024 #if CPU_LONG_USES_REGPAIR
3025                     if (predictRegHi != PREDICT_NONE)
3026                     {
3027                         // Now get one more reg for the upper part
3028                         regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
3029                     }
3030 #endif
3031                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3032                     goto RETURN_CHECK;
3033                 }
3034
3035                 /* cast from long is special - it frees a register */
3036                 if (type <= TYP_INT // nice.  this presumably is intended to mean "signed int and shorter types"
3037                     && op1->gtType == TYP_LONG)
3038                 {
3039                     if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
3040                         predictReg = PREDICT_REG;
3041
3042                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3043
3044                     // If we have 2 or more regs, free one of them
3045                     if (!genMaxOneBit(regMask))
3046                     {
3047                         /* Clear the 2nd lowest bit in regMask */
3048                         /* First set tmpMask to the lowest bit in regMask */
3049                         tmpMask = genFindLowestBit(regMask);
3050                         /* Next find the second lowest bit in regMask */
3051                         tmpMask = genFindLowestBit(regMask & ~tmpMask);
3052                         /* Clear this bit from regmask */
3053                         regMask &= ~tmpMask;
3054                     }
3055                     tree->gtUsedRegs = op1->gtUsedRegs;
3056                     goto RETURN_CHECK;
3057                 }
3058
3059 #if CPU_HAS_BYTE_REGS
3060                 /* cast from signed-byte is special - it uses byteable registers */
3061                 if (type == TYP_INT)
3062                 {
3063                     var_types smallType;
3064
3065                     if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
3066                         smallType = tree->gtCast.CastOp()->TypeGet();
3067                     else
3068                         smallType = tree->gtCast.gtCastType;
3069
3070                     if (smallType == TYP_BYTE)
3071                     {
3072                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3073
3074                         if ((regMask & RBM_BYTE_REGS) == 0)
3075                             regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
3076
3077                         tree->gtUsedRegs = (regMaskSmall)regMask;
3078                         goto RETURN_CHECK;
3079                     }
3080                 }
3081 #endif
3082
3083 #if FEATURE_STACK_FP_X87
3084                 /* cast to float/double is special */
3085                 if (varTypeIsFloating(type))
3086                 {
3087                     switch (op1->TypeGet())
3088                     {
3089                         /* uses fild, so don't need to be loaded to reg */
3090                         case TYP_INT:
3091                         case TYP_LONG:
3092                             rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3093                             tree->gtUsedRegs = op1->gtUsedRegs;
3094                             regMask          = 0;
3095                             goto RETURN_CHECK;
3096                         default:
3097                             break;
3098                     }
3099                 }
3100
3101                 /* Casting from integral type to floating type is special */
3102                 if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
3103                 {
3104                     if (opts.compCanUseSSE2)
3105                     {
3106                         // predict for SSE2 based casting
3107                         if (predictReg <= PREDICT_REG)
3108                             predictReg = PREDICT_SCRATCH_REG;
3109                         regMask        = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3110
3111                         // Get one more int reg to hold cast result
3112                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
3113                         tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3114                         goto RETURN_CHECK;
3115                     }
3116                 }
3117 #endif
3118
3119 #if FEATURE_FP_REGALLOC
3120                 // Are we casting between int to float or float to int
3121                 // Fix 388428 ARM JitStress WP7
3122                 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
3123                 {
3124                     // op1 needs to go into a register
3125                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
3126
3127 #ifdef _TARGET_ARM_
3128                     if (varTypeIsFloating(op1->TypeGet()))
3129                     {
3130                         // We also need a fp scratch register for the convert operation
3131                         regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
3132                                                     PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3133                     }
3134 #endif
3135                     // We also need a register to hold the result
3136                     regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
3137                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3138                     goto RETURN_CHECK;
3139                 }
3140 #endif
3141
3142                 /* otherwise must load op1 into a register */
3143                 goto GENERIC_UNARY;
3144
3145             case GT_INTRINSIC:
3146
3147 #ifdef _TARGET_XARCH_
3148                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
3149                 {
3150                     // This is a special case to handle the following
3151                     // optimization: conv.i4(round.d(d)) -> round.i(d)
3152                     // if flowgraph 3186
3153
3154                     if (predictReg <= PREDICT_REG)
3155                         predictReg = PREDICT_SCRATCH_REG;
3156
3157                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3158
3159                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
3160
3161                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3162                     goto RETURN_CHECK;
3163                 }
3164 #endif
3165                 __fallthrough;
3166
3167             case GT_NEG:
3168 #ifdef _TARGET_ARM_
3169                 if (tree->TypeGet() == TYP_LONG)
3170                 {
3171                     // On ARM this consumes an extra register for the '0' value
3172                     if (predictReg <= PREDICT_REG)
3173                         predictReg = PREDICT_SCRATCH_REG;
3174
3175                     regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3176
3177                     regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
3178
3179                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
3180                     goto RETURN_CHECK;
3181                 }
3182 #endif // _TARGET_ARM_
3183
3184                 __fallthrough;
3185
3186             case GT_NOT:
3187             // these unary operators will write new values
3188             // and thus will need a scratch register
3189             GENERIC_UNARY:
3190                 /* generic unary operators */
3191
3192                 if (predictReg <= PREDICT_REG)
3193                     predictReg = PREDICT_SCRATCH_REG;
3194
3195                 __fallthrough;
3196
3197             case GT_NOP:
3198                 // these unary operators do not write new values
3199                 // and thus won't need a scratch register
3200                 CLANG_FORMAT_COMMENT_ANCHOR;
3201
3202 #if OPT_BOOL_OPS
3203                 if (!op1)
3204                 {
3205                     tree->gtUsedRegs = 0;
3206                     regMask          = 0;
3207                     goto RETURN_CHECK;
3208                 }
3209 #endif
3210                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3211                 tree->gtUsedRegs = op1->gtUsedRegs;
3212                 goto RETURN_CHECK;
3213
3214             case GT_IND:
3215             case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
3216             {
3217                 bool      intoReg = true;
3218                 VARSET_TP startIndUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
3219
3220                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
3221                 {
3222                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
3223                 }
3224
3225                 if (predictReg == PREDICT_ADDR)
3226                 {
3227                     intoReg = false;
3228                 }
3229                 else if (predictReg == PREDICT_NONE)
3230                 {
3231                     if (type != TYP_LONG)
3232                     {
3233                         intoReg = false;
3234                     }
3235                     else
3236                     {
3237                         predictReg = PREDICT_REG;
3238                     }
3239                 }
3240
3241                 /* forcing to register? */
3242                 if (intoReg && (type != TYP_LONG))
3243                 {
3244                     rsvdRegs |= RBM_LASTUSE;
3245                 }
3246
3247                 GenTreePtr lenCSE;
3248                 lenCSE = NULL;
3249
3250                 /* check for address mode */
3251                 regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
3252                 tmpMask = RBM_NONE;
3253
3254 #if CPU_LOAD_STORE_ARCH
3255                 // We may need a scratch register for loading a long
3256                 if (type == TYP_LONG)
3257                 {
3258                     /* This scratch register immediately dies */
3259                     tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3260                 }
3261 #endif // CPU_LOAD_STORE_ARCH
3262
3263 #ifdef _TARGET_ARM_
3264                 // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
3265                 //
3266                 if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
3267                 {
3268                     /* These integer register(s) immediately die */
3269                     tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
3270                     // Two integer registers are required for a TYP_DOUBLE
3271                     if (type == TYP_DOUBLE)
3272                         tmpMask |=
3273                             rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
3274                 }
3275 #endif
3276
3277                 /* forcing to register? */
3278                 if (intoReg)
3279                 {
3280                     regMaskTP lockedMask = lockedRegs | rsvdRegs;
3281                     tmpMask |= regMask;
3282
3283                     // We will compute a new regMask that holds the register(s)
3284                     // that we will load the indirection into.
3285                     //
3286                     CLANG_FORMAT_COMMENT_ANCHOR;
3287
3288 #ifndef _TARGET_64BIT_
3289                     if (type == TYP_LONG)
3290                     {
3291                         // We need to use multiple load instructions here:
3292                         // For the first register we can not choose
3293                         // any registers that are being used in place or
3294                         // any register in the current regMask
3295                         //
3296                         regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3297
3298                         // For the second register we can choose a register that was
3299                         // used in place or any register in the old now overwritten regMask
3300                         // but not the same register that we picked above in 'regMask'
3301                         //
3302                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3303                         regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
3304                     }
3305                     else
3306 #endif
3307                     {
3308                         // We will use one load instruction here:
3309                         // The load target register can be a register that was used in place
3310                         // or one of the register from the orginal regMask.
3311                         //
3312                         VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
3313                         regMask = rpPredictRegPick(type, predictReg, lockedMask);
3314                     }
3315                 }
3316                 else if (predictReg != PREDICT_ADDR)
3317                 {
3318                     /* Unless the caller specified PREDICT_ADDR   */
3319                     /* we don't return the temp registers used    */
3320                     /* to form the address                        */
3321                     regMask = RBM_NONE;
3322                 }
3323             }
3324
3325                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
3326
3327                 goto RETURN_CHECK;
3328
3329             case GT_EQ:
3330             case GT_NE:
3331             case GT_LT:
3332             case GT_LE:
3333             case GT_GE:
3334             case GT_GT:
3335
3336 #ifdef _TARGET_X86_
3337                 /* Floating point comparison uses EAX for flags */
3338                 if (varTypeIsFloating(op1->TypeGet()))
3339                 {
3340                     regMask = RBM_EAX;
3341                 }
3342                 else
3343 #endif
3344                     if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
3345                 {
3346                     // Some comparisons are converted to ?:
3347                     noway_assert(!fgMorphRelopToQmark(op1));
3348
3349                     if (predictReg <= PREDICT_REG)
3350                         predictReg = PREDICT_SCRATCH_REG;
3351
3352                     // The set instructions need a byte register
3353                     regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
3354                 }
3355                 else
3356                 {
3357                     regMask = RBM_NONE;
3358 #ifdef _TARGET_XARCH_
3359                     tmpMask = RBM_NONE;
3360                     // Optimize the compare with a constant cases for xarch
3361                     if (op1->gtOper == GT_CNS_INT)
3362                     {
3363                         if (op2->gtOper == GT_CNS_INT)
3364                             tmpMask =
3365                                 rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3366                         rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
3367                         tree->gtUsedRegs = op2->gtUsedRegs;
3368                         goto RETURN_CHECK;
3369                     }
3370                     else if (op2->gtOper == GT_CNS_INT)
3371                     {
3372                         rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
3373                         tree->gtUsedRegs = op1->gtUsedRegs;
3374                         goto RETURN_CHECK;
3375                     }
3376                     else if (op2->gtOper == GT_CNS_LNG)
3377                     {
3378                         regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
3379 #ifdef _TARGET_X86_
3380                         // We also need one extra register to read values from
3381                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
3382 #endif // _TARGET_X86_
3383                         tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
3384                         goto RETURN_CHECK;
3385                     }
3386 #endif // _TARGET_XARCH_
3387                 }
3388
3389                 unsigned op1TypeSize;
3390                 unsigned op2TypeSize;
3391
3392                 op1TypeSize = genTypeSize(op1->TypeGet());
3393                 op2TypeSize = genTypeSize(op2->TypeGet());
3394
3395                 op1PredictReg = PREDICT_REG;
3396                 op2PredictReg = PREDICT_REG;
3397
3398                 if (tree->gtFlags & GTF_REVERSE_OPS)
3399                 {
3400 #ifdef _TARGET_XARCH_
3401                     if (op1TypeSize == sizeof(int))
3402                         op1PredictReg = PREDICT_NONE;
3403 #endif
3404
3405                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3406                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3407                 }
3408                 else
3409                 {
3410 #ifdef _TARGET_XARCH_
3411                     // For full DWORD compares we can have
3412                     //
3413                     //      op1 is an address mode and op2 is a register
3414                     // or
3415                     //      op1 is a register and op2 is an address mode
3416                     //
3417                     if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
3418                     {
3419                         if (op2->gtOper == GT_LCL_VAR)
3420                         {
3421                             unsigned lclNum = op2->gtLclVar.gtLclNum;
3422                             varDsc          = lvaTable + lclNum;
3423                             /* Did we predict that this local will be enregistered? */
3424                             if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
3425                             {
3426                                 op1PredictReg = PREDICT_ADDR;
3427                             }
3428                         }
3429                     }
3430                     // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
3431                     if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
3432                         op2PredictReg = PREDICT_ADDR;
3433 #endif // _TARGET_XARCH_
3434
3435                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3436 #ifdef _TARGET_ARM_
3437                     if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
3438 #endif
3439                     {
3440                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
3441                     }
3442                 }
3443
3444 #ifdef _TARGET_XARCH_
3445                 // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
3446                 // to generate a sign/zero extension before doing a compare. Save a register for this purpose
3447                 // if one of the registers is small and the types aren't equal.
3448
3449                 if (regMask == RBM_NONE)
3450                 {
3451                     rpPredictReg op1xPredictReg, op2xPredictReg;
3452                     GenTreePtr   op1x, op2x;
3453                     if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
3454                     {
3455                         op1xPredictReg = op2PredictReg;
3456                         op2xPredictReg = op1PredictReg;
3457                         op1x           = op2;
3458                         op2x           = op1;
3459                     }
3460                     else
3461                     {
3462                         op1xPredictReg = op1PredictReg;
3463                         op2xPredictReg = op2PredictReg;
3464                         op1x           = op1;
3465                         op2x           = op2;
3466                     }
3467                     if ((op1xPredictReg < PREDICT_REG) &&  // op1 doesn't get a register (probably an indir)
3468                         (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
3469                         varTypeIsSmall(op1x->TypeGet()))   // op1 is smaller than an int
3470                     {
3471                         bool needTmp = false;
3472
3473                         // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
3474                         // We could predict a byteable register for op2x, but what if we don't get it?
3475                         // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
3476                         // result.
3477                         if (varTypeIsByte(op1x->TypeGet()))
3478                         {
3479                             needTmp = true;
3480                         }
3481                         else
3482                         {
3483                             if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
3484                             {
3485                                 if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
3486                                     needTmp = true;
3487                             }
3488                             else
3489                             {
3490                                 if (op1x->TypeGet() != op2x->TypeGet())
3491                                     needTmp = true;
3492                             }
3493                         }
3494                         if (needTmp)
3495                         {
3496                             regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3497                         }
3498                     }
3499                 }
3500 #endif // _TARGET_XARCH_
3501
3502                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3503                 goto RETURN_CHECK;
3504
3505             case GT_MUL:
3506
3507 #ifndef _TARGET_AMD64_
3508                 if (type == TYP_LONG)
3509                 {
3510                     assert(tree->gtIsValid64RsltMul());
3511
3512                     /* Strip out the cast nodes */
3513
3514                     noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
3515                     op1 = op1->gtCast.CastOp();
3516                     op2 = op2->gtCast.CastOp();
3517 #else
3518                 if (false)
3519                 {
3520 #endif // !_TARGET_AMD64_
3521                 USE_MULT_EAX:
3522
3523 #if defined(_TARGET_X86_)
3524                     // This will done by a 64-bit imul "imul eax, reg"
3525                     //   (i.e. EDX:EAX = EAX * reg)
3526
3527                     /* Are we supposed to evaluate op2 first? */
3528                     if (tree->gtFlags & GTF_REVERSE_OPS)
3529                     {
3530                         rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3531                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3532                     }
3533                     else
3534                     {
3535                         rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3536                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
3537                     }
3538
3539                     /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
3540
3541                     tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
3542
3543                     /* set regMask to the set of held registers */
3544
3545                     regMask = RBM_PAIR_TMP_LO;
3546
3547                     if (type == TYP_LONG)
3548                         regMask |= RBM_PAIR_TMP_HI;
3549
3550 #elif defined(_TARGET_ARM_)
3551                     // This will done by a 4 operand multiply
3552
3553                     // Are we supposed to evaluate op2 first?
3554                     if (tree->gtFlags & GTF_REVERSE_OPS)
3555                     {
3556                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3557                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3558                     }
3559                     else
3560                     {
3561                         rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3562                         rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
3563                     }
3564
3565                     // set regMask to the set of held registers,
3566                     //  the two scratch register we need to compute the mul result
3567
3568                     regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
3569
3570                     // set gtUsedRegs toregMask and the registers needed by op1 and op2
3571
3572                     tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3573
3574 #else // !_TARGET_X86_ && !_TARGET_ARM_
3575 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
3576 #endif
3577
3578                     goto RETURN_CHECK;
3579                 }
3580                 else
3581                 {
3582                     /* We use imulEAX for most unsigned multiply operations */
3583                     if (tree->gtOverflow())
3584                     {
3585                         if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
3586                         {
3587                             goto USE_MULT_EAX;
3588                         }
3589                     }
3590                 }
3591
3592                 __fallthrough;
3593
3594             case GT_OR:
3595             case GT_XOR:
3596             case GT_AND:
3597
3598             case GT_SUB:
3599             case GT_ADD:
3600                 tree->gtUsedRegs = 0;
3601
3602                 if (predictReg <= PREDICT_REG)
3603                     predictReg = PREDICT_SCRATCH_REG;
3604
3605             GENERIC_BINARY:
3606
3607                 noway_assert(op2);
3608                 if (tree->gtFlags & GTF_REVERSE_OPS)
3609                 {
3610                     op1PredictReg = PREDICT_REG;
3611 #if !CPU_LOAD_STORE_ARCH
3612                     if (genTypeSize(op1->gtType) >= sizeof(int))
3613                         op1PredictReg = PREDICT_NONE;
3614 #endif
3615                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
3616                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3617                 }
3618                 else
3619                 {
3620                     op2PredictReg = PREDICT_REG;
3621 #if !CPU_LOAD_STORE_ARCH
3622                     if (genTypeSize(op2->gtType) >= sizeof(int))
3623                         op2PredictReg = PREDICT_NONE;
3624 #endif
3625                     regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3626 #ifdef _TARGET_ARM_
3627                     // For most ALU operations we can generate a single instruction that encodes
3628                     // a small immediate integer constant value.  (except for multiply)
3629                     //
3630                     if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
3631                     {
3632                         ssize_t ival = op2->gtIntCon.gtIconVal;
3633                         if (codeGen->validImmForAlu(ival))
3634                         {
3635                             op2PredictReg = PREDICT_NONE;
3636                         }
3637                         else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
3638                                  ((oper == GT_ADD) || (oper == GT_SUB)))
3639                         {
3640                             op2PredictReg = PREDICT_NONE;
3641                         }
3642                     }
3643                     if (op2PredictReg == PREDICT_NONE)
3644                     {
3645                         op2->gtUsedRegs = RBM_NONE;
3646                     }
3647                     else
3648 #endif
3649                     {
3650                         rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
3651                     }
3652                 }
3653                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
3654
3655 #if CPU_HAS_BYTE_REGS
3656                 /* We have special register requirements for byte operations */
3657
3658                 if (varTypeIsByte(tree->TypeGet()))
3659                 {
3660                     /* For 8 bit arithmetic, one operands has to be in a
3661                        byte-addressable register, and the other has to be
3662                        in a byte-addrble reg or in memory. Assume its in a reg */
3663
3664                     regMaskTP regByteMask = 0;
3665                     regMaskTP op1ByteMask = op1->gtUsedRegs;
3666
3667                     if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
3668                     {
3669                         // Pick a Byte register to use for op1
3670                         regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
3671                         op1ByteMask = regByteMask;
3672                     }
3673
3674                     if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
3675                     {
3676                         // Pick a Byte register to use for op2, avoiding the one used by op1
3677                         regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
3678                     }
3679
3680                     if (regByteMask)
3681                     {
3682                         tree->gtUsedRegs |= regByteMask;
3683                         regMask = regByteMask;
3684                     }
3685                 }
3686 #endif
3687                 goto RETURN_CHECK;
3688
3689             case GT_DIV:
3690             case GT_MOD:
3691
3692             case GT_UDIV:
3693             case GT_UMOD:
3694
3695                 /* non-integer division handled in generic way */
3696                 if (!varTypeIsIntegral(type))
3697                 {
3698                     tree->gtUsedRegs = 0;
3699                     if (predictReg <= PREDICT_REG)
3700                         predictReg = PREDICT_SCRATCH_REG;
3701                     goto GENERIC_BINARY;
3702                 }
3703
3704 #ifndef _TARGET_64BIT_
3705
3706                 if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
3707                 {
3708                     /* Special case:  a mod with an int op2 is done inline using idiv or div
3709                        to avoid a costly call to the helper */
3710
3711                     noway_assert((op2->gtOper == GT_CNS_LNG) &&
3712                                  (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
3713
3714 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3715                     if (tree->gtFlags & GTF_REVERSE_OPS)
3716                     {
3717                         tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
3718                                                       rsvdRegs | op1->gtRsvdRegs);
3719                         tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
3720                     }
3721                     else
3722                     {
3723                         tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3724                         tmpMask |=
3725                             rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
3726                     }
3727                     regMask = RBM_PAIR_TMP;
3728 #else // !_TARGET_X86_ && !_TARGET_ARM_
3729 #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
3730 #endif // !_TARGET_X86_ && !_TARGET_ARM_
3731
3732                     tree->gtUsedRegs =
3733                         (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
3734                                        rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
3735
3736                     goto RETURN_CHECK;
3737                 }
3738 #endif // _TARGET_64BIT_
3739
3740                 /* no divide immediate, so force integer constant which is not
3741                  * a power of two to register
3742                  */
3743
3744                 if (op2->OperKind() & GTK_CONST)
3745                 {
3746                     ssize_t ival = op2->gtIntConCommon.IconValue();
3747
3748                     /* Is the divisor a power of 2 ? */
3749
3750                     if (ival > 0 && genMaxOneBit(size_t(ival)))
3751                     {
3752                         goto GENERIC_UNARY;
3753                     }
3754                     else
3755                         op2PredictReg = PREDICT_SCRATCH_REG;
3756                 }
3757                 else
3758                 {
3759                     /* Non integer constant also must be enregistered */
3760                     op2PredictReg = PREDICT_REG;
3761                 }
3762
3763                 regMaskTP trashedMask;
3764                 trashedMask = DUMMY_INIT(RBM_ILLEGAL);
3765                 regMaskTP op1ExcludeMask;
3766                 op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3767                 regMaskTP op2ExcludeMask;
3768                 op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
3769
3770 #ifdef _TARGET_XARCH_
3771                 /*  Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
3772                  *  we can safely allow the "b" value to die. Unfortunately, if we simply
3773                  *  mark the node "b" as using EDX, this will not work if "b" is a register
3774                  *  variable that dies with this particular reference. Thus, if we want to
3775                  *  avoid this situation (where we would have to spill the variable from
3776                  *  EDX to someplace else), we need to explicitly mark the interference
3777                  *  of the variable at this point.
3778                  */
3779
3780                 if (op2->gtOper == GT_LCL_VAR)
3781                 {
3782                     unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
3783                     varDsc          = lvaTable + lclNum;
3784                     if (varDsc->lvTracked)
3785                     {
3786 #ifdef DEBUG
3787                         if (verbose)
3788                         {
3789                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
3790                                 printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
3791                                        varDsc->lvVarIndex);
3792                             if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
3793                                 printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
3794                                        varDsc->lvVarIndex);
3795                         }
3796 #endif
3797                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
3798                         VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
3799                     }
3800                 }
3801
3802                 /* set the held register based on opcode */
3803                 if (oper == GT_DIV || oper == GT_UDIV)
3804                     regMask = RBM_EAX;
3805                 else
3806                     regMask    = RBM_EDX;
3807                 trashedMask    = (RBM_EAX | RBM_EDX);
3808                 op1ExcludeMask = 0;
3809                 op2ExcludeMask = (RBM_EAX | RBM_EDX);
3810
3811 #endif // _TARGET_XARCH_
3812
3813 #ifdef _TARGET_ARM_
3814                 trashedMask    = RBM_NONE;
3815                 op1ExcludeMask = RBM_NONE;
3816                 op2ExcludeMask = RBM_NONE;
3817 #endif
3818
3819                 /* set the lvPref reg if possible */
3820                 GenTreePtr dest;
3821                 /*
3822                  *  Walking the gtNext link twice from here should get us back
3823                  *  to our parent node, if this is an simple assignment tree.
3824                  */
3825                 dest = tree->gtNext;
3826                 if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
3827                     dest->gtNext->gtOp.gtOp2 == tree)
3828                 {
3829                     varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
3830                     varDsc->addPrefReg(regMask, this);
3831                 }
3832 #ifdef _TARGET_XARCH_
3833                 op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
3834 #else
3835                 op1PredictReg        = PREDICT_SCRATCH_REG;
3836 #endif
3837
3838                 /* are we supposed to evaluate op2 first? */
3839                 if (tree->gtFlags & GTF_REVERSE_OPS)
3840                 {
3841                     tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
3842                                                   rsvdRegs | op1->gtRsvdRegs);
3843                     rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
3844                 }
3845                 else
3846                 {
3847                     tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
3848                                                   rsvdRegs | op2->gtRsvdRegs);
3849                     rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
3850                 }
3851 #ifdef _TARGET_ARM_
3852                 regMask = tmpMask;
3853 #endif
3854                 /* grab EAX, EDX for this tree node */
3855                 tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
3856
3857                 goto RETURN_CHECK;
3858
3859             case GT_LSH:
3860             case GT_RSH:
3861             case GT_RSZ:
3862
3863                 if (predictReg <= PREDICT_REG)
3864                     predictReg = PREDICT_SCRATCH_REG;
3865
3866 #ifndef _TARGET_64BIT_
3867                 if (type == TYP_LONG)
3868                 {
3869                     if (op2->IsCnsIntOrI())
3870                     {
3871                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
3872                         // no register used by op2
3873                         op2->gtUsedRegs  = 0;
3874                         tree->gtUsedRegs = op1->gtUsedRegs;
3875                     }
3876                     else
3877                     {
3878                         // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
3879                         tmpMask = lockedRegs;
3880                         tmpMask &= ~RBM_LNGARG_0;
3881                         tmpMask &= ~RBM_SHIFT_LNG;
3882
3883                         // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
3884                         if (tree->gtFlags & GTF_REVERSE_OPS)
3885                         {
3886                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
3887                             tmpMask |= RBM_SHIFT_LNG;
3888                             // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife
3889                             // Fix 383843 X86/ARM ILGEN
3890                             rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
3891                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
3892                         }
3893                         else
3894                         {
3895                             rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
3896                             tmpMask |= RBM_LNGARG_0;
3897                             // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
3898                             // Fix 383839 ARM ILGEN
3899                             rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
3900                             rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
3901                         }
3902                         regMask = RBM_LNGRET; // function return registers
3903                         op1->gtUsedRegs |= RBM_LNGARG_0;
3904                         op2->gtUsedRegs |= RBM_SHIFT_LNG;
3905
3906                         tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
3907
3908                         // We are using a helper function to do shift:
3909                         //
3910                         tree->gtUsedRegs |= RBM_CALLEE_TRASH;
3911                     }
3912                 }
3913                 else
3914 #endif // _TARGET_64BIT_
3915                 {
3916 #ifdef _TARGET_XARCH_
3917                     if (!op2->IsCnsIntOrI())
3918                         predictReg = PREDICT_NOT_REG_ECX;
3919 #endif
3920
3921                 HANDLE_SHIFT_COUNT:
3922                     // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
3923
3924                     regMaskTP tmpRsvdRegs;
3925
3926                     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
3927                     {
3928                         regMask     = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
3929                         rsvdRegs    = RBM_LASTUSE;
3930                         tmpRsvdRegs = RBM_NONE;
3931                     }
3932                     else
3933                     {
3934                         regMask = RBM_NONE;
3935                         // Special case op1 of a constant
3936                         if (op1->IsCnsIntOrI())
3937                             tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
3938                                                        // System.Xml.Schema.BitSet:Get(int):bool
3939                         else
3940                             tmpRsvdRegs = op1->gtRsvdRegs;
3941                     }
3942
3943                     op2Mask = RBM_NONE;
3944                     if (!op2->IsCnsIntOrI())
3945                     {
3946                         if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
3947                         {
3948                             op2PredictReg = PREDICT_REG_SHIFT;
3949                         }
3950                         else
3951                         {
3952                             op2PredictReg = PREDICT_REG;
3953                         }
3954
3955                         /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
3956                         op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
3957
3958                         // If our target arch has a REG_SHIFT register then
3959                         //     we set the PrefReg when we have a LclVar for op2
3960                         //     we add an interference with REG_SHIFT for any other LclVars alive at op2
3961                         if (REG_SHIFT != REG_NA)
3962                         {
3963                             VARSET_TP liveSet(VarSetOps::MakeCopy(this, compCurLife));
3964
3965                             while (op2->gtOper == GT_COMMA)
3966                             {
3967                                 op2 = op2->gtOp.gtOp2;
3968                             }
3969
3970                             if (op2->gtOper == GT_LCL_VAR)
3971                             {
3972                                 varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
3973                                 varDsc->setPrefReg(REG_SHIFT, this);
3974                                 if (varDsc->lvTracked)
3975                                 {
3976                                     VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
3977                                 }
3978                             }
3979
3980                             // Ensure that we have a register interference with the LclVar in tree's LiveSet,
3981                             // excluding the LclVar that was used for the shift amount as it is read-only
3982                             // and can be kept alive through the shift operation
3983                             //
3984                             rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
3985                             // In case op2Mask doesn't contain the required shift register,
3986                             // we will or it in now.
3987                             op2Mask |= RBM_SHIFT;
3988                         }
3989                     }
3990
3991                     if (tree->gtFlags & GTF_REVERSE_OPS)
3992                     {
3993                         assert(regMask == RBM_NONE);
3994                         regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
3995                     }
3996
3997 #if CPU_HAS_BYTE_REGS
3998                     if (varTypeIsByte(type))
3999                     {
4000                         // Fix 383789 X86 ILGEN
4001                         // Fix 383813 X86 ILGEN
4002                         // Fix 383828 X86 ILGEN
4003                         if (op1->gtOper == GT_LCL_VAR)
4004                         {
4005                             varDsc = lvaTable + op1->gtLclVar.gtLclNum;
4006                             if (varDsc->lvTracked)
4007                             {
4008                                 VARSET_TP op1VarBit(VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
4009
4010                                 // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
4011                                 rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
4012                             }
4013                         }
4014                         if ((regMask & RBM_BYTE_REGS) == 0)
4015                         {
4016                             // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
4017                             // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
4018                             //
4019                             regMask |=
4020                                 rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
4021                         }
4022                     }
4023 #endif
4024                     tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4025                 }
4026
4027                 goto RETURN_CHECK;
4028
4029             case GT_COMMA:
4030                 if (tree->gtFlags & GTF_REVERSE_OPS)
4031                 {
4032                     if (predictReg == PREDICT_NONE)
4033                     {
4034                         predictReg = PREDICT_REG;
4035                     }
4036                     else if (rpHasVarIndexForPredict(predictReg))
4037                     {
4038                         /* Don't propagate the use of tgt reg use in a GT_COMMA */
4039                         predictReg = PREDICT_SCRATCH_REG;
4040                     }
4041
4042                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4043                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
4044                 }
4045                 else
4046                 {
4047                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4048
4049                     /* CodeGen will enregister the op2 side of a GT_COMMA */
4050                     if (predictReg == PREDICT_NONE)
4051                     {
4052                         predictReg = PREDICT_REG;
4053                     }
4054                     else if (rpHasVarIndexForPredict(predictReg))
4055                     {
4056                         /* Don't propagate the use of tgt reg use in a GT_COMMA */
4057                         predictReg = PREDICT_SCRATCH_REG;
4058                     }
4059
4060                     regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4061                 }
4062                 // tree should only accumulate the used registers from the op2 side of the GT_COMMA
4063                 //
4064                 tree->gtUsedRegs = op2->gtUsedRegs;
4065                 if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
4066                 {
4067                     LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
4068
4069                     if (op2VarDsc->lvTracked)
4070                     {
4071                         VARSET_TP op2VarBit(VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
4072                         rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
4073                     }
4074                 }
4075                 goto RETURN_CHECK;
4076
4077             case GT_QMARK:
4078             {
4079                 noway_assert(op1 != NULL && op2 != NULL);
4080
4081                 /*
4082                  *  If the gtUsedRegs conflicts with lockedRegs
4083                  *  then we going to have to spill some registers
4084                  *  into the non-trashed register set to keep it alive
4085                  */
4086                 unsigned spillCnt;
4087                 spillCnt = 0;
4088                 regMaskTP spillRegs;
4089                 spillRegs = lockedRegs & tree->gtUsedRegs;
4090
4091                 while (spillRegs)
4092                 {
4093                     /* Find the next register that needs to be spilled */
4094                     tmpMask = genFindLowestBit(spillRegs);
4095
4096 #ifdef DEBUG
4097                     if (verbose)
4098                     {
4099                         printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
4100                         gtDispTree(tree, 0, NULL, true);
4101                     }
4102 #endif
4103                     /* In Codegen it will typically introduce a spill temp here */
4104                     /* rather than relocating the register to a non trashed reg */
4105                     rpPredictSpillCnt++;
4106                     spillCnt++;
4107
4108                     /* Remove it from the spillRegs and lockedRegs*/
4109                     spillRegs &= ~tmpMask;
4110                     lockedRegs &= ~tmpMask;
4111                 }
4112                 {
4113                     VARSET_TP startQmarkCondUseInPlaceVars(VarSetOps::MakeCopy(this, rpUseInPlace));
4114
4115                     /* Evaluate the <cond> subtree */
4116                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4117                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4118                     tree->gtUsedRegs = op1->gtUsedRegs;
4119
4120                     noway_assert(op2->gtOper == GT_COLON);
4121                     if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
4122                     {
4123                         // Don't try to target the register specified in predictReg when we have complex subtrees
4124                         //
4125                         predictReg = PREDICT_SCRATCH_REG;
4126                     }
4127                     GenTreePtr elseTree = op2->AsColon()->ElseNode();
4128                     GenTreePtr thenTree = op2->AsColon()->ThenNode();
4129
4130                     noway_assert(thenTree != NULL && elseTree != NULL);
4131
4132                     // Update compCurLife to only those vars live on the <then> subtree
4133
4134                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
4135
4136                     if (type == TYP_VOID)
4137                     {
4138                         /* Evaluate the <then> subtree */
4139                         rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4140                         regMask    = RBM_NONE;
4141                         predictReg = PREDICT_NONE;
4142                     }
4143                     else
4144                     {
4145                         // A mask to use to force the predictor to choose low registers (to reduce code size)
4146                         regMaskTP avoidRegs = RBM_NONE;
4147 #ifdef _TARGET_ARM_
4148                         avoidRegs = (RBM_R12 | RBM_LR);
4149 #endif
4150                         if (predictReg <= PREDICT_REG)
4151                             predictReg = PREDICT_SCRATCH_REG;
4152
4153                         /* Evaluate the <then> subtree */
4154                         regMask =
4155                             rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
4156
4157                         if (regMask)
4158                         {
4159                             rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
4160                             if (op1PredictReg != PREDICT_NONE)
4161                                 predictReg = op1PredictReg;
4162                         }
4163                     }
4164
4165                     VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
4166
4167                     /* Evaluate the <else> subtree */
4168                     // First record the post-then liveness, and reset the current liveness to the else
4169                     // branch liveness.
4170                     CLANG_FORMAT_COMMENT_ANCHOR;
4171
4172 #ifdef DEBUG
4173                     VARSET_TP postThenLive(VarSetOps::MakeCopy(this, compCurLife));
4174 #endif
4175
4176                     VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
4177
4178                     rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
4179                     tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
4180
4181                     // The then and the else are "virtual basic blocks" that form a control-flow diamond.
4182                     // They each have only one successor, which they share.  Their live-out sets must equal the
4183                     // live-in set of this virtual successor block, and thus must be the same.  We can assert
4184                     // that equality here.
4185                     assert(VarSetOps::Equal(this, compCurLife, postThenLive));
4186
4187                     if (spillCnt > 0)
4188                     {
4189                         regMaskTP reloadMask = RBM_NONE;
4190
4191                         while (spillCnt)
4192                         {
4193                             regMaskTP reloadReg;
4194
4195                             /* Get an extra register to hold it */
4196                             reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
4197 #ifdef DEBUG
4198                             if (verbose)
4199                             {
4200                                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
4201                                 gtDispTree(tree, 0, NULL, true);
4202                             }
4203 #endif
4204                             reloadMask |= reloadReg;
4205
4206                             spillCnt--;
4207                         }
4208
4209                         /* update the gtUsedRegs mask */
4210                         tree->gtUsedRegs |= reloadMask;
4211                     }
4212                 }
4213
4214                 goto RETURN_CHECK;
4215             }
4216             case GT_RETURN:
4217                 tree->gtUsedRegs = RBM_NONE;
4218                 regMask          = RBM_NONE;
4219
4220                 /* Is there a return value? */
4221                 if (op1 != NULL)
4222                 {
4223 #if FEATURE_FP_REGALLOC
4224                     if (varTypeIsFloating(type))
4225                     {
4226                         predictReg = PREDICT_FLTRET;
4227                         if (type == TYP_FLOAT)
4228                             regMask = RBM_FLOATRET;
4229                         else
4230                             regMask = RBM_DOUBLERET;
4231                     }
4232                     else
4233 #endif
4234                         if (isRegPairType(type))
4235                     {
4236                         predictReg = PREDICT_LNGRET;
4237                         regMask    = RBM_LNGRET;
4238                     }
4239                     else
4240                     {
4241                         predictReg = PREDICT_INTRET;
4242                         regMask    = RBM_INTRET;
4243                     }
4244                     if (info.compCallUnmanaged)
4245                     {
4246                         lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
4247                     }
4248                     rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
4249                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4250                 }
4251
4252 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4253                 // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED.
4254                 // We could optimize on registers based on int/long or no return value.  But to
4255                 // keep it simple we will mark entire RBM_PROFILER_RETURN_USED as used regs here.
4256                 if (compIsProfilerHookNeeded())
4257                 {
4258                     tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
4259                 }
4260
4261 #endif
4262                 goto RETURN_CHECK;
4263
4264             case GT_RETFILT:
4265                 if (op1 != NULL)
4266                 {
4267                     rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4268                     regMask          = genReturnRegForTree(tree);
4269                     tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
4270                     goto RETURN_CHECK;
4271                 }
4272                 tree->gtUsedRegs = 0;
4273                 regMask          = 0;
4274
4275                 goto RETURN_CHECK;
4276
4277             case GT_JTRUE:
4278                 /* This must be a test of a relational operator */
4279
4280                 noway_assert(op1->OperIsCompare());
4281
4282                 /* Only condition code set by this operation */
4283
4284                 rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
4285
4286                 tree->gtUsedRegs = op1->gtUsedRegs;
4287                 regMask          = 0;
4288
4289                 goto RETURN_CHECK;
4290
4291             case GT_SWITCH:
4292                 noway_assert(type <= TYP_INT);
4293                 noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
4294 #ifdef _TARGET_ARM_
4295                 {
4296                     regMask          = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4297                     unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
4298                     if (jumpCnt > 2)
4299                     {
4300                         // Table based switch requires an extra register for the table base
4301                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
4302                     }
4303                     tree->gtUsedRegs = op1->gtUsedRegs | regMask;
4304                 }
4305 #else  // !_TARGET_ARM_
4306                 rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
4307                 tree->gtUsedRegs = op1->gtUsedRegs;
4308 #endif // _TARGET_ARM_
4309                 regMask = 0;
4310                 goto RETURN_CHECK;
4311
4312             case GT_CKFINITE:
4313                 if (predictReg <= PREDICT_REG)
4314                     predictReg = PREDICT_SCRATCH_REG;
4315
4316                 rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4317                 // Need a reg to load exponent into
4318                 regMask          = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
4319                 tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
4320                 goto RETURN_CHECK;
4321
4322             case GT_LCLHEAP:
4323                 regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
4324                 op2Mask = 0;
4325
4326 #ifdef _TARGET_ARM_
4327                 if (info.compInitMem)
4328                 {
4329                     // We zero out two registers in the ARM codegen path
4330                     op2Mask |=
4331                         rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
4332                 }
4333 #endif
4334
4335                 op1->gtUsedRegs |= (regMaskSmall)regMask;
4336                 tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
4337
4338                 // The result will be put in the reg we picked for the size
4339                 // regMask = <already set as we want it to be>
4340
4341                 goto RETURN_CHECK;
4342
4343             case GT_OBJ:
4344             {
4345 #ifdef _TARGET_ARM_
4346                 if (predictReg <= PREDICT_REG)
4347                     predictReg = PREDICT_SCRATCH_REG;
4348
4349                 regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
4350                                                           // registers (to reduce code size)
4351                 regMask = RBM_NONE;
4352                 tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
4353 #endif
4354
4355                 if (fgIsIndirOfAddrOfLocal(tree) != NULL)
4356                 {
4357                     compUpdateLifeVar</*ForCodeGen*/ false>(tree);
4358                 }
4359
4360 #ifdef _TARGET_ARM_
4361                 unsigned  objSize   = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
4362                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4363                 // If it has one bit set, and that's an arg reg...
4364                 if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
4365                 {
4366                     // We are passing the 'obj' in the argument registers
4367                     //
4368                     regNumber rn = genRegNumFromMask(preferReg);
4369
4370                     //  Add the registers used to pass the 'obj' to regMask.
4371                     for (unsigned i = 0; i < objSize / 4; i++)
4372                     {
4373                         if (rn == MAX_REG_ARG)
4374                             break;
4375                         // Otherwise...
4376                         regMask |= genRegMask(rn);
4377                         rn = genRegArgNext(rn);
4378                     }
4379                 }
4380                 else
4381                 {
4382                     // We are passing the 'obj' in the outgoing arg space
4383                     // We will need one register to load into unless the 'obj' size is 4 or less.
4384                     //
4385                     if (objSize > 4)
4386                     {
4387                         regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
4388                     }
4389                 }
4390                 tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
4391                 goto RETURN_CHECK;
4392 #else  // !_TARGET_ARM_
4393                 goto GENERIC_UNARY;
4394 #endif // _TARGET_ARM_
4395             }
4396
4397             case GT_MKREFANY:
4398             {
4399 #ifdef _TARGET_ARM_
4400                 regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
4401                 regMask             = RBM_NONE;
4402                 if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
4403                 {
4404                     // A MKREFANY takes up two registers.
4405                     regNumber rn = genRegNumFromMask(preferReg);
4406                     regMask      = RBM_NONE;
4407                     if (rn < MAX_REG_ARG)
4408                     {
4409                         regMask |= genRegMask(rn);
4410                         rn = genRegArgNext(rn);
4411                         if (rn < MAX_REG_ARG)
4412                             regMask |= genRegMask(rn);
4413                     }
4414                 }
4415                 if (regMask != RBM_NONE)
4416                 {
4417                     // Condensation of GENERIC_BINARY path.
4418                     assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
4419                     op2PredictReg        = PREDICT_REG;
4420                     regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
4421                     rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
4422                     regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
4423                     tree->gtUsedRegs = (regMaskSmall)regMask;
4424                     goto RETURN_CHECK;
4425                 }
4426                 tree->gtUsedRegs = op1->gtUsedRegs;
4427 #endif // _TARGET_ARM_
4428                 goto GENERIC_BINARY;
4429             }
4430
4431             case GT_BOX:
4432                 goto GENERIC_UNARY;
4433
4434             case GT_LOCKADD:
4435                 goto GENERIC_BINARY;
4436
4437             case GT_XADD:
4438             case GT_XCHG:
4439                 // Ensure we can write to op2.  op2 will hold the output.
4440                 if (predictReg < PREDICT_SCRATCH_REG)
4441                     predictReg = PREDICT_SCRATCH_REG;
4442
4443                 if (tree->gtFlags & GTF_REVERSE_OPS)
4444                 {
4445                     op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
4446                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
4447                 }
4448                 else
4449                 {
4450                     regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
4451                     op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
4452                 }
4453                 tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
4454                 goto RETURN_CHECK;
4455
4456             case GT_ARR_LENGTH:
4457                 goto GENERIC_UNARY;
4458
4459             case GT_INIT_VAL:
4460                 // This unary operator simply passes through the value from its child (much like GT_NOP)
4461                 // and thus won't need a scratch register.
4462                 regMask          = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
4463                 tree->gtUsedRegs = op1->gtUsedRegs;
4464                 goto RETURN_CHECK;
4465
4466             default:
4467 #ifdef DEBUG
4468                 gtDispTree(tree);
4469 #endif
4470                 noway_assert(!"unexpected simple operator in reg use prediction");
4471                 break;
4472         }
4473     }
4474
4475     /* See what kind of a special operator we have here */
4476
4477     switch (oper)
4478     {
4479         GenTreePtr      args;
4480         GenTreeArgList* list;
4481         regMaskTP       keepMask;
4482         unsigned        regArgsNum;
4483         int             regIndex;
4484         regMaskTP       regArgMask;
4485         regMaskTP       curArgMask;
4486
4487         case GT_CALL:
4488
4489         {
4490
4491             /* initialize so we can just or in various bits */
4492             tree->gtUsedRegs = RBM_NONE;
4493
4494 #if GTF_CALL_REG_SAVE
4495             /*
4496              *  Unless the GTF_CALL_REG_SAVE flag is set,
4497              *  we can't preserve the RBM_CALLEE_TRASH registers.
4498              *  (likewise we can't preserve the return registers)
4499              *  So we remove them from the lockedRegs set and
4500              *  record any of them in the keepMask
4501              */
4502
4503             if (tree->gtFlags & GTF_CALL_REG_SAVE)
4504             {
4505                 regMaskTP trashMask = genReturnRegForTree(tree);
4506
4507                 keepMask = lockedRegs & trashMask;
4508                 lockedRegs &= ~trashMask;
4509             }
4510             else
4511 #endif
4512             {
4513                 keepMask = lockedRegs & RBM_CALLEE_TRASH;
4514                 lockedRegs &= ~RBM_CALLEE_TRASH;
4515             }
4516
4517             regArgsNum = 0;
4518             regIndex   = 0;
4519
4520             /* Is there an object pointer? */
4521             if (tree->gtCall.gtCallObjp)
4522             {
4523                 /* Evaluate the instance pointer first */
4524
4525                 args = tree->gtCall.gtCallObjp;
4526
4527                 /* the objPtr always goes to an integer register (through temp or directly) */
4528                 noway_assert(regArgsNum == 0);
4529                 regArgsNum++;
4530
4531                 /* Must be passed in a register */
4532
4533                 noway_assert(args->gtFlags & GTF_LATE_ARG);
4534
4535                 /* Must be either a deferred reg arg node or a GT_ASG node */
4536
4537                 noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4538                              args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4539
4540                 if (!args->IsArgPlaceHolderNode())
4541                 {
4542                     rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4543                 }
4544             }
4545             VARSET_TP startArgUseInPlaceVars(VarSetOps::UninitVal());
4546             VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
4547
4548             /* process argument list */
4549             for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
4550             {
4551                 args = list->Current();
4552
4553                 if (args->gtFlags & GTF_LATE_ARG)
4554                 {
4555                     /* Must be either a Placeholder/NOP node or a GT_ASG node */
4556
4557                     noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
4558                                  args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
4559
4560                     if (!args->IsArgPlaceHolderNode())
4561                     {
4562                         rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
4563                     }
4564
4565                     regArgsNum++;
4566                 }
4567                 else
4568                 {
4569 #ifdef FEATURE_FIXED_OUT_ARGS
4570                     // We'll store this argument into the outgoing argument area
4571                     // It needs to be in a register to be stored.
4572                     //
4573                     predictReg = PREDICT_REG;
4574
4575 #else // !FEATURE_FIXED_OUT_ARGS
4576                     // We'll generate a push for this argument
4577                     //
4578                     predictReg = PREDICT_NONE;
4579                     if (varTypeIsSmall(args->TypeGet()))
4580                     {
4581                         /* We may need to sign or zero extend a small type using a register */
4582                         predictReg = PREDICT_SCRATCH_REG;
4583                     }
4584 #endif
4585
4586                     rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
4587                 }
4588                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4589                 tree->gtUsedRegs |= args->gtUsedRegs;
4590             }
4591
4592             /* Is there a late argument list */
4593
4594             regIndex   = 0;
4595             regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
4596             args       = NULL;
4597
4598             /* process the late argument list */
4599             for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
4600             {
4601                 // If the current argument being copied is a promoted struct local, set this pointer to its description.
4602                 LclVarDsc* promotedStructLocal = NULL;
4603
4604                 curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
4605                 tmpMask    = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
4606
4607                 assert(list->OperIsList());
4608
4609                 args = list->Current();
4610                 list = list->Rest();
4611
4612                 assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
4613
4614                 fgArgTabEntry* curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
4615                 assert(curArgTabEntry);
4616
4617                 regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
4618                 unsigned  numSlots =
4619                     curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
4620
4621                 rpPredictReg argPredictReg;
4622                 regMaskTP    avoidReg = RBM_NONE;
4623
4624                 if (regNum != REG_STK)
4625                 {
4626                     argPredictReg = rpGetPredictForReg(regNum);
4627                     curArgMask |= genRegMask(regNum);
4628                 }
4629                 else
4630                 {
4631                     assert(numSlots > 0);
4632                     argPredictReg = PREDICT_NONE;
4633 #ifdef _TARGET_ARM_
4634                     // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
4635                     avoidReg = (RBM_R12 | RBM_LR);
4636 #endif
4637                 }
4638
4639 #ifdef _TARGET_ARM_
4640                 // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
4641                 //
4642                 if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
4643                 {
4644                     // 64-bit longs and doubles require 2 consecutive argument registers
4645                     curArgMask |= genRegMask(REG_NEXT(regNum));
4646                 }
4647                 else if (args->TypeGet() == TYP_STRUCT)
4648                 {
4649                     GenTreePtr argx       = args;
4650                     GenTreePtr lclVarTree = NULL;
4651
4652                     /* The GT_OBJ may be be a child of a GT_COMMA */
4653                     while (argx->gtOper == GT_COMMA)
4654                     {
4655                         argx = argx->gtOp.gtOp2;
4656                     }
4657                     unsigned originalSize = 0;
4658
4659                     if (argx->gtOper == GT_OBJ)
4660                     {
4661                         originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
4662
4663                         // Is it the address of a promoted struct local?
4664                         if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
4665                         {
4666                             lclVarTree        = argx->gtObj.gtOp1->gtOp.gtOp1;
4667                             LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
4668                             if (varDsc->lvPromoted)
4669                                 promotedStructLocal = varDsc;
4670                         }
4671                     }
4672                     else if (argx->gtOper == GT_LCL_VAR)
4673                     {
4674                         varDsc       = lvaTable + argx->gtLclVarCommon.gtLclNum;
4675                         originalSize = varDsc->lvSize();
4676
4677                         // Is it a promoted struct local?
4678                         if (varDsc->lvPromoted)
4679                             promotedStructLocal = varDsc;
4680                     }
4681                     else if (argx->gtOper == GT_MKREFANY)
4682                     {
4683                         originalSize = 2 * TARGET_POINTER_SIZE;
4684                     }
4685                     else
4686                     {
4687                         noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
4688                     }
4689
4690                     // We only pass arguments differently if it a struct local "independently" promoted, which
4691                     // allows the field locals can be independently enregistered.
4692                     if (promotedStructLocal != NULL)
4693                     {
4694                         if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
4695                             promotedStructLocal = NULL;
4696                     }
4697
4698                     unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
4699
4700                     // Are we passing a TYP_STRUCT in multiple integer registers?
4701                     // if so set up curArgMask to reflect this
4702                     // Also slots is updated to reflect the number of outgoing arg slots that we will write
4703                     if (regNum != REG_STK)
4704                     {
4705                         regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
4706                         assert(genIsValidReg(regNum));
4707                         regNumber nextReg = REG_NEXT(regNum);
4708                         slots--;
4709                         while (slots > 0 && nextReg <= regLast)
4710                         {
4711                             curArgMask |= genRegMask(nextReg);
4712                             nextReg = REG_NEXT(nextReg);
4713                             slots--;
4714                         }
4715                     }
4716
4717                     if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
4718                     {
4719                         // All or a portion of this struct will be placed in the argument registers indicated by
4720                         // "curArgMask". We build in knowledge of the order in which the code is generated here, so
4721                         // that the second arg to be evaluated interferes with the reg for the first, the third with
4722                         // the regs for the first and second, etc. But since we always place the stack slots before
4723                         // placing the register slots we do not add inteferences for any part of the struct that gets
4724                         // passed on the stack.
4725
4726                         argPredictReg =
4727                             PREDICT_NONE; // We will target the indivual fields into registers but not the whole struct
4728                         regMaskTP prevArgMask = RBM_NONE;
4729                         for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
4730                         {
4731                             LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
4732                             if (fieldVarDsc->lvTracked)
4733                             {
4734                                 assert(lclVarTree != NULL);
4735                                 if (prevArgMask != RBM_NONE)
4736                                 {
4737                                     rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
4738                                                                      DEBUGARG("fieldVar/argReg"));
4739                                 }
4740                             }
4741                             // Now see many registers this uses up.
4742                             unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
4743                             unsigned nextAfterLastRegOffset =
4744                                 (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
4745                                 TARGET_POINTER_SIZE;
4746                             unsigned nextAfterLastArgRegOffset =
4747                                 min(nextAfterLastRegOffset,
4748                                     genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
4749
4750                             for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
4751                                  regOffset++)
4752                             {
4753                                 prevArgMask |= genRegMask(regNumber(regNum + regOffset));
4754                             }
4755
4756                             if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
4757                             {
4758                                 break;
4759                             }
4760
4761                             if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
4762                             {
4763                                 // Add the argument register used here as a preferred register for this fieldVarDsc
4764                                 //
4765                                 regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
4766                                 fieldVarDsc->setPrefReg(firstRegUsed, this);
4767                             }
4768                         }
4769                         compUpdateLifeVar</*ForCodeGen*/ false>(argx);
4770                     }
4771
4772                     // If slots is greater than zero then part or all of this TYP_STRUCT
4773                     // argument is passed in the outgoing argument area. (except HFA arg)
4774                     //
4775                     if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
4776                     {
4777                         // We will need a register to address the TYP_STRUCT
4778                         // Note that we can use an argument register in curArgMask as in
4779                         // codegen we pass the stack portion of the argument before we
4780                         // setup the register part.
4781                         //
4782
4783                         // Force the predictor to choose a LOW_REG here to reduce code bloat
4784                         avoidReg = (RBM_R12 | RBM_LR);
4785
4786                         assert(tmpMask == RBM_NONE);
4787                         tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
4788
4789                         // If slots > 1 then we will need a second register to perform the load/store into the outgoing
4790                         // arg area
4791                         if (slots > 1)
4792                         {
4793                             tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
4794                                                         lockedRegs | regArgMask | tmpMask | avoidReg);
4795                         }
4796                     }
4797                 } // (args->TypeGet() == TYP_STRUCT)
4798 #endif            // _TARGET_ARM_
4799
4800                 // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
4801                 // as we have already calculated the correct tmpMask and curArgMask values and
4802                 // by calling rpPredictTreeRegUse we would just add unnecessary register inteferences.
4803                 //
4804                 if (promotedStructLocal == NULL)
4805                 {
4806                     /* Target the appropriate argument register */
4807                     tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4808                 }
4809
4810                 // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
4811                 // for the duration of the OBJ.
4812                 if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
4813                 {
4814                     GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
4815                     assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
4816                     compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
4817                 }
4818
4819                 regArgMask |= curArgMask;
4820                 args->gtUsedRegs |= (tmpMask | regArgMask);
4821                 tree->gtUsedRegs |= args->gtUsedRegs;
4822                 tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
4823
4824                 if (args->gtUsedRegs != RBM_NONE)
4825                 {
4826                     // Add register interference with the set of registers used or in use when we evaluated
4827                     // the current arg, with whatever is alive after the current arg
4828                     //
4829                     rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
4830                 }
4831                 VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
4832             }
4833             assert(list == NULL);
4834
4835 #ifdef LEGACY_BACKEND
4836 #if CPU_LOAD_STORE_ARCH
4837 #ifdef FEATURE_READYTORUN_COMPILER
4838             if (tree->gtCall.IsR2RRelativeIndir())
4839             {
4840                 tree->gtUsedRegs |= RBM_R2R_INDIRECT_PARAM;
4841             }
4842 #endif // FEATURE_READYTORUN_COMPILER
4843 #endif // CPU_LOAD_STORE_ARCH
4844 #endif // LEGACY_BACKEND
4845
4846             regMaskTP callAddrMask;
4847             callAddrMask = RBM_NONE;
4848 #if CPU_LOAD_STORE_ARCH
4849             predictReg = PREDICT_SCRATCH_REG;
4850 #else
4851             predictReg       = PREDICT_NONE;
4852 #endif
4853
4854             switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
4855             {
4856                 case GTF_CALL_VIRT_STUB:
4857
4858                     // We only want to record an interference between the virtual stub
4859                     // param reg and anything that's live AFTER the call, but we've not
4860                     // yet processed the indirect target.  So add virtualStubParamInfo.regMask
4861                     // to interferingRegs.
4862                     interferingRegs |= virtualStubParamInfo->GetRegMask();
4863 #ifdef DEBUG
4864                     if (verbose)
4865                         printf("Adding interference with Virtual Stub Param\n");
4866 #endif
4867                     codeGen->regSet.rsSetRegsModified(virtualStubParamInfo->GetRegMask());
4868
4869                     if (tree->gtCall.gtCallType == CT_INDIRECT)
4870                     {
4871                         predictReg = virtualStubParamInfo->GetPredict();
4872                     }
4873                     break;
4874
4875                 case GTF_CALL_VIRT_VTABLE:
4876                     predictReg = PREDICT_SCRATCH_REG;
4877                     break;
4878
4879                 case GTF_CALL_NONVIRT:
4880                     predictReg = PREDICT_SCRATCH_REG;
4881                     break;
4882             }
4883
4884             if (tree->gtCall.gtCallType == CT_INDIRECT)
4885             {
4886 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4887                 if (tree->gtCall.gtCallCookie)
4888                 {
4889                     codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4890
4891                     callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
4892                                                         lockedRegs | regArgMask, RBM_LASTUSE);
4893
4894                     // Just in case we predict some other registers, force interference with our two special
4895                     // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
4896                     callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
4897
4898                     predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
4899                 }
4900 #endif
4901                 callAddrMask |=
4902                     rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
4903             }
4904             else if (predictReg != PREDICT_NONE)
4905             {
4906                 callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
4907             }
4908
4909             if (tree->gtFlags & GTF_CALL_UNMANAGED)
4910             {
4911                 // Need a register for tcbReg
4912                 callAddrMask |=
4913                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4914 #if CPU_LOAD_STORE_ARCH
4915                 // Need an extra register for tmpReg
4916                 callAddrMask |=
4917                     rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
4918 #endif
4919             }
4920
4921             tree->gtUsedRegs |= callAddrMask;
4922
4923             /* After the call restore the orginal value of lockedRegs */
4924             lockedRegs |= keepMask;
4925
4926             /* set the return register */
4927             regMask = genReturnRegForTree(tree);
4928
4929             if (regMask & rsvdRegs)
4930             {
4931                 // We will need to relocate the return register value
4932                 regMaskTP intRegMask = (regMask & RBM_ALLINT);
4933 #if FEATURE_FP_REGALLOC
4934                 regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
4935 #endif
4936                 regMask = RBM_NONE;
4937
4938                 if (intRegMask)
4939                 {
4940                     if (intRegMask == RBM_INTRET)
4941                     {
4942                         regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4943                     }
4944                     else if (intRegMask == RBM_LNGRET)
4945                     {
4946                         regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4947                     }
4948                     else
4949                     {
4950                         noway_assert(!"unexpected return regMask");
4951                     }
4952                 }
4953
4954 #if FEATURE_FP_REGALLOC
4955                 if (floatRegMask)
4956                 {
4957                     if (floatRegMask == RBM_FLOATRET)
4958                     {
4959                         regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4960                     }
4961                     else if (floatRegMask == RBM_DOUBLERET)
4962                     {
4963                         regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4964                     }
4965                     else // HFA return case
4966                     {
4967                         for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
4968                         {
4969                             regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
4970                         }
4971                     }
4972                 }
4973 #endif
4974             }
4975
4976             /* the return registers (if any) are killed */
4977             tree->gtUsedRegs |= regMask;
4978
4979 #if GTF_CALL_REG_SAVE
4980             if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
4981 #endif
4982             {
4983                 /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
4984                 tree->gtUsedRegs |= RBM_CALLEE_TRASH;
4985             }
4986         }
4987
4988 #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4989             // Mark required registers for emitting tailcall profiler callback as used
4990             if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
4991             {
4992                 tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
4993             }
4994 #endif
4995             break;
4996
4997         case GT_ARR_ELEM:
4998
4999             // Figure out which registers can't be touched
5000             unsigned dim;
5001             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5002                 rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
5003
5004             regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
5005
5006             regMaskTP dimsMask;
5007             dimsMask = 0;
5008
5009 #if CPU_LOAD_STORE_ARCH
5010             // We need a register to load the bounds of the MD array
5011             regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
5012 #endif
5013
5014             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
5015             {
5016                 /* We need scratch registers to compute index-lower_bound.
5017                    Also, gtArrInds[0]'s register will be used as the second
5018                    addressability register (besides gtArrObj's) */
5019
5020                 regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
5021                                                         lockedRegs | regMask | dimsMask, rsvdRegs);
5022                 if (dim == 0)
5023                     regMask |= dimMask;
5024
5025                 dimsMask |= dimMask;
5026             }
5027 #ifdef _TARGET_XARCH_
5028             // INS_imul doesnt have an immediate constant.
5029             if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
5030                 regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
5031 #endif
5032             tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
5033             break;
5034
5035         case GT_CMPXCHG:
5036         {
5037 #ifdef _TARGET_XARCH_
5038             rsvdRegs |= RBM_EAX;
5039 #endif
5040             if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
5041             {
5042                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
5043             }
5044             else
5045             {
5046                 regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
5047             }
5048             op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
5049
5050 #ifdef _TARGET_XARCH_
5051             rsvdRegs &= ~RBM_EAX;
5052             tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
5053                                           rsvdRegs | regMask | op2Mask);
5054             tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
5055             predictReg       = PREDICT_REG_EAX; // When this is done the result is always in EAX.
5056 #else
5057             tmpMask          = 0;
5058             tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
5059 #endif
5060         }
5061         break;
5062
5063         case GT_ARR_BOUNDS_CHECK:
5064         {
5065             regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
5066             regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
5067             rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
5068
5069             tree->gtUsedRegs =
5070                 (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
5071         }
5072         break;
5073
5074         default:
5075             NO_WAY("unexpected special operator in reg use prediction");
5076             break;
5077     }
5078
5079 RETURN_CHECK:
5080
5081 #ifdef DEBUG
5082     /* make sure we set them to something reasonable */
5083     if (tree->gtUsedRegs & RBM_ILLEGAL)
5084         noway_assert(!"used regs not set properly in reg use prediction");
5085
5086     if (regMask & RBM_ILLEGAL)
5087         noway_assert(!"return value not set propery in reg use prediction");
5088
5089 #endif
5090
5091     /*
5092      *  If the gtUsedRegs conflicts with lockedRegs
5093      *  then we going to have to spill some registers
5094      *  into the non-trashed register set to keep it alive
5095      */
5096     regMaskTP spillMask;
5097     spillMask = tree->gtUsedRegs & lockedRegs;
5098
5099     if (spillMask)
5100     {
5101         while (spillMask)
5102         {
5103             /* Find the next register that needs to be spilled */
5104             tmpMask = genFindLowestBit(spillMask);
5105
5106 #ifdef DEBUG
5107             if (verbose)
5108             {
5109                 printf("Predict spill  of   %s before: ", getRegName(genRegNumFromMask(tmpMask)));
5110                 gtDispTree(tree, 0, NULL, true);
5111                 if ((tmpMask & regMask) == 0)
5112                 {
5113                     printf("Predict reload of   %s after : ", getRegName(genRegNumFromMask(tmpMask)));
5114                     gtDispTree(tree, 0, NULL, true);
5115                 }
5116             }
5117 #endif
5118             /* In Codegen it will typically introduce a spill temp here */
5119             /* rather than relocating the register to a non trashed reg */
5120             rpPredictSpillCnt++;
5121
5122             /* Remove it from the spillMask */
5123             spillMask &= ~tmpMask;
5124         }
5125     }
5126
5127     /*
5128      *  If the return registers in regMask conflicts with the lockedRegs
5129      *  then we allocate extra registers for the reload of the conflicting
5130      *  registers.
5131      *
5132      *  Set spillMask to the set of locked registers that have to be reloaded here.
5133      *  reloadMask is set to the extra registers that are used to reload
5134      *  the spilled lockedRegs.
5135      */
5136
5137     noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
5138     spillMask = lockedRegs & regMask;
5139
5140     if (spillMask)
5141     {
5142         /* Remove the spillMask from regMask */
5143         regMask &= ~spillMask;
5144
5145         regMaskTP reloadMask = RBM_NONE;
5146         while (spillMask)
5147         {
5148             /* Get an extra register to hold it */
5149             regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
5150 #ifdef DEBUG
5151             if (verbose)
5152             {
5153                 printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
5154                 gtDispTree(tree, 0, NULL, true);
5155             }
5156 #endif
5157             reloadMask |= reloadReg;
5158
5159             /* Remove it from the spillMask */
5160             spillMask &= ~genFindLowestBit(spillMask);
5161         }
5162
5163         /* Update regMask to use the reloadMask */
5164         regMask |= reloadMask;
5165
5166         /* update the gtUsedRegs mask */
5167         tree->gtUsedRegs |= (regMaskSmall)regMask;
5168     }
5169
5170     regMaskTP regUse = tree->gtUsedRegs;
5171     regUse |= interferingRegs;
5172
5173     if (!VarSetOps::IsEmpty(this, compCurLife))
5174     {
5175         // Add interference between the current set of live variables and
5176         //  the set of temporary registers need to evaluate the sub tree
5177         if (regUse)
5178         {
5179             rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
5180         }
5181     }
5182
5183     if (rpAsgVarNum != -1)
5184     {
5185         // Add interference between the registers used (if any)
5186         // and the assignment target variable
5187         if (regUse)
5188         {
5189             rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
5190         }
5191
5192         // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
5193         // side of the assignment passed here using PREDICT_REG_VAR_Txx)
5194         // to the set of currently live variables. This new interference will prevent us
5195         // from using the register value used here for enregistering different live variable
5196         //
5197         if (!VarSetOps::IsEmpty(this, compCurLife))
5198         {
5199             rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
5200         }
5201     }
5202
5203     /* Do we need to resore the oldLastUseVars value */
5204     if (restoreLastUseVars)
5205     {
5206         /*  If we used a GT_ASG targeted register then we need to add
5207          *  a variable interference between any new last use variables
5208          *  and the GT_ASG targeted register
5209          */
5210         if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
5211         {
5212             rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
5213                                              DEBUGARG("asgn tgt last use conflict"));
5214         }
5215         VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
5216     }
5217
5218     return regMask;
5219 }
5220 #ifdef _PREFAST_
5221 #pragma warning(pop)
5222 #endif
5223
5224 #endif // LEGACY_BACKEND
5225
5226 /****************************************************************************/
5227 /* Returns true when we must create an EBP frame
5228    This is used to force most managed methods to have EBP based frames
5229    which allows the ETW kernel stackwalker to walk the stacks of managed code
5230    this allows the kernel to perform light weight profiling
5231  */
5232 bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
5233 {
5234     bool result = false;
5235 #ifdef DEBUG
5236     const char* reason = nullptr;
5237 #endif
5238
5239 #if ETW_EBP_FRAMED
5240     if (!result && (opts.MinOpts() || opts.compDbgCode))
5241     {
5242         INDEBUG(reason = "Debug Code");
5243         result = true;
5244     }
5245     if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
5246     {
5247         INDEBUG(reason = "IL Code Size");
5248         result = true;
5249     }
5250     if (!result && (fgBBcount > 3))
5251     {
5252         INDEBUG(reason = "BasicBlock Count");
5253         result = true;
5254     }
5255     if (!result && fgHasLoops)
5256     {
5257         INDEBUG(reason = "Method has Loops");
5258         result = true;
5259     }
5260     if (!result && (optCallCount >= 2))
5261     {
5262         INDEBUG(reason = "Call Count");
5263         result = true;
5264     }
5265     if (!result && (optIndirectCallCount >= 1))
5266     {
5267         INDEBUG(reason = "Indirect Call");
5268         result = true;
5269     }
5270 #endif // ETW_EBP_FRAMED
5271
5272     // VM wants to identify the containing frame of an InlinedCallFrame always
5273     // via the frame register never the stack register so we need a frame.
5274     if (!result && (optNativeCallCount != 0))
5275     {
5276         INDEBUG(reason = "Uses PInvoke");
5277         result = true;
5278     }
5279
5280 #ifdef _TARGET_ARM64_
5281     // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
5282     // pointer frames.
5283     if (!result)
5284     {
5285         INDEBUG(reason = "Temporary ARM64 force frame pointer");
5286         result = true;
5287     }
5288 #endif // _TARGET_ARM64_
5289
5290 #ifdef DEBUG
5291     if ((result == true) && (wbReason != nullptr))
5292     {
5293         *wbReason = reason;
5294     }
5295 #endif
5296
5297     return result;
5298 }
5299
5300 #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
5301
5302 /*****************************************************************************
5303  *
5304  *  Predict which variables will be assigned to registers
5305  *  This is x86 specific and only predicts the integer registers and
5306  *  must be conservative, any register that is predicted to be enregister
5307  *  must end up being enregistered.
5308  *
5309  *  The rpPredictTreeRegUse takes advantage of the LCL_VARS that are
5310  *  predicted to be enregistered to minimize calls to rpPredictRegPick.
5311  *
5312  */
5313
5314 #ifdef _PREFAST_
5315 #pragma warning(push)
5316 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
5317 #endif
5318 regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
5319 {
5320     unsigned regInx;
5321
5322     if (rpPasses <= rpPassesPessimize)
5323     {
5324         // Assume that we won't have to reverse EBP enregistration
5325         rpReverseEBPenreg = false;
5326
5327         // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
5328         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
5329             rpFrameType = FT_EBP_FRAME;
5330         else
5331             rpFrameType = FT_ESP_FRAME;
5332     }
5333
5334 #if !ETW_EBP_FRAMED
5335     // If we are using FPBASE as the frame register, we cannot also use it for
5336     // a local var
5337     if (rpFrameType == FT_EBP_FRAME)
5338     {
5339         regAvail &= ~RBM_FPBASE;
5340     }
5341 #endif // !ETW_EBP_FRAMED
5342
5343     rpStkPredict        = 0;
5344     rpPredictAssignMask = regAvail;
5345
5346     raSetupArgMasks(&codeGen->intRegState);
5347 #if !FEATURE_STACK_FP_X87
5348     raSetupArgMasks(&codeGen->floatRegState);
5349 #endif
5350
5351     // If there is a secret stub param, it is also live in
5352     if (info.compPublishStubParam)
5353     {
5354         codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
5355     }
5356
5357     if (regAvail == RBM_NONE)
5358     {
5359         unsigned   lclNum;
5360         LclVarDsc* varDsc;
5361
5362         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
5363         {
5364 #if FEATURE_STACK_FP_X87
5365             if (!varDsc->IsFloatRegType())
5366 #endif
5367             {
5368                 varDsc->lvRegNum = REG_STK;
5369                 if (isRegPairType(varDsc->lvType))
5370                     varDsc->lvOtherReg = REG_STK;
5371             }
5372         }
5373     }
5374
5375 #ifdef DEBUG
5376     if (verbose)
5377     {
5378         printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
5379         printf("\n        Available registers = ");
5380         dspRegMask(regAvail);
5381         printf("\n");
5382     }
5383 #endif
5384
5385     if (regAvail == RBM_NONE)
5386     {
5387         return RBM_NONE;
5388     }
5389
5390     /* We cannot change the lvVarIndexes at this point, so we  */
5391     /* can only re-order the existing set of tracked variables */
5392     /* Which will change the order in which we select the      */
5393     /* locals for enregistering.                               */
5394
5395     assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
5396
5397     // Should not be set unless optimizing
5398     noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
5399
5400     if (lvaSortAgain)
5401         lvaSortOnly();
5402
5403 #ifdef DEBUG
5404     fgDebugCheckBBlist();
5405 #endif
5406
5407     /* Initialize the weighted count of variables that could have */
5408     /* been enregistered but weren't */
5409     unsigned refCntStk    = 0; // sum of     ref counts for all stack based variables
5410     unsigned refCntEBP    = 0; // sum of     ref counts for EBP enregistered variables
5411     unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
5412 #if DOUBLE_ALIGN
5413     unsigned refCntStkParam;  // sum of     ref counts for all stack based parameters
5414     unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
5415
5416 #if FEATURE_STACK_FP_X87
5417     refCntStkParam  = raCntStkParamDblStackFP;
5418     refCntWtdStkDbl = raCntWtdStkDblStackFP;
5419     refCntStk       = raCntStkStackFP;
5420 #else
5421     refCntStkParam  = 0;
5422     refCntWtdStkDbl = 0;
5423     refCntStk       = 0;
5424 #endif // FEATURE_STACK_FP_X87
5425
5426 #endif // DOUBLE_ALIGN
5427
5428     /* Set of registers used to enregister variables in the predition */
5429     regMaskTP regUsed = RBM_NONE;
5430
5431     /*-------------------------------------------------------------------------
5432      *
5433      *  Predict/Assign the enregistered locals in ref-count order
5434      *
5435      */
5436
5437     VARSET_TP unprocessedVars(VarSetOps::MakeFull(this));
5438
5439     unsigned FPRegVarLiveInCnt;
5440     FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
5441
5442     LclVarDsc* varDsc;
5443     for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
5444     {
5445         bool notWorthy = false;
5446
5447         unsigned  varIndex;
5448         bool      isDouble;
5449         regMaskTP regAvailForType;
5450         var_types regType;
5451         regMaskTP avoidReg;
5452         unsigned  customVarOrderSize;
5453         regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
5454         bool      firstHalf;
5455         regNumber saveOtherReg;
5456
5457         varDsc = lvaRefSorted[sortNum];
5458
5459 #if FEATURE_STACK_FP_X87
5460         if (varTypeIsFloating(varDsc->TypeGet()))
5461         {
5462 #ifdef DEBUG
5463             if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5464             {
5465                 // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
5466                 // be en-registered.
5467                 noway_assert(!varDsc->lvRegister);
5468             }
5469 #endif
5470             continue;
5471         }
5472 #endif
5473
5474         /* Check the set of invariant things that would prevent enregistration */
5475
5476         /* Ignore the variable if it's not tracked */
5477         if (!varDsc->lvTracked)
5478             goto CANT_REG;
5479
5480         /* Get hold of the index and the interference mask for the variable */
5481         varIndex = varDsc->lvVarIndex;
5482
5483         // Remove 'varIndex' from unprocessedVars
5484         VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
5485
5486         // Skip the variable if it's marked as DoNotEnregister.
5487
5488         if (varDsc->lvDoNotEnregister)
5489             goto CANT_REG;
5490
5491         /* TODO: For now if we have JMP all register args go to stack
5492          * TODO: Later consider extending the life of the argument or make a copy of it */
5493
5494         if (compJmpOpUsed && varDsc->lvIsRegArg)
5495             goto CANT_REG;
5496
5497         /* Skip the variable if the ref count is zero */
5498
5499         if (varDsc->lvRefCnt == 0)
5500             goto CANT_REG;
5501
5502         /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
5503
5504         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
5505         {
5506             goto CANT_REG;
5507         }
5508
5509         /* Is the unweighted ref count too low to be interesting? */
5510
5511         if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
5512             (varDsc->lvRefCnt <= 1))
5513         {
5514             /* Sometimes it's useful to enregister a variable with only one use */
5515             /*   arguments referenced in loops are one example */
5516
5517             if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
5518                 goto OK_TO_ENREGISTER;
5519
5520             /* If the variable has a preferred register set it may be useful to put it there */
5521             if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
5522                 goto OK_TO_ENREGISTER;
5523
5524             /* Keep going; the table is sorted by "weighted" ref count */
5525             goto CANT_REG;
5526         }
5527
5528     OK_TO_ENREGISTER:
5529
5530         if (varTypeIsFloating(varDsc->TypeGet()))
5531         {
5532             regType         = varDsc->TypeGet();
5533             regAvailForType = regAvail & RBM_ALLFLOAT;
5534         }
5535         else
5536         {
5537             regType         = TYP_INT;
5538             regAvailForType = regAvail & RBM_ALLINT;
5539         }
5540
5541 #ifdef _TARGET_ARM_
5542         isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
5543
5544         if (isDouble)
5545         {
5546             regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
5547         }
5548 #endif
5549
5550         /* If we don't have any registers available then skip the enregistration attempt */
5551         if (regAvailForType == RBM_NONE)
5552             goto NO_REG;
5553
5554         // On the pessimize passes don't even try to enregister LONGS
5555         if (isRegPairType(varDsc->lvType))
5556         {
5557             if (rpPasses > rpPassesPessimize)
5558                 goto NO_REG;
5559             else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
5560                 goto NO_REG;
5561         }
5562
5563         // Set of registers to avoid when performing register allocation
5564         avoidReg = RBM_NONE;
5565
5566         if (!varDsc->lvIsRegArg)
5567         {
5568             /* For local variables,
5569              *  avoid the incoming arguments,
5570              *  but only if you conflict with them */
5571
5572             if (raAvoidArgRegMask != 0)
5573             {
5574                 LclVarDsc* argDsc;
5575                 LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
5576
5577                 for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
5578                 {
5579                     if (!argDsc->lvIsRegArg)
5580                         continue;
5581
5582                     bool      isFloat  = argDsc->IsFloatRegType();
5583                     regNumber inArgReg = argDsc->lvArgReg;
5584                     regMaskTP inArgBit = genRegMask(inArgReg);
5585
5586                     // Is this inArgReg in the raAvoidArgRegMask set?
5587
5588                     if (!(raAvoidArgRegMask & inArgBit))
5589                         continue;
5590
5591                     noway_assert(argDsc->lvIsParam);
5592                     noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
5593
5594                     unsigned locVarIndex = varDsc->lvVarIndex;
5595                     unsigned argVarIndex = argDsc->lvVarIndex;
5596
5597                     /* Does this variable interfere with the arg variable ? */
5598                     if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
5599                     {
5600                         noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5601                         /* Yes, so try to avoid the incoming arg reg */
5602                         avoidReg |= inArgBit;
5603                     }
5604                     else
5605                     {
5606                         noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
5607                     }
5608                 }
5609             }
5610         }
5611
5612         // Now we will try to predict which register the variable
5613         // could  be enregistered in
5614
5615         customVarOrderSize = MAX_VAR_ORDER_SIZE;
5616
5617         raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
5618
5619         firstHalf    = false;
5620         saveOtherReg = DUMMY_INIT(REG_NA);
5621
5622         for (regInx = 0; regInx < customVarOrderSize; regInx++)
5623         {
5624             regNumber regNum  = customVarOrder[regInx];
5625             regMaskTP regBits = genRegMask(regNum);
5626
5627             /* Skip this register if it isn't available */
5628             if ((regAvailForType & regBits) == 0)
5629                 continue;
5630
5631             /* Skip this register if it interferes with the variable */
5632
5633             if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
5634                 continue;
5635
5636             if (varTypeIsFloating(regType))
5637             {
5638 #ifdef _TARGET_ARM_
5639                 if (isDouble)
5640                 {
5641                     regNumber regNext = REG_NEXT(regNum);
5642                     regBits |= genRegMask(regNext);
5643
5644                     /* Skip if regNext interferes with the variable */
5645                     if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
5646                         continue;
5647                 }
5648 #endif
5649             }
5650
5651             bool firstUseOfReg     = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
5652             bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
5653             bool calleeSavedReg    = ((regBits & RBM_CALLEE_SAVED) != 0);
5654
5655             /* Skip this register if the weighted ref count is less than two
5656                and we are considering a unused callee saved register */
5657
5658             if (lessThanTwoRefWtd && // less than two references (weighted)
5659                 firstUseOfReg &&     // first use of this register
5660                 calleeSavedReg)      // callee saved register
5661             {
5662                 unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
5663
5664                 // psc is abbeviation for possibleSameColor
5665                 VARSET_TP pscVarSet(VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
5666
5667                 VarSetOps::Iter pscIndexIter(this, pscVarSet);
5668                 unsigned        pscIndex = 0;
5669                 while (pscIndexIter.NextElem(&pscIndex))
5670                 {
5671                     LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
5672                     totalRefCntWtd += pscVar->lvRefCntWtd;
5673                     if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
5674                         break;
5675                 }
5676
5677                 if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
5678                 {
5679                     notWorthy = true;
5680                     continue; // not worth spilling a callee saved register
5681                 }
5682                 // otherwise we will spill this callee saved registers,
5683                 // because its uses when combined with the uses of
5684                 // other yet to be processed candidates exceed our threshold.
5685                 // totalRefCntWtd = totalRefCntWtd;
5686             }
5687
5688             /* Looks good - mark the variable as living in the register */
5689
5690             if (isRegPairType(varDsc->lvType))
5691             {
5692                 if (firstHalf == false)
5693                 {
5694                     /* Enregister the first half of the long */
5695                     varDsc->lvRegNum   = regNum;
5696                     saveOtherReg       = varDsc->lvOtherReg;
5697                     varDsc->lvOtherReg = REG_STK;
5698                     firstHalf          = true;
5699                 }
5700                 else
5701                 {
5702                     /* Ensure 'well-formed' register pairs */
5703                     /* (those returned by gen[Pick|Grab]RegPair) */
5704
5705                     if (regNum < varDsc->lvRegNum)
5706                     {
5707                         varDsc->lvOtherReg = varDsc->lvRegNum;
5708                         varDsc->lvRegNum   = regNum;
5709                     }
5710                     else
5711                     {
5712                         varDsc->lvOtherReg = regNum;
5713                     }
5714                     firstHalf = false;
5715                 }
5716             }
5717             else
5718             {
5719                 varDsc->lvRegNum = regNum;
5720 #ifdef _TARGET_ARM_
5721                 if (isDouble)
5722                 {
5723                     varDsc->lvOtherReg = REG_NEXT(regNum);
5724                 }
5725 #endif
5726             }
5727
5728             if (regNum == REG_FPBASE)
5729             {
5730                 refCntEBP += varDsc->lvRefCnt;
5731                 refCntWtdEBP += varDsc->lvRefCntWtd;
5732 #if DOUBLE_ALIGN
5733                 if (varDsc->lvIsParam)
5734                 {
5735                     refCntStkParam += varDsc->lvRefCnt;
5736                 }
5737 #endif
5738             }
5739
5740             /* Record this register in the regUsed set */
5741             regUsed |= regBits;
5742
5743             /* The register is now ineligible for all interfering variables */
5744
5745             VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
5746
5747 #ifdef _TARGET_ARM_
5748             if (isDouble)
5749             {
5750                 regNumber       secondHalf = REG_NEXT(regNum);
5751                 VarSetOps::Iter iter(this, lvaVarIntf[varIndex]);
5752                 unsigned        intfIndex = 0;
5753                 while (iter.NextElem(&intfIndex))
5754                 {
5755                     VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
5756                 }
5757             }
5758 #endif
5759
5760             /* If a register argument, remove its incoming register
5761              * from the "avoid" list */
5762
5763             if (varDsc->lvIsRegArg)
5764             {
5765                 raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
5766 #ifdef _TARGET_ARM_
5767                 if (isDouble)
5768                 {
5769                     raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
5770                 }
5771 #endif
5772             }
5773
5774             /* A variable of TYP_LONG can take two registers */
5775             if (firstHalf)
5776                 continue;
5777
5778             // Since we have successfully enregistered this variable it is
5779             // now time to move on and consider the next variable
5780             goto ENREG_VAR;
5781         }
5782
5783         if (firstHalf)
5784         {
5785             noway_assert(isRegPairType(varDsc->lvType));
5786
5787             /* This TYP_LONG is partially enregistered */
5788
5789             noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
5790
5791             if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
5792             {
5793                 rpLostEnreg = true;
5794             }
5795
5796             raAddToStkPredict(varDsc->lvRefCntWtd);
5797             goto ENREG_VAR;
5798         }
5799
5800     NO_REG:;
5801         if (varDsc->lvDependReg)
5802         {
5803             rpLostEnreg = true;
5804         }
5805
5806         if (!notWorthy)
5807         {
5808             /* Weighted count of variables that could have been enregistered but weren't */
5809             raAddToStkPredict(varDsc->lvRefCntWtd);
5810
5811             if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
5812                 raAddToStkPredict(varDsc->lvRefCntWtd);
5813         }
5814
5815     CANT_REG:;
5816         varDsc->lvRegister = false;
5817
5818         varDsc->lvRegNum = REG_STK;
5819         if (isRegPairType(varDsc->lvType))
5820             varDsc->lvOtherReg = REG_STK;
5821
5822         /* unweighted count of variables that were not enregistered */
5823
5824         refCntStk += varDsc->lvRefCnt;
5825
5826 #if DOUBLE_ALIGN
5827         if (varDsc->lvIsParam)
5828         {
5829             refCntStkParam += varDsc->lvRefCnt;
5830         }
5831         else
5832         {
5833             /* Is it a stack based double? */
5834             /* Note that double params are excluded since they can not be double aligned */
5835             if (varDsc->lvType == TYP_DOUBLE)
5836             {
5837                 refCntWtdStkDbl += varDsc->lvRefCntWtd;
5838             }
5839         }
5840 #endif
5841 #ifdef DEBUG
5842         if (verbose)
5843         {
5844             printf("; ");
5845             gtDispLclVar((unsigned)(varDsc - lvaTable));
5846             if (varDsc->lvTracked)
5847                 printf("T%02u", varDsc->lvVarIndex);
5848             else
5849                 printf("   ");
5850             printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
5851             if (varDsc->lvDoNotEnregister)
5852                 printf(", do-not-enregister");
5853             printf("\n");
5854         }
5855 #endif
5856         continue;
5857
5858     ENREG_VAR:;
5859
5860         varDsc->lvRegister = true;
5861
5862         // Record the fact that we enregistered a stack arg when tail call is used.
5863         if (compJmpOpUsed && !varDsc->lvIsRegArg)
5864         {
5865             rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
5866             if (isRegPairType(varDsc->lvType))
5867             {
5868                 rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
5869             }
5870         }
5871
5872 #ifdef DEBUG
5873         if (verbose)
5874         {
5875             printf("; ");
5876             gtDispLclVar((unsigned)(varDsc - lvaTable));
5877             printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
5878                    refCntWtd2str(varDsc->lvRefCntWtd));
5879             varDsc->PrintVarReg();
5880 #ifdef _TARGET_ARM_
5881             if (isDouble)
5882             {
5883                 printf(":%s", getRegName(varDsc->lvOtherReg));
5884             }
5885 #endif
5886             printf("\n");
5887         }
5888 #endif
5889     }
5890
5891 #if ETW_EBP_FRAMED
5892     noway_assert(refCntEBP == 0);
5893 #endif
5894
5895 #ifdef DEBUG
5896     if (verbose)
5897     {
5898         if (refCntStk > 0)
5899             printf("; refCntStk       = %u\n", refCntStk);
5900         if (refCntEBP > 0)
5901             printf("; refCntEBP       = %u\n", refCntEBP);
5902         if (refCntWtdEBP > 0)
5903             printf("; refCntWtdEBP    = %u\n", refCntWtdEBP);
5904 #if DOUBLE_ALIGN
5905         if (refCntStkParam > 0)
5906             printf("; refCntStkParam  = %u\n", refCntStkParam);
5907         if (refCntWtdStkDbl > 0)
5908             printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
5909 #endif
5910     }
5911 #endif
5912
5913     /* Determine how the EBP register should be used */
5914     CLANG_FORMAT_COMMENT_ANCHOR;
5915
5916 #if DOUBLE_ALIGN
5917
5918     if (!codeGen->isFramePointerRequired())
5919     {
5920         noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
5921
5922         /*
5923             First let us decide if we should use EBP to create a
5924             double-aligned frame, instead of enregistering variables
5925         */
5926
5927         if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
5928         {
5929             rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5930             goto REVERSE_EBP_ENREG;
5931         }
5932
5933         if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
5934         {
5935             if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
5936             {
5937                 rpFrameType = FT_DOUBLE_ALIGN_FRAME;
5938                 goto REVERSE_EBP_ENREG;
5939             }
5940         }
5941     }
5942
5943 #endif // DOUBLE_ALIGN
5944
5945     if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
5946     {
5947 #ifdef _TARGET_XARCH_
5948 // clang-format off
5949         /*  If we are using EBP to enregister variables then
5950             will we actually save bytes by setting up an EBP frame?
5951
5952             Each stack reference is an extra byte of code if we use
5953             an ESP frame.
5954
5955             Here we measure the savings that we get by using EBP to
5956             enregister variables vs. the cost in code size that we
5957             pay when using an ESP based frame.
5958
5959             We pay one byte of code for each refCntStk
5960             but we save one byte (or more) for each refCntEBP.
5961
5962             Our savings are the elimination of a stack memory read/write.
5963             We use the loop weighted value of
5964                refCntWtdEBP * mem_access_weight (0, 3, 6)
5965             to represent this savings.
5966          */
5967
5968         // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
5969         // to set up an EBP frame in the prolog and epilog
5970         #define EBP_FRAME_SETUP_SIZE  5
5971         // clang-format on
5972
5973         if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
5974         {
5975             unsigned bytesSaved        = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
5976             unsigned mem_access_weight = 3;
5977
5978             if (compCodeOpt() == SMALL_CODE)
5979                 mem_access_weight = 0;
5980             else if (compCodeOpt() == FAST_CODE)
5981                 mem_access_weight *= 2;
5982
5983             if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
5984             {
5985                 /* It's not be a good idea to use EBP in our predictions */
5986                 CLANG_FORMAT_COMMENT_ANCHOR;
5987 #ifdef DEBUG
5988                 if (verbose && (refCntEBP > 0))
5989                     printf("; Predicting that it's not worth using EBP to enregister variables\n");
5990 #endif
5991                 rpFrameType = FT_EBP_FRAME;
5992                 goto REVERSE_EBP_ENREG;
5993             }
5994         }
5995 #endif // _TARGET_XARCH_
5996
5997         if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
5998         {
5999 #ifdef DEBUG
6000             const char* reason;
6001 #endif
6002             if (rpMustCreateEBPCalled == false)
6003             {
6004                 rpMustCreateEBPCalled = true;
6005                 if (rpMustCreateEBPFrame(INDEBUG(&reason)))
6006                 {
6007 #ifdef DEBUG
6008                     if (verbose)
6009                         printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
6010 #endif
6011                     codeGen->setFrameRequired(true);
6012
6013                     rpFrameType = FT_EBP_FRAME;
6014                     goto REVERSE_EBP_ENREG;
6015                 }
6016             }
6017         }
6018     }
6019
6020     goto EXIT;
6021
6022 REVERSE_EBP_ENREG:
6023
6024     noway_assert(rpFrameType != FT_ESP_FRAME);
6025
6026     rpReverseEBPenreg = true;
6027
6028 #if !ETW_EBP_FRAMED
6029     if (refCntEBP > 0)
6030     {
6031         noway_assert(regUsed & RBM_FPBASE);
6032
6033         regUsed &= ~RBM_FPBASE;
6034
6035         /* variables that were enregistered in EBP become stack based variables */
6036         raAddToStkPredict(refCntWtdEBP);
6037
6038         unsigned lclNum;
6039
6040         /* We're going to have to undo some predicted enregistered variables */
6041         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6042         {
6043             /* Is this a register variable? */
6044             if (varDsc->lvRegNum != REG_STK)
6045             {
6046                 if (isRegPairType(varDsc->lvType))
6047                 {
6048                     /* Only one can be EBP */
6049                     if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
6050                     {
6051                         if (varDsc->lvRegNum == REG_FPBASE)
6052                             varDsc->lvRegNum = varDsc->lvOtherReg;
6053
6054                         varDsc->lvOtherReg = REG_STK;
6055
6056                         if (varDsc->lvRegNum == REG_STK)
6057                             varDsc->lvRegister = false;
6058
6059                         if (varDsc->lvDependReg)
6060                             rpLostEnreg = true;
6061 #ifdef DEBUG
6062                         if (verbose)
6063                             goto DUMP_MSG;
6064 #endif
6065                     }
6066                 }
6067                 else
6068                 {
6069                     if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
6070                     {
6071                         varDsc->lvRegNum = REG_STK;
6072
6073                         varDsc->lvRegister = false;
6074
6075                         if (varDsc->lvDependReg)
6076                             rpLostEnreg = true;
6077 #ifdef DEBUG
6078                         if (verbose)
6079                         {
6080                         DUMP_MSG:
6081                             printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
6082                                    varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
6083                                    (varDsc->lvRefCntWtd & 1) ? ".5" : "");
6084                         }
6085 #endif
6086                     }
6087                 }
6088             }
6089         }
6090     }
6091 #endif // ETW_EBP_FRAMED
6092
6093 EXIT:;
6094
6095     unsigned lclNum;
6096     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6097     {
6098         /* Clear the lvDependReg flag for next iteration of the predictor */
6099         varDsc->lvDependReg = false;
6100
6101         // If we set rpLostEnreg and this is the first pessimize pass
6102         // then reverse the enreg of all TYP_LONG
6103         if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
6104         {
6105             varDsc->lvRegNum   = REG_STK;
6106             varDsc->lvOtherReg = REG_STK;
6107         }
6108     }
6109
6110 #ifdef DEBUG
6111     if (verbose && raNewBlocks)
6112     {
6113         printf("\nAdded FP register killing blocks:\n");
6114         fgDispBasicBlocks();
6115         printf("\n");
6116     }
6117 #endif
6118     noway_assert(rpFrameType != FT_NOT_SET);
6119
6120     /* return the set of registers used to enregister variables */
6121     return regUsed;
6122 }
6123 #ifdef _PREFAST_
6124 #pragma warning(pop)
6125 #endif
6126
6127 /*****************************************************************************
6128  *
6129  *  Predict register use for every tree in the function. Note that we do this
6130  *  at different times (not to mention in a totally different way) for x86 vs
6131  *  RISC targets.
6132  */
6133 void Compiler::rpPredictRegUse()
6134 {
6135 #ifdef DEBUG
6136     if (verbose)
6137         raDumpVarIntf();
6138 #endif
6139
6140     // We might want to adjust the ref counts based on interference
6141     raAdjustVarIntf();
6142
6143     regMaskTP allAcceptableRegs = RBM_ALLINT;
6144
6145 #if FEATURE_FP_REGALLOC
6146     allAcceptableRegs |= raConfigRestrictMaskFP();
6147 #endif
6148
6149     allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
6150
6151     /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
6152        to acdHelper(). This is done implicitly, without creating a GT_CALL
6153        node. Hence, this interference is be handled implicitly by
6154        restricting the registers used for enregistering variables */
6155
6156     if (opts.compDbgCode)
6157     {
6158         allAcceptableRegs &= RBM_CALLEE_SAVED;
6159     }
6160
6161     /* Compute the initial regmask to use for the first pass */
6162     regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
6163     regMaskTP regUsed;
6164
6165 #if CPU_USES_BLOCK_MOVE
6166     /* If we might need to generate a rep mov instruction */
6167     /* remove ESI and EDI */
6168     if (compBlkOpUsed)
6169         regAvail &= ~(RBM_ESI | RBM_EDI);
6170 #endif
6171
6172 #ifdef _TARGET_X86_
6173     /* If we using longs then we remove ESI to allow */
6174     /* ESI:EBX to be saved accross a call */
6175     if (compLongUsed)
6176         regAvail &= ~(RBM_ESI);
6177 #endif
6178
6179 #ifdef _TARGET_ARM_
6180     // For the first register allocation pass we don't want to color using r4
6181     // as we want to allow it to be used to color the internal temps instead
6182     // when r0,r1,r2,r3 are all in use.
6183     //
6184     regAvail &= ~(RBM_R4);
6185 #endif
6186
6187 #if ETW_EBP_FRAMED
6188     // We never have EBP available when ETW_EBP_FRAME is defined
6189     regAvail &= ~RBM_FPBASE;
6190 #else
6191     /* If a frame pointer is required then we remove EBP */
6192     if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6193         regAvail &= ~RBM_FPBASE;
6194 #endif
6195
6196 #ifdef DEBUG
6197     BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
6198     if (fJitNoRegLoc)
6199         regAvail = RBM_NONE;
6200 #endif
6201
6202     if ((opts.compFlags & CLFLG_REGVAR) == 0)
6203         regAvail = RBM_NONE;
6204
6205 #if FEATURE_STACK_FP_X87
6206     VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
6207     VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
6208
6209     // Calculate the set of all tracked FP/non-FP variables
6210     //  into optAllFloatVars and optAllNonFPvars
6211
6212     unsigned   lclNum;
6213     LclVarDsc* varDsc;
6214
6215     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6216     {
6217         /* Ignore the variable if it's not tracked */
6218
6219         if (!varDsc->lvTracked)
6220             continue;
6221
6222         /* Get hold of the index and the interference mask for the variable */
6223
6224         unsigned varNum = varDsc->lvVarIndex;
6225
6226         /* add to the set of all tracked FP/non-FP variables */
6227
6228         if (varDsc->IsFloatRegType())
6229             VarSetOps::AddElemD(this, optAllFloatVars, varNum);
6230         else
6231             VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
6232     }
6233 #endif
6234
6235     for (unsigned i = 0; i < REG_COUNT; i++)
6236     {
6237         VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
6238     }
6239     for (unsigned i = 0; i < lvaTrackedCount; i++)
6240     {
6241         VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
6242     }
6243
6244     raNewBlocks          = false;
6245     rpPredictAssignAgain = false;
6246     rpPasses             = 0;
6247
6248     bool      mustPredict   = true;
6249     unsigned  stmtNum       = 0;
6250     unsigned  oldStkPredict = DUMMY_INIT(~0);
6251     VARSET_TP oldLclRegIntf[REG_COUNT];
6252
6253     for (unsigned i = 0; i < REG_COUNT; i++)
6254     {
6255         VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
6256     }
6257
6258     while (true)
6259     {
6260         /* Assign registers to variables using the variable/register interference
6261            graph (raLclRegIntf[]) calculated in the previous pass */
6262         regUsed = rpPredictAssignRegVars(regAvail);
6263
6264         mustPredict |= rpLostEnreg;
6265
6266 #ifdef _TARGET_ARM_
6267         // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
6268         if ((rpPasses == 0) && ((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) != 0) &&
6269             !compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6270         {
6271             // We can release our reservation on R10 and use it to color registers
6272             codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
6273             allAcceptableRegs |= RBM_OPT_RSVD;
6274         }
6275 #endif
6276
6277         /* Is our new prediction good enough?? */
6278         if (!mustPredict)
6279         {
6280             /* For small methods (less than 12 stmts), we add a    */
6281             /*   extra pass if we are predicting the use of some   */
6282             /*   of the caller saved registers.                    */
6283             /* This fixes RAID perf bug 43440 VB Ackerman function */
6284
6285             if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
6286             {
6287                 goto EXTRA_PASS;
6288             }
6289
6290             /* If every variable was fully enregistered then we're done */
6291             if (rpStkPredict == 0)
6292                 goto ALL_DONE;
6293
6294             // This was a successful prediction.  Record it, in case it turns out to be the best one.
6295             rpRecordPrediction();
6296
6297             if (rpPasses > 1)
6298             {
6299                 noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
6300
6301                 // Be careful about overflow
6302                 unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
6303                 if (oldStkPredict < highStkPredict)
6304                     goto ALL_DONE;
6305
6306                 if (rpStkPredict < rpPasses * 8)
6307                     goto ALL_DONE;
6308
6309                 if (rpPasses >= (rpPassesMax - 1))
6310                     goto ALL_DONE;
6311             }
6312
6313         EXTRA_PASS:
6314             /* We will do another pass */;
6315         }
6316
6317 #ifdef DEBUG
6318         if (JitConfig.JitAssertOnMaxRAPasses())
6319         {
6320             noway_assert(rpPasses < rpPassesMax &&
6321                          "This may not a bug, but dev team should look and see what is happening");
6322         }
6323 #endif
6324
6325         // The "64" here had been "VARSET_SZ".  It is unclear why this number is connected with
6326         // the (max) size of a VARSET.  We've eliminated this constant, so I left this as a constant.  We hope
6327         // that we're phasing out this code, anyway, and this leaves the behavior the way that it was.
6328         if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
6329         {
6330             NO_WAY("we seem to be stuck in an infinite loop. breaking out");
6331         }
6332
6333 #ifdef DEBUG
6334         if (verbose)
6335         {
6336             if (rpPasses > 0)
6337             {
6338                 if (rpLostEnreg)
6339                     printf("\n; Another pass due to rpLostEnreg");
6340                 if (rpAddedVarIntf)
6341                     printf("\n; Another pass due to rpAddedVarIntf");
6342                 if ((rpPasses == 1) && rpPredictAssignAgain)
6343                     printf("\n; Another pass due to rpPredictAssignAgain");
6344             }
6345             printf("\n; Register predicting pass# %d\n", rpPasses + 1);
6346         }
6347 #endif
6348
6349         /*  Zero the variable/register interference graph */
6350         for (unsigned i = 0; i < REG_COUNT; i++)
6351         {
6352             VarSetOps::ClearD(this, raLclRegIntf[i]);
6353         }
6354
6355         // if there are PInvoke calls and compLvFrameListRoot is enregistered,
6356         // it must not be in a register trashed by the callee
6357         if (info.compLvFrameListRoot != BAD_VAR_NUM)
6358         {
6359             assert(!opts.ShouldUsePInvokeHelpers());
6360             noway_assert(info.compLvFrameListRoot < lvaCount);
6361
6362             LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
6363
6364             if (pinvokeVarDsc->lvTracked)
6365             {
6366                 rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
6367                                                       DEBUGARG("compLvFrameListRoot"));
6368
6369                 // We would prefer to have this be enregister in the PINVOKE_TCB register
6370                 pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
6371             }
6372
6373             // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
6374             // worst case).  Make sure that the return value compiler temp that we create for the single
6375             // return block knows about this interference.
6376             if (genReturnLocal != BAD_VAR_NUM)
6377             {
6378                 noway_assert(genReturnBB);
6379                 LclVarDsc* localTmp = &lvaTable[genReturnLocal];
6380                 if (localTmp->lvTracked)
6381                 {
6382                     rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
6383                                     VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
6384                 }
6385             }
6386         }
6387
6388 #ifdef _TARGET_ARM_
6389         if (compFloatingPointUsed)
6390         {
6391             bool hasMustInitFloat = false;
6392
6393             // if we have any must-init floating point LclVars then we will add register interferences
6394             // for the arguments with RBM_SCRATCH
6395             // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
6396             // we won't home the arguments into REG_SCRATCH
6397
6398             unsigned   lclNum;
6399             LclVarDsc* varDsc;
6400
6401             for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6402             {
6403                 if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
6404                 {
6405                     hasMustInitFloat = true;
6406                     break;
6407                 }
6408             }
6409
6410             if (hasMustInitFloat)
6411             {
6412                 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6413                 {
6414                     // If is an incoming argument, that is tracked and not floating-point
6415                     if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
6416                     {
6417                         rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
6418                                                          DEBUGARG("arg home with must-init fp"));
6419                     }
6420                 }
6421             }
6422         }
6423 #endif
6424
6425         stmtNum        = 0;
6426         rpAddedVarIntf = false;
6427         rpLostEnreg    = false;
6428
6429         /* Walk the basic blocks and predict reg use for each tree */
6430
6431         for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
6432         {
6433             GenTreePtr stmt;
6434             compCurBB       = block;
6435             compCurLifeTree = NULL;
6436             VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
6437
6438             compCurBB = block;
6439
6440             for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
6441             {
6442                 noway_assert(stmt->gtOper == GT_STMT);
6443
6444                 rpPredictSpillCnt = 0;
6445                 VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
6446                 VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
6447
6448                 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6449                 stmtNum++;
6450 #ifdef DEBUG
6451                 if (verbose && 1)
6452                 {
6453                     printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
6454                     gtDispTree(tree);
6455                     printf("\n");
6456                 }
6457 #endif
6458                 rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
6459
6460                 noway_assert(rpAsgVarNum == -1);
6461
6462                 if (rpPredictSpillCnt > tmpIntSpillMax)
6463                     tmpIntSpillMax = rpPredictSpillCnt;
6464             }
6465         }
6466         rpPasses++;
6467
6468         /* Decide whether we need to set mustPredict */
6469         mustPredict = false;
6470
6471 #ifdef _TARGET_ARM_
6472         // The spill count may be now high enough that we now need to reserve r10. If this is the case, we'll need to
6473         // reserve r10, and if it was used, throw out the last prediction and repredict.
6474         if (((codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD) == 0) && compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
6475         {
6476             codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
6477             allAcceptableRegs &= ~RBM_OPT_RSVD;
6478             if ((regUsed & RBM_OPT_RSVD) != 0)
6479             {
6480                 mustPredict              = true;
6481                 rpBestRecordedPrediction = nullptr;
6482             }
6483         }
6484 #endif
6485
6486         if (rpAddedVarIntf)
6487         {
6488             mustPredict = true;
6489 #ifdef DEBUG
6490             if (verbose)
6491                 raDumpVarIntf();
6492 #endif
6493         }
6494
6495         if (rpPasses == 1)
6496         {
6497             if ((opts.compFlags & CLFLG_REGVAR) == 0)
6498                 goto ALL_DONE;
6499
6500             if (rpPredictAssignAgain)
6501                 mustPredict = true;
6502 #ifdef DEBUG
6503             if (fJitNoRegLoc)
6504                 goto ALL_DONE;
6505 #endif
6506         }
6507
6508         /* Calculate the new value to use for regAvail */
6509
6510         regAvail = allAcceptableRegs;
6511
6512         /* If a frame pointer is required then we remove EBP */
6513         if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
6514             regAvail &= ~RBM_FPBASE;
6515
6516 #if ETW_EBP_FRAMED
6517         // We never have EBP available when ETW_EBP_FRAME is defined
6518         regAvail &= ~RBM_FPBASE;
6519 #endif
6520
6521         // If we have done n-passes then we must continue to pessimize the
6522         // interference graph by or-ing the interferences from the previous pass
6523
6524         if (rpPasses > rpPassesPessimize)
6525         {
6526             for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
6527                 VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
6528
6529             /* If we reverse an EBP enregistration then keep it that way */
6530             if (rpReverseEBPenreg)
6531                 regAvail &= ~RBM_FPBASE;
6532         }
6533
6534 #ifdef DEBUG
6535         if (verbose)
6536             raDumpRegIntf();
6537 #endif
6538
6539         /*  Save the old variable/register interference graph */
6540         for (unsigned i = 0; i < REG_COUNT; i++)
6541         {
6542             VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
6543         }
6544         oldStkPredict = rpStkPredict;
6545     } // end of while (true)
6546
6547 ALL_DONE:;
6548
6549     // If we recorded a better feasible allocation than we ended up with, go back to using it.
6550     rpUseRecordedPredictionIfBetter();
6551
6552 #if DOUBLE_ALIGN
6553     codeGen->setDoubleAlign(false);
6554 #endif
6555
6556     switch (rpFrameType)
6557     {
6558         default:
6559             noway_assert(!"rpFrameType not set correctly!");
6560             break;
6561         case FT_ESP_FRAME:
6562             noway_assert(!codeGen->isFramePointerRequired());
6563             noway_assert(!codeGen->isFrameRequired());
6564             codeGen->setFramePointerUsed(false);
6565             break;
6566         case FT_EBP_FRAME:
6567             noway_assert((regUsed & RBM_FPBASE) == 0);
6568             codeGen->setFramePointerUsed(true);
6569             break;
6570 #if DOUBLE_ALIGN
6571         case FT_DOUBLE_ALIGN_FRAME:
6572             noway_assert((regUsed & RBM_FPBASE) == 0);
6573             noway_assert(!codeGen->isFramePointerRequired());
6574             codeGen->setFramePointerUsed(false);
6575             codeGen->setDoubleAlign(true);
6576             break;
6577 #endif
6578     }
6579
6580     /* Record the set of registers that we need */
6581     codeGen->regSet.rsClearRegsModified();
6582     if (regUsed != RBM_NONE)
6583     {
6584         codeGen->regSet.rsSetRegsModified(regUsed);
6585     }
6586
6587     /* We need genFullPtrRegMap if :
6588      * The method is fully interruptible, or
6589      * We are generating an EBP-less frame (for stack-pointer deltas)
6590      */
6591
6592     genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
6593
6594     raMarkStkVars();
6595 #ifdef DEBUG
6596     if (verbose)
6597     {
6598         printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
6599         printf("  rpStkPredict was %u\n", rpStkPredict);
6600     }
6601 #endif
6602     rpRegAllocDone = true;
6603 }
6604
6605 #endif // LEGACY_BACKEND
6606
6607 /*****************************************************************************
6608  *
6609  *  Mark all variables as to whether they live on the stack frame
6610  *  (part or whole), and if so what the base is (FP or SP).
6611  */
6612
6613 void Compiler::raMarkStkVars()
6614 {
6615     unsigned   lclNum;
6616     LclVarDsc* varDsc;
6617
6618     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
6619     {
6620         // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
6621         CLANG_FORMAT_COMMENT_ANCHOR;
6622
6623 #ifdef LEGACY_BACKEND
6624         varDsc->lvOnFrame = false;
6625 #endif // LEGACY_BACKEND
6626
6627         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
6628         {
6629             noway_assert(!varDsc->lvRegister);
6630             goto ON_STK;
6631         }
6632
6633         /* Fully enregistered variables don't need any frame space */
6634
6635         if (varDsc->lvRegister)
6636         {
6637             if (!isRegPairType(varDsc->TypeGet()))
6638             {
6639                 goto NOT_STK;
6640             }
6641
6642             /* For "large" variables make sure both halves are enregistered */
6643
6644             if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
6645             {
6646                 goto NOT_STK;
6647             }
6648         }
6649         /* Unused variables typically don't get any frame space */
6650         else if (varDsc->lvRefCnt == 0)
6651         {
6652             bool needSlot = false;
6653
6654             bool stkFixedArgInVarArgs =
6655                 info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
6656
6657             // If its address has been exposed, ignore lvRefCnt. However, exclude
6658             // fixed arguments in varargs method as lvOnFrame shouldn't be set
6659             // for them as we don't want to explicitly report them to GC.
6660
6661             if (!stkFixedArgInVarArgs)
6662             {
6663                 needSlot |= varDsc->lvAddrExposed;
6664             }
6665
6666 #if FEATURE_FIXED_OUT_ARGS
6667
6668             /* Is this the dummy variable representing GT_LCLBLK ? */
6669             needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
6670
6671 #endif // FEATURE_FIXED_OUT_ARGS
6672
6673 #ifdef DEBUG
6674             /* For debugging, note that we have to reserve space even for
6675                unused variables if they are ever in scope. However, this is not
6676                an issue as fgExtendDbgLifetimes() adds an initialization and
6677                variables in scope will not have a zero ref-cnt.
6678              */
6679             if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
6680             {
6681                 for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
6682                 {
6683                     noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
6684                 }
6685             }
6686 #endif
6687             /*
6688               For Debug Code, we have to reserve space even if the variable is never
6689               in scope. We will also need to initialize it if it is a GC var.
6690               So we set lvMustInit and artifically bump up the ref-cnt.
6691              */
6692
6693             if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
6694             {
6695                 needSlot |= true;
6696
6697                 if (lvaTypeIsGC(lclNum))
6698                 {
6699                     varDsc->lvRefCnt = 1;
6700                 }
6701
6702                 if (!varDsc->lvIsParam)
6703                 {
6704                     varDsc->lvMustInit = true;
6705                 }
6706             }
6707
6708 #ifndef LEGACY_BACKEND
6709             varDsc->lvOnFrame = needSlot;
6710 #endif // !LEGACY_BACKEND
6711             if (!needSlot)
6712             {
6713                 /* Clear the lvMustInit flag in case it is set */
6714                 varDsc->lvMustInit = false;
6715
6716                 goto NOT_STK;
6717             }
6718         }
6719
6720 #ifndef LEGACY_BACKEND
6721         if (!varDsc->lvOnFrame)
6722         {
6723             goto NOT_STK;
6724         }
6725 #endif // !LEGACY_BACKEND
6726
6727     ON_STK:
6728         /* The variable (or part of it) lives on the stack frame */
6729
6730         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
6731 #if FEATURE_FIXED_OUT_ARGS
6732         noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
6733 #else  // FEATURE_FIXED_OUT_ARGS
6734         noway_assert(lvaLclSize(lclNum) != 0);
6735 #endif // FEATURE_FIXED_OUT_ARGS
6736
6737         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
6738                                   // stack frame
6739
6740     NOT_STK:;
6741         varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
6742
6743 #if DOUBLE_ALIGN
6744
6745         if (codeGen->doDoubleAlign())
6746         {
6747             noway_assert(codeGen->isFramePointerUsed() == false);
6748
6749             /* All arguments are off of EBP with double-aligned frames */
6750
6751             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
6752             {
6753                 varDsc->lvFramePointerBased = true;
6754             }
6755         }
6756
6757 #endif
6758
6759         /* Some basic checks */
6760
6761         // It must be in a register, on frame, or have zero references.
6762
6763         noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
6764
6765 #ifndef LEGACY_BACKEND
6766         // We can't have both lvRegister and lvOnFrame for RyuJIT
6767         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
6768 #else  // LEGACY_BACKEND
6769
6770         /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
6771         noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
6772                      (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
6773 #endif // LEGACY_BACKEND
6774
6775 #ifdef DEBUG
6776
6777         // For varargs functions, there should be no direct references to
6778         // parameter variables except for 'this' (because these were morphed
6779         // in the importer) and the 'arglist' parameter (which is not a GC
6780         // pointer). and the return buffer argument (if we are returning a
6781         // struct).
6782         // This is important because we don't want to try to report them
6783         // to the GC, as the frame offsets in these local varables would
6784         // not be correct.
6785
6786         if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
6787         {
6788             if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
6789             {
6790                 noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
6791             }
6792         }
6793 #endif
6794     }
6795 }
6796
6797 #ifdef LEGACY_BACKEND
6798 void Compiler::rpRecordPrediction()
6799 {
6800     if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
6801     {
6802         if (rpBestRecordedPrediction == NULL)
6803         {
6804             rpBestRecordedPrediction =
6805                 reinterpret_cast<VarRegPrediction*>(compGetMemArray(lvaCount, sizeof(VarRegPrediction)));
6806         }
6807         for (unsigned k = 0; k < lvaCount; k++)
6808         {
6809             rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
6810             rpBestRecordedPrediction[k].m_regNum         = (regNumberSmall)lvaTable[k].GetRegNum();
6811             rpBestRecordedPrediction[k].m_otherReg       = (regNumberSmall)lvaTable[k].GetOtherReg();
6812         }
6813         rpBestRecordedStkPredict = rpStkPredict;
6814         JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
6815     }
6816 }
6817
6818 void Compiler::rpUseRecordedPredictionIfBetter()
6819 {
6820     JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
6821             rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
6822     if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
6823     {
6824         JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
6825                 rpBestRecordedStkPredict);
6826
6827         for (unsigned k = 0; k < lvaCount; k++)
6828         {
6829             lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
6830             lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
6831             lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
6832         }
6833     }
6834 }
6835 #endif // LEGACY_BACKEND