Fix reading Time zone rules using Julian days (#17672)
[platform/upstream/coreclr.git] / src / jit / codegenlegacy.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                           CodeGenerator                                   XX
9 XX                                                                           XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 */
13 #include "jitpch.h"
14 #ifdef _MSC_VER
15 #pragma hdrstop
16 #endif
17 #include "codegen.h"
18
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
20
21 #ifdef _TARGET_AMD64_
22 #error AMD64 must be !LEGACY_BACKEND
23 #endif
24
25 #ifdef _TARGET_ARM64_
26 #error ARM64 must be !LEGACY_BACKEND
27 #endif
28
29 #include "gcinfo.h"
30 #include "emit.h"
31
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
34 #endif
35
36 /*****************************************************************************
37  *
38  *  Determine what variables die between beforeSet and afterSet, and
39  *  update the liveness globals accordingly:
40  *  compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
41  */
42
void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
{
    unsigned   varNum;
    LclVarDsc* varDsc;
    regMaskTP  regBit;

    /* The dying variables are exactly those live before this point but not after */
    VARSET_TP  deadSet(VarSetOps::Diff(compiler, beforeSet, afterSet));

    if (VarSetOps::IsEmpty(compiler, deadSet))
        return;

    /* iterate through the dead variables */

    VarSetOps::Iter iter(compiler, deadSet);
    unsigned        varIndex = 0;
    while (iter.NextElem(&varIndex))
    {
        // Map the tracked index back to the lclVar table entry
        varNum = compiler->lvaTrackedToVarNum[varIndex];
        varDsc = compiler->lvaTable + varNum;

        /* Remove this variable from the current live set */

        noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));

        VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);

        // A tracked stack-pointer local must still be present in the GC var set
        // before we remove it below.
        noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
                     VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));

        VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);

        /* We are done if the variable is not enregistered */

        if (!varDsc->lvRegister)
        {
#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
            }
#endif
            continue;
        }

#if !FEATURE_FP_REGALLOC
        // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
        if (!varDsc->IsFloatRegType())
#endif
        {
            /* Get hold of the appropriate register bit(s) */

            if (varTypeIsFloating(varDsc->TypeGet()))
            {
                regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
            }
            else
            {
                regBit = genRegMask(varDsc->lvRegNum);
                // A register-pair variable dies in both halves
                if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
                    regBit |= genRegMask(varDsc->lvOtherReg);
            }

#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
                       compiler->compRegVarName(varDsc->lvRegNum));
            }
#endif
            noway_assert((regSet.rsMaskVars & regBit) != 0);

            regSet.RemoveMaskVars(regBit);

            // Remove GC tracking if any for this register

            if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
                gcInfo.gcMarkRegSetNpt(regBit);
        }
    }
}
122
123 /*****************************************************************************
124  *
125  *  Change the given enregistered local variable node to a register variable node
126  */
127
void CodeGenInterface::genBashLclVar(GenTree* tree, unsigned varNum, LclVarDsc* varDsc)
{
    noway_assert(tree->gtOper == GT_LCL_VAR);
    noway_assert(varDsc->lvRegister);

    if (isRegPairType(varDsc->lvType))
    {
        /* Check for the case of a variable that was narrowed to an int */

        if (isRegPairType(tree->gtType))
        {
            genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
            return;
        }

        // A narrowed appearance of a long variable must be an explicit
        // int-sized cast of it.
        noway_assert(tree->gtFlags & GTF_VAR_CAST);
        noway_assert(tree->gtType == TYP_INT);
    }
    else
    {
        noway_assert(!isRegPairType(tree->gtType));
    }

    /* It's a register variable -- modify the node */

    // Save the liveness bits so they can be re-applied after SetOper
    // rewrites the node.
    unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);

    ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
    tree->SetOper(GT_REG_VAR);
    tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.

    tree->gtFlags |= livenessFlags;
    tree->SetInReg();
    tree->gtRegNum          = varDsc->lvRegNum;
    tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
    tree->gtRegVar.SetLclNum(varNum);
}
165
// inline
void CodeGen::saveLiveness(genLivenessSet* ls)
{
    // Snapshot the current liveness state into 'ls': the live variable set,
    // the GC-tracked stack variable set, and the register masks for
    // enregistered variables, GC-ref registers and byref registers.
    VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
    VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
    ls->maskVars  = (regMaskSmall)regSet.rsMaskVars;
    ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
    ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
}
175
// inline
void CodeGen::restoreLiveness(genLivenessSet* ls)
{
    // Restore the liveness state previously captured by saveLiveness().
    VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
    VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
    regSet.rsMaskVars       = ls->maskVars;
    gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
    gcInfo.gcRegByrefSetCur = ls->byRefRegs;
}
185
// inline
void CodeGen::checkLiveness(genLivenessSet* ls)
{
    // Debug-only validation: assert that the current liveness state matches
    // the snapshot in 'ls' (the counterpart of saveLiveness()).
    assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
    assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
    assert(regSet.rsMaskVars == ls->maskVars);
    assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
    assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
}
195
196 // inline
197 bool CodeGenInterface::genMarkLclVar(GenTree* tree)
198 {
199     unsigned   varNum;
200     LclVarDsc* varDsc;
201
202     assert(tree->gtOper == GT_LCL_VAR);
203
204     /* Does the variable live in a register? */
205
206     varNum = tree->gtLclVarCommon.gtLclNum;
207     assert(varNum < compiler->lvaCount);
208     varDsc = compiler->lvaTable + varNum;
209
210     // Retype byref-typed appearances of intptr-typed lclVars as type intptr.
211     if ((varDsc->TypeGet() == TYP_I_IMPL) && (tree->TypeGet() == TYP_BYREF))
212     {
213         tree->gtType = TYP_I_IMPL;
214     }
215
216     if (varDsc->lvRegister)
217     {
218         genBashLclVar(tree, varNum, varDsc);
219         return true;
220     }
221     else
222     {
223         return false;
224     }
225 }
226
227 // inline
228 GenTree* CodeGen::genGetAddrModeBase(GenTree* tree)
229 {
230     bool     rev;
231     unsigned mul;
232     unsigned cns;
233     GenTree* adr;
234     GenTree* idx;
235
236     if (genCreateAddrMode(tree,     // address
237                           0,        // mode
238                           false,    // fold
239                           RBM_NONE, // reg mask
240                           &rev,     // reverse ops
241                           &adr,     // base addr
242                           &idx,     // index val
243 #if SCALED_ADDR_MODES
244                           &mul, // scaling
245 #endif
246                           &cns,  // displacement
247                           true)) // don't generate code
248         return adr;
249     else
250         return NULL;
251 }
252
253 #if FEATURE_STACK_FP_X87
// inline
void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
{
    // Reset the tracked x87 FP stack depth (defaults to an empty stack).
    genFPstkLevel = newValue;
}
259
// inline
unsigned CodeGenInterface::genGetFPstkLevel()
{
    // Return the current tracked x87 FP stack depth.
    return genFPstkLevel;
}
265
// inline
void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
{
    // Guard against unsigned wrap-around of the stack level.
    noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
    genFPstkLevel += inc;
}
272
// inline
void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
{
    // Guard against unsigned underflow of the stack level.
    noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
    genFPstkLevel -= dec;
}
279
280 #endif // FEATURE_STACK_FP_X87
281
282 /*****************************************************************************
283  *
284  *  Generate code that will set the given register to the integer constant.
285  */
286
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
    // A TYP_REF constant can only be null.
    noway_assert(type != TYP_REF || val == NULL);

    /* Does the reg already hold this constant? */

    if (!regTracker.rsIconIsInReg(val, reg))
    {
        if (val == 0)
        {
            instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
        }
#ifdef _TARGET_ARM_
        // If we can set a register to a constant with a small encoding, then do that.
        else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
        {
            instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
        }
#endif
        else
        {
            /* See if a register holds the value or a close value? */
            bool      constantLoaded = false;
            ssize_t   delta; // difference between 'val' and the value in srcReg
            regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);

            if (srcReg != REG_NA)
            {
                if (delta == 0)
                {
                    // Exact match: a simple register-to-register move suffices.
                    inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
                    constantLoaded = true;
                }
                else
                {
#if defined(_TARGET_XARCH_)
                    /* delta should fit inside a byte */
                    if (delta == (signed char)delta)
                    {
                        /* use an lea instruction to set reg */
                        getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
                        constantLoaded = true;
                    }
#elif defined(_TARGET_ARM_)
                    /* We found a register 'regS' that has the value we need, modulo a small delta.
                       That is, the value we need is 'regS + delta'.
                       We one to generate one of the following instructions, listed in order of preference:

                            adds  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
                       0<=delta<=255
                            subs  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
                       -255<=delta<=0
                            adds  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
                            subs  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
                            mov   regD, icon         ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
                            movw  regD, icon         ; 4 bytes. 0<=icon<=65535
                            add.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
                            sub.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
                            addw  regD, regS, delta  ; 4 bytes. 0<=delta<=4095
                            subw  regD, regS, delta  ; 4 bytes. -4095<=delta<=0

                       If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
                       than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
                       can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
                       0x80000000. In this case, delta will be 1.
                    */

                    bool      useAdd     = false;
                    regMaskTP regMask    = genRegMask(reg);
                    regMaskTP srcRegMask = genRegMask(srcReg);

                    // 2-byte adds/subs with 8-bit immediate: same low register.
                    if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
                        (unsigned_abs(delta) <= 255))
                    {
                        useAdd = true;
                    }
                    // 2-byte adds/subs with 3-bit immediate: two low registers.
                    else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
                             (unsigned_abs(delta) <= 7))
                    {
                        useAdd = true;
                    }
                    else if (arm_Valid_Imm_For_Mov(val))
                    {
                        // fall through to general "!constantLoaded" case below
                    }
                    else if (arm_Valid_Imm_For_Add(delta, flags))
                    {
                        useAdd = true;
                    }

                    if (useAdd)
                    {
                        getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
                        constantLoaded = true;
                    }
#else
                    assert(!"Codegen missing");
#endif
                }
            }

            if (!constantLoaded) // Have we loaded it yet?
            {
#ifdef _TARGET_X86_
                if (val == -1)
                {
                    /* or reg,-1 takes 3 bytes */
                    inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
                }
                else
                    /* For SMALL_CODE it is smaller to push a small immediate and
                       then pop it into the dest register */
                    if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
                {
                    /* "mov" has no s(sign)-bit and so always takes 6 bytes,
                       whereas push+pop takes 2+1 bytes */

                    inst_IV(INS_push, val);
                    genSinglePush();

                    inst_RV(INS_pop, reg, type);
                    genSinglePop();
                }
                else
#endif // _TARGET_X86_
                {
                    // General case: load the immediate directly.
                    instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
                }
            }
        }
    }
    // Record that 'reg' now holds 'val', and update GC tracking for the type.
    regTracker.rsTrackRegIntCns(reg, val);
    gcInfo.gcMarkRegPtrVal(reg, type);
}
421
422 /*****************************************************************************
423  *
424  *  Find an existing register set to the given integer constant, or
425  *  pick a register and generate code that will set it to the integer constant.
426  *
427  *  If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
428  *  to pick some register to set.  NOTE that this means the returned regNumber
429  *  might *not* be in regBest.  It also implies that you should lock any registers
430  *  you don't want spilled (not just mark as used).
431  *
432  */
433
regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
{
    regNumber regCns;
#if REDUNDANT_LOAD

    // Is there already a register with zero that we can use?
    regCns = regTracker.rsIconIsInReg(val);

    if (regCns == REG_NA)
#endif
    {
        // If not, grab a register to hold the constant, preferring
        // any register besides RBM_TMP_0 so it can hopefully be re-used
        regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);

        // Now set the constant
        genSetRegToIcon(regCns, val, type);
    }

    // NOTE: there is no guarantee that regCns is in regBest's mask
    return regCns;
}
456
457 /*****************************************************************************/
458 /*****************************************************************************
459  *
460  *  Add the given constant to the specified register.
461  *  'tree' is the resulting tree
462  */
463
void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTree* tree, var_types dstType, bool ovfl)
{
    // 'tree' may be NULL; flags are only set when a tree requests them.
    bool setFlags = (tree != NULL) && tree->gtSetFlags();

#ifdef _TARGET_XARCH_
    /* First check to see if we can generate inc or dec instruction(s) */
    /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
    // Note: inc/dec are not used for overflow-checked adds ('ovfl'), since
    // they do not update all the flags the same way 'add' does.
    if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
    {
        emitAttr size = emitTypeSize(dstType);

        switch (ival)
        {
            case 2:
                // +2 is emitted as two 'inc' instructions (falls through).
                inst_RV(INS_inc, reg, dstType, size);
                __fallthrough;
            case 1:
                inst_RV(INS_inc, reg, dstType, size);

                goto UPDATE_LIVENESS;

            case -2:
                // -2 is emitted as two 'dec' instructions (falls through).
                inst_RV(INS_dec, reg, dstType, size);
                __fallthrough;
            case -1:
                inst_RV(INS_dec, reg, dstType, size);

                goto UPDATE_LIVENESS;
        }
    }
#endif
    {
        // General case: a plain 'add' with the immediate.
        insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
        inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
    }

#ifdef _TARGET_XARCH_
UPDATE_LIVENESS:
#endif

    if (setFlags)
        genFlagsEqualToReg(tree, reg);

    // The register no longer holds whatever constant/value was tracked for it.
    regTracker.rsTrackRegTrash(reg);

    gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    if (tree != NULL)
    {
        if (!tree->OperIsAssignment())
        {
            genMarkTreeInReg(tree, reg);
            // An incremented GC pointer becomes an interior (byref) pointer.
            if (varTypeIsGC(tree->TypeGet()))
                gcInfo.gcMarkRegSetByref(genRegMask(reg));
        }
    }
}
521
522 /*****************************************************************************
523  *
524  *  Subtract the given constant from the specified register.
525  *  Should only be used for unsigned sub with overflow. Else
526  *  genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy()
527  *  for these cases as the flags are set differently, and the following
528  *  check for overflow won't work correctly.
529  *  'tree' is the resulting tree.
530  */
531
void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTree* tree)
{
    // Only valid for overflow-checked subtracts: either unsigned, or the
    // special signed case where 'ival' is the minimum value (whose negation
    // would itself overflow, so genIncRegBy(-ival) cannot be used).
    noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
                 ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
    noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);

    regTracker.rsTrackRegTrash(reg);

    noway_assert(!varTypeIsGC(tree->TypeGet()));
    gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);

    if (tree->gtSetFlags())
        genFlagsEqualToReg(tree, reg);

    // NOTE(review): 'tree' was already dereferenced above, so this null check
    // is redundant; kept as-is.
    if (tree)
    {
        genMarkTreeInReg(tree, reg);
    }
}
554
555 /*****************************************************************************
556  *
557  *  Multiply the specified register by the given value.
558  *  'tree' is the resulting tree
559  */
560
void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTree* tree, var_types dstType, bool ovfl)
{
    noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);

    regTracker.rsTrackRegTrash(reg);

    if (tree)
    {
        genMarkTreeInReg(tree, reg);
    }

    bool     use_shift = false;
    unsigned shift_by  = 0;

    // A positive power-of-two multiplier with no overflow check can be
    // strength-reduced to a left shift.
    if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
    {
        use_shift = true;
        BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
    }

    if (use_shift)
    {
        // shift_by == 0 means a multiply by 1: no code needed at all.
        if (shift_by != 0)
        {
            // NOTE(review): 'tree' is dereferenced here without the null check
            // used above -- presumably callers pass a non-null tree on this
            // path; verify against callers.
            insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
            inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);
        }
    }
    else
    {
        instruction ins;
#ifdef _TARGET_XARCH_
        // x86: use the 3-operand imul form appropriate for 'reg'.
        ins = getEmitter()->inst3opImulForReg(reg);
#else
        ins = INS_mul;
#endif

        inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
    }
}
603
604 /*****************************************************************************/
605 /*****************************************************************************/
606 /*****************************************************************************
607  *
608  *  Compute the value 'tree' into a register that's in 'needReg'
609  *  (or any free register if 'needReg' is RBM_NONE).
610  *
611  *  Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
612  *  If keepReg==RegSet::KEEP_REG, we mark the register as being used.
613  *
614  *  If you require that the register returned is trashable, pass true for 'freeOnly'.
615  */
616
void CodeGen::genComputeReg(
    GenTree* tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
{
    noway_assert(tree->gtType != TYP_VOID);

    regNumber reg;
    regNumber rg2;

    // Sanity-check the tree's type against what this path can handle
    // (the allowed set varies by target / FP model).
#if FEATURE_STACK_FP_X87
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
#elif defined(_TARGET_ARM_)
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
                 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
                 genActualType(tree->gtType) == TYP_STRUCT);
#else
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
                 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
#endif

    /* Generate the value, hopefully into the right register */

    genCodeForTree(tree, needReg);
    noway_assert(tree->InReg());

    // There is a workaround in genCodeForTreeLng() that changes the type of the
    // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
    // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
    // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
    // running the rest of this code, because anything looking at gtRegNum on ARM or
    // attempting to move from EAX/EDX will be wrong.
    if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
        goto REG_OK;

    reg = tree->gtRegNum;

    /* Did the value end up in an acceptable register? */

    if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
    {
        /* Not good enough to satisfy the caller's orders */

        if (varTypeIsFloating(tree))
        {
            RegSet::RegisterPreference pref(needReg, RBM_NONE);
            rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
        }
        else
        {
            rg2 = regSet.rsGrabReg(needReg);
        }
    }
    else
    {
        /* Do we have to end up with a free register? */

        if (!freeOnly)
            goto REG_OK;

        /* Did we luck out and the value got computed into an unused reg? */

        if (genRegMask(reg) & regSet.rsRegMaskFree())
            goto REG_OK;

        /* Register already in use, so spill previous value */

        if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
        {
            rg2 = regSet.rsGrabReg(needReg);
            if (rg2 == reg)
            {
                // Grabbing spilled the old use of 'reg'; the value can stay put.
                gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
                tree->gtRegNum = reg;
                goto REG_OK;
            }
        }
        else
        {
            /* OK, let's find a trashable home for the value */

            regMaskTP rv1RegUsed;

            // Lock 'reg' so rsPickReg cannot hand back the register the
            // value currently lives in.
            regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
            rg2 = regSet.rsPickReg(needReg);
            regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
        }
    }

    noway_assert(reg != rg2);

    /* Update the value in the target register */

    regTracker.rsTrackRegCopy(rg2, reg);

    inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());

    /* The value has been transferred to 'rg2' */

    if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
        gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());

    /* The value is now in an appropriate register */

    tree->gtRegNum = rg2;

REG_OK:

    /* Does the caller want us to mark the register as used? */

    if (keepReg == RegSet::KEEP_REG)
    {
        /* In case we're computing a value into a register variable */

        genUpdateLife(tree);

        /* Mark the register as 'used' */

        regSet.rsMarkRegUsed(tree);
    }
}
741
742 /*****************************************************************************
743  *
744  *  Same as genComputeReg(), the only difference being that the result is
745  *  guaranteed to end up in a trashable register.
746  */
747
// inline
void CodeGen::genCompIntoFreeReg(GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    // Same as genComputeReg() with freeOnly == true, so the result is
    // guaranteed to land in a trashable register.
    genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
}
753
754 /*****************************************************************************
755  *
756  *  The value 'tree' was earlier computed into a register; free up that
757  *  register (but also make sure the value is presently in a register).
758  */
759
760 void CodeGen::genReleaseReg(GenTree* tree)
761 {
762     if (tree->gtFlags & GTF_SPILLED)
763     {
764         /* The register has been spilled -- reload it */
765
766         regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
767         return;
768     }
769
770     regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
771 }
772
773 /*****************************************************************************
774  *
775  *  The value 'tree' was earlier computed into a register. Check whether that
776  *  register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
777  *  free the register. The caller shouldn't need to be setting GCness of the register
778  *  where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
779  */
780
void CodeGen::genRecoverReg(GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    if (tree->gtFlags & GTF_SPILLED)
    {
        /* The register has been spilled -- reload it */

        regSet.rsUnspillReg(tree, needReg, keepReg);
        return;
    }
    else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
    {
        /* We need the tree in another register. So move it there */

        noway_assert(tree->InReg());
        regNumber oldReg = tree->gtRegNum;

        /* Pick an acceptable register */

        regNumber reg = regSet.rsGrabReg(needReg);

        /* Copy the value */

        inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
        tree->gtRegNum = reg;

        // Transfer GC tracking and used/free bookkeeping from the old
        // register to the new one.
        gcInfo.gcMarkRegPtrVal(tree);
        regSet.rsMarkRegUsed(tree);
        regSet.rsMarkRegFree(oldReg, tree);

        regTracker.rsTrackRegCopy(reg, oldReg);
    }

    /* Free the register if the caller desired so */

    if (keepReg == RegSet::FREE_REG)
    {
        regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
        // Can't use RegSet::FREE_REG on a GC type
        noway_assert(!varTypeIsGC(tree->gtType));
    }
    else
    {
        noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
    }
}
826
827 /*****************************************************************************
828  *
829  * Move one half of a register pair to its new regPair(half).
830  */
831
// inline
void CodeGen::genMoveRegPairHalf(GenTree* tree, regNumber dst, regNumber src, int off)
{
    if (src == REG_STK)
    {
        // The half lives on the stack: load it from the variable's home.
        // handle long to unsigned long overflow casts
        while (tree->gtOper == GT_CAST)
        {
            noway_assert(tree->gtType == TYP_LONG);
            tree = tree->gtCast.CastOp();
        }
        noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
        noway_assert(tree->gtType == TYP_LONG);
        // 'off' selects which 32-bit half of the long to load.
        inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
        regTracker.rsTrackRegTrash(dst);
    }
    else
    {
        // Register-to-register move of one half of the pair.
        regTracker.rsTrackRegCopy(dst, src);
        inst_RV_RV(INS_mov, dst, src, TYP_INT);
    }
}
854
855 /*****************************************************************************
856  *
857  *  The given long value is in a register pair, but it's not an acceptable
858  *  one. We have to move the value into a register pair in 'needReg' (if
859  *  non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
860  *
861  *  Important note: if 'needReg' is non-zero, we assume the current pair
862  *  has not been marked as free. If, OTOH, 'newPair' is specified, we
863  *  assume that the current register pair is marked as used and free it.
864  */
865
void CodeGen::genMoveRegPair(GenTree* tree, regMaskTP needReg, regPairNo newPair)
{
    regPairNo oldPair;

    regNumber oldLo;
    regNumber oldHi;
    regNumber newLo;
    regNumber newHi;

    /* Either a target set or a specific pair may be requested */

    noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));

    /* Get hold of the current pair */

    oldPair = tree->gtRegPair;
    noway_assert(oldPair != newPair);

    /* Are we supposed to move to a specific pair? */

    if (newPair != REG_PAIR_NONE)
    {
        regMaskTP oldMask = genRegPairMask(oldPair);
        regMaskTP loMask  = genRegMask(genRegPairLo(newPair));
        regMaskTP hiMask  = genRegMask(genRegPairHi(newPair));
        regMaskTP overlap = oldMask & (loMask | hiMask);

        /* First lock any registers that are in both pairs, so that grabbing
           the remaining target registers below cannot spill them */

        noway_assert((regSet.rsMaskUsed & overlap) == overlap);
        noway_assert((regSet.rsMaskLock & overlap) == 0);
        regSet.rsMaskLock |= overlap;

        /* Make sure any additional registers we need are free
           (grab only target halves that are in use and NOT part of the old pair) */

        if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
        {
            regSet.rsGrabReg(loMask);
        }

        if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
        {
            regSet.rsGrabReg(hiMask);
        }

        /* Unlock those registers we have temporarily locked */

        noway_assert((regSet.rsMaskUsed & overlap) == overlap);
        noway_assert((regSet.rsMaskLock & overlap) == overlap);
        regSet.rsMaskLock -= overlap;

        /* We can now free the old pair (see the header comment: when a
           specific 'newPair' is given, the old pair is assumed used) */

        regSet.rsMarkRegFree(oldMask);
    }
    else
    {
        /* Pick the new pair based on the caller's stated preference */

        newPair = regSet.rsGrabRegPair(needReg);
    }

    // If grabbed pair is the same as old one we're done
    if (newPair == oldPair)
    {
        // The assignments are folded into the assert's comma expression since
        // the values are only needed for this consistency check.
        noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
                      newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
        return;
    }

    /* Move the values from the old pair into the new one */

    oldLo = genRegPairLo(oldPair);
    oldHi = genRegPairHi(oldPair);
    newLo = genRegPairLo(newPair);
    newHi = genRegPairHi(newPair);

    noway_assert(newLo != REG_STK && newHi != REG_STK);

    /* Careful - the register pairs might overlap */

    if (newLo == oldLo)
    {
        /* The low registers are identical, just move the upper half */

        noway_assert(newHi != oldHi);
        genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int)); // upper half is at offset sizeof(int)
    }
    else
    {
        /* The low registers are different, are the upper ones the same? */

        if (newHi == oldHi)
        {
            /* Just move the lower half, then */
            genMoveRegPairHalf(tree, newLo, oldLo, 0);
        }
        else
        {
            /* Both sets are different - is there an overlap? */

            if (newLo == oldHi)
            {
                /* Are high and low simply swapped ? */

                if (newHi == oldLo)
                {
#ifdef _TARGET_ARM_
                    /* Let's use XOR swap to reduce register pressure. */
                    // (three EORs exchange the two registers without a scratch)
                    inst_RV_RV(INS_eor, oldLo, oldHi);
                    inst_RV_RV(INS_eor, oldHi, oldLo);
                    inst_RV_RV(INS_eor, oldLo, oldHi);
#else
                    inst_RV_RV(INS_xchg, oldHi, oldLo);
#endif
                    regTracker.rsTrackRegSwap(oldHi, oldLo);
                }
                else
                {
                    /* New lower == old higher, so move higher half first
                       (otherwise the first move would clobber oldHi) */

                    noway_assert(newHi != oldLo);
                    genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
                    genMoveRegPairHalf(tree, newLo, oldLo, 0);
                }
            }
            else
            {
                /* Move lower half first */
                genMoveRegPairHalf(tree, newLo, oldLo, 0);
                genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
            }
        }
    }

    /* Record the fact that we're switching to another pair */

    tree->gtRegPair = newPair;
}
1005
1006 /*****************************************************************************
1007  *
1008  *  Compute the value 'tree' into the register pair specified by 'needRegPair'
1009  *  if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
1010  *  those in avoidReg.
1011  *  If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1012  *  value ends up in as being used.
1013  */
1014
void CodeGen::genComputeRegPair(
    GenTree* tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
{
    regMaskTP regMask;
    regPairNo regPair;
    regMaskTP tmpMask;
    regMaskTP tmpUsedMask;
    regNumber rLo;
    regNumber rHi;

    noway_assert(isRegPairType(tree->gtType));

    if (needRegPair == REG_PAIR_NONE)
    {
        if (freeOnly)
        {
            regMask = regSet.rsRegMaskFree() & ~avoidReg;
            // A pair needs at least two candidate registers; if honoring
            // 'avoidReg' leaves fewer, drop the avoid hint.
            if (genMaxOneBit(regMask))
                regMask = regSet.rsRegMaskFree();
        }
        else
        {
            regMask = RBM_ALLINT & ~avoidReg;
        }

        // Still fewer than two candidates: widen to anything grabbable.
        if (genMaxOneBit(regMask))
            regMask = regSet.rsRegMaskCanGrab();
    }
    else
    {
        regMask = genRegPairMask(needRegPair);
    }

    /* Generate the value, hopefully into the right register pair */

    genCodeForTreeLng(tree, regMask, avoidReg);

    noway_assert(tree->InReg());

    regPair = tree->gtRegPair;
    tmpMask = genRegPairMask(regPair);

    rLo = genRegPairLo(regPair);
    rHi = genRegPairHi(regPair);

    /* At least one half is in a real register */

    noway_assert(rLo != REG_STK || rHi != REG_STK);

    /* Did the value end up in an acceptable register pair? */

    if (needRegPair != REG_PAIR_NONE)
    {
        if (needRegPair != regPair)
        {
            /* This is a workaround. If we specify a regPair for genMoveRegPair */
            /* it expects the source pair being marked as used */
            regSet.rsMarkRegPairUsed(tree);
            genMoveRegPair(tree, 0, needRegPair);
        }
    }
    else if (freeOnly)
    {
        /* Do we have to end up with a free register pair?
           Something might have gotten freed up above */
        bool mustMoveReg = false;

        regMask = regSet.rsRegMaskFree() & ~avoidReg;

        if (genMaxOneBit(regMask))
            regMask = regSet.rsRegMaskFree();

        if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
        {
            /* Note that we must call genMoveRegPair if one of our registers
               comes from the used mask, so that it will be properly spilled. */

            mustMoveReg = true;
        }

        // Progressively widen the candidate mask until it can hold a pair.
        if (genMaxOneBit(regMask))
            regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;

        if (genMaxOneBit(regMask))
            regMask |= regSet.rsRegMaskCanGrab();

        /* Did the value end up in a free register pair? */

        if (mustMoveReg)
        {
            /* We'll have to move the value to a free (trashable) pair */
            genMoveRegPair(tree, regMask, REG_PAIR_NONE);
        }
    }
    else
    {
        noway_assert(needRegPair == REG_PAIR_NONE);
        noway_assert(!freeOnly);

        /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
        tmpUsedMask = tmpMask & regSet.rsMaskUsed;
        tmpMask &= ~regSet.rsMaskUsed;

        /* Make sure that the value is in "real" registers*/
        if (rLo == REG_STK)
        {
            /* Get one of the desired registers, but exclude rHi */

            // Lock the half already in a register so rsPickReg can't take it.
            regSet.rsLockReg(tmpMask);
            regSet.rsLockUsedReg(tmpUsedMask);

            regNumber reg = regSet.rsPickReg(regMask);

            regSet.rsUnlockUsedReg(tmpUsedMask);
            regSet.rsUnlockReg(tmpMask);

            // Load the lower half (offset 0) from the stack home.
            inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);

            tree->gtRegPair = gen2regs2pair(reg, rHi);

            regTracker.rsTrackRegTrash(reg);
            gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // holds a TYP_INT, not a pointer
        }
        else if (rHi == REG_STK)
        {
            /* Get one of the desired registers, but exclude rLo */

            regSet.rsLockReg(tmpMask);
            regSet.rsLockUsedReg(tmpUsedMask);

            regNumber reg = regSet.rsPickReg(regMask);

            regSet.rsUnlockUsedReg(tmpUsedMask);
            regSet.rsUnlockReg(tmpMask);

            // Load the upper half (offset 4 = sizeof(int)) from the stack home.
            inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);

            tree->gtRegPair = gen2regs2pair(rLo, reg);

            regTracker.rsTrackRegTrash(reg);
            gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // holds a TYP_INT, not a pointer
        }
    }

    /* Does the caller want us to mark the register as used? */

    if (keepReg == RegSet::KEEP_REG)
    {
        /* In case we're computing a value into a register variable */

        genUpdateLife(tree);

        /* Mark the register as 'used' */

        regSet.rsMarkRegPairUsed(tree);
    }
}
1172
1173 /*****************************************************************************
1174  *
1175  *  Same as genComputeRegPair(), the only difference being that the result
1176  *  is guaranteed to end up in a trashable register pair.
1177  */
1178
// inline
void CodeGen::genCompIntoFreeRegPair(GenTree* tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
{
    // No specific target pair is requested (REG_PAIR_NONE) and freeOnly is
    // true, so genComputeRegPair guarantees the result ends up in a
    // trashable (free) register pair.
    genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true /* freeOnly */);
}
1184
1185 /*****************************************************************************
1186  *
1187  *  The value 'tree' was earlier computed into a register pair; free up that
1188  *  register pair (but also make sure the value is presently in a register
1189  *  pair).
1190  */
1191
1192 void CodeGen::genReleaseRegPair(GenTree* tree)
1193 {
1194     if (tree->gtFlags & GTF_SPILLED)
1195     {
1196         /* The register has been spilled -- reload it */
1197
1198         regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
1199         return;
1200     }
1201
1202     regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1203 }
1204
1205 /*****************************************************************************
1206  *
1207  *  The value 'tree' was earlier computed into a register pair. Check whether
1208  *  either register of that pair has been spilled (and reload it if so), and
1209  *  if 'keepReg' is 0, free the register pair.
1210  */
1211
1212 void CodeGen::genRecoverRegPair(GenTree* tree, regPairNo regPair, RegSet::KeepReg keepReg)
1213 {
1214     if (tree->gtFlags & GTF_SPILLED)
1215     {
1216         regMaskTP regMask;
1217
1218         if (regPair == REG_PAIR_NONE)
1219             regMask = RBM_NONE;
1220         else
1221             regMask = genRegPairMask(regPair);
1222
1223         /* The register pair has been spilled -- reload it */
1224
1225         regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1226     }
1227
1228     /* Does the caller insist on the value being in a specific place? */
1229
1230     if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1231     {
1232         /* No good -- we'll have to move the value to a new place */
1233
1234         genMoveRegPair(tree, 0, regPair);
1235
1236         /* Mark the pair as used if appropriate */
1237
1238         if (keepReg == RegSet::KEEP_REG)
1239             regSet.rsMarkRegPairUsed(tree);
1240
1241         return;
1242     }
1243
1244     /* Free the register pair if the caller desired so */
1245
1246     if (keepReg == RegSet::FREE_REG)
1247         regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1248 }
1249
1250 /*****************************************************************************
1251  *
1252  *  Compute the given long value into the specified register pair; don't mark
1253  *  the register pair as used.
1254  */
1255
// inline
void CodeGen::genEvalIntoFreeRegPair(GenTree* tree, regPairNo regPair, regMaskTP avoidReg)
{
    // Compute the value into the requested pair, holding it as used so a
    // spill of either half is detected ...
    genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
    // ... then reload it if needed and release the pair for the caller.
    genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
}
1262
1263 /*****************************************************************************
1264  *  This helper makes sure that the regpair target of an assignment is
1265  *  available for use.  This needs to be called in genCodeForTreeLng just before
1266  *  a long assignment, but must not be called until everything has been
1267  *  evaluated, or else we might try to spill enregistered variables.
1268  *
1269  */
1270
1271 // inline
1272 void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1273 {
1274     /* Make sure the target of the store is available */
1275
1276     regNumber regLo = genRegPairLo(regPair);
1277     regNumber regHi = genRegPairHi(regPair);
1278
1279     if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1280         regSet.rsSpillReg(regHi);
1281
1282     if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1283         regSet.rsSpillReg(regLo);
1284 }
1285
1286 /*****************************************************************************/
1287 /*****************************************************************************
1288  *
1289  *  Return true if the given tree 'addr' can be computed via an addressing mode,
1290  *  such as "[ebx+esi*4+20]". If the expression isn't an address mode already
1291  *  try to make it so (but we don't try 'too hard' to accomplish this).
1292  *
1293  *  If we end up needing a register (or two registers) to hold some part(s) of the
1294  *  address, we return the use register mask via '*useMaskPtr'.
1295  *
1296  *  If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
1297  *  in use. The caller would then be responsible for calling
1298  *  regSet.rsMarkRegFree(*useMaskPtr).
1299  *
1300  *  If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by
1301  *  calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
1302  */
1303
1304 bool CodeGen::genMakeIndAddrMode(GenTree*        addr,
1305                                  GenTree*        oper,
1306                                  bool            forLea,
1307                                  regMaskTP       regMask,
1308                                  RegSet::KeepReg keepReg,
1309                                  regMaskTP*      useMaskPtr,
1310                                  bool            deferOK)
1311 {
1312     if (addr->gtOper == GT_ARR_ELEM)
1313     {
1314         regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1315         *useMaskPtr    = regs;
1316         return true;
1317     }
1318
1319     bool     rev;
1320     GenTree* rv1;
1321     GenTree* rv2;
1322     bool     operIsArrIndex; // is oper an array index
1323     GenTree* scaledIndex;    // If scaled addressing mode can't be used
1324
1325     regMaskTP anyMask = RBM_ALLINT;
1326
1327     unsigned cns;
1328     unsigned mul;
1329
1330     GenTree* tmp;
1331     int      ixv = INT_MAX; // unset value
1332
1333     GenTree* scaledIndexVal;
1334
1335     regMaskTP newLiveMask;
1336     regMaskTP rv1Mask;
1337     regMaskTP rv2Mask;
1338
1339     /* Deferred address mode forming NYI for x86 */
1340
1341     noway_assert(deferOK == false);
1342
1343     noway_assert(oper == NULL ||
1344                  ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
1345                   ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
1346     operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
1347
1348     if (addr->gtOper == GT_LEA)
1349     {
1350         rev                  = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1351         GenTreeAddrMode* lea = addr->AsAddrMode();
1352         rv1                  = lea->Base();
1353         rv2                  = lea->Index();
1354         mul                  = lea->gtScale;
1355         cns                  = lea->gtOffset;
1356
1357         if (rv1 != NULL && rv2 == NULL && cns == 0 && rv1->InReg())
1358         {
1359             scaledIndex = NULL;
1360             goto YES;
1361         }
1362     }
1363     else
1364     {
1365         // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1366         // EASIER TO MERGE
1367
1368         /* Is the complete address already sitting in a register? */
1369
1370         if ((addr->InReg()) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1371         {
1372             genUpdateLife(addr);
1373
1374             rv1 = addr;
1375             rv2 = scaledIndex = 0;
1376             cns               = 0;
1377
1378             goto YES;
1379         }
1380
1381         /* Is it an absolute address */
1382
1383         if (addr->IsCnsIntOrI())
1384         {
1385             rv1 = rv2 = scaledIndex = 0;
1386             // along this code path cns is never used, so place a BOGUS value in it as proof
1387             // cns = addr->gtIntCon.gtIconVal;
1388             cns = UINT_MAX;
1389
1390             goto YES;
1391         }
1392
1393         /* Is there a chance of forming an address mode? */
1394
1395         if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1396         {
1397             /* This better not be an array index */
1398             noway_assert(!operIsArrIndex);
1399
1400             return false;
1401         }
1402         // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1403     }
1404
1405     /*  For scaled array access, RV2 may not be pointing to the index of the
1406         array if the CPU does not support the needed scaling factor.  We will
1407         make it point to the actual index, and scaledIndex will point to
1408         the scaled value */
1409
1410     scaledIndex    = NULL;
1411     scaledIndexVal = NULL;
1412
1413     if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
1414         rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1415     {
1416         scaledIndex = rv2;
1417         compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1418
1419         noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1420     }
1421
1422     /* Has the address already been computed? */
1423
1424     if (addr->InReg())
1425     {
1426         if (forLea)
1427             return true;
1428
1429         rv1         = addr;
1430         rv2         = NULL;
1431         scaledIndex = NULL;
1432         genUpdateLife(addr);
1433         goto YES;
1434     }
1435
1436     /*
1437         Here we have the following operands:
1438
1439             rv1     .....       base address
1440             rv2     .....       offset value        (or NULL)
1441             mul     .....       multiplier for rv2  (or 0)
1442             cns     .....       additional constant (or 0)
1443
1444         The first operand must be present (and be an address) unless we're
1445         computing an expression via 'LEA'. The scaled operand is optional,
1446         but must not be a pointer if present.
1447      */
1448
1449     noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1450
1451     /*-------------------------------------------------------------------------
1452      *
1453      * Make sure both rv1 and rv2 (if present) are in registers
1454      *
1455      */
1456
1457     // Trivial case : Is either rv1 or rv2 a NULL ?
1458
1459     if (!rv2)
1460     {
1461         /* A single operand, make sure it's in a register */
1462
1463         if (cns != 0)
1464         {
1465             // In the case where "rv1" is already in a register, there's no reason to get into a
1466             // register in "regMask" yet, if there's a non-zero constant that we're going to add;
1467             // if there is, we can do an LEA.
1468             genCodeForTree(rv1, RBM_NONE);
1469         }
1470         else
1471         {
1472             genCodeForTree(rv1, regMask);
1473         }
1474         goto DONE_REGS;
1475     }
1476     else if (!rv1)
1477     {
1478         /* A single (scaled) operand, make sure it's in a register */
1479
1480         genCodeForTree(rv2, 0);
1481         goto DONE_REGS;
1482     }
1483
1484     /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1485        they are in registers */
1486
1487     noway_assert(rv1 && rv2);
1488
1489     /*  If we have to check a constant array index, compare it against
1490         the array dimension (see below) but then fold the index with a
1491         scaling factor (if any) and additional offset (if any).
1492      */
1493
1494     if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1495     {
1496         if (scaledIndex != NULL)
1497         {
1498             assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1499             rv2 = scaledIndexVal;
1500         }
1501         /* We must have a range-checked index operation */
1502
1503         noway_assert(operIsArrIndex);
1504
1505         /* Get hold of the index value and see if it's a constant */
1506
1507         if (rv2->IsIntCnsFitsInI32())
1508         {
1509             ixv = (int)rv2->gtIntCon.gtIconVal;
1510             // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1511             if (scaledIndex != NULL)
1512             {
1513                 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
1514                                                                                      // multiple of 2^6.
1515                 if (mul == 0)
1516                 {
1517                     mul = scale;
1518                 }
1519                 else
1520                 {
1521                     mul *= scale;
1522                 }
1523             }
1524             rv2 = scaledIndex = NULL;
1525
1526             /* Add the scaled index into the added value */
1527
1528             if (mul)
1529                 cns += ixv * mul;
1530             else
1531                 cns += ixv;
1532
1533             /* Make sure 'rv1' is in a register */
1534
1535             genCodeForTree(rv1, regMask);
1536
1537             goto DONE_REGS;
1538         }
1539     }
1540
1541     if (rv1->InReg())
1542     {
1543         /* op1 already in register - how about op2? */
1544
1545         if (rv2->InReg())
1546         {
1547             /* Great - both operands are in registers already. Just update
1548                the liveness and we are done. */
1549
1550             if (rev)
1551             {
1552                 genUpdateLife(rv2);
1553                 genUpdateLife(rv1);
1554             }
1555             else
1556             {
1557                 genUpdateLife(rv1);
1558                 genUpdateLife(rv2);
1559             }
1560
1561             goto DONE_REGS;
1562         }
1563
1564         /* rv1 is in a register, but rv2 isn't */
1565
1566         if (!rev)
1567         {
1568             /* rv1 is already materialized in a register. Just update liveness
1569                to rv1 and generate code for rv2 */
1570
1571             genUpdateLife(rv1);
1572             regSet.rsMarkRegUsed(rv1, oper);
1573         }
1574
1575         goto GEN_RV2;
1576     }
1577     else if (rv2->InReg())
1578     {
1579         /* rv2 is in a register, but rv1 isn't */
1580
1581         noway_assert(rv2->gtOper == GT_REG_VAR);
1582
1583         if (rev)
1584         {
1585             /* rv2 is already materialized in a register. Update liveness
1586                to after rv2 and then hang on to rv2 */
1587
1588             genUpdateLife(rv2);
1589             regSet.rsMarkRegUsed(rv2, oper);
1590         }
1591
1592         /* Generate the for the first operand */
1593
1594         genCodeForTree(rv1, regMask);
1595
1596         if (rev)
1597         {
1598             // Free up rv2 in the right fashion (it might be re-marked if keepReg)
1599             regSet.rsMarkRegUsed(rv1, oper);
1600             regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1601             genReleaseReg(rv2);
1602             regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1603             genReleaseReg(rv1);
1604         }
1605         else
1606         {
1607             /* We have evaluated rv1, and now we just need to update liveness
1608                to rv2 which was already in a register */
1609
1610             genUpdateLife(rv2);
1611         }
1612
1613         goto DONE_REGS;
1614     }
1615
1616     if (forLea && !cns)
1617         return false;
1618
1619     /* Make sure we preserve the correct operand order */
1620
1621     if (rev)
1622     {
1623         /* Generate the second operand first */
1624
1625         // Determine what registers go live between rv2 and rv1
1626         newLiveMask = genNewLiveRegMask(rv2, rv1);
1627
1628         rv2Mask = regMask & ~newLiveMask;
1629         rv2Mask &= ~rv1->gtRsvdRegs;
1630
1631         if (rv2Mask == RBM_NONE)
1632         {
1633             // The regMask hint cannot be honored
1634             // We probably have a call that trashes the register(s) in regMask
1635             // so ignore the regMask hint, but try to avoid using
1636             // the registers in newLiveMask and the rv1->gtRsvdRegs
1637             //
1638             rv2Mask = RBM_ALLINT & ~newLiveMask;
1639             rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1640         }
1641
1642         genCodeForTree(rv2, rv2Mask);
1643         regMask &= ~genRegMask(rv2->gtRegNum);
1644
1645         regSet.rsMarkRegUsed(rv2, oper);
1646
1647         /* Generate the first operand second */
1648
1649         genCodeForTree(rv1, regMask);
1650         regSet.rsMarkRegUsed(rv1, oper);
1651
1652         /* Free up both operands in the right order (they might be
1653            re-marked as used below)
1654         */
1655         regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1656         genReleaseReg(rv2);
1657         regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1658         genReleaseReg(rv1);
1659     }
1660     else
1661     {
1662         /* Get the first operand into a register */
1663
1664         // Determine what registers go live between rv1 and rv2
1665         newLiveMask = genNewLiveRegMask(rv1, rv2);
1666
1667         rv1Mask = regMask & ~newLiveMask;
1668         rv1Mask &= ~rv2->gtRsvdRegs;
1669
1670         if (rv1Mask == RBM_NONE)
1671         {
1672             // The regMask hint cannot be honored
1673             // We probably have a call that trashes the register(s) in regMask
1674             // so ignore the regMask hint, but try to avoid using
1675             // the registers in liveMask and the rv2->gtRsvdRegs
1676             //
1677             rv1Mask = RBM_ALLINT & ~newLiveMask;
1678             rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1679         }
1680
1681         genCodeForTree(rv1, rv1Mask);
1682         regSet.rsMarkRegUsed(rv1, oper);
1683
1684     GEN_RV2:
1685
1686         /* Here, we need to get rv2 in a register. We have either already
1687            materialized rv1 into a register, or it was already in a one */
1688
1689         noway_assert(rv1->InReg());
1690         noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1691
1692         /* Generate the second operand as well */
1693
1694         regMask &= ~genRegMask(rv1->gtRegNum);
1695         genCodeForTree(rv2, regMask);
1696
1697         if (rev)
1698         {
1699             /* rev==true means the evaluation order is rv2,rv1. We just
1700                evaluated rv2, and rv1 was already in a register. Just
1701                update liveness to rv1 and we are done. */
1702
1703             genUpdateLife(rv1);
1704         }
1705         else
1706         {
1707             /* We have evaluated rv1 and rv2. Free up both operands in
1708                the right order (they might be re-marked as used below) */
1709
1710             /* Even though we have not explicitly marked rv2 as used,
1711                rv2->gtRegNum may be used if rv2 is a multi-use or
1712                an enregistered variable. */
1713             regMaskTP rv2Used;
1714             regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
1715
1716             /* Check for special case both rv1 and rv2 are the same register */
1717             if (rv2Used != genRegMask(rv1->gtRegNum))
1718             {
1719                 genReleaseReg(rv1);
1720                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1721             }
1722             else
1723             {
1724                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1725                 genReleaseReg(rv1);
1726             }
1727         }
1728     }
1729
1730 /*-------------------------------------------------------------------------
1731  *
1732  * At this point, both rv1 and rv2 (if present) are in registers
1733  *
1734  */
1735
1736 DONE_REGS:
1737
1738     /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1739
1740     if (rv1 && !(rv1->InReg()))
1741         return false;
1742     if (rv2 && !(rv2->InReg()))
1743         return false;
1744
1745 YES:
1746
1747     // *(intVar1+intVar1) causes problems as we
1748     // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1749     // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1750     // that currently as we return a single mask in useMaskPtr.
1751
1752     if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && rv1->InReg() && rv2->InReg())
1753     {
1754         if (rv1->gtRegNum == rv2->gtRegNum)
1755         {
1756             noway_assert(!operIsArrIndex);
1757             return false;
1758         }
1759     }
1760
1761     /* Check either register operand to see if it needs to be saved */
1762
1763     if (rv1)
1764     {
1765         noway_assert(rv1->InReg());
1766
1767         if (keepReg == RegSet::KEEP_REG)
1768         {
1769             regSet.rsMarkRegUsed(rv1, oper);
1770         }
1771         else
1772         {
1773             /* If the register holds an address, mark it */
1774
1775             gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1776         }
1777     }
1778
1779     if (rv2)
1780     {
1781         noway_assert(rv2->InReg());
1782
1783         if (keepReg == RegSet::KEEP_REG)
1784             regSet.rsMarkRegUsed(rv2, oper);
1785     }
1786
1787     if (deferOK)
1788     {
1789         noway_assert(!scaledIndex);
1790         return true;
1791     }
1792
1793     /* Compute the set of registers the address depends on */
1794
1795     regMaskTP useMask = RBM_NONE;
1796
1797     if (rv1)
1798     {
1799         if (rv1->gtFlags & GTF_SPILLED)
1800             regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1801
1802         noway_assert(rv1->InReg());
1803         useMask |= genRegMask(rv1->gtRegNum);
1804     }
1805
1806     if (rv2)
1807     {
1808         if (rv2->gtFlags & GTF_SPILLED)
1809         {
1810             if (rv1)
1811             {
1812                 regMaskTP lregMask = genRegMask(rv1->gtRegNum);
1813                 regMaskTP used;
1814
1815                 regSet.rsLockReg(lregMask, &used);
1816                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1817                 regSet.rsUnlockReg(lregMask, used);
1818             }
1819             else
1820                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1821         }
1822         noway_assert(rv2->InReg());
1823         useMask |= genRegMask(rv2->gtRegNum);
1824     }
1825
1826     /* Tell the caller which registers we need to hang on to */
1827
1828     *useMaskPtr = useMask;
1829
1830     return true;
1831 }
1832
1833 /*****************************************************************************
1834  *
1835  *  'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
1836  */
1837
// Generates code for a GT_ARR_BOUNDS_CHECK node: compares the index against the
// array length and jumps to the range-check-failure throw helper block when the
// index is out of range (unsigned compare handles negative indices for free).
// Special-cases constant indices and constant/ARR_LENGTH lengths to fold the
// length load into the compare where the target allows a memory operand.
void CodeGen::genRangeCheck(GenTree* oper)
{
    noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
    GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();

    // arrRef stays NULL unless we recognize a form where the length can be read
    // directly from memory off the array reference ([arrRef + lenOffset]).
    GenTree* arrLen    = bndsChk->gtArrLen;
    GenTree* arrRef    = NULL;
    int      lenOffset = 0;

    /* Is the array index a constant value? */
    GenTree* index = bndsChk->gtIndex;
    if (!index->IsCnsIntOrI())
    {
        // No, it's not a constant.
        genCodeForTree(index, RBM_ALLINT);
        regSet.rsMarkRegUsed(index);
    }

    // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
    // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
    // a register.

    if (arrLen->OperGet() == GT_ARR_LENGTH)
    {
        GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
        lenOffset                  = arrLenExact->ArrLenOffset();

#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
        // We always load the length into a register on ARM and x64.

        // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
        // lengths, but the index expression *can* be native int (64-bits)
        arrRef = arrLenExact->ArrRef();
        genCodeForTree(arrRef, RBM_ALLINT);
        noway_assert(arrRef->InReg());
        regSet.rsMarkRegUsed(arrRef);
        noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
#endif
    }
#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
    // This is another form in which we have an array reference and a constant length.  Don't use
    // on LOAD_STORE or 64BIT.
    else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
    {
        genCodeForTree(arrRef, RBM_ALLINT);
        noway_assert(arrRef->InReg());
        regSet.rsMarkRegUsed(arrRef);
        noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
    }
#endif

    // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
    if (arrRef == NULL)
    {
        // (Unless it's a constant.)
        if (!arrLen->IsCnsIntOrI())
        {
            genCodeForTree(arrLen, RBM_ALLINT);
            regSet.rsMarkRegUsed(arrLen);

            noway_assert(arrLen->InReg());
            noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
        }
    }

    if (!index->IsCnsIntOrI())
    {
        // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
        // from its register, get it back in a register.
        regMaskTP indRegMask = RBM_ALLINT;
        regMaskTP arrRegMask = RBM_ALLINT;
        // Keep the recovered arrRef/arrLen out of the index's register (when the
        // index hasn't itself been spilled) so recovery can't displace it.
        if (!(index->gtFlags & GTF_SPILLED))
            arrRegMask = ~genRegMask(index->gtRegNum);
        if (arrRef != NULL)
        {
            genRecoverReg(arrRef, arrRegMask, RegSet::KEEP_REG);
            indRegMask &= ~genRegMask(arrRef->gtRegNum);
        }
        else if (!arrLen->IsCnsIntOrI())
        {
            genRecoverReg(arrLen, arrRegMask, RegSet::KEEP_REG);
            indRegMask &= ~genRegMask(arrLen->gtRegNum);
        }
        if (index->gtFlags & GTF_SPILLED)
            regSet.rsUnspillReg(index, indRegMask, RegSet::KEEP_REG);

        /* Make sure we have the values we expect */
        noway_assert(index->InReg());
        noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));

        noway_assert(index->TypeGet() == TYP_I_IMPL ||
                     (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
        // Normalize sub-pointer-size integral index types to TYP_INT for the compare.
        var_types indxType = index->TypeGet();
        if (indxType != TYP_I_IMPL)
            indxType = TYP_INT;

        if (arrRef != NULL)
        { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register

            /* Generate "cmp index, [arrRef+LenOffs]" */
            inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
        }
        else if (arrLen->IsCnsIntOrI())
        {
            ssize_t len = arrLen->AsIntConCommon()->IconValue();
            inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
        }
        else
        {
            inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
        }

        /* Generate "jae <fail_label>" */

        noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
        emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
        genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
    }
    else
    {
        /* Generate "cmp [rv1+LenOffs], cns" */

        // On 64-bit a constant index can exceed INT32_MAX; such an index can
        // never be in range for a 32-bit array length, so indIsInt == false
        // means "always throw" below.
        bool indIsInt = true;
#ifdef _TARGET_64BIT_
        int     ixv     = 0;
        ssize_t ixvFull = index->AsIntConCommon()->IconValue();
        if (ixvFull > INT32_MAX)
        {
            indIsInt = false;
        }
        else
        {
            ixv = (int)ixvFull;
        }
#else
        ssize_t ixvFull = index->AsIntConCommon()->IconValue();
        int     ixv     = (int)ixvFull;
#endif
        if (arrRef != NULL && indIsInt)
        { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
            /* Generate "cmp [arrRef+LenOffs], ixv" */
            inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
            // Generate "jbe <fail_label>"
            // Note: operands are swapped relative to the non-constant case
            // (length vs. index), hence "below-or-equal" instead of "above-or-equal".
            emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
            genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
        else if (arrLen->IsCnsIntOrI())
        {
            ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
            // Both are constants; decide at compile time.
            if (!(0 <= ixvFull && ixvFull < lenv))
            {
                genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
            }
        }
        else if (!indIsInt)
        {
            // Constant index doesn't fit in 32 bits: unconditionally out of range.
            genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
        else
        {
            /* Generate "cmp arrLen, ixv" */
            inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
            // Generate "jbe <fail_label>"
            emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
            genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
    }

    // Free the registers that were used.
    if (!index->IsCnsIntOrI())
    {
        genReleaseReg(index);
    }

    if (arrRef != NULL)
    {
        genReleaseReg(arrRef);
    }
    else if (!arrLen->IsCnsIntOrI())
    {
        genReleaseReg(arrLen);
    }
}
2022
2023 /*****************************************************************************
2024  *
2025  * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2026  * Otherwise, check if rvalue is in register. If so, mark it. Then
2027  * call genMakeAddressable(). Needed because genMakeAddressable is used
2028  * for both lvalue and rvalue, and we only can do this for rvalue.
2029  */
2030
2031 // inline
2032 regMaskTP CodeGen::genMakeRvalueAddressable(
2033     GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
2034 {
2035     regNumber reg;
2036
2037 #if REDUNDANT_LOAD
2038
2039     if (tree->gtOper == GT_LCL_VAR)
2040     {
2041         reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
2042
2043         if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2044         {
2045             noway_assert(!isRegPairType(tree->gtType));
2046
2047             genMarkTreeInReg(tree, reg);
2048         }
2049     }
2050
2051 #endif
2052
2053     return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2054 }
2055
2056 /*****************************************************************************/
2057
2058 bool CodeGen::genIsLocalLastUse(GenTree* tree)
2059 {
2060     const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2061
2062     noway_assert(tree->OperGet() == GT_LCL_VAR);
2063     noway_assert(varDsc->lvTracked);
2064
2065     return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2066 }
2067
2068 /*****************************************************************************
2069  *
2070  *  This is genMakeAddressable(GT_ARR_ELEM).
2071  *  Makes the array-element addressible and returns the addressibility registers.
2072  *  It also marks them as used if keepReg==RegSet::KEEP_REG.
2073  *  tree is the dependant tree.
2074  *
2075  *  Note that an array-element needs 2 registers to be addressibile, the
2076  *  array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2077  *  with the 2 registers so that other functions (like instGetAddrMode()) know
2078  *  where to look for the offset to use.
2079  */
2080
// Makes a GT_ARR_ELEM (multi-dimensional array element) addressable: evaluates
// the array object and all index operands, range-checks each dimension against
// the bounds stored in the array object, and accumulates the flattened element
// offset into a register. Returns the mask of the two addressability registers
// (array object + accumulated offset); marks them used when keepReg is KEEP_REG.
regMaskTP CodeGen::genMakeAddrArrElem(GenTree* arrElem, GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    noway_assert(arrElem->gtOper == GT_ARR_ELEM);
    noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);

    /* Evaluate all the operands. We don't evaluate them into registers yet
       as GT_ARR_ELEM does not reorder the evaluation of the operands, and
       hence may use a sub-optimal ordering. We try to improve this
       situation somewhat by accessing the operands in stages
       (genMakeAddressable2 + genComputeAddressable and
       genCompIntoFreeReg + genRecoverReg).

       Note: we compute operands into free regs to avoid multiple uses of
       the same register. Multi-use would cause problems when we free
       registers in FIFO order instead of the assumed LIFO order that
       applies to all type of tree nodes except for GT_ARR_ELEM.
     */

    GenTree*  arrObj   = arrElem->gtArrElem.gtArrObj;
    unsigned  rank     = arrElem->gtArrElem.gtArrRank;
    var_types elemType = arrElem->gtArrElem.gtArrElemType;
    regMaskTP addrReg  = RBM_NONE;
    regMaskTP regNeed  = RBM_ALLINT;

#if !NOGC_WRITE_BARRIERS
    // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
    // since the arrObj participates in the lea/add instruction
    // that computes ARG_0 we should avoid putting it in ARG_1
    //
    if (varTypeIsGC(elemType))
    {
        regNeed &= ~RBM_ARG_1;
    }
#endif

    // Strip off any comma expression.
    arrObj = genCodeForCommaTree(arrObj);

    // Having generated the code for the comma, we don't care about it anymore.
    arrElem->gtArrElem.gtArrObj = arrObj;

    // If the array ref is a stack var that's dying here we have to move it
    // into a register (regalloc already counts of this), as if it's a GC pointer
    // it can be collected from here on. This is not an issue for locals that are
    // in a register, as they get marked as used an will be tracked.
    // The bug that caused this is #100776. (untracked vars?)
    if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
        !genMarkLclVar(arrObj))
    {
        genCodeForTree(arrObj, regNeed);
        regSet.rsMarkRegUsed(arrObj, 0);
        addrReg = genRegMask(arrObj->gtRegNum);
    }
    else
    {
        addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
                                      true,  // forLoadStore
                                      false, // smallOK
                                      false, // deferOK
                                      true); // evalSideEffs
    }

    // Evaluate each index into a free register; they will be recovered one at
    // a time below via genRecoverReg.
    unsigned dim;
    for (dim = 0; dim < rank; dim++)
        genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);

    /* Ensure that the array-object is in a register */

    addrReg = genKeepAddressable(arrObj, addrReg);
    genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);

    // Lock the array-object register so index recovery/unspilling can't evict it.
    regNumber arrReg     = arrObj->gtRegNum;
    regMaskTP arrRegMask = genRegMask(arrReg);
    regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
    regSet.rsLockUsedReg(arrRegMask);

    /* Now process all the indices, do the range check, and compute
       the offset of the element */

    regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation

    for (dim = 0; dim < rank; dim++)
    {
        GenTree* index = arrElem->gtArrElem.gtArrInds[dim];

        /* Get the index into a free register (other than the register holding the array) */

        genRecoverReg(index, indRegMask, RegSet::KEEP_REG);

#if CPU_LOAD_STORE_ARCH
        /* Subtract the lower bound, and do the range check */

        // Load/store targets can't use a memory operand directly, so load the
        // bound/length words (stored inline in the array object) into a scratch
        // register first.
        regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
        getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
                                   compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
        regTracker.rsTrackRegTrash(valueReg);
        getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
        regTracker.rsTrackRegTrash(index->gtRegNum);

        getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
                                   compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
        getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
#else
        /* Subtract the lower bound, and do the range check */
        getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
                                   compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
        regTracker.rsTrackRegTrash(index->gtRegNum);

        getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
                                   compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
#endif
        // Unsigned "index >= length" catches negative (post-subtraction) indices too.
        emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
        genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);

        if (dim == 0)
        {
            /* Hang on to the register of the first index */

            noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
            accReg = index->gtRegNum;
            noway_assert(accReg != arrReg);
            regSet.rsLockUsedReg(genRegMask(accReg));
        }
        else
        {
            /* Evaluate accReg = accReg*dim_size + index */

            noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
#if CPU_LOAD_STORE_ARCH
            getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
                                       compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
            regTracker.rsTrackRegTrash(valueReg);
            getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
#else
            getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
                                       compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
#endif

            inst_RV_RV(INS_add, accReg, index->gtRegNum);
            // Index folded into accReg; its register can be released now.
            regSet.rsMarkRegFree(index->gtRegNum, index);
            regTracker.rsTrackRegTrash(accReg);
        }
    }

    // Scale the accumulated (element-count) offset by the element size when it
    // can't be expressed later as an address-mode scale.
    if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
    {
        regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);

        getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
        regTracker.rsTrackRegTrash(accReg);
    }

    regSet.rsUnlockUsedReg(genRegMask(arrReg));
    regSet.rsUnlockUsedReg(genRegMask(accReg));

    regSet.rsMarkRegFree(genRegMask(arrReg));
    regSet.rsMarkRegFree(genRegMask(accReg));

    if (keepReg == RegSet::KEEP_REG)
    {
        /* We mark the addressability registers on arrObj and gtArrInds[0].
           instGetAddrMode() knows to work with this. */

        regSet.rsMarkRegUsed(arrObj, tree);
        regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
    }

    return genRegMask(arrReg) | genRegMask(accReg);
}
2250
2251 /*****************************************************************************
2252  *
2253  *  Make sure the given tree is addressable.  'needReg' is a mask that indicates
2254  *  the set of registers we would prefer the destination tree to be computed
2255  *  into (RBM_NONE means no preference).
2256  *
2257  *  'tree' can subsequently be used with the inst_XX_TT() family of functions.
2258  *
2259  *  If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2260  *  on as used, and return the mask for that register set (if no registers
2261  *  are marked as used, RBM_NONE is returned).
2262  *
2263  *  If 'smallOK' is not true and the datatype being address is a byte or short,
2264  *  then the tree is forced into a register.  This is useful when the machine
2265  *  instruction being emitted does not have a byte or short version.
2266  *
2267  *  The "deferOK" parameter indicates the mode of operation - when it's false,
2268  *  upon returning an actual address mode must have been formed (i.e. it must
2269  *  be possible to immediately call one of the inst_TT methods to operate on
2270  *  the value). When "deferOK" is true, we do whatever it takes to be ready
2271  *  to form the address mode later - for example, if an index address mode on
2272  *  a particular CPU requires the use of a specific register, we usually don't
2273  *  want to immediately grab that register for an address mode that will only
2274  *  be needed later. The convention is to call genMakeAddressable() with
2275  *  "deferOK" equal to true, do whatever work is needed to prepare the other
2276  *  operand, call genMakeAddressable() with "deferOK" equal to false, and
2277  *  finally call one of the inst_TT methods right after that.
2278  *
2279  *  If we do any other codegen after genMakeAddressable(tree) which can
2280  *  potentially spill the addressability registers, genKeepAddressable()
2281  *  needs to be called before accessing the tree again.
2282  *
2283  *  genDoneAddressable() needs to be called when we are done with the tree
2284  *  to free the addressability registers.
2285  */
2286
// Makes 'tree' addressable so it can be used with the inst_XX_TT() family.
// See the extensive comment block above for the keepReg/smallOK/deferOK
// contract. Returns the mask of registers the addressability depends on
// (RBM_NONE when the operand is directly addressable, e.g. a stack local).
regMaskTP CodeGen::genMakeAddressable(
    GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
{
    // 'addr' remembers the enclosing GT_IND when we fall back to evaluating the
    // address operand into a register; it is passed to rsMarkRegUsed below so
    // the register is associated with the indirection.
    GenTree*  addr = NULL;
    regMaskTP regMask;

    /* Is the value simply sitting in a register? */

    if (tree->InReg())
    {
        genUpdateLife(tree);

        goto GOT_VAL;
    }

    // TODO: If the value is for example a cast of float -> int, compute
    // TODO: the converted value into a stack temp, and leave it there,
    // TODO: since stack temps are always addressable. This would require
    // TODO: recording the fact that a particular tree is in a stack temp.

    /* byte/char/short operand -- is this acceptable to the caller? */

    if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
        goto EVAL_TREE;

    // Evaluate non-last elements of comma expressions, to get to the last.
    tree = genCodeForCommaTree(tree);

    switch (tree->gtOper)
    {
        case GT_LCL_FLD:

            // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
            // to worry about it being enregistered.
            noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);

            // Stack-resident local field: directly addressable, no registers needed.
            genUpdateLife(tree);
            return 0;

        case GT_LCL_VAR:

            if (!genMarkLclVar(tree))
            {
                // Local lives on the stack: directly addressable.
                genUpdateLife(tree);
                return 0;
            }

            __fallthrough; // it turns out the variable lives in a register

        case GT_REG_VAR:

            genUpdateLife(tree);

            goto GOT_VAL;

        case GT_CLS_VAR:

            // Static field: addressable via its fixed address, no registers needed.
            return 0;

        case GT_CNS_INT:
#ifdef _TARGET_64BIT_
            // Non-relocs will be sign extended, so we don't have to enregister
            // constants that are equivalent to a sign-extended int.
            // Relocs can be left alone if they are RIP-relative.
            if ((genTypeSize(tree->TypeGet()) > 4) &&
                (!tree->IsIntCnsFitsInI32() ||
                 (tree->IsIconHandle() &&
                  (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
            {
                break;
            }
#endif // _TARGET_64BIT_
            __fallthrough;

        case GT_CNS_LNG:
        case GT_CNS_DBL:
            // For MinOpts, we don't do constant folding, so we have
            // constants showing up in places we don't like.
            // force them into a register now to prevent that.
            if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
                return 0;
            break;

        case GT_IND:
        case GT_NULLCHECK:

            /* Try to make the address directly addressable */

            if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
                                   needReg, keepReg, &regMask, deferOK))
            {
                genUpdateLife(tree);
                return regMask;
            }

            /* No good, we'll have to load the address into a register */

            addr = tree;
            tree = tree->gtOp.gtOp1;
            break;

        default:
            break;
    }

EVAL_TREE:

    /* Here we need to compute the value 'tree' into a register */

    genCodeForTree(tree, needReg);

GOT_VAL:

    noway_assert(tree->InReg());

    if (isRegPairType(tree->gtType))
    {
        /* Are we supposed to hang on to the register? */

        if (keepReg == RegSet::KEEP_REG)
            regSet.rsMarkRegPairUsed(tree);

        regMask = genRegPairMask(tree->gtRegPair);
    }
    else
    {
        /* Are we supposed to hang on to the register? */

        if (keepReg == RegSet::KEEP_REG)
            regSet.rsMarkRegUsed(tree, addr);

        regMask = genRegMask(tree->gtRegNum);
    }

    return regMask;
}
2423
2424 /*****************************************************************************
2425  *  Compute a tree (which was previously made addressable using
2426  *  genMakeAddressable()) into a register.
2427  *  needReg - mask of preferred registers.
2428  *  keepReg - should the computed register be marked as used by the tree
2429  *  freeOnly - target register needs to be a scratch register
2430  */
2431
// Computes a tree (previously made addressable via genMakeAddressable) into a
// register. addrReg/keptReg describe the addressability registers being
// released; needReg is the preferred target set; keepReg says whether the
// result register is marked used; freeOnly forces the result into a scratch
// (free) register, moving it if it currently sits in a non-free one.
void CodeGen::genComputeAddressable(GenTree*        tree,
                                    regMaskTP       addrReg,
                                    RegSet::KeepReg keptReg,
                                    regMaskTP       needReg,
                                    RegSet::KeepReg keepReg,
                                    bool            freeOnly)
{
    noway_assert(genStillAddressable(tree));
    noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));

    // Release the addressability registers first; the value may then be
    // (re)loaded into one of them.
    genDoneAddressable(tree, addrReg, keptReg);

    regNumber reg;

    if (tree->InReg())
    {
        reg = tree->gtRegNum;

        // Already in a register, but not a free one and the caller insisted on
        // a scratch register -- move it.
        if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
            goto MOVE_REG;
    }
    else
    {
        if (tree->OperIsConst())
        {
            /* Need to handle consts separately as we don't want to emit
              "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
              handles consts better for SMALL_CODE */

            noway_assert(tree->IsCnsIntOrI());
            reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
        }
        else
        {
        MOVE_REG:
            reg = regSet.rsPickReg(needReg);

            inst_RV_TT(INS_mov, reg, tree);
            regTracker.rsTrackRegTrash(reg);
        }
    }

    genMarkTreeInReg(tree, reg);

    // Either keep the register marked used for the caller, or just record any
    // GC-ness of the value now living in it.
    if (keepReg == RegSet::KEEP_REG)
        regSet.rsMarkRegUsed(tree);
    else
        gcInfo.gcMarkRegPtrVal(tree);
}
2481
2482 /*****************************************************************************
2483  *  Should be similar to genMakeAddressable() but gives more control.
2484  */
2485
2486 regMaskTP CodeGen::genMakeAddressable2(GenTree*        tree,
2487                                        regMaskTP       needReg,
2488                                        RegSet::KeepReg keepReg,
2489                                        bool            forLoadStore,
2490                                        bool            smallOK,
2491                                        bool            deferOK,
2492                                        bool            evalSideEffs)
2493
2494 {
2495     bool evalToReg = false;
2496
2497     if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2498         evalToReg = true;
2499
2500 #if CPU_LOAD_STORE_ARCH
2501     if (!forLoadStore)
2502         evalToReg = true;
2503 #endif
2504
2505     if (evalToReg)
2506     {
2507         genCodeForTree(tree, needReg);
2508
2509         noway_assert(tree->InReg());
2510
2511         if (isRegPairType(tree->gtType))
2512         {
2513             /* Are we supposed to hang on to the register? */
2514
2515             if (keepReg == RegSet::KEEP_REG)
2516                 regSet.rsMarkRegPairUsed(tree);
2517
2518             return genRegPairMask(tree->gtRegPair);
2519         }
2520         else
2521         {
2522             /* Are we supposed to hang on to the register? */
2523
2524             if (keepReg == RegSet::KEEP_REG)
2525                 regSet.rsMarkRegUsed(tree);
2526
2527             return genRegMask(tree->gtRegNum);
2528         }
2529     }
2530     else
2531     {
2532         return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2533     }
2534 }
2535
2536 /*****************************************************************************
2537  *
2538  *  The given tree was previously passed to genMakeAddressable(); return
2539  *  'true' if the operand is still addressable.
2540  */
2541
2542 // inline
2543 bool CodeGen::genStillAddressable(GenTree* tree)
2544 {
2545     /* Has the value (or one or more of its sub-operands) been spilled? */
2546
2547     if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
2548         return false;
2549
2550     return true;
2551 }
2552
2553 /*****************************************************************************
2554  *
2555  *  Recursive helper to restore complex address modes. The 'lockPhase'
2556  *  argument indicates whether we're in the 'lock' or 'reload' phase.
2557  */
2558
// Recursive helper that restores a complex address mode after spills.
// Runs in two passes over the address tree: with lockPhase == true it only
// locks the registers still holding sub-values (so the reload pass can't evict
// them); with lockPhase == false it reloads spilled sub-values, marks their
// registers used for 'addr', and locks them. Returns the mask of registers
// locked/reloaded by this call; the caller is responsible for unlocking.
regMaskTP CodeGen::genRestoreAddrMode(GenTree* addr, GenTree* tree, bool lockPhase)
{
    regMaskTP regMask = RBM_NONE;

    /* Have we found a spilled value? */

    if (tree->gtFlags & GTF_SPILLED)
    {
        /* Do nothing if we're locking, otherwise reload and lock */

        if (!lockPhase)
        {
            /* Unspill the register */

            regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);

            /* The value should now be sitting in a register */

            noway_assert(tree->InReg());
            regMask = genRegMask(tree->gtRegNum);

            /* Mark the register as used for the address */

            regSet.rsMarkRegUsed(tree, addr);

            /* Lock the register until we're done with the entire address */

            regSet.rsMaskLock |= regMask;
        }

        return regMask;
    }

    /* Is this sub-tree sitting in a register? */

    if (tree->InReg())
    {
        regMask = genRegMask(tree->gtRegNum);

        /* Lock the register if we're in the locking phase */

        if (lockPhase)
            regSet.rsMaskLock |= regMask;
    }
    else
    {
        /* Process any sub-operands of this node */

        unsigned kind = tree->OperKind();

        if (kind & GTK_SMPOP)
        {
            /* Unary/binary operator */

            if (tree->gtOp.gtOp1)
                regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
            if (tree->gtGetOp2IfPresent())
                regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
        }
        else if (tree->gtOper == GT_ARR_ELEM)
        {
            /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
               which holds the offset-calculation */

            regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
            regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
        }
        else if (tree->gtOper == GT_CMPXCHG)
        {
            // Only the location operand participates in the address mode.
            regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
        }
        else
        {
            /* Must be a leaf/constant node */

            noway_assert(kind & (GTK_LEAF | GTK_CONST));
        }
    }

    return regMask;
}
2640
2641 /*****************************************************************************
2642  *
2643  *  The given tree was previously passed to genMakeAddressable, but since then
2644  *  some of its registers are known to have been spilled; do whatever it takes
2645  *  to make the operand addressable again (typically by reloading any spilled
2646  *  registers).
2647  */
2648
regMaskTP CodeGen::genRestAddressable(GenTree* tree, regMaskTP addrReg, regMaskTP lockMask)
{
    // On entry 'lockMask' must already be locked by the caller; it is
    // unlocked on every exit path of this function.
    noway_assert((regSet.rsMaskLock & lockMask) == lockMask);

    /* Is this a 'simple' register spill? */

    if (tree->gtFlags & GTF_SPILLED)
    {
        /* The mask must match the original register/regpair */

        if (isRegPairType(tree->gtType))
        {
            noway_assert(addrReg == genRegPairMask(tree->gtRegPair));

            regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);

            // The unspill may have landed in different registers; recompute.
            addrReg = genRegPairMask(tree->gtRegPair);
        }
        else
        {
            noway_assert(addrReg == genRegMask(tree->gtRegNum));

            regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);

            addrReg = genRegMask(tree->gtRegNum);
        }

        noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
        regSet.rsMaskLock -= lockMask;

        return addrReg;
    }

    /* We have a complex address mode with some of its sub-operands spilled */

    noway_assert((tree->InReg()) == 0);
    noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);

    /*
        We'll proceed in several phases:

         1. Lock any registers that are part of the address mode and
            have not been spilled. This prevents these registers from
            getting spilled in step 2.

         2. Reload any registers that have been spilled; lock each
            one right after it is reloaded.

         3. Unlock all the registers.
     */

    addrReg = genRestoreAddrMode(tree, tree, true);
    addrReg |= genRestoreAddrMode(tree, tree, false);

    /* Unlock all registers that the address mode uses */

    lockMask |= addrReg;

    noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
    regSet.rsMaskLock -= lockMask;

    return addrReg;
}
2712
2713 /*****************************************************************************
2714  *
2715  *  The given tree was previously passed to genMakeAddressable, but since then
2716  *  some of its registers might have been spilled ('addrReg' is the set of
2717  *  registers used by the address). This function makes sure the operand is
2718  *  still addressable (while avoiding any of the registers in 'avoidMask'),
2719  *  and returns the (possibly modified) set of registers that are used by
2720  *  the address (these will be marked as used on exit).
2721  */
2722
2723 regMaskTP CodeGen::genKeepAddressable(GenTree* tree, regMaskTP addrReg, regMaskTP avoidMask)
2724 {
2725     /* Is the operand still addressable? */
2726
2727     tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
2728
2729     if (!genStillAddressable(tree))
2730     {
2731         if (avoidMask)
2732         {
2733             // Temporarily lock 'avoidMask' while we restore addressability
2734             // genRestAddressable will unlock the 'avoidMask' for us
2735             // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2736             // In regSet.rsRegMaskFree() we require that all locked register be marked as used
2737             //
2738             regSet.rsLockUsedReg(avoidMask);
2739         }
2740
2741         addrReg = genRestAddressable(tree, addrReg, avoidMask);
2742
2743         noway_assert((regSet.rsMaskLock & avoidMask) == 0);
2744     }
2745
2746     return addrReg;
2747 }
2748
2749 /*****************************************************************************
2750  *
2751  *  After we're finished with the given operand (which was previously marked
2752  *  by calling genMakeAddressable), this function must be called to free any
2753  *  registers that may have been used by the address.
2754  *  keptReg indicates if the addressability registers were marked as used
2755  *  by genMakeAddressable().
2756  */
2757
2758 void CodeGen::genDoneAddressable(GenTree* tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
2759 {
2760     if (keptReg == RegSet::FREE_REG)
2761     {
2762         // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2763         // ie. There may be a pending use in a higher-up tree.
2764
2765         addrReg &= ~regSet.rsMaskUsed;
2766
2767         /* addrReg was not marked as used. So just reset its GC info */
2768         if (addrReg)
2769         {
2770             gcInfo.gcMarkRegSetNpt(addrReg);
2771         }
2772     }
2773     else
2774     {
2775         /* addrReg was marked as used. So we need to free it up (which
2776            will also reset its GC info) */
2777
2778         regSet.rsMarkRegFree(addrReg);
2779     }
2780 }
2781
2782 /*****************************************************************************/
2783 /*****************************************************************************
2784  *
2785  *  Make sure the given floating point value is addressable, and return a tree
2786  *  that will yield the value as an addressing mode (this tree may differ from
2787  *  the one passed in, BTW). If the only way to make the value addressable is
2788  *  to evaluate into the FP stack, we do this and return zero.
2789  */
2790
GenTree* CodeGen::genMakeAddrOrFPstk(GenTree* tree, regMaskTP* regMaskPtr, bool roundResult)
{
    // Returns a tree usable as an addressing mode for the FP value, or
    // nullptr (0) after evaluating the value onto the x87 FP stack.
    // *regMaskPtr receives the registers used by the address (if any).
    *regMaskPtr = 0;

    switch (tree->gtOper)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_CLS_VAR:
            // Already directly addressable as-is.
            return tree;

        case GT_CNS_DBL:
            // Materialize the constant in the data section and address that.
            if (tree->gtType == TYP_FLOAT)
            {
                // Narrow to 32-bit float so the emitted constant has the right size.
                float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
                return genMakeConst(&f, TYP_FLOAT, tree, false);
            }
            return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);

        case GT_IND:
        case GT_NULLCHECK:

            /* Try to make the address directly addressable */

            if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
                                   0, RegSet::FREE_REG, regMaskPtr, false))
            {
                genUpdateLife(tree);
                return tree;
            }

            // Couldn't form an address mode; fall through to the FP-stack path.
            break;

        default:
            break;
    }
#if FEATURE_STACK_FP_X87
    /* We have no choice but to compute the value 'tree' onto the FP stack */

    genCodeForTreeFlt(tree);
#endif
    return 0;
}
2834
2835 /*****************************************************************************/
2836 /*****************************************************************************
2837  *
2838  *  Display a string literal value (debug only).
2839  */
2840
2841 #ifdef DEBUG
2842 #endif
2843
2844 /*****************************************************************************
2845  *
2846  *   Generate code to check that the GS cookie wasn't thrashed by a buffer
2847  *   overrun.  If pushReg is true, preserve all registers around code sequence.
2848  *   Otherwise, ECX maybe modified.
2849  */
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
    // Make sure that EAX didn't die in the return expression
    if (!pushReg && (compiler->info.compRetType == TYP_REF))
        gcInfo.gcRegGCrefSetCur |= RBM_INTRET;

    // Add cookie check code for unsafe buffers
    BasicBlock* gsCheckBlk;
    regMaskTP   byrefPushedRegs = RBM_NONE;
    regMaskTP   norefPushedRegs = RBM_NONE;
    regMaskTP   pushedRegs      = RBM_NONE;

    // Either the cookie lives at a known address (NGen) or its value is
    // baked into the code (JIT); exactly one of these must be set.
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

#if CPU_LOAD_STORE_ARCH
    // Lock all ABI argument registers before generating the check. All other registers should be dead, so this
    // shouldn't over-constrain us.
    const regMaskTP unlockedArgRegs = RBM_ARG_REGS & ~regSet.rsMaskLock;
    regMaskTP       usedArgRegs;
    regSet.rsLockReg(unlockedArgRegs, &usedArgRegs);
#endif

    if (compiler->gsGlobalSecurityCookieAddr == NULL)
    {
        // JIT case
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_LOAD_STORE_ARCH
        // Load the stack copy of the cookie, then compare it to the immediate.
        regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
        getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
        regTracker.rsTrackRegTrash(reg);

        if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
            arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
        {
            getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
        }
        else
        {
            // Load CookieVal into a register
            regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
            instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
            getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
        }
#else
        // x86 can compare the stack slot against the immediate directly.
        getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
                                  (int)compiler->gsGlobalSecurityCookieVal);
#endif
    }
    else
    {
        regNumber regGSCheck;
        regMaskTP regMaskGSCheck;
#if CPU_LOAD_STORE_ARCH
        regGSCheck     = regSet.rsGrabReg(RBM_ALLINT);
        regMaskGSCheck = genRegMask(regGSCheck);
#else
        // Don't pick the 'this' register
        if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
            (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
        {
            regGSCheck     = REG_EDX;
            regMaskGSCheck = RBM_EDX;
        }
        else
        {
            regGSCheck     = REG_ECX;
            regMaskGSCheck = RBM_ECX;
        }

        // NGen case
        if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
        {
            // Caller asked us to preserve registers; save the scratch reg around the check.
            pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
        }
        else
        {
            noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
        }
#endif
        // Load the global cookie value through its address, then compare
        // against the stack copy.
#if defined(_TARGET_ARM_)
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
#else
        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
                                  (ssize_t)compiler->gsGlobalSecurityCookieAddr);
#endif // !_TARGET_ARM_
        regTracker.rsTrashRegSet(regMaskGSCheck);
#ifdef _TARGET_ARM_
        regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
        getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
        regTracker.rsTrackRegTrash(regTmp);
        getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
#else
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
#endif
    }

    // If the cookie matches, jump over the fail-fast helper call.
    gsCheckBlk            = genCreateTempLabel();
    emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, gsCheckBlk);
    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
    genDefineTempLabel(gsCheckBlk);

    genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);

#if CPU_LOAD_STORE_ARCH
    // Unlock all ABI argument registers.
    regSet.rsUnlockReg(unlockedArgRegs, usedArgRegs);
#endif
}
2961
2962 /*****************************************************************************
2963  *
2964  *  Generate any side effects within the given expression tree.
2965  */
2966
2967 void CodeGen::genEvalSideEffects(GenTree* tree)
2968 {
2969     genTreeOps oper;
2970     unsigned   kind;
2971
2972 AGAIN:
2973
2974     /* Does this sub-tree contain any side-effects? */
2975     if (tree->gtFlags & GTF_SIDE_EFFECT)
2976     {
2977 #if FEATURE_STACK_FP_X87
2978         /* Remember the current FP stack level */
2979         int iTemps = genNumberTemps();
2980 #endif
2981         if (tree->OperIsIndir())
2982         {
2983             regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
2984
2985             if (tree->InReg())
2986             {
2987                 gcInfo.gcMarkRegPtrVal(tree);
2988                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2989             }
2990             // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
2991             // do not need an additional null-check
2992             /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
2993             else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
2994             {
2995                 /* Compare against any register to do null-check */
2996                 CLANG_FORMAT_COMMENT_ANCHOR;
2997
2998 #if defined(_TARGET_XARCH_)
2999                 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
3000                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3001 #elif CPU_LOAD_STORE_ARCH
3002                 if (varTypeIsFloating(tree->TypeGet()))
3003                 {
3004                     genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
3005                                                RegSet::FREE_REG);
3006                 }
3007                 else
3008                 {
3009                     genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
3010                 }
3011 #ifdef _TARGET_ARM_
3012                 if (tree->gtFlags & GTF_IND_VOLATILE)
3013                 {
3014                     // Emit a memory barrier instruction after the load
3015                     instGen_MemoryBarrier();
3016                 }
3017 #endif
3018 #else
3019                 NYI("TARGET");
3020 #endif
3021             }
3022             else
3023             {
3024                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3025             }
3026         }
3027         else
3028         {
3029             /* Generate the expression and throw it away */
3030             genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3031             if (tree->InReg())
3032             {
3033                 gcInfo.gcMarkRegPtrVal(tree);
3034             }
3035         }
3036 #if FEATURE_STACK_FP_X87
3037         /* If the tree computed a value on the FP stack, pop the stack */
3038         if (genNumberTemps() > iTemps)
3039         {
3040             noway_assert(genNumberTemps() == iTemps + 1);
3041             genDiscardStackFP(tree);
3042         }
3043 #endif
3044         return;
3045     }
3046
3047     noway_assert(tree->gtOper != GT_ASG);
3048
3049     /* Walk the tree, just to mark any dead values appropriately */
3050
3051     oper = tree->OperGet();
3052     kind = tree->OperKind();
3053
3054     /* Is this a constant or leaf node? */
3055
3056     if (kind & (GTK_CONST | GTK_LEAF))
3057     {
3058 #if FEATURE_STACK_FP_X87
3059         if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
3060         {
3061             genRegVarDeathStackFP(tree);
3062             FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3063         }
3064 #endif
3065         genUpdateLife(tree);
3066         gcInfo.gcMarkRegPtrVal(tree);
3067         return;
3068     }
3069
3070     /* Must be a 'simple' unary/binary operator */
3071
3072     noway_assert(kind & GTK_SMPOP);
3073
3074     if (tree->gtGetOp2IfPresent())
3075     {
3076         genEvalSideEffects(tree->gtOp.gtOp1);
3077
3078         tree = tree->gtOp.gtOp2;
3079         goto AGAIN;
3080     }
3081     else
3082     {
3083         tree = tree->gtOp.gtOp1;
3084         if (tree)
3085             goto AGAIN;
3086     }
3087 }
3088
3089 /*****************************************************************************
3090  *
3091  *  A persistent pointer value is being overwritten, record it for the GC.
3092  *
3093  *  tgt        : the destination being written to
3094  *  assignVal  : the value being assigned (the source). It must currently be in a register.
3095  *  tgtAddrReg : the set of registers being used by "tgt"
3096  *
3097  *  Returns    : the mask of the scratch register that was used.
3098  *               RBM_NONE if a write-barrier is not needed.
3099  */
3100
regMaskTP CodeGen::WriteBarrier(GenTree* tgt, GenTree* assignVal, regMaskTP tgtAddrReg)
{
    // The value being stored must already be in a register.
    noway_assert(assignVal->InReg());

    GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
    if (wbf == GCInfo::WBF_NoBarrier)
        return RBM_NONE;

    regMaskTP resultRegMask = RBM_NONE;

    regNumber reg = assignVal->gtRegNum;

#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
#ifdef DEBUG
    if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
    {
#endif
        // Table mapping the register holding the value to the specialized
        // per-register write-barrier helper. -1 marks registers with no
        // helper (ESP, and EDX which is the barrier's own scratch register).
        const static int regToHelper[2][8] = {
            // If the target is known to be in managed memory
            {
                CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
                CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
            },

            // Don't know if the target is in managed memory
            {
                CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
                CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
                CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
            },
        };

        // Sanity-check the table layout against the regNumber enum order.
        noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
        noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
        noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
        noway_assert(regToHelper[0][REG_ESP] == -1);
        noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
        noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
        noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);

        noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
        noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
        noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
        noway_assert(regToHelper[1][REG_ESP] == -1);
        noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
        noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
        noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);

        noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));

        /*
            Generate the following code:

                    lea     edx, tgt
                    call    write_barrier_helper_reg

            First grab the RBM_WRITE_BARRIER register for the target address.
         */

        regNumber rg1;
        bool      trashOp1;

        if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
        {
            // EDX is not part of the address; grab it and temporarily mark
            // it used+locked so nothing spills it before the call.
            rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);

            regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
            regSet.rsMaskLock |= RBM_WRITE_BARRIER;

            trashOp1 = false;
        }
        else
        {
            // EDX is already part of the address mode; reuse it in place.
            rg1 = REG_WRITE_BARRIER;

            trashOp1 = true;
        }

        noway_assert(rg1 == REG_WRITE_BARRIER);

        /* Generate "lea EDX, [addr-mode]" */

        noway_assert(tgt->gtType == TYP_REF);
        tgt->gtType = TYP_BYREF;
        inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);

        /* Free up anything that was tied up by the LHS */
        genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);

        // In case "tgt" was a comma:
        tgt = tgt->gtEffectiveVal();

        regTracker.rsTrackRegTrash(rg1);
        gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
        gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);

        /* Call the proper vm helper */

        // enforced by gcIsWriteBarrierCandidate
        noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);

        unsigned tgtAnywhere = 0;
        if ((tgt->gtOper == GT_IND) &&
            ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
        {
            tgtAnywhere = 1;
        }

        int helper    = regToHelper[tgtAnywhere][reg];
        resultRegMask = genRegMask(reg);

        gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call

        genEmitHelperCall(helper,
                          0,           // argSize
                          EA_PTRSIZE); // retSize

        if (!trashOp1)
        {
            // Undo the temporary used+locked marking done above.
            regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
            regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
        }

        return resultRegMask;

#ifdef DEBUG
    }
    else
#endif
#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS

#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
    {
        /*
            Generate the following code (or its equivalent on the given target):

                    mov     arg1, srcReg
                    lea     arg0, tgt
                    call    write_barrier_helper

            First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
         */

        if (reg != REG_ARG_1)
        {
            // We may need to spill whatever is in the ARG_1 register
            //
            if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
            {
                regSet.rsSpillReg(REG_ARG_1);
            }

            inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
        }
        resultRegMask = RBM_ARG_1;

        regTracker.rsTrackRegTrash(REG_ARG_1);
        // NOTE(review): gcMarkRegSetNpt appears to take a register MASK
        // elsewhere in this function (RBM_*), but here it is passed the
        // regNumber REG_ARG_1 — presumably RBM_ARG_1 was intended; confirm
        // against gcMarkRegSetNpt's parameter type. Same for REG_ARG_0 below.
        gcInfo.gcMarkRegSetNpt(REG_ARG_1);
        gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1

        bool free_arg1 = false;
        if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
        {
            regSet.rsMaskUsed |= RBM_ARG_1;
            free_arg1 = true;
        }

        // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier

        /* Generate "lea R0, [addr-mode]" */

        noway_assert(tgt->gtType == TYP_REF);
        tgt->gtType = TYP_BYREF;

        tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);

        // We may need to spill whatever is in the ARG_0 register
        //
        if (((tgtAddrReg & RBM_ARG_0) == 0) &&        // tgtAddrReg does not contain REG_ARG_0
            ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
            (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
        {
            regSet.rsSpillReg(REG_ARG_0);
        }

        inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);

        /* Free up anything that was tied up by the LHS */
        genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);

        regTracker.rsTrackRegTrash(REG_ARG_0);
        gcInfo.gcMarkRegSetNpt(REG_ARG_0);
        gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0

#ifdef _TARGET_ARM_
#if NOGC_WRITE_BARRIERS
        // Finally, we may be required to spill whatever is in the further argument registers
        // trashed by the call. The write barrier trashes some further registers --
        // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.

        regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
#else
        regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
#endif
        // Spill any other registers trashed by the write barrier call and currently in use.
        regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
        if (mustSpill)
            regSet.rsSpillRegs(mustSpill);
#endif // _TARGET_ARM_

        bool free_arg0 = false;
        if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
        {
            regSet.rsMaskUsed |= RBM_ARG_0;
            free_arg0 = true;
        }

        // genEmitHelperCall might need to grab a register
        // so don't let it spill one of the arguments
        //
        regMaskTP reallyUsedRegs = RBM_NONE;
        regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);

        genGCWriteBarrier(tgt, wbf);

        regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
        gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call

        // Release the 'used' markings we added above, if we added them.
        if (free_arg0)
        {
            regSet.rsMaskUsed &= ~RBM_ARG_0;
        }
        if (free_arg1)
        {
            regSet.rsMaskUsed &= ~RBM_ARG_1;
        }

        return resultRegMask;
    }
#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
}
3342
3343 #ifdef _TARGET_X86_
3344 /*****************************************************************************
3345  *
3346  *  Generate the appropriate conditional jump(s) right after the low 32 bits
3347  *  of two long values have been compared.
3348  */
3349
3350 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3351 {
3352     if (cmp != GT_NE)
3353     {
3354         jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3355     }
3356
3357     switch (cmp)
3358     {
3359         case GT_EQ:
3360             inst_JMP(EJ_jne, jumpFalse);
3361             break;
3362
3363         case GT_NE:
3364             inst_JMP(EJ_jne, jumpTrue);
3365             break;
3366
3367         case GT_LT:
3368         case GT_LE:
3369             if (isUnsigned)
3370             {
3371                 inst_JMP(EJ_ja, jumpFalse);
3372                 inst_JMP(EJ_jb, jumpTrue);
3373             }
3374             else
3375             {
3376                 inst_JMP(EJ_jg, jumpFalse);
3377                 inst_JMP(EJ_jl, jumpTrue);
3378             }
3379             break;
3380
3381         case GT_GE:
3382         case GT_GT:
3383             if (isUnsigned)
3384             {
3385                 inst_JMP(EJ_jb, jumpFalse);
3386                 inst_JMP(EJ_ja, jumpTrue);
3387             }
3388             else
3389             {
3390                 inst_JMP(EJ_jl, jumpFalse);
3391                 inst_JMP(EJ_jg, jumpTrue);
3392             }
3393             break;
3394
3395         default:
3396             noway_assert(!"expected a comparison operator");
3397     }
3398 }
3399
3400 /*****************************************************************************
3401  *
3402  *  Generate the appropriate conditional jump(s) right after the high 32 bits
3403  *  of two long values have been compared.
3404  */
3405
3406 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3407 {
3408     switch (cmp)
3409     {
3410         case GT_EQ:
3411             inst_JMP(EJ_je, jumpTrue);
3412             break;
3413
3414         case GT_NE:
3415             inst_JMP(EJ_jne, jumpTrue);
3416             break;
3417
3418         case GT_LT:
3419             inst_JMP(EJ_jb, jumpTrue);
3420             break;
3421
3422         case GT_LE:
3423             inst_JMP(EJ_jbe, jumpTrue);
3424             break;
3425
3426         case GT_GE:
3427             inst_JMP(EJ_jae, jumpTrue);
3428             break;
3429
3430         case GT_GT:
3431             inst_JMP(EJ_ja, jumpTrue);
3432             break;
3433
3434         default:
3435             noway_assert(!"expected comparison");
3436     }
3437 }
3438 #elif defined(_TARGET_ARM_)
3439 /*****************************************************************************
3440 *
3441 *  Generate the appropriate conditional jump(s) right after the low 32 bits
3442 *  of two long values have been compared.
3443 */
3444
3445 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3446 {
3447     if (cmp != GT_NE)
3448     {
3449         jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3450     }
3451
3452     switch (cmp)
3453     {
3454         case GT_EQ:
3455             inst_JMP(EJ_ne, jumpFalse);
3456             break;
3457
3458         case GT_NE:
3459             inst_JMP(EJ_ne, jumpTrue);
3460             break;
3461
3462         case GT_LT:
3463         case GT_LE:
3464             if (isUnsigned)
3465             {
3466                 inst_JMP(EJ_hi, jumpFalse);
3467                 inst_JMP(EJ_lo, jumpTrue);
3468             }
3469             else
3470             {
3471                 inst_JMP(EJ_gt, jumpFalse);
3472                 inst_JMP(EJ_lt, jumpTrue);
3473             }
3474             break;
3475
3476         case GT_GE:
3477         case GT_GT:
3478             if (isUnsigned)
3479             {
3480                 inst_JMP(EJ_lo, jumpFalse);
3481                 inst_JMP(EJ_hi, jumpTrue);
3482             }
3483             else
3484             {
3485                 inst_JMP(EJ_lt, jumpFalse);
3486                 inst_JMP(EJ_gt, jumpTrue);
3487             }
3488             break;
3489
3490         default:
3491             noway_assert(!"expected a comparison operator");
3492     }
3493 }
3494
3495 /*****************************************************************************
3496 *
3497 *  Generate the appropriate conditional jump(s) right after the high 32 bits
3498 *  of two long values have been compared.
3499 */
3500
3501 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3502 {
3503     switch (cmp)
3504     {
3505         case GT_EQ:
3506             inst_JMP(EJ_eq, jumpTrue);
3507             break;
3508
3509         case GT_NE:
3510             inst_JMP(EJ_ne, jumpTrue);
3511             break;
3512
3513         case GT_LT:
3514             inst_JMP(EJ_lo, jumpTrue);
3515             break;
3516
3517         case GT_LE:
3518             inst_JMP(EJ_ls, jumpTrue);
3519             break;
3520
3521         case GT_GE:
3522             inst_JMP(EJ_hs, jumpTrue);
3523             break;
3524
3525         case GT_GT:
3526             inst_JMP(EJ_hi, jumpTrue);
3527             break;
3528
3529         default:
3530             noway_assert(!"expected comparison");
3531     }
3532 }
3533 #endif
3534 /*****************************************************************************
3535  *
3536  *  Called by genCondJump() for TYP_LONG.
3537  */
3538
//------------------------------------------------------------------------
// genCondJumpLng: Generate a conditional branch for a TYP_LONG comparison.
//
// Arguments:
//    cond          - comparison node; op1 must be TYP_LONG, and
//                    GTF_REVERSE_OPS must already have been handled by
//                    genCondJump()
//    jumpTrue      - block to branch to when the condition holds
//    jumpFalse     - block to branch to when the condition fails
//    bFPTransition - (x87 stack-FP only) true if a FP-state transition block
//                    may be needed on the path to 'jumpTrue'
//
// The 64-bit value is handled as two 32-bit halves: the high halves are
// compared first (genJccLongHi), then the low halves (genJccLongLo).
// Comparisons of the form '== 0', '!= 0', '== -1', '!= -1' are special-cased
// to a single flag-setting OR/AND of the two halves.
//
void CodeGen::genCondJumpLng(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
{
    noway_assert(jumpTrue && jumpFalse);
    noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
    noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);

    GenTree*   op1 = cond->gtOp.gtOp1;
    GenTree*   op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    regMaskTP addrReg;

    /* Are we comparing against a constant? */

    if (op2->gtOper == GT_CNS_LNG)
    {
        __int64   lval = op2->gtLngCon.gtLconVal;
        regNumber rTmp;

        // We're "done" evaluating op2; let's strip any commas off op1 before we
        // evaluate it.
        op1 = genCodeForCommaTree(op1);

        /* We can generate better code for some special cases */
        // Equality against 0 needs only 'OR lo, hi' to set the zero flag;
        // equality against -1 uses 'AND lo, hi' followed by an increment
        // (all-ones AND all-ones == all-ones; +1 then sets the zero flag).
        instruction ins              = INS_invalid;
        bool        useIncToSetFlags = false;
        bool        specialCaseCmp   = false;

        if (cmp == GT_EQ)
        {
            if (lval == 0)
            {
                /* op1 == 0  */
                ins              = INS_OR;
                useIncToSetFlags = false;
                specialCaseCmp   = true;
            }
            else if (lval == -1)
            {
                /* op1 == -1 */
                ins              = INS_AND;
                useIncToSetFlags = true;
                specialCaseCmp   = true;
            }
        }
        else if (cmp == GT_NE)
        {
            if (lval == 0)
            {
                /* op1 != 0  */
                ins              = INS_OR;
                useIncToSetFlags = false;
                specialCaseCmp   = true;
            }
            else if (lval == -1)
            {
                /* op1 != -1 */
                ins              = INS_AND;
                useIncToSetFlags = true;
                specialCaseCmp   = true;
            }
        }

        if (specialCaseCmp)
        {
            /* Make the comparand addressable */

            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);

            regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
            insFlags  flags   = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;

            if (op1->InReg())
            {
                regPairNo regPair = op1->gtRegPair;
                regNumber rLo     = genRegPairLo(regPair);
                regNumber rHi     = genRegPairHi(regPair);
                if (tmpMask & genRegMask(rLo))
                {
                    rTmp = rLo;
                }
                else if (tmpMask & genRegMask(rHi))
                {
                    // Use the high register as the scratch and fold the low
                    // half into it instead.
                    rTmp = rHi;
                    rHi  = rLo;
                }
                else
                {
                    rTmp = regSet.rsGrabReg(tmpMask);
                    inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
                }

                /* The register is now trashed */
                regTracker.rsTrackRegTrash(rTmp);

                if (rHi != REG_STK)
                {
                    /* Set the flags using INS_AND | INS_OR */
                    inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
                }
                else
                {
                    /* Set the flags using INS_AND | INS_OR */
                    inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
                }
            }
            else // op1 is not in a register.
            {
                rTmp = regSet.rsGrabReg(tmpMask);

                /* Load the low 32-bits of op1 */
                inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);

                /* The register is now trashed */
                regTracker.rsTrackRegTrash(rTmp);

                /* Set the flags using INS_AND | INS_OR */
                inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
            }

            /* Free up the addrReg(s) if any */
            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

            /* compares against -1, also requires an inc instruction */
            if (useIncToSetFlags)
            {
                /* Make sure the inc will set the flags */
                assert(cond->gtSetFlags());
                genIncRegBy(rTmp, 1, cond, TYP_INT);
            }

#if FEATURE_STACK_FP_X87
            // We may need a transition block
            if (bFPTransition)
            {
                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
            }
#endif
            emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
            inst_JMP(jmpKind, jumpTrue);
        }
        else // specialCaseCmp == false
        {
            /* Make the comparand addressable */
            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);

            /* Compare the high part first */

            int ival = (int)(lval >> 32);

            /* Comparing a register against 0 is easier */

            if (!ival && (op1->InReg()) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
            {
                /* Generate 'test rTmp, rTmp' */
                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
            }
            else
            {
                if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
                {
                    /* Special case: comparison of two constants */
                    // Needed as gtFoldExpr() doesn't fold longs

                    noway_assert(addrReg == 0);
                    int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);

                    /* Get the constant operand into a register */
                    rTmp = genGetRegSetToIcon(op1_hiword);

                    /* Generate 'cmp rTmp, ival' */

                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
                }
                else
                {
                    /* Generate 'cmp op1, ival' */

                    inst_TT_IV(INS_cmp, op1, ival, 4);
                }
            }

#if FEATURE_STACK_FP_X87
            // We may need a transition block
            if (bFPTransition)
            {
                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
            }
#endif
            /* Generate the appropriate jumps */

            if (cond->gtFlags & GTF_UNSIGNED)
                genJccLongHi(cmp, jumpTrue, jumpFalse, true);
            else
                genJccLongHi(cmp, jumpTrue, jumpFalse);

            /* Compare the low part second */

            ival = (int)lval;

            /* Comparing a register against 0 is easier */

            if (!ival && (op1->InReg()) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
            {
                /* Generate 'test rTmp, rTmp' */
                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
            }
            else
            {
                if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
                {
                    /* Special case: comparison of two constants */
                    // Needed as gtFoldExpr() doesn't fold longs

                    noway_assert(addrReg == 0);
                    int op1_loword = (int)op1->gtLngCon.gtLconVal;

                    /* get the constant operand into a register */
                    rTmp = genGetRegSetToIcon(op1_loword);

                    /* Generate 'cmp rTmp, ival' */

                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
                }
                else
                {
                    /* Generate 'cmp op1, ival' */

                    inst_TT_IV(INS_cmp, op1, ival, 0);
                }
            }

            /* Generate the appropriate jumps */
            genJccLongLo(cmp, jumpTrue, jumpFalse);

            genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
        }
    }
    else // (op2->gtOper != GT_CNS_LNG)
    {

        /* The operands would be reversed by physically swapping them */

        noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);

        /* Generate the first operand into a register pair */

        genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
        noway_assert(op1->InReg());

#if CPU_LOAD_STORE_ARCH
        /* Generate the second operand into a register pair */
        // Fix 388442 ARM JitStress WP7
        genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
        noway_assert(op2->InReg());
        regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
#else
        /* Make the second operand addressable */

        addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
#endif
        /* Make sure the first operand hasn't been spilled */

        genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
        noway_assert(op1->InReg());

        regPairNo regPair = op1->gtRegPair;

#if !CPU_LOAD_STORE_ARCH
        /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */

        addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
#endif

#if FEATURE_STACK_FP_X87
        // We may need a transition block
        if (bFPTransition)
        {
            jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
        }
#endif

        /* Perform the comparison - high parts */

        inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);

        if (cond->gtFlags & GTF_UNSIGNED)
            genJccLongHi(cmp, jumpTrue, jumpFalse, true);
        else
            genJccLongHi(cmp, jumpTrue, jumpFalse);

        /* Compare the low parts */

        inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
        genJccLongLo(cmp, jumpTrue, jumpFalse);

        /* Free up anything that was tied up by either operand */
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_LOAD_STORE_ARCH

        // Fix 388442 ARM JitStress WP7
        regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
        genReleaseRegPair(op2);
#else
        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
#endif
        genReleaseRegPair(op1);
    }
}
3849
3850 /*****************************************************************************
3851  *  gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
3852  *  Called by genCondJumpFlt() to generate the fcomp instruction appropriate
3853  *  to the architecture we're running on.
3854  *
3855  *  P5:
3856  *  gen_fcomp_FN:     fcomp ST(0), stk
3857  *  gen_fcomp_FS_TT:  fcomp ST(0), addr
3858  *  gen_fcompp_FS:    fcompp
3859  *    These are followed by fnstsw, sahf to get the flags in EFLAGS.
3860  *
3861  *  P6:
3862  *  gen_fcomp_FN:     fcomip ST(0), stk
3863  *  gen_fcomp_FS_TT:  fld addr, fcomip ST(0), ST(1), fstp ST(0)
3864  *      (and reverse the branch condition since addr comes first)
3865  *  gen_fcompp_FS:    fcomip, fstp
3866  *    These instructions will correctly set the EFLAGS register.
3867  *
3868  *  Return value:  These functions return true if the instruction has
3869  *    already placed its result in the EFLAGS register.
3870  */
3871
3872 bool CodeGen::genUse_fcomip()
3873 {
3874     return compiler->opts.compUseFCOMI;
3875 }
3876
3877 /*****************************************************************************
3878  *
3879  *  Sets the flag for the TYP_INT/TYP_REF comparison.
3880  *  We try to use the flags if they have already been set by a prior
3881  *  instruction.
3882  *  eg. i++; if(i<0) {}  Here, the "i++;" will have set the sign flag. We don't
3883  *                       need to compare again with zero. Just use a "INS_js"
3884  *
3885  *  Returns the flags the following jump/set instruction should use.
3886  */
3887
3888 emitJumpKind CodeGen::genCondSetFlags(GenTree* cond)
3889 {
3890     noway_assert(cond->OperIsCompare());
3891     noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
3892
3893     GenTree*   op1 = cond->gtOp.gtOp1;
3894     GenTree*   op2 = cond->gtOp.gtOp2;
3895     genTreeOps cmp = cond->OperGet();
3896
3897     if (cond->gtFlags & GTF_REVERSE_OPS)
3898     {
3899         /* Don't forget to modify the condition as well */
3900
3901         cond->gtOp.gtOp1 = op2;
3902         cond->gtOp.gtOp2 = op1;
3903         cond->SetOper(GenTree::SwapRelop(cmp));
3904         cond->gtFlags &= ~GTF_REVERSE_OPS;
3905
3906         /* Get hold of the new values */
3907
3908         cmp = cond->OperGet();
3909         op1 = cond->gtOp.gtOp1;
3910         op2 = cond->gtOp.gtOp2;
3911     }
3912
3913     // Note that op1's type may get bashed. So save it early
3914
3915     var_types op1Type     = op1->TypeGet();
3916     bool      unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
3917     emitAttr  size        = EA_UNKNOWN;
3918
3919     regMaskTP    regNeed;
3920     regMaskTP    addrReg1 = RBM_NONE;
3921     regMaskTP    addrReg2 = RBM_NONE;
3922     emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
3923
3924     bool byteCmp;
3925     bool shortCmp;
3926
3927     regMaskTP newLiveMask;
3928     regNumber op1Reg;
3929
3930     /* Are we comparing against a constant? */
3931
3932     if (op2->IsCnsIntOrI())
3933     {
3934         ssize_t ival = op2->gtIntConCommon.IconValue();
3935
3936         /* unsigned less than comparisons with 1 ('< 1' )
3937            should be transformed into '== 0' to potentially
3938            suppress a tst instruction.
3939         */
3940         if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
3941         {
3942             op2->gtIntCon.gtIconVal = ival = 0;
3943             cond->gtOper = cmp = GT_EQ;
3944         }
3945
3946         /* Comparisons against 0 can be easier */
3947
3948         if (ival == 0)
3949         {
3950             // if we can safely change the comparison to unsigned we do so
3951             if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
3952             {
3953                 unsignedCmp = true;
3954             }
3955
3956             /* unsigned comparisons with 0 should be transformed into
3957                '==0' or '!= 0' to potentially suppress a tst instruction. */
3958
3959             if (unsignedCmp)
3960             {
3961                 if (cmp == GT_GT)
3962                     cond->gtOper = cmp = GT_NE;
3963                 else if (cmp == GT_LE)
3964                     cond->gtOper = cmp = GT_EQ;
3965             }
3966
3967             /* Is this a simple zero/non-zero test? */
3968
3969             if (cmp == GT_EQ || cmp == GT_NE)
3970             {
3971                 /* Is the operand an "AND" operation? */
3972
3973                 if (op1->gtOper == GT_AND)
3974                 {
3975                     GenTree* an1 = op1->gtOp.gtOp1;
3976                     GenTree* an2 = op1->gtOp.gtOp2;
3977
3978                     /* Check for the case "expr & icon" */
3979
3980                     if (an2->IsIntCnsFitsInI32())
3981                     {
3982                         int iVal = (int)an2->gtIntCon.gtIconVal;
3983
3984                         /* make sure that constant is not out of an1's range */
3985
3986                         switch (an1->gtType)
3987                         {
3988                             case TYP_BOOL:
3989                             case TYP_BYTE:
3990                                 if (iVal & 0xffffff00)
3991                                     goto NO_TEST_FOR_AND;
3992                                 break;
3993                             case TYP_USHORT:
3994                             case TYP_SHORT:
3995                                 if (iVal & 0xffff0000)
3996                                     goto NO_TEST_FOR_AND;
3997                                 break;
3998                             default:
3999                                 break;
4000                         }
4001
4002                         if (an1->IsCnsIntOrI())
4003                         {
4004                             // Special case - Both operands of AND are consts
4005                             genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
4006                             addrReg1 = genRegMask(an1->gtRegNum);
4007                         }
4008                         else
4009                         {
4010                             addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
4011                         }
4012 #if CPU_LOAD_STORE_ARCH
4013                         if ((an1->InReg()) == 0)
4014                         {
4015                             genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
4016                             if (arm_Valid_Imm_For_Alu(iVal))
4017                             {
4018                                 inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
4019                             }
4020                             else
4021                             {
4022                                 regNumber regTmp = regSet.rsPickFreeReg();
4023                                 instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
4024                                 inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
4025                             }
4026                             genReleaseReg(an1);
4027                             addrReg1 = RBM_NONE;
4028                         }
4029                         else
4030 #endif
4031                         {
4032 #ifdef _TARGET_XARCH_
4033                             // Check to see if we can use a smaller immediate.
4034                             if ((an1->InReg()) && ((iVal & 0x0000FFFF) == iVal))
4035                             {
4036                                 var_types testType =
4037                                     (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
4038 #if CPU_HAS_BYTE_REGS
4039                                 // if we don't have byte-able register, switch to the 2-byte form
4040                                 if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
4041                                 {
4042                                     testType = TYP_USHORT;
4043                                 }
4044 #endif // CPU_HAS_BYTE_REGS
4045
4046                                 inst_TT_IV(INS_TEST, an1, iVal, testType);
4047                             }
4048                             else
4049 #endif // _TARGET_XARCH_
4050                             {
4051                                 inst_TT_IV(INS_TEST, an1, iVal);
4052                             }
4053                         }
4054
4055                         goto DONE;
4056
4057                     NO_TEST_FOR_AND:;
4058                     }
4059
4060                     // TODO: Check for other cases that can generate 'test',
4061                     // TODO: also check for a 64-bit integer zero test which
4062                     // TODO: could generate 'or lo, hi' followed by jz/jnz.
4063                 }
4064             }
4065
4066             // See what Jcc instruction we would use if we can take advantage of
4067             // the knowledge of EFLAGs.
4068
4069             if (unsignedCmp)
4070             {
4071                 /*
4072                     Unsigned comparison to 0. Using this table:
4073
4074                     ----------------------------------------------------
4075                     | Comparison | Flags Checked    | Instruction Used |
4076                     ----------------------------------------------------
4077                     |    == 0    | ZF = 1           |       je         |
4078                     ----------------------------------------------------
4079                     |    != 0    | ZF = 0           |       jne        |
4080                     ----------------------------------------------------
4081                     |     < 0    | always FALSE     |       N/A        |
4082                     ----------------------------------------------------
4083                     |    <= 0    | ZF = 1           |       je         |
4084                     ----------------------------------------------------
4085                     |    >= 0    | always TRUE      |       N/A        |
4086                     ----------------------------------------------------
4087                     |     > 0    | ZF = 0           |       jne        |
4088                     ----------------------------------------------------
4089                 */
4090                 switch (cmp)
4091                 {
4092 #ifdef _TARGET_ARM_
4093                     case GT_EQ:
4094                         jumpKind = EJ_eq;
4095                         break;
4096                     case GT_NE:
4097                         jumpKind = EJ_ne;
4098                         break;
4099                     case GT_LT:
4100                         jumpKind = EJ_NONE;
4101                         break;
4102                     case GT_LE:
4103                         jumpKind = EJ_eq;
4104                         break;
4105                     case GT_GE:
4106                         jumpKind = EJ_NONE;
4107                         break;
4108                     case GT_GT:
4109                         jumpKind = EJ_ne;
4110                         break;
4111 #elif defined(_TARGET_X86_)
4112                     case GT_EQ:
4113                         jumpKind = EJ_je;
4114                         break;
4115                     case GT_NE:
4116                         jumpKind = EJ_jne;
4117                         break;
4118                     case GT_LT:
4119                         jumpKind = EJ_NONE;
4120                         break;
4121                     case GT_LE:
4122                         jumpKind = EJ_je;
4123                         break;
4124                     case GT_GE:
4125                         jumpKind = EJ_NONE;
4126                         break;
4127                     case GT_GT:
4128                         jumpKind = EJ_jne;
4129                         break;
4130 #endif // TARGET
4131                     default:
4132                         noway_assert(!"Unexpected comparison OpCode");
4133                         break;
4134                 }
4135             }
4136             else
4137             {
4138                 /*
4139                     Signed comparison to 0. Using this table:
4140
4141                     -----------------------------------------------------
4142                     | Comparison | Flags Checked     | Instruction Used |
4143                     -----------------------------------------------------
4144                     |    == 0    | ZF = 1            |       je         |
4145                     -----------------------------------------------------
4146                     |    != 0    | ZF = 0            |       jne        |
4147                     -----------------------------------------------------
4148                     |     < 0    | SF = 1            |       js         |
4149                     -----------------------------------------------------
4150                     |    <= 0    |      N/A          |       N/A        |
4151                     -----------------------------------------------------
4152                     |    >= 0    | SF = 0            |       jns        |
4153                     -----------------------------------------------------
4154                     |     > 0    |      N/A          |       N/A        |
4155                     -----------------------------------------------------
4156                 */
4157
4158                 switch (cmp)
4159                 {
4160 #ifdef _TARGET_ARM_
4161                     case GT_EQ:
4162                         jumpKind = EJ_eq;
4163                         break;
4164                     case GT_NE:
4165                         jumpKind = EJ_ne;
4166                         break;
4167                     case GT_LT:
4168                         jumpKind = EJ_mi;
4169                         break;
4170                     case GT_LE:
4171                         jumpKind = EJ_NONE;
4172                         break;
4173                     case GT_GE:
4174                         jumpKind = EJ_pl;
4175                         break;
4176                     case GT_GT:
4177                         jumpKind = EJ_NONE;
4178                         break;
4179 #elif defined(_TARGET_X86_)
4180                     case GT_EQ:
4181                         jumpKind = EJ_je;
4182                         break;
4183                     case GT_NE:
4184                         jumpKind = EJ_jne;
4185                         break;
4186                     case GT_LT:
4187                         jumpKind = EJ_js;
4188                         break;
4189                     case GT_LE:
4190                         jumpKind = EJ_NONE;
4191                         break;
4192                     case GT_GE:
4193                         jumpKind = EJ_jns;
4194                         break;
4195                     case GT_GT:
4196                         jumpKind = EJ_NONE;
4197                         break;
4198 #endif // TARGET
4199                     default:
4200                         noway_assert(!"Unexpected comparison OpCode");
4201                         break;
4202                 }
4203                 assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
4204             }
4205             assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
4206
4207             /* Is the value a simple local variable? */
4208
4209             if (op1->gtOper == GT_LCL_VAR)
4210             {
4211                 /* Is the flags register set to the value? */
4212
4213                 if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
4214                 {
4215                     if (jumpKind != EJ_NONE)
4216                     {
4217                         addrReg1 = RBM_NONE;
4218                         genUpdateLife(op1);
4219                         goto DONE_FLAGS;
4220                     }
4221                 }
4222             }
4223
4224             /* Make the comparand addressable */
4225             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4226
4227             /* Are the condition flags set based on the value? */
4228
4229             unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
4230
4231             if (op1->InReg())
4232             {
4233                 if (genFlagsAreReg(op1->gtRegNum))
4234                 {
4235                     flags |= GTF_ZSF_SET;
4236                 }
4237             }
4238
4239             if (flags)
4240             {
4241                 if (jumpKind != EJ_NONE)
4242                 {
4243                     goto DONE_FLAGS;
4244                 }
4245             }
4246
4247             /* Is the value in a register? */
4248
4249             if (op1->InReg())
4250             {
4251                 regNumber reg = op1->gtRegNum;
4252
4253                 /* With a 'test' we can do any signed test or any test for equality */
4254
4255                 if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
4256                 {
4257                     emitAttr compareSize = emitTypeSize(op1->TypeGet());
4258
4259                     // If we have an GT_REG_VAR then the register will be properly sign/zero extended
4260                     // But only up to 4 bytes
4261                     if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
4262                     {
4263                         compareSize = EA_4BYTE;
4264                     }
4265
4266 #if CPU_HAS_BYTE_REGS
4267                     // Make sure if we require a byte compare that we have a byte-able register
4268                     if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
4269 #endif // CPU_HAS_BYTE_REGS
4270                     {
4271                         /* Generate 'test reg, reg' */
4272                         instGen_Compare_Reg_To_Zero(compareSize, reg);
4273                         goto DONE;
4274                     }
4275                 }
4276             }
4277         }
4278
4279         else // if (ival != 0)
4280         {
4281             bool smallOk = true;
4282
4283             /* make sure that constant is not out of op1's range
4284                if it is, we need to perform an int with int comparison
4285                and therefore, we set smallOk to false, so op1 gets loaded
4286                into a register
4287             */
4288
4289             /* If op1 is TYP_SHORT, and is followed by an unsigned
4290              * comparison, we can use smallOk. But we don't know which
4291              * flags will be needed. This probably doesn't happen often.
4292             */
4293             var_types gtType = op1->TypeGet();
4294
4295             switch (gtType)
4296             {
4297                 case TYP_BYTE:
4298                     if (ival != (signed char)ival)
4299                         smallOk = false;
4300                     break;
4301                 case TYP_BOOL:
4302                 case TYP_UBYTE:
4303                     if (ival != (unsigned char)ival)
4304                         smallOk = false;
4305                     break;
4306
4307                 case TYP_SHORT:
4308                     if (ival != (signed short)ival)
4309                         smallOk = false;
4310                     break;
4311                 case TYP_USHORT:
4312                     if (ival != (unsigned short)ival)
4313                         smallOk = false;
4314                     break;
4315
4316 #ifdef _TARGET_64BIT_
4317                 case TYP_INT:
4318                     if (!FitsIn<INT32>(ival))
4319                         smallOk = false;
4320                     break;
4321                 case TYP_UINT:
4322                     if (!FitsIn<UINT32>(ival))
4323                         smallOk = false;
4324                     break;
4325 #endif // _TARGET_64BIT_
4326
4327                 default:
4328                     break;
4329             }
4330
4331             if (smallOk &&                 // constant is in op1's range
4332                 !unsignedCmp &&            // signed comparison
4333                 varTypeIsSmall(gtType) &&  // smalltype var
4334                 varTypeIsUnsigned(gtType)) // unsigned type
4335             {
4336                 unsignedCmp = true;
4337             }
4338
4339             /* Make the comparand addressable */
4340             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
4341         }
4342
4343         /* Special case: comparison of two constants */
4344
4345         // Needed if Importer doesn't call gtFoldExpr()
4346
4347         if (!(op1->InReg()) && (op1->IsCnsIntOrI()))
4348         {
4349             // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
4350
4351             /* Workaround: get the constant operand into a register */
4352             genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4353
4354             noway_assert(addrReg1 == RBM_NONE);
4355             noway_assert(op1->InReg());
4356
4357             addrReg1 = genRegMask(op1->gtRegNum);
4358         }
4359
4360         /* Compare the operand against the constant */
4361
4362         if (op2->IsIconHandle())
4363         {
4364             inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
4365         }
4366         else
4367         {
4368             inst_TT_IV(INS_cmp, op1, ival);
4369         }
4370         goto DONE;
4371     }
4372
4373     //---------------------------------------------------------------------
4374     //
4375     // We reach here if op2 was not a GT_CNS_INT
4376     //
4377
4378     byteCmp  = false;
4379     shortCmp = false;
4380
4381     if (op1Type == op2->gtType)
4382     {
4383         shortCmp = varTypeIsShort(op1Type);
4384         byteCmp  = varTypeIsByte(op1Type);
4385     }
4386
4387     noway_assert(op1->gtOper != GT_CNS_INT);
4388
4389     if (op2->gtOper == GT_LCL_VAR)
4390         genMarkLclVar(op2);
4391
4392     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4393     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4394
4395     /* Are we comparing against a register? */
4396
4397     if (op2->InReg())
4398     {
4399         /* Make the comparands addressable and mark as used */
4400
4401         assert(addrReg1 == RBM_NONE);
4402         addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4403
4404         /* Is the size of the comparison byte/char/short ? */
4405
4406         if (varTypeIsSmall(op1->TypeGet()))
4407         {
4408             /* Is op2 sitting in an appropriate register? */
4409
4410             if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
4411                 goto NO_SMALL_CMP;
4412
4413             /* Is op2 of the right type for a small comparison */
4414
4415             if (op2->gtOper == GT_REG_VAR)
4416             {
4417                 if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
4418                     goto NO_SMALL_CMP;
4419             }
4420             else
4421             {
4422                 if (op1->gtType != op2->gtType)
4423                     goto NO_SMALL_CMP;
4424             }
4425
4426             if (varTypeIsUnsigned(op1->TypeGet()))
4427                 unsignedCmp = true;
4428         }
4429
4430         assert(addrReg2 == RBM_NONE);
4431
4432         genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4433         addrReg2 = genRegMask(op2->gtRegNum);
4434         addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
4435         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4436         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4437
4438         /* Compare against the register */
4439
4440         inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
4441
4442         goto DONE;
4443
4444     NO_SMALL_CMP:
4445
4446         // op1 has been made addressable and is marked as in use
4447         // op2 is un-generated
4448         assert(addrReg2 == 0);
4449
4450         if ((op1->InReg()) == 0)
4451         {
4452             regNumber reg1 = regSet.rsPickReg();
4453
4454             noway_assert(varTypeIsSmall(op1->TypeGet()));
4455             instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->InReg()) != 0);
4456
4457             // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
4458             // so we need to make sure it is still valid.  Note that at this point, reg1 is
4459             // *not* marked as in use, and it is possible for it to be used in the address
4460             // mode expression, but that is OK, because we are done with expression after
4461             // this.  We only need reg1.
4462             addrReg1 = genKeepAddressable(op1, addrReg1);
4463             inst_RV_TT(ins, reg1, op1);
4464             regTracker.rsTrackRegTrash(reg1);
4465
4466             genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4467             addrReg1 = 0;
4468
4469             genMarkTreeInReg(op1, reg1);
4470
4471             regSet.rsMarkRegUsed(op1);
4472             addrReg1 = genRegMask(op1->gtRegNum);
4473         }
4474
4475         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4476         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4477
4478         goto DONE_OP1;
4479     }
4480
4481     // We come here if op2 is not enregistered or not in a "good" register.
4482
4483     assert(addrReg1 == 0);
4484
4485     // Determine what registers go live between op1 and op2
4486     newLiveMask = genNewLiveRegMask(op1, op2);
4487
4488     // Setup regNeed with the set of register that we suggest for op1 to be in
4489     //
4490     regNeed = RBM_ALLINT;
4491
4492     // avoid selecting registers that get newly born in op2
4493     regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
4494
4495     // avoid selecting op2 reserved regs
4496     regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
4497
4498 #if CPU_HAS_BYTE_REGS
4499     // if necessary setup regNeed to select just the byte-able registers
4500     if (byteCmp)
4501         regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
4502 #endif // CPU_HAS_BYTE_REGS
4503
4504     // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
4505     //
4506     genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
4507     noway_assert(op1->InReg());
4508
4509     op1Reg = op1->gtRegNum;
4510
4511     // Setup regNeed with the set of register that we require for op1 to be in
4512     //
4513     regNeed = RBM_ALLINT;
4514
4515 #if CPU_HAS_BYTE_REGS
4516     // if necessary setup regNeed to select just the byte-able registers
4517     if (byteCmp)
4518         regNeed &= RBM_BYTE_REGS;
4519 #endif // CPU_HAS_BYTE_REGS
4520
4521     // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
4522     regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
4523
4524     // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
4525     regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
4526
4527     // Did we end up in an acceptable register?
4528     // and do we have an acceptable free register available to grab?
4529     //
4530     if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
4531     {
4532         // Grab an acceptable register
4533         regNumber newReg = regSet.rsGrabReg(regNeed);
4534
4535         noway_assert(op1Reg != newReg);
4536
4537         /* Update the value in the target register */
4538
4539         regTracker.rsTrackRegCopy(newReg, op1Reg);
4540
4541         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
4542
4543         /* The value has been transferred to 'reg' */
4544
4545         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
4546             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
4547
4548         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
4549
4550         /* The value is now in an appropriate register */
4551
4552         op1->gtRegNum = newReg;
4553     }
4554     noway_assert(op1->InReg());
4555     op1Reg = op1->gtRegNum;
4556
4557     genUpdateLife(op1);
4558
4559     /* Mark the register as 'used' */
4560     regSet.rsMarkRegUsed(op1);
4561
4562     addrReg1 = genRegMask(op1Reg);
4563
4564     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4565     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4566
4567 DONE_OP1:
4568
4569     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4570     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4571     noway_assert(op1->InReg());
4572
4573     // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
4574     // when byteCmp is true we will perform a byte sized cmp instruction
4575     // and that instruction requires that any registers used are byte-able ones.
4576     //
4577     regNeed = RBM_ALLINT;
4578
4579 #if CPU_HAS_BYTE_REGS
4580     // if necessary setup regNeed to select just the byte-able registers
4581     if (byteCmp)
4582         regNeed &= RBM_BYTE_REGS;
4583 #endif // CPU_HAS_BYTE_REGS
4584
4585     /* Make the comparand addressable */
4586     assert(addrReg2 == 0);
4587     addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
4588
4589     /*  Make sure the first operand is still in a register; if
4590         it's been spilled, we have to make sure it's reloaded
4591         into a byte-addressable register if needed.
4592         Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong.
4593      */
4594
4595     assert(addrReg1 != 0);
4596     genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
4597
4598     noway_assert(op1->InReg());
4599     noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
4600
4601     addrReg1 = genRegMask(op1->gtRegNum);
4602     regSet.rsLockUsedReg(addrReg1);
4603
4604     /* Make sure that op2 is addressable. If we are going to do a
4605        byte-comparison, we need it to be in a byte register. */
4606
4607     if (byteCmp && (op2->InReg()))
4608     {
4609         genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
4610         addrReg2 = genRegMask(op2->gtRegNum);
4611     }
4612     else
4613     {
4614         addrReg2 = genKeepAddressable(op2, addrReg2);
4615     }
4616
4617     regSet.rsUnlockUsedReg(addrReg1);
4618
4619     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4620     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4621
4622     if (byteCmp || shortCmp)
4623     {
4624         size = emitTypeSize(op2->TypeGet());
4625         if (varTypeIsUnsigned(op1Type))
4626             unsignedCmp = true;
4627     }
4628     else
4629     {
4630         size = emitActualTypeSize(op2->TypeGet());
4631     }
4632
4633     /* Perform the comparison */
4634     inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
4635
4636 DONE:
4637
4638     jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
4639
4640 DONE_FLAGS: // We have determined what jumpKind to use
4641
4642     genUpdateLife(cond);
4643
4644     /* The condition value is dead at the jump that follows */
4645
4646     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4647     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4648     genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4649     genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
4650
4651     noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
4652
4653     return jumpKind;
4654 }
4655
4656 /*****************************************************************************/
4657 /*****************************************************************************/
4658 /*****************************************************************************
4659  *
4660  *  Generate code to jump to the jump target of the current basic block if
4661  *  the given relational operator yields 'true'.
4662  */
4663
4664 void CodeGen::genCondJump(GenTree* cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
4665 {
4666     BasicBlock* jumpTrue;
4667     BasicBlock* jumpFalse;
4668
4669     GenTree*   op1 = cond->gtOp.gtOp1;
4670     GenTree*   op2 = cond->gtOp.gtOp2;
4671     genTreeOps cmp = cond->OperGet();
4672
4673     if (destTrue)
4674     {
4675         jumpTrue  = destTrue;
4676         jumpFalse = destFalse;
4677     }
4678     else
4679     {
4680         noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
4681
4682         jumpTrue  = compiler->compCurBB->bbJumpDest;
4683         jumpFalse = compiler->compCurBB->bbNext;
4684     }
4685
4686     noway_assert(cond->OperIsCompare());
4687
4688     /* Make sure the more expensive operand is 'op1' */
4689     noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
4690
4691     if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
4692     {
4693         /* Don't forget to modify the condition as well */
4694
4695         cond->gtOp.gtOp1 = op2;
4696         cond->gtOp.gtOp2 = op1;
4697         cond->SetOper(GenTree::SwapRelop(cmp));
4698         cond->gtFlags &= ~GTF_REVERSE_OPS;
4699
4700         /* Get hold of the new values */
4701
4702         cmp = cond->OperGet();
4703         op1 = cond->gtOp.gtOp1;
4704         op2 = cond->gtOp.gtOp2;
4705     }
4706
4707     /* What is the type of the operand? */
4708
4709     switch (genActualType(op1->gtType))
4710     {
4711         case TYP_INT:
4712         case TYP_REF:
4713         case TYP_BYREF:
4714             emitJumpKind jumpKind;
4715
4716             // Check if we can use the currently set flags. Else set them
4717
4718             jumpKind = genCondSetFlags(cond);
4719
4720 #if FEATURE_STACK_FP_X87
4721             if (bStackFPFixup)
4722             {
4723                 genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
4724             }
4725             else
4726 #endif
4727             {
4728                 /* Generate the conditional jump */
4729                 inst_JMP(jumpKind, jumpTrue);
4730             }
4731
4732             return;
4733
4734         case TYP_LONG:
4735 #if FEATURE_STACK_FP_X87
4736             if (bStackFPFixup)
4737             {
4738                 genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
4739             }
4740             else
4741 #endif
4742             {
4743                 genCondJumpLng(cond, jumpTrue, jumpFalse);
4744             }
4745             return;
4746
4747         case TYP_FLOAT:
4748         case TYP_DOUBLE:
4749 #if FEATURE_STACK_FP_X87
4750             genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
4751 #else
4752             genCondJumpFloat(cond, jumpTrue, jumpFalse);
4753 #endif
4754             return;
4755
4756         default:
4757 #ifdef DEBUG
4758             compiler->gtDispTree(cond);
4759 #endif
4760             unreached(); // unexpected/unsupported 'jtrue' operands type
4761     }
4762 }
4763
4764 /*****************************************************************************
 *  Spill registers to check that callers can handle it.
4766  */
4767
4768 #ifdef DEBUG
4769
4770 void CodeGen::genStressRegs(GenTree* tree)
4771 {
4772     if (regSet.rsStressRegs() < 2)
4773         return;
4774
4775     /* Spill as many registers as possible. Callers should be prepared
4776        to handle this case.
4777        But don't spill trees with no size (TYP_STRUCT comes to mind) */
4778
4779     {
4780         regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
4781         regNumber regNum;
4782         regMaskTP regBit;
4783
4784         for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
4785         {
4786             if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
4787                 (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
4788             {
4789                 regSet.rsSpillReg(regNum);
4790
4791                 spillRegs &= regSet.rsMaskUsed;
4792
4793                 if (!spillRegs)
4794                     break;
4795             }
4796         }
4797     }
4798
4799     regMaskTP trashRegs = regSet.rsRegMaskFree();
4800
4801     if (trashRegs == RBM_NONE)
4802         return;
4803
4804     /* It is sometimes reasonable to expect that calling genCodeForTree()
4805        on certain trees won't spill anything */
4806
4807     if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
4808         handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
4809     {
4810         trashRegs &= ~(RBM_EXCEPTION_OBJECT);
4811     }
4812
4813     // If genCodeForTree() effectively gets called a second time on the same tree
4814
4815     if (tree->InReg())
4816     {
4817         noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
4818         trashRegs &= ~genRegMask(tree->gtRegNum);
4819     }
4820
4821     if (tree->gtType == TYP_INT && tree->OperIsSimple())
4822     {
4823         GenTree* op1 = tree->gtOp.gtOp1;
4824         GenTree* op2 = tree->gtOp.gtOp2;
4825         if (op1 && (op1->InReg()))
4826             trashRegs &= ~genRegMask(op1->gtRegNum);
4827         if (op2 && (op2->InReg()))
4828             trashRegs &= ~genRegMask(op2->gtRegNum);
4829     }
4830
4831     if (compiler->compCurBB == compiler->genReturnBB)
4832     {
4833         if (compiler->info.compCallUnmanaged)
4834         {
4835             LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
4836             if (varDsc->lvRegister)
4837                 trashRegs &= ~genRegMask(varDsc->lvRegNum);
4838         }
4839     }
4840
4841     /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
4842        to save/restore the register. We try to be as unintrusive
4843        as possible */
4844
4845     noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
4846     // This is obviously false for ARM, but this function is never called.
4847     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
4848     {
4849         regMaskTP regMask = genRegMask(reg);
4850
4851         if (regSet.rsRegsModified(regMask & trashRegs))
4852             genSetRegToIcon(reg, 0);
4853     }
4854 }
4855
4856 #endif // DEBUG
4857
4858 /*****************************************************************************
4859  *
4860  *  Generate code for a GTK_CONST tree
4861  */
4862
// Generate code for a GTK_CONST tree: materialize the integer constant into a
// register (reusing a register that already holds the value when profitable)
// and report the result via genCodeForTree_DONE.
//
// Arguments:
//    tree    - the constant node (must satisfy IsCnsIntOrI())
//    destReg - preferred register mask for the result (RBM_NONE = no preference)
//    bestReg - preferred subset of destReg
void CodeGen::genCodeForTreeConst(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    noway_assert(tree->IsCnsIntOrI());
    GenTreeIntConCommon* con       = tree->AsIntConCommon();
    ssize_t              ival      = con->IconValue();
    bool                 needReloc = con->ImmedValNeedsReloc(compiler); // handle constants need a relocation
    regMaskTP            needReg   = destReg;
    regNumber            reg;

#if REDUNDANT_LOAD

    /* If we are targeting destReg and ival is zero           */
    /* we would rather xor needReg than copy another register */

    if (!needReloc)
    {
        bool reuseConstantInReg = false;

        // With no preferred target register, always try to reuse a register
        // that already holds this constant.
        if (destReg == RBM_NONE)
            reuseConstantInReg = true;

#ifdef _TARGET_ARM_
        // If we can set a register to a constant with a small encoding, then do that.
        // Assume we'll get a low register if needReg has low registers as options.
        if (!reuseConstantInReg &&
            !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
        {
            reuseConstantInReg = true;
        }
#else
        // On x86, zero is cheap to materialize (xor reg, reg), so only bother
        // reusing an existing register for non-zero constants.
        if (!reuseConstantInReg && ival != 0)
            reuseConstantInReg = true;
#endif

        if (reuseConstantInReg)
        {
            /* Is the constant already in register? If so, use this register */

            reg = regTracker.rsIconIsInReg(ival);
            if (reg != REG_NA)
                goto REG_LOADED;
        }
    }

#endif // REDUNDANT_LOAD

    reg = regSet.rsPickReg(needReg, bestReg);

    /* If the constant is a handle, we need a reloc to be applied to it */

    if (needReloc)
    {
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
        regTracker.rsTrackRegTrash(reg);
    }
    else
    {
        genSetRegToIcon(reg, ival, tree->TypeGet());
    }

REG_LOADED:

#ifdef DEBUG
    /* Special case: GT_CNS_INT - Restore the current live set if it was changed */

    if (!genTempLiveChg)
    {
        VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
        genTempLiveChg = true;
    }
#endif

    gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
    genCodeForTree_DONE(tree, reg);
}
4938
4939 /*****************************************************************************
4940  *
4941  *  Generate code for a GTK_LEAF tree
4942  */
4943
// Generate code for a GTK_LEAF tree: produce the leaf's value in a register
// (or REG_STK for value-less leaves) and record the result with
// genCodeForTree_DONE.
//
// Arguments:
//    tree    - the leaf node
//    destReg - preferred register mask for the result (RBM_NONE = no preference)
//    bestReg - preferred subset of destReg
void CodeGen::genCodeForTreeLeaf(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    genTreeOps oper    = tree->OperGet();
    regNumber  reg     = DUMMY_INIT(REG_CORRUPT);
    regMaskTP  regs    = regSet.rsMaskUsed; // NOTE(review): 'regs' appears unused below — confirm
    regMaskTP  needReg = destReg;
    size_t     size;

    noway_assert(tree->OperKind() & GTK_LEAF);

    switch (oper)
    {
        case GT_REG_VAR:
            NO_WAY("GT_REG_VAR should have been caught above");
            break;

        case GT_LCL_VAR:

            /* Does the variable live in a register? */

            if (genMarkLclVar(tree))
            {
                genCodeForTree_REG_VAR1(tree);
                return;
            }

#if REDUNDANT_LOAD

            /* Is the local variable already in register? */

            reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);

            if (reg != REG_NA)
            {
                /* Use the register the variable happens to be in */
                regMaskTP regMask = genRegMask(reg);

                // If the register that it was in isn't one of the needRegs
                // then try to move it into a needReg register

                if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
                {
                    regNumber rg2 = reg;
                    reg           = regSet.rsPickReg(needReg, bestReg);
                    if (reg != rg2)
                    {
                        regMask = genRegMask(reg);
                        inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
                    }
                }

                gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
                regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
                break;
            }

#endif
            goto MEM_LEAF;

        case GT_LCL_FLD:

            // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
            // to worry about it being enregistered.
            noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
            goto MEM_LEAF;

        case GT_CLS_VAR:

        MEM_LEAF:

            /* Pick a register for the value */

            reg = regSet.rsPickReg(needReg, bestReg);

            /* Load the variable into the register */

            size = genTypeSize(tree->gtType);

            if (size < EA_4BYTE)
            {
                // Small types are loaded with a sign/zero-extending move.
                instruction ins = ins_Move_Extend(tree->TypeGet(), tree->InReg());
                inst_RV_TT(ins, reg, tree, 0);

                /* We've now "promoted" the tree-node to TYP_INT */

                tree->gtType = TYP_INT;
            }
            else
            {
                inst_RV_TT(INS_mov, reg, tree, 0);
            }

            regTracker.rsTrackRegTrash(reg);

            gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());

            // Tell the register tracker what the register now holds, so later
            // redundant loads of the same location can be elided.
            switch (oper)
            {
                case GT_CLS_VAR:
                    regTracker.rsTrackRegClsVar(reg, tree);
                    break;
                case GT_LCL_VAR:
                    regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
                    break;
                case GT_LCL_FLD:
                    break;
                default:
                    noway_assert(!"Unexpected oper");
            }

#ifdef _TARGET_ARM_
            if (tree->gtFlags & GTF_IND_VOLATILE)
            {
                // Emit a memory barrier instruction after the load
                instGen_MemoryBarrier();
            }
#endif

            break;

        case GT_NO_OP:
            instGen(INS_nop);
            reg = REG_STK;
            break;

#if !FEATURE_EH_FUNCLETS
        case GT_END_LFIN:

            /* Have to clear the shadowSP of the nesting level which
               encloses the finally */

            unsigned finallyNesting;
            finallyNesting = (unsigned)tree->gtVal.gtVal1;
            noway_assert(tree->gtVal.gtVal1 <
                         compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
            noway_assert(finallyNesting < compiler->compHndBBtabCount);

            // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
            unsigned filterEndOffsetSlotOffs;
            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
                          TARGET_POINTER_SIZE); // below doesn't underflow.
            filterEndOffsetSlotOffs =
                (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);

            // Zero the shadow-SP slot for this nesting level.
            unsigned curNestingSlotOffs;
            curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
            instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
            reg = REG_STK;
            break;
#endif // !FEATURE_EH_FUNCLETS

        case GT_CATCH_ARG:

            noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

            /* Catch arguments get passed in a register. genCodeForBBlist()
               would have marked it as holding a GC object, but not used. */

            noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
            reg = REG_EXCEPTION_OBJECT;
            break;

        case GT_JMP:
            genCodeForTreeLeaf_GT_JMP(tree);
            return;

        case GT_MEMORYBARRIER:
            // Emit the memory barrier instruction
            instGen_MemoryBarrier();
            reg = REG_STK;
            break;

        default:
#ifdef DEBUG
            compiler->gtDispTree(tree);
#endif
            noway_assert(!"unexpected leaf");
    }

    noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
    genCodeForTree_DONE(tree, reg);
}
5126
5127 GenTree* CodeGen::genCodeForCommaTree(GenTree* tree)
5128 {
5129     while (tree->OperGet() == GT_COMMA)
5130     {
5131         GenTree* op1 = tree->gtOp.gtOp1;
5132         genEvalSideEffects(op1);
5133         gcInfo.gcMarkRegPtrVal(op1);
5134
5135         tree = tree->gtOp.gtOp2;
5136     }
5137     return tree;
5138 }
5139
5140 /*****************************************************************************
5141  *
 *  Generate code for a leaf node of type GT_JMP
5143  */
5144
void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTree* tree)
{
    // A GT_JMP is a tail-jump to another method that reuses this frame's arguments.
    // Before jumping we must (1) fire the profiler tailcall callback if needed,
    // (2) run the PInvoke epilog if this method calls unmanaged code, and
    // (3) put every argument back where the callee expects it: stack args back on
    // the stack, register args back in their original argument registers.
    noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);

#ifdef PROFILING_SUPPORTED
    if (compiler->compIsProfilerHookNeeded())
    {
        /* fire the event at the call site */
        unsigned saveStackLvl2 = genStackLevel;

        compiler->info.compProfilerCallback = true;

#ifdef _TARGET_X86_
        //
        // Push the profilerHandle
        //
        // Save any argument registers currently holding live values; the helper
        // call below may trash them.
        regMaskTP byrefPushedRegs;
        regMaskTP norefPushedRegs;
        regMaskTP pushedArgRegs =
            genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
                        &norefPushedRegs);

        if (compiler->compProfilerMethHndIndirected)
        {
            // Handle is stored indirectly; push the value at the handle's address.
            getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
                                       (ssize_t)compiler->compProfilerMethHnd);
        }
        else
        {
            inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
        }
        genSinglePush();

        genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
                          sizeof(int) * 1, // argSize
                          EA_UNKNOWN);     // retSize

        //
        // Adjust the number of stack slots used by this managed method if necessary.
        //
        if (compiler->fgPtrArgCntMax < 1)
        {
            JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
            compiler->fgPtrArgCntMax = 1;
        }

        genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
#elif _TARGET_ARM_
        // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node.
        // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target.
        regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
        noway_assert(argReg == REG_PROFILER_JMP_ARG);
        regSet.rsLockReg(RBM_PROFILER_JMP_USED);

        if (compiler->compProfilerMethHndIndirected)
        {
            // Load the handle through its indirection cell.
            getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
            regTracker.rsTrackRegTrash(argReg);
        }
        else
        {
            instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
        }

        genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
                          0,           // argSize
                          EA_UNKNOWN); // retSize

        regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
#else
        NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
#endif //_TARGET_X86_

        /* Restore the stack level */
        SetStackLevel(saveStackLvl2);
    }
#endif // PROFILING_SUPPORTED

    /* This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
     * call genPInvokeMethodEpilog anywhere that we have a method return.  We should really
     * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
     */

    if (compiler->info.compCallUnmanaged)
    {
        genPInvokeMethodEpilog();
    }

    // Make sure register arguments are in their initial registers
    // and stack arguments are put back as well.
    //
    // This does not deal with circular dependencies of register
    // arguments, which is safe because RegAlloc prevents that by
    // not enregistering any RegArgs when a JMP opcode is used.

    if (compiler->info.compArgsCount == 0)
    {
        return;
    }

    unsigned   varNum;
    LclVarDsc* varDsc;

    // First move any enregistered stack arguments back to the stack
    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
    {
        noway_assert(varDsc->lvIsParam);
        // Only stack-passed args that were promoted into a register need a store-back.
        if (varDsc->lvIsRegArg || !varDsc->lvRegister)
            continue;

        /* Argument was passed on the stack, but ended up in a register
         * Store it back to the stack */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_64BIT_
        if (varDsc->TypeGet() == TYP_LONG)
        {
            /* long - at least the low half must be enregistered */

            getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);

            /* Is the upper half also enregistered? */

            if (varDsc->lvOtherReg != REG_STK)
            {
                getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
            }
        }
        else
#endif // _TARGET_64BIT_
        {
            getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
                                      varNum, 0);
        }
    }

#ifdef _TARGET_ARM_
    // Tracks which argument registers were explicitly reloaded below, so the
    // varargs fix-up loop at the end knows which ones still need restoring.
    regMaskTP fixedArgsMask = RBM_NONE;
#endif

    // Next move any un-enregistered register arguments back to their register
    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
    {
        /* Is this variable a register arg? */

        if (!varDsc->lvIsRegArg)
            continue;

        /* Register argument */

        noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
        noway_assert(!varDsc->lvRegister);

        /* Reload it from the stack */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_64BIT_
        if (varDsc->TypeGet() == TYP_LONG)
        {
            /* long - at least the low half must be enregistered */

            getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
            regTracker.rsTrackRegTrash(varDsc->lvArgReg);

            /* Also assume the upper half also enregistered */

            getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
                                      sizeof(int));
            regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));

#ifdef _TARGET_ARM_
            fixedArgsMask |= genRegMask(varDsc->lvArgReg);
            fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
#endif
        }
        else
#endif // _TARGET_64BIT_
#ifdef _TARGET_ARM_
            if (varDsc->lvIsHfaRegArg())
        {
            // Homogeneous FP aggregate: reload element-by-element into
            // consecutive FP argument registers.
            const var_types   elemType = varDsc->GetHfaType();
            const instruction loadOp   = ins_Load(elemType);
            const emitAttr    size     = emitTypeSize(elemType);
            regNumber         argReg   = varDsc->lvArgReg;
            const unsigned    maxSize  = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);

            for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
            {
                getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
                assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
                argReg = regNextOfType(argReg, elemType);
            }
        }
        else if (varDsc->TypeGet() == TYP_STRUCT)
        {
            const var_types   elemType = TYP_INT; // we pad everything out to at least 4 bytes
            const instruction loadOp   = ins_Load(elemType);
            const emitAttr    size     = emitTypeSize(elemType);
            regNumber         argReg   = varDsc->lvArgReg;
            const unsigned    maxSize  = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);

            for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
            {
                getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
                regTracker.rsTrackRegTrash(argReg);

                fixedArgsMask |= genRegMask(argReg);

                argReg = genRegArgNext(argReg);
            }
        }
        else
#endif //_TARGET_ARM_
        {
            var_types loadType = varDsc->TypeGet();
            regNumber argReg   = varDsc->lvArgReg; // incoming arg register
            bool      twoParts = false;

            if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
            {
                // Varargs pass floating-point values in integer registers;
                // reload as integer-sized pieces.
#ifndef _TARGET_64BIT_
                if (loadType == TYP_DOUBLE)
                    twoParts = true;
#endif
                loadType = TYP_I_IMPL;
                assert(isValidIntArgReg(argReg));
            }

            getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
            regTracker.rsTrackRegTrash(argReg);

#ifdef _TARGET_ARM_
            fixedArgsMask |= genRegMask(argReg);
#endif
            if (twoParts)
            {
                // Second half of a varargs double goes in the next int arg register.
                argReg = genRegArgNext(argReg);
                assert(isValidIntArgReg(argReg));

                getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
                regTracker.rsTrackRegTrash(argReg);

#ifdef _TARGET_ARM_
                fixedArgsMask |= genRegMask(argReg);
#endif
            }
        }
    }

#ifdef _TARGET_ARM_
    // Check if we have any non-fixed args possibly in the arg registers.
    if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
    {
        noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);

        regNumber regDeclArgs = REG_ARG_FIRST;

        // Skip the 'this' pointer.
        if (!compiler->info.compIsStatic)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip the 'generic context.'
        if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip any 'return buffer arg.'
        if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip the 'vararg cookie.'
        regDeclArgs = REG_NEXT(regDeclArgs);

        // Also add offset for the vararg cookie.
        int offset = REGSIZE_BYTES;

        // Load all the variable arguments in registers back to their registers.
        for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
        {
            if (!(fixedArgsMask & genRegMask(reg)))
            {
                getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
                regTracker.rsTrackRegTrash(reg);
            }
            offset += REGSIZE_BYTES;
        }
    }
#endif // _TARGET_ARM_
}
5439
5440 /*****************************************************************************
5441  *
5442  *  Check if a variable is assigned to in a tree.  The variable number is
5443  *  passed in pCallBackData.  If the variable is assigned to, return
5444  *  Compiler::WALK_ABORT.  Otherwise return Compiler::WALK_CONTINUE.
5445  */
5446 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTree** pTree, Compiler::fgWalkData* data)
5447 {
5448     GenTree* tree = *pTree;
5449     if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
5450         (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
5451     {
5452         return Compiler::WALK_ABORT;
5453     }
5454
5455     return Compiler::WALK_CONTINUE;
5456 }
5457
5458 regNumber CodeGen::genIsEnregisteredIntVariable(GenTree* tree)
5459 {
5460     unsigned   varNum;
5461     LclVarDsc* varDsc;
5462
5463     if (tree->gtOper == GT_LCL_VAR)
5464     {
5465         /* Does the variable live in a register? */
5466
5467         varNum = tree->gtLclVarCommon.gtLclNum;
5468         noway_assert(varNum < compiler->lvaCount);
5469         varDsc = compiler->lvaTable + varNum;
5470
5471         if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5472         {
5473             return varDsc->lvRegNum;
5474         }
5475     }
5476
5477     return REG_NA;
5478 }
5479
5480 // inline
5481 void CodeGen::unspillLiveness(genLivenessSet* ls)
5482 {
5483     // Only try to unspill the registers that are missing from the currentLiveRegs
5484     //
5485     regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5486     regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
5487     cannotSpillMask &= ~currentLiveRegs;
5488
5489     // Typically this will always be true and we will return
5490     //
5491     if (cannotSpillMask == 0)
5492         return;
5493
5494     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5495     {
5496         // Is this a register that we cannot leave in the spilled state?
5497         //
5498         if ((cannotSpillMask & genRegMask(reg)) == 0)
5499             continue;
5500
5501         RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
5502
5503         // Was it spilled, if not then skip it.
5504         //
5505         if (!spill)
5506             continue;
5507
5508         noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
5509
5510         regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5511     }
5512 }
5513
5514 /*****************************************************************************
5515  *
5516  *  Generate code for a qmark colon
5517  */
5518
void CodeGen::genCodeForQmark(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    // Generates code for a GT_QMARK (?:) node.  First tries a branch-free CMOV
    // encoding; otherwise emits compare + branches, taking care that register
    // spills and liveness are identical along the 'then' and 'else' paths.
    GenTree*  op1 = tree->gtOp.gtOp1;
    GenTree*  op2 = tree->gtOp.gtOp2;
    regNumber reg;
    regMaskTP regs    = regSet.rsMaskUsed;
    regMaskTP needReg = destReg;

    noway_assert(compiler->compQmarkUsed);
    noway_assert(tree->gtOper == GT_QMARK);
    noway_assert(op1->OperIsCompare());
    noway_assert(op2->gtOper == GT_COLON);

    GenTree* thenNode = op2->AsColon()->ThenNode();
    GenTree* elseNode = op2->AsColon()->ElseNode();

    /* If elseNode is a Nop node you must reverse the
       thenNode and elseNode prior to reaching here!
       (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */

    noway_assert(!elseNode->IsNothingNode());

    /* Try to implement the qmark colon using a CMOV.  If we can't for
       whatever reason, this will return false and we will implement
       it using regular branching constructs. */

    if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
        return;

    /*
        This is a ?: operator; generate code like this:

            condition_compare
            jmp_if_true lab_true

        lab_false:
            op1 (false = 'else' part)
            jmp lab_done

        lab_true:
            op2 (true = 'then' part)

        lab_done:


        NOTE: If no 'then' part we do not generate the 'jmp lab_done'
            or the 'lab_done' label
    */

    BasicBlock* lab_true;
    BasicBlock* lab_false;
    BasicBlock* lab_done;

    genLivenessSet entryLiveness;
    genLivenessSet exitLiveness;

    lab_true  = genCreateTempLabel();
    lab_false = genCreateTempLabel();

#if FEATURE_STACK_FP_X87
    /* Spill any register that hold partial values so that the exit liveness
       from sides is the same */
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
    regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;

    // spillMask should be the whole FP stack
    noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
#endif

    SpillTempsStackFP(regSet.rsMaskUsedFloat);
    noway_assert(regSet.rsMaskUsedFloat == 0);
#endif

    /* Before we generate code for qmark, we spill all the currently used registers
       that conflict with the registers used in the qmark tree. This is to avoid
       introducing spills that only occur on either the 'then' or 'else' side of
       the tree, but not both identically. We need to be careful with enregistered
       variables that are used; see below.
    */

    if (regSet.rsMaskUsed)
    {
        /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
           variable), then it may not get spilled. However, the variable may
           then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
           may get spilled from one side and not the other. So unmark regSet.rsMaskVars
           before spilling regSet.rsMaskUsed */

        regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
        regMaskTP rsAdditional           = RBM_NONE;

        // For each multi-use of an enregistered variable, we need to determine if
        // it can get spilled inside the qmark colon.  This can only happen if
        // its life ends somewhere in the qmark colon.  We have the following
        // cases:
        // 1) Variable is dead at the end of the colon -- needs to be spilled
        // 2) Variable is alive at the end of the colon -- needs to be spilled
        //    iff it is assigned to in the colon.  In order to determine that, we
        //    examine the GTF_ASG flag to see if any assignments were made in the
        //    colon.  If there are any, we need to do a tree walk to see if this
        //    variable is the target of an assignment.  This treewalk should not
        //    happen frequently.
        if (rsAdditionalCandidates)
        {
#ifdef DEBUG
            if (compiler->verbose)
            {
                Compiler::printTreeID(tree);
                printf(": Qmark-Colon additional spilling candidates are ");
                dspRegMask(rsAdditionalCandidates);
                printf("\n");
            }
#endif

            // If any candidates are not alive at the GT_QMARK node, then they
            // need to be spilled

            const VARSET_TP& rsLiveNow(compiler->compCurLife);
            VARSET_TP rsLiveAfter(compiler->fgUpdateLiveSet(compiler->compCurLife, compiler->compCurLifeTree, tree));

            VARSET_TP regVarLiveNow(VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));

            VarSetOps::Iter iter(compiler, regVarLiveNow);
            unsigned        varIndex = 0;
            while (iter.NextElem(&varIndex))
            {
                // Find the variable in compiler->lvaTable
                unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
                LclVarDsc* varDsc = compiler->lvaTable + varNum;

#if !FEATURE_FP_REGALLOC
                if (varDsc->IsFloatRegType())
                    continue;
#endif

                noway_assert(varDsc->lvRegister);

                regMaskTP regBit;

                if (varTypeIsFloating(varDsc->TypeGet()))
                {
                    regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
                }
                else
                {
                    regBit = genRegMask(varDsc->lvRegNum);

                    // For longs we may need to spill both regs
                    if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
                        regBit |= genRegMask(varDsc->lvOtherReg);
                }

                // Is it one of our reg-use vars?  If not, we don't need to spill it.
                regBit &= rsAdditionalCandidates;
                if (!regBit)
                    continue;

                // Is the variable live at the end of the colon?
                if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
                {
                    // Variable is alive at the end of the colon.  Was it assigned
                    // to inside the colon?

                    if (!(op2->gtFlags & GTF_ASG))
                        continue;

                    if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
                        Compiler::WALK_ABORT)
                    {
                        // Variable was assigned to, so we need to spill it.

                        rsAdditional |= regBit;
#ifdef DEBUG
                        if (compiler->verbose)
                        {
                            Compiler::printTreeID(tree);
                            printf(": Qmark-Colon candidate ");
                            dspRegMask(regBit);
                            printf("\n");
                            printf("    is assigned to inside colon and will be spilled\n");
                        }
#endif
                    }
                }
                else
                {
                    // Variable is not alive at the end of the colon.  We need to spill it.

                    rsAdditional |= regBit;
#ifdef DEBUG
                    if (compiler->verbose)
                    {
                        Compiler::printTreeID(tree);
                        printf(": Qmark-Colon candidate ");
                        dspRegMask(regBit);
                        printf("\n");
                        printf("    is alive at end of colon and will be spilled\n");
                    }
#endif
                }
            }

#ifdef DEBUG
            if (compiler->verbose)
            {
                Compiler::printTreeID(tree);
                printf(": Qmark-Colon approved additional spilling candidates are ");
                dspRegMask(rsAdditional);
                printf("\n");
            }
#endif
        }

        noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);

        // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
        // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
        // we will have unbalanced spills and generate bad code.
        regMaskTP rsSpill =
            ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;

#ifdef DEBUG
        // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
        // 'bad' registers, causing spills. So, just force all used registers to get spilled
        // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
        // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
        // unspilled while generating that same tree.

        if (regSet.rsStressRegs() >= 1)
        {
            rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
        }
#endif // DEBUG

        if (rsSpill)
        {
            // Remember which registers hold pointers. We will spill
            // them, but the code that follows will fetch reg vars from
            // the registers, so we need that gc compiler->info.
            regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
            regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;

            // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
            // So, pretend there aren't any, and spill them anyway. This will only occur
            // if rsAdditional is non-empty.
            regMaskTP rsTemp = regSet.rsMaskVars;
            regSet.ClearMaskVars();

            regSet.rsSpillRegs(rsSpill);

            // Restore gc tracking masks.
            gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
            gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;

            // Set regSet.rsMaskVars back to normal
            regSet.rsMaskVars = rsTemp;
        }
    }

    // Generate the conditional jump but without doing any StackFP fixups.
    genCondJump(op1, lab_true, lab_false, false);

    /* Save the current liveness, register status, and GC pointers */
    /* This is the liveness information upon entry                 */
    /* to both the then and else parts of the qmark                */

    saveLiveness(&entryLiveness);

    /* Clear the liveness of any local variables that are dead upon   */
    /* entry to the else part.                                        */

    /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
    /* from the "colon or op2" liveSet                                */
    genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);

    /* genCondJump() closes the current emitter block */

    genDefineTempLabel(lab_false);

#if FEATURE_STACK_FP_X87
    // Store fpstate

    QmarkStateStackFP tempFPState;
    bool              bHasFPUState = !compCurFPState.IsEmpty();
    genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
#endif

    /* Does the operator yield a value? */

    if (tree->gtType == TYP_VOID)
    {
        /* Generate the code for the else part of the qmark */

        genCodeForTree(elseNode, needReg, bestReg);

        /* The type is VOID, so we shouldn't have computed a value */

        noway_assert(!(elseNode->InReg()));

        /* Save the current liveness, register status, and GC pointers               */
        /* This is the liveness information upon exit of the then part of the qmark  */

        saveLiveness(&exitLiveness);

        /* Is there a 'then' part? */

        if (thenNode->IsNothingNode())
        {
#if FEATURE_STACK_FP_X87
            if (bHasFPUState)
            {
                // We had FP state on entry just after the condition, so potentially, the else
                // node may have to do transition work.
                lab_done = genCreateTempLabel();

                /* Generate jmp lab_done */

                inst_JMP(EJ_jmp, lab_done);

                /* No 'then' - just generate the 'lab_true' label */

                genDefineTempLabel(lab_true);

                // We need to do this after defining the lab_false label
                genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
                genQMarkAfterThenBlockStackFP(&tempFPState);
                genDefineTempLabel(lab_done);
            }
            else
#endif // FEATURE_STACK_FP_X87
            {
                /* No 'then' - just generate the 'lab_true' label */
                genDefineTempLabel(lab_true);
            }
        }
        else
        {
            lab_done = genCreateTempLabel();

            /* Generate jmp lab_done */

            inst_JMP(EJ_jmp, lab_done);

            /* Restore the liveness that we had upon entry of the then part of the qmark */

            restoreLiveness(&entryLiveness);

            /* Clear the liveness of any local variables that are dead upon    */
            /* entry to the then part.                                         */
            genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);

            /* Generate lab_true: */

            genDefineTempLabel(lab_true);
#if FEATURE_STACK_FP_X87
            // We need to do this after defining the lab_false label
            genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
#endif
            /* Enter the then part - trash all registers */

            regTracker.rsTrackRegClr();

            /* Generate the code for the then part of the qmark */

            genCodeForTree(thenNode, needReg, bestReg);

            /* The type is VOID, so we shouldn't have computed a value */

            noway_assert(!(thenNode->InReg()));

            unspillLiveness(&exitLiveness);

            /* Verify that the exit liveness information is the same for the two parts of the qmark */

            checkLiveness(&exitLiveness);
#if FEATURE_STACK_FP_X87
            genQMarkAfterThenBlockStackFP(&tempFPState);
#endif
            /* Define the "result" label */

            genDefineTempLabel(lab_done);
        }

        /* Join of the two branches - trash all registers */

        regTracker.rsTrackRegClr();

        /* We're just about done */

        genUpdateLife(tree);
    }
    else
    {
        /* Generate code for a qmark that generates a value */

        /* Generate the code for the else part of the qmark */

        noway_assert(elseNode->IsNothingNode() == false);

        /* Compute the elseNode into any free register */
        genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
        noway_assert(elseNode->InReg());
        noway_assert(elseNode->gtRegNum != REG_NA);

        /* Record the chosen register */
        reg  = elseNode->gtRegNum;
        regs = genRegMask(reg);

        /* Save the current liveness, register status, and GC pointers               */
        /* This is the liveness information upon exit of the else part of the qmark  */

        saveLiveness(&exitLiveness);

        /* Generate jmp lab_done */
        lab_done = genCreateTempLabel();

#ifdef DEBUG
        // We will use this to assert we don't emit instructions if we decide not to
        // do the jmp
        unsigned emittedInstructions = getEmitter()->emitInsCount;
        bool     bSkippedJump        = false;
#endif
        // We would like to know here if the else node is really going to generate
        // code, as if it isn't, we're generating here a jump to the next instruction.
        // What you would really like is to be able to go back and remove the jump, but
        // we have no way of doing that right now.

        if (
#if FEATURE_STACK_FP_X87
            !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
#endif
            genIsEnregisteredIntVariable(thenNode) == reg)
        {
#ifdef DEBUG
            // For the moment, fix this easy case (enregistered else node), which
            // is the one that happens all the time.

            bSkippedJump = true;
#endif
        }
        else
        {
            inst_JMP(EJ_jmp, lab_done);
        }

        /* Restore the liveness that we had upon entry of the else part of the qmark */

        restoreLiveness(&entryLiveness);

        /* Clear the liveness of any local variables that are dead upon    */
        /* entry to the then part.                                         */
        genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);

        /* Generate lab_true: */
        genDefineTempLabel(lab_true);
#if FEATURE_STACK_FP_X87
        // Store FP state

        // We need to do this after defining the lab_true label
        genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
#endif
        /* Enter the then part - trash all registers */

        regTracker.rsTrackRegClr();

        /* Generate the code for the then part of the qmark */

        noway_assert(thenNode->IsNothingNode() == false);

        /* This must place a value into the chosen register */
        genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);

        noway_assert(thenNode->InReg());
        noway_assert(thenNode->gtRegNum == reg);

        unspillLiveness(&exitLiveness);

        /* Verify that the exit liveness information is the same for the two parts of the qmark */
        checkLiveness(&exitLiveness);
#if FEATURE_STACK_FP_X87
        genQMarkAfterThenBlockStackFP(&tempFPState);
#endif

#ifdef DEBUG
        noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
#endif

        /* Define the "result" label */
        genDefineTempLabel(lab_done);

        /* Join of the two branches - trash all registers */

        regTracker.rsTrackRegClr();

        /* Check whether this subtree has freed up any variables */

        genUpdateLife(tree);

        genMarkTreeInReg(tree, reg);
    }
}
6022
6023 /*****************************************************************************
6024  *
6025  *  Generate code for a qmark colon using the CMOV instruction.  It's OK
6026  *  to return false when we can't easily implement it using a cmov (leading
6027  *  genCodeForQmark to implement it using branches).
6028  */
6029
bool CodeGen::genCodeForQmarkWithCMOV(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
#ifdef _TARGET_XARCH_
    GenTree* cond  = tree->gtOp.gtOp1;
    GenTree* colon = tree->gtOp.gtOp2;
    // Warning: this naming of the local vars is backwards!
    GenTree*  thenNode = colon->gtOp.gtOp1;
    GenTree*  elseNode = colon->gtOp.gtOp2;
    GenTree*  alwaysNode;    // operand unconditionally loaded into the result register
    GenTree*  predicateNode; // operand conditionally moved in via the cmov instruction
    regNumber reg;
    regMaskTP needReg = destReg;

    noway_assert(tree->gtOper == GT_QMARK);
    noway_assert(cond->OperIsCompare());
    noway_assert(colon->gtOper == GT_COLON);

#ifdef DEBUG
    // JitNoCMOV allows disabling this optimization for testing/debugging.
    if (JitConfig.JitNoCMOV())
    {
        return false;
    }
#endif

    /* Can only implement CMOV on processors that support it */

    if (!compiler->opts.compUseCMOV)
    {
        return false;
    }

    /* thenNode better be a local or a constant */

    if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
    {
        return false;
    }

    /* elseNode better be a local or a constant or nothing */

    if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
    {
        return false;
    }

    /* can't handle two constants here */

    if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
    {
        return false;
    }

    /* let's not handle comparisons of non-integer types */

    if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
    {
        return false;
    }

    /* Choose nodes for predicateNode and alwaysNode.  Swap cond if necessary.
       The biggest constraint is that cmov doesn't take an integer argument:
       a constant operand must be materialized with a plain mov, so the
       constant (if any) becomes alwaysNode and the local var becomes the
       cmov source (predicateNode).
    */

    bool reverseCond = false;
    if (elseNode->OperGet() == GT_CNS_INT)
    {
        // else node is a constant

        alwaysNode    = elseNode;
        predicateNode = thenNode;
        reverseCond   = true;
    }
    else
    {
        alwaysNode    = thenNode;
        predicateNode = elseNode;
    }

    // If the live set in alwaysNode is not the same as in tree, then
    // the variable in predicate node dies here.  This is a dangerous
    // case that we don't handle (genComputeReg could overwrite
    // the value of the variable in the predicate node).

    // Paranoid assert: the operand checks above guarantee that predicateNode
    // is either GT_CNS_INT or GT_LCL_VAR, and the two-constants case has
    // already been rejected, so it must be a GT_LCL_VAR here.
    assert(predicateNode->OperGet() == GT_LCL_VAR);
    if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
    {
        return false;
    }

    // Past this point we are committing to use CMOV.

    if (reverseCond)
    {
        compiler->gtReverseCond(cond);
    }

    // Evaluate the condition; from here until the cmov is emitted the
    // condition flags must not be modified.
    emitJumpKind jumpKind = genCondSetFlags(cond);

    // Compute the always node into any free register.  If it's a constant,
    // we need to generate the mov instruction here (otherwise genComputeReg might
    // modify the flags, as in xor reg,reg).

    if (alwaysNode->OperGet() == GT_CNS_INT)
    {
        reg = regSet.rsPickReg(needReg, bestReg);
        inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
        gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
        regTracker.rsTrackRegTrash(reg);
    }
    else
    {
        genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
        noway_assert(alwaysNode->InReg());
        noway_assert(alwaysNode->gtRegNum != REG_NA);

        // Record the chosen register

        reg = alwaysNode->gtRegNum;
    }

    regNumber regPredicate = REG_NA;

    // Is predicateNode an enregistered variable?

    if (genMarkLclVar(predicateNode))
    {
        // Variable lives in a register

        regPredicate = predicateNode->gtRegNum;
    }
#if REDUNDANT_LOAD
    else
    {
        // Checks if the variable happens to be in any of the registers

        regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
    }
#endif

    // Maps each emitJumpKind to the cmov instruction with the matching
    // condition code (the first two slots are placeholders for the
    // unconditional/none kinds, guarded by the insIsCMOV assert below).
    const static instruction EJtoCMOV[] = {INS_nop,    INS_nop,    INS_cmovo,  INS_cmovno, INS_cmovb,  INS_cmovae,
                                           INS_cmove,  INS_cmovne, INS_cmovbe, INS_cmova,  INS_cmovs,  INS_cmovns,
                                           INS_cmovpe, INS_cmovpo, INS_cmovl,  INS_cmovge, INS_cmovle, INS_cmovg};

    noway_assert((unsigned)jumpKind < _countof(EJtoCMOV));
    instruction cmov_ins = EJtoCMOV[jumpKind];

    noway_assert(insIsCMOV(cmov_ins));

    if (regPredicate != REG_NA)
    {
        // regPredicate is in a register

        inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
    }
    else
    {
        // regPredicate is in memory

        inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
    }
    gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
    regTracker.rsTrackRegTrash(reg);

    genUpdateLife(alwaysNode);
    genUpdateLife(predicateNode);
    genCodeForTree_DONE_LIFE(tree, reg);
    return true;
#else
    // CMOV-based qmark lowering is only implemented for xarch; the caller
    // falls back to the branch-based implementation.
    return false;
#endif
}
6202
6203 #ifdef _TARGET_XARCH_
// Generate code for a GT_MUL that must use the single-operand mul/imul
// encoding, which implicitly uses EAX as one operand and writes the
// (64-bit) result to EDX:EAX.  op1 is forced into EAX; op2 is made
// addressable; both EAX and EDX are trashed by the instruction.
void CodeGen::genCodeForMultEAX(GenTree* tree)
{
    GenTree*  op1  = tree->gtOp.gtOp1;
    GenTree*  op2  = tree->gtGetOp2();
    bool      ovfl = tree->gtOverflow();
    regNumber reg  = DUMMY_INIT(REG_CORRUPT);
    regMaskTP addrReg;

    noway_assert(tree->OperGet() == GT_MUL);

    /* We'll evaluate 'op1' first */

    // Target EAX for op1, but stay out of any registers op2 has reserved.
    regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);

    /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */

    genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    noway_assert(op1->InReg());

    // If op2 is a constant we need to load  the constant into a register
    if (op2->OperKind() & GTK_CONST)
    {
        genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
        noway_assert(op2->InReg());
        regSet.rsMarkRegUsed(op2);
        addrReg = genRegMask(op2->gtRegNum);
    }
    else
    {
        /* Make the second operand addressable */
        // Try to avoid EAX.
        addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
    }

    /* Make sure the first operand is still in a register */
    // op1 *must* go into EAX.
    genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
    noway_assert(op1->InReg());

    reg = op1->gtRegNum;

    // For 8 bit operations, we need to pick byte addressable registers

    if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
    {
        // Copy op1 into a byte-addressable register and transfer ownership
        // of the tree's value to that register.
        regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);

        inst_RV_RV(INS_mov, byteReg, reg);

        regTracker.rsTrackRegTrash(byteReg);
        regSet.rsMarkRegFree(genRegMask(reg));

        reg           = byteReg;
        op1->gtRegNum = reg;
        regSet.rsMarkRegUsed(op1);
    }

    /* Make sure the operand is still addressable */
    addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));

    /* Free up the operand, if it's a regvar */

    genUpdateLife(op2);

    /* The register is about to be trashed */

    regTracker.rsTrackRegTrash(reg);

    // For overflow instructions, tree->TypeGet() is the accurate type,
    // and gives us the size for the operands.

    emitAttr opSize = emitTypeSize(tree->TypeGet());

    /* Compute the new value */

    noway_assert(op1->gtRegNum == REG_EAX);

    // Make sure Edx is free (unless used by op2 itself)
    bool op2Released = false;

    if ((addrReg & RBM_EDX) == 0)
    {
        // op2 does not use Edx, so make sure no one else does either
        regSet.rsGrabReg(RBM_EDX);
    }
    else if (regSet.rsMaskMult & RBM_EDX)
    {
        /* Edx is used by op2 and some other trees.
           Spill the other trees besides op2. */

        regSet.rsGrabReg(RBM_EDX);
        op2Released = true;

        /* keepReg==RegSet::FREE_REG so that the other multi-used trees
           don't get marked as unspilled as well. */
        regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
    }

    instruction ins;

    // mul for unsigned multiply, imul for signed; both use EAX implicitly.
    if (tree->gtFlags & GTF_UNSIGNED)
        ins = INS_mulEAX;
    else
        ins = INS_imulEAX;

    inst_TT(ins, op2, 0, 0, opSize);

    /* Both EAX and EDX are now trashed */

    regTracker.rsTrackRegTrash(REG_EAX);
    regTracker.rsTrackRegTrash(REG_EDX);

    /* Free up anything that was tied up by the operand */

    // If op2 was already released via rsUnspillReg above, don't release twice.
    if (!op2Released)
        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

    /* The result will be where the first operand is sitting */

    /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
    genRecoverReg(op1, 0, RegSet::KEEP_REG);

    reg = op1->gtRegNum;
    noway_assert(reg == REG_EAX);

    genReleaseReg(op1);

    /* Do we need an overflow check */

    if (ovfl)
        genCheckOverflow(tree);

    genCodeForTree_DONE(tree, reg);
}
6338 #endif // _TARGET_XARCH_
6339
6340 #ifdef _TARGET_ARM_
// Generate code for a GT_MUL on ARM using the 32x32 => 64-bit multiply
// (umull/smull).  Used when the full 64-bit result is requested
// (GTF_MUL_64RSLT) or when an overflow check requires the high 32 bits.
void CodeGen::genCodeForMult64(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    GenTree* op1 = tree->gtOp.gtOp1;
    GenTree* op2 = tree->gtGetOp2();

    noway_assert(tree->OperGet() == GT_MUL);

    /* Generate the first operand into some register */

    genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
    noway_assert(op1->InReg());

    /* Generate the second operand into some register */

    genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
    noway_assert(op2->InReg());

    /* Make sure the first operand is still in a register */
    genRecoverReg(op1, 0, RegSet::KEEP_REG);
    noway_assert(op1->InReg());

    /* Free up the operands */
    genUpdateLife(tree);

    genReleaseReg(op1);
    genReleaseReg(op2);

    // Pick two distinct registers for the low and high halves of the result;
    // regLo is locked while choosing regHi so we don't get the same register.
    regNumber regLo = regSet.rsPickReg(destReg, bestReg);
    regNumber regHi;

    regSet.rsLockReg(genRegMask(regLo));
    regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
    regSet.rsUnlockReg(genRegMask(regLo));

    instruction ins;
    if (tree->gtFlags & GTF_UNSIGNED)
        ins = INS_umull;
    else
        ins = INS_smull;

    // regLo:regHi = op1 * op2 (64-bit result)
    getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
    regTracker.rsTrackRegTrash(regHi);
    regTracker.rsTrackRegTrash(regLo);

    /* Do we need an overflow check */

    if (tree->gtOverflow())
    {
        // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
        //
        regSet.rsLockReg(genRegMask(regLo));

        if (tree->gtFlags & GTF_MUL_64RSLT)
            regSet.rsLockReg(genRegMask(regHi));

        // Unsigned: overflow iff the high half is non-zero, so compare regHi
        // (aliased as regTmpHi) against 0 directly.
        regNumber regTmpHi = regHi;
        if ((tree->gtFlags & GTF_UNSIGNED) == 0)
        {
            // Signed: no overflow iff regHi equals the sign-extension of
            // regLo (0 when regLo < 0x80000000, -1 otherwise).  The cmp sets
            // the ARM carry flag exactly when regLo >= 0x80000000, so
            // adc regTmpHi = regHi + carry is 0 in precisely the
            // no-overflow cases.
            getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
            regTmpHi = regSet.rsPickReg(RBM_ALLINT);
            getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
            regTracker.rsTrackRegTrash(regTmpHi);
        }
        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);

        // Jump to the block which will throw the exception
        emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
        genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);

        // Unlock regLo [and regHi] after generating code for the gtOverflow() case
        //
        regSet.rsUnlockReg(genRegMask(regLo));

        if (tree->gtFlags & GTF_MUL_64RSLT)
            regSet.rsUnlockReg(genRegMask(regHi));
    }

    genUpdateLife(tree);

    // Report either the full 64-bit pair or just the low 32 bits, depending
    // on whether the caller asked for the 64-bit result.
    if (tree->gtFlags & GTF_MUL_64RSLT)
        genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
    else
        genMarkTreeInReg(tree, regLo);
}
6425 #endif // _TARGET_ARM_
6426
6427 /*****************************************************************************
6428  *
6429  *  Generate code for a simple binary arithmetic or logical operator.
6430  *  Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
6431  */
6432
6433 void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
6434 {
6435     instruction     ins;
6436     genTreeOps      oper     = tree->OperGet();
6437     const var_types treeType = tree->TypeGet();
6438     GenTree*        op1      = tree->gtOp.gtOp1;
6439     GenTree*        op2      = tree->gtGetOp2();
6440     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
6441     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
6442     regMaskTP       needReg  = destReg;
6443
6444     /* Figure out what instruction to generate */
6445
6446     bool isArith;
6447     switch (oper)
6448     {
6449         case GT_AND:
6450             ins     = INS_AND;
6451             isArith = false;
6452             break;
6453         case GT_OR:
6454             ins     = INS_OR;
6455             isArith = false;
6456             break;
6457         case GT_XOR:
6458             ins     = INS_XOR;
6459             isArith = false;
6460             break;
6461         case GT_ADD:
6462             ins     = INS_add;
6463             isArith = true;
6464             break;
6465         case GT_SUB:
6466             ins     = INS_sub;
6467             isArith = true;
6468             break;
6469         case GT_MUL:
6470             ins     = INS_MUL;
6471             isArith = true;
6472             break;
6473         default:
6474             unreached();
6475     }
6476
6477 #ifdef _TARGET_XARCH_
6478     /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
6479
6480     if ((oper == GT_MUL) &&
6481         op2->IsIntCnsFitsInI32() &&              // op2 is a constant that fits in a sign-extended 32-bit immediate
6482         !op1->IsCnsIntOrI() &&                   // op1 is not a constant
6483         (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
6484         !varTypeIsByte(treeType) &&              // No encoding for say "imul al,al,imm"
6485         !tree->gtOverflow())                     // 3 operand imul doesn't set flags
6486     {
6487         /* Make the first operand addressable */
6488
6489         regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
6490
6491         /* Grab a register for the target */
6492
6493         reg = regSet.rsPickReg(needReg, bestReg);
6494
6495 #if LEA_AVAILABLE
6496         /* Compute the value into the target: reg=op1*op2_icon */
6497         if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
6498         {
6499             regNumber regSrc;
6500             if (op1->InReg())
6501             {
6502                 regSrc = op1->gtRegNum;
6503             }
6504             else
6505             {
6506                 inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
6507                 regSrc = reg;
6508             }
6509             getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
6510                                         (op2->gtIntCon.gtIconVal & -2), 0);
6511         }
6512         else
6513 #endif // LEA_AVAILABLE
6514         {
6515             /* Compute the value into the target: reg=op1*op2_icon */
6516             inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
6517         }
6518
6519         /* The register has been trashed now */
6520
6521         regTracker.rsTrackRegTrash(reg);
6522
6523         /* The address is no longer live */
6524
6525         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
6526
6527         genCodeForTree_DONE(tree, reg);
6528         return;
6529     }
6530 #endif // _TARGET_XARCH_
6531
6532     bool ovfl = false;
6533
6534     if (isArith)
6535     {
6536         // We only reach here for GT_ADD, GT_SUB and GT_MUL.
6537         assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
6538
6539         ovfl = tree->gtOverflow();
6540
6541         /* We record the accurate (small) types in trees only we need to
6542          * check for overflow. Otherwise we record genActualType()
6543          */
6544
6545         noway_assert(ovfl || (treeType == genActualType(treeType)));
6546
6547 #if LEA_AVAILABLE
6548
6549         /* Can we use an 'lea' to compute the result?
6550            Can't use 'lea' for overflow as it doesn't set flags
6551            Can't use 'lea' unless we have at least two free registers */
6552         {
6553             bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
6554                                    genCountBits(regSet.rsMaskLock) +  // Locked registers
6555                                    2                                  // We will need two regisers
6556                                <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
6557
6558             regMaskTP regs = RBM_NONE; // OUT argument
6559             if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
6560             {
6561                 emitAttr size;
6562
6563                 /* Is the value now computed in some register? */
6564
6565                 if (tree->InReg())
6566                 {
6567                     genCodeForTree_REG_VAR1(tree);
6568                     return;
6569                 }
6570
6571                 /* If we can reuse op1/2's register directly, and 'tree' is
6572                    a simple expression (ie. not in scaled index form),
6573                    might as well just use "add" instead of "lea" */
6574
6575                 // However, if we're in a context where we want to evaluate "tree" into a specific
6576                 // register different from the reg we'd use in this optimization, then it doesn't
6577                 // make sense to do the "add", since we'd also have to do a "mov."
6578                 if (op1->InReg())
6579                 {
6580                     reg = op1->gtRegNum;
6581
6582                     if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6583                     {
6584                         if (op2->InReg())
6585                         {
6586                             /* Simply add op2 to the register */
6587
6588                             inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
6589
6590                             if (tree->gtSetFlags())
6591                                 genFlagsEqualToReg(tree, reg);
6592
6593                             goto DONE_LEA_ADD;
6594                         }
6595                         else if (op2->OperGet() == GT_CNS_INT)
6596                         {
6597                             /* Simply add op2 to the register */
6598
6599                             genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
6600
6601                             goto DONE_LEA_ADD;
6602                         }
6603                     }
6604                 }
6605
6606                 if (op2->InReg())
6607                 {
6608                     reg = op2->gtRegNum;
6609
6610                     if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6611                     {
6612                         if (op1->InReg())
6613                         {
6614                             /* Simply add op1 to the register */
6615
6616                             inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
6617
6618                             if (tree->gtSetFlags())
6619                                 genFlagsEqualToReg(tree, reg);
6620
6621                             goto DONE_LEA_ADD;
6622                         }
6623                     }
6624                 }
6625
6626                 // The expression either requires a scaled-index form, or the
6627                 // op1 or op2's register can't be targeted, this can be
6628                 // caused when op1 or op2 are enregistered variables.
6629
6630                 reg  = regSet.rsPickReg(needReg, bestReg);
6631                 size = emitActualTypeSize(treeType);
6632
6633                 /* Generate "lea reg, [addr-mode]" */
6634
6635                 inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
6636
6637 #ifndef _TARGET_XARCH_
6638                 // Don't call genFlagsEqualToReg on x86/x64
6639                 //  as it does not set the flags
6640                 if (tree->gtSetFlags())
6641                     genFlagsEqualToReg(tree, reg);
6642 #endif
6643
6644             DONE_LEA_ADD:
6645                 /* The register has been trashed now */
6646                 regTracker.rsTrackRegTrash(reg);
6647
6648                 genDoneAddressable(tree, regs, RegSet::FREE_REG);
6649
6650                 /* The following could be an 'inner' pointer!!! */
6651
6652                 noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
6653
6654                 if (treeType == TYP_BYREF)
6655                 {
6656                     genUpdateLife(tree);
6657
6658                     gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
6659                     gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
6660                 }
6661
6662                 genCodeForTree_DONE(tree, reg);
6663                 return;
6664             }
6665         }
6666
6667 #endif // LEA_AVAILABLE
6668
6669         noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
6670     }
6671
6672     /* The following makes an assumption about gtSetEvalOrder(this) */
6673
6674     noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
6675
6676     /* Compute a useful register mask */
6677     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6678     needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6679
6680     // Determine what registers go live between op1 and op2
6681     // Don't bother checking if op1 is already in a register.
6682     // This is not just for efficiency; if it's already in a
6683     // register then it may already be considered "evaluated"
6684     // for the purposes of liveness, in which genNewLiveRegMask
6685     // will assert
6686     if (!op1->InReg())
6687     {
6688         regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
6689         if (newLiveMask)
6690         {
6691             needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
6692         }
6693     }
6694
6695 #if CPU_HAS_BYTE_REGS
6696     /* 8-bit operations can only be done in the byte-regs */
6697     if (varTypeIsByte(treeType))
6698         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6699 #endif // CPU_HAS_BYTE_REGS
6700
6701     // Try selecting one of the 'bestRegs'
6702     needReg = regSet.rsNarrowHint(needReg, bestReg);
6703
6704     /* Special case: small_val & small_mask */
6705
6706     if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
6707     {
6708         size_t    and_val = op2->gtIntCon.gtIconVal;
6709         size_t    andMask;
6710         var_types typ = op1->TypeGet();
6711
6712         switch (typ)
6713         {
6714             case TYP_BOOL:
6715             case TYP_BYTE:
6716             case TYP_UBYTE:
6717                 andMask = 0x000000FF;
6718                 break;
6719             case TYP_SHORT:
6720             case TYP_USHORT:
6721                 andMask = 0x0000FFFF;
6722                 break;
6723             default:
6724                 noway_assert(!"unexpected type");
6725                 return;
6726         }
6727
6728         // Is the 'and_val' completely contained within the bits found in 'andMask'
6729         if ((and_val & ~andMask) == 0)
6730         {
6731             // We must use unsigned instructions when loading op1
6732             if (varTypeIsByte(typ))
6733             {
6734                 op1->gtType = TYP_UBYTE;
6735             }
6736             else // varTypeIsShort(typ)
6737             {
6738                 assert(varTypeIsShort(typ));
6739                 op1->gtType = TYP_USHORT;
6740             }
6741
6742             /* Generate the first operand into a scratch register */
6743
6744             op1 = genCodeForCommaTree(op1);
6745             genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6746
6747             noway_assert(op1->InReg());
6748
6749             regNumber op1Reg = op1->gtRegNum;
6750
6751             // Did we end up in an acceptable register?
6752             // and do we have an acceptable free register available to grab?
6753             //
6754             if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6755             {
6756                 // See if we can pick a register from bestReg
6757                 bestReg &= needReg;
6758
6759                 // Grab an acceptable register
6760                 regNumber newReg;
6761                 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6762                     newReg = regSet.rsGrabReg(bestReg);
6763                 else
6764                     newReg = regSet.rsGrabReg(needReg);
6765
6766                 noway_assert(op1Reg != newReg);
6767
6768                 /* Update the value in the target register */
6769
6770                 regTracker.rsTrackRegCopy(newReg, op1Reg);
6771
6772                 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6773
6774                 /* The value has been transferred to 'reg' */
6775
6776                 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6777                     gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6778
6779                 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6780
6781                 /* The value is now in an appropriate register */
6782
6783                 op1->gtRegNum = newReg;
6784             }
6785             noway_assert(op1->InReg());
6786             genUpdateLife(op1);
6787
6788             /* Mark the register as 'used' */
6789             regSet.rsMarkRegUsed(op1);
6790             reg = op1->gtRegNum;
6791
6792             if (and_val != andMask) // Does the "and" mask only cover some of the bits?
6793             {
6794                 /* "and" the value */
6795
6796                 inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
6797             }
6798
6799 #ifdef DEBUG
6800             /* Update the live set of register variables */
6801             if (compiler->opts.varNames)
6802                 genUpdateLife(tree);
6803 #endif
6804
6805             /* Now we can update the register pointer information */
6806
6807             genReleaseReg(op1);
6808             gcInfo.gcMarkRegPtrVal(reg, treeType);
6809
6810             genCodeForTree_DONE_LIFE(tree, reg);
6811             return;
6812         }
6813     }
6814
6815 #ifdef _TARGET_XARCH_
6816
6817     // Do we have to use the special "imul" instruction
6818     // which has eax as the implicit operand ?
6819     //
6820     bool multEAX = false;
6821
6822     if (oper == GT_MUL)
6823     {
6824         if (tree->gtFlags & GTF_MUL_64RSLT)
6825         {
6826             /* Only multiplying with EAX will leave the 64-bit
6827              * result in EDX:EAX */
6828
6829             multEAX = true;
6830         }
6831         else if (ovfl)
6832         {
6833             if (tree->gtFlags & GTF_UNSIGNED)
6834             {
6835                 /* "mul reg/mem" always has EAX as default operand */
6836
6837                 multEAX = true;
6838             }
6839             else if (varTypeIsSmall(treeType))
6840             {
6841                 /* Only the "imul with EAX" encoding has the 'w' bit
6842                  * to specify the size of the operands */
6843
6844                 multEAX = true;
6845             }
6846         }
6847     }
6848
6849     if (multEAX)
6850     {
6851         noway_assert(oper == GT_MUL);
6852
6853         return genCodeForMultEAX(tree);
6854     }
6855 #endif // _TARGET_XARCH_
6856
6857 #ifdef _TARGET_ARM_
6858
6859     // Do we have to use the special 32x32 => 64 bit multiply
6860     //
6861     bool mult64 = false;
6862
6863     if (oper == GT_MUL)
6864     {
6865         if (tree->gtFlags & GTF_MUL_64RSLT)
6866         {
6867             mult64 = true;
6868         }
6869         else if (ovfl)
6870         {
6871             // We always must use the 32x32 => 64 bit multiply
6872             // to detect overflow
6873             mult64 = true;
6874         }
6875     }
6876
6877     if (mult64)
6878     {
6879         noway_assert(oper == GT_MUL);
6880
6881         return genCodeForMult64(tree, destReg, bestReg);
6882     }
6883 #endif // _TARGET_ARM_
6884
6885     /* Generate the first operand into a scratch register */
6886
6887     op1 = genCodeForCommaTree(op1);
6888     genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6889
6890     noway_assert(op1->InReg());
6891
6892     regNumber op1Reg = op1->gtRegNum;
6893
6894     // Setup needReg with the set of register that we require for op1 to be in
6895     //
6896     needReg = RBM_ALLINT;
6897
6898     /* Compute a useful register mask */
6899     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6900     needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6901
6902 #if CPU_HAS_BYTE_REGS
6903     /* 8-bit operations can only be done in the byte-regs */
6904     if (varTypeIsByte(treeType))
6905         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6906 #endif // CPU_HAS_BYTE_REGS
6907
6908     // Did we end up in an acceptable register?
6909     // and do we have an acceptable free register available to grab?
6910     //
6911     if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6912     {
6913         // See if we can pick a register from bestReg
6914         bestReg &= needReg;
6915
6916         // Grab an acceptable register
6917         regNumber newReg;
6918         if ((bestReg & regSet.rsRegMaskFree()) != 0)
6919             newReg = regSet.rsGrabReg(bestReg);
6920         else
6921             newReg = regSet.rsGrabReg(needReg);
6922
6923         noway_assert(op1Reg != newReg);
6924
6925         /* Update the value in the target register */
6926
6927         regTracker.rsTrackRegCopy(newReg, op1Reg);
6928
6929         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6930
6931         /* The value has been transferred to 'reg' */
6932
6933         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6934             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6935
6936         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6937
6938         /* The value is now in an appropriate register */
6939
6940         op1->gtRegNum = newReg;
6941     }
6942     noway_assert(op1->InReg());
6943     op1Reg = op1->gtRegNum;
6944
6945     genUpdateLife(op1);
6946
6947     /* Mark the register as 'used' */
6948     regSet.rsMarkRegUsed(op1);
6949
6950     bool isSmallConst = false;
6951
6952 #ifdef _TARGET_ARM_
6953     if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
6954     {
6955         isSmallConst = true;
6956     }
6957 #endif
6958     /* Make the second operand addressable */
6959
6960     regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
6961
6962 #if CPU_LOAD_STORE_ARCH
6963     genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
6964 #else  // !CPU_LOAD_STORE_ARCH
6965     /* Is op1 spilled and op2 in a register? */
6966
6967     if ((op1->gtFlags & GTF_SPILLED) && (op2->InReg()) && (ins != INS_sub))
6968     {
6969         noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
6970
6971         // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
6972         noway_assert(op2->gtOper != GT_LCL_VAR ||
6973                      varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
6974
6975         reg               = op2->gtRegNum;
6976         regMaskTP regMask = genRegMask(reg);
6977
6978         /* Is the register holding op2 available? */
6979
6980         if (regMask & regSet.rsMaskVars)
6981         {
6982         }
6983         else
6984         {
6985             /* Get the temp we spilled into. */
6986
6987             TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
6988
6989             /* For 8bit operations, we need to make sure that op2 is
6990                in a byte-addressable registers */
6991
6992             if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
6993             {
6994                 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6995
6996                 inst_RV_RV(INS_mov, byteReg, reg);
6997                 regTracker.rsTrackRegTrash(byteReg);
6998
6999                 /* op2 couldn't have spilled as it was not sitting in
7000                    RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
7001                 noway_assert(op2->InReg());
7002
7003                 regSet.rsUnlockReg(regMask);
7004                 regSet.rsMarkRegFree(regMask);
7005
7006                 reg           = byteReg;
7007                 regMask       = genRegMask(reg);
7008                 op2->gtRegNum = reg;
7009                 regSet.rsMarkRegUsed(op2);
7010             }
7011
7012             inst_RV_ST(ins, reg, temp, 0, treeType);
7013
7014             regTracker.rsTrackRegTrash(reg);
7015
7016             /* Free the temp */
7017
7018             compiler->tmpRlsTemp(temp);
7019
7020             /* 'add'/'sub' set all CC flags, others only ZF */
7021
7022             /* If we need to check overflow, for small types, the
7023              * flags can't be used as we perform the arithmetic
7024              * operation (on small registers) and then sign extend it
7025              *
7026              * NOTE : If we ever don't need to sign-extend the result,
7027              * we can use the flags
7028              */
7029
7030             if (tree->gtSetFlags())
7031             {
7032                 genFlagsEqualToReg(tree, reg);
7033             }
7034
7035             /* The result is where the second operand is sitting. Mark result reg as free */
7036             regSet.rsMarkRegFree(genRegMask(reg));
7037
7038             gcInfo.gcMarkRegPtrVal(reg, treeType);
7039
7040             goto CHK_OVF;
7041         }
7042     }
7043 #endif // !CPU_LOAD_STORE_ARCH
7044
7045     /* Make sure the first operand is still in a register */
7046     regSet.rsLockUsedReg(addrReg);
7047     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7048     noway_assert(op1->InReg());
7049     regSet.rsUnlockUsedReg(addrReg);
7050
7051     reg = op1->gtRegNum;
7052
7053     // For 8 bit operations, we need to pick byte addressable registers
7054
7055     if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
7056     {
7057         regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7058
7059         inst_RV_RV(INS_mov, byteReg, reg);
7060
7061         regTracker.rsTrackRegTrash(byteReg);
7062         regSet.rsMarkRegFree(genRegMask(reg));
7063
7064         reg           = byteReg;
7065         op1->gtRegNum = reg;
7066         regSet.rsMarkRegUsed(op1);
7067     }
7068
7069     /* Make sure the operand is still addressable */
7070     addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
7071
7072     /* Free up the operand, if it's a regvar */
7073
7074     genUpdateLife(op2);
7075
7076     /* The register is about to be trashed */
7077
7078     regTracker.rsTrackRegTrash(reg);
7079
7080     {
7081         bool op2Released = false;
7082
7083         // For overflow instructions, tree->gtType is the accurate type,
7084         // and gives us the size for the operands.
7085
7086         emitAttr opSize = emitTypeSize(treeType);
7087
7088         /* Compute the new value */
7089
7090         if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
7091 #if !CPU_HAS_FP_SUPPORT
7092             && (treeType == TYP_INT || treeType == TYP_I_IMPL)
7093 #endif
7094                 )
7095         {
7096             ssize_t ival = op2->gtIntCon.gtIconVal;
7097
7098             if (oper == GT_ADD)
7099             {
7100                 genIncRegBy(reg, ival, tree, treeType, ovfl);
7101             }
7102             else if (oper == GT_SUB)
7103             {
7104                 if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
7105                              (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
7106                     // Therefore we can't use -ival.
7107                     )
7108                 {
7109                     /* For unsigned overflow, we have to use INS_sub to set
7110                     the flags correctly */
7111
7112                     genDecRegBy(reg, ival, tree);
7113                 }
7114                 else
7115                 {
7116                     /* Else, we simply add the negative of the value */
7117
7118                     genIncRegBy(reg, -ival, tree, treeType, ovfl);
7119                 }
7120             }
7121             else if (oper == GT_MUL)
7122             {
7123                 genMulRegBy(reg, ival, tree, treeType, ovfl);
7124             }
7125         }
7126         else
7127         {
7128             // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
7129             op2 = op2->gtEffectiveVal();
7130             if (varTypeIsByte(treeType) && op2->InReg())
7131             {
7132                 noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
7133
7134                 regNumber op2reg     = op2->gtRegNum;
7135                 regMaskTP op2regMask = genRegMask(op2reg);
7136
7137                 if (!(op2regMask & RBM_BYTE_REGS))
7138                 {
7139                     regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7140
7141                     inst_RV_RV(INS_mov, byteReg, op2reg);
7142                     regTracker.rsTrackRegTrash(byteReg);
7143
7144                     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7145                     op2Released = true;
7146
7147                     op2->gtRegNum = byteReg;
7148                 }
7149             }
7150
7151             inst_RV_TT(ins, reg, op2, 0, opSize, flags);
7152         }
7153
7154         /* Free up anything that was tied up by the operand */
7155
7156         if (!op2Released)
7157         {
7158             genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7159         }
7160     }
7161     /* The result will be where the first operand is sitting */
7162
7163     /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
7164     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7165
7166     reg = op1->gtRegNum;
7167
7168     /* 'add'/'sub' set all CC flags, others only ZF+SF */
7169
7170     if (tree->gtSetFlags())
7171         genFlagsEqualToReg(tree, reg);
7172
7173     genReleaseReg(op1);
7174
7175 #if !CPU_LOAD_STORE_ARCH
7176 CHK_OVF:
7177 #endif // !CPU_LOAD_STORE_ARCH
7178
7179     /* Do we need an overflow check */
7180
7181     if (ovfl)
7182         genCheckOverflow(tree);
7183
7184     genCodeForTree_DONE(tree, reg);
7185 }
7186
7187 /*****************************************************************************
7188  *
7189  *  Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7190  *  Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
7191  */
7192
// Generate code for a read-modify-write assignment "x <op>= y", where <op> is
// one of &, |, ^, + or - (GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB).
//
// Arguments:
//    tree    - the GT_ASG_* node; gtOp1 is the assignment target, gtOp2 the RHS
//    destReg - mask of preferred registers for the result
//    bestReg - not referenced in this function's body (kept for signature
//              consistency with the other genCodeForTree* helpers)
//
// The code below special-cases, in order: arithmetic ops with a 32-bit constant
// RHS (including inc/dec and register-target fast paths), enregistered targets,
// "x ^= -1" as NOT, constant RHS for the logical ops, and finally the general
// compute-value / make-target-addressable paths in both evaluation orders.
void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    instruction      ins;
    const genTreeOps oper     = tree->OperGet();
    const var_types  treeType = tree->TypeGet();
    GenTree*         op1      = tree->gtOp.gtOp1;
    GenTree*         op2      = tree->gtGetOp2();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regNumber        reg      = DUMMY_INIT(REG_CORRUPT);
    regMaskTP        needReg  = destReg;
    regMaskTP        addrReg;

    /* Figure out what instruction to generate */

    bool isArith;
    switch (oper)
    {
        case GT_ASG_AND:
            ins     = INS_AND;
            isArith = false;
            break;
        case GT_ASG_OR:
            ins     = INS_OR;
            isArith = false;
            break;
        case GT_ASG_XOR:
            ins     = INS_XOR;
            isArith = false;
            break;
        case GT_ASG_ADD:
            ins     = INS_add;
            isArith = true;
            break;
        case GT_ASG_SUB:
            ins     = INS_sub;
            isArith = true;
            break;
        default:
            unreached();
    }

    // Overflow checking is only possible for the arithmetic (add/sub) forms.
    bool ovfl = false;

    if (isArith)
    {
        // We only reach here for GT_ASG_SUB, GT_ASG_ADD.

        ovfl = tree->gtOverflow();

        // We can't use += with overflow if the value cannot be changed
        // in case of an overflow-exception which the "+" might cause
        noway_assert(!ovfl ||
                     ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));

        /* Do not allow overflow instructions with refs/byrefs */

        noway_assert(!ovfl || !varTypeIsGC(treeType));

        // We disallow overflow and byte-ops here as it is too much trouble
        noway_assert(!ovfl || !varTypeIsByte(treeType));

        /* Is the second operand a constant? */

        if (op2->IsIntCnsFitsInI32())
        {
            int ival = (int)op2->gtIntCon.gtIconVal;

            /* What is the target of the assignment? */

            switch (op1->gtOper)
            {
                case GT_REG_VAR:

                // REG_VAR4: the target already lives in a register; also reached
                // via goto from the GT_LCL_VAR case when genMarkLclVar succeeds.
                REG_VAR4:

                    reg = op1->gtRegVar.gtRegNum;

                    /* No registers are needed for addressing */

                    addrReg = RBM_NONE;
#if !CPU_LOAD_STORE_ARCH
                // INCDEC_REG: shared tail for "register target += constant";
                // also reached via goto below when op1 turns out to be in a reg.
                INCDEC_REG:
#endif
                    /* We're adding a constant to a register */

                    if (oper == GT_ASG_ADD)
                        genIncRegBy(reg, ival, tree, treeType, ovfl);
                    else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
                                      ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
                                                                                                 // 0x80000000.
                                                                                                 // Therefore we can't
                                                                                                 // use -ival.
                             )
                        /* For unsigned overflow, we have to use INS_sub to set
                            the flags correctly */
                        genDecRegBy(reg, ival, tree);
                    else
                        genIncRegBy(reg, -ival, tree, treeType, ovfl);

                    break;

                case GT_LCL_VAR:

                    /* Does the variable live in a register? */

                    if (genMarkLclVar(op1))
                        goto REG_VAR4;

                    __fallthrough;

                default:

                    /* Make the target addressable for load/store */
                    addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);

#if !CPU_LOAD_STORE_ARCH
                    // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory

                    /* For small types with overflow check, we need to
                        sign/zero extend the result, so we need it in a reg */

                    if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH
                    {
                        // (On CPU_LOAD_STORE_ARCH the #if removes the condition,
                        // so this load/op/store block executes unconditionally.)

                        // Load op1 into a reg

                        reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);

                        inst_RV_TT(INS_mov, reg, op1);

                        // Issue the add/sub and the overflow check

                        inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
                        regTracker.rsTrackRegTrash(reg);

                        if (ovfl)
                        {
                            genCheckOverflow(tree);
                        }

                        /* Store the (sign/zero extended) result back to
                            the stack location of the variable */

                        inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

                        break;
                    }
#if !CPU_LOAD_STORE_ARCH
                    else
                    {
                        /* Add/subtract the new value into/from the target */

                        if (op1->InReg())
                        {
                            reg = op1->gtRegNum;
                            goto INCDEC_REG;
                        }

                        /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
                        if (!ovfl && (ival == 1 || ival == -1) &&
                            !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
                        {
                            noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
                            if (oper == GT_ASG_SUB)
                                ival = -ival;

                            ins = (ival > 0) ? INS_inc : INS_dec;
                            inst_TT(ins, op1);
                        }
                        else
                        {
                            inst_TT_IV(ins, op1, ival);
                        }

                        if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
                        {
                            if (tree->gtSetFlags())
                                genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
                        }

                        break;
                    }
#endif        // !CPU_LOAD_STORE_ARCH
            } // end switch (op1->gtOper)

            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

            genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
            return;
        } // end if (op2->IsIntCnsFitsInI32())
    }     // end if (isArith)

    noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);

    /* Is the target a register or local variable? */

    switch (op1->gtOper)
    {
        case GT_LCL_VAR:

            /* Does the target variable live in a register? */

            if (!genMarkLclVar(op1))
                break;

            __fallthrough;

        case GT_REG_VAR:

            /* Get hold of the target register */

            reg = op1->gtRegVar.gtRegNum;

            /* Make sure the target of the store is available */

            if (regSet.rsMaskUsed & genRegMask(reg))
            {
                regSet.rsSpillReg(reg);
            }

            /* Make the RHS addressable */

            addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);

            /* Compute the new value into the target register */
            CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_HAS_BYTE_REGS

            // Fix 383833 X86 ILGEN
            // reg2 describes where op2 currently lives: a register, or REG_STK
            // as a stand-in for "in memory".
            regNumber reg2;
            if (op2->InReg())
            {
                reg2 = op2->gtRegNum;
            }
            else
            {
                reg2 = REG_STK;
            }

            // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers
            // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register
            //
            if (varTypeIsByte(treeType) &&
                (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
            {
                // We will force op2 into a register (via sign/zero extending load)
                // for the cases where op2 is in memory and thus could have
                // an unmapped page just beyond its location
                //
                if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
                {
                    genCodeForTree(op2, 0);
                    assert(op2->InReg());
                }

                // Perform the operation at 4-byte width; the result is then
                // narrowed back to the byte type below unless that can be omitted.
                inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);

                bool canOmit = false;

                if (varTypeIsUnsigned(treeType))
                {
                    // When op2 is a byte sized constant we can omit the zero extend instruction
                    if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
                    {
                        canOmit = true;
                    }
                }
                else // treeType is signed
                {
                    // When op2 is a positive 7-bit or smaller constant
                    // we can omit the sign extension sequence.
                    if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
                    {
                        canOmit = true;
                    }
                }

                if (!canOmit)
                {
                    // If reg is a byte reg then we can use a movzx/movsx instruction
                    //
                    if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
                    {
                        instruction extendIns = ins_Move_Extend(treeType, true);
                        inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
                    }
                    else // we can't encode a movzx/movsx instruction
                    {
                        if (varTypeIsUnsigned(treeType))
                        {
                            // otherwise, we must zero the upper 24 bits of 'reg'
                            inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
                        }
                        else // treeType is signed
                        {
                            // otherwise, we must sign extend the result in the non-byteable register 'reg'
                            // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
                            // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly.
                            //
                            inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
                            inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
                        }
                    }
                }
            }
            else
#endif // CPU_HAS_BYTE_REGS
            {
                inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
            }

            /* The zero flag is now equal to the register value */

            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);

            /* Remember that we trashed the target */

            regTracker.rsTrackRegTrash(reg);

            /* Free up anything that was tied up by the RHS */

            genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

            genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
            return;

        default:
            break;
    } // end switch (op1->gtOper)

#if !CPU_LOAD_STORE_ARCH
    /* Special case: "x ^= -1" is actually "not(x)" */

    if (oper == GT_ASG_XOR)
    {
        if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
        {
            addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
            inst_TT(INS_NOT, op1);
            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

            genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
            return;
        }
    }
#endif // !CPU_LOAD_STORE_ARCH

    /* Setup target mask for op2 (byte-regs for small operands) */

    unsigned needMask;
    needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;

    /* Is the second operand a constant? */

    if (op2->IsIntCnsFitsInI32())
    {
        int ival = (int)op2->gtIntCon.gtIconVal;

        /* Make the target addressable */
        addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);

        inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);

        genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

        genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
        return;
    }

    /* Is the value or the address to be computed first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* Compute the new value into a register */

        genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);

        /* Make the target addressable for load/store */
        addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
        regSet.rsLockUsedReg(addrReg);

#if !CPU_LOAD_STORE_ARCH
        // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
        /* For small types with overflow check, we need to
            sign/zero extend the result, so we need it in a reg */

        if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH
        {
            // (Unconditional on CPU_LOAD_STORE_ARCH, see #if above.)
            reg = regSet.rsPickReg();
            regSet.rsLockReg(genRegMask(reg));

            noway_assert(genIsValidReg(reg));

            /* Generate "ldr reg, [var]" */

            inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
            else
                regTracker.rsTrackRegTrash(reg);

            /* Make sure the new value is in a register */

            genRecoverReg(op2, 0, RegSet::KEEP_REG);

            /* Compute the new value */

            inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);

            if (ovfl)
                genCheckOverflow(tree);

            /* Move the new value back to the variable */
            /* Generate "str reg, [var]" */

            inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
            regSet.rsUnlockReg(genRegMask(reg));

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
        }
#if !CPU_LOAD_STORE_ARCH
        else
        {
            /* Make sure the new value is in a register */

            genRecoverReg(op2, 0, RegSet::KEEP_REG);

            /* Add the new value into the target */

            inst_TT_RV(ins, op1, op2->gtRegNum);
        }
#endif // !CPU_LOAD_STORE_ARCH
        /* Free up anything that was tied up either side */
        regSet.rsUnlockUsedReg(addrReg);
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
        genReleaseReg(op2);
    }
    else
    {
        /* Make the target addressable */

        addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);

        /* Compute the new value into a register */

        genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
        regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));

        /* Make sure the target is still addressable */

        addrReg = genKeepAddressable(op1, addrReg);
        regSet.rsLockUsedReg(addrReg);

#if !CPU_LOAD_STORE_ARCH
        // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory

        /* For small types with overflow check, we need to
            sign/zero extend the result, so we need it in a reg */

        if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH
        {
            // (Unconditional on CPU_LOAD_STORE_ARCH, see #if above.)
            reg = regSet.rsPickReg();

            inst_RV_TT(INS_mov, reg, op1);

            inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
            regTracker.rsTrackRegTrash(reg);

            if (ovfl)
                genCheckOverflow(tree);

            inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
        }
#if !CPU_LOAD_STORE_ARCH
        else
        {
            /* Add the new value into the target */

            inst_TT_RV(ins, op1, op2->gtRegNum);
        }
#endif

        /* Free up anything that was tied up either side */
        regSet.rsUnlockUsedReg(addrReg);
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
        genReleaseReg(op2);
    }

    genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
}
7694
7695 /*****************************************************************************
7696  *
7697  *  Generate code for GT_UMOD.
7698  */
7699
7700 void CodeGen::genCodeForUnsignedMod(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7701 {
7702     assert(tree->OperGet() == GT_UMOD);
7703
7704     GenTree*        op1      = tree->gtOp.gtOp1;
7705     GenTree*        op2      = tree->gtOp.gtOp2;
7706     const var_types treeType = tree->TypeGet();
7707     regMaskTP       needReg  = destReg;
7708     regNumber       reg;
7709
7710     /* Is this a division by an integer constant? */
7711
7712     noway_assert(op2);
7713     if (compiler->fgIsUnsignedModOptimizable(op2))
7714     {
7715         /* Generate the operand into some register */
7716
7717         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7718         noway_assert(op1->InReg());
7719
7720         reg = op1->gtRegNum;
7721
7722         /* Generate the appropriate sequence */
7723         size_t ival = op2->gtIntCon.gtIconVal - 1;
7724         inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
7725
7726         /* The register is now trashed */
7727
7728         regTracker.rsTrackRegTrash(reg);
7729
7730         genCodeForTree_DONE(tree, reg);
7731         return;
7732     }
7733
7734     genCodeForGeneralDivide(tree, destReg, bestReg);
7735 }
7736
7737 /*****************************************************************************
7738  *
7739  *  Generate code for GT_MOD.
7740  */
7741
void CodeGen::genCodeForSignedMod(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_MOD);

    GenTree*        op1      = tree->gtOp.gtOp1;
    GenTree*        op2      = tree->gtOp.gtOp2;
    const var_types treeType = tree->TypeGet();
    regMaskTP       needReg  = destReg;
    regNumber       reg;

    /* Is this a division by an integer constant? */

    noway_assert(op2);
    if (compiler->fgIsSignedModOptimizable(op2))
    {
        // Power-of-two divisor: compute the modulus with an AND, then patch
        // up the result for negative dividends (C# signed mod takes the sign
        // of the dividend, so a negative input needs sign-correction).
        ssize_t     ival = op2->gtIntCon.gtIconVal;
        BasicBlock* skip = genCreateTempLabel();

        /* Generate the operand into some register */

        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
        noway_assert(op1->InReg());

        reg = op1->gtRegNum;

        /* Generate the appropriate sequence */

        // AND with (ival-1) to take the modulus, while OR-ing in 0x80000000
        // preserves the dividend's sign bit so the jump below can test it.
        // INS_FLAGS_SET makes the following conditional branch valid.
        inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);

        /* The register is now trashed */

        regTracker.rsTrackRegTrash(reg);

        /* Check and branch for a positive value; no fix-up needed then */
        emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
        inst_JMP(jmpGEL, skip);

        /* Generate the rest of the sequence and we're done */

        // Negative dividend: dec, OR in -ival (sign-extend the partial
        // remainder back to negative), then inc — standard sign fix-up.
        genIncRegBy(reg, -1, NULL, treeType);
        ival = -ival;
        if ((treeType == TYP_LONG) && ((int)ival != ival))
        {
            // The immediate doesn't fit in 32 bits; materialize it in a
            // scratch register distinct from 'reg' first.
            regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
            instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
            inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
        }
        else
        {
            inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
        }
        genIncRegBy(reg, 1, NULL, treeType);

        /* Define the 'skip' label and we're done */

        genDefineTempLabel(skip);

        genCodeForTree_DONE(tree, reg);
        return;
    }

    // Divisor not optimizable: fall back to the general divide sequence.
    genCodeForGeneralDivide(tree, destReg, bestReg);
}
7805
7806 /*****************************************************************************
7807  *
7808  *  Generate code for GT_UDIV.
7809  */
7810
7811 void CodeGen::genCodeForUnsignedDiv(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7812 {
7813     assert(tree->OperGet() == GT_UDIV);
7814
7815     GenTree*        op1      = tree->gtOp.gtOp1;
7816     GenTree*        op2      = tree->gtOp.gtOp2;
7817     const var_types treeType = tree->TypeGet();
7818     regMaskTP       needReg  = destReg;
7819     regNumber       reg;
7820
7821     /* Is this a division by an integer constant? */
7822
7823     noway_assert(op2);
7824     if (compiler->fgIsUnsignedDivOptimizable(op2))
7825     {
7826         size_t ival = op2->gtIntCon.gtIconVal;
7827
7828         /* Division by 1 must be handled elsewhere */
7829
7830         noway_assert(ival != 1 || compiler->opts.MinOpts());
7831
7832         /* Generate the operand into some register */
7833
7834         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7835         noway_assert(op1->InReg());
7836
7837         reg = op1->gtRegNum;
7838
7839         /* Generate "shr reg, log2(value)" */
7840
7841         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
7842
7843         /* The register is now trashed */
7844
7845         regTracker.rsTrackRegTrash(reg);
7846
7847         genCodeForTree_DONE(tree, reg);
7848         return;
7849     }
7850
7851     genCodeForGeneralDivide(tree, destReg, bestReg);
7852 }
7853
7854 /*****************************************************************************
7855  *
7856  *  Generate code for GT_DIV.
7857  */
7858
void CodeGen::genCodeForSignedDiv(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_DIV);

    GenTree*        op1      = tree->gtOp.gtOp1;
    GenTree*        op2      = tree->gtOp.gtOp2;
    const var_types treeType = tree->TypeGet();
    regMaskTP       needReg  = destReg;
    regNumber       reg;

    /* Is this a division by an integer constant? */

    noway_assert(op2);
    if (compiler->fgIsSignedDivOptimizable(op2))
    {
        // Power-of-two divisor: use an arithmetic shift, with a fix-up so
        // that negative dividends round toward zero (a plain sar rounds
        // toward negative infinity).
        ssize_t ival_s = op2->gtIntConCommon.IconValue();
        assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
        size_t ival = static_cast<size_t>(ival_s);

        /* Division by 1 must be handled elsewhere */

        noway_assert(ival != 1);

        BasicBlock* onNegDivisee = genCreateTempLabel();

        /* Generate the operand into some register */

        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
        noway_assert(op1->InReg());

        reg = op1->gtRegNum;

        if (ival == 2)
        {
            /* Generate "sar reg, log2(value)" */

            // The shift both divides and, via INS_FLAGS_SET, leaves the
            // carry flag holding the bit shifted out — used below to round
            // a negative dividend toward zero.
            inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);

            // Check and branch for a positive value, skipping the INS_ADDC instruction
            emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
            inst_JMP(jmpGEL, onNegDivisee);

            // Add the carry flag to 'reg'
            inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));

            /* Define the 'onNegDivisee' label and we're done */

            genDefineTempLabel(onNegDivisee);

            /* The register is now trashed */

            regTracker.rsTrackRegTrash(reg);

            /* The result is the same as the operand */

            reg = op1->gtRegNum;
        }
        else
        {
            /* Generate the following sequence */
            /*
            test    reg, reg
            jns     onNegDivisee
            add     reg, ival-1
            onNegDivisee:
            sar     reg, log2(ival)
            */

            // Adding (ival-1) before the shift — only for negative dividends
            // — biases the result so the arithmetic shift rounds toward zero.
            instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);

            // Check and branch for a positive value, skipping the INS_add instruction
            emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
            inst_JMP(jmpGEL, onNegDivisee);

            inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));

            /* Define the 'onNegDivisee' label and we're done */

            genDefineTempLabel(onNegDivisee);

            /* Generate "sar reg, log2(value)" */

            inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));

            /* The register is now trashed */

            regTracker.rsTrackRegTrash(reg);

            /* The result is the same as the operand */

            reg = op1->gtRegNum;
        }

        genCodeForTree_DONE(tree, reg);
        return;
    }

    // Divisor not optimizable: fall back to the general divide sequence.
    genCodeForGeneralDivide(tree, destReg, bestReg);
}
7958
7959 /*****************************************************************************
7960  *
7961  *  Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
7962  *  (if op2 is not a power of 2 constant).
7963  */
7964
void CodeGen::genCodeForGeneralDivide(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
           tree->OperGet() == GT_DIV);

    GenTree*        op1      = tree->gtOp.gtOp1;
    GenTree*        op2      = tree->gtOp.gtOp2;
    const var_types treeType = tree->TypeGet();
    regMaskTP       needReg  = destReg;
    regNumber       reg;
    instruction     ins;
    bool            gotOp1;
    regMaskTP       addrReg;

#if USE_HELPERS_FOR_INT_DIV
    noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
#endif

#if defined(_TARGET_XARCH_)

    // x86 idiv/div requires the dividend in EDX:EAX; the quotient lands in
    // EAX and the remainder in EDX, so both registers are carefully reserved
    // below before the divide is emitted.

    /* Which operand are we supposed to evaluate first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* We'll evaluate 'op2' first */

        gotOp1 = false;
        destReg &= ~op1->gtRsvdRegs;

        /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
        if (op1->gtOper == GT_LCL_VAR)
        {
            unsigned varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            if (varDsc->lvRegister)
            {
                destReg &= ~genRegMask(varDsc->lvRegNum);
            }
        }
    }
    else
    {
        /* We'll evaluate 'op1' first */

        gotOp1 = true;

        regMaskTP op1Mask;
        if (RBM_EAX & op2->gtRsvdRegs)
            op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
        else
            op1Mask = RBM_EAX; // EAX would be ideal

        /* Generate the dividend into EAX and hold on to it. freeOnly=true */

        genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }

    /* We want to avoid using EAX or EDX for the second operand */

    destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);

    /* Make the second operand addressable */
    op2 = genCodeForCommaTree(op2);

    /* Special case: if op2 is a local var we are done */

    // A local can be divided from memory directly; no register is needed
    // for the divisor unless it already lives in one.
    if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
    {
        if (!op2->InReg())
            addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
        else
            addrReg = 0;
    }
    else
    {
        genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);

        noway_assert(op2->InReg());
        addrReg = genRegMask(op2->gtRegNum);
    }

    /* Make sure we have the dividend in EAX */

    if (gotOp1)
    {
        /* We've previously computed op1 into EAX */

        genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
    }
    else
    {
        /* Compute op1 into EAX and hold on to it */

        genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
    }

    noway_assert(op1->InReg());
    noway_assert(op1->gtRegNum == REG_EAX);

    /* We can now safely (we think) grab EDX */

    regSet.rsGrabReg(RBM_EDX);
    regSet.rsLockReg(RBM_EDX);

    /* Convert the integer in EAX into a un/signed long in EDX:EAX */

    const genTreeOps oper = tree->OperGet();

    // Unsigned divides zero-extend (EDX = 0); signed divides sign-extend
    // via cdq, which fills EDX with the sign bit of EAX.
    if (oper == GT_UMOD || oper == GT_UDIV)
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
    else
        instGen(INS_cdq);

    /* Make sure the divisor is still addressable */

    addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);

    /* Perform the division */

    if (oper == GT_UMOD || oper == GT_UDIV)
        inst_TT(INS_UNSIGNED_DIVIDE, op2);
    else
        inst_TT(INS_SIGNED_DIVIDE, op2);

    /* Free up anything tied up by the divisor's address */

    genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

    /* Unlock and free EDX */

    regSet.rsUnlockReg(RBM_EDX);

    /* Free up op1 (which is in EAX) as well */

    genReleaseReg(op1);

    /* Both EAX and EDX are now trashed */

    regTracker.rsTrackRegTrash(REG_EAX);
    regTracker.rsTrackRegTrash(REG_EDX);

    /* Figure out which register the result is in */

    // Quotient in EAX for DIV/UDIV; remainder in EDX for MOD/UMOD.
    reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;

    /* Don't forget to mark the first operand as using EAX and EDX */

    op1->gtRegNum = reg;

    genCodeForTree_DONE(tree, reg);

#elif defined(_TARGET_ARM_)

    // ARM has a true three-operand sdiv/udiv, so no fixed registers are
    // required; MOD/UMOD are computed as op1 - (op1 / op2) * op2.

    /* Which operand are we supposed to evaluate first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* We'll evaluate 'op2' first */

        gotOp1 = false;
        destReg &= ~op1->gtRsvdRegs;

        /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
        if (op1->gtOper == GT_LCL_VAR)
        {
            unsigned varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            if (varDsc->lvRegister)
            {
                destReg &= ~genRegMask(varDsc->lvRegNum);
            }
        }
    }
    else
    {
        /* We'll evaluate 'op1' first */

        gotOp1            = true;
        regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;

        /* Generate the dividend into a register and hold on to it. */

        genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }

    /* Evaluate the second operand into a register and hold onto it. */

    genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);

    noway_assert(op2->InReg());
    addrReg = genRegMask(op2->gtRegNum);

    if (gotOp1)
    {
        // Recover op1 if spilled
        genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
    }
    else
    {
        /* Compute op1 into any register and hold on to it */
        genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }
    noway_assert(op1->InReg());

    reg = regSet.rsPickReg(needReg, bestReg);

    // Perform the division

    const genTreeOps oper = tree->OperGet();

    if (oper == GT_UMOD || oper == GT_UDIV)
        ins = INS_udiv;
    else
        ins = INS_sdiv;

    getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);

    if (oper == GT_UMOD || oper == GT_MOD)
    {
        // remainder = op1 - (op1 / op2) * op2
        getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
        getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
    }
    /* Free up op1 and op2 */
    genReleaseReg(op1);
    genReleaseReg(op2);

    genCodeForTree_DONE(tree, reg);

#else
#error "Unknown _TARGET_"
#endif
}
8199
8200 /*****************************************************************************
8201  *
8202  *  Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
8203  */
8204
void CodeGen::genCodeForAsgShift(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);

    const genTreeOps oper     = tree->OperGet();
    GenTree*         op1      = tree->gtOp.gtOp1;
    GenTree*         op2      = tree->gtOp.gtOp2;
    const var_types  treeType = tree->TypeGet();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regMaskTP        needReg  = destReg;
    regNumber        reg;
    instruction      ins;
    regMaskTP        addrReg;

    // Map the assignment-shift operator to the target shift instruction.
    switch (oper)
    {
        case GT_ASG_LSH:
            ins = INS_SHIFT_LEFT_LOGICAL;
            break;
        case GT_ASG_RSH:
            ins = INS_SHIFT_RIGHT_ARITHM;
            break;
        case GT_ASG_RSZ:
            ins = INS_SHIFT_RIGHT_LOGICAL;
            break;
        default:
            unreached();
    }

    noway_assert(!varTypeIsGC(treeType));
    noway_assert(op2);

    /* Shifts by a constant amount are easier */

    if (op2->IsCnsIntOrI())
    {
        /* Make the target addressable */

        addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);

        /* Are we shifting a register left by 1 bit? */

        if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && op1->InReg())
        {
            /* The target lives in a register */

            reg = op1->gtRegNum;

            /* "add reg, reg" is cheaper than "shl reg, 1" */

            inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
        }
        else
        {
#if CPU_LOAD_STORE_ARCH
            // Load/store architectures cannot shift memory in place:
            // load the value, shift in a register, store it back.
            if (!op1->InReg())
            {
                regSet.rsLockUsedReg(addrReg);

                // Load op1 into a reg

                reg = regSet.rsPickReg(RBM_ALLINT);

                inst_RV_TT(INS_mov, reg, op1);

                // Issue the shift

                inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
                regTracker.rsTrackRegTrash(reg);

                /* Store the (sign/zero extended) result back to the stack location of the variable */

                inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

                regSet.rsUnlockUsedReg(addrReg);
            }
            else
#endif // CPU_LOAD_STORE_ARCH
            {
                /* Shift by the constant value */

                inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
            }
        }

        /* If the target is a register, it has a new value */

        if (op1->InReg())
            regTracker.rsTrackRegTrash(op1->gtRegNum);

        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        /* The zero flag is now equal to the target value */
        /* X86: But only if the shift count is != 0 */

        if (op2->gtIntCon.gtIconVal != 0)
        {
            if (tree->gtSetFlags())
            {
                if (op1->gtOper == GT_LCL_VAR)
                {
                    genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
                }
                else if (op1->gtOper == GT_REG_VAR)
                {
                    genFlagsEqualToReg(tree, op1->gtRegNum);
                }
            }
        }
        else
        {
            // It is possible for the shift count to equal 0 with valid
            // IL, and not be optimized away, in the case where the node
            // is of a small type.  The sequence of instructions looks like
            // ldsfld, shr, stsfld and executed on a char field.  This will
            // never happen with code produced by our compilers, because the
            // compilers will insert a conv.u2 before the stsfld (which will
            // lead us down a different codepath in the JIT and optimize away
            // the shift by zero).  This case is not worth optimizing and we
            // will just make sure to generate correct code for it.

            // A shift by zero sets no flags, so forget any cached flag state.
            genFlagsEqualToNone();
        }
    }
    else
    {
        // Variable shift count: on XARCH the count must live in CL
        // (RBM_SHIFT); on targets with no fixed shift register, any
        // register will do.
        regMaskTP op2Regs = RBM_NONE;
        if (REG_SHIFT != REG_NA)
            op2Regs = RBM_SHIFT;

        regMaskTP tempRegs;

        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
            genCodeForTree(op2, tempRegs);
            regSet.rsMarkRegUsed(op2);

            tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
            addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);

            genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
        }
        else
        {
            /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
            regMaskTP excludeMask = op2->gtRsvdRegs;
            if (REG_SHIFT != REG_NA)
                excludeMask |= RBM_SHIFT;

            tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
            addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);

            /* Load the shift count into the necessary register */
            genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
        }

        /* Make sure the address registers are still here */
        addrReg = genKeepAddressable(op1, addrReg, op2Regs);

#ifdef _TARGET_XARCH_
        /* Perform the shift */
        inst_TT_CL(ins, op1);
#else
        /* Perform the shift */
        noway_assert(op2->InReg());
        op2Regs = genRegMask(op2->gtRegNum);

        regSet.rsLockUsedReg(addrReg | op2Regs);
        inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
        regSet.rsUnlockUsedReg(addrReg | op2Regs);
#endif
        /* Free the address registers */
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        /* If the value is in a register, it's now trash */

        if (op1->InReg())
            regTracker.rsTrackRegTrash(op1->gtRegNum);

        /* Release the op2 [RBM_SHIFT] operand */

        genReleaseReg(op2);
    }

    genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
}
8392
8393 /*****************************************************************************
8394  *
8395  *  Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
8396  */
8397
void CodeGen::genCodeForShift(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperIsShift());

    const genTreeOps oper     = tree->OperGet();
    GenTree*         op1      = tree->gtOp.gtOp1;
    GenTree*         op2      = tree->gtOp.gtOp2;
    const var_types  treeType = tree->TypeGet();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regMaskTP        needReg  = destReg;
    regNumber        reg;
    instruction      ins;

    // Map the shift operator to the target shift instruction.
    switch (oper)
    {
        case GT_LSH:
            ins = INS_SHIFT_LEFT_LOGICAL;
            break;
        case GT_RSH:
            ins = INS_SHIFT_RIGHT_ARITHM;
            break;
        case GT_RSZ:
            ins = INS_SHIFT_RIGHT_LOGICAL;
            break;
        default:
            unreached();
    }

    /* Is the shift count constant? */
    noway_assert(op2);
    if (op2->IsIntCnsFitsInI32())
    {
        // TODO: Check to see if we could generate a LEA instead!

        /* Compute the left operand into any free register */

        genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);

        noway_assert(op1->InReg());
        reg = op1->gtRegNum;

        /* Are we shifting left by 1 bit? (or 2 bits for fast code) */

        // On ARM, until proven otherwise by performance numbers, just do the shift.
        // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
        // It's smaller than two "add reg, reg".

        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_ARM_
        if (oper == GT_LSH)
        {
            emitAttr size = emitActualTypeSize(treeType);
            if (op2->gtIntConCommon.IconValue() == 1)
            {
                /* "add reg, reg" is smaller and faster than "shl reg, 1" */
                inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
            }
            else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
            {
                /* two "add reg, reg" instructions are faster than "shl reg, 2" */
                inst_RV_RV(INS_add, reg, reg, treeType);
                inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
            }
            else
                goto DO_SHIFT_BY_CNS;
        }
        else
#endif // _TARGET_ARM_
        {
#ifndef _TARGET_ARM_
        DO_SHIFT_BY_CNS:
#endif // _TARGET_ARM_
            // If we are shifting 'reg' by zero bits and do not need the flags to be set
            // then we can just skip emitting the instruction as 'reg' is already correct.
            //
            if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
            {
                /* Generate the appropriate shift instruction */
                inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
            }
        }
    }
    else
    {
        // Variable shift count: on XARCH the count must end up in ECX;
        // other targets have no fixed shift-count register.

        /* Calculate a useful register mask for computing op1 */
        needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
        regMaskTP op2RegMask;
#ifdef _TARGET_XARCH_
        op2RegMask = RBM_ECX;
#else
        op2RegMask = RBM_NONE;
#endif
        needReg = regSet.rsMustExclude(needReg, op2RegMask);

        regMaskTP tempRegs;

        /* Which operand are we supposed to evaluate first? */
        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            /* Load the shift count [into ECX on XARCH] */
            tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
            genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);

            /* We must not target the register that is holding op2 */
            needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));

            /* Now evaluate 'op1' into a free register */
            genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);

            /* Recover op2 into ECX */
            genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
        }
        else
        {
            /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
            tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
            genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);

            /* Load the shift count [into ECX on XARCH] */
            genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
        }

        noway_assert(op2->InReg());
#ifdef _TARGET_XARCH_
        noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
#endif
        // Check for the case of op1 being spilled during the evaluation of op2
        if (op1->gtFlags & GTF_SPILLED)
        {
            // The register has been spilled -- reload it to any register except ECX
            regSet.rsLockUsedReg(op2RegMask);
            regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
            regSet.rsUnlockUsedReg(op2RegMask);
        }

        noway_assert(op1->InReg());
        reg = op1->gtRegNum;

#ifdef _TARGET_ARM_
        /* Perform the shift */
        getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
#else
        /* Perform the shift */
        inst_RV_CL(ins, reg);
#endif
        genReleaseReg(op2);
    }

    noway_assert(op1->InReg());
    noway_assert(reg == op1->gtRegNum);

    /* The register is now trashed */
    genReleaseReg(op1);
    regTracker.rsTrackRegTrash(reg);

    genCodeForTree_DONE(tree, reg);
}
8556
8557 /*****************************************************************************
8558  *
8559  *  Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
8560  *  Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
8561  */
8562
void CodeGen::genCodeForRelop(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
           tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);

    const genTreeOps oper     = tree->OperGet();
    GenTree*         op1      = tree->gtOp.gtOp1;
    const var_types  treeType = tree->TypeGet();
    regMaskTP        needReg  = destReg;
    regNumber        reg;

    // Longs and float comparisons are converted to "?:"
    noway_assert(!compiler->fgMorphRelopToQmark(op1));

    // Check if we can use the currently set flags. Else set them

    emitJumpKind jumpKind = genCondSetFlags(tree);

    // Grab a register to materialize the bool value into

    // On x86 SETcc only writes a byte register, so restrict the pick to
    // RBM_BYTE_REGS.
    bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;

    // Check that the predictor did the right job
    noway_assert(bestReg);

    // If needReg is in bestReg then use it
    if (needReg & bestReg)
        reg = regSet.rsGrabReg(needReg & bestReg);
    else
        reg = regSet.rsGrabReg(bestReg);

#if defined(_TARGET_ARM_)

    // Generate:
    //      jump-if-true L_true
    //      mov reg, 0
    //      jmp L_end
    //    L_true:
    //      mov reg, 1
    //    L_end:

    BasicBlock* L_true;
    BasicBlock* L_end;

    L_true = genCreateTempLabel();
    L_end  = genCreateTempLabel();

    inst_JMP(jumpKind, L_true);
    getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
    inst_JMP(EJ_jmp, L_end);
    genDefineTempLabel(L_true);
    getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
    genDefineTempLabel(L_end);

    regTracker.rsTrackRegTrash(reg);

#elif defined(_TARGET_XARCH_)
    regMaskTP regs = genRegMask(reg);
    noway_assert(regs & RBM_BYTE_REGS);

    // Set (lower byte of) reg according to the flags

    /* Look for the special case where we just want to transfer the carry bit */

    if (jumpKind == EJ_jb)
    {
        // "sbb reg, reg" leaves reg == -carry; negating gives 0 or 1.
        inst_RV_RV(INS_SUBC, reg, reg);
        inst_RV(INS_NEG, reg, TYP_INT);
        regTracker.rsTrackRegTrash(reg);
    }
    else if (jumpKind == EJ_jae)
    {
        // "sbb reg, reg" leaves reg == -carry; adding 1 gives !carry.
        inst_RV_RV(INS_SUBC, reg, reg);
        genIncRegBy(reg, 1, tree, TYP_INT);
        regTracker.rsTrackRegTrash(reg);
    }
    else
    {
        inst_SET(jumpKind, reg);

        regTracker.rsTrackRegTrash(reg);

        if (treeType == TYP_INT)
        {
            // Set the higher bytes to 0
            inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
        }
        else
        {
            noway_assert(treeType == TYP_BYTE);
        }
    }
#else
    NYI("TARGET");
#endif // _TARGET_XXX

    genCodeForTree_DONE(tree, reg);
}
8661
8662 //------------------------------------------------------------------------
8663 // genCodeForCopyObj: Generate code for a CopyObj node
8664 //
8665 // Arguments:
8666 //    tree    - The CopyObj node we are going to generate code for.
8667 //    destReg - The register mask for register(s), if any, that will be defined.
8668 //
8669 // Return Value:
8670 //    None
8671
8672 void CodeGen::genCodeForCopyObj(GenTree* tree, regMaskTP destReg)
8673 {
8674     // If the value class doesn't have any fields that are GC refs or
8675     // the target isn't on the GC-heap, we can merge it with CPBLK.
8676     // GC fields cannot be copied directly, instead we will
8677     // need to use a jit-helper for that.
8678     assert(tree->gtOper == GT_ASG);
8679     assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
8680
8681     GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
8682     assert(cpObjOp->HasGCPtr());
8683
8684 #ifdef _TARGET_ARM_
8685     if (cpObjOp->IsVolatile())
8686     {
8687         // Emit a memory barrier instruction before the CopyBlk
8688         instGen_MemoryBarrier();
8689     }
8690 #endif
8691     assert(tree->gtOp.gtOp2->OperIsIndir());
8692     GenTree* srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
8693     GenTree* dstObj = cpObjOp->Addr();
8694
8695     noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
8696
8697 #ifdef DEBUG
8698     CORINFO_CLASS_HANDLE clsHnd       = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
8699     size_t               debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
8700
8701     // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
8702     // The EE currently does not allow this.  Let's assert it just to be safe.
8703     noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
8704 #endif
8705
8706     size_t   blkSize    = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
8707     unsigned slots      = cpObjOp->gtSlots;
8708     BYTE*    gcPtrs     = cpObjOp->gtGcPtrs;
8709     unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
8710     assert(blkSize == cpObjOp->gtBlkSize);
8711
8712     GenTree*  treeFirst;
8713     GenTree*  treeSecond;
8714     regNumber regFirst, regSecond;
8715
8716     // Check what order the object-ptrs have to be evaluated in ?
8717
8718     if (tree->gtFlags & GTF_REVERSE_OPS)
8719     {
8720         treeFirst  = srcObj;
8721         treeSecond = dstObj;
8722 #if CPU_USES_BLOCK_MOVE
8723         regFirst  = REG_ESI;
8724         regSecond = REG_EDI;
8725 #else
8726         regFirst  = REG_ARG_1;
8727         regSecond = REG_ARG_0;
8728 #endif
8729     }
8730     else
8731     {
8732         treeFirst  = dstObj;
8733         treeSecond = srcObj;
8734 #if CPU_USES_BLOCK_MOVE
8735         regFirst  = REG_EDI;
8736         regSecond = REG_ESI;
8737 #else
8738         regFirst  = REG_ARG_0;
8739         regSecond = REG_ARG_1;
8740 #endif
8741     }
8742
8743     bool     dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
8744     bool     srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
8745     emitAttr srcType      = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8746     emitAttr dstType      = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8747
8748 #if CPU_USES_BLOCK_MOVE
8749     // Materialize the trees in the order desired
8750
8751     genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8752     genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8753     genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8754
8755     // Grab ECX because it will be trashed by the helper
8756     //
8757     regSet.rsGrabReg(RBM_ECX);
8758
8759     while (blkSize >= TARGET_POINTER_SIZE)
8760     {
8761         if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
8762         {
8763             // Note that we can use movsd even if it is a GC pointer being transfered
8764             // because the value is not cached anywhere.  If we did this in two moves,
8765             // we would have to make certain we passed the appropriate GC info on to
8766             // the emitter.
8767             instGen(INS_movsp);
8768         }
8769         else
8770         {
8771             // This helper will act like a MOVSD
8772             //    -- inputs EDI and ESI are byrefs
8773             //    -- including incrementing of ESI and EDI by 4
8774             //    -- helper will trash ECX
8775             //
8776             regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8777             regSet.rsLockUsedReg(argRegs);
8778             genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8779                               0,           // argSize
8780                               EA_PTRSIZE); // retSize
8781             regSet.rsUnlockUsedReg(argRegs);
8782         }
8783
8784         blkSize -= TARGET_POINTER_SIZE;
8785     }
8786
8787     // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
8788
8789     regTracker.rsTrackRegTrash(REG_EDI);
8790     regTracker.rsTrackRegTrash(REG_ESI);
8791     regTracker.rsTrackRegTrash(REG_ECX);
8792
8793     gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
8794
8795     /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8796         it is a emitNoGChelper. However, we have to let the emitter know that
8797         the GC liveness has changed. We do this by creating a new label.
8798         */
8799
8800     noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8801
8802     genDefineTempLabel(&dummyBB);
8803
8804 #else //  !CPU_USES_BLOCK_MOVE
8805
8806 #ifndef _TARGET_ARM_
8807 // Currently only the ARM implementation is provided
8808 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
8809 #endif
8810
8811     // Materialize the trees in the order desired
8812     bool      helperUsed;
8813     regNumber regDst;
8814     regNumber regSrc;
8815     regNumber regTemp;
8816
8817     if ((gcPtrCount > 0) && !dstIsOnStack)
8818     {
8819         genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8820         genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8821         genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8822
8823         /* The helper is a Asm-routine that will trash R2,R3 and LR */
8824         {
8825             /* Spill any callee-saved registers which are being used */
8826             regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
8827
8828             if (spillRegs)
8829             {
8830                 regSet.rsSpillRegs(spillRegs);
8831             }
8832         }
8833
8834         // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
8835         // We will also use it as the temp register for our load/store sequences
8836         //
8837         assert(REG_R2 == REG_TMP_1);
8838         regTemp    = regSet.rsGrabReg(RBM_R2);
8839         helperUsed = true;
8840     }
8841     else
8842     {
8843         genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
8844         genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
8845         genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
8846
8847         // Grab any temp register to use for our load/store sequences
8848         //
8849         regTemp    = regSet.rsGrabReg(RBM_ALLINT);
8850         helperUsed = false;
8851     }
8852     assert(dstObj->InReg());
8853     assert(srcObj->InReg());
8854
8855     regDst = dstObj->gtRegNum;
8856     regSrc = srcObj->gtRegNum;
8857
8858     assert(regDst != regTemp);
8859     assert(regSrc != regTemp);
8860
8861     instruction loadIns  = ins_Load(TYP_I_IMPL);  // INS_ldr
8862     instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
8863
8864     size_t offset = 0;
8865     while (blkSize >= TARGET_POINTER_SIZE)
8866     {
8867         CorInfoGCType gcType;
8868         CorInfoGCType gcTypeNext = TYPE_GC_NONE;
8869         var_types     type       = TYP_I_IMPL;
8870
8871         gcType = (CorInfoGCType)(*gcPtrs++);
8872         if (blkSize > TARGET_POINTER_SIZE)
8873             gcTypeNext = (CorInfoGCType)(*gcPtrs);
8874
8875         if (gcType == TYPE_GC_REF)
8876             type = TYP_REF;
8877         else if (gcType == TYPE_GC_BYREF)
8878             type = TYP_BYREF;
8879
8880         if (helperUsed)
8881         {
8882             assert(regDst == REG_ARG_0);
8883             assert(regSrc == REG_ARG_1);
8884             assert(regTemp == REG_R2);
8885         }
8886
8887         blkSize -= TARGET_POINTER_SIZE;
8888
8889         emitAttr opSize = emitTypeSize(type);
8890
8891         if (!helperUsed || (gcType == TYPE_GC_NONE))
8892         {
8893             getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
8894             getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
8895             offset += TARGET_POINTER_SIZE;
8896
8897             if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
8898             {
8899                 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
8900                 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
8901                 offset = 0;
8902             }
8903         }
8904         else
8905         {
8906             assert(offset == 0);
8907
8908             // The helper will act like this:
8909             //    -- inputs R0 and R1 are byrefs
8910             //    -- helper will perform copy from *R1 into *R0
8911             //    -- helper will perform post increment of R0 and R1 by 4
8912             //    -- helper will trash R2
8913             //    -- helper will trash R3
8914             //    -- calling the helper implicitly trashes LR
8915             //
8916             assert(helperUsed);
8917             regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8918             regSet.rsLockUsedReg(argRegs);
8919             genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8920                               0,           // argSize
8921                               EA_PTRSIZE); // retSize
8922
8923             regSet.rsUnlockUsedReg(argRegs);
8924             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
8925         }
8926     }
8927
8928     regTracker.rsTrackRegTrash(regDst);
8929     regTracker.rsTrackRegTrash(regSrc);
8930     regTracker.rsTrackRegTrash(regTemp);
8931
8932     gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
8933
8934     /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8935         it is a emitNoGChelper. However, we have to let the emitter know that
8936         the GC liveness has changed. We do this by creating a new label.
8937         */
8938
8939     noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8940
8941     genDefineTempLabel(&dummyBB);
8942
8943 #endif //  !CPU_USES_BLOCK_MOVE
8944
8945     assert(blkSize == 0);
8946
8947     genReleaseReg(dstObj);
8948     genReleaseReg(srcObj);
8949
8950     genCodeForTree_DONE(tree, REG_NA);
8951
8952 #ifdef _TARGET_ARM_
8953     if (cpObjOp->IsVolatile())
8954     {
8955         // Emit a memory barrier instruction after the CopyBlk
8956         instGen_MemoryBarrier();
8957     }
8958 #endif
8959 }
8960
8961 //------------------------------------------------------------------------
8962 // genCodeForBlkOp: Generate code for a block copy or init operation
8963 //
8964 // Arguments:
8965 //    tree    - The block assignment
//    destReg - The register mask for register(s), if any, that will be defined.
8967 //
8968 void CodeGen::genCodeForBlkOp(GenTree* tree, regMaskTP destReg)
8969 {
8970     genTreeOps oper    = tree->OperGet();
8971     GenTree*   dest    = tree->gtOp.gtOp1;
8972     GenTree*   src     = tree->gtGetOp2();
8973     regMaskTP  needReg = destReg;
8974     regMaskTP  regs    = regSet.rsMaskUsed;
8975     GenTree*   opsPtr[3];
8976     regMaskTP  regsPtr[3];
8977     GenTree*   destPtr;
8978     GenTree*   srcPtrOrVal;
8979
8980     noway_assert(tree->OperIsBlkOp());
8981
8982     bool     isCopyBlk    = false;
8983     bool     isInitBlk    = false;
8984     bool     hasGCpointer = false;
8985     unsigned blockSize    = dest->AsBlk()->gtBlkSize;
8986     GenTree* sizeNode     = nullptr;
8987     bool     sizeIsConst  = true;
8988     if (dest->gtOper == GT_DYN_BLK)
8989     {
8990         sizeNode    = dest->AsDynBlk()->gtDynamicSize;
8991         sizeIsConst = false;
8992     }
8993
8994     if (tree->OperIsCopyBlkOp())
8995     {
8996         isCopyBlk = true;
8997         if (dest->gtOper == GT_OBJ)
8998         {
8999             if (dest->AsObj()->gtGcPtrCount != 0)
9000             {
9001                 genCodeForCopyObj(tree, destReg);
9002                 return;
9003             }
9004         }
9005     }
9006     else
9007     {
9008         isInitBlk = true;
9009     }
9010
9011     // Ensure that we have an address in the CopyBlk case.
9012     if (isCopyBlk)
9013     {
9014         // TODO-1stClassStructs: Allow a lclVar here.
9015         assert(src->OperIsIndir());
9016         srcPtrOrVal = src->AsIndir()->Addr();
9017     }
9018     else
9019     {
9020         srcPtrOrVal = src;
9021     }
9022
9023 #ifdef _TARGET_ARM_
9024     if (dest->AsBlk()->IsVolatile())
9025     {
9026         // Emit a memory barrier instruction before the InitBlk/CopyBlk
9027         instGen_MemoryBarrier();
9028     }
9029 #endif
9030     {
9031         destPtr = dest->AsBlk()->Addr();
9032         noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
9033         noway_assert(
9034             (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
9035             (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
9036
9037         noway_assert(destPtr && srcPtrOrVal);
9038
9039 #if CPU_USES_BLOCK_MOVE
9040         regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
9041
9042         /* Some special code for block moves/inits for constant sizes */
9043
9044         //
9045         // Is this a fixed size COPYBLK?
9046         //      or a fixed size INITBLK with a constant init value?
9047         //
9048         if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
9049         {
9050             size_t      length  = blockSize;
9051             size_t      initVal = 0;
9052             instruction ins_P, ins_PR, ins_B;
9053
9054             if (isInitBlk)
9055             {
9056                 ins_P  = INS_stosp;
9057                 ins_PR = INS_r_stosp;
9058                 ins_B  = INS_stosb;
9059
9060                 /* Properly extend the init constant from a U1 to a U4 */
9061                 initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
9062
9063                 /* If it is a non-zero value we have to replicate      */
9064                 /* the byte value four times to form the DWORD         */
9065                 /* Then we change this new value into the tree-node      */
9066
9067                 if (initVal)
9068                 {
9069                     initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9070 #ifdef _TARGET_64BIT_
9071                     if (length > 4)
9072                     {
9073                         initVal             = initVal | (initVal << 32);
9074                         srcPtrOrVal->gtType = TYP_LONG;
9075                     }
9076                     else
9077                     {
9078                         srcPtrOrVal->gtType = TYP_INT;
9079                     }
9080 #endif // _TARGET_64BIT_
9081                 }
9082                 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9083             }
9084             else
9085             {
9086                 ins_P  = INS_movsp;
9087                 ins_PR = INS_r_movsp;
9088                 ins_B  = INS_movsb;
9089             }
9090
9091             // Determine if we will be using SSE2
9092             unsigned movqLenMin = 8;
9093             unsigned movqLenMax = 24;
9094
9095             bool bWillUseSSE2      = false;
9096             bool bWillUseOnlySSE2  = false;
9097             bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
9098
9099 #ifdef _TARGET_64BIT_
9100
9101 // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
9102 // there is no point in wasting space on the bigger instructions
9103
9104 #else // !_TARGET_64BIT_
9105
9106             if (compiler->opts.compCanUseSSE2)
9107             {
9108                 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
9109
9110                 /* Adjust for BB weight */
9111                 if (curBBweight == BB_ZERO_WEIGHT)
9112                 {
9113                     // Don't bother with this optimization in
9114                     // rarely run blocks
9115                     movqLenMax = movqLenMin = 0;
9116                 }
9117                 else if (curBBweight < BB_UNITY_WEIGHT)
9118                 {
9119                     // Be less aggressive when we are inside a conditional
9120                     movqLenMax = 16;
9121                 }
9122                 else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
9123                 {
9124                     // Be more aggressive when we are inside a loop
9125                     movqLenMax = 48;
9126                 }
9127
9128                 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
9129                 {
9130                     // Be more aggressive when optimizing for speed
9131                     // InitBlk uses fewer instructions
9132                     movqLenMax += 16;
9133                 }
9134
9135                 if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
9136                 {
9137                     bWillUseSSE2 = true;
9138
9139                     if ((length % 8) == 0)
9140                     {
9141                         bWillUseOnlySSE2 = true;
9142                         if (isInitBlk && (initVal == 0))
9143                         {
9144                             bNeedEvaluateCnst = false;
9145                             noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
9146                         }
9147                     }
9148                 }
9149             }
9150
9151 #endif // !_TARGET_64BIT_
9152
9153             const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
9154             /* Evaluate dest and src/val */
9155
9156             if (tree->gtFlags & GTF_REVERSE_OPS)
9157             {
9158                 if (bNeedEvaluateCnst)
9159                 {
9160                     genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9161                 }
9162                 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9163                 if (bNeedEvaluateCnst)
9164                 {
9165                     genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
9166                 }
9167             }
9168             else
9169             {
9170                 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9171                 if (bNeedEvaluateCnst)
9172                 {
9173                     genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9174                 }
9175                 genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
9176             }
9177
9178             bool bTrashedESI = false;
9179             bool bTrashedEDI = false;
9180
9181             if (bWillUseSSE2)
9182             {
9183                 int       blkDisp = 0;
9184                 regNumber xmmReg  = REG_XMM0;
9185
9186                 if (isInitBlk)
9187                 {
9188                     if (initVal)
9189                     {
9190                         getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9191                         getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9192                     }
9193                     else
9194                     {
9195                         getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9196                     }
9197                 }
9198
9199                 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9200                                        length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
9201
9202                 while (length > 7)
9203                 {
9204                     if (isInitBlk)
9205                     {
9206                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9207                     }
9208                     else
9209                     {
9210                         getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9211                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9212                     }
9213                     blkDisp += 8;
9214                     length -= 8;
9215                 }
9216
9217                 if (length > 0)
9218                 {
9219                     noway_assert(bNeedEvaluateCnst);
9220                     noway_assert(!bWillUseOnlySSE2);
9221
9222                     if (isCopyBlk)
9223                     {
9224                         inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9225                         bTrashedESI = true;
9226                     }
9227
9228                     inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9229                     bTrashedEDI = true;
9230
9231                     if (length >= REGSIZE_BYTES)
9232                     {
9233                         instGen(ins_P);
9234                         length -= REGSIZE_BYTES;
9235                     }
9236                 }
9237             }
9238             else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9239             {
9240                 /* For small code, we can only use ins_DR to generate fast
9241                     and small code. We also can't use "rep movsb" because
9242                     we may not atomically reading and writing the DWORD */
9243
9244                 noway_assert(bNeedEvaluateCnst);
9245
9246                 goto USE_DR;
9247             }
9248             else if (length <= 4 * REGSIZE_BYTES)
9249             {
9250                 noway_assert(bNeedEvaluateCnst);
9251
9252                 while (length >= REGSIZE_BYTES)
9253                 {
9254                     instGen(ins_P);
9255                     length -= REGSIZE_BYTES;
9256                 }
9257
9258                 bTrashedEDI = true;
9259                 if (isCopyBlk)
9260                     bTrashedESI = true;
9261             }
9262             else
9263             {
9264             USE_DR:
9265                 noway_assert(bNeedEvaluateCnst);
9266
9267                 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9268                 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9269
9270                 length &= (REGSIZE_BYTES - 1);
9271
9272                 instGen(ins_PR);
9273
9274                 regTracker.rsTrackRegTrash(REG_ECX);
9275
9276                 bTrashedEDI = true;
9277                 if (isCopyBlk)
9278                     bTrashedESI = true;
9279             }
9280
9281             /* Now take care of the remainder */
9282             CLANG_FORMAT_COMMENT_ANCHOR;
9283
9284 #ifdef _TARGET_64BIT_
9285             if (length > 4)
9286             {
9287                 noway_assert(bNeedEvaluateCnst);
9288                 noway_assert(length < 8);
9289
9290                 instGen((isInitBlk) ? INS_stosd : INS_movsd);
9291                 length -= 4;
9292
9293                 bTrashedEDI = true;
9294                 if (isCopyBlk)
9295                     bTrashedESI = true;
9296             }
9297
9298 #endif // _TARGET_64BIT_
9299
9300             if (length)
9301             {
9302                 noway_assert(bNeedEvaluateCnst);
9303
9304                 while (length--)
9305                 {
9306                     instGen(ins_B);
9307                 }
9308
9309                 bTrashedEDI = true;
9310                 if (isCopyBlk)
9311                     bTrashedESI = true;
9312             }
9313
9314             noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9315             if (bTrashedEDI)
9316                 regTracker.rsTrackRegTrash(REG_EDI);
9317             if (bTrashedESI)
9318                 regTracker.rsTrackRegTrash(REG_ESI);
9319             // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9320
9321             genReleaseReg(destPtr);
9322             if (bNeedEvaluateCnst)
9323                 genReleaseReg(srcPtrOrVal);
9324         }
9325         else
9326         {
9327             //
9328             // This a variable-sized COPYBLK/INITBLK,
9329             //   or a fixed size INITBLK with a variable init value,
9330             //
9331
9332             // What order should the Dest, Val/Src, and Size be calculated
9333
9334             compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
9335
9336             noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
9337             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9338             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9339             if (opsPtr[2] != nullptr)
9340             {
9341                 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9342             }
9343             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9344             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9345
9346             noway_assert((destPtr->InReg()) && // Dest
9347                          (destPtr->gtRegNum == REG_EDI));
9348
9349             noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9350                          (genRegMask(srcPtrOrVal->gtRegNum) == regs));
9351
9352             if (sizeIsConst)
9353             {
9354                 inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
9355             }
9356             else
9357             {
9358                 noway_assert((sizeNode->InReg()) && // Size
9359                              (sizeNode->gtRegNum == REG_ECX));
9360             }
9361
9362             if (isInitBlk)
9363                 instGen(INS_r_stosb);
9364             else
9365                 instGen(INS_r_movsb);
9366
9367             regTracker.rsTrackRegTrash(REG_EDI);
9368             regTracker.rsTrackRegTrash(REG_ECX);
9369
9370             if (isCopyBlk)
9371                 regTracker.rsTrackRegTrash(REG_ESI);
9372             // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9373
9374             genReleaseReg(opsPtr[0]);
9375             genReleaseReg(opsPtr[1]);
9376             if (opsPtr[2] != nullptr)
9377             {
9378                 genReleaseReg(opsPtr[2]);
9379             }
9380         }
9381
9382 #else // !CPU_USES_BLOCK_MOVE
9383
9384 #ifndef _TARGET_ARM_
9385 // Currently only the ARM implementation is provided
9386 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9387 #endif
9388         //
9389         // Is this a fixed size COPYBLK?
9390         //      or a fixed size INITBLK with a constant init value?
9391         //
9392         if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9393         {
9394             GenTree* dstOp          = destPtr;
9395             GenTree* srcOp          = srcPtrOrVal;
9396             unsigned length         = blockSize;
9397             unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
9398             unsigned initVal        = 0;
9399             bool     useLoop        = false;
9400
9401             if (isInitBlk)
9402             {
9403                 /* Properly extend the init constant from a U1 to a U4 */
9404                 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9405
9406                 /* If it is a non-zero value we have to replicate      */
9407                 /* the byte value four times to form the DWORD         */
9408                 /* Then we store this new value into the tree-node      */
9409
9410                 if (initVal != 0)
9411                 {
9412                     initVal                         = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9413                     srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9414                 }
9415             }
9416
9417             // Will we be using a loop to implement this INITBLK/COPYBLK?
9418             if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
9419             {
9420                 useLoop = true;
9421             }
9422
9423             regMaskTP usedRegs;
9424             regNumber regDst;
9425             regNumber regSrc;
9426             regNumber regTemp;
9427
9428             /* Evaluate dest and src/val */
9429
9430             if (tree->gtFlags & GTF_REVERSE_OPS)
9431             {
9432                 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9433                 assert(srcOp->InReg());
9434
9435                 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9436                 assert(dstOp->InReg());
9437                 regDst = dstOp->gtRegNum;
9438
9439                 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9440                 regSrc = srcOp->gtRegNum;
9441             }
9442             else
9443             {
9444                 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9445                 assert(dstOp->InReg());
9446
9447                 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9448                 assert(srcOp->InReg());
9449                 regSrc = srcOp->gtRegNum;
9450
9451                 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9452                 regDst = dstOp->gtRegNum;
9453             }
9454             assert(dstOp->InReg());
9455             assert(srcOp->InReg());
9456
9457             regDst                = dstOp->gtRegNum;
9458             regSrc                = srcOp->gtRegNum;
9459             usedRegs              = (genRegMask(regSrc) | genRegMask(regDst));
9460             bool     dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9461             emitAttr dstType      = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9462             emitAttr srcType;
9463
9464             if (isCopyBlk)
9465             {
9466                 // Prefer a low register,but avoid one of the ones we've already grabbed
9467                 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9468                 usedRegs |= genRegMask(regTemp);
9469                 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9470                 srcType           = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9471             }
9472             else
9473             {
9474                 regTemp = REG_STK;
9475                 srcType = EA_PTRSIZE;
9476             }
9477
9478             instruction loadIns  = ins_Load(TYP_I_IMPL);  // INS_ldr
9479             instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
9480
9481             int finalOffset;
9482
9483             // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
9484             if (!useLoop)
9485             {
9486                 for (unsigned i = 0; i < fullStoreCount; i++)
9487                 {
9488                     if (isCopyBlk)
9489                     {
9490                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9491                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9492                         gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9493                         regTracker.rsTrackRegTrash(regTemp);
9494                     }
9495                     else
9496                     {
9497                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9498                     }
9499                 }
9500
9501                 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9502                 length -= finalOffset;
9503             }
9504             else // We will use a loop to implement this INITBLK/COPYBLK
9505             {
9506                 unsigned pairStoreLoopCount = fullStoreCount / 2;
9507
9508                 // We need a second temp register for CopyBlk
9509                 regNumber regTemp2 = REG_STK;
9510                 if (isCopyBlk)
9511                 {
9512                     // Prefer a low register, but avoid one of the ones we've already grabbed
9513                     regTemp2 =
9514                         regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9515                     usedRegs |= genRegMask(regTemp2);
9516                 }
9517
9518                 // Pick and initialize the loop counter register
9519                 regNumber regLoopIndex;
9520                 regLoopIndex =
9521                     regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9522                 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9523
9524                 // Create and define the Basic Block for the loop top
9525                 BasicBlock* loopTopBlock = genCreateTempLabel();
9526                 genDefineTempLabel(loopTopBlock);
9527
9528                 // The loop body
9529                 if (isCopyBlk)
9530                 {
9531                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9532                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9533                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9534                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9535                     getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9536                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9537                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9538                     regTracker.rsTrackRegTrash(regSrc);
9539                     regTracker.rsTrackRegTrash(regTemp);
9540                     regTracker.rsTrackRegTrash(regTemp2);
9541                 }
9542                 else // isInitBlk
9543                 {
9544                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9545                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
9546                 }
9547
9548                 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9549                 regTracker.rsTrackRegTrash(regDst);
9550                 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9551                 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9552                 inst_JMP(jmpGTS, loopTopBlock);
9553
9554                 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9555
9556                 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
9557
9558                 if (length & TARGET_POINTER_SIZE)
9559                 {
9560                     if (isCopyBlk)
9561                     {
9562                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9563                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9564                     }
9565                     else
9566                     {
9567                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9568                     }
9569                     finalOffset = TARGET_POINTER_SIZE;
9570                     length -= TARGET_POINTER_SIZE;
9571                 }
9572                 else
9573                 {
9574                     finalOffset = 0;
9575                 }
9576             }
9577
9578             if (length & sizeof(short))
9579             {
9580                 loadIns  = ins_Load(TYP_USHORT);  // INS_ldrh
9581                 storeIns = ins_Store(TYP_USHORT); // INS_strh
9582
9583                 if (isCopyBlk)
9584                 {
9585                     getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9586                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9587                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9588                     regTracker.rsTrackRegTrash(regTemp);
9589                 }
9590                 else
9591                 {
9592                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9593                 }
9594                 length -= sizeof(short);
9595                 finalOffset += sizeof(short);
9596             }
9597
9598             if (length & sizeof(char))
9599             {
9600                 loadIns  = ins_Load(TYP_UBYTE);  // INS_ldrb
9601                 storeIns = ins_Store(TYP_UBYTE); // INS_strb
9602
9603                 if (isCopyBlk)
9604                 {
9605                     getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9606                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9607                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9608                     regTracker.rsTrackRegTrash(regTemp);
9609                 }
9610                 else
9611                 {
9612                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9613                 }
9614                 length -= sizeof(char);
9615             }
9616             assert(length == 0);
9617
9618             genReleaseReg(dstOp);
9619             genReleaseReg(srcOp);
9620         }
9621         else
9622         {
9623             //
9624             // This a variable-sized COPYBLK/INITBLK,
9625             //   or a fixed size INITBLK with a variable init value,
9626             //
9627
9628             // What order should the Dest, Val/Src, and Size be calculated
9629
9630             regMaskTP regsToLock = RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2;
9631
9632             compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
9633
9634             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9635             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9636             if (opsPtr[2] != nullptr)
9637             {
9638                 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
9639             }
9640             else
9641             {
9642                 regSet.rsLockReg(RBM_ARG_2);
9643                 regsToLock &= ~RBM_ARG_2;
9644             }
9645             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9646             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9647
9648             noway_assert((destPtr->InReg()) && // Dest
9649                          (destPtr->gtRegNum == REG_ARG_0));
9650
9651             noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9652                          (srcPtrOrVal->gtRegNum == REG_ARG_1));
9653
9654             if (sizeIsConst)
9655             {
9656                 inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
9657             }
9658             else
9659             {
9660                 noway_assert((sizeNode->InReg()) && // Size
9661                              (sizeNode->gtRegNum == REG_ARG_2));
9662             }
9663
9664             regSet.rsLockUsedReg(regsToLock);
9665
9666             genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
9667                                         /* GT_INITBLK */
9668                                         : CORINFO_HELP_MEMSET,
9669                               0, EA_UNKNOWN);
9670
9671             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9672
9673             regSet.rsUnlockUsedReg(regsToLock);
9674             genReleaseReg(opsPtr[0]);
9675             genReleaseReg(opsPtr[1]);
9676             if (opsPtr[2] != nullptr)
9677             {
9678                 genReleaseReg(opsPtr[2]);
9679             }
9680             else
9681             {
9682                 regSet.rsUnlockReg(RBM_ARG_2);
9683             }
9684         }
9685
9686         if (isCopyBlk && dest->AsBlk()->IsVolatile())
9687         {
9688             // Emit a memory barrier instruction after the CopyBlk
9689             instGen_MemoryBarrier();
9690         }
9691 #endif // !CPU_USES_BLOCK_MOVE
9692     }
9693 }
// File-scope placeholder BasicBlock. NOTE(review): no use of this object is
// visible in this chunk — presumably it serves as a dummy/sentinel block
// (e.g. a stand-in jump target) elsewhere in this file; confirm its callers
// before changing its linkage or removing it.
BasicBlock dummyBB;
9695
9696 #ifdef _PREFAST_
9697 #pragma warning(push)
9698 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
9699 #endif
9700 void CodeGen::genCodeForTreeSmpOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
9701 {
9702     const genTreeOps oper     = tree->OperGet();
9703     const var_types  treeType = tree->TypeGet();
9704     GenTree*         op1      = tree->gtOp.gtOp1;
9705     GenTree*         op2      = tree->gtGetOp2IfPresent();
9706     regNumber        reg      = DUMMY_INIT(REG_CORRUPT);
9707     regMaskTP        regs     = regSet.rsMaskUsed;
9708     regMaskTP        needReg  = destReg;
9709     insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9710     emitAttr         size;
9711     instruction      ins;
9712     regMaskTP        addrReg;
9713     GenTree*         opsPtr[3];
9714     regMaskTP        regsPtr[3];
9715
9716 #ifdef DEBUG
9717     addrReg = 0xDEADCAFE;
9718 #endif
9719
9720     noway_assert(tree->OperKind() & GTK_SMPOP);
9721
9722     switch (oper)
9723     {
9724         case GT_ASG:
9725             if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR)
9726             {
9727                 genCodeForBlkOp(tree, destReg);
9728             }
9729             else
9730             {
9731                 genCodeForTreeSmpOpAsg(tree);
9732             }
9733             return;
9734
9735         case GT_ASG_LSH:
9736         case GT_ASG_RSH:
9737         case GT_ASG_RSZ:
9738             genCodeForAsgShift(tree, destReg, bestReg);
9739             return;
9740
9741         case GT_ASG_AND:
9742         case GT_ASG_OR:
9743         case GT_ASG_XOR:
9744         case GT_ASG_ADD:
9745         case GT_ASG_SUB:
9746             genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9747             return;
9748
9749         case GT_CHS:
9750             addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9751 #ifdef _TARGET_XARCH_
9752             // Note that the specialCase here occurs when the treeType specifies a byte sized operation
9753             // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9754             //
9755             bool specialCase;
9756             specialCase = false;
9757             if (op1->gtOper == GT_REG_VAR)
9758             {
9759                 /* Get hold of the target register */
9760
9761                 reg = op1->gtRegVar.gtRegNum;
9762                 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9763                 {
9764                     regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9765
9766                     inst_RV_RV(INS_mov, byteReg, reg);
9767                     regTracker.rsTrackRegTrash(byteReg);
9768
9769                     inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9770                     var_types   op1Type     = op1->TypeGet();
9771                     instruction wideningIns = ins_Move_Extend(op1Type, true);
9772                     inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
9773                     regTracker.rsTrackRegTrash(reg);
9774                     specialCase = true;
9775                 }
9776             }
9777
9778             if (!specialCase)
9779             {
9780                 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9781             }
9782 #else // not  _TARGET_XARCH_
9783             if (op1->InReg())
9784             {
9785                 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9786             }
9787             else
9788             {
9789                 // Fix 388382 ARM JitStress WP7
9790                 var_types op1Type = op1->TypeGet();
9791                 regNumber reg     = regSet.rsPickFreeReg();
9792                 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9793                 regTracker.rsTrackRegTrash(reg);
9794                 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9795                 inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
9796             }
9797 #endif
9798             if (op1->InReg())
9799                 regTracker.rsTrackRegTrash(op1->gtRegNum);
9800             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9801
9802             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9803             return;
9804
9805         case GT_AND:
9806         case GT_OR:
9807         case GT_XOR:
9808         case GT_ADD:
9809         case GT_SUB:
9810         case GT_MUL:
9811             genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9812             return;
9813
9814         case GT_UMOD:
9815             genCodeForUnsignedMod(tree, destReg, bestReg);
9816             return;
9817
9818         case GT_MOD:
9819             genCodeForSignedMod(tree, destReg, bestReg);
9820             return;
9821
9822         case GT_UDIV:
9823             genCodeForUnsignedDiv(tree, destReg, bestReg);
9824             return;
9825
9826         case GT_DIV:
9827             genCodeForSignedDiv(tree, destReg, bestReg);
9828             return;
9829
9830         case GT_LSH:
9831         case GT_RSH:
9832         case GT_RSZ:
9833             genCodeForShift(tree, destReg, bestReg);
9834             return;
9835
9836         case GT_NEG:
9837         case GT_NOT:
9838
9839             /* Generate the operand into some register */
9840
9841             genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9842             noway_assert(op1->InReg());
9843
9844             reg = op1->gtRegNum;
9845
9846             /* Negate/reverse the value in the register */
9847
9848             inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
9849
9850             /* The register is now trashed */
9851
9852             regTracker.rsTrackRegTrash(reg);
9853
9854             genCodeForTree_DONE(tree, reg);
9855             return;
9856
9857         case GT_IND:
9858         case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
9859
9860             /* Make sure the operand is addressable */
9861
9862             addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9863
9864             genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9865
9866             /* Figure out the size of the value being loaded */
9867
9868             size = EA_ATTR(genTypeSize(tree->gtType));
9869
9870             /* Pick a register for the value */
9871
9872             if (needReg == RBM_ALLINT && bestReg == 0)
9873             {
9874                 /* Absent a better suggestion, pick a useless register */
9875
9876                 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9877             }
9878
9879             reg = regSet.rsPickReg(needReg, bestReg);
9880
9881             if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9882             {
9883                 noway_assert(size == EA_PTRSIZE);
9884                 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
9885                                           (int)op1->gtIntCon.gtIconVal);
9886             }
9887             else
9888             {
9889                 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9890
9891                 inst_mov_RV_ST(reg, tree);
9892             }
9893
9894 #ifdef _TARGET_ARM_
9895             if (tree->gtFlags & GTF_IND_VOLATILE)
9896             {
9897                 // Emit a memory barrier instruction after the load
9898                 instGen_MemoryBarrier();
9899             }
9900 #endif
9901
9902             /* Note the new contents of the register we used */
9903
9904             regTracker.rsTrackRegTrash(reg);
9905
9906 #ifdef DEBUG
9907             /* Update the live set of register variables */
9908             if (compiler->opts.varNames)
9909                 genUpdateLife(tree);
9910 #endif
9911
9912             /* Now we can update the register pointer information */
9913
9914             // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9915             gcInfo.gcMarkRegPtrVal(reg, treeType);
9916
9917             genCodeForTree_DONE_LIFE(tree, reg);
9918             return;
9919
9920         case GT_CAST:
9921
9922             genCodeForNumericCast(tree, destReg, bestReg);
9923             return;
9924
9925         case GT_JTRUE:
9926
9927             /* Is this a test of a relational operator? */
9928
9929             if (op1->OperIsCompare())
9930             {
9931                 /* Generate the conditional jump */
9932
9933                 genCondJump(op1);
9934
9935                 genUpdateLife(tree);
9936                 return;
9937             }
9938
9939 #ifdef DEBUG
9940             compiler->gtDispTree(tree);
9941 #endif
9942             NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9943             break;
9944
9945         case GT_SWITCH:
9946             genCodeForSwitch(tree);
9947             return;
9948
9949         case GT_RETFILT:
9950             noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9951             if (op1 == 0) // endfinally
9952             {
9953                 reg = REG_NA;
9954
9955 #ifdef _TARGET_XARCH_
9956                 /* Return using a pop-jmp sequence. As the "try" block calls
9957                    the finally with a jmp, this leaves the x86 call-ret stack
9958                    balanced in the normal flow of path. */
9959
9960                 noway_assert(isFramePointerRequired());
9961                 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9962                 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
9963 #elif defined(_TARGET_ARM_)
9964 // Nothing needed for ARM
9965 #else
9966                 NYI("TARGET");
9967 #endif
9968             }
9969             else // endfilter
9970             {
9971                 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9972                 noway_assert(op1->InReg());
9973                 noway_assert(op1->gtRegNum == REG_INTRET);
9974                 /* The return value has now been computed */
9975                 reg = op1->gtRegNum;
9976
9977                 /* Return */
9978                 instGen_Return(0);
9979             }
9980
9981             genCodeForTree_DONE(tree, reg);
9982             return;
9983
9984         case GT_RETURN:
9985
9986             // TODO: this should be done AFTER we called exit mon so that
9987             //       we are sure that we don't have to keep 'this' alive
9988
9989             if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9990             {
9991                 /* either it's an "empty" statement or the return statement
9992                    of a synchronized method
9993                  */
9994
9995                 genPInvokeMethodEpilog();
9996             }
9997
9998             /* Is there a return value and/or an exit statement? */
9999
10000             if (op1)
10001             {
10002                 if (op1->gtType == TYP_VOID)
10003                 {
10004                     // We're returning nothing, just generate the block (shared epilog calls).
10005                     genCodeForTree(op1, 0);
10006                 }
10007 #ifdef _TARGET_ARM_
10008                 else if (op1->gtType == TYP_STRUCT)
10009                 {
10010                     if (op1->gtOper == GT_CALL)
10011                     {
10012                         // We have a return call() because we failed to tail call.
10013                         // In any case, just generate the call and be done.
10014                         assert(compiler->IsHfa(op1));
10015                         genCodeForCall(op1->AsCall(), true);
10016                         genMarkTreeInReg(op1, REG_FLOATRET);
10017                     }
10018                     else
10019                     {
10020                         assert(op1->gtOper == GT_LCL_VAR);
10021                         assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
10022                         genLoadIntoFltRetRegs(op1);
10023                     }
10024                 }
10025                 else if (op1->TypeGet() == TYP_FLOAT)
10026                 {
10027                     // This can only occur when we are returning a non-HFA struct
10028                     // that is composed of a single float field and we performed
10029                     // struct promotion and enregistered the float field.
10030                     //
10031                     genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
10032                     getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
10033                 }
10034 #endif // _TARGET_ARM_
10035                 else
10036                 {
10037                     // we can now go through this code for compiler->genReturnBB.  I've regularized all the code.
10038
10039                     // noway_assert(compiler->compCurBB != compiler->genReturnBB);
10040
10041                     noway_assert(op1->gtType != TYP_VOID);
10042
10043                     /* Generate the return value into the return register */
10044
10045                     genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
10046
10047                     /* The result must now be in the return register */
10048
10049                     noway_assert(op1->InReg());
10050                     noway_assert(op1->gtRegNum == REG_INTRET);
10051                 }
10052
10053                 /* The return value has now been computed */
10054
10055                 reg = op1->gtRegNum;
10056
10057                 genCodeForTree_DONE(tree, reg);
10058             }
10059
10060 #ifdef PROFILING_SUPPORTED
10061             // The profiling hook does not trash registers, so it's safe to call after we emit the code for
10062             // the GT_RETURN tree.
10063
10064             if (compiler->compCurBB == compiler->genReturnBB)
10065             {
10066                 genProfilingLeaveCallback();
10067             }
10068 #endif
10069 #ifdef DEBUG
10070             if (compiler->opts.compStackCheckOnRet)
10071             {
10072                 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
10073                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
10074                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
10075                 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
10076
10077                 BasicBlock*  esp_check = genCreateTempLabel();
10078                 emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
10079                 inst_JMP(jmpEqual, esp_check);
10080                 getEmitter()->emitIns(INS_BREAKPOINT);
10081                 genDefineTempLabel(esp_check);
10082             }
10083 #endif
10084             return;
10085
10086         case GT_COMMA:
10087
10088             if (tree->gtFlags & GTF_REVERSE_OPS)
10089             {
10090                 if (tree->gtType == TYP_VOID)
10091                 {
10092                     genEvalSideEffects(op2);
10093                     genUpdateLife(op2);
10094                     genEvalSideEffects(op1);
10095                     genUpdateLife(tree);
10096                     return;
10097                 }
10098
10099                 // Generate op2
10100                 genCodeForTree(op2, needReg);
10101                 genUpdateLife(op2);
10102
10103                 noway_assert(op2->InReg());
10104
10105                 regSet.rsMarkRegUsed(op2);
10106
10107                 // Do side effects of op1
10108                 genEvalSideEffects(op1);
10109
10110                 // Recover op2 if spilled
10111                 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
10112
10113                 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
10114
10115                 // set gc info if we need so
10116                 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
10117
10118                 genUpdateLife(tree);
10119                 genCodeForTree_DONE(tree, op2->gtRegNum);
10120
10121                 return;
10122             }
10123             else
10124             {
10125                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
10126
10127                 /* Generate side effects of the first operand */
10128
10129                 genEvalSideEffects(op1);
10130                 genUpdateLife(op1);
10131
10132                 /* Is the value of the second operand used? */
10133
10134                 if (tree->gtType == TYP_VOID)
10135                 {
10136                     /* The right operand produces no result. The morpher is
10137                        responsible for resetting the type of GT_COMMA nodes
10138                        to TYP_VOID if op2 isn't meant to yield a result. */
10139
10140                     genEvalSideEffects(op2);
10141                     genUpdateLife(tree);
10142                     return;
10143                 }
10144
10145                 /* Generate the second operand, i.e. the 'real' value */
10146
10147                 genCodeForTree(op2, needReg);
10148                 noway_assert(op2->InReg());
10149
10150                 /* The result of 'op2' is also the final result */
10151
10152                 reg = op2->gtRegNum;
10153
10154                 /* Remember whether we set the flags */
10155
10156                 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
10157
10158                 genCodeForTree_DONE(tree, reg);
10159                 return;
10160             }
10161
10162         case GT_BOX:
10163             genCodeForTree(op1, needReg);
10164             noway_assert(op1->InReg());
10165
10166             /* The result of 'op1' is also the final result */
10167
10168             reg = op1->gtRegNum;
10169
10170             /* Remember whether we set the flags */
10171
10172             tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
10173
10174             genCodeForTree_DONE(tree, reg);
10175             return;
10176
10177         case GT_QMARK:
10178
10179             genCodeForQmark(tree, destReg, bestReg);
10180             return;
10181
10182         case GT_NOP:
10183
10184 #if OPT_BOOL_OPS
10185             if (op1 == NULL)
10186                 return;
10187 #endif
10188             __fallthrough;
10189
10190         case GT_INIT_VAL:
10191
10192             /* Generate the operand into some register */
10193
10194             genCodeForTree(op1, needReg);
10195
10196             /* The result is the same as the operand */
10197
10198             reg = op1->gtRegNum;
10199
10200             genCodeForTree_DONE(tree, reg);
10201             return;
10202
10203         case GT_INTRINSIC:
10204
10205             switch (tree->gtIntrinsic.gtIntrinsicId)
10206             {
10207                 case CORINFO_INTRINSIC_Round:
10208                 {
10209                     noway_assert(tree->gtType == TYP_INT);
10210
10211 #if FEATURE_STACK_FP_X87
10212                     genCodeForTreeFlt(op1);
10213
10214                     /* Store the FP value into the temp */
10215                     TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10216
10217                     FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10218                     FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10219                     inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10220
10221                     reg = regSet.rsPickReg(needReg, bestReg);
10222                     regTracker.rsTrackRegTrash(reg);
10223
10224                     inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10225
10226                     compiler->tmpRlsTemp(temp);
10227 #else
10228                     genCodeForTreeFloat(tree, needReg, bestReg);
10229                     return;
10230 #endif
10231                 }
10232                 break;
10233
10234                 default:
10235                     noway_assert(!"unexpected math intrinsic");
10236             }
10237
10238             genCodeForTree_DONE(tree, reg);
10239             return;
10240
10241         case GT_LCLHEAP:
10242
10243             reg = genLclHeap(op1);
10244             genCodeForTree_DONE(tree, reg);
10245             return;
10246
10247         case GT_EQ:
10248         case GT_NE:
10249         case GT_LT:
10250         case GT_LE:
10251         case GT_GE:
10252         case GT_GT:
10253             genCodeForRelop(tree, destReg, bestReg);
10254             return;
10255
10256         case GT_ADDR:
10257
10258             genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10259             return;
10260
10261 #ifdef _TARGET_XARCH_
10262         case GT_LOCKADD:
10263
10264             // This is for a locked add operation.  We know that the resulting value doesn't "go" anywhere.
10265             // For reference, op1 is the location.  op2 is the addend or the value.
10266             if (op2->OperIsConst())
10267             {
10268                 noway_assert(op2->TypeGet() == TYP_INT);
10269                 ssize_t cns = op2->gtIntCon.gtIconVal;
10270
10271                 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10272                 switch (cns)
10273                 {
10274                     case 1:
10275                         instGen(INS_lock);
10276                         instEmit_RM(INS_inc, op1, op1, 0);
10277                         break;
10278                     case -1:
10279                         instGen(INS_lock);
10280                         instEmit_RM(INS_dec, op1, op1, 0);
10281                         break;
10282                     default:
10283                         assert((int)cns == cns); // By test above for AMD64.
10284                         instGen(INS_lock);
10285                         inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
10286                         break;
10287                 }
10288                 genReleaseReg(op1);
10289             }
10290             else
10291             {
10292                 // non constant addend means it needs to go into a register.
10293                 ins = INS_add;
10294                 goto LockBinOpCommon;
10295             }
10296
10297             genFlagsEqualToNone(); // We didn't compute a result into a register.
10298             genUpdateLife(tree);   // We didn't compute an operand into anything.
10299             return;
10300
10301         case GT_XADD:
10302             ins = INS_xadd;
10303             goto LockBinOpCommon;
10304         case GT_XCHG:
10305             ins = INS_xchg;
10306             goto LockBinOpCommon;
10307         LockBinOpCommon:
10308         {
10309             // Compute the second operand into a register.  xadd and xchg are r/m32, r32.  So even if op2
10310             // is a constant, it needs to be in a register.  This should be the output register if
10311             // possible.
10312             //
10313             // For reference, gtOp1 is the location.  gtOp2 is the addend or the value.
10314
10315             GenTree* location = op1;
10316             GenTree* value    = op2;
10317
10318             // Again, a friendly reminder.  IL calling convention is left to right.
10319             if (tree->gtFlags & GTF_REVERSE_OPS)
10320             {
10321                 // The atomic operations destroy this argument, so force it into a scratch register
10322                 reg = regSet.rsPickFreeReg();
10323                 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10324
10325                 // Must evaluate location into a register
10326                 genCodeForTree(location, needReg, RBM_NONE);
10327                 assert(location->InReg());
10328                 regSet.rsMarkRegUsed(location);
10329                 regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10330                 genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10331                 regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10332
10333                 if (ins != INS_xchg)
10334                 {
10335                     // xchg implies the lock prefix, but xadd and add require it.
10336                     instGen(INS_lock);
10337                 }
10338                 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10339                 genReleaseReg(value);
10340                 regTracker.rsTrackRegTrash(reg);
10341                 genReleaseReg(location);
10342             }
10343             else
10344             {
10345                 regMaskTP addrReg;
10346                 if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
10347                                        needReg, RegSet::KEEP_REG, &addrReg))
10348                 {
10349                     genUpdateLife(location);
10350
10351                     reg = regSet.rsPickFreeReg();
10352                     genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10353                     addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10354
10355                     if (ins != INS_xchg)
10356                     {
10357                         // xchg implies the lock prefix, but xadd and add require it.
10358                         instGen(INS_lock);
10359                     }
10360
10361                     // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10362                     // inst_TT_RV(ins, location, reg);
10363                     sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10364
10365                     genReleaseReg(value);
10366                     regTracker.rsTrackRegTrash(reg);
10367                     genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10368                 }
10369                 else
10370                 {
10371                     // Must evalute location into a register.
10372                     genCodeForTree(location, needReg, RBM_NONE);
10373                     assert(location->InReg());
10374                     regSet.rsMarkRegUsed(location);
10375
10376                     // xadd destroys this argument, so force it into a scratch register
10377                     reg = regSet.rsPickFreeReg();
10378                     genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10379                     regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10380                     genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10381                     regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10382
10383                     if (ins != INS_xchg)
10384                     {
10385                         // xchg implies the lock prefix, but xadd and add require it.
10386                         instGen(INS_lock);
10387                     }
10388
10389                     instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10390
10391                     genReleaseReg(value);
10392                     regTracker.rsTrackRegTrash(reg);
10393                     genReleaseReg(location);
10394                 }
10395             }
10396
10397             // The flags are equal to the target of the tree (i.e. the result of the add), not to the
10398             // result in the register.  If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10399             // that information to set the flags.  Doesn't seem like there is a good reason for that.
10400             // Therefore, trash the flags.
10401             genFlagsEqualToNone();
10402
10403             if (ins == INS_add)
10404             {
10405                 // If the operator was add, then we were called from the GT_LOCKADD
10406                 // case.  In that case we don't use the result, so we don't need to
10407                 // update anything.
10408                 genUpdateLife(tree);
10409             }
10410             else
10411             {
10412                 genCodeForTree_DONE(tree, reg);
10413             }
10414         }
10415             return;
10416
10417 #endif // _TARGET_XARCH_
10418
10419         case GT_ARR_LENGTH:
10420         {
10421             // Make the corresponding ind(a + c) node, and do codegen for that.
10422             GenTree* addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
10423                                                     compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10424             tree->SetOper(GT_IND);
10425             tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
10426             assert(tree->TypeGet() == TYP_INT);
10427             tree->gtOp.gtOp1 = addr;
10428             genCodeForTree(tree, destReg, bestReg);
10429             return;
10430         }
10431
10432         case GT_OBJ:
10433             // All GT_OBJ nodes must have been morphed prior to this.
10434             noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10435
10436         default:
10437 #ifdef DEBUG
10438             compiler->gtDispTree(tree);
10439 #endif
10440             noway_assert(!"unexpected unary/binary operator");
10441     } // end switch (oper)
10442
10443     unreached();
10444 }
10445 #ifdef _PREFAST_
10446 #pragma warning(pop) // End suppress PREFast warning about overly large function
10447 #endif
10448
//------------------------------------------------------------------------
// genIntegerCast: Generate code for an integer-to-integer (GT_CAST) conversion.
//
// Arguments:
//    tree    - the GT_CAST node; op1 has already been made addressable by
//              the caller (see genCodeForNumericCast, which calls
//              genMakeAddressable before calling this function)
//    needReg - mask of registers acceptable for the result
//    bestReg - hint mask of preferred registers for the result
//
// Return Value:
//    The register that holds the cast result.  The register is marked as
//    trashed in the register tracker before returning.
//
// Notes:
//    Widening casts sign/zero-extend from the source size; narrowing (or
//    sign-changing) casts extend from the destination size.  On xarch, a
//    byte-sized extension whose source sits in a non-byte-addressable
//    register is handled either with an AND mask (unsigned) or by first
//    copying the value into a byte register (signed).
//
regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
{
    instruction ins;
    emitAttr    size;          // size of the extending load: source size when widening, dest size otherwise
    bool        unsv;          // true if the extension is unsigned (zero-extend)
    bool        andv = false;  // true if high bits must additionally be masked off (byte -> char case)
    regNumber   reg;
    GenTree*    op1     = tree->gtOp.gtOp1->gtEffectiveVal();
    var_types   dstType = tree->CastToType();
    var_types   srcType = op1->TypeGet();

    if (genTypeSize(srcType) < genTypeSize(dstType))
    {
        // Widening cast

        /* we need the source size */

        size = EA_ATTR(genTypeSize(srcType));

        noway_assert(size < EA_PTRSIZE);

        unsv = varTypeIsUnsigned(srcType);
        ins  = ins_Move_Extend(srcType, op1->InReg());

        /*
            Special case: for a cast of byte to char we first
            have to expand the byte (w/ sign extension), then
            mask off the high bits.
            Use 'movsx' followed by 'and'
        */
        if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
        {
            noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
            andv = true;
        }
    }
    else
    {
        // Narrowing cast, or sign-changing cast

        noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));

        size = EA_ATTR(genTypeSize(dstType));

        unsv = varTypeIsUnsigned(dstType);
        ins  = ins_Move_Extend(dstType, op1->InReg());
    }

    noway_assert(size < EA_PTRSIZE);

    // Set bestReg to the same register as op1 if op1 is a regVar and its register is available;
    // this makes a register-to-itself extension possible (no extra mov).
    if (op1->InReg())
    {
        regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
        if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
        {
            bestReg = op1RegMask;
        }
    }

    /* Is the value sitting in a non-byte-addressable register? */

    if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
    {
        if (unsv)
        {
            // for unsigned values we can AND, so it needs not be a byte register

            reg = regSet.rsPickReg(needReg, bestReg);

            ins = INS_AND;
        }
        else
        {
            /* Move the value into a byte register */

            reg = regSet.rsGrabReg(RBM_BYTE_REGS);
        }

        if (reg != op1->gtRegNum)
        {
            /* Move the value into that register */

            regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
            inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);

            /* The value has a new home now */

            op1->gtRegNum = reg;
        }
    }
    else
    {
        /* Pick a register for the value (general case) */

        reg = regSet.rsPickReg(needReg, bestReg);

        // if we (might) need to set the flags and the value is in the same register
        // and we have an unsigned value then use AND instead of MOVZX
        if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
        {
#ifdef _TARGET_X86_
            noway_assert(ins == INS_movzx);
#endif
            ins = INS_AND;
        }
    }

    if (ins == INS_AND)
    {
        noway_assert(andv == false && unsv);

        /* Generate "and reg, MASK" (0xFF or 0xFFFF depending on size) */

        insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
        inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);

        if (tree->gtSetFlags())
            genFlagsEqualToReg(tree, reg);
    }
    else
    {
#ifdef _TARGET_XARCH_
        noway_assert(ins == INS_movsx || ins == INS_movzx);
#endif

        /* Generate "movsx/movzx reg, [addr]" */

        inst_RV_ST(ins, size, reg, op1);

        /* Mask off high bits for cast from byte to char */

        if (andv)
        {
#ifdef _TARGET_XARCH_
            noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
#endif
            insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
            inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);

            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);
        }
    }

    // The result register no longer holds any previously tracked value.
    regTracker.rsTrackRegTrash(reg);
    return reg;
}
10597
//------------------------------------------------------------------------
// genCodeForNumericCast: Generate code for a numeric GT_CAST node.
//
// Arguments:
//    tree    - the GT_CAST node
//    destReg - mask of registers acceptable for the result
//    bestReg - hint mask of preferred registers for the result
//
// Notes:
//    Dispatches on the source (op1) type:
//      - TYP_LONG: loads the low dword into a register; for overflow casts
//        it keeps the operand addressable and explicitly range-checks the
//        high dword (throwing SCK_OVERFLOW on failure).
//      - small-int / TYP_INT / TYP_UINT: for overflow casts, checks a mask
//        or min/max range and throws on overflow; otherwise falls through
//        to genIntegerCast for the actual extension.
//      - floating-point: handled inline via SSE2/x87 when
//        FEATURE_STACK_FP_X87 is defined, else via genCodeForTreeFloat.
//    The result register is recorded via genCodeForTree_DONE.
//
void CodeGen::genCodeForNumericCast(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    GenTree*  op1      = tree->gtOp.gtOp1;
    var_types dstType  = tree->CastToType();
    var_types baseType = TYP_INT;
    regNumber reg      = DUMMY_INIT(REG_CORRUPT);
    regMaskTP needReg  = destReg;
    regMaskTP addrReg;
    emitAttr  size;
    BOOL      unsv;

    /*
      * Constant casts should have been folded earlier
      * If not finite don't bother
      * We don't do this optimization for debug code/no optimization
      */

    noway_assert(
        (op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) || tree->gtOverflow() ||
        (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
        (op1->gtOper == GT_CNS_DBL && op1->gtDblCon.gtDconVal <= -1.0 && varTypeIsUnsigned(tree->CastToType())) ||
        !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));

    noway_assert(dstType != TYP_VOID);

    /* What type are we casting from? */

    switch (op1->TypeGet())
    {
        case TYP_LONG:

            /* Special case: the long is generated via the mod of long
               with an int.  This is really an int and need not be
               converted to a reg pair. NOTE: the flag only indicates
               that this is a case to TYP_INT, it hasn't actually
               verified the second operand of the MOD! */

            if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
            {

                /* Verify that the op2 of the mod node is
                   1) An integer tree, or
                   2) A long constant that is small enough to fit in an integer
                */

                GenTree* modop2 = op1->gtOp.gtOp2;
                if ((genActualType(modop2->gtType) == TYP_INT) ||
                    ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
                {
                    genCodeForTree(op1, destReg, bestReg);

#ifdef _TARGET_64BIT_
                    reg = op1->gtRegNum;
#else  // _TARGET_64BIT_
                    reg = genRegPairLo(op1->gtRegPair);
#endif //_TARGET_64BIT_

                    genCodeForTree_DONE(tree, reg);
                    return;
                }
            }

            /* Make the operand addressable.  When gtOverflow() is true,
               hold on to the addrReg as we will need it to access the higher dword */

            op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
                                            // twice!)
                                            // See, e.g., the TYP_INT case below...

            addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);

            /* Load the lower half of the value into some register */

            if (op1->InReg())
            {
                /* Can we simply use the low part of the value? */
                reg = genRegPairLo(op1->gtRegPair);

                if (tree->gtOverflow())
                    goto REG_OK;

                // Prefer reusing the low register if it is free.
                regMaskTP loMask;
                loMask = genRegMask(reg);
                if (loMask & regSet.rsRegMaskFree())
                    bestReg = loMask;
            }

            // for cast overflow we need to preserve addrReg for testing the hiDword
            // so we lock it to prevent regSet.rsPickReg from picking it.
            if (tree->gtOverflow())
                regSet.rsLockUsedReg(addrReg);

            reg = regSet.rsPickReg(needReg, bestReg);

            if (tree->gtOverflow())
                regSet.rsUnlockUsedReg(addrReg);

            noway_assert(genStillAddressable(op1));

        REG_OK:
            if (!op1->InReg() || (reg != genRegPairLo(op1->gtRegPair)))
            {
                /* Generate "mov reg, [addr-mode]" */
                inst_RV_TT(ins_Load(TYP_INT), reg, op1);
            }

            /* conv.ovf.i8i4, or conv.ovf.u8u4 */

            if (tree->gtOverflow())
            {
                // hiReg is REG_NA (>= REG_STK) when the operand lives in memory,
                // in which case the high dword is compared via the address mode.
                regNumber hiReg = (op1->InReg()) ? genRegPairHi(op1->gtRegPair) : REG_NA;

                emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
                emitJumpKind jmpLTS      = genJumpKindForOper(GT_LT, CK_SIGNED);

                switch (dstType)
                {
                    case TYP_INT:
                        // conv.ovf.i8.i4
                        /*  Generate the following sequence

                                test loDWord, loDWord   // set flags
                                jl neg
                           pos: test hiDWord, hiDWord   // set flags
                                jne ovf
                                jmp done
                           neg: cmp hiDWord, 0xFFFFFFFF
                                jne ovf
                          done:

                        */

                        instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
                        if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4       (i4 > 0 and upper bits 0)
                        {
                            genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
                            goto UPPER_BITS_ZERO;
                        }

#if CPU_LOAD_STORE_ARCH
                        // This is tricky.
                        // We will generate code like
                        // if (...)
                        // {
                        // ...
                        // }
                        // else
                        // {
                        // ...
                        // }
                        // We load the tree op1 into regs when we generate code for if clause.
                        // When we generate else clause, we see the tree is already loaded into reg, and start use it
                        // directly.
                        // Well, when the code is run, we may execute else clause without going through if clause.
                        //
                        genCodeForTree(op1, 0);
#endif

                        BasicBlock* neg;
                        BasicBlock* done;

                        neg  = genCreateTempLabel();
                        done = genCreateTempLabel();

                        // Is the loDWord positive or negative
                        inst_JMP(jmpLTS, neg);

                        // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)

                        if (hiReg < REG_STK)
                        {
                            instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
                        }
                        else
                        {
                            inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
                        }

                        genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
                        inst_JMP(EJ_jmp, done);

                        // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)

                        genDefineTempLabel(neg);

                        if (hiReg < REG_STK)
                        {
                            inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
                        }
                        else
                        {
                            inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
                        }
                        genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);

                        // Done

                        genDefineTempLabel(done);

                        break;

                    case TYP_UINT: // conv.ovf.u8u4
                    UPPER_BITS_ZERO:
                        // Just check that the upper DWord is 0

                        if (hiReg < REG_STK)
                        {
                            instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
                        }
                        else
                        {
                            inst_TT_IV(INS_cmp, op1, 0, 4);
                        }

                        genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
                        break;

                    default:
                        noway_assert(!"Unexpected dstType");
                        break;
                }

                // Release the KEEP_REG hold taken by genMakeAddressable2 above.
                genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
            }

            regTracker.rsTrackRegTrash(reg);
            genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

            genCodeForTree_DONE(tree, reg);
            return;

        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_SHORT:
        case TYP_USHORT:
        case TYP_UBYTE:
            break;

        case TYP_UINT:
        case TYP_INT:
            break;

#if FEATURE_STACK_FP_X87
        case TYP_FLOAT:
            NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
            break;

        case TYP_DOUBLE:
            if (compiler->opts.compCanUseSSE2)
            {
                // do the SSE2 based cast inline
                // getting the fp operand

                regMaskTP addrRegInt = 0;
                regMaskTP addrRegFlt = 0;

                // make the operand addressable
                // We don't want to collapse constant doubles into floats, as the SSE2 instruction
                // operates on doubles. Note that these (casts from constant doubles) usually get
                // folded, but we don't do it for some cases (infinitys, etc). So essentially this
                // shouldn't affect performance or size at all. We're fixing this for #336067
                op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
                if (!addrRegFlt && !op1->IsRegVar())
                {
                    // we have the address: load straight into XMM0 and truncate-convert

                    inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
                    genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
                    genUpdateLife(op1);

                    reg = regSet.rsPickReg(needReg);
                    getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);

                    regTracker.rsTrackRegTrash(reg);
                    genCodeForTree_DONE(tree, reg);
                }
                else
                {
                    // we will need to use a temp to get it into the xmm reg
                    // (spill from the x87 stack to memory, then reload via SSE2)
                    var_types typeTemp = op1->TypeGet();
                    TempDsc*  temp     = compiler->tmpGetTemp(typeTemp);

                    size = EA_ATTR(genTypeSize(typeTemp));

                    if (addrRegFlt)
                    {
                        // On the fp stack; Take reg to top of stack

                        FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
                    }
                    else
                    {
                        // op1->IsRegVar()
                        // pick a register
                        reg = regSet.PickRegFloat();
                        if (!op1->IsRegVarDeath())
                        {
                            // Load it on the fp stack
                            genLoadStackFP(op1, reg);
                        }
                        else
                        {
                            // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
                            genLoadStackFP(op1, reg);
                            FlatFPX87_MoveToTOS(&compCurFPState, reg);
                        }
                    }

                    // pop it off the fp stack
                    compCurFPState.Pop();

                    getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
                    // pick a reg
                    reg = regSet.rsPickReg(needReg);

                    inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
                    getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);

                    // done..release the temp
                    compiler->tmpRlsTemp(temp);

                    // the reg is now trashed
                    regTracker.rsTrackRegTrash(reg);
                    genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
                    genUpdateLife(op1);
                    genCodeForTree_DONE(tree, reg);
                }
            }
#else
        case TYP_FLOAT:
        case TYP_DOUBLE:
            genCodeForTreeFloat(tree, needReg, bestReg);
#endif // FEATURE_STACK_FP_X87
            return;

        default:
            noway_assert(!"unexpected cast type");
    }

    // Only int-sized (or smaller) sources reach this point (the cases above that
    // fall through with 'break' rather than returning).

    if (tree->gtOverflow())
    {
        /* Compute op1 into a register, and free the register */

        genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
        reg = op1->gtRegNum;

        /* Do we need to compare the value, or just check masks */

        ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
        ssize_t typeMask;

        switch (dstType)
        {
            case TYP_BYTE:
                typeMask = ssize_t((int)0xFFFFFF80);
                typeMin  = SCHAR_MIN;
                typeMax  = SCHAR_MAX;
                unsv     = (tree->gtFlags & GTF_UNSIGNED);
                break;
            case TYP_SHORT:
                typeMask = ssize_t((int)0xFFFF8000);
                typeMin  = SHRT_MIN;
                typeMax  = SHRT_MAX;
                unsv     = (tree->gtFlags & GTF_UNSIGNED);
                break;
            case TYP_INT:
                typeMask = ssize_t((int)0x80000000L);
#ifdef _TARGET_64BIT_
                unsv    = (tree->gtFlags & GTF_UNSIGNED);
                typeMin = INT_MIN;
                typeMax = INT_MAX;
#else // _TARGET_64BIT_
                noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
                unsv     = true;
#endif // _TARGET_64BIT_
                break;
            case TYP_UBYTE:
                unsv     = true;
                typeMask = ssize_t((int)0xFFFFFF00L);
                break;
            case TYP_USHORT:
                unsv     = true;
                typeMask = ssize_t((int)0xFFFF0000L);
                break;
            case TYP_UINT:
                unsv = true;
#ifdef _TARGET_64BIT_
                typeMask = 0xFFFFFFFF00000000LL;
#else  // _TARGET_64BIT_
                typeMask = 0x80000000L;
                noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
#endif // _TARGET_64BIT_
                break;
            default:
                NO_WAY("Unknown type");
                return;
        }

        // If we just have to check a mask.
        // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
        // or conv.i4u4

        if (unsv)
        {
            // TEST against the mask of bits that must be zero; any hit is overflow.
            inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
            emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
        }
        else
        {
            // Check the value is in range.
            // This must be conv.ovf.i4i1, etc.

            // Compare with the MAX

            noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));

            inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
            emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);

            // Compare with the MIN

            inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
            emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
        }

        genCodeForTree_DONE(tree, reg);
        return;
    }

    /* Make the operand addressable */

    addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);

    // Non-overflow integer cast: delegate the extension to genIntegerCast.
    reg = genIntegerCast(tree, needReg, bestReg);

    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

    genCodeForTree_DONE(tree, reg);
}
11040
11041 /*****************************************************************************
11042  *
11043  *  Generate code for a leaf node of type GT_ADDR
11044  */
11045
11046 void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
11047 {
11048     genTreeOps      oper     = tree->OperGet();
11049     const var_types treeType = tree->TypeGet();
11050     GenTree*        op1;
11051     regNumber       reg;
11052     regMaskTP       needReg = destReg;
11053     regMaskTP       addrReg;
11054
11055 #ifdef DEBUG
11056     reg     = (regNumber)0xFEEFFAAF; // to detect uninitialized use
11057     addrReg = 0xDEADCAFE;
11058 #endif
11059
11060     // We should get here for ldloca, ldarga, ldslfda, ldelema,
11061     // or ldflda.
11062     if (oper == GT_ARR_ELEM)
11063     {
11064         op1 = tree;
11065     }
11066     else
11067     {
11068         op1 = tree->gtOp.gtOp1;
11069     }
11070
11071     // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11072     if (oper == GT_ARR_ELEM)
11073     {
11074         // To get the address of the array element,
11075         // we first call genMakeAddrArrElem to make the element addressable.
11076         //     (That is, for example, we first emit code to calculate EBX, and EAX.)
11077         // And then use lea to obtain the address.
11078         //     (That is, for example, we then emit
11079         //         lea EBX, bword ptr [EBX+4*EAX+36]
11080         //      to obtain the address of the array element.)
11081         addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11082     }
11083     else
11084     {
11085         addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11086     }
11087
11088     noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
11089
11090     // We want to reuse one of the scratch registers that were used
11091     // in forming the address mode as the target register for the lea.
11092     // If bestReg is unset or if it is set to one of the registers used to
11093     // form the address (i.e. addrReg), we calculate the scratch register
11094     // to use as the target register for the LEA
11095
11096     bestReg = regSet.rsUseIfZero(bestReg, addrReg);
11097     bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11098
11099     /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11100        it since keepReg==false.
11101        If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11102        So this is guaranteed not to spill addrReg */
11103
11104     reg = regSet.rsPickReg(needReg, bestReg);
11105
11106     // Slight workaround, force the inst routine to think that
11107     // value being loaded is an int (since that is what what
11108     // LEA will return)  otherwise it would try to allocate
11109     // two registers for a long etc.
11110     noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
11111     op1->gtType = treeType;
11112
11113     inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11114
11115     // The Lea instruction above better not have tried to put the
11116     // 'value' pointed to by 'op1' in a register, LEA will not work.
11117     noway_assert(!(op1->InReg()));
11118
11119     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11120     // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
11121     noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11122
11123     regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
11124     gcInfo.gcMarkRegPtrVal(reg, treeType);
11125
11126     genCodeForTree_DONE(tree, reg);
11127 }
11128
11129 #ifdef _TARGET_ARM_
11130
11131 /*****************************************************************************
11132  *
11133  * Move (load/store) between float ret regs and struct promoted variable.
11134  *
11135  * varDsc - The struct variable to be loaded from or stored into.
11136  * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
11137  *
11138  */
11139 void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
11140 {
11141     regNumber curReg = REG_FLOATRET;
11142
11143     unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11144     for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11145     {
11146         LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11147
11148         // Is the struct field promoted and sitting in a register?
11149         if (varDscFld->lvRegister)
11150         {
11151             // Move from the struct field into curReg if load
11152             // else move into struct field from curReg if store
11153             regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11154             regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
11155             if (srcReg != dstReg)
11156             {
11157                 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11158                 regTracker.rsTrackRegCopy(dstReg, srcReg);
11159             }
11160         }
11161         else
11162         {
11163             // This field is in memory, do a move between the field and float registers.
11164             emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11165             if (isLoadIntoFlt)
11166             {
11167                 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11168                 regTracker.rsTrackRegTrash(curReg);
11169             }
11170             else
11171             {
11172                 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11173             }
11174         }
11175
11176         // Advance the current reg.
11177         curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
11178     }
11179 }
11180
11181 void CodeGen::genLoadIntoFltRetRegs(GenTree* tree)
11182 {
11183     assert(tree->TypeGet() == TYP_STRUCT);
11184     assert(tree->gtOper == GT_LCL_VAR);
11185     LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11186     int        slots  = varDsc->lvSize() / REGSIZE_BYTES;
11187     if (varDsc->lvPromoted)
11188     {
11189         genLdStFltRetRegsPromotedVar(varDsc, true);
11190     }
11191     else
11192     {
11193         if (slots <= 2)
11194         {
11195             // Use the load float/double instruction.
11196             inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
11197                        (slots == 1) ? EA_4BYTE : EA_8BYTE);
11198         }
11199         else
11200         {
11201             // Use the load store multiple instruction.
11202             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11203             inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11204             regTracker.rsTrackRegTrash(reg);
11205             getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11206         }
11207     }
11208     genMarkTreeInReg(tree, REG_FLOATRET);
11209 }
11210
/*****************************************************************************
 *
 *  Generate code for a call that returns an HFA in float registers, then
 *  store those return registers into the struct local on the LHS.
 *
 *  tree - a GT_ASG of type TYP_STRUCT whose LHS is a GT_LCL_VAR/GT_LCL_FLD
 *         of an HFA local, and whose RHS is the GT_CALL producing the HFA.
 */
void CodeGen::genStoreFromFltRetRegs(GenTree* tree)
{
    assert(tree->TypeGet() == TYP_STRUCT);
    assert(tree->OperGet() == GT_ASG);

    // LHS should be lcl var or fld.
    GenTree* op1 = tree->gtOp.gtOp1;

    // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
    // handling multiple levels of inlined functions that return HFA on the right-hand-side.
    // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
    // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
    // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
    // as a regular assert().
    noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
    unsigned varNum = op1->gtLclVarCommon.gtLclNum;
    assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));

    // The RHS should be a call.
    GenTree* op2 = tree->gtOp.gtOp2;
    assert(op2->gtOper == GT_CALL);

    // Generate code for call and copy the return registers into the local.
    // retMask is the mask of float registers holding the call's return value.
    regMaskTP retMask = genCodeForCall(op2->AsCall(), true);

    // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
    // (retMask >> REG_FLOATRET) + 1 is a power of two iff the mask is a
    // contiguous run of bits starting at REG_FLOATRET.
    regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
    assert((mask & (mask - 1)) == 0);
    assert(mask <= (1 << MAX_HFA_RET_SLOTS));
    assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
#endif

    // Number of 4-byte float register slots occupied by the return value.
    int slots = genCountBits(retMask & RBM_ALLFLOAT);

    LclVarDsc* varDsc = &compiler->lvaTable[varNum];

    if (varDsc->lvPromoted)
    {
        // A promoted struct: store each float return reg into its promoted field.
        genLdStFltRetRegsPromotedVar(varDsc, false);
    }
    else
    {
        if (slots <= 2)
        {
            // One or two slots: a single float/double store suffices.
            inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
                       (slots == 1) ? EA_4BYTE : EA_8BYTE);
        }
        else
        {
            // More than two slots: compute the local's address into a scratch
            // integer register and use a store-multiple (vstm) instruction.
            regNumber reg = regSet.rsPickReg(RBM_ALLINT);
            inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
            regTracker.rsTrackRegTrash(reg);
            getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
        }
    }
}
11270
11271 #endif // _TARGET_ARM_
11272
11273 /*****************************************************************************
11274  *
11275  *  Generate code for a GT_ASG tree
11276  */
11277
11278 #ifdef _PREFAST_
11279 #pragma warning(push)
11280 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11281 #endif
11282 void CodeGen::genCodeForTreeSmpOpAsg(GenTree* tree)
11283 {
11284     noway_assert(tree->gtOper == GT_ASG);
11285
11286     GenTree*    op1     = tree->gtOp.gtOp1;
11287     GenTree*    op2     = tree->gtOp.gtOp2;
11288     regMaskTP   needReg = RBM_ALLINT;
11289     regMaskTP   bestReg = RBM_CORRUPT;
11290     regMaskTP   addrReg = DUMMY_INIT(RBM_CORRUPT);
11291     bool        ovfl    = false; // Do we need an overflow check
11292     bool        volat   = false; // Is this a volatile store
11293     regMaskTP   regGC;
11294     instruction ins;
11295     unsigned    lclVarNum = compiler->lvaCount;
11296     unsigned    lclILoffs = DUMMY_INIT(0);
11297
11298 #ifdef _TARGET_ARM_
11299     if (tree->gtType == TYP_STRUCT)
11300     {
11301         // We use copy block to assign structs, however to receive HFAs in registers
11302         // from a CALL, we use assignment, var = (hfa) call();
11303         assert(compiler->IsHfa(tree));
11304         genStoreFromFltRetRegs(tree);
11305         return;
11306     }
11307 #endif
11308
11309 #ifdef DEBUG
11310     if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11311     {
11312         if (varTypeIsFloating(op1))
11313             assert(!"Bad IL: Illegal assignment of integer into float!");
11314         else
11315             assert(!"Bad IL: Illegal assignment of float into integer!");
11316     }
11317 #endif
11318
11319     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11320     {
11321         op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
11322     }
11323
11324     /* Is the target a register or local variable? */
11325     switch (op1->gtOper)
11326     {
11327         unsigned   varNum;
11328         LclVarDsc* varDsc;
11329
11330         case GT_LCL_VAR:
11331             varNum = op1->gtLclVarCommon.gtLclNum;
11332             noway_assert(varNum < compiler->lvaCount);
11333             varDsc = compiler->lvaTable + varNum;
11334
11335             /* For non-debuggable code, every definition of a lcl-var has
11336              * to be checked to see if we need to open a new scope for it.
11337              * Remember the local var info to call siCheckVarScope
11338              * AFTER code generation of the assignment.
11339              */
11340             if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11341             {
11342                 lclVarNum = varNum;
11343                 lclILoffs = op1->gtLclVar.gtLclILoffs;
11344             }
11345
11346             /* Check against dead store ? (with min opts we may have dead stores) */
11347
11348             noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11349
11350             /* Does this variable live in a register? */
11351
11352             if (genMarkLclVar(op1))
11353                 goto REG_VAR2;
11354
11355             break;
11356
11357         REG_VAR2:
11358
11359             /* Get hold of the target register */
11360
11361             regNumber op1Reg;
11362
11363             op1Reg = op1->gtRegVar.gtRegNum;
11364
11365 #ifdef DEBUG
11366             /* Compute the RHS (hopefully) into the variable's register.
11367                For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11368                as variables are kept alive everywhere. So we have to be
11369                careful if we want to compute the value directly into
11370                the variable's register. */
11371
11372             bool needToUpdateRegSetCheckLevel;
11373             needToUpdateRegSetCheckLevel = false;
11374 #endif
11375
11376             // We should only be accessing lvVarIndex if varDsc is tracked.
11377             assert(varDsc->lvTracked);
11378
11379             if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11380             {
11381                 noway_assert(compiler->opts.compDbgCode);
11382
11383                 /* The predictor might expect us to generate op2 directly
11384                    into the var's register. However, since the variable is
11385                    already alive, first kill it and its register. */
11386
11387                 if (rpCanAsgOperWithoutReg(op2, true))
11388                 {
11389                     genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11390                     needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11391 #ifdef DEBUG
11392                     needToUpdateRegSetCheckLevel = true;
11393 #endif
11394                 }
11395             }
11396             else
11397             {
11398                 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11399             }
11400
11401 #ifdef DEBUG
11402
11403             /* Special cases: op2 is a GT_CNS_INT */
11404
11405             if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11406             {
11407                 /* Save the old life status */
11408
11409                 VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11410                 VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11411
11412                 /* Set a flag to avoid printing the message
11413                    and remember that life was changed. */
11414
11415                 genTempLiveChg = false;
11416             }
11417 #endif
11418
11419 #ifdef DEBUG
11420             if (needToUpdateRegSetCheckLevel)
11421                 compiler->compRegSetCheckLevel++;
11422 #endif
11423             genCodeForTree(op2, needReg, genRegMask(op1Reg));
11424 #ifdef DEBUG
11425             if (needToUpdateRegSetCheckLevel)
11426                 compiler->compRegSetCheckLevel--;
11427             noway_assert(compiler->compRegSetCheckLevel >= 0);
11428 #endif
11429             noway_assert(op2->InReg());
11430
11431             /* Make sure the value ends up in the right place ... */
11432
11433             if (op2->gtRegNum != op1Reg)
11434             {
11435                 /* Make sure the target of the store is available */
11436
11437                 if (regSet.rsMaskUsed & genRegMask(op1Reg))
11438                     regSet.rsSpillReg(op1Reg);
11439
11440 #ifdef _TARGET_ARM_
11441                 if (op1->TypeGet() == TYP_FLOAT)
11442                 {
11443                     // This can only occur when we are returning a non-HFA struct
11444                     // that is composed of a single float field.
11445                     //
11446                     inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11447                 }
11448                 else
11449 #endif // _TARGET_ARM_
11450                 {
11451                     inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11452                 }
11453
11454                 /* The value has been transferred to 'op1Reg' */
11455
11456                 regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
11457
11458                 if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11459                     gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11460
11461                 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11462             }
11463             else
11464             {
11465                 // First we need to remove it from the original reg set mask (or else trigger an
11466                 // assert when we add it to the other reg set mask).
11467                 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11468                 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11469
11470                 // The emitter has logic that tracks the GCness of registers and asserts if you
11471                 // try to do bad things to a GC pointer (like lose its GCness).
11472
11473                 // An explict cast of a GC pointer to an int (which is legal if the
11474                 // pointer is pinned) is encoded as an assignment of a GC source
11475                 // to a integer variable.  Unfortunately if the source was the last
11476                 // use, and the source register gets reused by the destination, no
11477                 // code gets emitted (That is where we are at right now).  The emitter
11478                 // thinks the register is a GC pointer (it did not see the cast).
11479                 // This causes asserts, as well as bad GC info since we will continue
11480                 // to report the register as a GC pointer even if we do arithmetic
11481                 // with it. So force the emitter to see the change in the type
11482                 // of variable by placing a label.
11483                 // We only have to do this check at this point because in the
11484                 // CAST morphing, we create a temp and assignment whenever we
11485                 // have a cast that loses its GCness.
11486
11487                 if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11488                 {
11489                     void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
11490                                                              gcInfo.gcRegByrefSetCur);
11491                 }
11492             }
11493
11494             addrReg = 0;
11495
11496             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11497             goto LExit;
11498
11499         case GT_LCL_FLD:
11500
11501             // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11502             // to worry about it being enregistered.
11503             noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11504             break;
11505
11506         case GT_CLS_VAR:
11507
11508             __fallthrough;
11509
11510         case GT_IND:
11511         case GT_NULLCHECK:
11512
11513             assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11514
11515             if (op1->gtFlags & GTF_IND_VOLATILE)
11516             {
11517                 volat = true;
11518             }
11519
11520             break;
11521
11522         default:
11523             break;
11524     }
11525
11526     /* Is the value being assigned a simple one? */
11527
11528     noway_assert(op2);
11529     switch (op2->gtOper)
11530     {
11531         case GT_LCL_VAR:
11532
11533             if (!genMarkLclVar(op2))
11534                 goto SMALL_ASG;
11535
11536             __fallthrough;
11537
11538         case GT_REG_VAR:
11539
11540             /* Is the target a byte/short/char value? */
11541
11542             if (varTypeIsSmall(op1->TypeGet()))
11543                 goto SMALL_ASG;
11544
11545             if (tree->gtFlags & GTF_REVERSE_OPS)
11546                 goto SMALL_ASG;
11547
11548             /* Make the target addressable */
11549
11550             op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
11551
11552             addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11553
11554             /* Does the write barrier helper do the assignment? */
11555
11556             regGC = WriteBarrier(op1, op2, addrReg);
11557
11558             // Was assignment done by the WriteBarrier
11559             if (regGC == RBM_NONE)
11560             {
11561 #ifdef _TARGET_ARM_
11562                 if (volat)
11563                 {
11564                     // Emit a memory barrier instruction before the store
11565                     instGen_MemoryBarrier();
11566                 }
11567 #endif
11568
11569                 /* Move the value into the target */
11570
11571                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11572
11573                 // This is done in WriteBarrier when (regGC != RBM_NONE)
11574
11575                 /* Free up anything that was tied up by the LHS */
11576                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11577             }
11578
11579             /* Free up the RHS */
11580             genUpdateLife(op2);
11581
11582             /* Remember that we've also touched the op2 register */
11583
11584             addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11585             break;
11586
11587         case GT_CNS_INT:
11588
11589             GenTreeIntConCommon* con;
11590             con = op2->AsIntConCommon();
11591             ssize_t ival;
11592             ival = con->IconValue();
11593             emitAttr size;
11594             size = emitTypeSize(tree->TypeGet());
11595
11596             ins = ins_Store(op1->TypeGet());
11597
11598             // If we are storing a constant into a local variable
11599             // we extend the size of the store here
11600             // this normally takes place in CodeGen::inst_TT_IV on x86.
11601             //
11602             if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11603             {
11604                 unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
11605                 LclVarDsc* varDsc = compiler->lvaTable + varNum;
11606
11607                 // Fix the immediate by sign extending if needed
11608                 if (!varTypeIsUnsigned(varDsc->TypeGet()))
11609                 {
11610                     if (size == EA_1BYTE)
11611                     {
11612                         if ((ival & 0x7f) != ival)
11613                             ival = ival | 0xffffff00;
11614                     }
11615                     else
11616                     {
11617                         assert(size == EA_2BYTE);
11618                         if ((ival & 0x7fff) != ival)
11619                             ival = ival | 0xffff0000;
11620                     }
11621                 }
11622
11623                 // A local stack slot is at least 4 bytes in size, regardless of
11624                 // what the local var is typed as, so auto-promote it here
11625                 // unless it is a field of a promoted struct
11626                 if (!varDsc->lvIsStructField)
11627                 {
11628                     size = EA_SET_SIZE(size, EA_4BYTE);
11629                     ins  = ins_Store(TYP_INT);
11630                 }
11631             }
11632
11633             /* Make the target addressable */
11634
11635             addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11636
11637 #ifdef _TARGET_ARM_
11638             if (volat)
11639             {
11640                 // Emit a memory barrier instruction before the store
11641                 instGen_MemoryBarrier();
11642             }
11643 #endif
11644
11645             /* Move the value into the target */
11646
11647             noway_assert(op1->gtOper != GT_REG_VAR);
11648             if (con->ImmedValNeedsReloc(compiler))
11649             {
11650                 /* The constant is actually a handle that may need relocation
11651                    applied to it.  genComputeReg will do the right thing (see
11652                    code in genCodeForTreeConst), so we'll just call it to load
11653                    the constant into a register. */
11654
11655                 genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11656                 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11657                 noway_assert(op2->InReg());
11658                 inst_TT_RV(ins, op1, op2->gtRegNum);
11659                 genReleaseReg(op2);
11660             }
11661             else
11662             {
11663                 regSet.rsLockUsedReg(addrReg);
11664
11665 #if REDUNDANT_LOAD
11666                 bool      copyIconFromReg = true;
11667                 regNumber iconReg         = REG_NA;
11668
11669 #ifdef _TARGET_ARM_
11670                 // Only if the constant can't be encoded in a small instruction,
11671                 // look for another register to copy the value from. (Assumes
11672                 // target is a small register.)
11673                 if ((op1->InReg()) && !isRegPairType(tree->gtType) &&
11674                     arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11675                 {
11676                     copyIconFromReg = false;
11677                 }
11678 #endif // _TARGET_ARM_
11679
11680                 if (copyIconFromReg)
11681                 {
11682                     iconReg = regTracker.rsIconIsInReg(ival);
11683                     if (iconReg == REG_NA)
11684                         copyIconFromReg = false;
11685                 }
11686
11687                 if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
11688                                         (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11689                 {
11690                     /* Move the value into the target */
11691
11692                     inst_TT_RV(ins, op1, iconReg, 0, size);
11693                 }
11694                 else
11695 #endif // REDUNDANT_LOAD
11696                 {
11697                     inst_TT_IV(ins, op1, ival, 0, size);
11698                 }
11699
11700                 regSet.rsUnlockUsedReg(addrReg);
11701             }
11702
11703             /* Free up anything that was tied up by the LHS */
11704
11705             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11706             break;
11707
11708         default:
11709
11710         SMALL_ASG:
11711
11712             bool             isWriteBarrier = false;
11713             regMaskTP        needRegOp1     = RBM_ALLINT;
11714             RegSet::ExactReg mustReg        = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11715
11716             /*  Is the LHS more complex than the RHS? */
11717
11718             if (tree->gtFlags & GTF_REVERSE_OPS)
11719             {
11720                 /* Is the target a byte/short/char value? */
11721
11722                 if (varTypeIsSmall(op1->TypeGet()))
11723                 {
11724                     noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
11725                                  // TODO: Why does this have to be true?
11726                                  compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11727                                  compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11728
11729                     if (op2->gtOper == GT_CAST && !op2->gtOverflow())
11730                     {
11731                         /* Special case: cast to small type */
11732
11733                         if (op2->CastToType() >= op1->gtType)
11734                         {
11735                             /* Make sure the cast operand is not > int */
11736
11737                             if (op2->CastFromType() <= TYP_INT)
11738                             {
11739                                 /* Cast via a non-smaller type */
11740
11741                                 op2 = op2->gtCast.CastOp();
11742                             }
11743                         }
11744                     }
11745
11746                     if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11747                     {
11748                         unsigned mask;
11749                         switch (op1->gtType)
11750                         {
11751                             case TYP_BYTE:
11752                                 mask = 0x000000FF;
11753                                 break;
11754                             case TYP_SHORT:
11755                                 mask = 0x0000FFFF;
11756                                 break;
11757                             case TYP_USHORT:
11758                                 mask = 0x0000FFFF;
11759                                 break;
11760                             default:
11761                                 goto SIMPLE_SMALL;
11762                         }
11763
11764                         if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11765                         {
11766                             /* Redundant AND */
11767
11768                             op2 = op2->gtOp.gtOp1;
11769                         }
11770                     }
11771
11772                 /* Must get the new value into a byte register */
11773
11774                 SIMPLE_SMALL:
11775                     if (varTypeIsByte(op1->TypeGet()))
11776                         genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11777                     else
11778                         goto NOT_SMALL;
11779                 }
11780                 else
11781                 {
11782                 NOT_SMALL:
11783                     /* Generate the RHS into a register */
11784
11785                     isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11786                     if (isWriteBarrier)
11787                     {
11788 #if NOGC_WRITE_BARRIERS
11789                         // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11790                         needReg = Target::exclude_WriteBarrierReg(needReg);
11791                         mustReg = RegSet::EXACT_REG;
11792 #else  // !NOGC_WRITE_BARRIERS
11793                         // This code should be generic across architectures.
11794
11795                         // For the standard JIT Helper calls
11796                         // op1 goes into REG_ARG_0 and
11797                         // op2 goes into REG_ARG_1
11798                         //
11799                         needRegOp1 = RBM_ARG_0;
11800                         needReg    = RBM_ARG_1;
11801 #endif // !NOGC_WRITE_BARRIERS
11802                     }
11803                     genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11804                 }
11805
11806                 noway_assert(op2->InReg());
11807
11808                 /* Make the target addressable */
11809
11810                 op1     = genCodeForCommaTree(op1); // Strip off any comma expressions.
11811                 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11812
11813                 /*  Make sure the RHS register hasn't been spilled;
11814                     keep the register marked as "used", otherwise
11815                     we might get the pointer lifetimes wrong.
11816                 */
11817
11818                 if (varTypeIsByte(op1->TypeGet()))
11819                     needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11820
11821                 genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11822                 noway_assert(op2->InReg());
11823
11824                 /* Lock the RHS temporarily (lock only already used) */
11825
11826                 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11827
11828                 /* Make sure the LHS is still addressable */
11829
11830                 addrReg = genKeepAddressable(op1, addrReg);
11831
11832                 /* We can unlock (only already used ) the RHS register */
11833
11834                 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11835
11836                 /* Does the write barrier helper do the assignment? */
11837
11838                 regGC = WriteBarrier(op1, op2, addrReg);
11839
11840                 if (regGC != 0)
11841                 {
11842                     // Yes, assignment done by the WriteBarrier
11843                     noway_assert(isWriteBarrier);
11844                 }
11845                 else
11846                 {
11847 #ifdef _TARGET_ARM_
11848                     if (volat)
11849                     {
11850                         // Emit a memory barrier instruction before the store
11851                         instGen_MemoryBarrier();
11852                     }
11853 #endif
11854
11855                     /* Move the value into the target */
11856
11857                     inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11858                 }
11859
11860 #ifdef DEBUG
11861                 /* Update the current liveness info */
11862                 if (compiler->opts.varNames)
11863                     genUpdateLife(tree);
11864 #endif
11865
11866                 // If op2 register is still in use, free it.  (Might not be in use, if
11867                 // a full-call write barrier was done, and the register was a caller-saved
11868                 // register.)
11869                 regMaskTP op2RM = genRegMask(op2->gtRegNum);
11870                 if (op2RM & regSet.rsMaskUsed)
11871                     regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
11872
11873                 // This is done in WriteBarrier when (regGC != 0)
11874                 if (regGC == 0)
11875                 {
11876                     /* Free up anything that was tied up by the LHS */
11877                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11878                 }
11879             }
11880             else
11881             {
11882                 /* Make the target addressable */
11883
11884                 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11885
11886                 if (isWriteBarrier)
11887                 {
11888 #if NOGC_WRITE_BARRIERS
11889                     /* Try to avoid RBM_TMP_0 */
11890                     needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
11891                     mustReg    = RegSet::EXACT_REG; // For op2
11892 #else                                               // !NOGC_WRITE_BARRIERS
11893                     // This code should be generic across architectures.
11894
11895                     // For the standard JIT Helper calls
11896                     // op1 goes into REG_ARG_0 and
11897                     // op2 goes into REG_ARG_1
11898                     //
11899                     needRegOp1 = RBM_ARG_0;
11900                     needReg    = RBM_ARG_1;
11901                     mustReg    = RegSet::EXACT_REG; // For op2
11902 #endif                                              // !NOGC_WRITE_BARRIERS
11903                 }
11904
11905                 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
11906
11907                 op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
11908
11909                 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11910
11911 #if CPU_HAS_BYTE_REGS
11912                 /* Is the target a byte value? */
11913                 if (varTypeIsByte(op1->TypeGet()))
11914                 {
11915                     /* Must get the new value into a byte register */
11916                     needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11917                     mustReg = RegSet::EXACT_REG;
11918
11919                     if (op2->gtType >= op1->gtType)
11920                         op2->gtFlags |= GTF_SMALL_OK;
11921                 }
11922 #endif
11923
11924 #if NOGC_WRITE_BARRIERS
11925                 /* For WriteBarrier we can't use REG_WRITE_BARRIER */
11926                 if (isWriteBarrier)
11927                     needReg = Target::exclude_WriteBarrierReg(needReg);
11928
11929                 /* Also avoid using the previously computed addrReg(s) */
11930                 bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
11931
11932                 /* If we have a reg available to grab then use bestReg */
11933                 if (bestReg & regSet.rsRegMaskCanGrab())
11934                     needReg = bestReg;
11935
11936                 mustReg = RegSet::EXACT_REG;
11937 #endif
11938
11939                 /* Generate the RHS into a register */
11940                 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11941                 noway_assert(op2->InReg());
11942
11943                 /* Make sure the target is still addressable */
11944                 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11945                 noway_assert(op2->InReg());
11946
11947                 /* Does the write barrier helper do the assignment? */
11948
11949                 regGC = WriteBarrier(op1, op2, addrReg);
11950
11951                 if (regGC != 0)
11952                 {
11953                     // Yes, assignment done by the WriteBarrier
11954                     noway_assert(isWriteBarrier);
11955                 }
11956                 else
11957                 {
11958                     assert(!isWriteBarrier);
11959
11960 #ifdef _TARGET_ARM_
11961                     if (volat)
11962                     {
11963                         // Emit a memory barrier instruction before the store
11964                         instGen_MemoryBarrier();
11965                     }
11966 #endif
11967
11968                     /* Move the value into the target */
11969
11970                     inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11971                 }
11972
11973                 /* The new value is no longer needed */
11974
11975                 genReleaseReg(op2);
11976
11977 #ifdef DEBUG
11978                 /* Update the current liveness info */
11979                 if (compiler->opts.varNames)
11980                     genUpdateLife(tree);
11981 #endif
11982
11983                 // This is done in WriteBarrier when (regGC != 0)
11984                 if (regGC == 0)
11985                 {
11986                     /* Free up anything that was tied up by the LHS */
11987                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11988                 }
11989             }
11990
11991             addrReg = RBM_NONE;
11992             break;
11993     }
11994
11995     noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
11996     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
11997
11998 LExit:
11999     /* For non-debuggable code, every definition of a lcl-var has
12000      * to be checked to see if we need to open a new scope for it.
12001      */
12002     if (lclVarNum < compiler->lvaCount)
12003         siCheckVarScope(lclVarNum, lclILoffs);
12004 }
12005 #ifdef _PREFAST_
12006 #pragma warning(pop)
12007 #endif
12008
12009 /*****************************************************************************
12010  *
12011  *  Generate code to complete the assignment operation
12012  */
12013
12014 void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTree* tree, regMaskTP addrReg, regNumber reg, bool ovfl)
12015 {
12016     const var_types treeType = tree->TypeGet();
12017     GenTree*        op1      = tree->gtOp.gtOp1;
12018     GenTree*        op2      = tree->gtOp.gtOp2;
12019     noway_assert(op2);
12020
12021     if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
12022         genUpdateLife(op1);
12023     genUpdateLife(tree);
12024
12025 #if REDUNDANT_LOAD
12026
12027     if (op1->gtOper == GT_LCL_VAR)
12028         regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12029
12030     /* Have we just assigned a value that is in a register? */
12031
12032     if (op2->InReg() && tree->gtOper == GT_ASG)
12033     {
12034         regTracker.rsTrackRegAssign(op1, op2);
12035     }
12036
12037 #endif
12038
12039     noway_assert(addrReg != 0xDEADCAFE);
12040
12041     gcInfo.gcMarkRegSetNpt(addrReg);
12042
12043     if (ovfl)
12044     {
12045         noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12046
12047         /* If it is not in a register and it is a small type, then
12048            we must have loaded it up from memory, done the increment,
12049            checked for overflow, and then stored it back to memory */
12050
12051         bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->InReg());
12052
12053         if (!ovfCheckDone)
12054         {
12055             // For small sizes, reg should be set as we sign/zero extend it.
12056
12057             noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
12058
12059             /* Currently we don't morph x=x+y into x+=y in try blocks
12060              * if we need overflow check, as x+y may throw an exception.
12061              * We can do it if x is not live on entry to the catch block.
12062              */
12063             noway_assert(!compiler->compCurBB->hasTryIndex());
12064
12065             genCheckOverflow(tree);
12066         }
12067     }
12068 }
12069
12070 /*****************************************************************************
12071  *
12072  *  Generate code for a special op tree
12073  */
12074
// Generate code for a "special" operator node: one whose kind is neither a
// constant, a leaf, nor a simple unary/binary operator. Handles GT_CALL,
// GT_ARR_BOUNDS_CHECK, GT_ARR_ELEM and GT_CMPXCHG; anything else asserts.
//
// Arguments:
//    tree    - the node to generate code for
//    destReg - preferred register mask for the result (hint only)
//    bestReg - preferred subset of 'destReg' (hint only)
//
void CodeGen::genCodeForTreeSpecialOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
{
    genTreeOps oper = tree->OperGet();
    regNumber  reg  = DUMMY_INIT(REG_CORRUPT);
    regMaskTP  regs = regSet.rsMaskUsed;

    // Callers dispatch const/leaf/smpop nodes elsewhere; only "special" kinds reach here.
    noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);

    switch (oper)
    {
        case GT_CALL:
            regs = genCodeForCall(tree->AsCall(), true);

            /* If the result is in a register, make sure it ends up in the right place */

            if (regs != RBM_NONE)
            {
                genMarkTreeInReg(tree, genRegNumFromMask(regs));
            }

            genUpdateLife(tree);
            return;

        case GT_FIELD:
            NO_WAY("should not see this operator in this phase");
            break;

        case GT_ARR_BOUNDS_CHECK:
        {
#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
            // MUST NEVER CHECK-IN WITH THIS ENABLED.
            // This is just for convenience in doing performance investigations and requires x86ret builds
            if (!JitConfig.JitNoRngChk())
#endif
                genRangeCheck(tree);
        }
            return;

        case GT_ARR_ELEM:
            genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
            return;

        case GT_CMPXCHG:
        {
#if defined(_TARGET_XARCH_)
            // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand

            // Since this is a "call", evaluate the operands from right to left.  Don't worry about spilling
            // right now, just get the trees evaluated.

            // As a friendly reminder.  IL args are evaluated left to right.

            GenTree*  location  = tree->gtCmpXchg.gtOpLocation;  // arg1
            GenTree*  value     = tree->gtCmpXchg.gtOpValue;     // arg2
            GenTree*  comparand = tree->gtCmpXchg.gtOpComparand; // arg3
            regMaskTP addrReg;

            // Try to fold the location into an [r/m] addressing mode; if that
            // fails, materialize the address in a register instead.
            bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
                                             RBM_ALLINT, RegSet::KEEP_REG, &addrReg);

            if (!isAddr)
            {
                genCodeForTree(location, RBM_NONE, RBM_NONE);
                assert(location->InReg());
                addrReg = genRegMask(location->gtRegNum);
                regSet.rsMarkRegUsed(location);
            }

            // We must have a reg for the Value, but it doesn't really matter which register.

            // Try to avoid EAX and the address register if possible.
            genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);

#ifdef DEBUG
            // cmpxchg uses EAX as an implicit operand to hold the comparand
            // We're going to destroy EAX in this operation, so we better not be keeping
            // anything important in it.
            if (RBM_EAX & regSet.rsMaskVars)
            {
                // We have a variable enregistered in EAX.  Make sure it goes dead in this tree.
                for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
                {
                    const LclVarDsc& varDesc = compiler->lvaTable[varNum];
                    if (!varDesc.lvIsRegCandidate())
                        continue;
                    if (!varDesc.lvRegister)
                        continue;
                    if (isFloatRegType(varDesc.lvType))
                        continue;
                    if (varDesc.lvRegNum != REG_EAX)
                        continue;
                    // We may need to check lvOtherReg.

                    // If the variable isn't going dead during this tree, we've just trashed a local with
                    // cmpxchg.
                    noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));

                    break;
                }
            }
#endif
            genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);

            // By this point we've evaluated everything.  However the odds are that we've spilled something by
            // now.  Let's recover all the registers and force them to stay.

            // Well, we just computed comparand, so it's still in EAX.
            noway_assert(comparand->gtRegNum == REG_EAX);
            regSet.rsLockUsedReg(RBM_EAX);

            // Stick it anywhere other than EAX.
            genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
            reg = value->gtRegNum;
            noway_assert(reg != REG_EAX);
            regSet.rsLockUsedReg(genRegMask(reg));

            // Lock order matters here: EAX and the value register are locked
            // above so that recovering the address below cannot spill them.
            if (isAddr)
            {
                addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
            }
            else
            {
                genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
            }

            regSet.rsUnlockUsedReg(genRegMask(reg));
            regSet.rsUnlockUsedReg(RBM_EAX);

            // Emit: lock cmpxchg [location], reg (EAX is the implicit comparand).
            instGen(INS_lock);
            if (isAddr)
            {
                sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
                genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
            }
            else
            {
                instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
                genReleaseReg(location);
            }

            genReleaseReg(value);
            genReleaseReg(comparand);

            // EAX and the value register are both trashed at this point.
            regTracker.rsTrackRegTrash(REG_EAX);
            regTracker.rsTrackRegTrash(reg);

            // The result (the original memory value) is left in EAX by cmpxchg.
            reg = REG_EAX;

            genFlagsEqualToNone();
            break;
#else // not defined(_TARGET_XARCH_)
            NYI("GT_CMPXCHG codegen");
            break;
#endif
        }

        default:
#ifdef DEBUG
            compiler->gtDispTree(tree);
#endif
            noway_assert(!"unexpected operator");
            NO_WAY("unexpected operator");
    }

    noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
    genCodeForTree_DONE(tree, reg);
}
12243
12244 /*****************************************************************************
12245  *
12246  *  Generate code for the given tree. tree->gtRegNum will be set to the
12247  *  register where the tree lives.
12248  *
12249  *  If 'destReg' is non-zero, we'll do our best to compute the value into a
12250  *  register that is in that register set.
12251  *  Use genComputeReg() if you need the tree in a specific register.
12252  *  Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12253  *  the register can only be used for read, but not for write.
12254  *  Use genMakeAddressable() if you only need the tree to be accessible
12255  *  using a complex addressing mode, and do not necessarily need the tree
12256  *  materialized in a register.
12257  *
12258  *  The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12259  *
12260  *  The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
12261  *  register will not be consumed right away and could possibly be spilled.
12262  */
12263
12264 void CodeGen::genCodeForTree(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
12265 {
12266 #if 0
12267     if  (compiler->verbose)
12268     {
12269         printf("Generating code for tree ");
12270         Compiler::printTreeID(tree);
12271         printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12272     }
12273     genStressRegs(tree);
12274 #endif
12275
12276     noway_assert(tree);
12277     noway_assert(tree->gtOper != GT_STMT);
12278     assert(tree->IsNodeProperlySized());
12279
12280     // When assigning to a enregistered local variable we receive
12281     // a hint that we should target the register that is used to
12282     // hold the enregistered local variable.
12283     // When receiving this hint both destReg and bestReg masks are set
12284     // to the register that is used by the enregistered local variable.
12285     //
12286     // However it is possible to us to have a different local variable
12287     // targeting the same register to become alive (and later die)
12288     // as we descend the expression tree.
12289     //
12290     // To handle such cases we will remove any registers that are alive from the
12291     // both the destReg and bestReg masks.
12292     //
12293     regMaskTP liveMask = genLiveMask(tree);
12294
12295     // This removes any registers used to hold enregistered locals
12296     // from the destReg and bestReg masks.
12297     // After this either mask could become 0
12298     //
12299     destReg &= ~liveMask;
12300     bestReg &= ~liveMask;
12301
12302     /* 'destReg' of 0 really means 'any' */
12303
12304     destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
12305
12306     if (destReg != RBM_ALL(tree->TypeGet()))
12307         bestReg = regSet.rsUseIfZero(bestReg, destReg);
12308
12309     // Long, float, and double have their own codegen functions
12310     switch (tree->TypeGet())
12311     {
12312
12313         case TYP_LONG:
12314 #if !CPU_HAS_FP_SUPPORT
12315         case TYP_DOUBLE:
12316 #endif
12317             genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
12318             return;
12319
12320 #if CPU_HAS_FP_SUPPORT
12321         case TYP_FLOAT:
12322         case TYP_DOUBLE:
12323
12324             // For comma nodes, we'll get back here for the last node in the comma list.
12325             if (tree->gtOper != GT_COMMA)
12326             {
12327                 genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12328                 return;
12329             }
12330             break;
12331 #endif
12332
12333 #ifdef DEBUG
12334         case TYP_UINT:
12335         case TYP_ULONG:
12336             noway_assert(!"These types are only used as markers in GT_CAST nodes");
12337             break;
12338 #endif
12339
12340         default:
12341             break;
12342     }
12343
12344     /* Is the value already in a register? */
12345
12346     if (tree->InReg())
12347     {
12348         genCodeForTree_REG_VAR1(tree);
12349         return;
12350     }
12351
12352     /* We better not have a spilled value here */
12353
12354     noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12355
12356     /* Figure out what kind of a node we have */
12357
12358     unsigned kind = tree->OperKind();
12359
12360     if (kind & GTK_CONST)
12361     {
12362         /* Handle constant nodes */
12363
12364         genCodeForTreeConst(tree, destReg, bestReg);
12365     }
12366     else if (kind & GTK_LEAF)
12367     {
12368         /* Handle leaf nodes */
12369
12370         genCodeForTreeLeaf(tree, destReg, bestReg);
12371     }
12372     else if (kind & GTK_SMPOP)
12373     {
12374         /* Handle 'simple' unary/binary operators */
12375
12376         genCodeForTreeSmpOp(tree, destReg, bestReg);
12377     }
12378     else
12379     {
12380         /* Handle special operators */
12381
12382         genCodeForTreeSpecialOp(tree, destReg, bestReg);
12383     }
12384 }
12385
12386 /*****************************************************************************
12387  *
12388  *  Generate code for all the basic blocks in the function.
12389  */
12390
12391 #ifdef _PREFAST_
12392 #pragma warning(push)
12393 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12394 #endif
12395 void CodeGen::genCodeForBBlist()
12396 {
12397     unsigned   varNum;
12398     LclVarDsc* varDsc;
12399
12400     unsigned savedStkLvl;
12401
12402 #ifdef DEBUG
12403     genInterruptibleUsed = true;
12404     unsigned stmtNum     = 0;
12405     unsigned totalCostEx = 0;
12406     unsigned totalCostSz = 0;
12407
12408     // You have to be careful if you create basic blocks from now on
12409     compiler->fgSafeBasicBlockCreation = false;
12410
    // This stress mode is not compatible with fully interruptible GC
12412     if (genInterruptible && compiler->opts.compStackCheckOnCall)
12413     {
12414         compiler->opts.compStackCheckOnCall = false;
12415     }
12416
    // This stress mode is not compatible with fully interruptible GC
12418     if (genInterruptible && compiler->opts.compStackCheckOnRet)
12419     {
12420         compiler->opts.compStackCheckOnRet = false;
12421     }
12422 #endif
12423
12424     // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
12425     genPrepForEHCodegen();
12426
12427     assert(!compiler->fgFirstBBScratch ||
12428            compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12429
12430     /* Initialize the spill tracking logic */
12431
12432     regSet.rsSpillBeg();
12433
12434     /* Initialize the line# tracking logic */
12435
12436     if (compiler->opts.compScopeInfo)
12437     {
12438         siInit();
12439     }
12440
12441 #ifdef _TARGET_X86_
12442     if (compiler->compTailCallUsed)
12443     {
12444         noway_assert(isFramePointerUsed());
12445         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12446     }
12447 #endif
12448
12449     if (compiler->opts.compDbgEnC)
12450     {
12451         noway_assert(isFramePointerUsed());
12452         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12453     }
12454
12455     /* If we have any pinvoke calls, we might potentially trash everything */
12456
12457     if (compiler->info.compCallUnmanaged)
12458     {
12459         noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
12460         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12461     }
12462
12463     /* Initialize the pointer tracking code */
12464
12465     gcInfo.gcRegPtrSetInit();
12466     gcInfo.gcVarPtrSetInit();
12467
12468     /* If any arguments live in registers, mark those regs as such */
12469
12470     for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
12471     {
12472         /* Is this variable a parameter assigned to a register? */
12473
12474         if (!varDsc->lvIsParam || !varDsc->lvRegister)
12475             continue;
12476
12477         /* Is the argument live on entry to the method? */
12478
12479         if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12480             continue;
12481
12482 #if CPU_HAS_FP_SUPPORT
12483         /* Is this a floating-point argument? */
12484
12485         if (varDsc->IsFloatRegType())
12486             continue;
12487
12488         noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12489 #endif
12490
12491         /* Mark the register as holding the variable */
12492
12493         if (isRegPairType(varDsc->lvType))
12494         {
12495             regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12496
12497             if (varDsc->lvOtherReg != REG_STK)
12498                 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12499         }
12500         else
12501         {
12502             regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12503         }
12504     }
12505
12506     unsigned finallyNesting = 0;
12507
12508     // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12509     // allocation at the start of each basic block.
12510     VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12511
12512     /*-------------------------------------------------------------------------
12513      *
12514      *  Walk the basic blocks and generate code for each one
12515      *
12516      */
12517
12518     BasicBlock* block;
12519     BasicBlock* lblk; /* previous block */
12520
12521     for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
12522     {
12523 #ifdef DEBUG
12524         if (compiler->verbose)
12525         {
12526             printf("\n=============== Generating ");
12527             block->dspBlockHeader(compiler, true, true);
12528             compiler->fgDispBBLiveness(block);
12529         }
12530 #endif // DEBUG
12531
12532         VARSET_TP liveSet(VarSetOps::UninitVal());
12533
12534         regMaskTP gcrefRegs = 0;
12535         regMaskTP byrefRegs = 0;
12536
12537         /* Does any other block jump to this point ? */
12538
12539         if (block->bbFlags & BBF_JMP_TARGET)
12540         {
12541             /* Someone may jump here, so trash all regs */
12542
12543             regTracker.rsTrackRegClr();
12544
12545             genFlagsEqualToNone();
12546         }
12547         else
12548         {
12549             /* No jump, but pointers always need to get trashed for proper GC tracking */
12550
12551             regTracker.rsTrackRegClrPtr();
12552         }
12553
12554         /* No registers are used or locked on entry to a basic block */
12555
12556         regSet.rsMaskUsed = RBM_NONE;
12557         regSet.rsMaskMult = RBM_NONE;
12558         regSet.rsMaskLock = RBM_NONE;
12559
12560         // If we need to reserve registers such that they are not used
12561         // by CodeGen in this BasicBlock we do so here.
12562         // On the ARM when we have large frame offsets for locals we
12563         // will have RBM_R10 in the regSet.rsMaskResvd set,
12564         // additionally if a LocAlloc or alloca is used RBM_R9 is in
12565         // the regSet.rsMaskResvd set and we lock these registers here.
12566         //
12567         if (regSet.rsMaskResvd != RBM_NONE)
12568         {
12569             regSet.rsLockReg(regSet.rsMaskResvd);
12570             regSet.rsSetRegsModified(regSet.rsMaskResvd);
12571         }
12572
12573         /* Figure out which registers hold variables on entry to this block */
12574
12575         regMaskTP specialUseMask = regSet.rsMaskResvd;
12576
12577         specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
12578         regSet.ClearMaskVars();
12579         VarSetOps::ClearD(compiler, compiler->compCurLife);
12580         VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12581
12582 #if FEATURE_STACK_FP_X87
12583         VarSetOps::AssignNoCopy(compiler, genFPregVars,
12584                                 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12585         genFPregCnt     = VarSetOps::Count(compiler, genFPregVars);
12586         genFPdeadRegCnt = 0;
12587 #endif
12588         gcInfo.gcResetForBB();
12589
12590         genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12591 #if FEATURE_STACK_FP_X87
12592         VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12593 #endif
12594
12595         // We should never enregister variables in any of the specialUseMask registers
12596         noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12597
12598         VarSetOps::Iter iter(compiler, liveSet);
12599         unsigned        varIndex = 0;
12600         while (iter.NextElem(&varIndex))
12601         {
12602             varNum = compiler->lvaTrackedToVarNum[varIndex];
12603             varDsc = compiler->lvaTable + varNum;
12604             assert(varDsc->lvTracked);
            /* Ignore the variable if it's not in a reg */
12606
12607             if (!varDsc->lvRegister)
12608                 continue;
12609             if (isFloatRegType(varDsc->lvType))
12610                 continue;
12611
12612             /* Get hold of the index and the bitmask for the variable */
12613             regNumber regNum  = varDsc->lvRegNum;
12614             regMaskTP regMask = genRegMask(regNum);
12615
12616             regSet.AddMaskVars(regMask);
12617
12618             if (varDsc->lvType == TYP_REF)
12619                 gcrefRegs |= regMask;
12620             else if (varDsc->lvType == TYP_BYREF)
12621                 byrefRegs |= regMask;
12622
12623             /* Mark the register holding the variable as such */
12624
12625             if (varTypeIsMultiReg(varDsc))
12626             {
12627                 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12628                 if (varDsc->lvOtherReg != REG_STK)
12629                 {
12630                     regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12631                     regMask |= genRegMask(varDsc->lvOtherReg);
12632                 }
12633             }
12634             else
12635             {
12636                 regTracker.rsTrackRegLclVar(regNum, varNum);
12637             }
12638         }
12639
12640         gcInfo.gcPtrArgCnt = 0;
12641
12642 #if FEATURE_STACK_FP_X87
12643
12644         regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
12645
12646         memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12647         memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12648
12649         // Setup fp state on block entry
12650         genSetupStateStackFP(block);
12651
12652 #ifdef DEBUG
12653         if (compiler->verbose)
12654         {
12655             JitDumpFPState();
12656         }
12657 #endif // DEBUG
12658 #endif // FEATURE_STACK_FP_X87
12659
12660         /* Make sure we keep track of what pointers are live */
12661
12662         noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12663         gcInfo.gcRegGCrefSetCur = gcrefRegs;
12664         gcInfo.gcRegByrefSetCur = byrefRegs;
12665
12666         /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12667            represent the exception object (TYP_REF).
12668            We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12669            to the block,  it will be the first thing evaluated
12670            (thanks to GTF_ORDER_SIDEEFF).
12671          */
12672
12673         if (handlerGetsXcptnObj(block->bbCatchTyp))
12674         {
12675             GenTree* firstStmt = block->FirstNonPhiDef();
12676             if (firstStmt != NULL)
12677             {
12678                 GenTree* firstTree = firstStmt->gtStmt.gtStmtExpr;
12679                 if (compiler->gtHasCatchArg(firstTree))
12680                 {
12681                     gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12682                 }
12683             }
12684         }
12685
12686         /* Start a new code output block */
12687         CLANG_FORMAT_COMMENT_ANCHOR;
12688
12689 #if FEATURE_EH_FUNCLETS
12690 #if defined(_TARGET_ARM_)
12691         genInsertNopForUnwinder(block);
12692 #endif // defined(_TARGET_ARM_)
12693
12694         genUpdateCurrentFunclet(block);
12695 #endif // FEATURE_EH_FUNCLETS
12696
12697 #ifdef _TARGET_XARCH_
12698         if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12699         {
12700             getEmitter()->emitLoopAlign();
12701         }
12702 #endif
12703
12704 #ifdef DEBUG
12705         if (compiler->opts.dspCode)
12706             printf("\n      L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12707 #endif
12708
12709         block->bbEmitCookie = NULL;
12710
12711         if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
12712         {
12713             /* Mark a label and update the current set of live GC refs */
12714
12715             block->bbEmitCookie =
12716                 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
12717 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12718                                            /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
12719 #else
12720                                            FALSE
12721 #endif
12722                                            );
12723         }
12724
12725         if (block == compiler->fgFirstColdBlock)
12726         {
12727 #ifdef DEBUG
12728             if (compiler->verbose)
12729             {
12730                 printf("\nThis is the start of the cold region of the method\n");
12731             }
12732 #endif
12733             // We should never have a block that falls through into the Cold section
12734             noway_assert(!lblk->bbFallsThrough());
12735
12736             // We require the block that starts the Cold section to have a label
12737             noway_assert(block->bbEmitCookie);
12738             getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12739         }
12740
12741         /* Both stacks are always empty on entry to a basic block */
12742
12743         SetStackLevel(0);
12744 #if FEATURE_STACK_FP_X87
12745         genResetFPstkLevel();
12746 #endif // FEATURE_STACK_FP_X87
12747
12748         genAdjustStackLevel(block);
12749
12750         savedStkLvl = genStackLevel;
12751
12752         /* Tell everyone which basic block we're working on */
12753
12754         compiler->compCurBB = block;
12755
12756         siBeginBlock(block);
12757
12758         // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12759         if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12760             genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
12761
12762         bool firstMapping = true;
12763
12764         /*---------------------------------------------------------------------
12765          *
12766          *  Generate code for each statement-tree in the block
12767          *
12768          */
12769         CLANG_FORMAT_COMMENT_ANCHOR;
12770
12771 #if FEATURE_EH_FUNCLETS
12772         if (block->bbFlags & BBF_FUNCLET_BEG)
12773         {
12774             genReserveFuncletProlog(block);
12775         }
12776 #endif // FEATURE_EH_FUNCLETS
12777
12778         for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
12779         {
12780             noway_assert(stmt->gtOper == GT_STMT);
12781
12782             /* Do we have a new IL-offset ? */
12783
12784             if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12785             {
12786                 /* Create and append a new IP-mapping entry */
12787                 genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
12788                 firstMapping = false;
12789             }
12790
12791 #ifdef DEBUG
12792             if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
12793             {
12794                 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
12795                 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
12796                 {
12797                     while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
12798                     {
12799                         genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, ">    ");
12800                     }
12801                 }
12802             }
12803 #endif // DEBUG
12804
12805             /* Get hold of the statement tree */
12806             GenTree* tree = stmt->gtStmt.gtStmtExpr;
12807
12808 #ifdef DEBUG
12809             stmtNum++;
12810             if (compiler->verbose)
12811             {
12812                 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
12813                 printf("Holding variables: ");
12814                 dspRegMask(regSet.rsMaskVars);
12815                 printf("\n\n");
12816                 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
12817                 printf("\n");
12818 #if FEATURE_STACK_FP_X87
12819                 JitDumpFPState();
12820 #endif
12821
12822                 printf("Execution Order:\n");
12823                 for (GenTree* treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
12824                 {
12825                     compiler->gtDispTree(treeNode, 0, NULL, true);
12826                 }
12827                 printf("\n");
12828             }
12829             totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
12830             totalCostSz += stmt->gtCostSz;
12831 #endif // DEBUG
12832
12833             compiler->compCurStmt = stmt;
12834
12835             compiler->compCurLifeTree = NULL;
12836             switch (tree->gtOper)
12837             {
12838                 case GT_CALL:
12839                     // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
12840                     // reported as alive even though not used within the caller for managed debugger sake.  So
12841                     // consider the return value of the method as used if generating debuggable code.
12842                     genCodeForCall(tree->AsCall(), compiler->opts.MinOpts() || compiler->opts.compDbgCode);
12843                     genUpdateLife(tree);
12844                     gcInfo.gcMarkRegSetNpt(RBM_INTRET);
12845                     break;
12846
12847                 case GT_IND:
12848                 case GT_NULLCHECK:
12849
12850                     // Just do the side effects
12851                     genEvalSideEffects(tree);
12852                     break;
12853
12854                 default:
12855                     /* Generate code for the tree */
12856
12857                     genCodeForTree(tree, 0);
12858                     break;
12859             }
12860
12861             regSet.rsSpillChk();
12862
12863             /* The value of the tree isn't used, unless it's a return stmt */
12864
12865             if (tree->gtOper != GT_RETURN)
12866                 gcInfo.gcMarkRegPtrVal(tree);
12867
12868 #if FEATURE_STACK_FP_X87
12869             genEndOfStatement();
12870 #endif
12871
12872 #ifdef DEBUG
12873             /* Make sure we didn't bungle pointer register tracking */
12874
12875             regMaskTP ptrRegs       = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
12876             regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
12877
12878             // If return is a GC-type, clear it.  Note that if a common
12879             // epilog is generated (compiler->genReturnBB) it has a void return
12880             // even though we might return a ref.  We can't use the compRetType
12881             // as the determiner because something we are tracking as a byref
12882             // might be used as a return value of a int function (which is legal)
12883             if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
12884                                               (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
12885             {
12886                 nonVarPtrRegs &= ~RBM_INTRET;
12887             }
12888
12889             // When profiling, the first statement in a catch block will be the
12890             // harmless "inc" instruction (does not interfere with the exception
12891             // object).
12892
12893             if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) &&
12894                 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
12895             {
12896                 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
12897             }
12898
12899             if (nonVarPtrRegs)
12900             {
12901                 printf("Regset after tree=");
12902                 Compiler::printTreeID(tree);
12903                 printf(" BB%02u gcr=", block->bbNum);
12904                 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12905                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12906                 printf(", byr=");
12907                 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12908                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12909                 printf(", regVars=");
12910                 printRegMaskInt(regSet.rsMaskVars);
12911                 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
12912                 printf("\n");
12913             }
12914
12915             noway_assert(nonVarPtrRegs == 0);
12916 #endif // DEBUG
12917
12918             noway_assert(stmt->gtOper == GT_STMT);
12919
12920             genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
12921
12922         } //-------- END-FOR each statement-tree of the current block ---------
12923
12924         if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
12925         {
12926             siEndBlock(block);
12927
12928             /* Is this the last block, and are there any open scopes left ? */
12929
12930             bool isLastBlockProcessed = (block->bbNext == NULL);
12931             if (block->isBBCallAlwaysPair())
12932             {
12933                 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
12934             }
12935
12936             if (isLastBlockProcessed && siOpenScopeList.scNext)
12937             {
12938                 /* This assert no longer holds, because we may insert a throw
12939                    block to demarcate the end of a try or finally region when they
12940                    are at the end of the method.  It would be nice if we could fix
12941                    our code so that this throw block will no longer be necessary. */
12942
12943                 // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
12944
12945                 siCloseAllOpenScopes();
12946             }
12947         }
12948
12949         SubtractStackLevel(savedStkLvl);
12950
12951         gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
12952
12953         if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
12954             compiler->genChangeLife(block->bbLiveOut);
12955
12956         /* Both stacks should always be empty on exit from a basic block */
12957
12958         noway_assert(genStackLevel == 0);
12959 #if FEATURE_STACK_FP_X87
12960         noway_assert(genGetFPstkLevel() == 0);
12961
12962         // Do the FPState matching that may have to be done
12963         genCodeForEndBlockTransitionStackFP(block);
12964 #endif
12965
12966         noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
12967
12968         /* Do we need to generate a jump or return? */
12969
12970         switch (block->bbJumpKind)
12971         {
12972             case BBJ_ALWAYS:
12973                 inst_JMP(EJ_jmp, block->bbJumpDest);
12974                 break;
12975
12976             case BBJ_RETURN:
12977                 genExitCode(block);
12978                 break;
12979
12980             case BBJ_THROW:
12981                 // If we have a throw at the end of a function or funclet, we need to emit another instruction
12982                 // afterwards to help the OS unwinder determine the correct context during unwind.
12983                 // We insert an unexecuted breakpoint instruction in several situations
12984                 // following a throw instruction:
12985                 // 1. If the throw is the last instruction of the function or funclet. This helps
12986                 //    the OS unwinder determine the correct context during an unwind from the
12987                 //    thrown exception.
12988                 //    the last block of the hot section.
12989                 // 3. If the subsequent block is a special throw block.
12990                 if ((block->bbNext == NULL)
12991 #if FEATURE_EH_FUNCLETS
12992                     || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
12993 #endif // FEATURE_EH_FUNCLETS
12994                     || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
12995                     block->bbNext == compiler->fgFirstColdBlock)
12996                 {
12997                     instGen(INS_BREAKPOINT); // This should never get executed
12998                 }
12999
13000                 break;
13001
13002             case BBJ_CALLFINALLY:
13003
13004 #if defined(_TARGET_X86_)
13005
13006                 /* If we are about to invoke a finally locally from a try block,
13007                    we have to set the hidden slot corresponding to the finally's
13008                    nesting level. When invoked in response to an exception, the
13009                    EE usually does it.
13010
13011                    We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
13012
13013                    This code depends on this order not being messed up.
13014                    We will emit :
13015                         mov [ebp-(n+1)],0
13016                         mov [ebp-  n  ],0xFC
13017                         push &step
13018                         jmp  finallyBlock
13019
13020                   step: mov [ebp-  n  ],0
13021                         jmp leaveTarget
13022                   leaveTarget:
13023                  */
13024
13025                 noway_assert(isFramePointerUsed());
13026
13027                 // Get the nesting level which contains the finally
13028                 compiler->fgGetNestingLevel(block, &finallyNesting);
13029
13030                 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13031                 unsigned filterEndOffsetSlotOffs;
13032                 filterEndOffsetSlotOffs =
13033                     (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
13034
13035                 unsigned curNestingSlotOffs;
13036                 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
13037
13038                 // Zero out the slot for the next nesting level
13039                 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
13040                                            curNestingSlotOffs - TARGET_POINTER_SIZE);
13041
13042                 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
13043                                            curNestingSlotOffs);
13044
13045                 // Now push the address of where the finally funclet should
13046                 // return to directly.
13047                 if (!(block->bbFlags & BBF_RETLESS_CALL))
13048                 {
13049                     assert(block->isBBCallAlwaysPair());
13050                     getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13051                 }
13052                 else
13053                 {
13054                     // EE expects a DWORD, so we give him 0
13055                     inst_IV(INS_push_hide, 0);
13056                 }
13057
13058                 // Jump to the finally BB
13059                 inst_JMP(EJ_jmp, block->bbJumpDest);
13060
13061 #elif defined(_TARGET_ARM_)
13062
13063                 // Now set REG_LR to the address of where the finally funclet should
13064                 // return to directly.
13065
13066                 BasicBlock* bbFinallyRet;
13067                 bbFinallyRet = NULL;
13068
13069                 // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13070                 // we would have otherwise created retless calls.
13071                 assert(block->isBBCallAlwaysPair());
13072
13073                 assert(block->bbNext != NULL);
13074                 assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
13075                 assert(block->bbNext->bbJumpDest != NULL);
13076                 assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13077
13078                 bbFinallyRet = block->bbNext->bbJumpDest;
13079                 bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13080
13081                 // Load the address where the finally funclet should return into LR.
13082                 // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13083                 // the return.
13084                 genMov32RelocatableDisplacement(bbFinallyRet, REG_LR);
13085                 regTracker.rsTrackRegTrash(REG_LR);
13086
13087                 // Jump to the finally BB
13088                 inst_JMP(EJ_jmp, block->bbJumpDest);
13089 #else
13090                 NYI("TARGET");
13091 #endif
13092
13093                 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13094                 // jump target using bbJumpDest - that is already used to point
13095                 // to the finally block. So just skip past the BBJ_ALWAYS unless the
13096                 // block is RETLESS.
13097                 if (!(block->bbFlags & BBF_RETLESS_CALL))
13098                 {
13099                     assert(block->isBBCallAlwaysPair());
13100
13101                     lblk  = block;
13102                     block = block->bbNext;
13103                 }
13104                 break;
13105
13106 #ifdef _TARGET_ARM_
13107
13108             case BBJ_EHCATCHRET:
13109                 // set r0 to the address the VM should return to after the catch
13110                 genMov32RelocatableDisplacement(block->bbJumpDest, REG_R0);
13111                 regTracker.rsTrackRegTrash(REG_R0);
13112
13113                 __fallthrough;
13114
13115             case BBJ_EHFINALLYRET:
13116             case BBJ_EHFILTERRET:
13117                 genReserveFuncletEpilog(block);
13118                 break;
13119
13120 #else // _TARGET_ARM_
13121
13122             case BBJ_EHFINALLYRET:
13123             case BBJ_EHFILTERRET:
13124             case BBJ_EHCATCHRET:
13125                 break;
13126
13127 #endif // _TARGET_ARM_
13128
13129             case BBJ_NONE:
13130             case BBJ_COND:
13131             case BBJ_SWITCH:
13132                 break;
13133
13134             default:
13135                 noway_assert(!"Unexpected bbJumpKind");
13136                 break;
13137         }
13138
13139 #ifdef DEBUG
13140         compiler->compCurBB = 0;
13141 #endif
13142
13143     } //------------------ END-FOR each block of the method -------------------
13144
13145     /* Nothing is live at this point */
13146     genUpdateLife(VarSetOps::MakeEmpty(compiler));
13147
13148     /* Finalize the spill  tracking logic */
13149
13150     regSet.rsSpillEnd();
13151
13152     /* Finalize the temp   tracking logic */
13153
13154     compiler->tmpEnd();
13155
13156 #ifdef DEBUG
13157     if (compiler->verbose)
13158     {
13159         printf("\n# ");
13160         printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
13161         printf("%s\n", compiler->info.compFullName);
13162     }
13163 #endif
13164 }
13165 #ifdef _PREFAST_
13166 #pragma warning(pop)
13167 #endif
13168
13169 /*****************************************************************************
13170  *
13171  *  Generate code for a long operation.
13172  *  needReg is a recommendation of which registers to use for the tree.
13173  *  For partially enregistered longs, the tree will be marked as in a register
13174  *    without loading the stack part into a register. Note that only leaf
13175  *    nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13176  *    enregistered so that we can know the memory location of the other half.
13177  */
13178
13179 #ifdef _PREFAST_
13180 #pragma warning(push)
13181 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
13182 #endif
13183 void CodeGen::genCodeForTreeLng(GenTree* tree, regMaskTP needReg, regMaskTP avoidReg)
13184 {
13185     genTreeOps oper;
13186     unsigned   kind;
13187
13188     regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13189     regMaskTP addrReg;
13190     regNumber regLo;
13191     regNumber regHi;
13192
13193     noway_assert(tree);
13194     noway_assert(tree->gtOper != GT_STMT);
13195     noway_assert(genActualType(tree->gtType) == TYP_LONG);
13196
13197     /* Figure out what kind of a node we have */
13198
13199     oper = tree->OperGet();
13200     kind = tree->OperKind();
13201
13202     if (tree->InReg())
13203     {
13204     REG_VAR_LONG:
13205         regPair = tree->gtRegPair;
13206
13207         gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13208
13209         goto DONE;
13210     }
13211
13212     /* Is this a constant node? */
13213
13214     if (kind & GTK_CONST)
13215     {
13216         __int64 lval;
13217
13218         /* Pick a register pair for the value */
13219
13220         regPair = regSet.rsPickRegPair(needReg);
13221
13222         /* Load the value into the registers */
13223         CLANG_FORMAT_COMMENT_ANCHOR;
13224
13225 #if !CPU_HAS_FP_SUPPORT
13226         if (oper == GT_CNS_DBL)
13227         {
13228             noway_assert(sizeof(__int64) == sizeof(double));
13229
13230             noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
13231
13232             lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13233         }
13234         else
13235 #endif
13236         {
13237             noway_assert(oper == GT_CNS_LNG);
13238
13239             lval = tree->gtLngCon.gtLconVal;
13240         }
13241
13242         genSetRegToIcon(genRegPairLo(regPair), int(lval));
13243         genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13244         goto DONE;
13245     }
13246
13247     /* Is this a leaf node? */
13248
13249     if (kind & GTK_LEAF)
13250     {
13251         switch (oper)
13252         {
13253             case GT_LCL_VAR:
13254
13255 #if REDUNDANT_LOAD
13256
13257                 /*  This case has to consider the case in which an int64 LCL_VAR
13258                  *  may both be enregistered and also have a cached copy of itself
13259                  *  in a different set of registers.
13260                  *  We want to return the registers that have the most in common
13261                  *  with the needReg mask
13262                  */
13263
13264                 /*  Does the var have a copy of itself in the cached registers?
13265                  *  And are these cached registers both free?
13266                  *  If so use these registers if they match any needReg.
13267                  */
13268
13269                 regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13270
13271                 if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13272                     ((genRegPairMask(regPair) & needReg) != RBM_NONE))
13273                 {
13274                     goto DONE;
13275                 }
13276
13277                 /*  Does the variable live in a register?
13278                  *  If so use these registers.
13279                  */
13280                 if (genMarkLclVar(tree))
13281                     goto REG_VAR_LONG;
13282
13283                 /*  If tree is not an enregistered variable then
13284                  *  be sure to use any cached register that contain
13285                  *  a copy of this local variable
13286                  */
13287                 if (regPair != REG_PAIR_NONE)
13288                 {
13289                     goto DONE;
13290                 }
13291 #endif
13292                 goto MEM_LEAF;
13293
13294             case GT_LCL_FLD:
13295
13296                 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13297                 // to worry about it being enregistered.
13298                 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13299                 goto MEM_LEAF;
13300
13301             case GT_CLS_VAR:
13302             MEM_LEAF:
13303
13304                 /* Pick a register pair for the value */
13305
13306                 regPair = regSet.rsPickRegPair(needReg);
13307
13308                 /* Load the value into the registers */
13309
13310                 instruction loadIns;
13311
13312                 loadIns = ins_Load(TYP_INT); // INS_ldr
13313                 regLo   = genRegPairLo(regPair);
13314                 regHi   = genRegPairHi(regPair);
13315
13316 #if CPU_LOAD_STORE_ARCH
13317                 {
13318                     regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13319                     inst_RV_TT(INS_lea, regAddr, tree, 0);
13320                     regTracker.rsTrackRegTrash(regAddr);
13321
13322                     if (regLo != regAddr)
13323                     {
13324                         // assert(regLo != regAddr);  // forced by if statement
13325                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13326                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13327                     }
13328                     else
13329                     {
13330                         // assert(regHi != regAddr);  // implied by regpair property and the if statement
13331                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13332                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13333                     }
13334                 }
13335 #else
13336                 inst_RV_TT(loadIns, regLo, tree, 0);
13337                 inst_RV_TT(loadIns, regHi, tree, 4);
13338 #endif
13339
13340 #ifdef _TARGET_ARM_
13341                 if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13342                 {
13343                     // Emit a memory barrier instruction after the load
13344                     instGen_MemoryBarrier();
13345                 }
13346 #endif
13347
13348                 regTracker.rsTrackRegTrash(regLo);
13349                 regTracker.rsTrackRegTrash(regHi);
13350
13351                 goto DONE;
13352
13353             default:
13354 #ifdef DEBUG
13355                 compiler->gtDispTree(tree);
13356 #endif
13357                 noway_assert(!"unexpected leaf");
13358         }
13359     }
13360
13361     /* Is it a 'simple' unary/binary operator? */
13362
13363     if (kind & GTK_SMPOP)
13364     {
13365         instruction insLo;
13366         instruction insHi;
13367         bool        doLo;
13368         bool        doHi;
13369         bool        setCarry = false;
13370         int         helper;
13371
13372         GenTree* op1 = tree->gtOp.gtOp1;
13373         GenTree* op2 = tree->gtGetOp2IfPresent();
13374
13375         switch (oper)
13376         {
13377             case GT_ASG:
13378             {
13379                 unsigned lclVarNum    = compiler->lvaCount;
13380                 unsigned lclVarILoffs = DUMMY_INIT(0);
13381
13382                 /* Is the target a local ? */
13383
13384                 if (op1->gtOper == GT_LCL_VAR)
13385                 {
13386                     unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
13387                     LclVarDsc* varDsc;
13388
13389                     noway_assert(varNum < compiler->lvaCount);
13390                     varDsc = compiler->lvaTable + varNum;
13391
13392                     // No dead stores, (with min opts we may have dead stores)
13393                     noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13394
13395                     /* For non-debuggable code, every definition of a lcl-var has
13396                      * to be checked to see if we need to open a new scope for it.
13397                      * Remember the local var info to call siCheckVarScope
13398                      * AFTER codegen of the assignment.
13399                      */
13400                     if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
13401                         (compiler->info.compVarScopesCount > 0))
13402                     {
13403                         lclVarNum    = varNum;
13404                         lclVarILoffs = op1->gtLclVar.gtLclILoffs;
13405                     }
13406
13407                     /* Has the variable been assigned to a register (pair) ? */
13408
13409                     if (genMarkLclVar(op1))
13410                     {
13411                         noway_assert(op1->InReg());
13412                         regPair = op1->gtRegPair;
13413                         regLo   = genRegPairLo(regPair);
13414                         regHi   = genRegPairHi(regPair);
13415                         noway_assert(regLo != regHi);
13416
13417                         /* Is the value being assigned a constant? */
13418
13419                         if (op2->gtOper == GT_CNS_LNG)
13420                         {
13421                             /* Move the value into the target */
13422
13423                             genMakeRegPairAvailable(regPair);
13424
13425                             instruction ins;
13426                             if (regLo == REG_STK)
13427                             {
13428                                 ins = ins_Store(TYP_INT);
13429                             }
13430                             else
13431                             {
13432                                 // Always do the stack first (in case it grabs a register it can't
13433                                 // clobber regLo this way)
13434                                 if (regHi == REG_STK)
13435                                 {
13436                                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13437                                 }
13438                                 ins = INS_mov;
13439                             }
13440                             inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
13441
13442                             // The REG_STK case has already been handled
13443                             if (regHi != REG_STK)
13444                             {
13445                                 ins = INS_mov;
13446                                 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13447                             }
13448
13449                             goto DONE_ASSG_REGS;
13450                         }
13451
13452                         /* Compute the RHS into desired register pair */
13453
13454                         if (regHi != REG_STK)
13455                         {
13456                             genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13457                             noway_assert(op2->InReg());
13458                             noway_assert(op2->gtRegPair == regPair);
13459                         }
13460                         else
13461                         {
13462                             regPairNo curPair;
13463                             regNumber curLo;
13464                             regNumber curHi;
13465
13466                             genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13467
13468                             noway_assert(op2->InReg());
13469
13470                             curPair = op2->gtRegPair;
13471                             curLo   = genRegPairLo(curPair);
13472                             curHi   = genRegPairHi(curPair);
13473
13474                             /* move high first, target is on stack */
13475                             inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13476
13477                             if (regLo != curLo)
13478                             {
13479                                 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13480                                     regSet.rsSpillReg(regLo);
13481                                 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13482                                 regTracker.rsTrackRegCopy(regLo, curLo);
13483                             }
13484                         }
13485
13486                         genReleaseRegPair(op2);
13487                         goto DONE_ASSG_REGS;
13488                     }
13489                 }
13490
13491                 /* Is the value being assigned a constant? */
13492
13493                 if (op2->gtOper == GT_CNS_LNG)
13494                 {
13495                     /* Make the target addressable */
13496
13497                     addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13498
13499                     /* Move the value into the target */
13500
13501                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
13502                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13503
13504                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13505
13506                     goto LAsgExit;
13507                 }
13508
13509 #if 0
13510                 /* Catch a case where we can avoid generating "op reg, mem". Better pairing
13511                  * from
13512                  *     mov regHi, mem
13513                  *     op  regHi, reg
13514                  *
13515                  * To avoid problems with order of evaluation, only do this if op2 is
13516                  * a non-enregistered local variable
13517                  */
13518
13519                 if (GenTree::OperIsCommutative(oper) &&
13520                     op1->gtOper == GT_LCL_VAR &&
13521                     op2->gtOper == GT_LCL_VAR)
13522                 {
13523                     regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13524
13525                     /* Is op2 a non-enregistered local variable? */
13526                     if (regPair == REG_PAIR_NONE)
13527                     {
13528                         regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13529
13530                         /* Is op1 an enregistered local variable? */
13531                         if (regPair != REG_PAIR_NONE)
13532                         {
13533                             /* Swap the operands */
13534                             GenTree* op = op1;
13535                             op1 = op2;
13536                             op2 = op;
13537                         }
13538                     }
13539                 }
13540 #endif
13541
13542                 /* Eliminate worthless assignment "lcl = lcl" */
13543
13544                 if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
13545                     op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
13546                 {
13547                     genUpdateLife(op2);
13548                     goto LAsgExit;
13549                 }
13550
13551                 if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
13552                     // op1,op2 need to be materialized in the correct order.
13553                     (tree->gtFlags & GTF_REVERSE_OPS))
13554                 {
13555                     /* Generate the small RHS into a register pair */
13556
13557                     GenTree* smallOpr = op2->gtOp.gtOp1;
13558
13559                     genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13560
13561                     /* Make the target addressable */
13562
13563                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13564
13565                     /* Make sure everything is still addressable */
13566
13567                     genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13568                     noway_assert(smallOpr->InReg());
13569                     regHi   = smallOpr->gtRegNum;
13570                     addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13571
13572                     // conv.ovf.u8 could overflow if the original number was negative
13573                     if (op2->gtOverflow())
13574                     {
13575                         noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
13576                                      0);                              // conv.ovf.u8.un should be bashed to conv.u8.un
13577                         instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
13578                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13579                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13580                     }
13581
13582                     /* Move the value into the target */
13583
13584                     inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13585                     inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
13586
13587                     /* Free up anything that was tied up by either side */
13588
13589                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13590                     genReleaseReg(smallOpr);
13591
13592 #if REDUNDANT_LOAD
13593                     if (op1->gtOper == GT_LCL_VAR)
13594                     {
13595                         /* clear this local from reg table */
13596                         regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13597
13598                         /* mark RHS registers as containing the local var */
13599                         regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13600                     }
13601 #endif
13602                     goto LAsgExit;
13603                 }
13604
13605                 /* Is the LHS more complex than the RHS? */
13606
13607                 if (tree->gtFlags & GTF_REVERSE_OPS)
13608                 {
13609                     /* Generate the RHS into a register pair */
13610
13611                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13612                     noway_assert(op2->InReg());
13613
13614                     /* Make the target addressable */
13615                     op1     = genCodeForCommaTree(op1);
13616                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13617
13618                     /* Make sure the RHS register hasn't been spilled */
13619
13620                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13621                 }
13622                 else
13623                 {
13624                     /* Make the target addressable */
13625
13626                     op1     = genCodeForCommaTree(op1);
13627                     addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13628
13629                     /* Generate the RHS into a register pair */
13630
13631                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13632                 }
13633
13634                 /* Lock 'op2' and make sure 'op1' is still addressable */
13635
13636                 noway_assert(op2->InReg());
13637                 regPair = op2->gtRegPair;
13638
13639                 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13640
13641                 /* Move the value into the target */
13642
13643                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13644                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13645
13646                 /* Free up anything that was tied up by either side */
13647
13648                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13649                 genReleaseRegPair(op2);
13650
13651             DONE_ASSG_REGS:
13652
13653 #if REDUNDANT_LOAD
13654
13655                 if (op1->gtOper == GT_LCL_VAR)
13656                 {
13657                     /* Clear this local from reg table */
13658
13659                     regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13660
13661                     if ((op2->InReg()) &&
13662                         /* constant has precedence over local */
13663                         //                    rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13664                         tree->gtOper == GT_ASG)
13665                     {
13666                         regNumber regNo;
13667
13668                         /* mark RHS registers as containing the local var */
13669
13670                         regNo = genRegPairLo(op2->gtRegPair);
13671                         if (regNo != REG_STK)
13672                             regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13673
13674                         regNo = genRegPairHi(op2->gtRegPair);
13675                         if (regNo != REG_STK)
13676                         {
13677                             /* For partially enregistered longs, we might have
13678                                stomped on op2's hiReg */
13679                             if (!(op1->InReg()) || regNo != genRegPairLo(op1->gtRegPair))
13680                             {
13681                                 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13682                             }
13683                         }
13684                     }
13685                 }
13686 #endif
13687
13688             LAsgExit:
13689
13690                 genUpdateLife(op1);
13691                 genUpdateLife(tree);
13692
13693                 /* For non-debuggable code, every definition of a lcl-var has
13694                  * to be checked to see if we need to open a new scope for it.
13695                  */
13696                 if (lclVarNum < compiler->lvaCount)
13697                     siCheckVarScope(lclVarNum, lclVarILoffs);
13698             }
13699                 return;
13700
13701             case GT_SUB:
13702                 insLo    = INS_sub;
13703                 insHi    = INS_SUBC;
13704                 setCarry = true;
13705                 goto BINOP_OVF;
13706             case GT_ADD:
13707                 insLo    = INS_add;
13708                 insHi    = INS_ADDC;
13709                 setCarry = true;
13710                 goto BINOP_OVF;
13711
13712                 bool ovfl;
13713
13714             BINOP_OVF:
13715                 ovfl = tree->gtOverflow();
13716                 goto _BINOP;
13717
13718             case GT_AND:
13719                 insLo = insHi = INS_AND;
13720                 goto BINOP;
13721             case GT_OR:
13722                 insLo = insHi = INS_OR;
13723                 goto BINOP;
13724             case GT_XOR:
13725                 insLo = insHi = INS_XOR;
13726                 goto BINOP;
13727
13728             BINOP:
13729                 ovfl = false;
13730                 goto _BINOP;
13731
13732             _BINOP:
13733
13734                 /* The following makes an assumption about gtSetEvalOrder(this) */
13735
13736                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13737
13738                 /* Special case: check for "(long(intval) << 32) | longval" */
13739
13740                 if (oper == GT_OR && op1->gtOper == GT_LSH)
13741                 {
13742                     GenTree* lshLHS = op1->gtOp.gtOp1;
13743                     GenTree* lshRHS = op1->gtOp.gtOp2;
13744
13745                     if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13746                         genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13747                     {
13748
13749                         /* Throw away the cast of the shift operand. */
13750
13751                         op1 = lshLHS->gtCast.CastOp();
13752
13753                         /* Special case: check op2 for "ulong(intval)" */
13754                         if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
13755                             genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
13756                         {
13757                             /* Throw away the cast of the second operand. */
13758
13759                             op2 = op2->gtCast.CastOp();
13760                             goto SIMPLE_OR_LONG;
13761                         }
13762                         /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
13763                         else if (op2->gtOper == GT_AND)
13764                         {
13765                             GenTree* andLHS;
13766                             andLHS = op2->gtOp.gtOp1;
13767                             GenTree* andRHS;
13768                             andRHS = op2->gtOp.gtOp2;
13769
13770                             if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13771                                 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13772                                 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13773                             {
13774                                 /* Throw away the cast of the second operand. */
13775
13776                                 op2 = andLHS->gtCast.CastOp();
13777
13778                             SIMPLE_OR_LONG:
13779                                 // Load the high DWORD, ie. op1
13780
13781                                 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13782
13783                                 noway_assert(op1->InReg());
13784                                 regHi = op1->gtRegNum;
13785                                 regSet.rsMarkRegUsed(op1);
13786
13787                                 // Load the low DWORD, ie. op2
13788
13789                                 genCodeForTree(op2, needReg & ~genRegMask(regHi));
13790
13791                                 noway_assert(op2->InReg());
13792                                 regLo = op2->gtRegNum;
13793
13794                                 /* Make sure regHi is still around. Also, force
13795                                    regLo to be excluded in case regLo==regHi */
13796
13797                                 genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
13798                                 regHi = op1->gtRegNum;
13799
13800                                 regPair = gen2regs2pair(regLo, regHi);
13801                                 goto DONE;
13802                             }
13803                         }
13804
13805                         /*  Generate the following sequence:
13806                                Prepare op1 (discarding shift)
13807                                Compute op2 into some regpair
13808                                OR regpairhi, op1
13809                          */
13810
13811                         /* First, make op1 addressable */
13812
13813                         /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
13814
13815                            It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
13816                            which the long value is computed is from needReg.  But at this point the safest fix is
13817                            to exclude regSet.rsMaskResvd.
13818
13819                            Note that needReg could be the set of free registers (excluding reserved ones).  If we don't
13820                            exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
13821                            reg from
13822                            reserved set which is bound to fail.  To prevent that we avoid regSet.rsMaskResvd.
13823                          */
13824                         regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
13825
13826                         addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
13827
13828                         genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
13829
13830                         noway_assert(op2->InReg());
13831                         regPair = op2->gtRegPair;
13832                         regHi   = genRegPairHi(regPair);
13833
13834                         /* The operand might have interfered with the address */
13835
13836                         addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13837
13838                         /* Now compute the result */
13839
13840                         inst_RV_TT(insHi, regHi, op1, 0);
13841
13842                         regTracker.rsTrackRegTrash(regHi);
13843
13844                         /* Free up anything that was tied up by the LHS */
13845
13846                         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13847
13848                         /* The result is where the second operand is sitting */
13849
13850                         genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
13851
13852                         regPair = op2->gtRegPair;
13853                         goto DONE;
13854                     }
13855                 }
13856
13857                 /* Special case: check for "longval | (long(intval) << 32)" */
13858
13859                 if (oper == GT_OR && op2->gtOper == GT_LSH)
13860                 {
13861                     GenTree* lshLHS = op2->gtOp.gtOp1;
13862                     GenTree* lshRHS = op2->gtOp.gtOp2;
13863
13864                     if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13865                         genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13866
13867                     {
13868                         /* We throw away the cast of the shift operand. */
13869
13870                         op2 = lshLHS->gtCast.CastOp();
13871
13872                         /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
13873
13874                         if (op1->gtOper == GT_AND)
13875                         {
13876                             GenTree* andLHS = op1->gtOp.gtOp1;
13877                             GenTree* andRHS = op1->gtOp.gtOp2;
13878
13879                             if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13880                                 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13881                                 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13882                             {
13883                                 /* Throw away the cast of the first operand. */
13884
13885                                 op1 = andLHS->gtCast.CastOp();
13886
13887                                 // Load the low DWORD, ie. op1
13888
13889                                 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13890
13891                                 noway_assert(op1->InReg());
13892                                 regLo = op1->gtRegNum;
13893                                 regSet.rsMarkRegUsed(op1);
13894
13895                                 // Load the high DWORD, ie. op2
13896
13897                                 genCodeForTree(op2, needReg & ~genRegMask(regLo));
13898
13899                                 noway_assert(op2->InReg());
13900                                 regHi = op2->gtRegNum;
13901
13902                                 /* Make sure regLo is still around. Also, force
13903                                    regHi to be excluded in case regLo==regHi */
13904
13905                                 genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
13906                                 regLo = op1->gtRegNum;
13907
13908                                 regPair = gen2regs2pair(regLo, regHi);
13909                                 goto DONE;
13910                             }
13911                         }
13912
13913                         /*  Generate the following sequence:
13914                               Compute op1 into some regpair
13915                               Make op2 (ignoring shift) addressable
13916                               OR regPairHi, op2
13917                          */
13918
13919                         // First, generate the first operand into some register
13920
13921                         genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13922                         noway_assert(op1->InReg());
13923
13924                         /* Make the second operand addressable */
13925
13926                         addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
13927
13928                         /* Make sure the result is in a free register pair */
13929
13930                         genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13931                         regPair = op1->gtRegPair;
13932                         regHi   = genRegPairHi(regPair);
13933
13934                         /* The operand might have interfered with the address */
13935
13936                         addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
13937
13938                         /* Compute the new value */
13939
13940                         inst_RV_TT(insHi, regHi, op2, 0);
13941
13942                         /* The value in the high register has been trashed */
13943
13944                         regTracker.rsTrackRegTrash(regHi);
13945
13946                         goto DONE_OR;
13947                     }
13948                 }
13949
13950                 /* Generate the first operand into registers */
13951
13952                 if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13953                     ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
13954                 {
13955                     regPair = regSet.rsPickRegPair(needReg);
13956                     genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13957                 }
13958                 else
13959                 {
13960                     genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13961                 }
13962                 noway_assert(op1->InReg());
13963                 regMaskTP op1Mask;
13964                 regPair = op1->gtRegPair;
13965                 op1Mask = genRegPairMask(regPair);
13966
13967                 /* Make the second operand addressable */
13968                 regMaskTP needReg2;
13969                 needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
13970                 addrReg  = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
13971
13972                 // TODO: If 'op1' got spilled and 'op2' happens to be
13973                 // TODO: in a register, and we have add/mul/and/or/xor,
13974                 // TODO: reverse the operands since we can perform the
13975                 // TODO: operation directly with the spill temp, e.g.
13976                 // TODO: 'add regHi, [temp]'.
13977
13978                 /* Make sure the result is in a free register pair */
13979
13980                 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13981                 regPair = op1->gtRegPair;
13982                 op1Mask = genRegPairMask(regPair);
13983
13984                 regLo = genRegPairLo(regPair);
13985                 regHi = genRegPairHi(regPair);
13986
13987                 /* Make sure that we don't spill regLo/regHi below */
13988                 regSet.rsLockUsedReg(op1Mask);
13989
13990                 /* The operand might have interfered with the address */
13991
13992                 addrReg = genKeepAddressable(op2, addrReg);
13993
13994                 /* The value in the register pair is about to be trashed */
13995
13996                 regTracker.rsTrackRegTrash(regLo);
13997                 regTracker.rsTrackRegTrash(regHi);
13998
13999                 /* Compute the new value */
14000
14001                 doLo = true;
14002                 doHi = true;
14003
14004                 if (op2->gtOper == GT_CNS_LNG)
14005                 {
14006                     __int64 icon = op2->gtLngCon.gtLconVal;
14007
14008                     /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14009
14010                     switch (oper)
14011                     {
14012                         case GT_AND:
14013                             if ((int)(icon) == -1)
14014                                 doLo = false;
14015                             if ((int)(icon >> 32) == -1)
14016                                 doHi = false;
14017
14018                             if (!(icon & I64(0x00000000FFFFFFFF)))
14019                             {
14020                                 genSetRegToIcon(regLo, 0);
14021                                 doLo = false;
14022                             }
14023
14024                             if (!(icon & I64(0xFFFFFFFF00000000)))
14025                             {
14026                                 /* Always set the low DWORD first */
14027
14028                                 if (doLo)
14029                                 {
14030                                     inst_RV_TT(insLo, regLo, op2, 0);
14031                                     doLo = false;
14032                                 }
14033                                 genSetRegToIcon(regHi, 0);
14034                                 doHi = false;
14035                             }
14036
14037                             break;
14038
14039                         case GT_OR:
14040                         case GT_XOR:
14041                             if (!(icon & I64(0x00000000FFFFFFFF)))
14042                                 doLo = false;
14043                             if (!(icon & I64(0xFFFFFFFF00000000)))
14044                                 doHi = false;
14045                             break;
14046                         default:
14047                             break;
14048                     }
14049                 }
14050
14051                 // Fix 383813 X86/ARM ILGEN
14052                 // Fix 383793 ARM ILGEN
14053                 // Fix 383911 ARM ILGEN
14054                 regMaskTP newMask;
14055                 newMask = addrReg & ~op1Mask;
14056                 regSet.rsLockUsedReg(newMask);
14057
14058                 if (doLo)
14059                 {
14060                     insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14061                     inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14062                 }
14063                 if (doHi)
14064                 {
14065                     insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14066                     inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14067                 }
14068
14069                 regSet.rsUnlockUsedReg(newMask);
14070                 regSet.rsUnlockUsedReg(op1Mask);
14071
14072             DONE_OR:
14073
14074                 /* Free up anything that was tied up by the LHS */
14075
14076                 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14077
14078                 /* The result is where the first operand is sitting */
14079
14080                 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14081
14082                 regPair = op1->gtRegPair;
14083
14084                 if (ovfl)
14085                     genCheckOverflow(tree);
14086
14087                 goto DONE;
14088
14089             case GT_UMOD:
14090
14091                 regPair = genCodeForLongModInt(tree, needReg);
14092                 goto DONE;
14093
14094             case GT_MUL:
14095
14096                 /* Special case: both operands promoted from int */
14097
14098                 assert(tree->gtIsValid64RsltMul());
14099
14100                 /* Change to an integer multiply temporarily */
14101
14102                 tree->gtType = TYP_INT;
14103
14104                 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14105                 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14106                 tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14107
14108                 assert(tree->gtFlags & GTF_MUL_64RSLT);
14109
14110 #if defined(_TARGET_X86_)
14111                 // imul on x86 requires EDX:EAX
14112                 genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14113                 noway_assert(tree->InReg());
14114                 noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
14115 #elif defined(_TARGET_ARM_)
14116                 genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14117                 noway_assert(tree->InReg());
14118 #else
14119                 assert(!"Unsupported target for 64-bit multiply codegen");
14120 #endif
14121
14122                 /* Restore gtType, op1 and op2 from the change above */
14123
14124                 tree->gtType     = TYP_LONG;
14125                 tree->gtOp.gtOp1 = op1;
14126                 tree->gtOp.gtOp2 = op2;
14127
14128 #if defined(_TARGET_X86_)
14129                 /* The result is now in EDX:EAX */
14130                 regPair = REG_PAIR_EAXEDX;
14131 #elif defined(_TARGET_ARM_)
14132                 regPair = tree->gtRegPair;
14133 #endif
14134                 goto DONE;
14135
14136             case GT_LSH:
14137                 helper = CORINFO_HELP_LLSH;
14138                 goto SHIFT;
14139             case GT_RSH:
14140                 helper = CORINFO_HELP_LRSH;
14141                 goto SHIFT;
14142             case GT_RSZ:
14143                 helper = CORINFO_HELP_LRSZ;
14144                 goto SHIFT;
14145
14146             SHIFT:
14147
14148                 noway_assert(op1->gtType == TYP_LONG);
14149                 noway_assert(genActualType(op2->gtType) == TYP_INT);
14150
14151                 /* Is the second operand a constant? */
14152
14153                 if (op2->gtOper == GT_CNS_INT)
14154                 {
14155                     unsigned int count = op2->gtIntCon.gtIconVal;
14156
14157                     /* Compute the left operand into a free register pair */
14158
14159                     genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14160                     noway_assert(op1->InReg());
14161
14162                     regPair = op1->gtRegPair;
14163                     regLo   = genRegPairLo(regPair);
14164                     regHi   = genRegPairHi(regPair);
14165
14166                     /* Assume the value in the register pair is trashed. In some cases, though,
14167                        a register might be set to zero, and we can use that information to improve
14168                        some code generation.
14169                     */
14170
14171                     regTracker.rsTrackRegTrash(regLo);
14172                     regTracker.rsTrackRegTrash(regHi);
14173
14174                     /* Generate the appropriate shift instructions */
14175
14176                     switch (oper)
14177                     {
14178                         case GT_LSH:
14179                             if (count == 0)
14180                             {
14181                                 // regHi, regLo are correct
14182                             }
14183                             else if (count < 32)
14184                             {
14185 #if defined(_TARGET_XARCH_)
14186                                 inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14187 #elif defined(_TARGET_ARM_)
14188                                 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14189                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
14190                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14191 #else  // _TARGET_*
14192                                 NYI("INS_shld");
14193 #endif // _TARGET_*
14194                                 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14195                             }
14196                             else // count >= 32
14197                             {
14198                                 assert(count >= 32);
14199                                 if (count < 64)
14200                                 {
14201 #if defined(_TARGET_ARM_)
14202                                     if (count == 32)
14203                                     {
14204                                         // mov low dword into high dword (i.e. shift left by 32-bits)
14205                                         inst_RV_RV(INS_mov, regHi, regLo);
14206                                     }
14207                                     else
14208                                     {
14209                                         assert(count > 32 && count < 64);
14210                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
14211                                                                     count - 32);
14212                                     }
14213 #else  // _TARGET_*
14214                                     // mov low dword into high dword (i.e. shift left by 32-bits)
14215                                     inst_RV_RV(INS_mov, regHi, regLo);
14216                                     if (count > 32)
14217                                     {
14218                                         // Shift high dword left by count - 32
14219                                         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14220                                     }
14221 #endif // _TARGET_*
14222                                 }
14223                                 else // count >= 64
14224                                 {
14225                                     assert(count >= 64);
14226                                     genSetRegToIcon(regHi, 0);
14227                                 }
14228                                 genSetRegToIcon(regLo, 0);
14229                             }
14230                             break;
14231
14232                         case GT_RSH:
14233                             if (count == 0)
14234                             {
14235                                 // regHi, regLo are correct
14236                             }
14237                             else if (count < 32)
14238                             {
14239 #if defined(_TARGET_XARCH_)
14240                                 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14241 #elif defined(_TARGET_ARM_)
14242                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14243                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14244                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14245 #else  // _TARGET_*
14246                                 NYI("INS_shrd");
14247 #endif // _TARGET_*
14248                                 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14249                             }
14250                             else // count >= 32
14251                             {
14252                                 assert(count >= 32);
14253                                 if (count < 64)
14254                                 {
14255 #if defined(_TARGET_ARM_)
14256                                     if (count == 32)
14257                                     {
14258                                         // mov high dword into low dword (i.e. shift right by 32-bits)
14259                                         inst_RV_RV(INS_mov, regLo, regHi);
14260                                     }
14261                                     else
14262                                     {
14263                                         assert(count > 32 && count < 64);
14264                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
14265                                                                     count - 32);
14266                                     }
14267 #else  // _TARGET_*
14268                                     // mov high dword into low dword (i.e. shift right by 32-bits)
14269                                     inst_RV_RV(INS_mov, regLo, regHi);
14270                                     if (count > 32)
14271                                     {
14272                                         // Shift low dword right by count - 32
14273                                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14274                                     }
14275 #endif // _TARGET_*
14276                                 }
14277
14278                                 // Propagate sign bit in high dword
14279                                 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14280
14281                                 if (count >= 64)
14282                                 {
14283                                     // Propagate the sign from the high dword
14284                                     inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14285                                 }
14286                             }
14287                             break;
14288
14289                         case GT_RSZ:
14290                             if (count == 0)
14291                             {
14292                                 // regHi, regLo are correct
14293                             }
14294                             else if (count < 32)
14295                             {
14296 #if defined(_TARGET_XARCH_)
14297                                 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14298 #elif defined(_TARGET_ARM_)
14299                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14300                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14301                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14302 #else  // _TARGET_*
14303                                 NYI("INS_shrd");
14304 #endif // _TARGET_*
14305                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14306                             }
14307                             else // count >= 32
14308                             {
14309                                 assert(count >= 32);
14310                                 if (count < 64)
14311                                 {
14312 #if defined(_TARGET_ARM_)
14313                                     if (count == 32)
14314                                     {
14315                                         // mov high dword into low dword (i.e. shift right by 32-bits)
14316                                         inst_RV_RV(INS_mov, regLo, regHi);
14317                                     }
14318                                     else
14319                                     {
14320                                         assert(count > 32 && count < 64);
14321                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
14322                                                                     count - 32);
14323                                     }
14324 #else  // _TARGET_*
14325                                     // mov high dword into low dword (i.e. shift right by 32-bits)
14326                                     inst_RV_RV(INS_mov, regLo, regHi);
14327                                     if (count > 32)
14328                                     {
14329                                         // Shift low dword right by count - 32
14330                                         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14331                                     }
14332 #endif // _TARGET_*
14333                                 }
14334                                 else // count >= 64
14335                                 {
14336                                     assert(count >= 64);
14337                                     genSetRegToIcon(regLo, 0);
14338                                 }
14339                                 genSetRegToIcon(regHi, 0);
14340                             }
14341                             break;
14342
14343                         default:
14344                             noway_assert(!"Illegal oper for long shift");
14345                             break;
14346                     }
14347
14348                     goto DONE_SHF;
14349                 }
14350
14351                 /* Which operand are we supposed to compute first? */
14352
14353                 assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
14354
14355                 if (tree->gtFlags & GTF_REVERSE_OPS)
14356                 {
14357                     /* The second operand can't be a constant */
14358
14359                     noway_assert(op2->gtOper != GT_CNS_INT);
14360
14361                     /* Load the shift count, hopefully into RBM_SHIFT */
14362                     RegSet::ExactReg exactReg;
14363                     if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14364                         exactReg = RegSet::EXACT_REG;
14365                     else
14366                         exactReg = RegSet::ANY_REG;
14367                     genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14368
14369                     /* Compute the left operand into REG_LNGARG_0 */
14370
14371                     genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14372                     noway_assert(op1->InReg());
14373
14374                     /* Lock op1 so that it doesn't get trashed */
14375
14376                     regSet.rsLockUsedReg(RBM_LNGARG_0);
14377
14378                     /* Make sure the shift count wasn't displaced */
14379
14380                     genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14381
14382                     /* Lock op2 */
14383
14384                     regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14385                 }
14386                 else
14387                 {
14388                     /* Compute the left operand into REG_LNGARG_0 */
14389
14390                     genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14391                     noway_assert(op1->InReg());
14392
14393                     /* Compute the shift count into RBM_SHIFT */
14394
14395                     genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14396
14397                     /* Lock op2 */
14398
14399                     regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14400
14401                     /* Make sure the value hasn't been displaced */
14402
14403                     genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14404
14405                     /* Lock op1 so that it doesn't get trashed */
14406
14407                     regSet.rsLockUsedReg(RBM_LNGARG_0);
14408                 }
14409
14410 #ifndef _TARGET_X86_
14411                 /* The generic helper is a C-routine and so it follows the full ABI */
14412                 {
14413                     /* Spill any callee-saved registers which are being used */
14414                     regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14415
14416                     /* But do not spill our argument registers. */
14417                     spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14418
14419                     if (spillRegs)
14420                     {
14421                         regSet.rsSpillRegs(spillRegs);
14422                     }
14423                 }
14424 #endif // !_TARGET_X86_
14425
14426                 /* Perform the shift by calling a helper function */
14427
14428                 noway_assert(op1->gtRegPair == REG_LNGARG_0);
14429                 noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14430                 noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14431
14432                 genEmitHelperCall(helper,
14433                                   0,         // argSize
14434                                   EA_8BYTE); // retSize
14435
14436 #ifdef _TARGET_X86_
14437                 /* The value in the register pair is trashed */
14438
14439                 regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14440                 regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14441 #else  // _TARGET_X86_
14442                 /* The generic helper is a C-routine and so it follows the full ABI */
14443                 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14444 #endif // _TARGET_X86_
14445
14446                 /* Release both operands */
14447
14448                 regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14449                 genReleaseRegPair(op1);
14450                 genReleaseReg(op2);
14451
14452             DONE_SHF:
14453
14454                 noway_assert(op1->InReg());
14455                 regPair = op1->gtRegPair;
14456                 goto DONE;
14457
14458             case GT_NEG:
14459             case GT_NOT:
14460
14461                 /* Generate the operand into some register pair */
14462
14463                 genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14464                 noway_assert(op1->InReg());
14465
14466                 regPair = op1->gtRegPair;
14467
14468                 /* Figure out which registers the value is in */
14469
14470                 regLo = genRegPairLo(regPair);
14471                 regHi = genRegPairHi(regPair);
14472
14473                 /* The value in the register pair is about to be trashed */
14474
14475                 regTracker.rsTrackRegTrash(regLo);
14476                 regTracker.rsTrackRegTrash(regHi);
14477
14478                 /* Unary "neg": negate the value  in the register pair */
14479                 if (oper == GT_NEG)
14480                 {
14481 #ifdef _TARGET_ARM_
14482
14483                     // ARM doesn't have an opcode that sets the carry bit like
14484                     // x86, so we can't use neg/addc/neg.  Instead we use subtract
14485                     // with carry.  Too bad this uses an extra register.
14486
14487                     // Lock regLo and regHi so we don't pick them, and then pick
14488                     // a third register to be our 0.
14489                     regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14490                     regSet.rsLockReg(regPairMask);
14491                     regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14492                     regNumber regZero = genGetRegSetToIcon(0, regBest);
14493                     regSet.rsUnlockReg(regPairMask);
14494
14495                     inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14496                     getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
14497
14498 #elif defined(_TARGET_XARCH_)
14499
14500                     inst_RV(INS_NEG, regLo, TYP_LONG);
14501                     inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14502                     inst_RV(INS_NEG, regHi, TYP_LONG);
14503 #else
14504                     NYI("GT_NEG on TYP_LONG");
14505 #endif
14506                 }
14507                 else
14508                 {
14509                     /* Unary "not": flip all the bits in the register pair */
14510
14511                     inst_RV(INS_NOT, regLo, TYP_LONG);
14512                     inst_RV(INS_NOT, regHi, TYP_LONG);
14513                 }
14514
14515                 goto DONE;
14516
14517             case GT_IND:
14518             case GT_NULLCHECK:
14519             {
14520                 regMaskTP tmpMask;
14521                 int       hiFirst;
14522
14523                 regMaskTP availMask = RBM_ALLINT & ~needReg;
14524
14525                 /* Make sure the operand is addressable */
14526
14527                 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14528
14529                 GenTree* addr = oper == GT_IND ? op1 : tree;
14530
14531                 /* Pick a register for the value */
14532
14533                 regPair = regSet.rsPickRegPair(needReg);
14534                 tmpMask = genRegPairMask(regPair);
14535
14536                 /* Is there any overlap between the register pair and the address? */
14537
14538                 hiFirst = FALSE;
14539
14540                 if (tmpMask & addrReg)
14541                 {
14542                     /* Does one or both of the target registers overlap? */
14543
14544                     if ((tmpMask & addrReg) != tmpMask)
14545                     {
14546                         /* Only one register overlaps */
14547
14548                         noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14549
14550                         /* If the low register overlaps, load the upper half first */
14551
14552                         if (addrReg & genRegMask(genRegPairLo(regPair)))
14553                             hiFirst = TRUE;
14554                     }
14555                     else
14556                     {
14557                         regMaskTP regFree;
14558
14559                         /* The register completely overlaps with the address */
14560
14561                         noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14562
14563                         /* Can we pick another pair easily? */
14564
14565                         regFree = regSet.rsRegMaskFree() & ~addrReg;
14566                         if (needReg)
14567                             regFree &= needReg;
14568
14569                         /* More than one free register available? */
14570
14571                         if (regFree && !genMaxOneBit(regFree))
14572                         {
14573                             regPair = regSet.rsPickRegPair(regFree);
14574                             tmpMask = genRegPairMask(regPair);
14575                         }
14576                         else
14577                         {
14578                             // printf("Overlap: needReg = %08X\n", needReg);
14579
14580                             // Reg-prediction won't allow this
14581                             noway_assert((regSet.rsMaskVars & addrReg) == 0);
14582
14583                             // Grab one fresh reg, and use any one of addrReg
14584
14585                             if (regFree) // Try to follow 'needReg'
14586                                 regLo = regSet.rsGrabReg(regFree);
14587                             else // Pick any reg besides addrReg
14588                                 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14589
14590                             unsigned  regBit = 0x1;
14591                             regNumber regNo;
14592
14593                             for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14594                             {
14595                                 // Found one of addrReg. Use it.
14596                                 if (regBit & addrReg)
14597                                     break;
14598                             }
14599                             noway_assert(genIsValidReg(regNo)); // Should have found regNo
14600
14601                             regPair = gen2regs2pair(regLo, regNo);
14602                             tmpMask = genRegPairMask(regPair);
14603                         }
14604                     }
14605                 }
14606
14607                 /* Make sure the value is still addressable */
14608
14609                 noway_assert(genStillAddressable(tree));
14610
14611                 /* Figure out which registers the value is in */
14612
14613                 regLo = genRegPairLo(regPair);
14614                 regHi = genRegPairHi(regPair);
14615
14616                 /* The value in the register pair is about to be trashed */
14617
14618                 regTracker.rsTrackRegTrash(regLo);
14619                 regTracker.rsTrackRegTrash(regHi);
14620
14621                 /* Load the target registers from where the value is */
14622
14623                 if (hiFirst)
14624                 {
14625                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14626                     regSet.rsLockReg(genRegMask(regHi));
14627                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14628                     regSet.rsUnlockReg(genRegMask(regHi));
14629                 }
14630                 else
14631                 {
14632                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14633                     regSet.rsLockReg(genRegMask(regLo));
14634                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14635                     regSet.rsUnlockReg(genRegMask(regLo));
14636                 }
14637
14638 #ifdef _TARGET_ARM_
14639                 if (tree->gtFlags & GTF_IND_VOLATILE)
14640                 {
14641                     // Emit a memory barrier instruction after the load
14642                     instGen_MemoryBarrier();
14643                 }
14644 #endif
14645
14646                 genUpdateLife(tree);
14647                 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14648             }
14649                 goto DONE;
14650
14651             case GT_CAST:
14652
14653                 /* What are we casting from? */
14654
14655                 switch (op1->gtType)
14656                 {
14657                     case TYP_BOOL:
14658                     case TYP_BYTE:
14659                     case TYP_USHORT:
14660                     case TYP_SHORT:
14661                     case TYP_INT:
14662                     case TYP_UBYTE:
14663                     case TYP_BYREF:
14664                     {
14665                         regMaskTP hiRegMask;
14666                         regMaskTP loRegMask;
14667
14668                         // For an unsigned cast we don't need to sign-extend the 32 bit value
14669                         if (tree->gtFlags & GTF_UNSIGNED)
14670                         {
14671                             // Does needReg have exactly two bits on and thus
14672                             // specifies the exact register pair that we want to use
14673                             if (!genMaxOneBit(needReg))
14674                             {
14675                                 regPair = regSet.rsFindRegPairNo(needReg);
14676                                 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14677                                     goto ANY_FREE_REG_UNSIGNED;
14678                                 loRegMask = genRegMask(genRegPairLo(regPair));
14679                                 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14680                                     goto ANY_FREE_REG_UNSIGNED;
14681                                 hiRegMask = genRegMask(genRegPairHi(regPair));
14682                             }
14683                             else
14684                             {
14685                             ANY_FREE_REG_UNSIGNED:
14686                                 loRegMask = needReg;
14687                                 hiRegMask = needReg;
14688                             }
14689
14690                             genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14691                             noway_assert(op1->InReg());
14692
14693                             regLo     = op1->gtRegNum;
14694                             loRegMask = genRegMask(regLo);
14695                             regSet.rsLockUsedReg(loRegMask);
14696                             regHi = regSet.rsPickReg(hiRegMask);
14697                             regSet.rsUnlockUsedReg(loRegMask);
14698
14699                             regPair = gen2regs2pair(regLo, regHi);
14700
14701                             // Move 0 to the higher word of the ULong
14702                             genSetRegToIcon(regHi, 0, TYP_INT);
14703
14704                             /* We can now free up the operand */
14705                             genReleaseReg(op1);
14706
14707                             goto DONE;
14708                         }
14709 #ifdef _TARGET_XARCH_
14710                         /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
14711                            and we need the result to be in those registers.
14712                            cdq is smaller so we use it for SMALL_CODE
14713                         */
14714
14715                         if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
14716                             (regSet.rsRegMaskFree() & RBM_EDX))
14717                         {
14718                             genCodeForTree(op1, RBM_EAX);
14719                             regSet.rsMarkRegUsed(op1);
14720
14721                             /* If we have to spill EDX, might as well use the faster
14722                                sar as the spill will increase code size anyway */
14723
14724                             if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
14725                             {
14726                                 hiRegMask = regSet.rsRegMaskFree();
14727                                 goto USE_SAR_FOR_CAST;
14728                             }
14729
14730                             regSet.rsGrabReg(RBM_EDX);
14731                             regTracker.rsTrackRegTrash(REG_EDX);
14732
14733                             /* Convert the int in EAX into a long in EDX:EAX */
14734
14735                             instGen(INS_cdq);
14736
14737                             /* The result is in EDX:EAX */
14738
14739                             regPair = REG_PAIR_EAXEDX;
14740                         }
14741                         else
14742 #endif
14743                         {
14744                             /* use the sar instruction to sign-extend a 32-bit integer */
14745
14746                             // Does needReg have exactly two bits on and thus
14747                             // specifies the exact register pair that we want to use
14748                             if (!genMaxOneBit(needReg))
14749                             {
14750                                 regPair = regSet.rsFindRegPairNo(needReg);
14751                                 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14752                                     goto ANY_FREE_REG_SIGNED;
14753                                 loRegMask = genRegMask(genRegPairLo(regPair));
14754                                 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14755                                     goto ANY_FREE_REG_SIGNED;
14756                                 hiRegMask = genRegMask(genRegPairHi(regPair));
14757                             }
14758                             else
14759                             {
14760                             ANY_FREE_REG_SIGNED:
14761                                 loRegMask = needReg;
14762                                 hiRegMask = RBM_NONE;
14763                             }
14764
14765                             genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14766 #ifdef _TARGET_XARCH_
14767                         USE_SAR_FOR_CAST:
14768 #endif
14769                             noway_assert(op1->InReg());
14770
14771                             regLo     = op1->gtRegNum;
14772                             loRegMask = genRegMask(regLo);
14773                             regSet.rsLockUsedReg(loRegMask);
14774                             regHi = regSet.rsPickReg(hiRegMask);
14775                             regSet.rsUnlockUsedReg(loRegMask);
14776
14777                             regPair = gen2regs2pair(regLo, regHi);
14778
14779 #ifdef _TARGET_ARM_
14780                             /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14781                             // Use one instruction instead of two
14782                             getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
14783 #else
14784                             /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14785                             inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
14786                             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14787 #endif
14788
14789                             /* The value in the upper register is trashed */
14790
14791                             regTracker.rsTrackRegTrash(regHi);
14792                         }
14793
14794                         /* We can now free up the operand */
14795                         genReleaseReg(op1);
14796
14797                         // conv.ovf.u8 could overflow if the original number was negative
14798                         if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
14799                         {
14800                             regNumber hiReg = genRegPairHi(regPair);
14801                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14802                             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14803                             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14804                         }
14805                     }
14806                         goto DONE;
14807
14808                     case TYP_FLOAT:
14809                     case TYP_DOUBLE:
14810
14811 #if 0
14812                 /* Load the FP value onto the coprocessor stack */
14813
14814                 genCodeForTreeFlt(op1);
14815
14816                 /* Allocate a temp for the long value */
14817
14818                 temp = compiler->tmpGetTemp(TYP_LONG);
14819
14820                 /* Store the FP value into the temp */
14821
14822                 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
14823                 genFPstkLevel--;
14824
14825                 /* Pick a register pair for the value */
14826
14827                 regPair  = regSet.rsPickRegPair(needReg);
14828
14829                 /* Figure out which registers the value is in */
14830
14831                 regLo = genRegPairLo(regPair);
14832                 regHi = genRegPairHi(regPair);
14833
14834                 /* The value in the register pair is about to be trashed */
14835
14836                 regTracker.rsTrackRegTrash(regLo);
14837                 regTracker.rsTrackRegTrash(regHi);
14838
14839                 /* Load the converted value into the registers */
14840
14841                 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
14842                 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
14843
14844                 /* We no longer need the temp */
14845
14846                 compiler->tmpRlsTemp(temp);
14847                 goto DONE;
14848 #else
14849                         NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
14850                         break;
14851 #endif
14852                     case TYP_LONG:
14853                     case TYP_ULONG:
14854                     {
14855                         noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
14856
14857                         genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
14858                         regPair = op1->gtRegPair;
14859
14860                         // Do we need to set the sign-flag, or can we checked if it is set?
14861                         // and not do this "test" if so.
14862
14863                         if (op1->InReg())
14864                         {
14865                             regNumber hiReg = genRegPairHi(op1->gtRegPair);
14866                             noway_assert(hiReg != REG_STK);
14867                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14868                         }
14869                         else
14870                         {
14871                             inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
14872                         }
14873
14874                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14875                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14876                     }
14877                         goto DONE;
14878
14879                     default:
14880 #ifdef DEBUG
14881                         compiler->gtDispTree(tree);
14882 #endif
14883                         NO_WAY("unexpected cast to long");
14884                 }
14885                 break;
14886
14887             case GT_RETURN:
14888
14889                 /* TODO:
14890                  * This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
14891                  * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement.  We should really
14892                  * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
14893                  */
14894
14895                 // TODO: this should be done AFTER we called exit mon so that
14896                 //       we are sure that we don't have to keep 'this' alive
14897
14898                 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
14899                 {
14900                     /* either it's an "empty" statement or the return statement
14901                        of a synchronized method
14902                      */
14903
14904                     genPInvokeMethodEpilog();
14905                 }
14906
14907 #if CPU_LONG_USES_REGPAIR
14908                 /* There must be a long return value */
14909
14910                 noway_assert(op1);
14911
14912                 /* Evaluate the return value into EDX:EAX */
14913
14914                 genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
14915
14916                 noway_assert(op1->InReg());
14917                 noway_assert(op1->gtRegPair == REG_LNGRET);
14918
14919 #else
14920                 NYI("64-bit return");
14921 #endif
14922
14923 #ifdef PROFILING_SUPPORTED
14924                 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
14925                 // the GT_RETURN tree.
14926
14927                 if (compiler->compCurBB == compiler->genReturnBB)
14928                 {
14929                     genProfilingLeaveCallback();
14930                 }
14931 #endif
14932                 return;
14933
14934             case GT_QMARK:
14935                 noway_assert(!"inliner-generated ?: for longs NYI");
14936                 NO_WAY("inliner-generated ?: for longs NYI");
14937                 break;
14938
14939             case GT_COMMA:
14940
14941                 if (tree->gtFlags & GTF_REVERSE_OPS)
14942                 {
14943                     // Generate op2
14944                     genCodeForTreeLng(op2, needReg, avoidReg);
14945                     genUpdateLife(op2);
14946
14947                     noway_assert(op2->InReg());
14948
14949                     regSet.rsMarkRegPairUsed(op2);
14950
14951                     // Do side effects of op1
14952                     genEvalSideEffects(op1);
14953
14954                     // Recover op2 if spilled
14955                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
14956
14957                     genReleaseRegPair(op2);
14958
14959                     genUpdateLife(tree);
14960
14961                     regPair = op2->gtRegPair;
14962                 }
14963                 else
14964                 {
14965                     noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
14966
14967                     /* Generate side effects of the first operand */
14968
14969                     genEvalSideEffects(op1);
14970                     genUpdateLife(op1);
14971
14972                     /* Is the value of the second operand used? */
14973
14974                     if (tree->gtType == TYP_VOID)
14975                     {
14976                         /* The right operand produces no result */
14977
14978                         genEvalSideEffects(op2);
14979                         genUpdateLife(tree);
14980                         return;
14981                     }
14982
14983                     /* Generate the second operand, i.e. the 'real' value */
14984
14985                     genCodeForTreeLng(op2, needReg, avoidReg);
14986
14987                     /* The result of 'op2' is also the final result */
14988
14989                     regPair = op2->gtRegPair;
14990                 }
14991
14992                 goto DONE;
14993
14994             case GT_BOX:
14995             {
14996                 /* Generate the  operand, i.e. the 'real' value */
14997
14998                 genCodeForTreeLng(op1, needReg, avoidReg);
14999
15000                 /* The result of 'op1' is also the final result */
15001
15002                 regPair = op1->gtRegPair;
15003             }
15004
15005                 goto DONE;
15006
15007             case GT_NOP:
15008                 if (op1 == NULL)
15009                     return;
15010
15011                 genCodeForTreeLng(op1, needReg, avoidReg);
15012                 regPair = op1->gtRegPair;
15013                 goto DONE;
15014
15015             default:
15016                 break;
15017         }
15018
15019 #ifdef DEBUG
15020         compiler->gtDispTree(tree);
15021 #endif
15022         noway_assert(!"unexpected 64-bit operator");
15023     }
15024
15025     /* See what kind of a special operator we have here */
15026
15027     switch (oper)
15028     {
15029         regMaskTP retMask;
15030         case GT_CALL:
15031             retMask = genCodeForCall(tree->AsCall(), true);
15032             if (retMask == RBM_NONE)
15033                 regPair = REG_PAIR_NONE;
15034             else
15035                 regPair = regSet.rsFindRegPairNo(retMask);
15036             break;
15037
15038         default:
15039 #ifdef DEBUG
15040             compiler->gtDispTree(tree);
15041 #endif
15042             NO_WAY("unexpected long operator");
15043     }
15044
15045 DONE:
15046
15047     genUpdateLife(tree);
15048
15049     /* Here we've computed the value of 'tree' into 'regPair' */
15050
15051     noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15052
15053     genMarkTreeInRegPair(tree, regPair);
15054 }
15055 #ifdef _PREFAST_
15056 #pragma warning(pop)
15057 #endif
15058
15059 /*****************************************************************************
15060  *
15061  *  Generate code for a mod of a long by an int.
15062  */
15063
15064 regPairNo CodeGen::genCodeForLongModInt(GenTree* tree, regMaskTP needReg)
15065 {
15066 #ifdef _TARGET_X86_
15067
15068     regPairNo regPair;
15069     regMaskTP addrReg;
15070
15071     genTreeOps oper = tree->OperGet();
15072     GenTree*   op1  = tree->gtOp.gtOp1;
15073     GenTree*   op2  = tree->gtOp.gtOp2;
15074
15075     /* Codegen only for Unsigned MOD */
15076     noway_assert(oper == GT_UMOD);
15077
15078     /* op2 must be a long constant in the range 2 to 0x3fffffff */
15079
15080     noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
15081                  (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15082     int val = (int)op2->gtLngCon.gtLconVal;
15083
15084     op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15085
15086     op2->gtType             = TYP_INT;
15087     op2->gtIntCon.gtIconVal = val;
15088
15089     /* Which operand are we supposed to compute first? */
15090
15091     if (tree->gtFlags & GTF_REVERSE_OPS)
15092     {
15093         /* Compute the second operand into a scratch register, other
15094            than EAX or EDX */
15095
15096         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15097
15098         /* Special case: if op2 is a local var we are done */
15099
15100         if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15101         {
15102             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15103         }
15104         else
15105         {
15106             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15107
15108             noway_assert(op2->InReg());
15109             addrReg = genRegMask(op2->gtRegNum);
15110         }
15111
15112         /* Compute the first operand into EAX:EDX */
15113
15114         genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15115         noway_assert(op1->InReg());
15116         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15117
15118         /* And recover the second argument while locking the first one */
15119
15120         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15121     }
15122     else
15123     {
15124         /* Compute the first operand into EAX:EDX */
15125
15126         genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15127         noway_assert(op1->InReg());
15128         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15129
15130         /* Compute the second operand into a scratch register, other
15131            than EAX or EDX */
15132
15133         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15134
15135         /* Special case: if op2 is a local var we are done */
15136
15137         if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15138         {
15139             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15140         }
15141         else
15142         {
15143             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15144
15145             noway_assert(op2->InReg());
15146             addrReg = genRegMask(op2->gtRegNum);
15147         }
15148
15149         /* Recover the first argument */
15150
15151         genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15152
15153         /* And recover the second argument while locking the first one */
15154
15155         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15156     }
15157
15158     /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
15159        contains the 32bit divisor.  We want to generate the following code:
15160
15161        ==========================
15162        Unsigned (GT_UMOD)
15163
15164        cmp edx, op2->gtRegNum
15165        jb  lab_no_overflow
15166
15167        mov temp, eax
15168        mov eax, edx
15169        xor edx, edx
15170        div op2->g2RegNum
15171        mov eax, temp
15172
15173        lab_no_overflow:
15174        idiv
15175        ==========================
15176        This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
15177     */
15178
15179     BasicBlock* lab_no_overflow = genCreateTempLabel();
15180
15181     // grab a temporary register other than eax, edx, and op2->gtRegNum
15182
15183     regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15184
15185     // EAX and tempReg will be trashed by the mov instructions.  Doing
15186     // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15187
15188     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15189     regTracker.rsTrackRegTrash(tempReg);
15190
15191     inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15192     inst_JMP(EJ_jb, lab_no_overflow);
15193
15194     inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15195     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15196     genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15197     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15198     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15199
15200     // Jump point for no overflow divide
15201
15202     genDefineTempLabel(lab_no_overflow);
15203
15204     // Issue the divide instruction
15205
15206     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15207
15208     /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15209
15210     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15211     regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
15212     regTracker.rsTrackRegTrash(tempReg);
15213     regTracker.rsTrackRegTrash(op2->gtRegNum);
15214
15215     if (tree->gtFlags & GTF_MOD_INT_RESULT)
15216     {
15217         /* We don't need to normalize the result, because the caller wants
15218            an int (in edx) */
15219
15220         regPair = REG_PAIR_TMP_REVERSE;
15221     }
15222     else
15223     {
15224         /* The result is now in EDX, we now have to normalize it, i.e. we have
15225            to issue:
15226            mov eax, edx; xor edx, edx (for UMOD)
15227         */
15228
15229         inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15230
15231         genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15232
15233         regPair = REG_PAIR_TMP;
15234     }
15235
15236     genReleaseRegPair(op1);
15237     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15238
15239     return regPair;
15240
15241 #else // !_TARGET_X86_
15242
15243     NYI("codegen for LongModInt");
15244
15245     return REG_PAIR_NONE;
15246
15247 #endif // !_TARGET_X86_
15248 }
15249
15250 // Given a tree, return the number of registers that are currently
15251 // used to hold integer enregistered local variables.
15252 // Note that, an enregistered TYP_LONG can take 1 or 2 registers.
15253 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTree* tree)
15254 {
15255     unsigned regCount = 0;
15256
15257     VarSetOps::Iter iter(compiler, compiler->compCurLife);
15258     unsigned        varNum = 0;
15259     while (iter.NextElem(&varNum))
15260     {
15261         unsigned   lclNum = compiler->lvaTrackedToVarNum[varNum];
15262         LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
15263
15264         if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15265         {
15266             ++regCount;
15267
15268             if (varTypeIsLong(varDsc->TypeGet()))
15269             {
15270                 // For enregistered LONG/ULONG, the lower half should always be in a register.
15271                 noway_assert(varDsc->lvRegNum != REG_STK);
15272
15273                 // If the LONG/ULONG is NOT paritally enregistered, then the higher half should be in a register as
15274                 // well.
15275                 if (varDsc->lvOtherReg != REG_STK)
15276                 {
15277                     ++regCount;
15278                 }
15279             }
15280         }
15281     }
15282
15283     return regCount;
15284 }
15285
15286 /*****************************************************************************/
15287 /*****************************************************************************/
15288 #if CPU_HAS_FP_SUPPORT
15289 /*****************************************************************************
15290  *
15291  *  Generate code for a floating-point operation.
15292  */
15293
15294 void CodeGen::genCodeForTreeFlt(GenTree*  tree,
15295                                 regMaskTP needReg, /* = RBM_ALLFLOAT */
15296                                 regMaskTP bestReg) /* = RBM_NONE */
15297 {
15298     genCodeForTreeFloat(tree, needReg, bestReg);
15299
15300     if (tree->OperGet() == GT_RETURN)
15301     {
15302         // Make sure to get ALL THE EPILOG CODE
15303
15304         // TODO: this should be done AFTER we called exit mon so that
15305         //       we are sure that we don't have to keep 'this' alive
15306
15307         if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15308         {
15309             /* either it's an "empty" statement or the return statement
15310                of a synchronized method
15311              */
15312
15313             genPInvokeMethodEpilog();
15314         }
15315
15316 #ifdef PROFILING_SUPPORTED
15317         // The profiling hook does not trash registers, so it's safe to call after we emit the code for
15318         // the GT_RETURN tree.
15319
15320         if (compiler->compCurBB == compiler->genReturnBB)
15321         {
15322             genProfilingLeaveCallback();
15323         }
15324 #endif
15325     }
15326 }
15327
15328 /*****************************************************************************/
15329 #endif // CPU_HAS_FP_SUPPORT
15330
15331 /*****************************************************************************
15332  *
15333  *  Generate a table switch - the switch value (0-based) is in register 'reg'.
15334  */
15335
15336 void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
15337 {
15338     unsigned jmpTabBase;
15339
15340     if (jumpCnt == 1)
15341     {
15342         // In debug code, we don't optimize away the trivial switch statements.  So we can get here with a
15343         // BBJ_SWITCH with only a default case.  Therefore, don't generate the switch table.
15344         noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15345         inst_JMP(EJ_jmp, jumpTab[0]);
15346         return;
15347     }
15348
15349     noway_assert(jumpCnt >= 2);
15350
15351     /* Is the number of cases right for a test and jump switch? */
15352
15353     const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15354     const bool fDefaultFollows   = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15355     const bool fHaveScratchReg   = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
15356
15357     unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15358
15359     // This means really just a single cmp/jcc (aka a simple if/else)
15360     if (fFirstCaseFollows || fDefaultFollows)
15361         minSwitchTabJumpCnt++;
15362
15363 #ifdef _TARGET_ARM_
15364     // On the ARM for small switch tables we will
15365     // generate a sequence of compare and branch instructions
15366     // because the code to load the base of the switch
15367     // table is huge and hideous due to the relocation... :(
15368     //
15369     minSwitchTabJumpCnt++;
15370     if (fHaveScratchReg)
15371         minSwitchTabJumpCnt++;
15372
15373 #endif // _TARGET_ARM_
15374
15375     bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt;
15376
15377 #if defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
15378     // Force using an inlined jumping instead switch table generation.
15379     // Switch jump table is generated with incorrect values in CoreRT case,
15380     // so any large switch will crash after loading to PC any such value.
15381     // I think this is due to the fact that we use absolute addressing
15382     // instead of relative. But in CoreRT is used as a rule relative
15383     // addressing when we generate an executable.
15384     // See also https://github.com/dotnet/coreclr/issues/13194
15385     useJumpSequence = useJumpSequence || compiler->IsTargetAbi(CORINFO_CORERT_ABI);
15386 #endif // defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
15387
15388     if (useJumpSequence)
15389     {
15390         /* Does the first case label follow? */
15391         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15392
15393         if (fFirstCaseFollows)
15394         {
15395             /* Check for the default case */
15396             inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15397             emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15398             inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15399
15400             /* No need to jump to the first case */
15401
15402             jumpCnt -= 2;
15403             jumpTab += 1;
15404
15405             /* Generate a series of "dec reg; jmp label" */
15406
15407             // Make sure that we can trash the register so
15408             // that we can generate a series of compares and jumps
15409             //
15410             if ((jumpCnt > 0) && !fHaveScratchReg)
15411             {
15412                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15413                 inst_RV_RV(INS_mov, tmpReg, reg);
15414                 regTracker.rsTrackRegTrash(tmpReg);
15415                 reg = tmpReg;
15416             }
15417
15418             while (jumpCnt > 0)
15419             {
15420                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15421                 inst_JMP(jmpEqual, *jumpTab++);
15422                 jumpCnt--;
15423             }
15424         }
15425         else
15426         {
15427             /* Check for case0 first */
15428             instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15429             inst_JMP(jmpEqual, *jumpTab);
15430
15431             /* No need to jump to the first case or the default */
15432
15433             jumpCnt -= 2;
15434             jumpTab += 1;
15435
15436             /* Generate a series of "dec reg; jmp label" */
15437
15438             // Make sure that we can trash the register so
15439             // that we can generate a series of compares and jumps
15440             //
15441             if ((jumpCnt > 0) && !fHaveScratchReg)
15442             {
15443                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15444                 inst_RV_RV(INS_mov, tmpReg, reg);
15445                 regTracker.rsTrackRegTrash(tmpReg);
15446                 reg = tmpReg;
15447             }
15448
15449             while (jumpCnt > 0)
15450             {
15451                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15452                 inst_JMP(jmpEqual, *jumpTab++);
15453                 jumpCnt--;
15454             }
15455
15456             if (!fDefaultFollows)
15457             {
15458                 inst_JMP(EJ_jmp, *jumpTab);
15459             }
15460         }
15461
15462         if ((fFirstCaseFollows || fDefaultFollows) &&
15463             compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15464         {
15465             inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
15466         }
15467
15468         return;
15469     }
15470
15471     /* First take care of the default case */
15472
15473     inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15474     emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15475     inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15476
15477     /* Generate the jump table contents */
15478
15479     jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15480
15481 #ifdef DEBUG
15482     if (compiler->opts.dspCode)
15483         printf("\n      J_M%03u_DS%02u LABEL   DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15484 #endif
15485
15486     for (unsigned index = 0; index < jumpCnt - 1; index++)
15487     {
15488         BasicBlock* target = jumpTab[index];
15489
15490         noway_assert(target->bbFlags & BBF_JMP_TARGET);
15491
15492 #ifdef DEBUG
15493         if (compiler->opts.dspCode)
15494             printf("            DD      L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15495 #endif
15496
15497         getEmitter()->emitDataGenData(index, target);
15498     }
15499
15500     getEmitter()->emitDataGenEnd();
15501
15502 #ifdef _TARGET_ARM_
15503     // We need to load the address of the table into a register.
15504     // The data section might get placed a long distance away, so we
15505     // can't safely do a PC-relative ADR. :(
15506     // Pick any register except the index register.
15507     //
15508     regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
15509     genMov32RelocatableDataLabel(jmpTabBase, regTabBase);
15510     regTracker.rsTrackRegTrash(regTabBase);
15511
15512     // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2]
15513     getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15514
15515 #else // !_TARGET_ARM_
15516
15517     getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15518
15519 #endif
15520 }
15521
15522 /*****************************************************************************
15523  *
15524  *  Generate code for a switch statement.
15525  */
15526
15527 void CodeGen::genCodeForSwitch(GenTree* tree)
15528 {
15529     unsigned     jumpCnt;
15530     BasicBlock** jumpTab;
15531
15532     GenTree*  oper;
15533     regNumber reg;
15534
15535     noway_assert(tree->gtOper == GT_SWITCH);
15536     oper = tree->gtOp.gtOp1;
15537     noway_assert(genActualTypeIsIntOrI(oper->gtType));
15538
15539     /* Get hold of the jump table */
15540
15541     noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15542
15543     jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15544     jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15545
15546     /* Compute the switch value into some register */
15547
15548     genCodeForTree(oper, 0);
15549
15550     /* Get hold of the register the value is in */
15551
15552     noway_assert(oper->InReg());
15553     reg = oper->gtRegNum;
15554
15555 #if FEATURE_STACK_FP_X87
15556     if (!compCurFPState.IsEmpty())
15557     {
15558         return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15559     }
15560     else
15561 #endif // FEATURE_STACK_FP_X87
15562     {
15563         return genTableSwitch(reg, jumpCnt, jumpTab);
15564     }
15565 }
15566
15567 /*****************************************************************************/
15568 /*****************************************************************************
15569  *  Emit a call to a helper function.
15570  */
15571
15572 // inline
15573 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
15574 {
15575     // Can we call the helper function directly
15576
15577     void *addr = NULL, **pAddr = NULL;
15578
15579 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
15580     // Don't ask VM if it hasn't requested ELT hooks
15581     if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
15582         (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15583          helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15584     {
15585         addr = compiler->compProfilerMethHnd;
15586     }
15587     else
15588 #endif
15589     {
15590         addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15591     }
15592
15593 #ifdef _TARGET_ARM_
15594     if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15595     {
15596         // Load the address into a register and call  through a register
15597         regNumber indCallReg =
15598             regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
15599         if (addr)
15600         {
15601             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
15602         }
15603         else
15604         {
15605             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15606             regTracker.rsTrackRegTrash(indCallReg);
15607         }
15608
15609         getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
15610                                    INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
15611                                    argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15612                                    gcInfo.gcRegByrefSetCur,
15613                                    BAD_IL_OFFSET, // ilOffset
15614                                    indCallReg,    // ireg
15615                                    REG_NA, 0, 0,  // xreg, xmul, disp
15616                                    false,         // isJump
15617                                    emitter::emitNoGChelper(helper),
15618                                    (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15619     }
15620     else
15621     {
15622         getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
15623                                    INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
15624                                    gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15625                                    0,     /* ilOffset, ireg, xreg, xmul, disp */
15626                                    false, /* isJump */
15627                                    emitter::emitNoGChelper(helper),
15628                                    (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15629     }
15630 #else
15631
15632     {
15633         emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
15634
15635         if (!addr)
15636         {
15637             callType = emitter::EC_FUNC_TOKEN_INDIR;
15638             addr     = pAddr;
15639         }
15640
15641         getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
15642                                    argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15643                                    gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15644                                    0,     /* ilOffset, ireg, xreg, xmul, disp */
15645                                    false, /* isJump */
15646                                    emitter::emitNoGChelper(helper));
15647     }
15648 #endif
15649
15650     regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15651     regTracker.rsTrashRegsForGCInterruptability();
15652 }
15653
15654 /*****************************************************************************
15655  *
15656  *  Push the given argument list, right to left; returns the total size
15657  *  of the arguments pushed on the stack.
15658  */
15659
15660 #if !FEATURE_FIXED_OUT_ARGS
15661 #ifdef _PREFAST_
15662 #pragma warning(push)
15663 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
15664 #endif
15665 size_t CodeGen::genPushArgList(GenTreeCall* call)
15666 {
15667     GenTreeArgList* regArgs = call->gtCallLateArgs;
15668     size_t          size    = 0;
15669     regMaskTP       addrReg;
15670
15671     GenTreeArgList* args;
15672     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
15673     // so we can iterate over this argument list more uniformly.
15674     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
15675     GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCallArgs);
15676     if (call->gtCallObjp == NULL)
15677     {
15678         args = call->gtCallArgs;
15679     }
15680     else
15681     {
15682         firstForObjp.Current() = call->gtCallObjp;
15683         args                   = &firstForObjp;
15684     }
15685
15686     GenTree*  curr;
15687     var_types type;
15688     size_t    opsz;
15689
15690     for (; args; args = args->Rest())
15691     {
15692         addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
15693
15694         /* Get hold of the next argument value */
15695         curr = args->Current();
15696
15697         if (curr->IsArgPlaceHolderNode())
15698         {
15699             assert(curr->gtFlags & GTF_LATE_ARG);
15700
15701             addrReg = 0;
15702             continue;
15703         }
15704
15705         // If we have a comma expression, eval the non-last, then deal with the last.
15706         if (!(curr->gtFlags & GTF_LATE_ARG))
15707             curr = genCodeForCommaTree(curr);
15708
15709         /* See what type of a value we're passing */
15710         type = curr->TypeGet();
15711
15712         opsz = genTypeSize(genActualType(type));
15713
15714         switch (type)
15715         {
15716             case TYP_BOOL:
15717             case TYP_BYTE:
15718             case TYP_SHORT:
15719             case TYP_USHORT:
15720             case TYP_UBYTE:
15721
15722                 /* Don't want to push a small value, make it a full word */
15723
15724                 genCodeForTree(curr, 0);
15725
15726                 __fallthrough; // now the value should be in a register ...
15727
15728             case TYP_INT:
15729             case TYP_REF:
15730             case TYP_BYREF:
15731 #if !CPU_HAS_FP_SUPPORT
15732             case TYP_FLOAT:
15733 #endif
15734
15735                 if (curr->gtFlags & GTF_LATE_ARG)
15736                 {
15737                     assert(curr->gtOper == GT_ASG);
15738                     /* one more argument will be passed in a register */
15739                     noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
15740
15741                     /* arg is passed in the register, nothing on the stack */
15742
15743                     opsz = 0;
15744                 }
15745
15746                 /* Is this value a handle? */
15747
15748                 if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
15749                 {
15750                     /* Emit a fixup for the push instruction */
15751
15752                     inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
15753                     genSinglePush();
15754
15755                     addrReg = 0;
15756                     break;
15757                 }
15758
15759                 /* Is the value a constant? */
15760
15761                 if (curr->gtOper == GT_CNS_INT)
15762                 {
15763
15764 #if REDUNDANT_LOAD
15765                     regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
15766
15767                     if (reg != REG_NA)
15768                     {
15769                         inst_RV(INS_push, reg, TYP_INT);
15770                     }
15771                     else
15772 #endif
15773                     {
15774                         inst_IV(INS_push, curr->gtIntCon.gtIconVal);
15775                     }
15776
15777                     /* If the type is TYP_REF, then this must be a "null". So we can
15778                        treat it as a TYP_INT as we don't need to report it as a GC ptr */
15779
15780                     noway_assert(curr->TypeGet() == TYP_INT ||
15781                                  (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
15782
15783                     genSinglePush();
15784
15785                     addrReg = 0;
15786                     break;
15787                 }
15788
15789                 if (curr->gtFlags & GTF_LATE_ARG)
15790                 {
15791                     /* This must be a register arg temp assignment */
15792
15793                     noway_assert(curr->gtOper == GT_ASG);
15794
15795                     /* Evaluate it to the temp */
15796
15797                     genCodeForTree(curr, 0);
15798
15799                     /* Increment the current argument register counter */
15800
15801                     intRegState.rsCurRegArgNum++;
15802
15803                     addrReg = 0;
15804                 }
15805                 else
15806                 {
15807                     /* This is a 32-bit integer non-register argument */
15808
15809                     addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
15810                     inst_TT(INS_push, curr);
15811                     genSinglePush();
15812                     genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
15813                 }
15814                 break;
15815
15816             case TYP_LONG:
15817 #if !CPU_HAS_FP_SUPPORT
15818             case TYP_DOUBLE:
15819 #endif
15820
15821                 /* Is the value a constant? */
15822
15823                 if (curr->gtOper == GT_CNS_LNG)
15824                 {
15825                     inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
15826                     genSinglePush();
15827                     inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
15828                     genSinglePush();
15829
15830                     addrReg = 0;
15831                 }
15832                 else
15833                 {
15834                     addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
15835
15836                     inst_TT(INS_push, curr, sizeof(int));
15837                     genSinglePush();
15838                     inst_TT(INS_push, curr);
15839                     genSinglePush();
15840                 }
15841                 break;
15842
15843 #if CPU_HAS_FP_SUPPORT
15844             case TYP_FLOAT:
15845             case TYP_DOUBLE:
15846 #endif
15847 #if FEATURE_STACK_FP_X87
15848                 addrReg = genPushArgumentStackFP(curr);
15849 #else
15850                 NYI("FP codegen");
15851                 addrReg = 0;
15852 #endif
15853                 break;
15854
15855             case TYP_VOID:
15856
15857                 /* Is this a nothing node, deferred register argument? */
15858
15859                 if (curr->gtFlags & GTF_LATE_ARG)
15860                 {
15861                     GenTree* arg = curr;
15862                     if (arg->gtOper == GT_COMMA)
15863                     {
15864                         while (arg->gtOper == GT_COMMA)
15865                         {
15866                             GenTree* op1 = arg->gtOp.gtOp1;
15867                             genEvalSideEffects(op1);
15868                             genUpdateLife(op1);
15869                             arg = arg->gtOp.gtOp2;
15870                         }
15871                         if (!arg->IsNothingNode())
15872                         {
15873                             genEvalSideEffects(arg);
15874                             genUpdateLife(arg);
15875                         }
15876                     }
15877
15878                     /* increment the register count and continue with the next argument */
15879
15880                     intRegState.rsCurRegArgNum++;
15881
15882                     noway_assert(opsz == 0);
15883
15884                     addrReg = 0;
15885                     break;
15886                 }
15887
15888                 __fallthrough;
15889
15890             case TYP_STRUCT:
15891             {
15892                 GenTree* arg = curr;
15893                 while (arg->gtOper == GT_COMMA)
15894                 {
15895                     GenTree* op1 = arg->gtOp.gtOp1;
15896                     genEvalSideEffects(op1);
15897                     genUpdateLife(op1);
15898                     arg = arg->gtOp.gtOp2;
15899                 }
15900
15901                 noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
15902                 noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
15903                 noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
15904
15905                 if (arg->gtOper == GT_MKREFANY)
15906                 {
15907                     GenTree* op1 = arg->gtOp.gtOp1;
15908                     GenTree* op2 = arg->gtOp.gtOp2;
15909
15910                     addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
15911
15912                     /* Is this value a handle? */
15913                     if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
15914                     {
15915                         /* Emit a fixup for the push instruction */
15916
15917                         inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
15918                         genSinglePush();
15919                     }
15920                     else
15921                     {
15922                         regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
15923                         inst_TT(INS_push, op2);
15924                         genSinglePush();
15925                         genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
15926                     }
15927                     addrReg = genKeepAddressable(op1, addrReg);
15928                     inst_TT(INS_push, op1);
15929                     genSinglePush();
15930                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
15931
15932                     opsz = 2 * TARGET_POINTER_SIZE;
15933                 }
15934                 else
15935                 {
15936                     noway_assert(arg->gtOper == GT_OBJ);
15937
15938                     if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
15939                     {
15940                         GenTree*   structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
15941                         unsigned   structLclNum    = structLocalTree->gtLclVarCommon.gtLclNum;
15942                         LclVarDsc* varDsc          = &compiler->lvaTable[structLclNum];
15943
15944                         // As much as we would like this to be a noway_assert, we can't because
15945                         // there are some weird casts out there, and backwards compatibility
15946                         // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
15947                         // lvPromoted in general currently do not require the local to be
15948                         // TYP_STRUCT, so this assert is really more about how we wish the world
15949                         // was than some JIT invariant.
15950                         assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
15951
15952                         Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
15953
15954                         if (varDsc->lvPromoted &&
15955                             promotionType ==
15956                                 Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
15957                         {
15958                             assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
15959
15960                             addrReg = 0;
15961
15962                             // Get the number of BYTES to copy to the stack
15963                             opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass),
15964                                            TARGET_POINTER_SIZE);
15965                             size_t bytesToBeCopied = opsz;
15966
15967                             // postponedFields is true if we have any postponed fields
15968                             //   Any field that does not start on a 4-byte boundary is a postponed field
15969                             //   Such a field is required to be a short or a byte
15970                             //
15971                             // postponedRegKind records the kind of scratch register we will
15972                             //   need to process the postponed fields
15973                             //   RBM_NONE means that we don't need a register
15974                             //
15975                             // expectedAlignedOffset records the aligned offset that
15976                             //   has to exist for a push to cover the postponed fields.
15977                             //   Since all promoted structs have the tightly packed property
15978                             //   we are guaranteed that we will have such a push
15979                             //
15980                             bool      postponedFields       = false;
15981                             regMaskTP postponedRegKind      = RBM_NONE;
15982                             size_t    expectedAlignedOffset = UINT_MAX;
15983
15984                             VARSET_TP* deadVarBits = NULL;
15985                             compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
15986
15987                             // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
15988                             //
15989                             for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
15990                                  varNum >= (int)varDsc->lvFieldLclStart; varNum--)
15991                             {
15992                                 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
15993 #ifdef DEBUG
15994                                 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
15995                                 {
15996                                     noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
15997                                     noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
15998                                 }
15999 #endif
16000                                 // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
16001                                 // For packed structs we will go back and store the unaligned bytes and shorts
16002                                 // in the next loop
16003                                 //
16004                                 if (fieldVarDsc->lvStackAligned())
16005                                 {
16006                                     if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
16007                                         fieldVarDsc->lvFldOffset + (unsigned)TARGET_POINTER_SIZE != bytesToBeCopied)
16008                                     {
16009                                         // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
16010                                         // Just push some junk (i.e. EAX) on the stack.
16011                                         inst_RV(INS_push, REG_EAX, TYP_INT);
16012                                         genSinglePush();
16013
16014                                         bytesToBeCopied -= TARGET_POINTER_SIZE;
16015                                     }
16016
16017                                     // If we have an expectedAlignedOffset make sure that this push instruction
16018                                     // is what we expect to cover the postponedFields
16019                                     //
16020                                     if (expectedAlignedOffset != UINT_MAX)
16021                                     {
16022                                         // This push must be for a small field
16023                                         noway_assert(fieldVarDsc->lvExactSize < 4);
16024                                         // The fldOffset for this push should be equal to the expectedAlignedOffset
16025                                         noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
16026                                         expectedAlignedOffset = UINT_MAX;
16027                                     }
16028
16029                                     // Push the "upper half" of LONG var first
16030
16031                                     if (isRegPairType(fieldVarDsc->lvType))
16032                                     {
16033                                         if (fieldVarDsc->lvOtherReg != REG_STK)
16034                                         {
16035                                             inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
16036                                             genSinglePush();
16037
16038                                             // Prepare the set of vars to be cleared from gcref/gcbyref set
16039                                             // in case they become dead after genUpdateLife.
16040                                             // genDoneAddressable() will remove dead gc vars by calling
16041                                             // gcInfo.gcMarkRegSetNpt.
16042                                             // Although it is not addrReg, we just borrow the name here.
16043                                             addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16044                                         }
16045                                         else
16046                                         {
16047                                             getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, TARGET_POINTER_SIZE);
16048                                             genSinglePush();
16049                                         }
16050
16051                                         bytesToBeCopied -= TARGET_POINTER_SIZE;
16052                                     }
16053
16054                                     // Push the "upper half" of DOUBLE var if it is not enregistered.
16055
16056                                     if (fieldVarDsc->lvType == TYP_DOUBLE)
16057                                     {
16058                                         if (!fieldVarDsc->lvRegister)
16059                                         {
16060                                             getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, TARGET_POINTER_SIZE);
16061                                             genSinglePush();
16062                                         }
16063
16064                                         bytesToBeCopied -= TARGET_POINTER_SIZE;
16065                                     }
16066
16067                                     //
16068                                     // Push the field local.
16069                                     //
16070
16071                                     if (fieldVarDsc->lvRegister)
16072                                     {
16073                                         if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16074                                         {
16075                                             inst_RV(INS_push, fieldVarDsc->lvRegNum,
16076                                                     genActualType(fieldVarDsc->TypeGet()));
16077                                             genSinglePush();
16078
16079                                             // Prepare the set of vars to be cleared from gcref/gcbyref set
16080                                             // in case they become dead after genUpdateLife.
16081                                             // genDoneAddressable() will remove dead gc vars by calling
16082                                             // gcInfo.gcMarkRegSetNpt.
16083                                             // Although it is not addrReg, we just borrow the name here.
16084                                             addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16085                                         }
16086                                         else
16087                                         {
16088                                             // Must be TYP_FLOAT or TYP_DOUBLE
16089                                             noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16090
16091                                             noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
16092                                                          fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
16093
16094                                             inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16095
16096                                             genSinglePush();
16097                                             if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
16098                                             {
16099                                                 genSinglePush();
16100                                             }
16101
16102 #if FEATURE_STACK_FP_X87
16103                                             GenTree* fieldTree = new (compiler, GT_REG_VAR)
16104                                                 GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16105                                             fieldTree->gtOper            = GT_REG_VAR;
16106                                             fieldTree->gtRegNum          = fieldVarDsc->lvRegNum;
16107                                             fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16108                                             if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
16109                                             {
16110                                                 if (fieldVarDsc->lvTracked &&
16111                                                     (deadVarBits == NULL ||
16112                                                      VarSetOps::IsMember(compiler, *deadVarBits,
16113                                                                          fieldVarDsc->lvVarIndex)))
16114                                                 {
16115                                                     fieldTree->gtFlags |= GTF_VAR_DEATH;
16116                                                 }
16117                                             }
16118                                             genCodeForTreeStackFP_Leaf(fieldTree);
16119
16120                                             // Take reg to top of stack
16121
16122                                             FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16123
16124                                             // Pop it off to stack
16125                                             compCurFPState.Pop();
16126
16127                                             getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
16128                                                                        REG_NA, REG_SPBASE, 0);
16129 #else
16130                                             NYI_FLAT_FP_X87("FP codegen");
16131 #endif
16132                                         }
16133                                     }
16134                                     else
16135                                     {
16136                                         getEmitter()->emitIns_S(INS_push,
16137                                                                 (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
16138                                                                                                     : EA_4BYTE,
16139                                                                 varNum, 0);
16140                                         genSinglePush();
16141                                     }
16142
16143                                     bytesToBeCopied -= TARGET_POINTER_SIZE;
16144                                 }
16145                                 else // not stack aligned
16146                                 {
16147                                     noway_assert(fieldVarDsc->lvExactSize < 4);
16148
16149                                     // We will need to use a store byte or store word
16150                                     // to set this unaligned location
16151                                     postponedFields = true;
16152
16153                                     if (expectedAlignedOffset != UINT_MAX)
16154                                     {
16155                                         // This should never change until it is set back to UINT_MAX by an aligned
16156                                         // offset
16157                                         noway_assert(expectedAlignedOffset ==
16158                                                      roundUp(fieldVarDsc->lvFldOffset, TARGET_POINTER_SIZE) -
16159                                                          TARGET_POINTER_SIZE);
16160                                     }
16161
16162                                     expectedAlignedOffset =
16163                                         roundUp(fieldVarDsc->lvFldOffset, TARGET_POINTER_SIZE) - TARGET_POINTER_SIZE;
16164
16165                                     noway_assert(expectedAlignedOffset < bytesToBeCopied);
16166
16167                                     if (fieldVarDsc->lvRegister)
16168                                     {
16169                                         // Do we need to use a byte-able register?
16170                                         if (fieldVarDsc->lvExactSize == 1)
16171                                         {
16172                                             // Did we enregister fieldVarDsc2 in a non byte-able register?
16173                                             if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16174                                             {
16175                                                 // then we will need to grab a byte-able register
16176                                                 postponedRegKind = RBM_BYTE_REGS;
16177                                             }
16178                                         }
16179                                     }
16180                                     else // not enregistered
16181                                     {
16182                                         if (fieldVarDsc->lvExactSize == 1)
16183                                         {
16184                                             // We will need to grab a byte-able register
16185                                             postponedRegKind = RBM_BYTE_REGS;
16186                                         }
16187                                         else
16188                                         {
16189                                             // We will need to grab any scratch register
16190                                             if (postponedRegKind != RBM_BYTE_REGS)
16191                                                 postponedRegKind = RBM_ALLINT;
16192                                         }
16193                                     }
16194                                 }
16195                             }
16196
16197                             // Now we've pushed all of the aligned fields.
16198                             //
16199                             // We should have pushed bytes equal to the entire struct
16200                             noway_assert(bytesToBeCopied == 0);
16201
16202                             // We should have seen a push that covers every postponed field
16203                             noway_assert(expectedAlignedOffset == UINT_MAX);
16204
16205                             // Did we have any postponed fields?
16206                             if (postponedFields)
16207                             {
16208                                 regNumber regNum = REG_STK; // means no register
16209
16210                                 // If we needed a scratch register then grab it here
16211
16212                                 if (postponedRegKind != RBM_NONE)
16213                                     regNum = regSet.rsGrabReg(postponedRegKind);
16214
16215                                 // Forward loop, starts from the lowest field offset
16216                                 //
16217                                 for (unsigned varNum = varDsc->lvFieldLclStart;
16218                                      varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
16219                                 {
16220                                     LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
16221
16222                                     // All stack aligned fields have already been pushed
16223                                     if (fieldVarDsc->lvStackAligned())
16224                                         continue;
16225
16226                                     // We have a postponed field
16227
16228                                     // It must be a byte or a short
16229                                     noway_assert(fieldVarDsc->lvExactSize < 4);
16230
16231                                     // Is the field enregistered?
16232                                     if (fieldVarDsc->lvRegister)
16233                                     {
16234                                         // Frequently we can just use that register
16235                                         regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16236
16237                                         // Do we need to use a byte-able register?
16238                                         if (fieldVarDsc->lvExactSize == 1)
16239                                         {
16240                                             // Did we enregister the field in a non byte-able register?
16241                                             if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16242                                             {
16243                                                 // then we will need to use the byte-able register 'regNum'
16244                                                 noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16245
16246                                                 // Copy the register that contains fieldVarDsc into 'regNum'
16247                                                 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
16248                                                                           fieldVarDsc->lvRegNum);
16249                                                 regTracker.rsTrackRegLclVar(regNum, varNum);
16250
16251                                                 // tmpRegNum is the register that we will extract the byte value from
16252                                                 tmpRegNum = regNum;
16253                                             }
16254                                             noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
16255                                         }
16256
16257                                         getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16258                                                                    (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
16259                                                                    REG_SPBASE, fieldVarDsc->lvFldOffset);
16260                                     }
16261                                     else // not enregistered
16262                                     {
16263                                         // We will copy the non-enregister fieldVar into our scratch register 'regNum'
16264
16265                                         noway_assert(regNum != REG_STK);
16266                                         getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
16267                                                                   (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
16268                                                                   0);
16269
16270                                         regTracker.rsTrackRegLclVar(regNum, varNum);
16271
16272                                         // Store the value (byte or short) into the stack
16273
16274                                         getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16275                                                                    (emitAttr)fieldVarDsc->lvExactSize, regNum,
16276                                                                    REG_SPBASE, fieldVarDsc->lvFldOffset);
16277                                     }
16278                                 }
16279                             }
16280                             genUpdateLife(structLocalTree);
16281
16282                             break;
16283                         }
16284                     }
16285
16286                     genCodeForTree(arg->gtObj.gtOp1, 0);
16287                     noway_assert(arg->gtObj.gtOp1->InReg());
16288                     regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16289                     // Get the number of DWORDS to copy to the stack
16290                     opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(DWORD));
16291                     unsigned slots = (unsigned)(opsz / sizeof(DWORD));
16292
16293                     BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16294
16295                     compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16296
16297                     BOOL bNoneGC = TRUE;
16298                     for (int i = slots - 1; i >= 0; --i)
16299                     {
16300                         if (gcLayout[i] != TYPE_GC_NONE)
16301                         {
16302                             bNoneGC = FALSE;
16303                             break;
16304                         }
16305                     }
16306
16307                     /* passing large structures using movq instead of pushes does not increase codesize very much */
16308                     unsigned movqLenMin  = 8;
16309                     unsigned movqLenMax  = 64;
16310                     unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16311
16312                     if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16313                     {
16314                         // Don't bother with this optimization in
16315                         // rarely run blocks or when optimizing for size
16316                         movqLenMax = movqLenMin = 0;
16317                     }
16318                     else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16319                     {
16320                         // Be more aggressive when optimizing for speed
16321                         movqLenMax *= 2;
16322                     }
16323
16324                     /* Adjust for BB weight */
16325                     if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
16326                     {
16327                         // Be more aggressive when we are inside a loop
16328                         movqLenMax *= 2;
16329                     }
16330
16331                     if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
16332                     {
16333                         JITLOG_THIS(compiler, (LL_INFO10000,
16334                                                "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16335                                                opsz, compiler->info.compFullName));
16336
16337                         int       stkDisp = (int)(unsigned)opsz;
16338                         int       curDisp = 0;
16339                         regNumber xmmReg  = REG_XMM0;
16340
16341                         if (opsz & 0x4)
16342                         {
16343                             stkDisp -= TARGET_POINTER_SIZE;
16344                             getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
16345                             genSinglePush();
16346                         }
16347
16348                         inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16349                         AddStackLevel(stkDisp);
16350
16351                         while (curDisp < stkDisp)
16352                         {
16353                             getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16354                             getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16355                             curDisp += 2 * TARGET_POINTER_SIZE;
16356                         }
16357                         noway_assert(curDisp == stkDisp);
16358                     }
16359                     else
16360                     {
16361                         for (int i = slots - 1; i >= 0; --i)
16362                         {
16363                             emitAttr fieldSize;
16364                             if (gcLayout[i] == TYPE_GC_NONE)
16365                                 fieldSize = EA_4BYTE;
16366                             else if (gcLayout[i] == TYPE_GC_REF)
16367                                 fieldSize = EA_GCREF;
16368                             else
16369                             {
16370                                 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16371                                 fieldSize = EA_BYREF;
16372                             }
16373                             getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * TARGET_POINTER_SIZE);
16374                             genSinglePush();
16375                         }
16376                     }
16377                     gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
16378                 }
16379
16380                 addrReg = 0;
16381                 break;
16382             }
16383
16384             default:
16385                 noway_assert(!"unhandled/unexpected arg type");
16386                 NO_WAY("unhandled/unexpected arg type");
16387         }
16388
16389         /* Update the current set of live variables */
16390
16391         genUpdateLife(curr);
16392
16393         /* Update the current set of register pointers */
16394
16395         noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
16396         genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16397
16398         /* Remember how much stuff we've pushed on the stack */
16399
16400         size += opsz;
16401
16402         /* Update the current argument stack offset */
16403
16404         /* Continue with the next argument, if any more are present */
16405
16406     } // while args
16407
16408     /* Move the deferred arguments to registers */
16409
16410     for (args = regArgs; args; args = args->Rest())
16411     {
16412         curr = args->Current();
16413
16414         assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
16415
16416         fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16417         assert(curArgTabEntry);
16418         regNumber regNum = curArgTabEntry->regNum;
16419
16420         noway_assert(isRegParamType(curr->TypeGet()));
16421         noway_assert(curr->gtType != TYP_VOID);
16422
16423         /* Evaluate the argument to a register [pair] */
16424
16425         if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16426         {
16427             /* Check if this is the guess area for the resolve interface call
16428              * Pass a size of EA_OFFSET*/
16429             if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16430             {
16431                 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
16432                 regTracker.rsTrackRegTrash(regNum);
16433
16434                 /* The value is now in the appropriate register */
16435
16436                 genMarkTreeInReg(curr, regNum);
16437             }
16438             else
16439             {
16440                 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16441             }
16442
16443             noway_assert(curr->gtRegNum == regNum);
16444
16445             /* If the register is already marked as used, it will become
16446                multi-used. However, since it is a callee-trashed register,
16447                we will have to spill it before the call anyway. So do it now */
16448
16449             if (regSet.rsMaskUsed & genRegMask(regNum))
16450             {
16451                 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16452                 regSet.rsSpillReg(regNum);
16453             }
16454
16455             /* Mark the register as 'used' */
16456
16457             regSet.rsMarkRegUsed(curr);
16458         }
16459         else
16460         {
16461             noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16462         }
16463     }
16464
16465     /* If any of the previously loaded arguments were spilled - reload them */
16466
16467     for (args = regArgs; args; args = args->Rest())
16468     {
16469         curr = args->Current();
16470         assert(curr);
16471
16472         if (curr->gtFlags & GTF_SPILLED)
16473         {
16474             if (isRegPairType(curr->gtType))
16475             {
16476                 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16477             }
16478             else
16479             {
16480                 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16481             }
16482         }
16483     }
16484
16485     /* Return the total size pushed */
16486
16487     return size;
16488 }
16489 #ifdef _PREFAST_
16490 #pragma warning(pop)
16491 #endif
16492
16493 #else // FEATURE_FIXED_OUT_ARGS
16494
16495 //
16496 // ARM and AMD64 uses this method to pass the stack based args
16497 //
16498 // returns size pushed (always zero)
16499 size_t CodeGen::genPushArgList(GenTreeCall* call)
16500 {
16501     GenTreeArgList* lateArgs = call->gtCallLateArgs;
16502     GenTree*        curr;
16503     var_types       type;
16504     int             argSize;
16505
16506     GenTreeArgList* args;
16507     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16508     // so we can iterate over this argument list more uniformly.
16509     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16510     GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCallArgs);
16511     if (call->gtCallObjp == NULL)
16512     {
16513         args = call->gtCallArgs;
16514     }
16515     else
16516     {
16517         objpArgList.Current() = call->gtCallObjp;
16518         args                  = &objpArgList;
16519     }
16520
16521     for (; args; args = args->Rest())
16522     {
16523         /* Get hold of the next argument value */
16524         curr = args->Current();
16525
16526         fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16527         assert(curArgTabEntry);
16528         regNumber regNum    = curArgTabEntry->regNum;
16529         int       argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
16530
16531         /* See what type of a value we're passing */
16532         type = curr->TypeGet();
16533
16534         if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
16535         {
16536             type = TYP_VOID;
16537         }
16538
16539         // This holds the set of registers corresponding to enregistered promoted struct field variables
16540         // that go dead after this use of the variable in the argument list.
16541         regMaskTP deadFieldVarRegs = RBM_NONE;
16542
16543         argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
16544
16545         if (curr->IsArgPlaceHolderNode())
16546         {
16547             assert(curr->gtFlags & GTF_LATE_ARG);
16548             goto DEFERRED;
16549         }
16550
16551         if (varTypeIsSmall(type))
16552         {
16553             // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
16554             type = TYP_I_IMPL;
16555         }
16556
16557         switch (type)
16558         {
16559
16560             case TYP_DOUBLE:
16561             case TYP_LONG:
16562
16563 #if defined(_TARGET_ARM_)
16564
16565                 argSize = (TARGET_POINTER_SIZE * 2);
16566
16567                 /* Is the value a constant? */
16568
16569                 if (curr->gtOper == GT_CNS_LNG)
16570                 {
16571                     assert((curr->gtFlags & GTF_LATE_ARG) == 0);
16572
16573                     int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
16574                     int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
16575
16576                     instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
16577
16578                     instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
16579                                                argOffset + 4);
16580
16581                     break;
16582                 }
16583                 else
16584                 {
16585                     genCodeForTree(curr, 0);
16586
16587                     if (curr->gtFlags & GTF_LATE_ARG)
16588                     {
16589                         // The arg was assigned into a temp and
16590                         // will be moved to the correct register or slot later
16591
16592                         argSize = 0; // nothing is passed on the stack
16593                     }
16594                     else
16595                     {
16596                         // The arg is passed in the outgoing argument area of the stack frame
16597                         //
16598                         assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16599                         assert(curr->InReg());          // should be enregistered after genCodeForTree(curr, 0)
16600
16601                         if (type == TYP_LONG)
16602                         {
16603                             regNumber regLo = genRegPairLo(curr->gtRegPair);
16604                             regNumber regHi = genRegPairHi(curr->gtRegPair);
16605
16606                             assert(regLo != REG_STK);
16607                             inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
16608                             if (regHi == REG_STK)
16609                             {
16610                                 regHi = regSet.rsPickFreeReg();
16611                                 inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
16612                                 regTracker.rsTrackRegTrash(regHi);
16613                             }
16614                             inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
16615                         }
16616                         else // (type == TYP_DOUBLE)
16617                         {
16618                             inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16619                         }
16620                     }
16621                 }
16622                 break;
16623
16624 #elif defined(_TARGET_64BIT_)
16625                 __fallthrough;
16626 #else
16627 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
16628 #endif
16629
16630             case TYP_REF:
16631             case TYP_BYREF:
16632
16633             case TYP_FLOAT:
16634             case TYP_INT:
16635                 /* Is the value a constant? */
16636
16637                 if (curr->gtOper == GT_CNS_INT)
16638                 {
16639                     assert(!(curr->gtFlags & GTF_LATE_ARG));
16640
16641 #if REDUNDANT_LOAD
16642                     regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16643
16644                     if (reg != REG_NA)
16645                     {
16646                         inst_SA_RV(ins_Store(type), argOffset, reg, type);
16647                     }
16648                     else
16649 #endif
16650                     {
16651                         GenTreeIntConCommon* con       = curr->AsIntConCommon();
16652                         bool                 needReloc = con->ImmedValNeedsReloc(compiler);
16653                         emitAttr             attr      = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
16654
16655                         instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
16656                                                    compiler->lvaOutgoingArgSpaceVar, argOffset);
16657                     }
16658                     break;
16659                 }
16660
16661                 /* This is passed as a pointer-sized integer argument */
16662
16663                 genCodeForTree(curr, 0);
16664
16665                 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
16666                 if (curr->gtFlags & GTF_LATE_ARG)
16667                 {
16668 #ifdef _TARGET_ARM_
16669                     argSize = 0; // nothing is passed on the stack
16670 #endif
16671                 }
16672                 else
16673                 {
16674                     // The arg is passed in the outgoing argument area of the stack frame
16675
16676                     assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16677                     assert(curr->InReg());          // should be enregistered after genCodeForTree(curr, 0)
16678                     inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16679
16680                     if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
16681                         gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
16682                 }
16683                 break;
16684
16685             case TYP_VOID:
16686                 /* Is this a nothing node, deferred register argument? */
16687
16688                 if (curr->gtFlags & GTF_LATE_ARG)
16689                 {
16690                 /* Handle side-effects */
16691                 DEFERRED:
16692                     if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
16693                     {
16694 #ifdef _TARGET_ARM_
16695                         {
16696                             GenTree*  curArgNode    = curArgTabEntry->node;
16697                             var_types curRegArgType = curArgNode->gtType;
16698                             assert(curRegArgType != TYP_UNDEF);
16699
16700                             if (curRegArgType == TYP_STRUCT)
16701                             {
16702                                 // If the RHS of the COPYBLK is a promoted struct local, then the use of that
16703                                 // is an implicit use of all its field vars.  If these are last uses, remember that,
16704                                 // so we can later update the GC compiler->info.
16705                                 if (curr->OperIsCopyBlkOp())
16706                                     deadFieldVarRegs |= genFindDeadFieldRegs(curr);
16707                             }
16708                         }
16709 #endif // _TARGET_ARM_
16710
16711                         genCodeForTree(curr, 0);
16712                     }
16713                     else
16714                     {
16715                         assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
16716                     }
16717
16718 #if defined(_TARGET_ARM_)
16719                     argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
16720 #endif
16721                 }
16722                 else
16723                 {
16724                     for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
16725                     {
16726                         GenTree* op1 = arg->gtOp.gtOp1;
16727
16728                         genEvalSideEffects(op1);
16729                         genUpdateLife(op1);
16730                     }
16731                 }
16732                 break;
16733
16734 #ifdef _TARGET_ARM_
16735
16736             case TYP_STRUCT:
16737             {
16738                 GenTree* arg = curr;
16739                 while (arg->gtOper == GT_COMMA)
16740                 {
16741                     GenTree* op1 = arg->gtOp.gtOp1;
16742                     genEvalSideEffects(op1);
16743                     genUpdateLife(op1);
16744                     arg = arg->gtOp.gtOp2;
16745                 }
16746                 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
16747
16748                 CORINFO_CLASS_HANDLE clsHnd;
16749                 unsigned             argAlign;
16750                 unsigned             slots;
16751                 BYTE*                gcLayout = NULL;
16752
16753                 // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
16754                 // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
16755                 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
16756                 // table entry for the promoted struct local.  As we fill slots with the contents of a
16757                 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
16758                 // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
16759                 // variable number of the next field variable to be copied.
16760                 LclVarDsc* promotedStructLocalVarDesc           = NULL;
16761                 GenTree*   structLocalTree                      = NULL;
16762                 unsigned   bytesOfNextSlotOfCurPromotedStruct   = TARGET_POINTER_SIZE; // Size of slot.
16763                 unsigned   nextPromotedStructFieldVar           = BAD_VAR_NUM;
16764                 unsigned   promotedStructOffsetOfFirstStackSlot = 0;
16765                 unsigned   argOffsetOfFirstStackSlot            = UINT32_MAX; // Indicates uninitialized.
16766
16767                 if (arg->OperGet() == GT_OBJ)
16768                 {
16769                     clsHnd                = arg->gtObj.gtClass;
16770                     unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
16771                     argAlign =
16772                         roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
16773                     argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
16774
16775                     slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
16776
16777                     gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16778
16779                     compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
16780
16781                     // Are we loading a promoted struct local var?
16782                     if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16783                     {
16784                         structLocalTree         = arg->gtObj.gtOp1->gtOp.gtOp1;
16785                         unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
16786                         LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];
16787
16788                         // As much as we would like this to be a noway_assert, we can't because
16789                         // there are some weird casts out there, and backwards compatiblity
16790                         // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16791                         // lvPromoted in general currently do not require the local to be
16792                         // TYP_STRUCT, so this assert is really more about how we wish the world
16793                         // was then some JIT invariant.
16794                         assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16795
16796                         Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
16797
16798                         if (varDsc->lvPromoted &&
16799                             promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
16800                                                                                    // on stack.
16801                         {
16802                             assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16803                             promotedStructLocalVarDesc = varDsc;
16804                             nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
16805                         }
16806                     }
16807                 }
16808                 else
16809                 {
16810                     noway_assert(arg->OperGet() == GT_MKREFANY);
16811
16812                     clsHnd   = NULL;
16813                     argAlign = TARGET_POINTER_SIZE;
16814                     argSize  = 2 * TARGET_POINTER_SIZE;
16815                     slots    = 2;
16816                 }
16817
16818                 // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
16819                 noway_assert(regNum == REG_STK);
16820
16821                 // This code passes a TYP_STRUCT by value using the outgoing arg space var
16822                 //
16823                 if (arg->OperGet() == GT_OBJ)
16824                 {
16825                     regNumber regSrc = REG_STK;
16826                     regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
16827                     int       cStackSlots = 0;
16828
16829                     if (promotedStructLocalVarDesc == NULL)
16830                     {
16831                         genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
16832                         noway_assert(arg->gtObj.gtOp1->InReg());
16833                         regSrc = arg->gtObj.gtOp1->gtRegNum;
16834                     }
16835
16836                     // The number of bytes to add "argOffset" to get the arg offset of the current slot.
16837                     int extraArgOffset = 0;
16838
16839                     for (unsigned i = 0; i < slots; i++)
16840                     {
16841                         emitAttr fieldSize;
16842                         if (gcLayout[i] == TYPE_GC_NONE)
16843                             fieldSize = EA_PTRSIZE;
16844                         else if (gcLayout[i] == TYPE_GC_REF)
16845                             fieldSize = EA_GCREF;
16846                         else
16847                         {
16848                             noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16849                             fieldSize = EA_BYREF;
16850                         }
16851
16852                         // Pass the argument using the lvaOutgoingArgSpaceVar
16853
16854                         if (promotedStructLocalVarDesc != NULL)
16855                         {
16856                             if (argOffsetOfFirstStackSlot == UINT32_MAX)
16857                                 argOffsetOfFirstStackSlot = argOffset;
16858
16859                             regNumber maxRegArg       = regNumber(MAX_REG_ARG);
16860                             bool      filledExtraSlot = genFillSlotFromPromotedStruct(
16861                                 arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
16862                                 &bytesOfNextSlotOfCurPromotedStruct,
16863                                 /*pCurRegNum*/ &maxRegArg,
16864                                 /*argOffset*/ argOffset + extraArgOffset,
16865                                 /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
16866                                 argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
16867                             extraArgOffset += TARGET_POINTER_SIZE;
16868                             // If we filled an extra slot with an 8-byte value, skip a slot.
16869                             if (filledExtraSlot)
16870                             {
16871                                 i++;
16872                                 cStackSlots++;
16873                                 extraArgOffset += TARGET_POINTER_SIZE;
16874                             }
16875                         }
16876                         else
16877                         {
16878                             if (regTmp == REG_STK)
16879                             {
16880                                 regTmp = regSet.rsPickFreeReg();
16881                             }
16882
16883                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
16884                                                        i * TARGET_POINTER_SIZE);
16885
16886                             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
16887                                                       compiler->lvaOutgoingArgSpaceVar,
16888                                                       argOffset + cStackSlots * TARGET_POINTER_SIZE);
16889                             regTracker.rsTrackRegTrash(regTmp);
16890                         }
16891                         cStackSlots++;
16892                     }
16893
16894                     if (promotedStructLocalVarDesc == NULL)
16895                     {
16896                         regSet.rsMarkRegFree(genRegMask(regSrc));
16897                     }
16898                     if (structLocalTree != NULL)
16899                         genUpdateLife(structLocalTree);
16900                 }
16901                 else
16902                 {
16903                     assert(arg->OperGet() == GT_MKREFANY);
16904                     PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
16905                     argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
16906                 }
16907             }
16908             break;
16909 #endif // _TARGET_ARM_
16910
16911             default:
16912                 assert(!"unhandled/unexpected arg type");
16913                 NO_WAY("unhandled/unexpected arg type");
16914         }
16915
16916         /* Update the current set of live variables */
16917
16918         genUpdateLife(curr);
16919
16920         // Now, if some copied field locals were enregistered, and they're now dead, update the set of
16921         // register holding gc pointers.
16922         if (deadFieldVarRegs != 0)
16923             gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
16924
16925         /* Update the current argument stack offset */
16926
16927         argOffset += argSize;
16928
16929         /* Continue with the next argument, if any more are present */
16930     } // while (args)
16931
16932     if (lateArgs)
16933     {
16934         SetupLateArgs(call);
16935     }
16936
16937     /* Return the total size pushed */
16938
16939     return 0;
16940 }
16941
16942 #ifdef _TARGET_ARM_
//------------------------------------------------------------------------
// genFillSlotFromPromotedStruct: Fill one argument slot (register or outgoing
// stack slot) of a struct argument from the field variables of its promoted
// struct local.
//
// Arguments:
//    arg                                 - the argument node being set up.
//    curArgTabEntry                      - the arg table entry for 'arg' (used for HFA info and reg num).
//    promotedStructLocalVarDesc          - LclVarDsc of the promoted struct local that is the source.
//    fieldSize                           - EA_4BYTE for non-GC slots, or the GC/byref attr from the gc layout;
//                                          may be narrowed below based on the field's exact size.
//    pNextPromotedStructFieldVar         - in/out: index (into lvaTable) of the next field var to copy.
//    pBytesOfNextSlotOfCurPromotedStruct - in/out: struct offset one past the slot being filled.
//    pCurRegNum                          - in/out: the arg register to fill, or MAX_REG_ARG if filling stack.
//    argOffset                           - outgoing-arg-space offset of this slot (INT32_MAX if reg-only).
//    fieldOffsetOfFirstStackSlot         - struct offset of the first field passed on the stack.
//    argOffsetOfFirstStackSlot           - outgoing-arg-space offset of the first stack slot of this arg.
//    deadFieldVarRegs                    - out: accumulates registers of enregistered field vars that die here.
//    pRegTmp                             - in/out: a scratch register, lazily picked on first need (REG_STK
//                                          means "none picked yet").
//
// Return Value:
//    true if an extra slot beyond the requested one was also filled (an 8-byte
//    field consumed two slots/registers), so the caller must skip a slot.
//
bool CodeGen::genFillSlotFromPromotedStruct(GenTree*       arg,
                                            fgArgTabEntry* curArgTabEntry,
                                            LclVarDsc*     promotedStructLocalVarDesc,
                                            emitAttr       fieldSize,
                                            unsigned*      pNextPromotedStructFieldVar,
                                            unsigned*      pBytesOfNextSlotOfCurPromotedStruct,
                                            regNumber*     pCurRegNum,
                                            int            argOffset,
                                            int            fieldOffsetOfFirstStackSlot,
                                            int            argOffsetOfFirstStackSlot,
                                            regMaskTP*     deadFieldVarRegs,
                                            regNumber*     pRegTmp)
{
    // Load the in/out cursors into locals; they are written back at the end
    // (or early, on the alignment early-return below).
    unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
    unsigned limitPromotedStructFieldVar =
        promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
    unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;

    regNumber curRegNum       = *pCurRegNum;
    regNumber regTmp          = *pRegTmp;
    bool      filledExtraSlot = false;

    if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
    {
        // We've already finished; just return.
        // We can reach this because the calling loop computes a # of slots based on the size of the struct.
        // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
        // the fourth slot, even though we've copied all the fields.
        return false;
    }

    LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];

    // Does this field fill an entire slot, and does it go at the start of the slot?
    // If so, things are easier...

    bool oneFieldFillsSlotFromStart =
        (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
        && ((fieldVarDsc->lvFldOffset % 4) == 0)                        // at the start of the slot, and...
        && (nextPromotedStructFieldVar + 1 ==
                limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
            || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);

    // Compute the proper size.
    if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
    {
        switch (fieldVarDsc->lvExactSize)
        {
            case 1:
                fieldSize = EA_1BYTE;
                break;
            case 2:
                fieldSize = EA_2BYTE;
                break;
            case 8:
                // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
                // in which case we should not have promoted the struct variable.
                noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);

                // If the current reg number is not aligned, align it, and return to the calling loop, which will
                // consider that a filled slot and move on to the next argument register.
                if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
                {
                    // We must update the slot target, however!
                    bytesOfNextSlotOfCurPromotedStruct += 4;
                    *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
                    return false;
                }
                // Dest is an aligned pair of arg regs, if the struct type demands it.
                noway_assert((curRegNum % 2) == 0);
                // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
                break;
            default:
                assert(fieldVarDsc->lvExactSize == 4);
                break;
        }
    }
    else
    {
        // If the gc layout said it's a GC ref or byref, then the field size must be 4.
        noway_assert(fieldVarDsc->lvExactSize == 4);
    }

    // We may need the type of the field to influence instruction selection.
    // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
    // If the fieldVarDsc is enregistered float we must use the field's exact type
    // however if it is in memory we can use an integer type TYP_I_IMPL
    //
    var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
    if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
    {
        fieldTypeForInstr = TYP_I_IMPL;
    }

    // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
    if (curArgTabEntry->isHfaRegArg)
    {
        assert(oneFieldFillsSlotFromStart);

        // Is the field variable promoted?
        if (fieldVarDsc->lvRegister)
        {
            // Move the field var living in register to dst, if they are different registers.
            regNumber srcReg = fieldVarDsc->lvRegNum;
            regNumber dstReg = curRegNum;
            if (srcReg != dstReg)
            {
                inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
                assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
            }
        }
        else
        {
            // Move the field var living in stack to dst.
            getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
                                      fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
                                      nextPromotedStructFieldVar, 0);
            assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
        }

        // Mark the arg as used and using reg val.
        genMarkTreeInReg(arg, curRegNum);
        regSet.SetUsedRegFloat(arg, true);

        // Advance for double.
        if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
        {
            bytesOfNextSlotOfCurPromotedStruct += 4;
            curRegNum     = REG_NEXT(curRegNum);
            arg->gtRegNum = curRegNum;
            regSet.SetUsedRegFloat(arg, true);
            filledExtraSlot = true;
        }
        // NOTE(review): gtRegNum is finally reset to the arg's canonical register from the
        // arg table entry, after the per-slot registers were marked used above.
        arg->gtRegNum = curArgTabEntry->regNum;

        // Advance.
        bytesOfNextSlotOfCurPromotedStruct += 4;
        nextPromotedStructFieldVar++;
    }
    else
    {
        if (oneFieldFillsSlotFromStart)
        {
            // If we write to the stack, offset in outgoing args at which we'll write.
            int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
            assert(fieldArgOffset >= 0);

            // Is the source a register or memory?
            if (fieldVarDsc->lvRegister)
            {
                if (fieldTypeForInstr == TYP_DOUBLE)
                {
                    fieldSize = EA_8BYTE;
                }

                // Are we writing to a register or to the stack?
                if (curRegNum != MAX_REG_ARG)
                {
                    // Source is register and Dest is register.

                    instruction insCopy = INS_mov;

                    if (varTypeIsFloating(fieldTypeForInstr))
                    {
                        if (fieldTypeForInstr == TYP_FLOAT)
                        {
                            insCopy = INS_vmov_f2i;
                        }
                        else
                        {
                            assert(fieldTypeForInstr == TYP_DOUBLE);
                            insCopy = INS_vmov_d2i;
                        }
                    }

                    // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers.  Record the second
                    // register (which may become a tmp register, if its held in the argument register that the first
                    // register to be copied will overwrite).
                    regNumber otherRegNum = REG_STK;
                    if (fieldVarDsc->lvType == TYP_LONG)
                    {
                        otherRegNum = fieldVarDsc->lvOtherReg;
                        // Are we about to overwrite?
                        if (otherRegNum == curRegNum)
                        {
                            if (regTmp == REG_STK)
                            {
                                regTmp = regSet.rsPickFreeReg();
                            }
                            // Copy the second register to the temp reg.
                            getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
                            regTracker.rsTrackRegCopy(regTmp, otherRegNum);
                            otherRegNum = regTmp;
                        }
                    }

                    if (fieldVarDsc->lvType == TYP_DOUBLE)
                    {
                        // A double moves into a register pair (curRegNum, curRegNum+1) in one instruction.
                        assert(curRegNum <= REG_R2);
                        getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
                                                    fieldVarDsc->lvRegNum);
                        regTracker.rsTrackRegTrash(curRegNum);
                        regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
                    }
                    else
                    {
                        // Now do the first register.
                        // It might be the case that it's already in the desired register; if so do nothing.
                        if (curRegNum != fieldVarDsc->lvRegNum)
                        {
                            getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
                            regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
                        }
                    }

                    // In either case, mark the arg register as used.
                    regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));

                    // Is there a second half of the value?
                    if (fieldVarDsc->lvExactSize == 8)
                    {
                        curRegNum = genRegArgNext(curRegNum);
                        // The second dest reg must also be an argument register.
                        noway_assert(curRegNum < MAX_REG_ARG);

                        // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
                        if (fieldVarDsc->lvType == TYP_LONG)
                        {
                            // Copy the second register into the next argument register

                            // If it's a register variable for a TYP_LONG value, then otherReg now should
                            //  hold the second register or it might say that it's in the stack.
                            if (otherRegNum == REG_STK)
                            {
                                // Apparently when we partially enregister, we allocate stack space for the full
                                // 8 bytes, and enregister the low half.  Thus the final TARGET_POINTER_SIZE offset
                                // parameter, to get the high half.
                                getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
                                                          nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                                regTracker.rsTrackRegTrash(curRegNum);
                            }
                            else
                            {
                                // The other half is in a register.
                                // Again, it might be the case that it's already in the desired register; if so do
                                // nothing.
                                if (curRegNum != otherRegNum)
                                {
                                    getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
                                    regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
                                }
                            }
                        }

                        // Also mark the 2nd arg register as used.
                        regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
                        // Record the fact that we filled in an extra register slot
                        filledExtraSlot = true;
                    }
                }
                else
                {
                    // Source is register and Dest is memory (OutgoingArgSpace).

                    // Now write the srcReg into the right location in the outgoing argument list.
                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                              compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);

                    if (fieldVarDsc->lvExactSize == 8)
                    {
                        // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
                        if (fieldVarDsc->lvType == TYP_LONG)
                        {
                            if (fieldVarDsc->lvOtherReg == REG_STK)
                            {
                                // Source is stack.
                                if (regTmp == REG_STK)
                                {
                                    regTmp = regSet.rsPickFreeReg();
                                }
                                // Apparently if we partially enregister, we allocate stack space for the full
                                // 8 bytes, and enregister the low half.  Thus the final TARGET_POINTER_SIZE offset
                                // parameter, to get the high half.
                                getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                                          nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                                regTracker.rsTrackRegTrash(regTmp);
                                getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
                                                          compiler->lvaOutgoingArgSpaceVar,
                                                          fieldArgOffset + TARGET_POINTER_SIZE);
                            }
                            else
                            {
                                getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
                                                          compiler->lvaOutgoingArgSpaceVar,
                                                          fieldArgOffset + TARGET_POINTER_SIZE);
                            }
                        }
                        // Record the fact that we filled in an extra register slot
                        filledExtraSlot = true;
                    }
                }
                assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
                // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
                if (arg->gtFlags & GTF_VAR_DEATH)
                {
                    *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
                    // We don't bother with the second reg of a register pair, since if it has one,
                    // it obviously doesn't hold a pointer.
                }
            }
            else
            {
                // Source is in memory.

                if (curRegNum != MAX_REG_ARG)
                {
                    // Dest is reg.
                    getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
                                              nextPromotedStructFieldVar, 0);
                    regTracker.rsTrackRegTrash(curRegNum);

                    regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));

                    if (fieldVarDsc->lvExactSize == 8)
                    {
                        noway_assert(fieldSize == EA_4BYTE);
                        curRegNum = genRegArgNext(curRegNum);
                        noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
                        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
                                                  nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                        regTracker.rsTrackRegTrash(curRegNum);
                        regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
                        // Record the fact that we filled in an extra stack slot
                        filledExtraSlot = true;
                    }
                }
                else
                {
                    // Dest is stack.
                    if (regTmp == REG_STK)
                    {
                        regTmp = regSet.rsPickFreeReg();
                    }
                    getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                              nextPromotedStructFieldVar, 0);

                    // Now write regTmp into the right location in the outgoing argument list.
                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                              compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
                    // We overwrote "regTmp", so erase any previous value we recorded that it contained.
                    regTracker.rsTrackRegTrash(regTmp);

                    if (fieldVarDsc->lvExactSize == 8)
                    {
                        getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                                  nextPromotedStructFieldVar, TARGET_POINTER_SIZE);

                        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
                                                  compiler->lvaOutgoingArgSpaceVar,
                                                  fieldArgOffset + TARGET_POINTER_SIZE);
                        // Record the fact that we filled in an extra stack slot
                        filledExtraSlot = true;
                    }
                }
            }

            // Bump up the following if we filled in an extra slot
            if (filledExtraSlot)
                bytesOfNextSlotOfCurPromotedStruct += 4;

            // Go to the next field.
            nextPromotedStructFieldVar++;
            if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
            {
                fieldVarDsc = NULL;
            }
            else
            {
                // The next field should have the same parent variable, and we should have put the field vars in order
                // sorted by offset.
                assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
                       fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
                       fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
                fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
            }
            bytesOfNextSlotOfCurPromotedStruct += 4;
        }
        else // oneFieldFillsSlotFromStart == false
        {
            // The current slot should contain more than one field.
            // We'll construct a word in memory for the slot, then load it into a register.
            // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
            // slot, in which case we'll just skip this loop altogether.)
            while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
            {
                // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
                // whose fields have their natural alignment, and alignment == size on ARM).
                noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);

                // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
                int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
                noway_assert(argOffset == INT32_MAX ||
                             (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));

                if (fieldVarDsc->lvRegister)
                {
                    if (curRegNum != MAX_REG_ARG)
                    {
                        // Dest is a register: assemble the slot in the scratch var first.
                        noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

                        getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                                  compiler->lvaPromotedStructAssemblyScratchVar,
                                                  fieldVarDsc->lvFldOffset % 4);
                    }
                    else
                    {
                        // Dest is stack; write directly.
                        getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                                  compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
                    }
                }
                else
                {
                    // Source is in memory.

                    // Make sure we have a temporary register to use...
                    if (regTmp == REG_STK)
                    {
                        regTmp = regSet.rsPickFreeReg();
                    }
                    getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                              nextPromotedStructFieldVar, 0);
                    regTracker.rsTrackRegTrash(regTmp);

                    if (curRegNum != MAX_REG_ARG)
                    {
                        noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

                        getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                                  compiler->lvaPromotedStructAssemblyScratchVar,
                                                  fieldVarDsc->lvFldOffset % 4);
                    }
                    else
                    {
                        getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                                  compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
                    }
                }
                // Go to the next field.
                nextPromotedStructFieldVar++;
                if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
                {
                    fieldVarDsc = NULL;
                }
                else
                {
                    // The next field should have the same parent variable, and we should have put the field vars in
                    // order sorted by offset.
                    noway_assert(fieldVarDsc->lvIsStructField &&
                                 compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
                                 fieldVarDsc->lvParentLcl ==
                                     compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
                                 fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
                    fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
                }
            }
            // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
            // write to an argument register, do so.
            if (curRegNum != MAX_REG_ARG)
            {
                noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

                getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
                                          compiler->lvaPromotedStructAssemblyScratchVar, 0);
                regTracker.rsTrackRegTrash(curRegNum);
                regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
            }
            // We've finished a slot; set the goal of the next slot.
            bytesOfNextSlotOfCurPromotedStruct += 4;
        }
    }

    // Write back the updates.
    *pNextPromotedStructFieldVar         = nextPromotedStructFieldVar;
    *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
    *pCurRegNum                          = curRegNum;
    *pRegTmp                             = regTmp;

    return filledExtraSlot;
}
17433 #endif // _TARGET_ARM_
17434
17435 regMaskTP CodeGen::genFindDeadFieldRegs(GenTree* cpBlk)
17436 {
17437     noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
17438     GenTree*  rhs = cpBlk->gtOp.gtOp1;
17439     regMaskTP res = 0;
17440     if (rhs->OperIsIndir())
17441     {
17442         GenTree* addr = rhs->AsIndir()->Addr();
17443         if (addr->gtOper == GT_ADDR)
17444         {
17445             rhs = addr->gtOp.gtOp1;
17446         }
17447     }
17448     if (rhs->OperGet() == GT_LCL_VAR)
17449     {
17450         LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
17451         if (rhsDsc->lvPromoted)
17452         {
17453             // It is promoted; iterate over its field vars.
17454             unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
17455             for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
17456             {
17457                 LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
17458                 // Did the variable go dead, and is it enregistered?
17459                 if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
17460                 {
17461                     // Add the register number to the set of registers holding field vars that are going dead.
17462                     res |= genRegMask(fieldVarDsc->lvRegNum);
17463                 }
17464             }
17465         }
17466     }
17467     return res;
17468 }
17469
//------------------------------------------------------------------------
// SetupLateArgs: Evaluate a call's "late" arguments into their final
// locations: their assigned argument register(s), or the outgoing
// argument area of the stack frame (when regNum is REG_STK).
//
// Arguments:
//    call - the call node whose gtCallLateArgs list is to be set up.
//
// Notes:
//    For each late argument we first spill any callee-trashed registers
//    that are about to be overwritten, then evaluate the argument into
//    place and mark the target register(s) used.  A second pass at the
//    end reloads any previously-set-up arguments that got spilled while
//    later arguments were being evaluated.
//
void CodeGen::SetupLateArgs(GenTreeCall* call)
{
    GenTreeArgList* lateArgs;
    GenTree*        curr;

    /* Generate the code to move the late arguments into registers */

    for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
    {
        curr = lateArgs->Current();
        assert(curr);

        // Locate this argument's placement info (register and/or stack slot).
        fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
        assert(curArgTabEntry);
        regNumber regNum    = curArgTabEntry->regNum;
        unsigned  argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;

        assert(isRegParamType(curr->TypeGet()));
        assert(curr->gtType != TYP_VOID);

        /* If the register is already marked as used, it will become
           multi-used. However, since it is a callee-trashed register,
           we will have to spill it before the call anyway. So do it now */

        {
            // Remember which registers hold pointers. We will spill
            // them, but the code that follows will fetch reg vars from
            // the registers, so we need that gc compiler->info.
            // Also regSet.rsSpillReg doesn't like to spill enregistered
            // variables, but if this is their last use that is *exactly*
            // what we need to do, so we have to temporarily pretend
            // they are no longer live.
            // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
            // when their last use is about to occur?
            // It is because this is the second operand to be evaluated
            // of some parent binary op, and the first operand is
            // live across this tree, and thought it could re-use the
            // variables register (like a GT_REG_VAR). This probably
            // is caused by RegAlloc assuming the first operand would
            // evaluate into another register.
            regMaskTP rsTemp          = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
            regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
            regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
            regSet.RemoveMaskVars(rsTemp);

            // Spill every target argument register that is currently in use.
            regNumber regNum2 = regNum;
            for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
            {
                if (regSet.rsMaskUsed & genRegMask(regNum2))
                {
                    assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
                    regSet.rsSpillReg(regNum2);
                }
                regNum2 = genRegArgNext(regNum2);
                assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
            }

            // Restore gc tracking masks.
            gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
            gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;

            // Set maskvars back to normal
            regSet.AddMaskVars(rsTemp);
        }

        /* Evaluate the argument to a register */

        /* Check if this is the guess area for the resolve interface call
         * Pass a size of EA_OFFSET*/
        if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
        {
            getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
            regTracker.rsTrackRegTrash(regNum);

            /* The value is now in the appropriate register */

            genMarkTreeInReg(curr, regNum);

            regSet.rsMarkRegUsed(curr);
        }
#ifdef _TARGET_ARM_
        // On ARM a struct argument may be split between argument registers and
        // the outgoing stack area; this arm handles all TYP_STRUCT late args.
        else if (curr->gtType == TYP_STRUCT)
        {
            // Evaluate (and discard) any comma-list side effects first.
            GenTree* arg = curr;
            while (arg->gtOper == GT_COMMA)
            {
                GenTree* op1 = arg->gtOp.gtOp1;
                genEvalSideEffects(op1);
                genUpdateLife(op1);
                arg = arg->gtOp.gtOp2;
            }
            noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
                         (arg->OperGet() == GT_MKREFANY));

            // This code passes a TYP_STRUCT by value using
            // the argument registers first and
            // then the lvaOutgoingArgSpaceVar area.
            //

            // We prefer to choose low registers here to reduce code bloat
            regMaskTP regNeedMask    = RBM_LOW_REGS;
            unsigned  firstStackSlot = 0;
            unsigned  argAlign       = TARGET_POINTER_SIZE;
            size_t    originalSize   = InferStructOpSizeAlign(arg, &argAlign);

            unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
            assert(slots > 0);

            if (regNum == REG_STK)
            {
                firstStackSlot = 0;
            }
            else
            {
                if (argAlign == (TARGET_POINTER_SIZE * 2))
                {
                    assert((regNum & 1) == 0);
                }

                // firstStackSlot is an index of the first slot of the struct
                // that is on the stack, in the range [0,slots]. If it is 'slots',
                // then the entire struct is in registers. It is also equal to
                // the number of slots of the struct that are passed in registers.

                if (curArgTabEntry->isHfaRegArg)
                {
                    // HFA arguments that have been decided to go into registers fit the reg space.
                    assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
                    assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
                           "HFA argument doesn't fit entirely in FP argument registers");
                    firstStackSlot = slots;
                }
                else if (regNum + slots > MAX_REG_ARG)
                {
                    firstStackSlot = MAX_REG_ARG - regNum;
                    assert(firstStackSlot > 0);
                }
                else
                {
                    firstStackSlot = slots;
                }

                if (curArgTabEntry->isHfaRegArg)
                {
                    // Mask out the registers used by an HFA arg from the ones used to compute tree into.
                    for (unsigned i = regNum; i < regNum + slots; i++)
                    {
                        regNeedMask &= ~genRegMask(regNumber(i));
                    }
                }
            }

            // This holds the set of registers corresponding to enregistered promoted struct field variables
            // that go dead after this use of the variable in the argument list.
            regMaskTP deadFieldVarRegs = RBM_NONE;

            // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
            // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
            // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
            // table entry for the promoted struct local.  As we fill slots with the contents of a
            // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
            // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
            // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
            // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
            // to be copied.
            LclVarDsc* promotedStructLocalVarDesc         = NULL;
            unsigned   bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
            unsigned   nextPromotedStructFieldVar         = BAD_VAR_NUM;
            GenTree*   structLocalTree                    = NULL;

            BYTE*     gcLayout = NULL;
            regNumber regSrc   = REG_NA;
            if (arg->gtOper == GT_OBJ)
            {
                // Are we loading a promoted struct local var?
                if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
                {
                    structLocalTree         = arg->gtObj.gtOp1->gtOp.gtOp1;
                    unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
                    LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];

                    Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);

                    if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
                                                                                                     // guaranteed to
                                                                                                     // live on stack.
                    {
                        // Fix 388395 ARM JitStress WP7
                        noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);

                        assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
                        promotedStructLocalVarDesc = varDsc;
                        nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
                    }
                }

                if (promotedStructLocalVarDesc == NULL)
                {
                    // If it's not a promoted struct variable, set "regSrc" to the address
                    // of the struct local.
                    genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
                    noway_assert(arg->gtObj.gtOp1->InReg());
                    regSrc = arg->gtObj.gtOp1->gtRegNum;
                    // Remove this register from the set of registers that we pick from, unless slots equals 1
                    if (slots > 1)
                        regNeedMask &= ~genRegMask(regSrc);
                }

                gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
                compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
            }
            else if (arg->gtOper == GT_LCL_VAR)
            {
                // Move the address of the LCL_VAR in arg into reg

                unsigned varNum = arg->gtLclVarCommon.gtLclNum;

                // Are we loading a promoted struct local var?
                structLocalTree         = arg;
                unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];

                noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);

                Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);

                if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
                                                                                                 // guaranteed to live
                                                                                                 // on stack.
                {
                    assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
                    promotedStructLocalVarDesc = varDsc;
                    nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
                }

                if (promotedStructLocalVarDesc == NULL)
                {
                    regSrc = regSet.rsPickFreeReg(regNeedMask);
                    // Remove this register from the set of registers that we pick from, unless slots equals 1
                    if (slots > 1)
                        regNeedMask &= ~genRegMask(regSrc);

                    getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
                    regTracker.rsTrackRegTrash(regSrc);

                    if (varDsc->lvExactSize >= TARGET_POINTER_SIZE)
                    {
                        gcLayout = compiler->lvaGetGcLayout(varNum);
                    }
                    else
                    {
                        // Too small to contain a GC pointer: a one-entry non-GC layout suffices.
                        gcLayout    = new (compiler, CMK_Codegen) BYTE[1];
                        gcLayout[0] = TYPE_GC_NONE;
                    }
                }
            }
            else if (arg->gtOper == GT_MKREFANY)
            {
                assert(slots == 2);
                assert((firstStackSlot == 1) || (firstStackSlot == 2));
                assert(argOffset == 0); // ???
                PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);

                // Adjust argOffset if part of this guy was pushed onto the stack
                if (firstStackSlot < slots)
                {
                    argOffset += TARGET_POINTER_SIZE;
                }

                // Skip the copy loop below because we have already placed the argument in the right place
                slots    = 0;
                gcLayout = NULL;
            }
            else
            {
                assert(!"Unsupported TYP_STRUCT arg kind");
                gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
            }

            if (promotedStructLocalVarDesc != NULL)
            {
                // We must do do the stack parts first, since those might need values
                // from argument registers that will be overwritten in the portion of the
                // loop that writes into the argument registers.
                bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
                // Now find the var number of the first that starts in the first stack slot.
                unsigned fieldVarLim =
                    promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
                while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
                           (firstStackSlot * TARGET_POINTER_SIZE) &&
                       nextPromotedStructFieldVar < fieldVarLim)
                {
                    nextPromotedStructFieldVar++;
                }
                // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the
                // first stack slot is after the last slot.
                assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
            }

            if (slots > 0) // the mkref case may have set "slots" to zero.
            {
                // First pass the stack portion of the struct (if any)
                //
                int argOffsetOfFirstStackSlot = argOffset;
                for (unsigned i = firstStackSlot; i < slots; i++)
                {
                    // Pick the emit attribute matching the GC-ness of this slot.
                    emitAttr fieldSize;
                    if (gcLayout[i] == TYPE_GC_NONE)
                        fieldSize = EA_PTRSIZE;
                    else if (gcLayout[i] == TYPE_GC_REF)
                        fieldSize = EA_GCREF;
                    else
                    {
                        noway_assert(gcLayout[i] == TYPE_GC_BYREF);
                        fieldSize = EA_BYREF;
                    }

                    regNumber maxRegArg = regNumber(MAX_REG_ARG);
                    if (promotedStructLocalVarDesc != NULL)
                    {
                        regNumber regTmp = REG_STK;

                        bool filledExtraSlot =
                            genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
                                                          &nextPromotedStructFieldVar,
                                                          &bytesOfNextSlotOfCurPromotedStruct,
                                                          /*pCurRegNum*/ &maxRegArg, argOffset,
                                                          /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
                                                              TARGET_POINTER_SIZE,
                                                          argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
                        if (filledExtraSlot)
                        {
                            i++;
                            argOffset += TARGET_POINTER_SIZE;
                        }
                    }
                    else // (promotedStructLocalVarDesc == NULL)
                    {
                        // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc
                        // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
                        // to regSet.rsPickFreeReg, so we need to be a little more forceful.
                        // Otherwise, just re-use the same register.
                        //
                        regNumber regTmp = regSrc;
                        if (slots != 1)
                        {
                            regMaskTP regSrcUsed;
                            regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);

                            regTmp = regSet.rsPickFreeReg(regNeedMask);

                            noway_assert(regTmp != regSrc);

                            regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
                        }

                        // Copy one pointer-sized slot: load from [regSrc + i*slot] and
                        // store into the outgoing argument area.
                        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
                                                   i * TARGET_POINTER_SIZE);

                        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
                                                  compiler->lvaOutgoingArgSpaceVar, argOffset);
                        regTracker.rsTrackRegTrash(regTmp);
                    }
                    argOffset += TARGET_POINTER_SIZE;
                }

                // Now pass the register portion of the struct
                //

                bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
                if (promotedStructLocalVarDesc != NULL)
                    nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;

                // Create a nested loop here so that the first time thru the loop
                // we setup all of the regArg registers except for possibly
                // the one that would overwrite regSrc.  Then in the final loop
                // (if necessary) we just setup regArg/regSrc with the overwrite
                //
                bool overwriteRegSrc     = false;
                bool needOverwriteRegSrc = false;
                do
                {
                    if (needOverwriteRegSrc)
                        overwriteRegSrc = true;

                    for (unsigned i = 0; i < firstStackSlot; i++)
                    {
                        regNumber regArg = (regNumber)(regNum + i);

                        // First pass: skip the register that holds regSrc (defer it);
                        // second pass: handle only that deferred register.
                        if (overwriteRegSrc == false)
                        {
                            if (regArg == regSrc)
                            {
                                needOverwriteRegSrc = true;
                                continue;
                            }
                        }
                        else
                        {
                            if (regArg != regSrc)
                                continue;
                        }

                        emitAttr fieldSize;
                        if (gcLayout[i] == TYPE_GC_NONE)
                            fieldSize = EA_PTRSIZE;
                        else if (gcLayout[i] == TYPE_GC_REF)
                            fieldSize = EA_GCREF;
                        else
                        {
                            noway_assert(gcLayout[i] == TYPE_GC_BYREF);
                            fieldSize = EA_BYREF;
                        }

                        regNumber regTmp = REG_STK;
                        if (promotedStructLocalVarDesc != NULL)
                        {
                            bool filledExtraSlot =
                                genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
                                                              fieldSize, &nextPromotedStructFieldVar,
                                                              &bytesOfNextSlotOfCurPromotedStruct,
                                                              /*pCurRegNum*/ &regArg,
                                                              /*argOffset*/ INT32_MAX,
                                                              /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
                                                              /*argOffsetOfFirstStackSlot*/ INT32_MAX,
                                                              &deadFieldVarRegs, &regTmp);
                            if (filledExtraSlot)
                                i++;
                        }
                        else
                        {
                            getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
                                                       fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
                        }
                        regTracker.rsTrackRegTrash(regArg);
                    }
                } while (needOverwriteRegSrc != overwriteRegSrc);
            }

            if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
            {
                // Done with the struct-address register acquired for the GT_OBJ case.
                regSet.rsMarkRegFree(genRegMask(regSrc));
            }

            if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
                                                                         // used.
            {
                arg->SetInReg();
                for (unsigned i = 1; i < firstStackSlot; i++)
                {
                    arg->gtRegNum = (regNumber)(regNum + i);
                    curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
                }
                arg->gtRegNum = regNum;
                curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
            }

            // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
            // so update liveness.
            genUpdateLife(arg);

            // Now, if some copied field locals were enregistered, and they're now dead, update the set of
            // register holding gc pointers.
            if (deadFieldVarRegs != RBM_NONE)
                gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
        }
        else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
        {
            // 64-bit integer args occupy either two slots in the outgoing area
            // or an even/odd register pair.
            if (curArgTabEntry->regNum == REG_STK)
            {
                // The arg is passed in the outgoing argument area of the stack frame
                genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
                assert(curr->InReg()); // should be enregistered after genCompIntoFreeRegPair(curr, 0)

                inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
                inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
            }
            else
            {
                assert(regNum < REG_ARG_LAST);
                regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
                genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
                assert(curr->gtRegPair == regPair);
                regSet.rsMarkRegPairUsed(curr);
            }
        }
#endif // _TARGET_ARM_
        else if (curArgTabEntry->regNum == REG_STK)
        {
            // The arg is passed in the outgoing argument area of the stack frame
            //
            genCodeForTree(curr, 0);
            assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)

            inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);

            if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
                gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
        }
        else
        {
            if (!varTypeIsFloating(curr->gtType))
            {
                genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
                assert(curr->gtRegNum == regNum);
                regSet.rsMarkRegUsed(curr);
            }
            else // varTypeIsFloating(curr->gtType)
            {
                if (genIsValidFloatReg(regNum))
                {
                    genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
                                  false);
                    assert(curr->gtRegNum == regNum);
                    regSet.rsMarkRegUsed(curr);
                }
                else
                {
                    genCodeForTree(curr, 0);
                    // If we are loading a floating point type into integer registers
                    // then it must be for varargs.
                    // genCodeForTree will load it into a floating point register,
                    // now copy it into the correct integer register(s)
                    if (curr->TypeGet() == TYP_FLOAT)
                    {
                        assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
                        regSet.rsSpillRegIfUsed(regNum);
#ifdef _TARGET_ARM_
                        getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
#else
#error "Unsupported target"
#endif
                        regTracker.rsTrackRegTrash(regNum);

                        curr->gtType   = TYP_INT; // Change this to TYP_INT in case we need to spill this register
                        curr->gtRegNum = regNum;
                        regSet.rsMarkRegUsed(curr);
                    }
                    else
                    {
                        assert(curr->TypeGet() == TYP_DOUBLE);
                        regNumber intRegNumLo = regNum;
                        curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
#ifdef _TARGET_ARM_
                        regNumber intRegNumHi = regNumber(intRegNumLo + 1);
                        assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
                        assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
                        regSet.rsSpillRegIfUsed(intRegNumHi);
                        regSet.rsSpillRegIfUsed(intRegNumLo);

                        getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
                        regTracker.rsTrackRegTrash(intRegNumLo);
                        regTracker.rsTrackRegTrash(intRegNumHi);
                        curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
                        regSet.rsMarkRegPairUsed(curr);
#else
#error "Unsupported target"
#endif
                    }
                }
            }
        }
    }

    /* If any of the previously loaded arguments were spilled - reload them */

    for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
    {
        curr = lateArgs->Current();
        assert(curr);

        if (curr->gtFlags & GTF_SPILLED)
        {
            if (isRegPairType(curr->gtType))
            {
                regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
            }
            else
            {
                regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
            }
        }
    }
}
18054
18055 #ifdef _TARGET_ARM_
18056
18057 // 'Push' a single GT_MKREFANY argument onto a call's argument list
18058 // The argument is passed as described by the fgArgTabEntry
18059 // If any part of the struct is to be passed in a register the
18060 // regNum value will be equal to the the registers used to pass the
18061 // the first part of the struct.
18062 // If any part is to go onto the stack, we first generate the
18063 // value into a register specified by 'regNeedMask' and
18064 // then store it to the out going argument area.
18065 // When this method returns, both parts of the TypeReference have
18066 // been pushed onto the stack, but *no* registers have been marked
18067 // as 'in-use', that is the responsibility of the caller.
18068 //
void CodeGen::PushMkRefAnyArg(GenTree* mkRefAnyTree, fgArgTabEntry* curArgTabEntry, regMaskTP regNeedMask)
{
    regNumber regNum = curArgTabEntry->regNum; // register for the first pointer-sized slot, or REG_STK
    regNumber regNum2;                         // register for the second pointer-sized slot, or REG_STK
    assert(mkRefAnyTree->gtOper == GT_MKREFANY);
    regMaskTP arg1RegMask = 0; // non-zero iff the first field is kept live in a register while field 2 is generated
    int       argOffset   = curArgTabEntry->slotNum * TARGET_POINTER_SIZE; // offset into the outgoing arg area

    // Construct the TypedReference directly into the argument list of the call by
    // 'pushing' the first field of the typed reference: the pointer.
    // Do this by directly generating it into the argument register or outgoing arg area of the stack.
    // Mark it as used so we don't trash it while generating the second field.
    //
    if (regNum == REG_STK)
    {
        // First field goes to the stack: compute it into a register chosen from
        // 'regNeedMask', then store that register to the outgoing argument area.
        genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
        noway_assert(mkRefAnyTree->gtOp.gtOp1->InReg());
        regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
        inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
        regTracker.rsTrackRegTrash(tmpReg1);
        argOffset += TARGET_POINTER_SIZE;
        regNum2 = REG_STK; // if slot 1 is on the stack, slot 2 must be as well
    }
    else
    {
        // First field goes to an argument register. KEEP_REG protects it from being
        // reused while we generate the second field below.
        assert(regNum <= REG_ARG_LAST);
        arg1RegMask = genRegMask(regNum);
        genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
        // Second slot uses the next argument register, or the stack if regNum was the last one.
        regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
    }

    // Now 'push' the second field of the typed reference: the method table.
    if (regNum2 == REG_STK)
    {
        genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
        noway_assert(mkRefAnyTree->gtOp.gtOp2->InReg());
        regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
        inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
        regTracker.rsTrackRegTrash(tmpReg2);
    }
    else
    {
        assert(regNum2 <= REG_ARG_LAST);
        // We don't have to mark this register as being in use here because it will
        // be done by the caller, and we don't want to double-count it.
        genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
    }

    // Now that we are done generating the second part of the TypedReference, we can mark
    // the first register as free.
    // The caller in the shared path will re-mark all registers used by this argument
    // as being used, so we don't want to double-count this one.
    if (arg1RegMask != 0)
    {
        GenTree* op1 = mkRefAnyTree->gtOp.gtOp1;
        if (op1->gtFlags & GTF_SPILLED)
        {
            /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */

            regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
        }
        else
        {
            regSet.rsMarkRegFree(arg1RegMask);
        }
    }
}
18136 #endif // _TARGET_ARM_
18137
18138 #endif // FEATURE_FIXED_OUT_ARGS
18139
18140 regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreeCall* call)
18141 {
18142     assert((gtCallTypes)call->gtCallType == CT_INDIRECT);
18143
18144     regMaskTP fptrRegs;
18145
18146     /* Loading the indirect call target might cause one or more of the previously
18147        loaded argument registers to be spilled. So, we save information about all
18148        the argument registers, and unspill any of them that get spilled, after
18149        the call target is loaded.
18150     */
18151     struct
18152     {
18153         GenTree* node;
18154         union {
18155             regNumber regNum;
18156             regPairNo regPair;
18157         };
18158     } regArgTab[MAX_REG_ARG];
18159
18160     /* Record the previously loaded arguments, if any */
18161
18162     unsigned  regIndex;
18163     regMaskTP prefRegs = regSet.rsRegMaskFree();
18164     regMaskTP argRegs  = RBM_NONE;
18165     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18166     {
18167         regMaskTP mask;
18168         regNumber regNum         = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18169         GenTree*  argTree        = regSet.rsUsedTree[regNum];
18170         regArgTab[regIndex].node = argTree;
18171         if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
18172         {
18173             assert(argTree->InReg());
18174             if (isRegPairType(argTree->gtType))
18175             {
18176                 regPairNo regPair = argTree->gtRegPair;
18177                 assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
18178                 regArgTab[regIndex].regPair = regPair;
18179                 mask                        = genRegPairMask(regPair);
18180             }
18181             else
18182             {
18183                 assert(regNum == argTree->gtRegNum);
18184                 regArgTab[regIndex].regNum = regNum;
18185                 mask                       = genRegMask(regNum);
18186             }
18187             assert(!(prefRegs & mask));
18188             argRegs |= mask;
18189         }
18190     }
18191
18192     /* Record the register(s) used for the indirect call func ptr */
18193     fptrRegs = genMakeRvalueAddressable(call->gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18194
18195     /* If any of the previously loaded arguments were spilled, reload them */
18196
18197     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18198     {
18199         GenTree* argTree = regArgTab[regIndex].node;
18200         if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18201         {
18202             assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18203             if (isRegPairType(argTree->gtType))
18204             {
18205                 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18206             }
18207             else
18208             {
18209                 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18210             }
18211         }
18212     }
18213
18214     /* Make sure the target is still addressable while avoiding the argument registers */
18215
18216     fptrRegs = genKeepAddressable(call->gtCallAddr, fptrRegs, argRegs);
18217
18218     return fptrRegs;
18219 }
18220
18221 /*****************************************************************************
18222  *
18223  *  Generate code for a call. If the call returns a value in register(s), the
18224  *  register mask that describes where the result will be found is returned;
18225  *  otherwise, RBM_NONE is returned.
18226  */
18227
18228 #ifdef _PREFAST_
18229 #pragma warning(push)
18230 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
18231 #endif
18232 regMaskTP CodeGen::genCodeForCall(GenTreeCall* call, bool valUsed)
18233 {
18234     emitAttr              retSize;
18235     size_t                argSize;
18236     size_t                args;
18237     regMaskTP             retVal;
18238     emitter::EmitCallType emitCallType;
18239
18240     unsigned saveStackLvl;
18241
18242     BasicBlock* returnLabel   = DUMMY_INIT(NULL);
18243     LclVarDsc*  frameListRoot = NULL;
18244
18245     unsigned savCurIntArgReg;
18246     unsigned savCurFloatArgReg;
18247
18248     unsigned areg;
18249
18250     regMaskTP fptrRegs = RBM_NONE;
18251     regMaskTP vptrMask = RBM_NONE;
18252
18253 #ifdef DEBUG
18254     unsigned stackLvl = getEmitter()->emitCurStackLvl;
18255
18256     if (compiler->verbose)
18257     {
18258         printf("\t\t\t\t\t\t\tBeg call ");
18259         Compiler::printTreeID(call);
18260         printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18261     }
18262 #endif
18263
18264 #ifdef _TARGET_ARM_
18265     if (compiler->opts.ShouldUsePInvokeHelpers() && (call->gtFlags & GTF_CALL_UNMANAGED) && !call->IsVirtual())
18266     {
18267         (void)genPInvokeCallProlog(nullptr, 0, (CORINFO_METHOD_HANDLE) nullptr, nullptr);
18268     }
18269 #endif
18270
18271     gtCallTypes callType = (gtCallTypes)call->gtCallType;
18272     IL_OFFSETX  ilOffset = BAD_IL_OFFSET;
18273
18274     CORINFO_SIG_INFO* sigInfo = nullptr;
18275
18276     if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18277     {
18278         (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18279     }
18280
18281     /* Make some sanity checks on the call node */
18282
18283     // "this" only makes sense for user functions
18284     noway_assert(call->gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18285     // tailcalls won't be done for helpers, caller-pop args, and check that
18286     // the global flag is set
18287     noway_assert(!call->IsTailCall() ||
18288                  (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18289
18290 #ifdef DEBUG
18291     // Pass the call signature information down into the emitter so the emitter can associate
18292     // native call sites with the signatures they were generated from.
18293     if (callType != CT_HELPER)
18294     {
18295         sigInfo = call->callSig;
18296     }
18297 #endif // DEBUG
18298
18299     unsigned pseudoStackLvl = 0;
18300
18301     if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18302     {
18303         noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18304
18305         pseudoStackLvl = genStackLevel;
18306
18307         noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18308                       "so fgAddCodeRef() should have set isFramePointerRequired()");
18309     }
18310
18311     /* Mark the current stack level and list of pointer arguments */
18312
18313     saveStackLvl = genStackLevel;
18314
18315     /*-------------------------------------------------------------------------
18316      *  Set up the registers and arguments
18317      */
18318
18319     /* We'll keep track of how much we've pushed on the stack */
18320
18321     argSize = 0;
18322
18323     /* We need to get a label for the return address with the proper stack depth. */
18324     /* For the callee pops case (the default) that is before the args are pushed. */
18325
18326     if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
18327     {
18328         returnLabel = genCreateTempLabel();
18329     }
18330
18331     /*
18332         Make sure to save the current argument register status
18333         in case we have nested calls.
18334      */
18335
18336     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18337     savCurIntArgReg              = intRegState.rsCurRegArgNum;
18338     savCurFloatArgReg            = floatRegState.rsCurRegArgNum;
18339     intRegState.rsCurRegArgNum   = 0;
18340     floatRegState.rsCurRegArgNum = 0;
18341
18342     /* Pass the arguments */
18343
18344     if ((call->gtCallObjp != NULL) || (call->gtCallArgs != NULL))
18345     {
18346         argSize += genPushArgList(call);
18347     }
18348
18349     /* We need to get a label for the return address with the proper stack depth. */
18350     /* For the caller pops case (cdecl) that is after the args are pushed. */
18351
18352     if (call->gtFlags & GTF_CALL_UNMANAGED)
18353     {
18354         if (call->gtFlags & GTF_CALL_POP_ARGS)
18355             returnLabel = genCreateTempLabel();
18356
18357         /* Make sure that we now have a label */
18358         noway_assert(returnLabel != DUMMY_INIT(NULL));
18359     }
18360
18361     if (callType == CT_INDIRECT)
18362     {
18363         fptrRegs = genLoadIndirectCallTarget(call);
18364     }
18365
18366     /* Make sure any callee-trashed registers are saved */
18367
18368     regMaskTP calleeTrashedRegs = RBM_NONE;
18369
18370 #if GTF_CALL_REG_SAVE
18371     if (call->gtFlags & GTF_CALL_REG_SAVE)
18372     {
18373         /* The return value reg(s) will definitely be trashed */
18374
18375         switch (call->gtType)
18376         {
18377             case TYP_INT:
18378             case TYP_REF:
18379             case TYP_BYREF:
18380 #if !CPU_HAS_FP_SUPPORT
18381             case TYP_FLOAT:
18382 #endif
18383                 calleeTrashedRegs = RBM_INTRET;
18384                 break;
18385
18386             case TYP_LONG:
18387 #if !CPU_HAS_FP_SUPPORT
18388             case TYP_DOUBLE:
18389 #endif
18390                 calleeTrashedRegs = RBM_LNGRET;
18391                 break;
18392
18393             case TYP_VOID:
18394 #if CPU_HAS_FP_SUPPORT
18395             case TYP_FLOAT:
18396             case TYP_DOUBLE:
18397 #endif
18398                 calleeTrashedRegs = 0;
18399                 break;
18400
18401             default:
18402                 noway_assert(!"unhandled/unexpected type");
18403         }
18404     }
18405     else
18406 #endif
18407     {
18408         calleeTrashedRegs = RBM_CALLEE_TRASH;
18409     }
18410
18411     /* Spill any callee-saved registers which are being used */
18412
18413     regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18414
18415     /* We need to save all GC registers to the InlinedCallFrame.
18416        Instead, just spill them to temps. */
18417
18418     if (call->gtFlags & GTF_CALL_UNMANAGED)
18419         spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18420
18421     // Ignore fptrRegs as it is needed only to perform the indirect call
18422
18423     spillRegs &= ~fptrRegs;
18424
18425     /* Do not spill the argument registers.
18426        Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18427
18428     noway_assert((regSet.rsMaskMult & call->gtCallRegUsedMask) == 0);
18429     spillRegs &= ~call->gtCallRegUsedMask;
18430
18431     if (spillRegs)
18432     {
18433         regSet.rsSpillRegs(spillRegs);
18434     }
18435
18436 #if FEATURE_STACK_FP_X87
18437     // Spill fp stack
18438     SpillForCallStackFP();
18439
18440     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18441     {
18442         // Pick up a reg
18443         regNumber regReturn = regSet.PickRegFloat();
18444
18445         // Assign reg to tree
18446         genMarkTreeInReg(call, regReturn);
18447
18448         // Mark as used
18449         regSet.SetUsedRegFloat(call, true);
18450
18451         // Update fp state
18452         compCurFPState.Push(regReturn);
18453     }
18454 #else
18455     SpillForCallRegisterFP(call->gtCallRegUsedMask);
18456 #endif
18457
18458     /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
18459
18460     retSize = EA_PTRSIZE;
18461
18462     if (valUsed)
18463     {
18464         if (call->gtType == TYP_REF)
18465         {
18466             retSize = EA_GCREF;
18467         }
18468         else if (call->gtType == TYP_BYREF)
18469         {
18470             retSize = EA_BYREF;
18471         }
18472     }
18473
18474     /*-------------------------------------------------------------------------
18475      * For caller-pop calls, the GC info will report the arguments as pending
18476        arguments as the caller explicitly pops them. Also should be
18477        reported as non-GC arguments as they effectively go dead at the
18478        call site (callee owns them)
18479      */
18480
18481     args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
18482
18483 #ifdef PROFILING_SUPPORTED
18484
18485     /*-------------------------------------------------------------------------
18486      *  Generate the profiling hooks for the call
18487      */
18488
18489     /* Treat special cases first */
18490
18491     /* fire the event at the call site */
18492     /* alas, right now I can only handle calls via a method handle */
18493     if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->IsTailCall())
18494     {
18495         unsigned saveStackLvl2 = genStackLevel;
18496
18497         //
18498         // Push the profilerHandle
18499         //
18500         CLANG_FORMAT_COMMENT_ANCHOR;
18501
18502 #ifdef _TARGET_X86_
18503         regMaskTP byrefPushedRegs;
18504         regMaskTP norefPushedRegs;
18505         regMaskTP pushedArgRegs = genPushRegs(call->gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
18506
18507         if (compiler->compProfilerMethHndIndirected)
18508         {
18509             getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
18510                                        (ssize_t)compiler->compProfilerMethHnd);
18511         }
18512         else
18513         {
18514             inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
18515         }
18516         genSinglePush();
18517
18518         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18519                           sizeof(int) * 1, // argSize
18520                           EA_UNKNOWN);     // retSize
18521
18522         //
18523         // Adjust the number of stack slots used by this managed method if necessary.
18524         //
18525         if (compiler->fgPtrArgCntMax < 1)
18526         {
18527             JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
18528             compiler->fgPtrArgCntMax = 1;
18529         }
18530
18531         genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
18532 #elif _TARGET_ARM_
18533         // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
18534         // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
18535         // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
18536         //
18537         // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
18538         // to disturb them and hence argument registers are locked here.
18539         regMaskTP usedMask = RBM_NONE;
18540         regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
18541
18542         regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
18543         regSet.rsLockReg(genRegMask(scratchReg));
18544
18545         emitAttr attr = EA_UNKNOWN;
18546         if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
18547         {
18548             attr = EA_GCREF;
18549             gcInfo.gcMarkRegSetGCref(scratchReg);
18550         }
18551         else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
18552         {
18553             attr = EA_BYREF;
18554             gcInfo.gcMarkRegSetByref(scratchReg);
18555         }
18556         else
18557         {
18558             attr = EA_4BYTE;
18559         }
18560
18561         getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
18562         regTracker.rsTrackRegTrash(scratchReg);
18563
18564         if (compiler->compProfilerMethHndIndirected)
18565         {
18566             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18567             regTracker.rsTrackRegTrash(REG_R0);
18568         }
18569         else
18570         {
18571             instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18572         }
18573
18574         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18575                           0,           // argSize
18576                           EA_UNKNOWN); // retSize
18577
18578         // Restore back to the state that existed before profiler callback
18579         gcInfo.gcMarkRegSetNpt(scratchReg);
18580         getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
18581         regTracker.rsTrackRegTrash(REG_R0);
18582         regSet.rsUnlockReg(genRegMask(scratchReg));
18583         regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
18584 #else
18585         NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
18586 #endif //_TARGET_X86_
18587
18588         /* Restore the stack level */
18589         SetStackLevel(saveStackLvl2);
18590     }
18591
18592 #endif // PROFILING_SUPPORTED
18593
18594 #ifdef DEBUG
18595     /*-------------------------------------------------------------------------
18596      *  Generate an ESP check for the call
18597      */
18598
18599     if (compiler->opts.compStackCheckOnCall
18600 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
18601         // check the stacks as frequently as possible
18602         && !call->IsHelperCall()
18603 #else
18604         && call->gtCallType == CT_USER_FUNC
18605 #endif
18606             )
18607     {
18608         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
18609                      compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
18610                      compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
18611         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
18612     }
18613 #endif
18614
18615     /*-------------------------------------------------------------------------
18616      *  Generate the call
18617      */
18618
18619     bool            fPossibleSyncHelperCall = false;
18620     CorInfoHelpFunc helperNum               = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
18621
18622     bool fTailCallTargetIsVSD = false;
18623
18624     bool fTailCall = (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
18625
18626     /* Check for Delegate.Invoke. If so, we inline it. We get the
18627        target-object and target-function from the delegate-object, and do
18628        an indirect call.
18629      */
18630
18631     if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
18632     {
18633         noway_assert(call->gtCallType == CT_USER_FUNC);
18634
18635         assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
18636                 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
18637                (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
18638
18639         /* Find the offsets of the 'this' pointer and new target */
18640
18641         CORINFO_EE_INFO* pInfo;
18642         unsigned         instOffs;     // offset of new 'this' pointer
18643         unsigned         firstTgtOffs; // offset of first target to invoke
18644         const regNumber  regThis = genGetThisArgReg(call);
18645
18646         pInfo        = compiler->eeGetEEInfo();
18647         instOffs     = pInfo->offsetOfDelegateInstance;
18648         firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
18649
18650 #ifdef _TARGET_ARM_
18651         // Ensure that we don't trash any of these registers if we have to load
18652         // the helper call target into a register to invoke it.
18653         regMaskTP regsUsed = 0;
18654
18655         if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18656         {
18657             getEmitter()->emitIns_R_R_I(INS_add, EA_BYREF, compiler->virtualStubParamInfo->GetReg(), regThis,
18658                                         pInfo->offsetOfSecureDelegateIndirectCell);
18659             regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18660
18661             // Ensure that the virtual stub param info register doesn't get reused before the call is taken
18662             regSet.rsLockReg(compiler->virtualStubParamInfo->GetRegMask(), &regsUsed);
18663         }
18664
18665 #endif // _TARGET_ARM_
18666
18667         // Grab an available register to use for the CALL indirection
18668         regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
18669
18670         //  Save the invoke-target-function in indCallReg
18671         //  'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
18672         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
18673         regTracker.rsTrackRegTrash(indCallReg);
18674
18675         /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
18676
18677         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
18678         regTracker.rsTrackRegTrash(regThis);
18679         noway_assert(instOffs < 127);
18680
18681         /* Call through indCallReg */
18682
18683         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18684                                    NULL,                                // methHnd
18685                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18686                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18687                                    gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
18688
18689 #ifdef _TARGET_ARM_
18690         if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18691         {
18692             regSet.rsUnlockReg(compiler->virtualStubParamInfo->GetRegMask(), regsUsed);
18693         }
18694 #endif // _TARGET_ARM_
18695     }
18696     else
18697
18698         /*-------------------------------------------------------------------------
18699          *  Virtual and interface calls
18700          */
18701
18702         switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
18703         {
18704             case GTF_CALL_VIRT_STUB:
18705             {
18706                 regSet.rsSetRegsModified(compiler->virtualStubParamInfo->GetRegMask());
18707
18708                 // An x86 JIT which uses full stub dispatch must generate only
18709                 // the following stub dispatch calls:
18710                 //
18711                 // (1) isCallRelativeIndirect:
18712                 //        call dword ptr [rel32]  ;  FF 15 ---rel32----
18713                 // (2) isCallRelative:
18714                 //        call abc                ;     E8 ---rel32----
18715                 // (3) isCallRegisterIndirect:
18716                 //     3-byte nop                 ;
18717                 //     call dword ptr [eax]       ;     FF 10
18718                 //
18719                 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18720                 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18721
18722                 //
18723                 // Please do not insert any Random NOPs while constructing this VSD call
18724                 //
18725                 getEmitter()->emitDisableRandomNops();
18726
18727                 if (!fTailCall)
18728                 {
18729                     // This is code to set up an indirect call to a stub address computed
18730                     // via dictionary lookup.  However the dispatch stub receivers aren't set up
18731                     // to accept such calls at the moment.
18732                     if (callType == CT_INDIRECT)
18733                     {
18734                         regNumber indReg;
18735
18736                         // -------------------------------------------------------------------------
18737                         // The importer decided we needed a stub call via a computed
18738                         // stub dispatch address, i.e. an address which came from a dictionary lookup.
18739                         //   - The dictionary lookup produces an indirected address, suitable for call
18740                         //     via "call [virtualStubParamInfo.reg]"
18741                         //
18742                         // This combination will only be generated for shared generic code and when
18743                         // stub dispatch is active.
18744
18745                         // No need to null check the this pointer - the dispatch code will deal with this.
18746
18747                         noway_assert(genStillAddressable(call->gtCallAddr));
18748
18749                         // Now put the address in virtualStubParamInfo.reg.
18750                         // This is typically a nop when the register used for
18751                         // the gtCallAddr is virtualStubParamInfo.reg
18752                         //
18753                         inst_RV_TT(INS_mov, compiler->virtualStubParamInfo->GetReg(), call->gtCallAddr);
18754                         regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18755
18756 #if defined(_TARGET_X86_)
18757                         // Emit enough bytes of nops so that this sequence can be distinguished
18758                         // from other virtual stub dispatch calls.
18759                         //
18760                         // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18761                         //        vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18762                         //
18763                         getEmitter()->emitIns_Nop(3);
18764
18765                         // Make the virtual stub call:
18766                         //     call   [virtualStubParamInfo.reg]
18767                         //
18768                         emitCallType = emitter::EC_INDIR_ARD;
18769
18770                         indReg = compiler->virtualStubParamInfo->GetReg();
18771                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18772
18773 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
18774
18775                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18776
18777                         // Make the virtual stub call:
18778                         //     ldr   indReg, [virtualStubParamInfo.reg]
18779                         //     call  indReg
18780                         //
18781                         emitCallType = emitter::EC_INDIR_R;
18782
18783                         // Now dereference [virtualStubParamInfo.reg] and put it in a new temp register 'indReg'
18784                         //
18785                         indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18786                         assert(call->gtCallAddr->InReg());
18787                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18788                                                     compiler->virtualStubParamInfo->GetReg(), 0);
18789                         regTracker.rsTrackRegTrash(indReg);
18790
18791 #else
18792 #error "Unknown target for VSD call"
18793 #endif
18794
18795                         getEmitter()->emitIns_Call(emitCallType,
18796                                                    NULL,                                // methHnd
18797                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18798                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18799                                                    gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18800                     }
18801                     else
18802                     {
18803                         // -------------------------------------------------------------------------
18804                         // Check for a direct stub call.
18805                         //
18806
18807                         // Get stub addr. This will return NULL if virtual call stubs are not active
18808                         void* stubAddr = NULL;
18809
18810                         stubAddr = (void*)call->gtStubCallStubAddr;
18811
18812                         noway_assert(stubAddr != NULL);
18813
18814                         // -------------------------------------------------------------------------
18815                         // Direct stub calls, though the stubAddr itself may still need to be
18816                         // accesed via an indirection.
18817                         //
18818
18819                         // No need to null check - the dispatch code will deal with null this.
18820
18821                         emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
18822                         void*                 addr             = stubAddr;
18823                         int                   disp             = 0;
18824                         regNumber             callReg          = REG_NA;
18825
18826                         if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
18827                         {
18828 #if CPU_LOAD_STORE_ARCH
18829                             callReg = regSet.rsGrabReg(compiler->virtualStubParamInfo->GetRegMask());
18830                             noway_assert(callReg == compiler->virtualStubParamInfo->GetReg());
18831
18832                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, compiler->virtualStubParamInfo->GetReg(),
18833                                                    (ssize_t)stubAddr);
18834                             // The stub will write-back to this register, so don't track it
18835                             regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18836                             regNumber indReg;
18837                             if (compiler->IsTargetAbi(CORINFO_CORERT_ABI))
18838                             {
18839                                 indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18840                             }
18841                             else
18842                             {
18843                                 indReg = REG_JUMP_THUNK_PARAM;
18844                             }
18845                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18846                                                         compiler->virtualStubParamInfo->GetReg(), 0);
18847                             regTracker.rsTrackRegTrash(indReg);
18848                             callTypeStubAddr = emitter::EC_INDIR_R;
18849                             getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18850                                                        NULL,                                // methHnd
18851                                                        INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18852                                                        args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18853                                                        gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18854
18855 #else
18856                             // emit an indirect call
18857                             callTypeStubAddr = emitter::EC_INDIR_C;
18858                             addr             = 0;
18859                             disp             = (ssize_t)stubAddr;
18860 #endif
18861                         }
18862 #if CPU_LOAD_STORE_ARCH
18863                         if (callTypeStubAddr != emitter::EC_INDIR_R)
18864 #endif
18865                         {
18866                             getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCallMethHnd,
18867                                                        INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
18868                                                        gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18869                                                        gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
18870                         }
18871                     }
18872                 }
18873                 else // tailCall is true
18874                 {
18875
18876 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
// moves the this pointer out of its usual place and into the argument list.
18878 #ifdef _TARGET_X86_
18879
18880                     // Generate "cmp ECX, [ECX]" to trap null pointers
18881                     const regNumber regThis = genGetThisArgReg(call);
18882                     getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18883
18884 #endif // _TARGET_X86_
18885
18886                     if (callType == CT_INDIRECT)
18887                     {
18888                         noway_assert(genStillAddressable(call->gtCallAddr));
18889
18890                         // Now put the address in EAX.
18891                         inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
18892                         regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
18893
18894                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18895                     }
18896                     else
18897                     {
18898                         // importer/EE should guarantee the indirection
18899                         noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
18900
18901                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
18902                                                ssize_t(call->gtStubCallStubAddr));
18903                     }
18904
18905                     fTailCallTargetIsVSD = true;
18906                 }
18907
18908                 //
18909                 // OK to start inserting random NOPs again
18910                 //
18911                 getEmitter()->emitEnableRandomNops();
18912             }
18913             break;
18914
18915             case GTF_CALL_VIRT_VTABLE:
18916                 // stub dispatching is off or this is not a virtual call (could be a tailcall)
18917                 {
18918                     regNumber vptrReg;
18919                     regNumber vptrReg1  = REG_NA;
18920                     regMaskTP vptrMask1 = RBM_NONE;
18921                     unsigned  vtabOffsOfIndirection;
18922                     unsigned  vtabOffsAfterIndirection;
18923                     bool      isRelative;
18924
18925                     noway_assert(callType == CT_USER_FUNC);
18926
18927                     /* Get hold of the vtable offset (note: this might be expensive) */
18928
18929                     compiler->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
18930                                                                       &vtabOffsAfterIndirection, &isRelative);
18931
18932                     vptrReg =
18933                         regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
18934                     vptrMask = genRegMask(vptrReg);
18935
18936                     if (isRelative)
18937                     {
18938                         vptrReg1  = regSet.rsGrabReg(RBM_ALLINT & ~vptrMask);
18939                         vptrMask1 = genRegMask(vptrReg1);
18940                     }
18941
18942                     /* The register no longer holds a live pointer value */
18943                     gcInfo.gcMarkRegSetNpt(vptrMask);
18944
18945                     if (isRelative)
18946                     {
18947                         gcInfo.gcMarkRegSetNpt(vptrMask1);
18948                     }
18949
18950                     // MOV vptrReg, [REG_CALL_THIS + offs]
18951                     getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
18952                                                VPTR_OFFS);
18953                     regTracker.rsTrackRegTrash(vptrReg);
18954
18955                     if (isRelative)
18956                     {
18957                         regTracker.rsTrackRegTrash(vptrReg1);
18958                     }
18959
18960                     noway_assert(vptrMask & ~call->gtCallRegUsedMask);
18961
18962                     /* The register no longer holds a live pointer value */
18963                     gcInfo.gcMarkRegSetNpt(vptrMask);
18964
18965                     /* Get the appropriate vtable chunk */
18966
18967                     if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
18968                     {
18969                         if (isRelative)
18970                         {
18971 #if defined(_TARGET_ARM_)
18972                             unsigned offset = vtabOffsOfIndirection + vtabOffsAfterIndirection;
18973
18974                             // ADD vptrReg1, REG_CALL_IND_SCRATCH, vtabOffsOfIndirection + vtabOffsAfterIndirection
18975                             getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, vptrReg1, vptrReg, offset);
18976 #else
18977                             unreached();
18978 #endif
18979                         }
18980
18981                         // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
18982                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18983                                                    vtabOffsOfIndirection);
18984                     }
18985                     else
18986                     {
18987                         assert(!isRelative);
18988                     }
18989
18990                     /* Call through the appropriate vtable slot */
18991
18992                     if (fTailCall)
18993                     {
18994                         if (isRelative)
18995                         {
18996 #if defined(_TARGET_ARM_)
18997                             /* Load the function address: "[vptrReg1 + vptrReg] -> reg_intret" */
18998                             getEmitter()->emitIns_R_ARR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg1,
18999                                                         vptrReg, 0);
19000 #else
19001                             unreached();
19002 #endif
19003                         }
19004                         else
19005                         {
19006                             /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
19007                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
19008                                                        vtabOffsAfterIndirection);
19009                         }
19010                     }
19011                     else
19012                     {
19013 #if CPU_LOAD_STORE_ARCH
19014                         if (isRelative)
19015                         {
19016                             getEmitter()->emitIns_R_ARR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg1, vptrReg,
19017                                                         0);
19018                         }
19019                         else
19020                         {
19021                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
19022                                                        vtabOffsAfterIndirection);
19023                         }
19024
19025                         getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCallMethHnd,
19026                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19027                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19028                                                    gcInfo.gcRegByrefSetCur, ilOffset,
19029                                                    vptrReg); // ireg
19030 #else
19031                         assert(!isRelative);
19032                         getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCallMethHnd,
19033                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19034                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19035                                                    gcInfo.gcRegByrefSetCur, ilOffset,
19036                                                    vptrReg,                   // ireg
19037                                                    REG_NA,                    // xreg
19038                                                    0,                         // xmul
19039                                                    vtabOffsAfterIndirection); // disp
19040 #endif // CPU_LOAD_STORE_ARCH
19041                     }
19042                 }
19043                 break;
19044
19045             case GTF_CALL_NONVIRT:
19046             {
19047                 //------------------------ Non-virtual/Indirect calls -------------------------
19048                 // Lots of cases follow
19049                 //    - Direct P/Invoke calls
19050                 //    - Indirect calls to P/Invoke functions via the P/Invoke stub
19051                 //    - Direct Helper calls
19052                 //    - Indirect Helper calls
19053                 //    - Direct calls to known addresses
19054                 //    - Direct calls where address is accessed by one or two indirections
19055                 //    - Indirect calls to computed addresses
19056                 //    - Tailcall versions of all of the above
19057
19058                 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
19059
19060                 //------------------------------------------------------
19061                 // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
19062                 //
19063                 // For (final and private) functions which were called with
19064                 //  invokevirtual, but which we call directly, we need to
19065                 //  dereference the object pointer to make sure it's not NULL.
19066                 //
19067
19068                 if (call->gtFlags & GTF_CALL_NULLCHECK)
19069                 {
19070                     /* Generate "cmp ECX, [ECX]" to trap null pointers */
19071                     const regNumber regThis = genGetThisArgReg(call);
19072 #if CPU_LOAD_STORE_ARCH
19073                     regNumber indReg =
19074                         regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
19075                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
19076                     regTracker.rsTrackRegTrash(indReg);
19077 #else
19078                     getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19079 #endif
19080                 }
19081
19082                 if (call->gtFlags & GTF_CALL_UNMANAGED)
19083                 {
19084                     //------------------------------------------------------
19085                     // Non-virtual/Indirect calls: PInvoke calls.
19086
19087                     noway_assert(compiler->info.compCallUnmanaged != 0);
19088
19089                     /* args shouldn't be greater than 64K */
19090
19091                     noway_assert((argSize & 0xffff0000) == 0);
19092
19093                     /* Remember the varDsc for the callsite-epilog */
19094
19095                     frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
19096
19097                     // exact codegen is required
19098                     getEmitter()->emitDisableRandomNops();
19099
19100                     int nArgSize = 0;
19101
19102                     regNumber indCallReg = REG_NA;
19103
19104                     if (callType == CT_INDIRECT)
19105                     {
19106                         noway_assert(genStillAddressable(call->gtCallAddr));
19107
19108                         if (call->gtCallAddr->InReg())
19109                             indCallReg = call->gtCallAddr->gtRegNum;
19110
19111                         nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
19112                         methHnd  = 0;
19113                     }
19114                     else
19115                     {
19116                         noway_assert(callType == CT_USER_FUNC);
19117                     }
19118
19119                     regNumber tcbReg = REG_NA;
19120
19121                     if (!compiler->opts.ShouldUsePInvokeHelpers())
19122                     {
19123                         tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19124                     }
19125
19126                     void* addr = NULL;
19127
19128                     if (callType == CT_INDIRECT)
19129                     {
19130                         /* Double check that the callee didn't use/trash the
19131                            registers holding the call target.
19132                         */
19133                         noway_assert(tcbReg != indCallReg);
19134
19135                         if (indCallReg == REG_NA)
19136                         {
19137                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19138                                                                        // indirection
19139
19140                             /* Please note that this even works with tcbReg == REG_EAX.
19141                             tcbReg contains an interesting value only if frameListRoot is
19142                             an enregistered local that stays alive across the call
19143                             (certainly not EAX). If frameListRoot has been moved into
19144                             EAX, we can trash it since it won't survive across the call
19145                             anyways.
19146                             */
19147
19148                             inst_RV_TT(INS_mov, indCallReg, call->gtCallAddr);
19149                             regTracker.rsTrackRegTrash(indCallReg);
19150                         }
19151
19152                         emitCallType = emitter::EC_INDIR_R;
19153                     }
19154                     else
19155                     {
19156                         noway_assert(callType == CT_USER_FUNC);
19157
19158                         CORINFO_CONST_LOOKUP lookup;
19159                         compiler->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
19160
19161                         addr = lookup.addr;
19162
19163                         assert(addr != NULL);
19164
19165 #if defined(_TARGET_ARM_)
19166                         // Legacy backend does not handle the `IAT_VALUE` case that does not
19167                         // fit. It is not reachable currently from any front end so just check
19168                         // for it via assert.
19169                         assert(lookup.accessType != IAT_VALUE || arm_Valid_Imm_For_BL((ssize_t)addr));
19170 #endif
19171                         if (lookup.accessType == IAT_VALUE || lookup.accessType == IAT_PVALUE)
19172                         {
19173 #if CPU_LOAD_STORE_ARCH
19174                             // Load the address into a register, indirect it and call  through a register
19175                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19176                                                                        // indirection
19177                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19178
19179                             if (lookup.accessType == IAT_PVALUE)
19180                             {
19181                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19182                             }
19183
19184                             regTracker.rsTrackRegTrash(indCallReg);
19185                             // Now make the call "call indCallReg"
19186
19187                             getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19188                                                        methHnd,                       // methHnd
19189                                                        INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19190                                                        NULL,                          // addr
19191                                                        args,
19192                                                        retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19193                                                        gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
19194
19195                             emitCallType = emitter::EC_INDIR_R;
19196                             break;
19197 #else
19198                             emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19199                             indCallReg   = REG_NA;
19200 #endif
19201                         }
19202                         else
19203                         {
19204                             assert(lookup.accessType == IAT_PPVALUE);
19205                             // Double-indirection. Load the address into a register
19206                             // and call indirectly through a register
19207                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19208                                                                        // indirection
19209
19210 #if CPU_LOAD_STORE_ARCH
19211                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19212                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19213                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19214                             regTracker.rsTrackRegTrash(indCallReg);
19215
19216                             emitCallType = emitter::EC_INDIR_R;
19217
19218 #else
19219                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19220                             regTracker.rsTrackRegTrash(indCallReg);
19221                             emitCallType = emitter::EC_INDIR_ARD;
19222
19223 #endif // CPU_LOAD_STORE_ARCH
19224
19225                             // For indirect calls, we don't want to pass the address (used below),
19226                             // so set it to nullptr. (We've already used the address to load up the target register.)
19227                             addr = nullptr;
19228                         }
19229                     }
19230
19231                     getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
19232                                                INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19233                                                gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
19234                                                ilOffset, indCallReg);
19235
19236                     if (callType == CT_INDIRECT)
19237                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19238
19239                     getEmitter()->emitEnableRandomNops();
19240
19241                     // Done with PInvoke calls
19242                     break;
19243                 }
19244
19245                 if (callType == CT_INDIRECT)
19246                 {
19247                     noway_assert(genStillAddressable(call->gtCallAddr));
19248
19249                     if (call->gtCallCookie)
19250                     {
19251                         //------------------------------------------------------
19252                         // Non-virtual indirect calls via the P/Invoke stub
19253
19254                         GenTree* cookie = call->gtCallCookie;
19255                         GenTree* target = call->gtCallAddr;
19256
19257                         noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19258
19259                         noway_assert(cookie->gtOper == GT_CNS_INT ||
19260                                      cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
19261
19262                         noway_assert(args == argSize);
19263
19264 #if defined(_TARGET_X86_)
19265                         /* load eax with the real target */
19266
19267                         inst_RV_TT(INS_mov, REG_EAX, target);
19268                         regTracker.rsTrackRegTrash(REG_EAX);
19269
19270                         if (cookie->gtOper == GT_CNS_INT)
19271                             inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19272                         else
19273                             inst_TT(INS_push, cookie);
19274
19275                         /* Keep track of ESP for EBP-less frames */
19276                         genSinglePush();
19277
19278                         argSize += REGSIZE_BYTES;
19279
19280 #elif defined(_TARGET_ARM_)
19281
19282                         // Ensure that we spill these registers (if caller saved) in the prolog
19283                         regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19284
19285                         // ARM: load r12 with the real target
19286                         // X64: load r10 with the real target
19287                         inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19288                         regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19289
19290                         // ARM: load r4  with the pinvoke VASigCookie
19291                         // X64: load r11 with the pinvoke VASigCookie
19292                         if (cookie->gtOper == GT_CNS_INT)
19293                             inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
19294                                        EA_HANDLE_CNS_RELOC);
19295                         else
19296                             inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19297                         regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19298
19299                         noway_assert(args == argSize);
19300
19301                         // Ensure that we don't trash any of these registers if we have to load
19302                         // the helper call target into a register to invoke it.
19303                         regMaskTP regsUsed;
19304                         regSet.rsLockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | RBM_PINVOKE_COOKIE_PARAM,
19305                                          &regsUsed);
19306 #else
19307                         NYI("Non-virtual indirect calls via the P/Invoke stub");
19308 #endif
19309
19310                         args = argSize;
19311                         noway_assert((size_t)(int)args == args);
19312
19313                         genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19314
19315 #if defined(_TARGET_ARM_)
19316                         regSet.rsUnlockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
19317                                                RBM_PINVOKE_COOKIE_PARAM,
19318                                            regsUsed);
19319 #endif
19320
19321 #ifdef _TARGET_ARM_
19322                         // genEmitHelperCall doesn't record all registers a helper call would trash.
19323                         regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19324 #endif
19325                     }
19326                     else
19327                     {
19328                         //------------------------------------------------------
19329                         // Non-virtual indirect calls
19330
19331                         if (fTailCall)
19332                         {
19333                             inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
19334                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19335                         }
19336                         else
19337                             instEmit_indCall(call, args, retSize);
19338                     }
19339
19340                     genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19341
19342                     // Done with indirect calls
19343                     break;
19344                 }
19345
19346                 //------------------------------------------------------
19347                 // Non-virtual direct/indirect calls: Work out if the address of the
19348                 // call is known at JIT time (if not it is either an indirect call
19349                 // or the address must be accessed via an single/double indirection)
19350
19351                 noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19352
19353                 void*          addr;
19354                 InfoAccessType accessType;
19355
19356                 helperNum = compiler->eeGetHelperNum(methHnd);
19357
19358                 if (callType == CT_HELPER)
19359                 {
19360                     noway_assert(helperNum != CORINFO_HELP_UNDEF);
19361
19362 #ifdef FEATURE_READYTORUN_COMPILER
19363                     if (call->gtEntryPoint.addr != NULL)
19364                     {
19365                         accessType = call->gtEntryPoint.accessType;
19366                         addr       = call->gtEntryPoint.addr;
19367                     }
19368                     else
19369 #endif // FEATURE_READYTORUN_COMPILER
19370                     {
19371                         void* pAddr;
19372
19373                         accessType = IAT_VALUE;
19374                         addr       = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19375
19376                         if (!addr)
19377                         {
19378                             accessType = IAT_PVALUE;
19379                             addr       = pAddr;
19380                         }
19381                     }
19382                 }
19383                 else
19384                 {
19385                     noway_assert(helperNum == CORINFO_HELP_UNDEF);
19386
19387                     CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
19388
19389                     if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19390                         aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19391
19392                     if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19393                         aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19394
19395 #ifdef FEATURE_READYTORUN_COMPILER
19396                     if (call->gtEntryPoint.addr != NULL)
19397                     {
19398                         accessType = call->gtEntryPoint.accessType;
19399                         addr       = call->gtEntryPoint.addr;
19400                     }
19401                     else
19402 #endif // FEATURE_READYTORUN_COMPILER
19403                     {
19404                         CORINFO_CONST_LOOKUP addrInfo;
19405                         compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19406
19407                         accessType = addrInfo.accessType;
19408                         addr       = addrInfo.addr;
19409                     }
19410                 }
19411
19412                 if (fTailCall)
19413                 {
19414                     noway_assert(callType == CT_USER_FUNC);
19415
19416                     switch (accessType)
19417                     {
19418                         case IAT_VALUE:
19419                             //------------------------------------------------------
19420                             // Non-virtual direct calls to known addresses
19421                             //
19422                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19423                             break;
19424
19425                         case IAT_PVALUE:
19426                             //------------------------------------------------------
19427                             // Non-virtual direct calls to addresses accessed by
19428                             // a single indirection.
19429                             //
19430                             // For tailcalls we place the target address in REG_TAILCALL_ADDR
19431                             CLANG_FORMAT_COMMENT_ANCHOR;
19432
19433 #if CPU_LOAD_STORE_ARCH
19434                             {
19435                                 regNumber indReg = REG_TAILCALL_ADDR;
19436                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19437                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19438                                 regTracker.rsTrackRegTrash(indReg);
19439                             }
19440 #else
19441                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19442                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19443 #endif
19444                             break;
19445
19446                         case IAT_PPVALUE:
19447                             //------------------------------------------------------
19448                             // Non-virtual direct calls to addresses accessed by
19449                             // a double indirection.
19450                             //
19451                             // For tailcalls we place the target address in REG_TAILCALL_ADDR
19452                             CLANG_FORMAT_COMMENT_ANCHOR;
19453
19454 #if CPU_LOAD_STORE_ARCH
19455                             {
19456                                 regNumber indReg = REG_TAILCALL_ADDR;
19457                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19458                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19459                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19460                                 regTracker.rsTrackRegTrash(indReg);
19461                             }
19462 #else
19463                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19464                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19465                                                        REG_TAILCALL_ADDR, 0);
19466                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19467 #endif
19468                             break;
19469
19470                         default:
19471                             noway_assert(!"Bad accessType");
19472                             break;
19473                     }
19474                 }
19475                 else
19476                 {
19477                     switch (accessType)
19478                     {
19479                         regNumber indCallReg;
19480
19481                         case IAT_VALUE:
19482                         {
19483                             //------------------------------------------------------
19484                             // Non-virtual direct calls to known addresses
19485                             //
19486                             // The vast majority of calls end up here....  Wouldn't
19487                             // it be nice if they all did!
19488                             CLANG_FORMAT_COMMENT_ANCHOR;
19489 #ifdef _TARGET_ARM_
19490                             // We may use direct call for some of recursive calls
19491                             // as we can safely estimate the distance from the call site to the top of the method
19492                             const int codeOffset = MAX_PROLOG_SIZE_BYTES +           // prolog size
19493                                                    getEmitter()->emitCurCodeOffset + // offset of the current IG
19494                                                    getEmitter()->emitCurIGsize +     // size of the current IG
19495                                                    4;                                // size of the jump instruction
19496                                                                                      // that we are now emitting
19497                             if (compiler->gtIsRecursiveCall(call) && codeOffset <= -CALL_DIST_MAX_NEG)
19498                             {
19499                                 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19500                                                            INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19501                                                            args, retSize, gcInfo.gcVarPtrSetCur,
19502                                                            gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19503                                                            REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp
19504                                                            false,                // isJump
19505                                                            emitter::emitNoGChelper(helperNum));
19506                             }
19507                             else if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19508                             {
19509                                 // Load the address into a register and call  through a register
19510                                 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
19511                                                                            // CALL indirection
19512                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19513
19514                                 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
19515                                                            INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19516                                                            args, retSize, gcInfo.gcVarPtrSetCur,
19517                                                            gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19518                                                            indCallReg,   // ireg
19519                                                            REG_NA, 0, 0, // xreg, xmul, disp
19520                                                            false,        // isJump
19521                                                            emitter::emitNoGChelper(helperNum));
19522                             }
19523                             else
19524 #endif
19525                             {
19526                                 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19527                                                            INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19528                                                            gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19529                                                            gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
19530                                                            0,     /* ireg, xreg, xmul, disp */
19531                                                            false, /* isJump */
19532                                                            emitter::emitNoGChelper(helperNum));
19533                             }
19534                         }
19535                         break;
19536
19537                         case IAT_PVALUE:
19538                         {
19539                             //------------------------------------------------------
19540                             // Non-virtual direct calls to addresses accessed by
19541                             // a single indirection.
19542                             //
19543
19544                             // Load the address into a register, load indirect and call  through a register
19545                             CLANG_FORMAT_COMMENT_ANCHOR;
19546 #if CPU_LOAD_STORE_ARCH
19547                             regMaskTP indCallMask = RBM_ALLINT;
19548
19549 #ifdef FEATURE_READYTORUN_COMPILER
19550                             if (call->IsR2RRelativeIndir())
19551                             {
19552                                 indCallMask &= ~RBM_R2R_INDIRECT_PARAM;
19553                             }
19554 #endif // FEATURE_READYTORUN_COMPILER
19555
19556                             // Grab an available register to use for the CALL indirection
19557                             indCallReg = regSet.rsGrabReg(indCallMask);
19558
19559                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19560
19561 #ifdef FEATURE_READYTORUN_COMPILER
19562                             if (call->IsR2RRelativeIndir())
19563                             {
19564                                 noway_assert(regSet.rsRegMaskCanGrab() & RBM_R2R_INDIRECT_PARAM);
19565                                 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R2R_INDIRECT_PARAM, indCallReg);
19566                                 regTracker.rsTrackRegTrash(REG_R2R_INDIRECT_PARAM);
19567                             }
19568 #endif // FEATURE_READYTORUN_COMPILER
19569
19570                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19571                             regTracker.rsTrackRegTrash(indCallReg);
19572
19573                             emitCallType = emitter::EC_INDIR_R;
19574                             addr         = NULL;
19575
19576 #else
19577                             emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19578                             indCallReg   = REG_NA;
19579
19580 #endif // CPU_LOAD_STORE_ARCH
19581
19582                             getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
19583                                                        retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19584                                                        gcInfo.gcRegByrefSetCur, ilOffset,
19585                                                        indCallReg,   // ireg
19586                                                        REG_NA, 0, 0, // xreg, xmul, disp
19587                                                        false,        /* isJump */
19588                                                        emitter::emitNoGChelper(helperNum));
19589                         }
19590                         break;
19591
19592                         case IAT_PPVALUE:
19593                         {
19594                             //------------------------------------------------------
19595                             // Non-virtual direct calls to addresses accessed by
19596                             // a double indirection.
19597                             //
19598                             // Double-indirection. Load the address into a register
19599                             // and call indirectly through the register
19600
19601                             noway_assert(helperNum == CORINFO_HELP_UNDEF);
19602
19603                             // Grab an available register to use for the CALL indirection
19604                             indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19605
19606 #if CPU_LOAD_STORE_ARCH
19607                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19608                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19609                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19610                             regTracker.rsTrackRegTrash(indCallReg);
19611
19612                             emitCallType = emitter::EC_INDIR_R;
19613
19614 #else
19615
19616                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19617                             regTracker.rsTrackRegTrash(indCallReg);
19618
19619                             emitCallType = emitter::EC_INDIR_ARD;
19620
19621 #endif // CPU_LOAD_STORE_ARCH
19622
19623                             getEmitter()->emitIns_Call(emitCallType, methHnd,
19624                                                        INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19625                                                        args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19626                                                        gcInfo.gcRegByrefSetCur, ilOffset,
19627                                                        indCallReg,   // ireg
19628                                                        REG_NA, 0, 0, // xreg, xmul, disp
19629                                                        false,        // isJump
19630                                                        emitter::emitNoGChelper(helperNum));
19631                         }
19632                         break;
19633
19634                         default:
19635                             noway_assert(!"Bad accessType");
19636                             break;
19637                     }
19638
19639                     // tracking of region protected by the monitor in synchronized methods
19640                     if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
19641                     {
19642                         fPossibleSyncHelperCall = true;
19643                     }
19644                 }
19645             }
19646             break;
19647
19648             default:
19649                 noway_assert(!"strange call type");
19650                 break;
19651         }
19652
19653     /*-------------------------------------------------------------------------
19654      *  For tailcalls, REG_INTRET contains the address of the target function,
19655      *  enregistered args are in the correct registers, and the stack arguments
19656      *  have been pushed on the stack. Now call the stub-sliding helper
19657      */
19658
19659     if (fTailCall)
19660     {
19661
19662         if (compiler->info.compCallUnmanaged)
19663             genPInvokeMethodEpilog();
19664
19665 #ifdef _TARGET_X86_
19666         noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
19667
19668         // Push the count of the incoming stack arguments
19669
19670         unsigned nOldStkArgs =
19671             (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES);
19672         getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
19673         genSinglePush(); // Keep track of ESP for EBP-less frames
19674         args += REGSIZE_BYTES;
19675
19676         // Push the count of the outgoing stack arguments
19677
19678         getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / REGSIZE_BYTES);
19679         genSinglePush(); // Keep track of ESP for EBP-less frames
19680         args += REGSIZE_BYTES;
19681
19682         // Push info about the callee-saved registers to be restored
19683         // For now, we always spill all registers if compiler->compTailCallUsed
19684
19685         DWORD calleeSavedRegInfo = 1 |                                 // always restore EDI,ESI,EBX
19686                                    (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
19687         getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
19688         genSinglePush(); // Keep track of ESP for EBP-less frames
19689         args += REGSIZE_BYTES;
19690
19691         // Push the address of the target function
19692
19693         getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
19694         genSinglePush(); // Keep track of ESP for EBP-less frames
19695         args += REGSIZE_BYTES;
19696
19697 #else // _TARGET_X86_
19698
19699         args    = 0;
19700         retSize = EA_UNKNOWN;
19701
19702 #endif // _TARGET_X86_
19703
19704         if (compiler->getNeedsGSSecurityCookie())
19705         {
19706             genEmitGSCookieCheck(true);
19707         }
19708
19709         // TailCall helper does not poll for GC. An explicit GC poll
19710         // Should have been placed in when we morphed this into a tail call.
19711         noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
19712
19713         // Now call the helper
19714
19715         genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
19716     }
19717
19718     /*-------------------------------------------------------------------------
19719      *  Done with call.
19720      *  Trash registers, pop arguments if needed, etc
19721      */
19722
19723     /* Mark the argument registers as free */
19724
19725     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19726
19727     for (areg = 0; areg < MAX_REG_ARG; areg++)
19728     {
19729         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
19730
19731         // Is this one of the used argument registers?
19732         if ((curArgMask & call->gtCallRegUsedMask) == 0)
19733             continue;
19734
19735 #ifdef _TARGET_ARM_
19736         if (regSet.rsUsedTree[areg] == NULL)
19737         {
19738             noway_assert(areg % 2 == 1 &&
19739                          (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
19740                           (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
19741             continue;
19742         }
19743 #endif
19744
19745         regSet.rsMarkRegFree(curArgMask);
19746
19747         // We keep regSet.rsMaskVars current during codegen, so we have to remove any
19748         // that have been copied into arg regs.
19749
19750         regSet.RemoveMaskVars(curArgMask);
19751         gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
19752         gcInfo.gcRegByrefSetCur &= ~(curArgMask);
19753     }
19754
19755 #if !FEATURE_STACK_FP_X87
19756     //-------------------------------------------------------------------------
19757     // free up the FP args
19758
19759     for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
19760     {
19761         regNumber argRegNum  = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
19762         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
19763
19764         // Is this one of the used argument registers?
19765         if ((curArgMask & call->gtCallRegUsedMask) == 0)
19766             continue;
19767
19768         regSet.rsMaskUsed &= ~curArgMask;
19769         regSet.rsUsedTree[argRegNum] = NULL;
19770     }
19771 #endif // !FEATURE_STACK_FP_X87
19772
19773     /* restore the old argument register status */
19774
19775     intRegState.rsCurRegArgNum   = savCurIntArgReg;
19776     floatRegState.rsCurRegArgNum = savCurFloatArgReg;
19777
19778     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19779
19780     /* Mark all trashed registers as such */
19781
19782     if (calleeTrashedRegs)
19783         regTracker.rsTrashRegSet(calleeTrashedRegs);
19784
19785     regTracker.rsTrashRegsForGCInterruptability();
19786
19787 #ifdef DEBUG
19788
19789     if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19790     {
19791         if (compiler->verbose)
19792         {
19793             printf("\t\t\t\t\t\t\tEnd call ");
19794             Compiler::printTreeID(call);
19795             printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
19796         }
19797         noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
19798     }
19799
19800 #endif
19801
19802 #if FEATURE_STACK_FP_X87
19803     /* All float temps must be spilled around function calls */
19804     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19805     {
19806         noway_assert(compCurFPState.m_uStackSize == 1);
19807     }
19808     else
19809     {
19810         noway_assert(compCurFPState.m_uStackSize == 0);
19811     }
19812 #else
19813     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19814     {
19815 #ifdef _TARGET_ARM_
19816         if (call->IsVarargs() || compiler->opts.compUseSoftFP)
19817         {
19818             // Result return for vararg methods is in r0, r1, but our callers would
19819             // expect the return in s0, s1 because of floating type. Do the move now.
19820             if (call->gtType == TYP_FLOAT)
19821             {
19822                 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
19823             }
19824             else
19825             {
19826                 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
19827             }
19828         }
19829 #endif
19830         genMarkTreeInReg(call, REG_FLOATRET);
19831     }
19832 #endif
19833
19834     /* The function will pop all arguments before returning */
19835
19836     SetStackLevel(saveStackLvl);
19837
19838     /* No trashed registers may possibly hold a pointer at this point */
19839     CLANG_FORMAT_COMMENT_ANCHOR;
19840
19841 #ifdef DEBUG
19842
19843     regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
19844                         ~regSet.rsMaskVars & ~vptrMask;
19845     if (ptrRegs)
19846     {
19847         // A reg may be dead already.  The assertion is too strong.
19848         LclVarDsc* varDsc;
19849         unsigned   varNum;
19850
19851         // use compiler->compCurLife
19852         for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
19853         {
19854             /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
19855
19856             if (!varDsc->lvTracked)
19857                 continue;
19858             if (!varDsc->lvRegister)
19859                 continue;
19860             if (varDsc->IsFloatRegType())
19861                 continue;
19862
19863             /* Get hold of the index and the bitmask for the variable */
19864
19865             unsigned varIndex = varDsc->lvVarIndex;
19866
19867             /* Is this variable live currently? */
19868
19869             if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
19870             {
19871                 regNumber regNum  = varDsc->lvRegNum;
19872                 regMaskTP regMask = genRegMask(regNum);
19873
19874                 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
19875                     ptrRegs &= ~regMask;
19876             }
19877         }
19878         if (ptrRegs)
19879         {
19880             printf("Bad call handling for ");
19881             Compiler::printTreeID(call);
19882             printf("\n");
19883             noway_assert(!"A callee trashed reg is holding a GC pointer");
19884         }
19885     }
19886 #endif
19887
19888 #if defined(_TARGET_X86_)
19889     //-------------------------------------------------------------------------
19890     // Create a label for tracking of region protected by the monitor in synchronized methods.
19891     // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
19892     // so the GC state vars have been updated before creating the label.
19893
19894     if (fPossibleSyncHelperCall)
19895     {
19896         switch (helperNum)
19897         {
19898             case CORINFO_HELP_MON_ENTER:
19899             case CORINFO_HELP_MON_ENTER_STATIC:
19900                 noway_assert(compiler->syncStartEmitCookie == NULL);
19901                 compiler->syncStartEmitCookie =
19902                     getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19903                 noway_assert(compiler->syncStartEmitCookie != NULL);
19904                 break;
19905             case CORINFO_HELP_MON_EXIT:
19906             case CORINFO_HELP_MON_EXIT_STATIC:
19907                 noway_assert(compiler->syncEndEmitCookie == NULL);
19908                 compiler->syncEndEmitCookie =
19909                     getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19910                 noway_assert(compiler->syncEndEmitCookie != NULL);
19911                 break;
19912             default:
19913                 break;
19914         }
19915     }
19916 #endif // _TARGET_X86_
19917
19918     if (call->gtFlags & GTF_CALL_UNMANAGED)
19919     {
19920         genDefineTempLabel(returnLabel);
19921
19922 #ifdef _TARGET_X86_
19923         if (getInlinePInvokeCheckEnabled())
19924         {
19925             noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
19926             BasicBlock* esp_check;
19927
19928             CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19929             /* mov   ecx, dword ptr [frame.callSiteTracker] */
19930
19931             getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
19932                                       pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19933             regTracker.rsTrackRegTrash(REG_ARG_0);
19934
19935             /* Generate the conditional jump */
19936
19937             if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19938             {
19939                 if (argSize)
19940                 {
19941                     getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
19942                 }
19943             }
19944             /* cmp   ecx, esp */
19945
19946             getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
19947
19948             esp_check = genCreateTempLabel();
19949
19950             emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
19951             inst_JMP(jmpEqual, esp_check);
19952
19953             getEmitter()->emitIns(INS_BREAKPOINT);
19954
19955             /* genCondJump() closes the current emitter block */
19956
19957             genDefineTempLabel(esp_check);
19958         }
19959 #endif
19960     }
19961
19962     /* Are we supposed to pop the arguments? */
19963     CLANG_FORMAT_COMMENT_ANCHOR;
19964
19965 #if defined(_TARGET_X86_)
19966     if (call->gtFlags & GTF_CALL_UNMANAGED)
19967     {
19968         if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) ||
19969             compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
19970         {
19971             // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
19972             // take care of the cdecl argument popping here as well but the stack depth tracking logic
19973             // makes this very hard, i.e. it needs to "see" the actual pop.
19974
19975             CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19976
19977             if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
19978             {
19979                 /* mov   esp, dword ptr [frame.callSiteTracker] */
19980                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
19981                                           compiler->lvaInlinedPInvokeFrameVar,
19982                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19983             }
19984             else
19985             {
19986                 /* mov   ecx, dword ptr [frame.callSiteTracker] */
19987                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
19988                                           compiler->lvaInlinedPInvokeFrameVar,
19989                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19990                 regTracker.rsTrackRegTrash(REG_ARG_0);
19991
19992                 /* lea   esp, [ecx + argSize] */
19993                 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
19994             }
19995         }
19996     }
19997 #endif // _TARGET_X86_
19998
19999     if (call->gtFlags & GTF_CALL_POP_ARGS)
20000     {
20001         noway_assert(args == (size_t) - (int)argSize);
20002
20003         if (argSize)
20004         {
20005             genAdjustSP(argSize);
20006         }
20007     }
20008
20009     if (pseudoStackLvl)
20010     {
20011         noway_assert(call->gtType == TYP_VOID);
20012
20013         /* Generate NOP */
20014
20015         instGen(INS_nop);
20016     }
20017
20018     /* What does the function return? */
20019
20020     retVal = RBM_NONE;
20021
20022     switch (call->gtType)
20023     {
20024         case TYP_REF:
20025         case TYP_BYREF:
20026             gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
20027
20028             __fallthrough;
20029
20030         case TYP_INT:
20031 #if !CPU_HAS_FP_SUPPORT
20032         case TYP_FLOAT:
20033 #endif
20034             retVal = RBM_INTRET;
20035             break;
20036
20037 #ifdef _TARGET_ARM_
20038         case TYP_STRUCT:
20039         {
20040             assert(call->gtRetClsHnd != NULL);
20041             assert(compiler->IsHfa(call->gtRetClsHnd));
20042             int retSlots = compiler->GetHfaCount(call->gtRetClsHnd);
20043             assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
20044             assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
20045             retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
20046         }
20047         break;
20048 #endif
20049
20050         case TYP_LONG:
20051 #if !CPU_HAS_FP_SUPPORT
20052         case TYP_DOUBLE:
20053 #endif
20054             retVal = RBM_LNGRET;
20055             break;
20056
20057 #if CPU_HAS_FP_SUPPORT
20058         case TYP_FLOAT:
20059         case TYP_DOUBLE:
20060
20061             break;
20062 #endif
20063
20064         case TYP_VOID:
20065             break;
20066
20067         default:
20068             noway_assert(!"unexpected/unhandled fn return type");
20069     }
20070
20071     // We now have to generate the "call epilog" (if it was a call to unmanaged code).
20072     /* if it is a call to unmanaged code, frameListRoot must be set */
20073
20074     noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
20075
20076     if (frameListRoot)
20077         genPInvokeCallEpilog(frameListRoot, retVal);
20078
20079     if (frameListRoot && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
20080     {
20081         if (frameListRoot->lvRegister)
20082         {
20083             bool isBorn  = false;
20084             bool isDying = true;
20085             genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
20086         }
20087     }
20088
20089 #ifdef DEBUG
20090     if (compiler->opts.compStackCheckOnCall
20091 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
20092         // check the stack as frequently as possible
20093         && !call->IsHelperCall()
20094 #else
20095         && call->gtCallType == CT_USER_FUNC
20096 #endif
20097             )
20098     {
20099         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
20100                      compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
20101                      compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
20102         if (argSize > 0)
20103         {
20104             getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
20105             getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
20106             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
20107             regTracker.rsTrackRegTrash(REG_ARG_0);
20108         }
20109         else
20110             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
20111
20112         BasicBlock*  esp_check = genCreateTempLabel();
20113         emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
20114         inst_JMP(jmpEqual, esp_check);
20115         getEmitter()->emitIns(INS_BREAKPOINT);
20116         genDefineTempLabel(esp_check);
20117     }
20118 #endif // DEBUG
20119
20120 #if FEATURE_STACK_FP_X87
20121     UnspillRegVarsStackFp();
20122 #endif // FEATURE_STACK_FP_X87
20123
20124     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20125     {
20126         // Restore return node if necessary
20127         if (call->gtFlags & GTF_SPILLED)
20128         {
20129             UnspillFloat(call);
20130         }
20131
20132 #if FEATURE_STACK_FP_X87
20133         // Mark as free
20134         regSet.SetUsedRegFloat(call, false);
20135 #endif
20136     }
20137
20138 #if FEATURE_STACK_FP_X87
20139 #ifdef DEBUG
20140     if (compiler->verbose)
20141     {
20142         JitDumpFPState();
20143     }
20144 #endif
20145 #endif
20146
20147     return retVal;
20148 }
20149 #ifdef _PREFAST_
20150 #pragma warning(pop)
20151 #endif
20152
20153 /*****************************************************************************
20154  *
20155  *  Create and record GC Info for the function.
20156  */
// Create and record the GC info for the method just compiled, dispatching to
// the encoder selected at build time. The JIT32 (x86) encoder returns the
// address of the emitted info block; the GcInfoEncoder-based path publishes
// the info through the EE interface and returns nothing.
#ifdef JIT32_GCENCODER
void*
#else
void
#endif
CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
{
#ifdef JIT32_GCENCODER
    // x86 encoder: returns a pointer to the allocated GC info block.
    return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
#else
    // Encoder-based path; note that epilogSize is not needed here.
    genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
#endif
}
20170
20171 #ifdef JIT32_GCENCODER
//------------------------------------------------------------------------
// genCreateAndStoreGCInfoJIT32: Create and record the JIT32-format GC info
// for the method: an InfoHdr-style header immediately followed by the GC
// pointer tables.
//
// Arguments:
//    codeSize   - size of the generated code, in bytes
//    prologSize - size of the prolog, in bytes
//    epilogSize - size of the epilog, in bytes
//    codePtr    - (DEBUG only) address of the generated code
//
// Return Value:
//    The address of the info block allocated via ICorJitInfo::allocGCInfo.
//
void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
                                            unsigned prologSize,
                                            unsigned epilogSize DEBUGARG(void* codePtr))
{
    BYTE    headerBuf[64];
    InfoHdr header;

    int s_cached;

    // First pass: encode the header into a scratch buffer (mode 0) purely to
    // measure its encoded size; the real emission happens below.
#ifdef DEBUG
    size_t headerSize =
#endif
        compiler->compInfoBlkSize =
            gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);

    // Measure the pointer-table encoding as well.
    size_t argTabOffset = 0;
    size_t ptrMapSize   = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);

#if DISPLAY_SIZES

    if (genInterruptible)
    {
        gcHeaderISize += compiler->compInfoBlkSize;
        gcPtrMapISize += ptrMapSize;
    }
    else
    {
        gcHeaderNSize += compiler->compInfoBlkSize;
        gcPtrMapNSize += ptrMapSize;
    }

#endif // DISPLAY_SIZES

    compiler->compInfoBlkSize += ptrMapSize;

    /* Allocate the info block for the method */

    compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);

#if 0 // VERBOSE_SIZES
    // TODO-Review: 'dataSize', below, is not defined

//  if  (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
    {
        printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
               compiler->info.compILCodeSize,
               compiler->compInfoBlkSize,
               codeSize + dataSize,
               codeSize + dataSize - prologSize - epilogSize,
               100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
               100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
               compiler->info.compClassName,
               compiler->info.compMethodName);
    }

#endif

    /* Fill in the info block and return it to the caller */

    void* infoPtr = compiler->compInfoBlkAddr;

    /* Create the method info block: header followed by GC tracking tables */

    // Second pass: encode the header for real (mode -1) directly into the
    // allocated block, advancing compInfoBlkAddr past it.
    compiler->compInfoBlkAddr +=
        gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);

    // The real encoding must occupy exactly the sizes measured above.
    assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
    compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
    assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);

#ifdef DEBUG

    if (0)
    {
        // Raw hex dump of the header and pointer table (disabled by default;
        // flip the 'if (0)' above to enable under a debugger).
        BYTE*    temp = (BYTE*)infoPtr;
        unsigned size = compiler->compInfoBlkAddr - temp;
        BYTE*    ptab = temp + headerSize;

        noway_assert(size == headerSize + ptrMapSize);

        printf("Method info block - header [%u bytes]:", headerSize);

        for (unsigned i = 0; i < size; i++)
        {
            if (temp == ptab)
            {
                printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
                printf("\n    %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
            }
            else
            {
                if (!(i % 16))
                    printf("\n    %04X: ", i);
            }

            printf("%02X ", *temp++);
        }

        printf("\n");
    }

#endif // DEBUG

#if DUMP_GC_TABLES

    if (compiler->opts.dspGCtbls)
    {
        const BYTE* base = (BYTE*)infoPtr;
        unsigned    size;
        unsigned    methodSize;
        InfoHdr     dumpHeader;

        printf("GC Info for method %s\n", compiler->info.compFullName);
        printf("GC info size = %3u\n", compiler->compInfoBlkSize);

        size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
        // printf("size of header encoding is %3u\n", size);
        printf("\n");

        if (compiler->opts.dspGCtbls)
        {
            base += size;
            size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
            // printf("size of pointer table is %3u\n", size);
            printf("\n");
            noway_assert(compiler->compInfoBlkAddr == (base + size));
        }
    }

#ifdef DEBUG
    if (jitOpts.testMask & 128)
    {
        // Stress mode: decode the tables at every code offset to verify them.
        for (unsigned offs = 0; offs < codeSize; offs++)
        {
            gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
        }
    }
#endif // DEBUG
#endif // DUMP_GC_TABLES

    /* Make sure we ended up generating the expected number of bytes */

    noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);

    return infoPtr;
}
20317
20318 #else // JIT32_GCENCODER
20319
20320 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
20321 {
20322     IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
20323     GcInfoEncoder* gcInfoEncoder  = new (compiler, CMK_GC)
20324         GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20325     assert(gcInfoEncoder);
20326
20327     // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20328     gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20329
20330     // We keep the call count for the second call to gcMakeRegPtrTable() below.
20331     unsigned callCnt = 0;
20332     // First we figure out the encoder ID's for the stack slots and registers.
20333     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
20334     // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20335     gcInfoEncoder->FinalizeSlotIds();
20336     // Now we can actually use those slot ID's to declare live ranges.
20337     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
20338
20339     gcInfoEncoder->Build();
20340
20341     // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
20342     // let's save the values anyway for debugging purposes
20343     compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20344     compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
20345 }
20346 #endif
20347
20348 /*****************************************************************************
20349  *  For CEE_LOCALLOC
20350  */
20351
//------------------------------------------------------------------------
// genLclHeap: Generate code for CEE_LOCALLOC (stack allocation).
//
// Arguments:
//    size - tree producing the number of bytes to allocate (TYP_INT or
//           TYP_I_IMPL); may be a constant or a computed value.
//
// Return Value:
//    The register holding the address of the allocated area (the new SP),
//    or null in that register when the requested size is zero.
//
// Notes:
//    Requires a frame pointer (SP is modified) and an empty stack level.
//    When compInitMem is set, the allocated memory is zero-initialized by
//    pushing zeros; otherwise the pages are only "tickled" in order so that
//    SP stays in sync with the stack guard page at all times.
//
regNumber CodeGen::genLclHeap(GenTree* size)
{
    noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

    // regCnt is a register used to hold both
    //              the amount to stack alloc (either in bytes or pointer sized words)
    //          and the final stack alloc address to return as the result
    //
    regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
    var_types type   = genActualType(size->gtType);
    emitAttr  easz   = emitTypeSize(type);

#ifdef DEBUG
    // Verify ESP against the saved check value before we start adjusting it.
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);

        BasicBlock*  esp_check = genCreateTempLabel();
        emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
        inst_JMP(jmpEqual, esp_check);
        getEmitter()->emitIns(INS_BREAKPOINT);
        genDefineTempLabel(esp_check);
    }
#endif

    noway_assert(isFramePointerUsed());
    noway_assert(genStackLevel == 0); // Can't have anything on the stack

    BasicBlock* endLabel = NULL;
#if FEATURE_FIXED_OUT_ARGS
    bool stackAdjusted = false;
#endif

    if (size->IsCnsIntOrI())
    {
#if FEATURE_FIXED_OUT_ARGS
        // If we have an outgoing arg area then we must adjust the SP
        // essentially popping off the outgoing arg area,
        // We will restore it right before we return from this method
        //
        if (compiler->lvaOutgoingArgSpaceSize > 0)
        {
            assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
                   0); // This must be true for the stack to remain aligned
            inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
            stackAdjusted = true;
        }
#endif
        size_t amount = size->gtIntCon.gtIconVal;

        // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
        amount += (STACK_ALIGN - 1);
        amount &= ~(STACK_ALIGN - 1);
        amount >>= STACK_ALIGN_SHIFT;      // amount is number of pointer-sized words to locAlloc
        size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node

        /* If amount is zero then return null in RegCnt */
        if (amount == 0)
        {
            regCnt = regSet.rsGrabReg(RBM_ALLINT);
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
            goto DONE;
        }

        /* For small allocations we will generate up to six push 0 inline */
        if (amount <= 6)
        {
            regCnt = regSet.rsGrabReg(RBM_ALLINT);
#if CPU_LOAD_STORE_ARCH
            regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
            // Set 'regZero' to zero
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
#endif

            while (amount != 0)
            {
#if CPU_LOAD_STORE_ARCH
                inst_IV(INS_push, (unsigned)genRegMask(regZero));
#else
                inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
#endif
                amount--;
            }

            regTracker.rsTrackRegTrash(regCnt);
            // --- move regCnt, ESP
            inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
            goto DONE;
        }
        else
        {
            if (!compiler->info.compInitMem)
            {
                // Re-bias amount to be number of bytes to adjust the SP
                amount <<= STACK_ALIGN_SHIFT;
                size->gtIntCon.gtIconVal = amount;      // update the GT_CNS value in the node
                if (amount < compiler->eeGetPageSize()) // must be < not <=
                {
                    // Since the size is a page or less, simply adjust ESP

                    // ESP might already be in the guard page, must touch it BEFORE
                    // the alloc, not after.
                    regCnt = regSet.rsGrabReg(RBM_ALLINT);
                    inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
#if CPU_LOAD_STORE_ARCH
                    regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
                    getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
                    regTracker.rsTrackRegTrash(regTmp);
#else
                    getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
#endif
                    inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
                    inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
                    regTracker.rsTrackRegTrash(regCnt);
                    goto DONE;
                }
            }
        }
    }

    // Compute the size of the block to allocate
    genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
    noway_assert(size->InReg());
    regCnt = size->gtRegNum;

#if FEATURE_FIXED_OUT_ARGS
    // If we have an outgoing arg area then we must adjust the SP
    // essentially popping off the outgoing arg area,
    // We will restore it right before we return from this method
    //
    if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
    {
        assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
               0); // This must be true for the stack to remain aligned
        inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
        stackAdjusted = true;
    }
#endif

    //  Perform alignment if we don't have a GT_CNS size
    //
    if (!size->IsCnsIntOrI())
    {
        endLabel = genCreateTempLabel();

        // If 0 we bail out
        instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
        emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
        inst_JMP(jmpEqual, endLabel);

        // Align to STACK_ALIGN
        inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));

        if (compiler->info.compInitMem)
        {
#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
            // regCnt will be the number of pointer-sized words to locAlloc
            // If the shift right won't do the 'and' do it here
            inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
#endif
            // --- shr regCnt, 2 ---
            inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
        }
        else
        {
            // regCnt will be the total number of bytes to locAlloc

            inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
        }
    }

    BasicBlock* loop;
    loop = genCreateTempLabel();

    if (compiler->info.compInitMem)
    {
        // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc

        /* Since we have to zero out the allocated memory AND ensure that
           ESP is always valid by tickling the pages, we will just push 0's
           on the stack */
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM_)
        regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
        regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
        // Set 'regZero1' and 'regZero2' to zero
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
#endif

        // Loop:
        genDefineTempLabel(loop);

#if defined(_TARGET_X86_)

        inst_IV(INS_push_hide, 0); // --- push 0
        // Are we done?
        inst_RV(INS_dec, regCnt, type);

#elif defined(_TARGET_ARM_)

        // ARM pushes two zero registers per iteration, so regCnt drops by 2.
        inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
        // Are we done?
        inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);

#else
        assert(!"Codegen missing");
#endif // TARGETS

        emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
        inst_JMP(jmpNotEqual, loop);

        // Move the final value of ESP into regCnt
        inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
        regTracker.rsTrackRegTrash(regCnt);
    }
    else
    {
        // At this point 'regCnt' is set to the total number of bytes to locAlloc

        /* We don't need to zero out the allocated memory. However, we do have
           to tickle the pages to ensure that ESP is always valid and is
           in sync with the "stack guard page".  Note that in the worst
           case ESP is on the last byte of the guard page.  Thus you must
           touch ESP+0 first not ESP+0x1000.

           Another subtlety is that you don't want ESP to be exactly on the
           boundary of the guard page because PUSH is predecrement, thus
           call setup would not touch the guard page but just beyond it */

        /* Note that we go through a few hoops so that ESP never points to
           illegal pages at any time during the ticking process

                  neg   REG
                  add   REG, ESP         // reg now holds ultimate ESP
                  jb    loop             // result is smaller than original ESP (no wrap around)
                  xor   REG, REG,        // Overflow, pick lowest possible number
             loop:
                  test  ESP, [ESP+0]     // X86 - tickle the page
                  ldr   REGH,[ESP+0]     // ARM - tickle the page
                  mov   REGH, ESP
                  sub   REGH, GetOsPageSize()
                  mov   ESP, REGH
                  cmp   ESP, REG
                  jae   loop

                  mov   ESP, REG
             end:
          */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_

        inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
        inst_JMP(EJ_hs, loop);
#else
        inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
        inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
        inst_JMP(EJ_jb, loop);
#endif
        regTracker.rsTrackRegTrash(regCnt);

        // Overflow path: clamp the target ESP to the lowest possible value.
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);

        genDefineTempLabel(loop);

        // This is a workaround to avoid the emitter trying to track the
        // decrement of the ESP - we do the subtraction in another reg
        // instead of adjusting ESP directly.

        regNumber regTemp = regSet.rsPickReg();

        // Tickle the decremented value, and move back to ESP,
        // note that it has to be done BEFORE the update of ESP since
        // ESP might already be on the guard page.  It is OK to leave
        // the final value of ESP on the guard page
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_LOAD_STORE_ARCH
        getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
#else
        getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
#endif

        inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
        regTracker.rsTrackRegTrash(regTemp);

        inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
        inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);

        genRecoverReg(size, RBM_ALLINT,
                      RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
        noway_assert(size->InReg());
        regCnt = size->gtRegNum;
        inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
        emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
        inst_JMP(jmpGEU, loop);

        // Move the final value to ESP
        inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
    }
    regSet.rsMarkRegFree(genRegMask(regCnt));

DONE:

    noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));

    if (endLabel != NULL)
        genDefineTempLabel(endLabel);

#if FEATURE_FIXED_OUT_ARGS
    // If we have an outgoing arg area then we must readjust the SP
    //
    if (stackAdjusted)
    {
        assert(compiler->lvaOutgoingArgSpaceSize > 0);
        assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
               0); // This must be true for the stack to remain aligned
        inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
    }
#endif

    /* Write the lvaLocAllocSPvar stack frame slot */
    if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
    {
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
    }

#if STACK_PROBES
    // Don't think it is worth it the codegen complexity to embed this
    // when it's possible in each of the customized allocas.
    if (compiler->opts.compNeedStackProbes)
    {
        genGenerateStackProbe();
    }
#endif

#ifdef DEBUG
    // Update new ESP
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
    }
#endif

    return regCnt;
}
20707
20708 /*****************************************************************************
20709  *
20710  *  Return non-zero if the given register is free after the given tree is
20711  *  evaluated (i.e. the register is either not used at all, or it holds a
20712  *  register variable which is not live after the given node).
20713  *  This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
20714  *  constant operand, and one that's in a register.  Thus, the only thing we
20715  *  need to determine is whether the register holding op1 is dead.
20716  */
20717 bool CodeGen::genRegTrashable(regNumber reg, GenTree* tree)
20718 {
20719     regMaskTP vars;
20720     regMaskTP mask = genRegMask(reg);
20721
20722     if (regSet.rsMaskUsed & mask)
20723         return false;
20724
20725     assert(tree->gtOper == GT_ADD);
20726     GenTree* regValTree = tree->gtOp.gtOp1;
20727     if (!tree->gtOp.gtOp2->IsCnsIntOrI())
20728     {
20729         regValTree = tree->gtOp.gtOp2;
20730         assert(tree->gtOp.gtOp1->IsCnsIntOrI());
20731     }
20732     assert(regValTree->InReg());
20733
20734     /* At this point, the only way that the register will remain live
20735      * is if it is itself a register variable that isn't dying.
20736      */
20737     assert(regValTree->gtRegNum == reg);
20738     if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
20739         return false;
20740     else
20741         return true;
20742 }
20743
20744 /*****************************************************************************/
20745 //
20746 // This method calculates the USE and DEF values for a statement.
20747 // It also calls fgSetRngChkTarget for the statement.
20748 //
20749 // We refactor out this code from fgPerBlockLocalVarLiveness
20750 // and add QMARK logics to it.
20751 //
20752 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20753 //
20754 // The usage of this method is very limited.
20755 // We should only call it for the first node in the statement or
20756 // for the node after the GTF_RELOP_QMARK node.
20757 //
20758 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20759
20760 /*
20761        Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
20762        when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
20763        from both trees.
20764
20765        Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
20766        linked by gtNext.
20767
20768        The algorithm we use is:
       (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
20770        (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
20771            We cache copies of current fgCurDefSet and fgCurUseSet.
20772            (The fact that it is recursively calling itself is for nested QMARK case,
20773             where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
20774        (3) We walk the thenTree.
20775        (4) When we see GT_COLON node, we know that we just finished the thenTree.
20776            We then make a copy of the current fgCurDefSet and fgCurUseSet,
20777            restore them to the ones before the thenTree, and then continue walking
20778            the elseTree.
20779        (5) When we see the GT_QMARK node, we know we just finished the elseTree.
20780            So we combine the results from the thenTree and elseTree and then return.
20781
20782
20783                                  +--------------------+
20784                                  |      GT_QMARK    11|
20785                                  +----------+---------+
20786                                             |
20787                                             *
20788                                            / \
20789                                          /     \
20790                                        /         \
20791                   +---------------------+       +--------------------+
20792                   |      GT_<cond>    3 |       |     GT_COLON     7 |
20793                   |  w/ GTF_RELOP_QMARK |       |  w/ GTF_COLON_COND |
20794                   +----------+----------+       +---------+----------+
20795                              |                            |
20796                              *                            *
20797                             / \                          / \
20798                           /     \                      /     \
20799                         /         \                  /         \
20800                        2           1          thenTree 6       elseTree 10
20801                                   x               |                |
20802                                  /                *                *
20803      +----------------+        /                 / \              / \
20804      |prevExpr->gtNext+------/                 /     \          /     \
20805      +----------------+                      /         \      /         \
20806                                             5           4    9           8
20807
20808
20809 */
20810
20811 GenTree* Compiler::fgLegacyPerStatementLocalVarLiveness(GenTree* startNode, // The node to start walking with.
20812                                                         GenTree* relopNode) // The node before the startNode.
20813                                                                             // (It should either be NULL or
20814                                                                             // a GTF_RELOP_QMARK node.)
20815 {
20816     GenTree* tree;
20817
20818     VARSET_TP defSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurDefSet)); // Store the current fgCurDefSet and
20819                                                                           // fgCurUseSet so
20820     VARSET_TP useSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurUseSet)); // we can restore then before entering the
20821                                                                           // elseTree.
20822
20823     MemoryKindSet memoryUse_BeforeSplit   = fgCurMemoryUse;
20824     MemoryKindSet memoryDef_BeforeSplit   = fgCurMemoryDef;
20825     MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
20826
20827     VARSET_TP defSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // These two variables will store
20828                                                                 // the USE and DEF sets after
20829     VARSET_TP useSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
20830
20831     MemoryKindSet memoryUse_AfterThenTree   = fgCurMemoryUse;
20832     MemoryKindSet memoryDef_AfterThenTree   = fgCurMemoryDef;
20833     MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20834
20835     // relopNode is either NULL or a GTF_RELOP_QMARK node.
20836     assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
20837
20838     // If relopNode is NULL, then the startNode must be the 1st node of the statement.
20839     // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
20840     assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
20841            (relopNode && startNode == relopNode->gtNext));
20842
20843     for (tree = startNode; tree; tree = tree->gtNext)
20844     {
20845         switch (tree->gtOper)
20846         {
20847
20848             case GT_QMARK:
20849
20850                 // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
20851                 noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
20852
20853                 // By the time we see a GT_QMARK, we must have finished processing the elseTree.
20854                 // So it's the time to combine the results
                // from the thenTree and the elseTree, and then return.
20856
20857                 VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
20858                 VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
20859
20860                 fgCurMemoryDef   = fgCurMemoryDef & memoryDef_AfterThenTree;
20861                 fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
20862                 fgCurMemoryUse   = fgCurMemoryUse | memoryUse_AfterThenTree;
20863
20864                 // Return the GT_QMARK node itself so the caller can continue from there.
20865                 // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
20866                 // in the "for" statement.
20867                 goto _return;
20868
20869             case GT_COLON:
20870                 // By the time we see GT_COLON, we must have just walked the thenTree.
20871                 // So we need to do two things here.
20872                 // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
20873                 //     with the result from the elseTree.
20874                 // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
20875                 //     and then continue walking the elseTree.
20876                 VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
20877                 VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
20878
20879                 memoryDef_AfterThenTree   = fgCurMemoryDef;
20880                 memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20881                 memoryUse_AfterThenTree   = fgCurMemoryUse;
20882
20883                 VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
20884                 VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
20885
20886                 fgCurMemoryDef   = memoryDef_BeforeSplit;
20887                 fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
20888                 fgCurMemoryUse   = memoryUse_BeforeSplit;
20889
20890                 break;
20891
20892             case GT_LCL_VAR:
20893             case GT_LCL_FLD:
20894             case GT_LCL_VAR_ADDR:
20895             case GT_LCL_FLD_ADDR:
20896             case GT_STORE_LCL_VAR:
20897             case GT_STORE_LCL_FLD:
20898                 fgMarkUseDef(tree->AsLclVarCommon());
20899                 break;
20900
20901             case GT_CLS_VAR:
20902                 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20903                 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20904                 // This models Volatile reads as def-then-use of the heap.
20905                 // and allows for a CSE of a subsequent non-volatile read
20906                 if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
20907                 {
20908                     // For any Volatile indirection, we must handle it as a
20909                     // definition of GcHeap/ByrefExposed
20910                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20911                 }
20912                 // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
20913                 // assignment.
20914                 // Otherwise, we treat it as a use here.
20915                 if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
20916                 {
20917                     fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20918                 }
20919                 break;
20920
20921             case GT_IND:
20922                 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20923                 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20924                 // This models Volatile reads as def-then-use of the heap.
20925                 // and allows for a CSE of a subsequent non-volatile read
20926                 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
20927                 {
20928                     // For any Volatile indirection, we must handle it as a
20929                     // definition of GcHeap/ByrefExposed
20930                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20931                 }
20932
20933                 // If the GT_IND is the lhs of an assignment, we'll handle it
20934                 // as a heap/byref def, when we get to assignment.
20935                 // Otherwise, we treat it as a use here.
20936                 if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
20937                 {
20938                     GenTreeLclVarCommon* dummyLclVarTree = NULL;
20939                     bool                 dummyIsEntire   = false;
20940                     GenTree*             addrArg         = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
20941                     if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
20942                     {
20943                         fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20944                     }
20945                     else
20946                     {
20947                         // Defines a local addr
20948                         assert(dummyLclVarTree != nullptr);
20949                         fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
20950                     }
20951                 }
20952                 break;
20953
20954             // These should have been morphed away to become GT_INDs:
20955             case GT_FIELD:
20956             case GT_INDEX:
20957                 unreached();
20958                 break;
20959
20960             // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
20961             case GT_LOCKADD:
20962             case GT_XADD:
20963             case GT_XCHG:
20964             case GT_CMPXCHG:
20965                 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20966                 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20967                 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20968                 break;
20969
20970             case GT_MEMORYBARRIER:
20971                 // Simliar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
20972                 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20973                 break;
20974
20975             // For now, all calls read/write GcHeap/ByrefExposed, writes in their entirety.  Might tighten this case
20976             // later.
20977             case GT_CALL:
20978             {
20979                 GenTreeCall* call    = tree->AsCall();
20980                 bool         modHeap = true;
20981                 if (call->gtCallType == CT_HELPER)
20982                 {
20983                     CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
20984
20985                     if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
20986                     {
20987                         modHeap = false;
20988                     }
20989                 }
20990                 if (modHeap)
20991                 {
20992                     fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20993                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20994                     fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20995                 }
20996             }
20997
20998                 // If this is a p/invoke unmanaged call or if this is a tail-call
20999                 // and we have an unmanaged p/invoke call in the method,
21000                 // then we're going to run the p/invoke epilog.
21001                 // So we mark the FrameRoot as used by this instruction.
21002                 // This ensures that the block->bbVarUse will contain
21003                 // the FrameRoot local var if is it a tracked variable.
21004
21005                 if (!opts.ShouldUsePInvokeHelpers())
21006                 {
21007                     if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
21008                     {
21009                         /* Get the TCB local and mark it as used */
21010
21011                         noway_assert(info.compLvFrameListRoot < lvaCount);
21012
21013                         LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
21014
21015                         if (varDsc->lvTracked)
21016                         {
21017                             if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
21018                             {
21019                                 VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
21020                             }
21021                         }
21022                     }
21023                 }
21024
21025                 break;
21026
21027             default:
21028
21029                 // Determine what memory kinds it defines.
21030                 if (tree->OperIsAssignment() || tree->OperIsBlkOp())
21031                 {
21032                     GenTreeLclVarCommon* dummyLclVarTree = NULL;
21033                     if (tree->DefinesLocal(this, &dummyLclVarTree))
21034                     {
21035                         if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
21036                         {
21037                             fgCurMemoryDef |= memoryKindSet(ByrefExposed);
21038
21039                             // We've found a store that modifies ByrefExposed
21040                             // memory but not GcHeap memory, so track their
21041                             // states separately.
21042                             byrefStatesMatchGcHeapStates = false;
21043                         }
21044                     }
21045                     else
21046                     {
21047                         // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
21048                         fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
21049                     }
21050                 }
21051
21052                 // Are we seeing a GT_<cond> for a GT_QMARK node?
21053                 if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
21054                 {
21055                     // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
21056                     // Recursively call fgLegacyPerStatementLocalVarLiveness.
21057                     // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
21058                     // current
21059                     // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
21060                     // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
21061                     // node.
21062                     tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
21063
21064                     // We must have been returned here after seeing a GT_QMARK node.
21065                     noway_assert(tree->gtOper == GT_QMARK);
21066                 }
21067
21068                 break;
21069         }
21070     }
21071
21072 _return:
21073     return tree;
21074 }
21075
21076 /*****************************************************************************/
21077
21078 /*****************************************************************************
21079  * Initialize the TCB local and the NDirect stub, afterwards "push"
21080  * the hoisted NDirect stub.
21081  *
21082  * 'initRegs' is the set of registers which will be zeroed out by the prolog
21083  *             typically initRegs is zero
21084  *
21085  * The layout of the NDirect Inlined Call Frame is as follows:
21086  * (see VM/frames.h and VM/JITInterface.cpp for more information)
21087  *
21088  *   offset     field name                        when set
21089  *  --------------------------------------------------------------
21090  *    +00h      vptr for class InlinedCallFrame   method prolog
21091  *    +04h      m_Next                            method prolog
21092  *    +08h      m_Datum                           call site
21093  *    +0ch      m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
21094  *    +10h      m_pCallerReturnAddress            call site
21095  *    +14h      m_pCalleeSavedRegisters           not set by JIT
21096  *    +18h      JIT retval spill area (int)       before call_gc
21097  *    +1ch      JIT retval spill area (long)      before call_gc
21098  *    +20h      Saved value of EBP                method prolog
21099  */
21100
regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
{
    assert(compiler->compGeneratingProlog);
    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
    noway_assert(compiler->info.compCallUnmanaged);

    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

    /* let's find out if compLvFrameListRoot is enregistered */

    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];

    noway_assert(!varDsc->lvIsParam);
    noway_assert(varDsc->lvType == TYP_I_IMPL);

    DWORD threadTlsIndex, *pThreadTlsIndex;

    threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
    // On x86/WinNT with a known TLS index we can inline the TCB fetch and frame setup
    // below; otherwise (unknown TLS index, non-WinNT OS, or any non-x86 target) we fall
    // back to the CORINFO_HELP_INIT_PINVOKE_FRAME helper call in this branch.
#if defined(_TARGET_X86_)
    if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
#else
    if (true)
#endif
    {
        // Instead of calling GetThread(), and getting GS cookie and
        // InlinedCallFrame vptr through indirections, we'll call only one helper.
        // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
        // and uses REG_PINVOKE_SCRATCH as scratch register.
        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
                                  pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
        regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);

        // We're about to trash REG_PINVOKE_TCB, it better not be in use!
        assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);

        // Don't use the argument registers (including the special argument in
        // REG_PINVOKE_FRAME) for computing the target address.
        regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);

        genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);

        regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);

        if (varDsc->lvRegister)
        {
            regNumber regTgt = varDsc->lvRegNum;

            // we are about to initialize it. So turn the bit off in initRegs to prevent
            // the prolog reinitializing it.
            initRegs &= ~genRegMask(regTgt);

            if (regTgt != REG_PINVOKE_TCB)
            {
                // move TCB to its register if necessary
                getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
                regTracker.rsTrackRegTrash(regTgt);
            }
        }
        else
        {
            // move TCB to its stack location
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->info.compLvFrameListRoot, 0);
        }

        // We are done, the rest of this function deals with the inlined case.
        return initRegs;
    }

    // Inlined path: fetch the TCB directly from thread-local storage and set up the
    // InlinedCallFrame fields ourselves (vptr, GS cookie, frame link, saved EBP).

    regNumber regTCB;

    if (varDsc->lvRegister)
    {
        regTCB = varDsc->lvRegNum;

        // we are about to initialize it. So turn the bit off in initRegs to prevent
        // the prolog reinitializing it.
        initRegs &= ~genRegMask(regTCB);
    }
    else // varDsc is allocated on the Stack
    {
        regTCB = REG_PINVOKE_TCB;
    }

#if !defined(_TARGET_ARM_)
// Offsets of the TLS slot arrays inside the TEB (accessed via the FS segment on x86):
// TlsSlots (first 64 slots) and TlsExpansionSlots pointer (slots 64 and up).
#define WIN_NT_TLS_OFFSET (0xE10)
#define WIN_NT5_TLS_HIGHOFFSET (0xf94)

    /* get TCB,  mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */

    // TODO-ARM-CQ: should we inline TlsGetValue here?

    if (threadTlsIndex < 64)
    {
        //  mov  reg, FS:[0xE10+threadTlsIndex*4]
        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
                                  WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
        regTracker.rsTrackRegTrash(regTCB);
    }
    else
    {
        // TLS index >= 64 lives in the expansion slots, which require one extra
        // indirection through the TlsExpansionSlots pointer at FS:[0xf94].
        DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
        threadTlsIndex -= 64;

        // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
        // mov reg, [reg+threadTlsIndex*4]

        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
        regTracker.rsTrackRegTrash(regTCB);
    }
#endif

    /* save TCB in local var if not enregistered */

    if (!varDsc->lvRegister)
    {
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
    }

    /* set frame's vptr */

    const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
    inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
    noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
                               compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
                               REG_PINVOKE_SCRATCH);

    // Set the GSCookie
    GSCookie gsCookie, *pGSCookie;
    compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
    noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);

    /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
       set next field in frame */

    getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
                               pInfo->offsetOfThreadFrame);
    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);

    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);

    noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame

    /* set EBP value in frame */
    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);

    /* reset track field in frame */
    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);

    /* get address of our frame */

    getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);

    /* now "push" our N/direct frame */
    // i.e. make the thread's frame-chain head point at the InlinedCallFrame we just built.

    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
                               pInfo->offsetOfThreadFrame);

    // Return the (possibly reduced) set of registers the prolog still needs to zero-init.
    return initRegs;
}
21273
21274 /*****************************************************************************
21275  *  Unchain the InlinedCallFrame.
21276  *  Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
21277  *  or tail call.
21278  */
void CodeGen::genPInvokeMethodEpilog()
{
    // Nothing to do when the P/Invoke helper calls manage the frame for us.
    if (compiler->opts.ShouldUsePInvokeHelpers())
        return;

    noway_assert(compiler->info.compCallUnmanaged);
    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
    // This is only emitted at a return block, or at a throw/JMP block when tail
    // calls / jmp calls are used (the frame must be unchained before leaving).
    noway_assert(compiler->compCurBB == compiler->genReturnBB ||
                 (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
                 (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));

    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

    getEmitter()->emitDisableRandomNops();
    // debug check to make sure that we're not using ESI and/or EDI across this call, except for
    // compLvFrameListRoot.
    unsigned regTrashCheck = 0;

    /* XXX Tue 5/29/2007
     * We explicitly add interference for these in CodeGen::rgPredictRegUse.  If you change the code
     * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
     */
    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
    regNumber  reg;                       // register holding the TCB
    regNumber  reg2 = REG_PINVOKE_FRAME;  // register used to load the previous frame link

    //
    // Two cases for epilog invocation:
    //
    // 1. Return
    //    We can trash the ESI/EDI registers.
    //
    // 2. Tail call
    //    When tail called, we'd like to preserve enregistered args,
    //    in ESI/EDI so we can pass it to the callee.
    //
    // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
    // Instead use the reserved local variable slot.
    //
    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
    {
        // JMP-call epilog: spill any TCB/FRAME registers that carry live outgoing
        // arguments (per rpMaskPInvokeEpilogIntf) and restore them at the end.
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
        {
#if FEATURE_FIXED_OUT_ARGS
            // Save the register in the reserved local var slot.
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
#else
            inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
#endif
        }
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
        {
#if FEATURE_FIXED_OUT_ARGS
            // Save the register in the reserved local var slot.
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
#else
            inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
#endif
        }
    }

    if (varDsc->lvRegister)
    {
        // TCB is enregistered; make sure the frame-link scratch register doesn't
        // collide with it.
        reg = varDsc->lvRegNum;
        if (reg == reg2)
            reg2 = REG_PINVOKE_TCB;

        regTrashCheck |= genRegMask(reg2);
    }
    else
    {
        /* mov esi, [tcb address]    */

        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
                                  0);
        regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
        reg = REG_PINVOKE_TCB;

        regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
    }

    /* mov edi, [ebp-frame.next] */

    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
    regTracker.rsTrackRegTrash(reg2);

    /* mov [esi+offsetOfThreadFrame], edi */
    // Unchain: the thread's frame-chain head now points at the previous frame.

    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);

    // The registers we trashed above must not hold anything the register set considers in use.
    noway_assert(!(regSet.rsMaskUsed & regTrashCheck));

    if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
        compiler->lvaTable[compiler->genReturnLocal].lvRegister)
    {
        // really make sure we're not clobbering compiler->genReturnLocal.
        noway_assert(
            !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
              ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
    }

    // regTrashCheck is only consumed by the asserts above; silence unused warnings in release.
    (void)regTrashCheck;

    // Restore the registers ESI and EDI.
    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
    {
        // Restore in reverse order of the saves above (FRAME, then TCB).
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
        {
#if FEATURE_FIXED_OUT_ARGS
            // Restore the register from the reserved local var slot.
            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
#else
            inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
#endif
            regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
        }
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
        {
#if FEATURE_FIXED_OUT_ARGS
            // Restore the register from the reserved local var slot.
            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
#else
            inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
#endif
            regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
        }
    }
    getEmitter()->emitEnableRandomNops();
}
21414
21415 /*****************************************************************************
21416     This function emits the call-site prolog for direct calls to unmanaged code.
21417     It does all the necessary setup of the InlinedCallFrame.
21418     frameListRoot specifies the local containing the thread control block.
21419     argSize or methodToken is the value to be copied into the m_datum
21420             field of the frame (methodToken may be indirected & have a reloc)
21421     The function returns  the register now containing the thread control block,
21422     (it could be either enregistered or loaded into one of the scratch registers)
21423 */
21424
21425 regNumber CodeGen::genPInvokeCallProlog(LclVarDsc*            frameListRoot,
21426                                         int                   argSize,
21427                                         CORINFO_METHOD_HANDLE methodToken,
21428                                         BasicBlock*           returnLabel)
21429 {
21430     // Some stack locals might be 'cached' in registers, we need to trash them
21431     // from the regTracker *and* also ensure the gc tracker does not consider
21432     // them live (see the next assert).  However, they might be live reg vars
21433     // that are non-pointers CSE'd from pointers.
21434     // That means the register will be live in rsMaskVars, so we can't just
21435     // call gcMarkSetNpt().
21436     {
21437         regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
21438         gcInfo.gcRegGCrefSetCur &= ~deadRegs;
21439         gcInfo.gcRegByrefSetCur &= ~deadRegs;
21440
21441 #ifdef DEBUG
21442         deadRegs &= regSet.rsMaskVars;
21443         if (deadRegs)
21444         {
21445             for (LclVarDsc* varDsc = compiler->lvaTable;
21446                  ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
21447             {
21448                 if (!varDsc->lvTracked || !varDsc->lvRegister)
21449                     continue;
21450
21451                 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
21452                     continue;
21453
21454                 regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
21455                 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
21456                     varRegMask |= genRegMask(varDsc->lvOtherReg);
21457
21458                 if (varRegMask & deadRegs)
21459                 {
21460                     // We found the enregistered var that should not be live if it
21461                     // was a GC pointer.
21462                     noway_assert(!varTypeIsGC(varDsc));
21463                     deadRegs &= ~varRegMask;
21464                 }
21465             }
21466         }
21467 #endif // DEBUG
21468     }
21469
21470     /* Since we are using the InlinedCallFrame, we should have spilled all
21471        GC pointers to it - even from callee-saved registers */
21472
21473     noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
21474
21475     /* must specify only one of these parameters */
21476     noway_assert((argSize == 0) || (methodToken == NULL));
21477
21478     /* We are about to call unmanaged code directly.
21479        Before we can do that we have to emit the following sequence:
21480
21481        mov  dword ptr [frame.callTarget], MethodToken
21482        mov  dword ptr [frame.callSiteTracker], esp
21483        mov  reg, dword ptr [tcb_address]
21484        mov  byte  ptr [tcb+offsetOfGcState], 0
21485
21486      */
21487
21488     CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21489
21490     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21491
21492 #ifdef _TARGET_ARM_
21493     if (compiler->opts.ShouldUsePInvokeHelpers())
21494     {
21495         regNumber baseReg;
21496         int       adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, false, &baseReg, 0);
21497
21498         getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
21499         genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_BEGIN,
21500                           0,           // argSize
21501                           EA_UNKNOWN); // retSize
21502         regTracker.rsTrackRegTrash(REG_ARG_0);
21503         return REG_ARG_0;
21504     }
21505 #endif
21506
21507     /* mov   dword ptr [frame.callSiteTarget], value */
21508
21509     if (methodToken == NULL)
21510     {
21511         /* mov   dword ptr [frame.callSiteTarget], argSize */
21512         instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
21513                                    pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21514     }
21515     else
21516     {
21517         void *embedMethHnd, *pEmbedMethHnd;
21518
21519         embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
21520
21521         noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
21522
21523         if (embedMethHnd != NULL)
21524         {
21525             /* mov   dword ptr [frame.callSiteTarget], "MethodDesc" */
21526
21527             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
21528                                        compiler->lvaInlinedPInvokeFrameVar,
21529                                        pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21530         }
21531         else
21532         {
21533             /* mov   reg, dword ptr [MethodDescIndir]
21534                mov   dword ptr [frame.callSiteTarget], reg */
21535
21536             regNumber reg = regSet.rsPickFreeReg();
21537
21538 #if CPU_LOAD_STORE_ARCH
21539             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
21540             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
21541 #else  // !CPU_LOAD_STORE_ARCH
21542             getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
21543 #endif // !CPU_LOAD_STORE_ARCH
21544             regTracker.rsTrackRegTrash(reg);
21545             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
21546                                       pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21547         }
21548     }
21549
21550     regNumber tcbReg = REG_NA;
21551
21552     if (frameListRoot->lvRegister)
21553     {
21554         tcbReg = frameListRoot->lvRegNum;
21555     }
21556     else
21557     {
21558         tcbReg = regSet.rsGrabReg(RBM_ALLINT);
21559
21560         /* mov reg, dword ptr [tcb address]    */
21561
21562         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
21563                                   (unsigned)(frameListRoot - compiler->lvaTable), 0);
21564         regTracker.rsTrackRegTrash(tcbReg);
21565     }
21566
21567 #ifdef _TARGET_X86_
21568     /* mov   dword ptr [frame.callSiteTracker], esp */
21569
21570     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
21571                               pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
21572 #endif // _TARGET_X86_
21573
21574 #if CPU_LOAD_STORE_ARCH
21575     regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
21576     getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
21577     regTracker.rsTrackRegTrash(tmpReg);
21578     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
21579                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21580 #else  // !CPU_LOAD_STORE_ARCH
21581     /* mov   dword ptr [frame.callSiteReturnAddress], label */
21582
21583     getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
21584                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21585 #endif // !CPU_LOAD_STORE_ARCH
21586
21587 #if CPU_LOAD_STORE_ARCH
21588     instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
21589
21590     noway_assert(tmpReg != tcbReg);
21591
21592     getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
21593 #else  // !CPU_LOAD_STORE_ARCH
21594     /* mov   byte  ptr [tcbReg+offsetOfGcState], 0 */
21595
21596     getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
21597 #endif // !CPU_LOAD_STORE_ARCH
21598
21599     return tcbReg;
21600 }
21601
21602 /*****************************************************************************
21603  *
21604    First we have to mark in the hoisted NDirect stub that we are back
21605    in managed code. Then we have to check (a global flag) whether GC is
21606    pending or not. If so, we just call into a jit-helper.
21607    Right now we have this call always inlined, i.e. we always skip around
21608    the jit-helper call.
21609    Note:
21610    The tcb address is a regular local (initialized in the prolog), so it is either
21611    enregistered or in the frame:
21612
21613         tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
21614         mov  byte ptr[tcb_reg+offsetOfGcState], 1
21615         cmp  'global GC pending flag', 0
21616         je   @f
21617         [mov  ECX, tcb_reg]  OR [ecx was setup above]     ; we pass the tcb value to callGC
21618         [mov  [EBP+spill_area+0], eax]                    ; spill the int  return value if any
21619         [mov  [EBP+spill_area+4], edx]                    ; spill the long return value if any
21620         call @callGC
21621         [mov  eax, [EBP+spill_area+0] ]                   ; reload the int  return value if any
21622         [mov  edx, [EBP+spill_area+4] ]                   ; reload the long return value if any
21623     @f:
21624  */
21625
void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
{
#ifdef _TARGET_ARM_
    // When using the portable P/Invoke helpers, delegate the whole epilog to
    // CORINFO_HELP_JIT_PINVOKE_END: pass it the address of the inlined
    // P/Invoke frame local and return.
    if (compiler->opts.ShouldUsePInvokeHelpers())
    {
        noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

        regNumber baseReg;
        int       adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, false, &baseReg, 0);

        getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
        genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_END,
                          0,           // argSize
                          EA_UNKNOWN); // retSize
        regTracker.rsTrackRegTrash(REG_ARG_0);
        return;
    }
#endif

    BasicBlock*      clab_nostop; // label that skips the stop-for-GC helper call
    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
    regNumber        reg2;        // will hold the TCB address
    regNumber        reg3;        // scratch reg for the GC-state byte / GC-pending flag test

#ifdef _TARGET_ARM_
    reg3 = REG_R3;
#else
    reg3     = REG_EDX;
#endif

    // Keep random nops out of this sequence; the stack walker relies on it.
    getEmitter()->emitDisableRandomNops();

    if (frameListRoot->lvRegister)
    {
        /* make sure that register is live across the call */

        reg2 = frameListRoot->lvRegNum;
        noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
    }
    else
    {
        /* mov   reg2, dword ptr [tcb address]    */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
        reg2 = REG_R2;
#else
        reg2 = REG_ECX;
#endif

        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
                                  (unsigned)(frameListRoot - compiler->lvaTable), 0);
        regTracker.rsTrackRegTrash(reg2);
    }

    // Mark the thread as back in cooperative (managed) mode by setting the
    // GC-state byte in the TCB to 1.
#ifdef _TARGET_ARM_
    /* mov   r3, 1 */
    /* strb  [r2+offsetOfGcState], r3 */
    instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
    getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
#else
    /* mov   byte ptr [tcb+offsetOfGcState], 1 */
    getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
#endif

    /* test global flag (we return to managed code) */

    LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;

    addrOfCaptureThreadGlobal =
        compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
    // Exactly one of the direct address and the indirection cell is provided.
    noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));

    // Can we directly use addrOfCaptureThreadGlobal?

    if (addrOfCaptureThreadGlobal)
    {
#ifdef _TARGET_ARM_
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        regTracker.rsTrackRegTrash(reg3);
        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
#else
        getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
#endif
    }
    else
    {
        // Only the indirection cell is available: first load the address of
        // the flag, then compare the flag itself against zero.
#ifdef _TARGET_ARM_
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        regTracker.rsTrackRegTrash(reg3);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
#else // !_TARGET_ARM_

        getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
                                   (ssize_t)pAddrOfCaptureThreadGlobal);
        regTracker.rsTrackRegTrash(REG_ECX);

        getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);

#endif // !_TARGET_ARM_
    }

    /* Create the label that skips the stop-for-GC helper call */
    clab_nostop = genCreateTempLabel();

    /* Generate the conditional jump */
    emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, clab_nostop);

#ifdef _TARGET_ARM_
// The helper preserves the return value on ARM
#else
    /* save return value (if necessary) */
    if (retVal != RBM_NONE)
    {
        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
        {
            /* push eax */

            inst_RV(INS_push, REG_INTRET, TYP_INT);

            if (retVal == RBM_LNGRET)
            {
                /* push edx */

                inst_RV(INS_push, REG_EDX, TYP_INT);
            }
        }
    }
#endif

    /* emit the call to the EE-helper that stops for GC (or other reasons) */

    genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
                      EA_UNKNOWN);                 /* retSize */

#ifdef _TARGET_ARM_
// The helper preserves the return value on ARM
#else
    /* restore return value (if necessary) */

    if (retVal != RBM_NONE)
    {
        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
        {
            if (retVal == RBM_LNGRET)
            {
                /* pop edx */

                inst_RV(INS_pop, REG_EDX, TYP_INT);
                regTracker.rsTrackRegTrash(REG_EDX);
            }

            /* pop eax */

            inst_RV(INS_pop, REG_INTRET, TYP_INT);
            regTracker.rsTrackRegTrash(REG_INTRET);
        }
    }
#endif

    /* genCondJump() closes the current emitter block */

    genDefineTempLabel(clab_nostop);

    // This marks the InlinedCallFrame as "inactive".  In fully interruptible code, this is not atomic with
    // the above code.  So the process is:
    // 1) Return to cooperative mode
    // 2) Check to see if we need to stop for GC
    // 3) Return from the p/invoke (as far as the stack walker is concerned).

    /* mov  dword ptr [frame.callSiteReturnAddress], 0 */

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);

    getEmitter()->emitEnableRandomNops();
}
21807
21808 /*****************************************************************************/
21809
21810 /*****************************************************************************
21811 *           TRACKING OF FLAGS
21812 *****************************************************************************/
21813
21814 void CodeGen::genFlagsEqualToNone()
21815 {
21816     genFlagsEqReg = REG_NA;
21817     genFlagsEqVar = (unsigned)-1;
21818     genFlagsEqLoc.Init();
21819 }
21820
21821 /*****************************************************************************
21822  *
21823  *  Record the fact that the flags register has a value that reflects the
21824  *  contents of the given register.
21825  */
21826
21827 void CodeGen::genFlagsEqualToReg(GenTree* tree, regNumber reg)
21828 {
21829     genFlagsEqLoc.CaptureLocation(getEmitter());
21830     genFlagsEqReg = reg;
21831
21832     /* previous setting of flags by a var becomes invalid */
21833
21834     genFlagsEqVar = 0xFFFFFFFF;
21835
21836     /* Set appropriate flags on the tree */
21837
21838     if (tree)
21839     {
21840         tree->gtFlags |= GTF_ZSF_SET;
21841         assert(tree->gtSetFlags());
21842     }
21843 }
21844
21845 /*****************************************************************************
21846  *
21847  *  Record the fact that the flags register has a value that reflects the
21848  *  contents of the given local variable.
21849  */
21850
21851 void CodeGen::genFlagsEqualToVar(GenTree* tree, unsigned var)
21852 {
21853     genFlagsEqLoc.CaptureLocation(getEmitter());
21854     genFlagsEqVar = var;
21855
21856     /* previous setting of flags by a register becomes invalid */
21857
21858     genFlagsEqReg = REG_NA;
21859
21860     /* Set appropriate flags on the tree */
21861
21862     if (tree)
21863     {
21864         tree->gtFlags |= GTF_ZSF_SET;
21865         assert(tree->gtSetFlags());
21866     }
21867 }
21868
21869 /*****************************************************************************
21870  *
21871  *  Return an indication of whether the flags register is set to the current
21872  *  value of the given register/variable. The return value is as follows:
21873  *
21874  *      false  ..  nothing
21875  *      true   ..  the zero flag (ZF) and sign flag (SF) is set
21876  */
21877
21878 bool CodeGen::genFlagsAreReg(regNumber reg)
21879 {
21880     if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21881     {
21882         return true;
21883     }
21884
21885     return false;
21886 }
21887
21888 bool CodeGen::genFlagsAreVar(unsigned var)
21889 {
21890     if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21891     {
21892         return true;
21893     }
21894
21895     return false;
21896 }
21897
21898 /*****************************************************************************
21899  * This utility function returns true iff the execution path from "from"
21900  * (inclusive) to "to" (exclusive) contains a death of the given var
21901  */
21902 bool CodeGen::genContainsVarDeath(GenTree* from, GenTree* to, unsigned varNum)
21903 {
21904     GenTree* tree;
21905     for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
21906     {
21907         if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
21908         {
21909             unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
21910             if (dyingVarNum == varNum)
21911                 return true;
21912             LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
21913             if (varDsc->lvPromoted)
21914             {
21915                 assert(varDsc->lvType == TYP_STRUCT);
21916                 unsigned firstFieldNum = varDsc->lvFieldLclStart;
21917                 if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
21918                 {
21919                     return true;
21920                 }
21921             }
21922         }
21923     }
21924     assert(tree != NULL);
21925     return false;
21926 }
21927
21928 #endif // LEGACY_BACKEND