1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                           CodeGenerator                                   XX
9 XX                                                                           XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 */
13 #include "jitpch.h"
14 #ifdef _MSC_VER
15 #pragma hdrstop
16 #endif
17 #include "codegen.h"
18
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
20
21 #ifdef _TARGET_AMD64_
22 #error AMD64 must be !LEGACY_BACKEND
23 #endif
24
25 #ifdef _TARGET_ARM64_
26 #error ARM64 must be !LEGACY_BACKEND
27 #endif
28
29 #include "gcinfo.h"
30 #include "emit.h"
31
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
34 #endif
35
36 /*****************************************************************************
37  *
38  *  Determine what variables die between beforeSet and afterSet, and
39  *  update the liveness globals accordingly:
40  *  compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
41  */
42
43 void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
44 {
45     unsigned   varNum;
46     LclVarDsc* varDsc;
47     regMaskTP  regBit;
48     VARSET_TP  deadSet(VarSetOps::Diff(compiler, beforeSet, afterSet));
49
50     if (VarSetOps::IsEmpty(compiler, deadSet))
51         return;
52
53     /* iterate through the dead variables */
54
55     VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
56     while (iter.NextElem(&varIndex))
57     {
58         varNum = compiler->lvaTrackedToVarNum[varIndex];
59         varDsc = compiler->lvaTable + varNum;
60
61         /* Remove this variable from the 'deadSet' bit set */
62
63         noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
64
65         VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
66
67         noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
68                      VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
69
70         VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
71
72         /* We are done if the variable is not enregistered */
73
74         if (!varDsc->lvRegister)
75         {
76 #ifdef DEBUG
77             if (compiler->verbose)
78             {
79                 printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
80             }
81 #endif
82             continue;
83         }
84
85 #if !FEATURE_FP_REGALLOC
86         // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
87         if (!varDsc->IsFloatRegType())
88 #endif
89         {
90             /* Get hold of the appropriate register bit(s) */
91
92             if (varTypeIsFloating(varDsc->TypeGet()))
93             {
94                 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
95             }
96             else
97             {
98                 regBit = genRegMask(varDsc->lvRegNum);
99                 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
100                     regBit |= genRegMask(varDsc->lvOtherReg);
101             }
102
103 #ifdef DEBUG
104             if (compiler->verbose)
105             {
106                 printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
107                        compiler->compRegVarName(varDsc->lvRegNum));
108             }
109 #endif
110             noway_assert((regSet.rsMaskVars & regBit) != 0);
111
112             regSet.RemoveMaskVars(regBit);
113
114             // Remove GC tracking if any for this register
115
116             if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
117                 gcInfo.gcMarkRegSetNpt(regBit);
118         }
119     }
120 }
121
122 /*****************************************************************************
123  *
124  *  Change the given enregistered local variable node to a register variable node
125  */
126
127 void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
128 {
129     noway_assert(tree->gtOper == GT_LCL_VAR);
130     noway_assert(varDsc->lvRegister);
131
132     if (isRegPairType(varDsc->lvType))
133     {
134         /* Check for the case of a variable that was narrowed to an int */
135
136         if (isRegPairType(tree->gtType))
137         {
138             genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
139             return;
140         }
141
142         noway_assert(tree->gtFlags & GTF_VAR_CAST);
143         noway_assert(tree->gtType == TYP_INT);
144     }
145     else
146     {
147         noway_assert(!isRegPairType(tree->gtType));
148     }
149
150     /* It's a register variable -- modify the node */
151
152     unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
153
154     ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
155     tree->SetOper(GT_REG_VAR);
156     tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
157
158     tree->gtFlags |= livenessFlags;
159     tree->SetInReg();
160     tree->gtRegNum          = varDsc->lvRegNum;
161     tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
162     tree->gtRegVar.SetLclNum(varNum);
163 }
164
165 // inline
166 void CodeGen::saveLiveness(genLivenessSet* ls)
167 {
168     VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
169     VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
170     ls->maskVars  = (regMaskSmall)regSet.rsMaskVars;
171     ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
172     ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
173 }
174
175 // inline
176 void CodeGen::restoreLiveness(genLivenessSet* ls)
177 {
178     VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
179     VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
180     regSet.rsMaskVars       = ls->maskVars;
181     gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
182     gcInfo.gcRegByrefSetCur = ls->byRefRegs;
183 }
184
185 // inline
186 void CodeGen::checkLiveness(genLivenessSet* ls)
187 {
188     assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
189     assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
190     assert(regSet.rsMaskVars == ls->maskVars);
191     assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
192     assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
193 }
194
195 // inline
196 bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
197 {
198     unsigned   varNum;
199     LclVarDsc* varDsc;
200
201     assert(tree->gtOper == GT_LCL_VAR);
202
203     /* Does the variable live in a register? */
204
205     varNum = tree->gtLclVarCommon.gtLclNum;
206     assert(varNum < compiler->lvaCount);
207     varDsc = compiler->lvaTable + varNum;
208
209     if (varDsc->lvRegister)
210     {
211         genBashLclVar(tree, varNum, varDsc);
212         return true;
213     }
214     else
215     {
216         return false;
217     }
218 }
219
220 // inline
221 GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
222 {
223     bool       rev;
224     unsigned   mul;
225     unsigned   cns;
226     GenTreePtr adr;
227     GenTreePtr idx;
228
229     if (genCreateAddrMode(tree,     // address
230                           0,        // mode
231                           false,    // fold
232                           RBM_NONE, // reg mask
233                           &rev,     // reverse ops
234                           &adr,     // base addr
235                           &idx,     // index val
236 #if SCALED_ADDR_MODES
237                           &mul, // scaling
238 #endif
239                           &cns,  // displacement
240                           true)) // don't generate code
241         return adr;
242     else
243         return NULL;
244 }
245
246 #if FEATURE_STACK_FP_X87
247 // inline
248 void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
249 {
250     genFPstkLevel = newValue;
251 }
252
253 // inline
254 unsigned CodeGenInterface::genGetFPstkLevel()
255 {
256     return genFPstkLevel;
257 }
258
259 // inline
260 void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
261 {
262     noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
263     genFPstkLevel += inc;
264 }
265
266 // inline
267 void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
268 {
269     noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
270     genFPstkLevel -= dec;
271 }
272
273 #endif // FEATURE_STACK_FP_X87
274
275 /*****************************************************************************
276  *
277  *  Generate code that will set the given register to the integer constant.
278  */
279
280 void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
281 {
282     noway_assert(type != TYP_REF || val == NULL);
283
284     /* Does the reg already hold this constant? */
285
286     if (!regTracker.rsIconIsInReg(val, reg))
287     {
288         if (val == 0)
289         {
290             instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
291         }
292 #ifdef _TARGET_ARM_
293         // If we can set a register to a constant with a small encoding, then do that.
294         else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
295         {
296             instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
297         }
298 #endif
299         else
300         {
301             /* See if a register holds the value, or a close value */
302             bool      constantLoaded = false;
303             ssize_t   delta;
304             regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
305
306             if (srcReg != REG_NA)
307             {
308                 if (delta == 0)
309                 {
310                     inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
311                     constantLoaded = true;
312                 }
313                 else
314                 {
315 #if defined(_TARGET_XARCH_)
316                     /* delta should fit inside a byte */
317                     if (delta == (signed char)delta)
318                     {
319                         /* use an lea instruction to set reg */
320                         getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
321                         constantLoaded = true;
322                     }
323 #elif defined(_TARGET_ARM_)
324                     /* We found a register 'regS' that has the value we need, modulo a small delta.
325                        That is, the value we need is 'regS + delta'.
326                        We want to generate one of the following instructions, listed in order of preference:
327
328                             adds  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
329                        0<=delta<=255
330                             subs  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
331                        -255<=delta<=0
332                             adds  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
333                             subs  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
334                             mov   regD, icon         ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
335                             movw  regD, icon         ; 4 bytes. 0<=icon<=65535
336                             add.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
337                             sub.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
338                             addw  regD, regS, delta  ; 4 bytes. 0<=delta<=4095
339                             subw  regD, regS, delta  ; 4 bytes. -4095<=delta<=0
340
341                        If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
342                        than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
343                        can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
344                        0x80000000. In this case, delta will be 1.
345                     */
346
347                     bool      useAdd     = false;
348                     regMaskTP regMask    = genRegMask(reg);
349                     regMaskTP srcRegMask = genRegMask(srcReg);
350
351                     if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
352                         (unsigned_abs(delta) <= 255))
353                     {
354                         useAdd = true;
355                     }
356                     else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
357                              (unsigned_abs(delta) <= 7))
358                     {
359                         useAdd = true;
360                     }
361                     else if (arm_Valid_Imm_For_Mov(val))
362                     {
363                         // fall through to general "!constantLoaded" case below
364                     }
365                     else if (arm_Valid_Imm_For_Add(delta, flags))
366                     {
367                         useAdd = true;
368                     }
369
370                     if (useAdd)
371                     {
372                         getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
373                         constantLoaded = true;
374                     }
375 #else
376                     assert(!"Codegen missing");
377 #endif
378                 }
379             }
380
381             if (!constantLoaded) // Have we loaded it yet?
382             {
383 #ifdef _TARGET_X86_
384                 if (val == -1)
385                 {
386                     /* or reg,-1 takes 3 bytes */
387                     inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
388                 }
389                 else
390                     /* For SMALL_CODE it is smaller to push a small immediate and
391                        then pop it into the dest register */
392                     if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
393                 {
394                     /* "mov" has no s(sign)-bit and so always takes 6 bytes,
395                        whereas push+pop takes 2+1 bytes */
396
397                     inst_IV(INS_push, val);
398                     genSinglePush();
399
400                     inst_RV(INS_pop, reg, type);
401                     genSinglePop();
402                 }
403                 else
404 #endif // _TARGET_X86_
405                 {
406                     instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
407                 }
408             }
409         }
410     }
411     regTracker.rsTrackRegIntCns(reg, val);
412     gcInfo.gcMarkRegPtrVal(reg, type);
413 }
414
415 /*****************************************************************************
416  *
417  *  Find an existing register set to the given integer constant, or
418  *  pick a register and generate code that will set it to the integer constant.
419  *
420  *  If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
421  *  to pick some register to set.  NOTE that this means the returned regNumber
422  *  might *not* be in regBest.  It also implies that you should lock any registers
423  *  you don't want spilled (not just mark as used).
424  *
425  */
426
427 regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
428 {
429     regNumber regCns;
430 #if REDUNDANT_LOAD
431
432     // Is there already a register holding this constant that we can use?
433     regCns = regTracker.rsIconIsInReg(val);
434
435     if (regCns == REG_NA)
436 #endif
437     {
438         // If not, grab a register to hold the constant, preferring
439         // any register besides RBM_TMP_0 so it can hopefully be re-used
440         regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
441
442         // Now set the constant
443         genSetRegToIcon(regCns, val, type);
444     }
445
446     // NOTE: there is no guarantee that regCns is in regBest's mask
447     return regCns;
448 }
449
450 /*****************************************************************************/
451 /*****************************************************************************
452  *
453  *  Add the given constant to the specified register.
454  *  'tree' is the resulting tree
455  */
456
457 void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
458 {
459     bool setFlags = (tree != NULL) && tree->gtSetFlags();
460
461 #ifdef _TARGET_XARCH_
462     /* First check to see if we can generate inc or dec instruction(s) */
463     /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
464     if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
465     {
466         emitAttr size = emitTypeSize(dstType);
467
468         switch (ival)
469         {
470             case 2:
471                 inst_RV(INS_inc, reg, dstType, size);
472                 __fallthrough;
473             case 1:
474                 inst_RV(INS_inc, reg, dstType, size);
475
476                 goto UPDATE_LIVENESS;
477
478             case -2:
479                 inst_RV(INS_dec, reg, dstType, size);
480                 __fallthrough;
481             case -1:
482                 inst_RV(INS_dec, reg, dstType, size);
483
484                 goto UPDATE_LIVENESS;
485         }
486     }
487 #endif
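    /* General case: emit "add reg, ival" (on x86/x64 the inc/dec cases above jump past this to UPDATE_LIVENESS) */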
488     {
489         insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
490         inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
491     }
492
493 #ifdef _TARGET_XARCH_
494 UPDATE_LIVENESS:
495 #endif
496
497     if (setFlags)
498         genFlagsEqualToReg(tree, reg);
499
500     regTracker.rsTrackRegTrash(reg);
501
502     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
503
504     if (tree != NULL)
505     {
506         if (!tree->OperIsAssignment())
507         {
508             genMarkTreeInReg(tree, reg);
509             if (varTypeIsGC(tree->TypeGet()))
510                 gcInfo.gcMarkRegSetByref(genRegMask(reg));
511         }
512     }
513 }
514
515 /*****************************************************************************
516  *
517  *  Subtract the given constant from the specified register.
518  *  Should only be used for unsigned sub with overflow. Else
519  *  genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
520  *  for these cases as the flags are set differently, and the following
521  *  check for overflow won't work correctly.
522  *  'tree' is the resulting tree.
523  */
524
525 void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
526 {
527     noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
528                  ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
529     noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
530
531     regTracker.rsTrackRegTrash(reg);
532
533     noway_assert(!varTypeIsGC(tree->TypeGet()));
534     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
535
536     insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
537     inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
538
539     if (tree->gtSetFlags())
540         genFlagsEqualToReg(tree, reg);
541
542     if (tree)
543     {
544         genMarkTreeInReg(tree, reg);
545     }
546 }
547
548 /*****************************************************************************
549  *
550  *  Multiply the specified register by the given value.
551  *  'tree' is the resulting tree
552  */
553
554 void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
555 {
556     noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
557
558     regTracker.rsTrackRegTrash(reg);
559
560     if (tree)
561     {
562         genMarkTreeInReg(tree, reg);
563     }
564
565     bool     use_shift = false;
566     unsigned shift_by  = 0;
567
568     if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
569     {
570         use_shift = true;
571         BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
572     }
573
574     if (use_shift)
575     {
576         if (shift_by != 0)
577         {
578             insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
579             inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
580             if (tree->gtSetFlags())
581                 genFlagsEqualToReg(tree, reg);
582         }
583     }
584     else
585     {
586         instruction ins;
587 #ifdef _TARGET_XARCH_
588         ins = getEmitter()->inst3opImulForReg(reg);
589 #else
590         ins = INS_mul;
591 #endif
592
593         inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
594     }
595 }
596
597 /*****************************************************************************/
598 /*****************************************************************************/
599 /*****************************************************************************
600  *
601  *  Compute the value 'tree' into a register that's in 'needReg'
602  *  (or any free register if 'needReg' is RBM_NONE).
603  *
604  *  Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
605  *  If keepReg==RegSet::KEEP_REG, we mark the register as being used.
606  *
607  *  If you require that the register returned is trashable, pass true for 'freeOnly'.
608  */
609
610 void CodeGen::genComputeReg(
611     GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
612 {
613     noway_assert(tree->gtType != TYP_VOID);
614
615     regNumber reg;
616     regNumber rg2;
617
618 #if FEATURE_STACK_FP_X87
619     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
620                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
621 #elif defined(_TARGET_ARM_)
622     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
623                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
624                  genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
625                  genActualType(tree->gtType) == TYP_STRUCT);
626 #else
627     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
628                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
629                  genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
630 #endif
631
632     /* Generate the value, hopefully into the right register */
633
634     genCodeForTree(tree, needReg);
635     noway_assert(tree->InReg());
636
637     // There is a workaround in genCodeForTreeLng() that changes the type of the
638     // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
639     // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
640     // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
641     // running the rest of this code, because anything looking at gtRegNum on ARM or
642     // attempting to move from EAX/EDX will be wrong.
643     if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
644         goto REG_OK;
645
646     reg = tree->gtRegNum;
647
648     /* Did the value end up in an acceptable register? */
649
650     if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
651     {
652         /* Not good enough to satisfy the caller's orders */
653
654         if (varTypeIsFloating(tree))
655         {
656             RegSet::RegisterPreference pref(needReg, RBM_NONE);
657             rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
658         }
659         else
660         {
661             rg2 = regSet.rsGrabReg(needReg);
662         }
663     }
664     else
665     {
666         /* Do we have to end up with a free register? */
667
668         if (!freeOnly)
669             goto REG_OK;
670
671         /* Did we luck out and the value got computed into an unused reg? */
672
673         if (genRegMask(reg) & regSet.rsRegMaskFree())
674             goto REG_OK;
675
676         /* Register already in use, so spill previous value */
677
678         if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
679         {
680             rg2 = regSet.rsGrabReg(needReg);
681             if (rg2 == reg)
682             {
683                 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
684                 tree->gtRegNum = reg;
685                 goto REG_OK;
686             }
687         }
688         else
689         {
690             /* OK, let's find a trashable home for the value */
691
692             regMaskTP rv1RegUsed;
693
694             regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
695             rg2 = regSet.rsPickReg(needReg);
696             regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
697         }
698     }
699
700     noway_assert(reg != rg2);
701
702     /* Update the value in the target register */
703
704     regTracker.rsTrackRegCopy(rg2, reg);
705
706     inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
707
708     /* The value has been transferred to 'reg' */
709
710     if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
711         gcInfo.gcMarkRegSetNpt(genRegMask(reg));
712
713     gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
714
715     /* The value is now in an appropriate register */
716
717     tree->gtRegNum = rg2;
718
719 REG_OK:
720
721     /* Does the caller want us to mark the register as used? */
722
723     if (keepReg == RegSet::KEEP_REG)
724     {
725         /* In case we're computing a value into a register variable */
726
727         genUpdateLife(tree);
728
729         /* Mark the register as 'used' */
730
731         regSet.rsMarkRegUsed(tree);
732     }
733 }
734
735 /*****************************************************************************
736  *
737  *  Same as genComputeReg(), the only difference being that the result is
738  *  guaranteed to end up in a trashable register.
739  */
740
741 // inline
742 void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
743 {
744     genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
745 }
746
747 /*****************************************************************************
748  *
749  *  The value 'tree' was earlier computed into a register; free up that
750  *  register (but also make sure the value is presently in a register).
751  */
752
753 void CodeGen::genReleaseReg(GenTreePtr tree)
754 {
755     if (tree->gtFlags & GTF_SPILLED)
756     {
757         /* The register has been spilled -- reload it */
758
759         regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
760         return;
761     }
762
763     regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
764 }
765
766 /*****************************************************************************
767  *
768  *  The value 'tree' was earlier computed into a register. Check whether that
769  *  register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
770  *  free the register. The caller shouldn't need to be setting GCness of the register
771  *  where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
772  */
773
774 void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
775 {
776     if (tree->gtFlags & GTF_SPILLED)
777     {
778         /* The register has been spilled -- reload it */
779
780         regSet.rsUnspillReg(tree, needReg, keepReg);
781         return;
782     }
783     else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
784     {
785         /* We need the tree in another register. So move it there */
786
787         noway_assert(tree->InReg());
788         regNumber oldReg = tree->gtRegNum;
789
790         /* Pick an acceptable register */
791
792         regNumber reg = regSet.rsGrabReg(needReg);
793
794         /* Copy the value */
795
796         inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
797         tree->gtRegNum = reg;
798
799         gcInfo.gcMarkRegPtrVal(tree);
800         regSet.rsMarkRegUsed(tree);
801         regSet.rsMarkRegFree(oldReg, tree);
802
803         regTracker.rsTrackRegCopy(reg, oldReg);
804     }
805
806     /* Free the register if the caller desired so */
807
808     if (keepReg == RegSet::FREE_REG)
809     {
810         regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
811         // Can't use RegSet::FREE_REG on a GC type
812         noway_assert(!varTypeIsGC(tree->gtType));
813     }
814     else
815     {
816         noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
817     }
818 }
819
820 /*****************************************************************************
821  *
822  * Move one half of a register pair to its new register ('off' is 0 for the low half, sizeof(int) for the high half).
823  */
824
825 // inline
826 void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
827 {
828     if (src == REG_STK)
829     {
830         // handle long to unsigned long overflow casts
831         while (tree->gtOper == GT_CAST)
832         {
833             noway_assert(tree->gtType == TYP_LONG);
834             tree = tree->gtCast.CastOp();
835         }
836         noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
837         noway_assert(tree->gtType == TYP_LONG);
838         inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
839         regTracker.rsTrackRegTrash(dst);
840     }
841     else
842     {
843         regTracker.rsTrackRegCopy(dst, src);
844         inst_RV_RV(INS_mov, dst, src, TYP_INT);
845     }
846 }
847
848 /*****************************************************************************
849  *
850  *  The given long value is in a register pair, but it's not an acceptable
851  *  one. We have to move the value into a register pair in 'needReg' (if
852  *  non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
853  *
854  *  Important note: if 'needReg' is non-zero, we assume the current pair
855  *  has not been marked as free. If, OTOH, 'newPair' is specified, we
856  *  assume that the current register pair is marked as used and free it.
857  */
858
859 void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
860 {
861     regPairNo oldPair;
862
863     regNumber oldLo;
864     regNumber oldHi;
865     regNumber newLo;
866     regNumber newHi;
867
868     /* Either a target set or a specific pair may be requested */
869
870     noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
871
872     /* Get hold of the current pair */
873
874     oldPair = tree->gtRegPair;
875     noway_assert(oldPair != newPair);
876
877     /* Are we supposed to move to a specific pair? */
878
879     if (newPair != REG_PAIR_NONE)
880     {
881         regMaskTP oldMask = genRegPairMask(oldPair);
882         regMaskTP loMask  = genRegMask(genRegPairLo(newPair));
883         regMaskTP hiMask  = genRegMask(genRegPairHi(newPair));
884         regMaskTP overlap = oldMask & (loMask | hiMask);
885
886         /* First lock any registers that are in both pairs */
887
888         noway_assert((regSet.rsMaskUsed & overlap) == overlap);
889         noway_assert((regSet.rsMaskLock & overlap) == 0);
890         regSet.rsMaskLock |= overlap;
891
892         /* Make sure any additional registers we need are free */
893
894         if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
895         {
896             regSet.rsGrabReg(loMask);
897         }
898
899         if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
900         {
901             regSet.rsGrabReg(hiMask);
902         }
903
904         /* Unlock those registers we have temporarily locked */
905
906         noway_assert((regSet.rsMaskUsed & overlap) == overlap);
907         noway_assert((regSet.rsMaskLock & overlap) == overlap);
908         regSet.rsMaskLock -= overlap;
909
910         /* We can now free the old pair */
911
912         regSet.rsMarkRegFree(oldMask);
913     }
914     else
915     {
916         /* Pick the new pair based on the caller's stated preference */
917
918         newPair = regSet.rsGrabRegPair(needReg);
919     }
920
921     // If grabbed pair is the same as old one we're done
922     if (newPair == oldPair)
923     {
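        // This assert just fills in the lo/hi locals via the comma operator and checks that neither half of the new pair is REG_STK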
924         noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
925                       newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
926         return;
927     }
928
929     /* Move the values from the old pair into the new one */
930
931     oldLo = genRegPairLo(oldPair);
932     oldHi = genRegPairHi(oldPair);
933     newLo = genRegPairLo(newPair);
934     newHi = genRegPairHi(newPair);
935
936     noway_assert(newLo != REG_STK && newHi != REG_STK);
937
938     /* Careful - the register pairs might overlap */
939
940     if (newLo == oldLo)
941     {
942         /* The low registers are identical, just move the upper half */
943
944         noway_assert(newHi != oldHi);
945         genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
946     }
947     else
948     {
949         /* The low registers are different, are the upper ones the same? */
950
951         if (newHi == oldHi)
952         {
953             /* Just move the lower half, then */
954             genMoveRegPairHalf(tree, newLo, oldLo, 0);
955         }
956         else
957         {
958             /* Both sets are different - is there an overlap? */
959
960             if (newLo == oldHi)
961             {
962                 /* Are high and low simply swapped ? */
963
964                 if (newHi == oldLo)
965                 {
966 #ifdef _TARGET_ARM_
967                     /* Let's use XOR swap to reduce register pressure. */
968                     inst_RV_RV(INS_eor, oldLo, oldHi);
969                     inst_RV_RV(INS_eor, oldHi, oldLo);
970                     inst_RV_RV(INS_eor, oldLo, oldHi);
971 #else
972                     inst_RV_RV(INS_xchg, oldHi, oldLo);
973 #endif
974                     regTracker.rsTrackRegSwap(oldHi, oldLo);
975                 }
976                 else
977                 {
978                     /* New lower == old higher, so move higher half first */
979
980                     noway_assert(newHi != oldLo);
981                     genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
982                     genMoveRegPairHalf(tree, newLo, oldLo, 0);
983                 }
984             }
985             else
986             {
987                 /* Move lower half first */
988                 genMoveRegPairHalf(tree, newLo, oldLo, 0);
989                 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
990             }
991         }
992     }
993
994     /* Record the fact that we're switching to another pair */
995
996     tree->gtRegPair = newPair;
997 }
998
999 /*****************************************************************************
1000  *
1001  *  Compute the value 'tree' into the register pair specified by 'needRegPair'.
1002  *  If 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoiding
1003  *  those in avoidReg.
1004  *  If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1005  *  value ends up in as being used.
1006  */
1007
1008 void CodeGen::genComputeRegPair(
1009     GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
1010 {
1011     regMaskTP regMask;
1012     regPairNo regPair;
1013     regMaskTP tmpMask;
1014     regMaskTP tmpUsedMask;
1015     regNumber rLo;
1016     regNumber rHi;
1017
1018     noway_assert(isRegPairType(tree->gtType));
1019
1020     if (needRegPair == REG_PAIR_NONE)
1021     {
1022         if (freeOnly)
1023         {
1024             regMask = regSet.rsRegMaskFree() & ~avoidReg;
1025             if (genMaxOneBit(regMask))
1026                 regMask = regSet.rsRegMaskFree();
1027         }
1028         else
1029         {
1030             regMask = RBM_ALLINT & ~avoidReg;
1031         }
1032
1033         if (genMaxOneBit(regMask))
1034             regMask = regSet.rsRegMaskCanGrab();
1035     }
1036     else
1037     {
1038         regMask = genRegPairMask(needRegPair);
1039     }
1040
1041     /* Generate the value, hopefully into the right register pair */
1042
1043     genCodeForTreeLng(tree, regMask, avoidReg);
1044
1045     noway_assert(tree->InReg());
1046
1047     regPair = tree->gtRegPair;
1048     tmpMask = genRegPairMask(regPair);
1049
1050     rLo = genRegPairLo(regPair);
1051     rHi = genRegPairHi(regPair);
1052
1053     /* At least one half is in a real register */
1054
1055     noway_assert(rLo != REG_STK || rHi != REG_STK);
1056
1057     /* Did the value end up in an acceptable register pair? */
1058
1059     if (needRegPair != REG_PAIR_NONE)
1060     {
1061         if (needRegPair != regPair)
1062         {
1063             /* This is a workaround. If we specify a regPair for genMoveRegPair */
1064             /* it expects the source pair to be marked as used */
1065             regSet.rsMarkRegPairUsed(tree);
1066             genMoveRegPair(tree, 0, needRegPair);
1067         }
1068     }
1069     else if (freeOnly)
1070     {
1071         /* Do we have to end up with a free register pair?
1072            Something might have gotten freed up above */
1073         bool mustMoveReg = false;
1074
1075         regMask = regSet.rsRegMaskFree() & ~avoidReg;
1076
1077         if (genMaxOneBit(regMask))
1078             regMask = regSet.rsRegMaskFree();
1079
1080         if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
1081         {
1082             /* Note that we must call genMoveRegPair if one of our registers
1083                comes from the used mask, so that it will be properly spilled. */
1084
1085             mustMoveReg = true;
1086         }
1087
1088         if (genMaxOneBit(regMask))
1089             regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
1090
1091         if (genMaxOneBit(regMask))
1092             regMask |= regSet.rsRegMaskCanGrab();
1093
1094         /* Did the value end up in a free register pair? */
1095
1096         if (mustMoveReg)
1097         {
1098             /* We'll have to move the value to a free (trashable) pair */
1099             genMoveRegPair(tree, regMask, REG_PAIR_NONE);
1100         }
1101     }
1102     else
1103     {
1104         noway_assert(needRegPair == REG_PAIR_NONE);
1105         noway_assert(!freeOnly);
1106
1107         /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
1108         tmpUsedMask = tmpMask & regSet.rsMaskUsed;
1109         tmpMask &= ~regSet.rsMaskUsed;
1110
1111         /* Make sure that the value is in "real" registers */
1112         if (rLo == REG_STK)
1113         {
1114             /* Get one of the desired registers, but exclude rHi */
1115
1116             regSet.rsLockReg(tmpMask);
1117             regSet.rsLockUsedReg(tmpUsedMask);
1118
1119             regNumber reg = regSet.rsPickReg(regMask);
1120
1121             regSet.rsUnlockUsedReg(tmpUsedMask);
1122             regSet.rsUnlockReg(tmpMask);
1123
1124             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
1125
1126             tree->gtRegPair = gen2regs2pair(reg, rHi);
1127
1128             regTracker.rsTrackRegTrash(reg);
1129             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1130         }
1131         else if (rHi == REG_STK)
1132         {
1133             /* Get one of the desired registers, but exclude rLo */
1134
1135             regSet.rsLockReg(tmpMask);
1136             regSet.rsLockUsedReg(tmpUsedMask);
1137
1138             regNumber reg = regSet.rsPickReg(regMask);
1139
1140             regSet.rsUnlockUsedReg(tmpUsedMask);
1141             regSet.rsUnlockReg(tmpMask);
1142
1143             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
1144
1145             tree->gtRegPair = gen2regs2pair(rLo, reg);
1146
1147             regTracker.rsTrackRegTrash(reg);
1148             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1149         }
1150     }
1151
1152     /* Does the caller want us to mark the register as used? */
1153
1154     if (keepReg == RegSet::KEEP_REG)
1155     {
1156         /* In case we're computing a value into a register variable */
1157
1158         genUpdateLife(tree);
1159
1160         /* Mark the register as 'used' */
1161
1162         regSet.rsMarkRegPairUsed(tree);
1163     }
1164 }
1165
1166 /*****************************************************************************
1167  *
1168  *  Same as genComputeRegPair(), the only difference being that the result
1169  *  is guaranteed to end up in a trashable register pair.
1170  */
1171
1172 // inline
1173 void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
1174 {
1175     genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
1176 }
1177
1178 /*****************************************************************************
1179  *
1180  *  The value 'tree' was earlier computed into a register pair; free up that
1181  *  register pair (but also make sure the value is presently in a register
1182  *  pair).
1183  */
1184
1185 void CodeGen::genReleaseRegPair(GenTreePtr tree)
1186 {
1187     if (tree->gtFlags & GTF_SPILLED)
1188     {
1189         /* The register has been spilled -- reload it */
1190
1191         regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
1192         return;
1193     }
1194
1195     regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1196 }
1197
1198 /*****************************************************************************
1199  *
1200  *  The value 'tree' was earlier computed into a register pair. Check whether
1201  *  either register of that pair has been spilled (and reload it if so), and
1202  *  if 'keepReg' is RegSet::FREE_REG, free the register pair.
1203  */
1204
1205 void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
1206 {
1207     if (tree->gtFlags & GTF_SPILLED)
1208     {
1209         regMaskTP regMask;
1210
1211         if (regPair == REG_PAIR_NONE)
1212             regMask = RBM_NONE;
1213         else
1214             regMask = genRegPairMask(regPair);
1215
1216         /* The register pair has been spilled -- reload it */
1217
1218         regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1219     }
1220
1221     /* Does the caller insist on the value being in a specific place? */
1222
1223     if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1224     {
1225         /* No good -- we'll have to move the value to a new place */
1226
1227         genMoveRegPair(tree, 0, regPair);
1228
1229         /* Mark the pair as used if appropriate */
1230
1231         if (keepReg == RegSet::KEEP_REG)
1232             regSet.rsMarkRegPairUsed(tree);
1233
1234         return;
1235     }
1236
1237     /* Free the register pair if the caller desired so */
1238
1239     if (keepReg == RegSet::FREE_REG)
1240         regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1241 }
1242
1243 /*****************************************************************************
1244  *
1245  *  Compute the given long value into the specified register pair; don't mark
1246  *  the register pair as used.
1247  */
1248
1249 // inline
1250 void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
1251 {
1252     genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
1253     genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
1254 }
1255
1256 /*****************************************************************************
1257  *  This helper makes sure that the regpair target of an assignment is
1258  *  available for use.  This needs to be called in genCodeForTreeLng just before
1259  *  a long assignment, but must not be called until everything has been
1260  *  evaluated, or else we might try to spill enregistered variables.
1261  *
1262  */
1263
1264 // inline
1265 void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1266 {
1267     /* Make sure the target of the store is available */
1268
1269     regNumber regLo = genRegPairLo(regPair);
1270     regNumber regHi = genRegPairHi(regPair);
1271
1272     if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1273         regSet.rsSpillReg(regHi);
1274
1275     if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1276         regSet.rsSpillReg(regLo);
1277 }
1278
1279 /*****************************************************************************/
1280 /*****************************************************************************
1281  *
1282  *  Return true if the given tree 'addr' can be computed via an addressing mode,
1283  *  such as "[ebx+esi*4+20]". If the expression isn't an address mode already
1284  *  try to make it so (but we don't try 'too hard' to accomplish this).
1285  *
1286  *  If we end up needing a register (or two registers) to hold some part(s) of the
1287  *  address, we return the use register mask via '*useMaskPtr'.
1288  *
1289  *  If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
1290  *  in use. The caller would then be responsible for calling
1291  *  regSet.rsMarkRegFree(*useMaskPtr).
1292  *
1293  *  If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
1294  *  calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
1295  */
1296
1297 bool CodeGen::genMakeIndAddrMode(GenTreePtr      addr,
1298                                  GenTreePtr      oper,
1299                                  bool            forLea,
1300                                  regMaskTP       regMask,
1301                                  RegSet::KeepReg keepReg,
1302                                  regMaskTP*      useMaskPtr,
1303                                  bool            deferOK)
1304 {
1305     if (addr->gtOper == GT_ARR_ELEM)
1306     {
1307         regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1308         *useMaskPtr    = regs;
1309         return true;
1310     }
1311
1312     bool       rev;
1313     GenTreePtr rv1;
1314     GenTreePtr rv2;
1315     bool       operIsArrIndex; // is oper an array index
1316     GenTreePtr scaledIndex;    // If scaled addressing mode can't be used
1317
1318     regMaskTP anyMask = RBM_ALLINT;
1319
1320     unsigned cns;
1321     unsigned mul;
1322
1323     GenTreePtr tmp;
1324     int        ixv = INT_MAX; // unset value
1325
1326     GenTreePtr scaledIndexVal;
1327
1328     regMaskTP newLiveMask;
1329     regMaskTP rv1Mask;
1330     regMaskTP rv2Mask;
1331
1332     /* Deferred address mode forming NYI for x86 */
1333
1334     noway_assert(deferOK == false);
1335
1336     noway_assert(oper == NULL ||
1337                  ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
1338                   ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
1339     operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
1340
1341     if (addr->gtOper == GT_LEA)
1342     {
1343         rev                  = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1344         GenTreeAddrMode* lea = addr->AsAddrMode();
1345         rv1                  = lea->Base();
1346         rv2                  = lea->Index();
1347         mul                  = lea->gtScale;
1348         cns                  = lea->gtOffset;
1349
1350         if (rv1 != NULL && rv2 == NULL && cns == 0 && rv1->InReg())
1351         {
1352             scaledIndex = NULL;
1353             goto YES;
1354         }
1355     }
1356     else
1357     {
1358         // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1359         // EASIER TO MERGE
1360
1361         /* Is the complete address already sitting in a register? */
1362
1363         if ((addr->InReg()) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1364         {
1365             genUpdateLife(addr);
1366
1367             rv1 = addr;
1368             rv2 = scaledIndex = 0;
1369             cns               = 0;
1370
1371             goto YES;
1372         }
1373
1374         /* Is it an absolute address */
1375
1376         if (addr->IsCnsIntOrI())
1377         {
1378             rv1 = rv2 = scaledIndex = 0;
1379             // along this code path cns is never used, so place a BOGUS value in it as proof
1380             // cns = addr->gtIntCon.gtIconVal;
1381             cns = UINT_MAX;
1382
1383             goto YES;
1384         }
1385
1386         /* Is there a chance of forming an address mode? */
1387
1388         if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1389         {
1390             /* This better not be an array index */
1391             noway_assert(!operIsArrIndex);
1392
1393             return false;
1394         }
1395         // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1396     }
1397
1398     /*  For scaled array access, RV2 may not be pointing to the index of the
1399         array if the CPU does not support the needed scaling factor.  We will
1400         make it point to the actual index, and scaledIndex will point to
1401         the scaled value */
1402
1403     scaledIndex    = NULL;
1404     scaledIndexVal = NULL;
1405
1406     if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
1407         rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1408     {
1409         scaledIndex = rv2;
1410         compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1411
1412         noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1413     }
1414
1415     /* Has the address already been computed? */
1416
1417     if (addr->InReg())
1418     {
1419         if (forLea)
1420             return true;
1421
1422         rv1         = addr;
1423         rv2         = NULL;
1424         scaledIndex = NULL;
1425         genUpdateLife(addr);
1426         goto YES;
1427     }
1428
1429     /*
1430         Here we have the following operands:
1431
1432             rv1     .....       base address
1433             rv2     .....       offset value        (or NULL)
1434             mul     .....       multiplier for rv2  (or 0)
1435             cns     .....       additional constant (or 0)
1436
1437         The first operand must be present (and be an address) unless we're
1438         computing an expression via 'LEA'. The scaled operand is optional,
1439         but must not be a pointer if present.
1440      */
1441
1442     noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1443
1444     /*-------------------------------------------------------------------------
1445      *
1446      * Make sure both rv1 and rv2 (if present) are in registers
1447      *
1448      */
1449
1450     // Trivial case : Is either rv1 or rv2 a NULL ?
1451
1452     if (!rv2)
1453     {
1454         /* A single operand, make sure it's in a register */
1455
1456         if (cns != 0)
1457         {
1458             // In the case where "rv1" is already in a register, there's no reason to get into a
1459             // register in "regMask" yet, if there's a non-zero constant that we're going to add;
1460             // if there is, we can do an LEA.
1461             genCodeForTree(rv1, RBM_NONE);
1462         }
1463         else
1464         {
1465             genCodeForTree(rv1, regMask);
1466         }
1467         goto DONE_REGS;
1468     }
1469     else if (!rv1)
1470     {
1471         /* A single (scaled) operand, make sure it's in a register */
1472
1473         genCodeForTree(rv2, 0);
1474         goto DONE_REGS;
1475     }
1476
1477     /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1478        they are in registers */
1479
1480     noway_assert(rv1 && rv2);
1481
1482     /*  If we have to check a constant array index, compare it against
1483         the array dimension (see below) but then fold the index with a
1484         scaling factor (if any) and additional offset (if any).
1485      */
1486
1487     if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1488     {
1489         if (scaledIndex != NULL)
1490         {
1491             assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1492             rv2 = scaledIndexVal;
1493         }
1494         /* We must have a range-checked index operation */
1495
1496         noway_assert(operIsArrIndex);
1497
1498         /* Get hold of the index value and see if it's a constant */
1499
1500         if (rv2->IsIntCnsFitsInI32())
1501         {
1502             ixv = (int)rv2->gtIntCon.gtIconVal;
1503             // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1504             if (scaledIndex != NULL)
1505             {
1506                 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
1507                                                                                      // multiple of 2^6.
1508                 if (mul == 0)
1509                 {
1510                     mul = scale;
1511                 }
1512                 else
1513                 {
1514                     mul *= scale;
1515                 }
1516             }
1517             rv2 = scaledIndex = NULL;
1518
1519             /* Add the scaled index into the added value */
1520
1521             if (mul)
1522                 cns += ixv * mul;
1523             else
1524                 cns += ixv;
1525
1526             /* Make sure 'rv1' is in a register */
1527
1528             genCodeForTree(rv1, regMask);
1529
1530             goto DONE_REGS;
1531         }
1532     }
1533
1534     if (rv1->InReg())
1535     {
1536         /* op1 already in register - how about op2? */
1537
1538         if (rv2->InReg())
1539         {
1540             /* Great - both operands are in registers already. Just update
1541                the liveness and we are done. */
1542
1543             if (rev)
1544             {
1545                 genUpdateLife(rv2);
1546                 genUpdateLife(rv1);
1547             }
1548             else
1549             {
1550                 genUpdateLife(rv1);
1551                 genUpdateLife(rv2);
1552             }
1553
1554             goto DONE_REGS;
1555         }
1556
1557         /* rv1 is in a register, but rv2 isn't */
1558
1559         if (!rev)
1560         {
1561             /* rv1 is already materialized in a register. Just update liveness
1562                to rv1 and generate code for rv2 */
1563
1564             genUpdateLife(rv1);
1565             regSet.rsMarkRegUsed(rv1, oper);
1566         }
1567
1568         goto GEN_RV2;
1569     }
1570     else if (rv2->InReg())
1571     {
1572         /* rv2 is in a register, but rv1 isn't */
1573
1574         noway_assert(rv2->gtOper == GT_REG_VAR);
1575
1576         if (rev)
1577         {
1578             /* rv2 is already materialized in a register. Update liveness
1579                to after rv2 and then hang on to rv2 */
1580
1581             genUpdateLife(rv2);
1582             regSet.rsMarkRegUsed(rv2, oper);
1583         }
1584
1585         /* Generate code for the first operand */
1586
1587         genCodeForTree(rv1, regMask);
1588
1589         if (rev)
1590         {
1591             // Free up rv2 in the right fashion (it might be re-marked if keepReg)
1592             regSet.rsMarkRegUsed(rv1, oper);
1593             regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1594             genReleaseReg(rv2);
1595             regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1596             genReleaseReg(rv1);
1597         }
1598         else
1599         {
1600             /* We have evaluated rv1, and now we just need to update liveness
1601                to rv2 which was already in a register */
1602
1603             genUpdateLife(rv2);
1604         }
1605
1606         goto DONE_REGS;
1607     }
1608
1609     if (forLea && !cns)
1610         return false;
1611
1612     /* Make sure we preserve the correct operand order */
1613
1614     if (rev)
1615     {
1616         /* Generate the second operand first */
1617
1618         // Determine what registers go live between rv2 and rv1
1619         newLiveMask = genNewLiveRegMask(rv2, rv1);
1620
1621         rv2Mask = regMask & ~newLiveMask;
1622         rv2Mask &= ~rv1->gtRsvdRegs;
1623
1624         if (rv2Mask == RBM_NONE)
1625         {
1626             // The regMask hint cannot be honored
1627             // We probably have a call that trashes the register(s) in regMask
1628             // so ignore the regMask hint, but try to avoid using
1629             // the registers in newLiveMask and the rv1->gtRsvdRegs
1630             //
1631             rv2Mask = RBM_ALLINT & ~newLiveMask;
1632             rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1633         }
1634
1635         genCodeForTree(rv2, rv2Mask);
1636         regMask &= ~genRegMask(rv2->gtRegNum);
1637
1638         regSet.rsMarkRegUsed(rv2, oper);
1639
1640         /* Generate the first operand second */
1641
1642         genCodeForTree(rv1, regMask);
1643         regSet.rsMarkRegUsed(rv1, oper);
1644
1645         /* Free up both operands in the right order (they might be
1646            re-marked as used below)
1647         */
1648         regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1649         genReleaseReg(rv2);
1650         regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1651         genReleaseReg(rv1);
1652     }
1653     else
1654     {
1655         /* Get the first operand into a register */
1656
1657         // Determine what registers go live between rv1 and rv2
1658         newLiveMask = genNewLiveRegMask(rv1, rv2);
1659
1660         rv1Mask = regMask & ~newLiveMask;
1661         rv1Mask &= ~rv2->gtRsvdRegs;
1662
1663         if (rv1Mask == RBM_NONE)
1664         {
1665             // The regMask hint cannot be honored
1666             // We probably have a call that trashes the register(s) in regMask
1667             // so ignore the regMask hint, but try to avoid using
1668             // the registers in newLiveMask and the rv2->gtRsvdRegs
1669             //
1670             rv1Mask = RBM_ALLINT & ~newLiveMask;
1671             rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1672         }
1673
1674         genCodeForTree(rv1, rv1Mask);
1675         regSet.rsMarkRegUsed(rv1, oper);
1676
1677     GEN_RV2:
1678
1679         /* Here, we need to get rv2 in a register. We have either already
1680            materialized rv1 into a register, or it was already in one */
1681
1682         noway_assert(rv1->InReg());
1683         noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1684
1685         /* Generate the second operand as well */
1686
1687         regMask &= ~genRegMask(rv1->gtRegNum);
1688         genCodeForTree(rv2, regMask);
1689
1690         if (rev)
1691         {
1692             /* rev==true means the evaluation order is rv2,rv1. We just
1693                evaluated rv2, and rv1 was already in a register. Just
1694                update liveness to rv1 and we are done. */
1695
1696             genUpdateLife(rv1);
1697         }
1698         else
1699         {
1700             /* We have evaluated rv1 and rv2. Free up both operands in
1701                the right order (they might be re-marked as used below) */
1702
1703             /* Even though we have not explicitly marked rv2 as used,
1704                rv2->gtRegNum may be used if rv2 is a multi-use or
1705                an enregistered variable. */
1706             regMaskTP rv2Used;
1707             regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
1708
1709             /* Check for special case both rv1 and rv2 are the same register */
1710             if (rv2Used != genRegMask(rv1->gtRegNum))
1711             {
1712                 genReleaseReg(rv1);
1713                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1714             }
1715             else
1716             {
1717                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1718                 genReleaseReg(rv1);
1719             }
1720         }
1721     }
1722
1723 /*-------------------------------------------------------------------------
1724  *
1725  * At this point, both rv1 and rv2 (if present) are in registers
1726  *
1727  */
1728
1729 DONE_REGS:
1730
1731     /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1732
1733     if (rv1 && !(rv1->InReg()))
1734         return false;
1735     if (rv2 && !(rv2->InReg()))
1736         return false;
1737
1738 YES:
1739
1740     // *(intVar1+intVar1) causes problems as we
1741     // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1742     // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1743     // that currently as we return a single mask in useMaskPtr.
1744
1745     if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && rv1->InReg() && rv2->InReg())
1746     {
1747         if (rv1->gtRegNum == rv2->gtRegNum)
1748         {
1749             noway_assert(!operIsArrIndex);
1750             return false;
1751         }
1752     }
1753
1754     /* Check either register operand to see if it needs to be saved */
1755
1756     if (rv1)
1757     {
1758         noway_assert(rv1->InReg());
1759
1760         if (keepReg == RegSet::KEEP_REG)
1761         {
1762             regSet.rsMarkRegUsed(rv1, oper);
1763         }
1764         else
1765         {
1766             /* If the register holds an address, mark it */
1767
1768             gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1769         }
1770     }
1771
1772     if (rv2)
1773     {
1774         noway_assert(rv2->InReg());
1775
1776         if (keepReg == RegSet::KEEP_REG)
1777             regSet.rsMarkRegUsed(rv2, oper);
1778     }
1779
1780     if (deferOK)
1781     {
1782         noway_assert(!scaledIndex);
1783         return true;
1784     }
1785
1786     /* Compute the set of registers the address depends on */
1787
1788     regMaskTP useMask = RBM_NONE;
1789
1790     if (rv1)
1791     {
1792         if (rv1->gtFlags & GTF_SPILLED)
1793             regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1794
1795         noway_assert(rv1->InReg());
1796         useMask |= genRegMask(rv1->gtRegNum);
1797     }
1798
1799     if (rv2)
1800     {
1801         if (rv2->gtFlags & GTF_SPILLED)
1802         {
1803             if (rv1)
1804             {
1805                 regMaskTP lregMask = genRegMask(rv1->gtRegNum);
1806                 regMaskTP used;
1807
1808                 regSet.rsLockReg(lregMask, &used);
1809                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1810                 regSet.rsUnlockReg(lregMask, used);
1811             }
1812             else
1813                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1814         }
1815         noway_assert(rv2->InReg());
1816         useMask |= genRegMask(rv2->gtRegNum);
1817     }
1818
1819     /* Tell the caller which registers we need to hang on to */
1820
1821     *useMaskPtr = useMask;
1822
1823     return true;
1824 }
1825
1826 /*****************************************************************************
1827  *
1828  *  'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
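 *
 *  Roughly, the emitted sequence compares the index against the array length
 *  (e.g. "cmp index, [arrRef+LenOffs]", or an immediate form when either side is
 *  a constant) and jumps unsigned-out-of-range to the range-check failure block;
 *  the exact operand forms depend on the target and on which values are constants.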
1829  */
1830
1831 void CodeGen::genRangeCheck(GenTreePtr oper)
1832 {
1833     noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1834     GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1835
1836     GenTreePtr arrLen    = bndsChk->gtArrLen;
1837     GenTreePtr arrRef    = NULL;
1838     int        lenOffset = 0;
1839
1840     /* Is the array index a constant value? */
1841     GenTreePtr index = bndsChk->gtIndex;
1842     if (!index->IsCnsIntOrI())
1843     {
1844         // No, it's not a constant.
1845         genCodeForTree(index, RBM_ALLINT);
1846         regSet.rsMarkRegUsed(index);
1847     }
1848
1849     // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
1850     // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
1851     // a register.
1852
1853     if (arrLen->OperGet() == GT_ARR_LENGTH)
1854     {
1855         GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
1856         lenOffset                  = arrLenExact->ArrLenOffset();
1857
1858 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1859         // We always load the length into a register on ARM and x64.
1860
1861         // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
1862         // lengths, but the index expression *can* be native int (64-bits)
1863         arrRef = arrLenExact->ArrRef();
1864         genCodeForTree(arrRef, RBM_ALLINT);
1865         noway_assert(arrRef->InReg());
1866         regSet.rsMarkRegUsed(arrRef);
1867         noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1868 #endif
1869     }
1870 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1871     // This is another form in which we have an array reference and a constant length.  Don't use
1872     // on LOAD_STORE or 64BIT.
1873     else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
1874     {
1875         genCodeForTree(arrRef, RBM_ALLINT);
1876         noway_assert(arrRef->InReg());
1877         regSet.rsMarkRegUsed(arrRef);
1878         noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1879     }
1880 #endif
1881
1882     // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
1883     if (arrRef == NULL)
1884     {
1885         // (Unless it's a constant.)
1886         if (!arrLen->IsCnsIntOrI())
1887         {
1888             genCodeForTree(arrLen, RBM_ALLINT);
1889             regSet.rsMarkRegUsed(arrLen);
1890
1891             noway_assert(arrLen->InReg());
1892             noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
1893         }
1894     }
1895
1896     if (!index->IsCnsIntOrI())
1897     {
1898         // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
1899         // from its register, get it back in a register.
1900         regMaskTP indRegMask = RBM_ALLINT;
1901         regMaskTP arrRegMask = RBM_ALLINT;
1902         if (!(index->gtFlags & GTF_SPILLED))
1903             arrRegMask = ~genRegMask(index->gtRegNum);
1904         if (arrRef != NULL)
1905         {
1906             genRecoverReg(arrRef, arrRegMask, RegSet::KEEP_REG);
1907             indRegMask &= ~genRegMask(arrRef->gtRegNum);
1908         }
1909         else if (!arrLen->IsCnsIntOrI())
1910         {
1911             genRecoverReg(arrLen, arrRegMask, RegSet::KEEP_REG);
1912             indRegMask &= ~genRegMask(arrLen->gtRegNum);
1913         }
1914         if (index->gtFlags & GTF_SPILLED)
1915             regSet.rsUnspillReg(index, indRegMask, RegSet::KEEP_REG);
1916
1917         /* Make sure we have the values we expect */
1918         noway_assert(index->InReg());
1919         noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
1920
1921         noway_assert(index->TypeGet() == TYP_I_IMPL ||
1922                      (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
1923         var_types indxType = index->TypeGet();
1924         if (indxType != TYP_I_IMPL)
1925             indxType = TYP_INT;
1926
1927         if (arrRef != NULL)
1928         { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1929
1930             /* Generate "cmp index, [arrRef+LenOffs]" */
1931             inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
1932         }
1933         else if (arrLen->IsCnsIntOrI())
1934         {
1935             ssize_t len = arrLen->AsIntConCommon()->IconValue();
1936             inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
1937         }
1938         else
1939         {
1940             inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
1941         }
1942
1943         /* Generate "jae <fail_label>" */
1944
1945         noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
1946         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1947         genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1948     }
1949     else
1950     {
1951         /* Generate "cmp [rv1+LenOffs], cns" */
1952
1953         bool indIsInt = true;
1954 #ifdef _TARGET_64BIT_
1955         int     ixv     = 0;
1956         ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1957         if (ixvFull > INT32_MAX)
1958         {
1959             indIsInt = false;
1960         }
1961         else
1962         {
1963             ixv = (int)ixvFull;
1964         }
1965 #else
1966         ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1967         int     ixv     = (int)ixvFull;
1968 #endif
1969         if (arrRef != NULL && indIsInt)
1970         { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1971             /* Generate "cmp [arrRef+LenOffs], ixv" */
1972             inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
1973             // Generate "jbe <fail_label>"
1974             emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1975             genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1976         }
1977         else if (arrLen->IsCnsIntOrI())
1978         {
1979             ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
1980             // Both are constants; decide at compile time.
1981             if (!(0 <= ixvFull && ixvFull < lenv))
1982             {
1983                 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1984             }
1985         }
1986         else if (!indIsInt)
1987         {
1988             genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1989         }
1990         else
1991         {
1992             /* Generate "cmp arrLen, ixv" */
1993             inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
1994             // Generate "jbe <fail_label>"
1995             emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1996             genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1997         }
1998     }
1999
2000     // Free the registers that were used.
2001     if (!index->IsCnsIntOrI())
2002     {
2003         regSet.rsMarkRegFree(index->gtRegNum, index);
2004     }
2005
2006     if (arrRef != NULL)
2007     {
2008         regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
2009     }
2010     else if (!arrLen->IsCnsIntOrI())
2011     {
2012         regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
2013     }
2014 }
2015
2016 /*****************************************************************************
2017  *
2018  * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2019  * Otherwise, check if the rvalue is already in a register. If so, mark it. Then
2020  * call genMakeAddressable2(). Needed because genMakeAddressable is used
2021  * for both lvalues and rvalues, and we can only do this for rvalues.
2022  */
2023
2024 // inline
2025 regMaskTP CodeGen::genMakeRvalueAddressable(
2026     GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
2027 {
2028     regNumber reg;
2029
2030 #if REDUNDANT_LOAD
2031
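    // If this local's value is already cached in a register (per the redundant-load
    // tracker), mark the tree as living in that register so the genMakeAddressable2
    // call below can use it directly instead of reloading the value.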
2032     if (tree->gtOper == GT_LCL_VAR)
2033     {
2034         reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
2035
2036         if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2037         {
2038             noway_assert(!isRegPairType(tree->gtType));
2039
2040             genMarkTreeInReg(tree, reg);
2041         }
2042     }
2043
2044 #endif
2045
2046     return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2047 }
2048
2049 /*****************************************************************************/
2050
2051 bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
2052 {
2053     const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2054
2055     noway_assert(tree->OperGet() == GT_LCL_VAR);
2056     noway_assert(varDsc->lvTracked);
2057
2058     return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2059 }
2060
2061 /*****************************************************************************
2062  *
2063  *  This is genMakeAddressable(GT_ARR_ELEM).
2064  *  Makes the array-element addressable and returns the addressability registers.
2065  *  It also marks them as used if keepReg==RegSet::KEEP_REG.
2066  *  tree is the dependent tree.
2067  *
2068  *  Note that an array-element needs 2 registers to be addressable, the
2069  *  array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2070  *  with the 2 registers so that other functions (like instGetAddrMode()) know
2071  *  where to look for the offset to use.
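 *
 *  As an illustration, for a rank-2 access a[i, j] the generated code range-checks
 *  each index against its dimension and accumulates the element offset roughly as
 *  (i * dimLength1 + j), which is then scaled by the element size if that size is
 *  not a valid index-scale.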
2072  */
2073
2074 regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
2075 {
2076     noway_assert(arrElem->gtOper == GT_ARR_ELEM);
2077     noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
2078
2079     /* Evaluate all the operands. We don't evaluate them into registers yet
2080        as GT_ARR_ELEM does not reorder the evaluation of the operands, and
2081        hence may use a sub-optimal ordering. We try to improve this
2082        situation somewhat by accessing the operands in stages
2083        (genMakeAddressable2 + genComputeAddressable and
2084        genCompIntoFreeReg + genRecoverReg).
2085
2086        Note: we compute operands into free regs to avoid multiple uses of
2087        the same register. Multi-use would cause problems when we free
2088        registers in FIFO order instead of the assumed LIFO order that
2089        applies to all types of tree nodes except for GT_ARR_ELEM.
2090      */
2091
2092     GenTreePtr arrObj   = arrElem->gtArrElem.gtArrObj;
2093     unsigned   rank     = arrElem->gtArrElem.gtArrRank;
2094     var_types  elemType = arrElem->gtArrElem.gtArrElemType;
2095     regMaskTP  addrReg  = RBM_NONE;
2096     regMaskTP  regNeed  = RBM_ALLINT;
2097
2098 #if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
2099     // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0;
2100     // since the arrObj participates in the lea/add instruction
2101     // that computes ARG_0, we should avoid putting it in ARG_1.
2102     //
2103     if (varTypeIsGC(elemType))
2104     {
2105         regNeed &= ~RBM_ARG_1;
2106     }
2107 #endif
2108
2109     // Strip off any comma expression.
2110     arrObj = genCodeForCommaTree(arrObj);
2111
2112     // Having generated the code for the comma, we don't care about it anymore.
2113     arrElem->gtArrElem.gtArrObj = arrObj;
2114
2115     // If the array ref is a stack var that's dying here, we have to move it
2116     // into a register (regalloc already counts on this), because if it's a GC pointer
2117     // it can be collected from here on. This is not an issue for locals that are
2118     // in a register, as they get marked as used and will be tracked.
2119     // The bug that caused this is #100776. (untracked vars?)
2120     if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
2121         !genMarkLclVar(arrObj))
2122     {
2123         genCodeForTree(arrObj, regNeed);
2124         regSet.rsMarkRegUsed(arrObj, 0);
2125         addrReg = genRegMask(arrObj->gtRegNum);
2126     }
2127     else
2128     {
2129         addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
2130                                       true,  // forLoadStore
2131                                       false, // smallOK
2132                                       false, // deferOK
2133                                       true); // evalSideEffs
2134     }
2135
2136     unsigned dim;
2137     for (dim = 0; dim < rank; dim++)
2138         genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
2139
2140     /* Ensure that the array-object is in a register */
2141
2142     addrReg = genKeepAddressable(arrObj, addrReg);
2143     genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
2144
2145     regNumber arrReg     = arrObj->gtRegNum;
2146     regMaskTP arrRegMask = genRegMask(arrReg);
2147     regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
2148     regSet.rsLockUsedReg(arrRegMask);
2149
2150     /* Now process all the indices, do the range check, and compute
2151        the offset of the element */
2152
2153     regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
2154
2155     for (dim = 0; dim < rank; dim++)
2156     {
2157         GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
2158
2159         /* Get the index into a free register (other than the register holding the array) */
2160
2161         genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
2162
2163 #if CPU_LOAD_STORE_ARCH
2164         /* Subtract the lower bound, and do the range check */
2165
2166         regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
2167         getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2168                                    compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2169         regTracker.rsTrackRegTrash(valueReg);
2170         getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
2171         regTracker.rsTrackRegTrash(index->gtRegNum);
2172
2173         getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2174                                    compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2175         getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
2176 #else
2177         /* Subtract the lower bound, and do the range check */
2178         getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
2179                                    compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2180         regTracker.rsTrackRegTrash(index->gtRegNum);
2181
2182         getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
2183                                    compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2184 #endif
2185         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2186         genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
2187
2188         if (dim == 0)
2189         {
2190             /* Hang on to the register of the first index */
2191
2192             noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
2193             accReg = index->gtRegNum;
2194             noway_assert(accReg != arrReg);
2195             regSet.rsLockUsedReg(genRegMask(accReg));
2196         }
2197         else
2198         {
2199             /* Evaluate accReg = accReg*dim_size + index */
2200
2201             noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
2202 #if CPU_LOAD_STORE_ARCH
2203             getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2204                                        compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2205             regTracker.rsTrackRegTrash(valueReg);
2206             getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
2207 #else
2208             getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
2209                                        compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2210 #endif
2211
2212             inst_RV_RV(INS_add, accReg, index->gtRegNum);
2213             regSet.rsMarkRegFree(index->gtRegNum, index);
2214             regTracker.rsTrackRegTrash(accReg);
2215         }
2216     }
2217
2218     if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
2219     {
2220         regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
2221
2222         getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
2223         regTracker.rsTrackRegTrash(accReg);
2224     }
2225
2226     regSet.rsUnlockUsedReg(genRegMask(arrReg));
2227     regSet.rsUnlockUsedReg(genRegMask(accReg));
2228
2229     regSet.rsMarkRegFree(genRegMask(arrReg));
2230     regSet.rsMarkRegFree(genRegMask(accReg));
2231
2232     if (keepReg == RegSet::KEEP_REG)
2233     {
2234         /* We mark the addressability registers on arrObj and gtArrInds[0].
2235            instGetAddrMode() knows to work with this. */
2236
2237         regSet.rsMarkRegUsed(arrObj, tree);
2238         regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
2239     }
2240
2241     return genRegMask(arrReg) | genRegMask(accReg);
2242 }
2243
2244 /*****************************************************************************
2245  *
2246  *  Make sure the given tree is addressable.  'needReg' is a mask that indicates
2247  *  the set of registers we would prefer the destination tree to be computed
2248  *  into (RBM_NONE means no preference).
2249  *
2250  *  'tree' can subsequently be used with the inst_XX_TT() family of functions.
2251  *
2252  *  If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2253  *  on as used, and return the mask for that register set (if no registers
2254  *  are marked as used, RBM_NONE is returned).
2255  *
2256  *  If 'smallOK' is not true and the datatype being addressed is a byte or short,
2257  *  then the tree is forced into a register.  This is useful when the machine
2258  *  instruction being emitted does not have a byte or short version.
2259  *
2260  *  The "deferOK" parameter indicates the mode of operation - when it's false,
2261  *  upon returning an actual address mode must have been formed (i.e. it must
2262  *  be possible to immediately call one of the inst_TT methods to operate on
2263  *  the value). When "deferOK" is true, we do whatever it takes to be ready
2264  *  to form the address mode later - for example, if an index address mode on
2265  *  a particular CPU requires the use of a specific register, we usually don't
2266  *  want to immediately grab that register for an address mode that will only
2267  *  be needed later. The convention is to call genMakeAddressable() with
2268  *  "deferOK" equal to true, do whatever work is needed to prepare the other
2269  *  operand, call genMakeAddressable() with "deferOK" equal to false, and
2270  *  finally call one of the inst_TT methods right after that.
2271  *
2272  *  If we do any other codegen after genMakeAddressable(tree) which can
2273  *  potentially spill the addressability registers, genKeepAddressable()
2274  *  needs to be called before accessing the tree again.
2275  *
2276  *  genDoneAddressable() needs to be called when we are done with the tree
2277  *  to free the addressability registers.
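 *
 *  A typical usage pattern (sketch) is:
 *
 *      addrReg = genMakeAddressable(tree, needReg, RegSet::KEEP_REG, smallOK, deferOK);
 *      ... other codegen that may spill registers ...
 *      addrReg = genKeepAddressable(tree, addrReg);
 *      inst_RV_TT(ins, reg, tree);    // or another inst_XX_TT variant
 *      genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);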
2278  */
2279
2280 regMaskTP CodeGen::genMakeAddressable(
2281     GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
2282 {
2283     GenTreePtr addr = NULL;
2284     regMaskTP  regMask;
2285
2286     /* Is the value simply sitting in a register? */
2287
2288     if (tree->InReg())
2289     {
2290         genUpdateLife(tree);
2291
2292         goto GOT_VAL;
2293     }
2294
2295     // TODO: If the value is for example a cast of float -> int, compute
2296     // TODO: the converted value into a stack temp, and leave it there,
2297     // TODO: since stack temps are always addressable. This would require
2298     // TODO: recording the fact that a particular tree is in a stack temp.
2299
2300     /* byte/char/short operand -- is this acceptable to the caller? */
2301
2302     if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
2303         goto EVAL_TREE;
2304
2305     // Evaluate non-last elements of comma expressions, to get to the last.
2306     tree = genCodeForCommaTree(tree);
2307
2308     switch (tree->gtOper)
2309     {
2310         case GT_LCL_FLD:
2311
2312             // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
2313             // to worry about it being enregistered.
2314             noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
2315
2316             genUpdateLife(tree);
2317             return 0;
2318
2319         case GT_LCL_VAR:
2320
2321             if (!genMarkLclVar(tree))
2322             {
2323                 genUpdateLife(tree);
2324                 return 0;
2325             }
2326
2327             __fallthrough; // it turns out the variable lives in a register
2328
2329         case GT_REG_VAR:
2330
2331             genUpdateLife(tree);
2332
2333             goto GOT_VAL;
2334
2335         case GT_CLS_VAR:
2336
2337             return 0;
2338
2339         case GT_CNS_INT:
2340 #ifdef _TARGET_64BIT_
2341             // Non-relocs will be sign extended, so we don't have to enregister
2342             // constants that are equivalent to a sign-extended int.
2343             // Relocs can be left alone if they are RIP-relative.
2344             if ((genTypeSize(tree->TypeGet()) > 4) &&
2345                 (!tree->IsIntCnsFitsInI32() ||
2346                  (tree->IsIconHandle() &&
2347                   (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
2348             {
2349                 break;
2350             }
2351 #endif // _TARGET_64BIT_
2352             __fallthrough;
2353
2354         case GT_CNS_LNG:
2355         case GT_CNS_DBL:
2356             // For MinOpts, we don't do constant folding, so we have
2357             // constants showing up in places we don't like.
2358             // Force them into a register now to prevent that.
2359             if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
2360                 return 0;
2361             break;
2362
2363         case GT_IND:
2364         case GT_NULLCHECK:
2365
2366             /* Try to make the address directly addressable */
2367
2368             if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2369                                    needReg, keepReg, &regMask, deferOK))
2370             {
2371                 genUpdateLife(tree);
2372                 return regMask;
2373             }
2374
2375             /* No good, we'll have to load the address into a register */
2376
2377             addr = tree;
2378             tree = tree->gtOp.gtOp1;
2379             break;
2380
2381         default:
2382             break;
2383     }
2384
2385 EVAL_TREE:
2386
2387     /* Here we need to compute the value 'tree' into a register */
2388
2389     genCodeForTree(tree, needReg);
2390
2391 GOT_VAL:
2392
2393     noway_assert(tree->InReg());
2394
2395     if (isRegPairType(tree->gtType))
2396     {
2397         /* Are we supposed to hang on to the register? */
2398
2399         if (keepReg == RegSet::KEEP_REG)
2400             regSet.rsMarkRegPairUsed(tree);
2401
2402         regMask = genRegPairMask(tree->gtRegPair);
2403     }
2404     else
2405     {
2406         /* Are we supposed to hang on to the register? */
2407
2408         if (keepReg == RegSet::KEEP_REG)
2409             regSet.rsMarkRegUsed(tree, addr);
2410
2411         regMask = genRegMask(tree->gtRegNum);
2412     }
2413
2414     return regMask;
2415 }
2416
2417 /*****************************************************************************
2418  *  Compute a tree (which was previously made addressable using
2419  *  genMakeAddressable()) into a register.
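 *  addrReg - set of registers currently providing addressability (released via genDoneAddressable).
 *  keptReg - whether those addressability registers were marked as used by genMakeAddressable().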
2420  *  needReg - mask of preferred registers.
2421  *  keepReg - should the computed register be marked as used by the tree
2422  *  freeOnly - target register needs to be a scratch register
2423  */
2424
2425 void CodeGen::genComputeAddressable(GenTreePtr      tree,
2426                                     regMaskTP       addrReg,
2427                                     RegSet::KeepReg keptReg,
2428                                     regMaskTP       needReg,
2429                                     RegSet::KeepReg keepReg,
2430                                     bool            freeOnly)
2431 {
2432     noway_assert(genStillAddressable(tree));
2433     noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
2434
2435     genDoneAddressable(tree, addrReg, keptReg);
2436
2437     regNumber reg;
2438
2439     if (tree->InReg())
2440     {
2441         reg = tree->gtRegNum;
2442
2443         if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
2444             goto MOVE_REG;
2445     }
2446     else
2447     {
2448         if (tree->OperIsConst())
2449         {
2450             /* Need to handle consts separately as we don't want to emit
2451               "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
2452               handles consts better for SMALL_CODE */
2453
2454             noway_assert(tree->IsCnsIntOrI());
2455             reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
2456         }
2457         else
2458         {
2459         MOVE_REG:
2460             reg = regSet.rsPickReg(needReg);
2461
2462             inst_RV_TT(INS_mov, reg, tree);
2463             regTracker.rsTrackRegTrash(reg);
2464         }
2465     }
2466
2467     genMarkTreeInReg(tree, reg);
2468
2469     if (keepReg == RegSet::KEEP_REG)
2470         regSet.rsMarkRegUsed(tree);
2471     else
2472         gcInfo.gcMarkRegPtrVal(tree);
2473 }
2474
2475 /*****************************************************************************
2476  *  Similar to genMakeAddressable(), but gives more control: 'forLoadStore' and 'evalSideEffs' can force the tree to be fully evaluated into a register instead of just being made addressable.
2477  */
2478
2479 regMaskTP CodeGen::genMakeAddressable2(GenTreePtr      tree,
2480                                        regMaskTP       needReg,
2481                                        RegSet::KeepReg keepReg,
2482                                        bool            forLoadStore,
2483                                        bool            smallOK,
2484                                        bool            deferOK,
2485                                        bool            evalSideEffs)
2486
2487 {
2488     bool evalToReg = false;
2489
2490     if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2491         evalToReg = true;
2492
2493 #if CPU_LOAD_STORE_ARCH
2494     if (!forLoadStore)
2495         evalToReg = true;
2496 #endif
2497
2498     if (evalToReg)
2499     {
2500         genCodeForTree(tree, needReg);
2501
2502         noway_assert(tree->InReg());
2503
2504         if (isRegPairType(tree->gtType))
2505         {
2506             /* Are we supposed to hang on to the register? */
2507
2508             if (keepReg == RegSet::KEEP_REG)
2509                 regSet.rsMarkRegPairUsed(tree);
2510
2511             return genRegPairMask(tree->gtRegPair);
2512         }
2513         else
2514         {
2515             /* Are we supposed to hang on to the register? */
2516
2517             if (keepReg == RegSet::KEEP_REG)
2518                 regSet.rsMarkRegUsed(tree);
2519
2520             return genRegMask(tree->gtRegNum);
2521         }
2522     }
2523     else
2524     {
2525         return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2526     }
2527 }
2528
2529 /*****************************************************************************
2530  *
2531  *  The given tree was previously passed to genMakeAddressable(); return
2532  *  'true' if the operand is still addressable.
2533  */
2534
2535 // inline
2536 bool CodeGen::genStillAddressable(GenTreePtr tree)
2537 {
2538     /* Has the value (or one or more of its sub-operands) been spilled? */
2539
2540     if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
2541         return false;
2542
2543     return true;
2544 }
2545
2546 /*****************************************************************************
2547  *
2548  *  Recursive helper to restore complex address modes. The 'lockPhase'
2549  *  argument indicates whether we're in the 'lock' or 'reload' phase.
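 *  In the lock phase, sub-trees still sitting in registers are locked so that the
 *  reload phase cannot spill them; in the reload phase, spilled sub-trees are
 *  unspilled, marked as used for the address, and locked until the whole address
 *  mode has been restored.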
2550  */
2551
2552 regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
2553 {
2554     regMaskTP regMask = RBM_NONE;
2555
2556     /* Have we found a spilled value? */
2557
2558     if (tree->gtFlags & GTF_SPILLED)
2559     {
2560         /* Do nothing if we're locking, otherwise reload and lock */
2561
2562         if (!lockPhase)
2563         {
2564             /* Unspill the register */
2565
2566             regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
2567
2568             /* The value should now be sitting in a register */
2569
2570             noway_assert(tree->InReg());
2571             regMask = genRegMask(tree->gtRegNum);
2572
2573             /* Mark the register as used for the address */
2574
2575             regSet.rsMarkRegUsed(tree, addr);
2576
2577             /* Lock the register until we're done with the entire address */
2578
2579             regSet.rsMaskLock |= regMask;
2580         }
2581
2582         return regMask;
2583     }
2584
2585     /* Is this sub-tree sitting in a register? */
2586
2587     if (tree->InReg())
2588     {
2589         regMask = genRegMask(tree->gtRegNum);
2590
2591         /* Lock the register if we're in the locking phase */
2592
2593         if (lockPhase)
2594             regSet.rsMaskLock |= regMask;
2595     }
2596     else
2597     {
2598         /* Process any sub-operands of this node */
2599
2600         unsigned kind = tree->OperKind();
2601
2602         if (kind & GTK_SMPOP)
2603         {
2604             /* Unary/binary operator */
2605
2606             if (tree->gtOp.gtOp1)
2607                 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
2608             if (tree->gtGetOp2IfPresent())
2609                 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
2610         }
2611         else if (tree->gtOper == GT_ARR_ELEM)
2612         {
2613             /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
2614                which holds the offset-calculation */
2615
2616             regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
2617             regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
2618         }
2619         else if (tree->gtOper == GT_CMPXCHG)
2620         {
2621             regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
2622         }
2623         else
2624         {
2625             /* Must be a leaf/constant node */
2626
2627             noway_assert(kind & (GTK_LEAF | GTK_CONST));
2628         }
2629     }
2630
2631     return regMask;
2632 }
2633
2634 /*****************************************************************************
2635  *
2636  *  The given tree was previously passed to genMakeAddressable, but since then
2637  *  some of its registers are known to have been spilled; do whatever it takes
2638  *  to make the operand addressable again (typically by reloading any spilled
2639  *  registers).
2640  */
2641
2642 regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
2643 {
2644     noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2645
2646     /* Is this a 'simple' register spill? */
2647
2648     if (tree->gtFlags & GTF_SPILLED)
2649     {
2650         /* The mask must match the original register/regpair */
2651
2652         if (isRegPairType(tree->gtType))
2653         {
2654             noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
2655
2656             regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2657
2658             addrReg = genRegPairMask(tree->gtRegPair);
2659         }
2660         else
2661         {
2662             noway_assert(addrReg == genRegMask(tree->gtRegNum));
2663
2664             regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2665
2666             addrReg = genRegMask(tree->gtRegNum);
2667         }
2668
2669         noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2670         regSet.rsMaskLock -= lockMask;
2671
2672         return addrReg;
2673     }
2674
2675     /* We have a complex address mode with some of its sub-operands spilled */
2676
2677     noway_assert((tree->InReg()) == 0);
2678     noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
2679
2680     /*
2681         We'll proceed in several phases:
2682
2683          1. Lock any registers that are part of the address mode and
2684             have not been spilled. This prevents these registers from
2685             getting spilled in step 2.
2686
2687          2. Reload any registers that have been spilled; lock each
2688             one right after it is reloaded.
2689
2690          3. Unlock all the registers.
2691      */
2692
2693     addrReg = genRestoreAddrMode(tree, tree, true);
2694     addrReg |= genRestoreAddrMode(tree, tree, false);
2695
2696     /* Unlock all registers that the address mode uses */
2697
2698     lockMask |= addrReg;
2699
2700     noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2701     regSet.rsMaskLock -= lockMask;
2702
2703     return addrReg;
2704 }
2705
2706 /*****************************************************************************
2707  *
2708  *  The given tree was previously passed to genMakeAddressable, but since then
2709  *  some of its registers might have been spilled ('addrReg' is the set of
2710  *  registers used by the address). This function makes sure the operand is
2711  *  still addressable (while avoiding any of the registers in 'avoidMask'),
2712  *  and returns the (possibly modified) set of registers that are used by
2713  *  the address (these will be marked as used on exit).
2714  */
2715
2716 regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
2717 {
2718     /* Is the operand still addressable? */
2719
2720     tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
2721
2722     if (!genStillAddressable(tree))
2723     {
2724         if (avoidMask)
2725         {
2726             // Temporarily lock 'avoidMask' while we restore addressability
2727             // genRestAddressable will unlock the 'avoidMask' for us
2728             // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2729             // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
2730             //
2731             regSet.rsLockUsedReg(avoidMask);
2732         }
2733
2734         addrReg = genRestAddressable(tree, addrReg, avoidMask);
2735
2736         noway_assert((regSet.rsMaskLock & avoidMask) == 0);
2737     }
2738
2739     return addrReg;
2740 }
2741
2742 /*****************************************************************************
2743  *
2744  *  After we're finished with the given operand (which was previously marked
2745  *  by calling genMakeAddressable), this function must be called to free any
2746  *  registers that may have been used by the address.
2747  *  keptReg indicates if the addressability registers were marked as used
2748  *  by genMakeAddressable().
2749  */
2750
2751 void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
2752 {
2753     if (keptReg == RegSet::FREE_REG)
2754     {
2755         // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2756         // ie. There may be a pending use in a higher-up tree.
2757
2758         addrReg &= ~regSet.rsMaskUsed;
2759
2760         /* addrReg was not marked as used. So just reset its GC info */
2761         if (addrReg)
2762         {
2763             gcInfo.gcMarkRegSetNpt(addrReg);
2764         }
2765     }
2766     else
2767     {
2768         /* addrReg was marked as used. So we need to free it up (which
2769            will also reset its GC info) */
2770
2771         regSet.rsMarkRegFree(addrReg);
2772     }
2773 }
2774
2775 /*****************************************************************************/
2776 /*****************************************************************************
2777  *
2778  *  Make sure the given floating point value is addressable, and return a tree
2779  *  that will yield the value as an addressing mode (this tree may differ from
2780  *  the one passed in, BTW). If the only way to make the value addressable is
2781  *  to evaluate into the FP stack, we do this and return zero.
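 *  (The caller can detect the FP-stack case by the zero/NULL return value; in the
 *  addressable cases, *regMaskPtr receives any registers the address depends on.)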
2782  */
2783
2784 GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
2785 {
2786     *regMaskPtr = 0;
2787
2788     switch (tree->gtOper)
2789     {
2790         case GT_LCL_VAR:
2791         case GT_LCL_FLD:
2792         case GT_CLS_VAR:
2793             return tree;
2794
2795         case GT_CNS_DBL:
2796             if (tree->gtType == TYP_FLOAT)
2797             {
2798                 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
2799                 return genMakeConst(&f, TYP_FLOAT, tree, false);
2800             }
2801             return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
2802
2803         case GT_IND:
2804         case GT_NULLCHECK:
2805
2806             /* Try to make the address directly addressable */
2807
2808             if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2809                                    0, RegSet::FREE_REG, regMaskPtr, false))
2810             {
2811                 genUpdateLife(tree);
2812                 return tree;
2813             }
2814
2815             break;
2816
2817         default:
2818             break;
2819     }
2820 #if FEATURE_STACK_FP_X87
2821     /* We have no choice but to compute the value 'tree' onto the FP stack */
2822
2823     genCodeForTreeFlt(tree);
2824 #endif
2825     return 0;
2826 }
2827
2828 /*****************************************************************************/
2829 /*****************************************************************************
2830  *
2831  *  Display a string literal value (debug only).
2832  */
2833
2834 #ifdef DEBUG
2835 #endif
2836
2837 /*****************************************************************************
2838  *
2839  *   Generate code to check that the GS cookie wasn't trashed by a buffer
2840  *   overrun.  If pushReg is true, preserve all registers around the code sequence.
2841  *   Otherwise, ECX may be modified.
2842  *
2843  *   TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
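 *
 *   The emitted sequence compares the stack copy of the cookie against the expected
 *   value (an immediate in the JIT case, or a value loaded from the global cookie
 *   address in the NGen case) and calls CORINFO_HELP_FAIL_FAST if they differ.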
2844  */
2845 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2846 {
2847     // Make sure that EAX didn't die in the return expression
2848     if (!pushReg && (compiler->info.compRetType == TYP_REF))
2849         gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2850
2851     // Add cookie check code for unsafe buffers
2852     BasicBlock* gsCheckBlk;
2853     regMaskTP   byrefPushedRegs = RBM_NONE;
2854     regMaskTP   norefPushedRegs = RBM_NONE;
2855     regMaskTP   pushedRegs      = RBM_NONE;
2856
2857     noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
2858
2859     if (compiler->gsGlobalSecurityCookieAddr == NULL)
2860     {
2861         // JIT case
2862         CLANG_FORMAT_COMMENT_ANCHOR;
2863
2864 #if CPU_LOAD_STORE_ARCH
2865
2866         regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
2867         getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
2868         regTracker.rsTrackRegTrash(reg);
2869
2870         if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
2871             arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
2872         {
2873             getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
2874         }
2875         else
2876         {
2877             // Load CookieVal into a register
2878             regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
2879             instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
2880             getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
2881         }
2882 #else
2883         getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
2884                                   (int)compiler->gsGlobalSecurityCookieVal);
2885 #endif
2886     }
2887     else
2888     {
2889         regNumber regGSCheck;
2890         regMaskTP regMaskGSCheck;
2891 #if CPU_LOAD_STORE_ARCH
2892         regGSCheck     = regSet.rsGrabReg(RBM_ALLINT);
2893         regMaskGSCheck = genRegMask(regGSCheck);
2894 #else
2895         // Don't pick the 'this' register
2896         if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
2897             (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
2898         {
2899             regGSCheck     = REG_EDX;
2900             regMaskGSCheck = RBM_EDX;
2901         }
2902         else
2903         {
2904             regGSCheck     = REG_ECX;
2905             regMaskGSCheck = RBM_ECX;
2906         }
2907
2908         // NGen case
2909         if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
2910         {
2911             pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
2912         }
2913         else
2914         {
2915             noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
2916         }
2917 #endif
2918 #if defined(_TARGET_ARM_)
2919         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2920         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
2921 #else
2922         getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
2923                                   (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2924 #endif // !_TARGET_ARM_
2925         regTracker.rsTrashRegSet(regMaskGSCheck);
2926 #ifdef _TARGET_ARM_
2927         regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
2928         getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
2929         regTracker.rsTrackRegTrash(regTmp);
2930         getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
2931 #else
2932         getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
2933 #endif
2934     }
2935
2936     gsCheckBlk            = genCreateTempLabel();
2937     emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2938     inst_JMP(jmpEqual, gsCheckBlk);
2939     genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
2940     genDefineTempLabel(gsCheckBlk);
2941
2942     genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
2943 }
2944
2945 /*****************************************************************************
2946  *
2947  *  Generate any side effects within the given expression tree.
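 *  For indirections this may reduce to forming the address and performing a null
 *  check (plus a memory barrier for volatile loads on ARM); other trees are simply
 *  computed into a register and the result discarded.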
2948  */
2949
2950 void CodeGen::genEvalSideEffects(GenTreePtr tree)
2951 {
2952     genTreeOps oper;
2953     unsigned   kind;
2954
2955 AGAIN:
2956
2957     /* Does this sub-tree contain any side-effects? */
2958     if (tree->gtFlags & GTF_SIDE_EFFECT)
2959     {
2960 #if FEATURE_STACK_FP_X87
2961         /* Remember the current FP stack level */
2962         int iTemps = genNumberTemps();
2963 #endif
2964         if (tree->OperIsIndir())
2965         {
2966             regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
2967
2968             if (tree->InReg())
2969             {
2970                 gcInfo.gcMarkRegPtrVal(tree);
2971                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2972             }
2973             // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
2974             // do not need an additional null-check
2975             /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
2976             else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
2977             {
2978                 /* Compare against any register to do null-check */
2979                 CLANG_FORMAT_COMMENT_ANCHOR;
2980
2981 #if defined(_TARGET_XARCH_)
2982                 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
2983                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2984 #elif CPU_LOAD_STORE_ARCH
2985                 if (varTypeIsFloating(tree->TypeGet()))
2986                 {
2987                     genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
2988                                                RegSet::FREE_REG);
2989                 }
2990                 else
2991                 {
2992                     genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
2993                 }
2994 #ifdef _TARGET_ARM_
2995                 if (tree->gtFlags & GTF_IND_VOLATILE)
2996                 {
2997                     // Emit a memory barrier instruction after the load
2998                     instGen_MemoryBarrier();
2999                 }
3000 #endif
3001 #else
3002                 NYI("TARGET");
3003 #endif
3004             }
3005             else
3006             {
3007                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3008             }
3009         }
3010         else
3011         {
3012             /* Generate the expression and throw it away */
3013             genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3014             if (tree->InReg())
3015             {
3016                 gcInfo.gcMarkRegPtrVal(tree);
3017             }
3018         }
3019 #if FEATURE_STACK_FP_X87
3020         /* If the tree computed a value on the FP stack, pop the stack */
3021         if (genNumberTemps() > iTemps)
3022         {
3023             noway_assert(genNumberTemps() == iTemps + 1);
3024             genDiscardStackFP(tree);
3025         }
3026 #endif
3027         return;
3028     }
3029
3030     noway_assert(tree->gtOper != GT_ASG);
3031
3032     /* Walk the tree, just to mark any dead values appropriately */
3033
3034     oper = tree->OperGet();
3035     kind = tree->OperKind();
3036
3037     /* Is this a constant or leaf node? */
3038
3039     if (kind & (GTK_CONST | GTK_LEAF))
3040     {
3041 #if FEATURE_STACK_FP_X87
3042         if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
3043         {
3044             genRegVarDeathStackFP(tree);
3045             FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3046         }
3047 #endif
3048         genUpdateLife(tree);
3049         gcInfo.gcMarkRegPtrVal(tree);
3050         return;
3051     }
3052
3053     /* Must be a 'simple' unary/binary operator */
3054
3055     noway_assert(kind & GTK_SMPOP);
3056
3057     if (tree->gtGetOp2IfPresent())
3058     {
3059         genEvalSideEffects(tree->gtOp.gtOp1);
3060
3061         tree = tree->gtOp.gtOp2;
3062         goto AGAIN;
3063     }
3064     else
3065     {
3066         tree = tree->gtOp.gtOp1;
3067         if (tree)
3068             goto AGAIN;
3069     }
3070 }
3071
3072 /*****************************************************************************
3073  *
3074  *  A persistent pointer value is being overwritten, record it for the GC.
3075  *
3076  *  tgt        : the destination being written to
3077  *  assignVal  : the value being assigned (the source). It must currently be in a register.
3078  *  tgtAddrReg : the set of registers being used by "tgt"
3079  *
3080  *  Returns    : the mask of the scratch register that was used.
3081  *               RBM_NONE if a write-barrier is not needed.
3082  */
3083
3084 regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
3085 {
3086     noway_assert(assignVal->InReg());
3087
3088     GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3089     if (wbf == GCInfo::WBF_NoBarrier)
3090         return RBM_NONE;
3091
3092     regMaskTP resultRegMask = RBM_NONE;
3093
3094 #if FEATURE_WRITE_BARRIER
3095
3096     regNumber reg = assignVal->gtRegNum;
3097
3098 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3099 #ifdef DEBUG
3100     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
3101     {
3102 #endif
3103         const static int regToHelper[2][8] = {
3104             // If the target is known to be in managed memory
3105             {
3106                 CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
3107                 CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
3108             },
3109
3110             // Don't know if the target is in managed memory
3111             {
3112                 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
3113                 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
3114                 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
3115             },
3116         };
3117
3118         noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
3119         noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
3120         noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
3121         noway_assert(regToHelper[0][REG_ESP] == -1);
3122         noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
3123         noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
3124         noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
3125
3126         noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
3127         noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
3128         noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
3129         noway_assert(regToHelper[1][REG_ESP] == -1);
3130         noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
3131         noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
3132         noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
3133
3134         noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
3135
3136         /*
3137             Generate the following code:
3138
3139                     lea     edx, tgt
3140                     call    write_barrier_helper_reg
3141
3142             First grab the RBM_WRITE_BARRIER register for the target address.
3143          */
3144
3145         regNumber rg1;
3146         bool      trashOp1;
3147
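        // If the target address does not already occupy the write-barrier register, grab and lock
        // it here and remember (trashOp1 == false) to release it after the helper call; otherwise
        // the register is already tied up by the LHS and genDoneAddressable will free it.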
3148         if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
3149         {
3150             rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
3151
3152             regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
3153             regSet.rsMaskLock |= RBM_WRITE_BARRIER;
3154
3155             trashOp1 = false;
3156         }
3157         else
3158         {
3159             rg1 = REG_WRITE_BARRIER;
3160
3161             trashOp1 = true;
3162         }
3163
3164         noway_assert(rg1 == REG_WRITE_BARRIER);
3165
3166         /* Generate "lea EDX, [addr-mode]" */
3167
3168         noway_assert(tgt->gtType == TYP_REF);
3169         tgt->gtType = TYP_BYREF;
3170         inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
3171
3172         /* Free up anything that was tied up by the LHS */
3173         genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3174
3175         // In case "tgt" was a comma:
3176         tgt = tgt->gtEffectiveVal();
3177
3178         regTracker.rsTrackRegTrash(rg1);
3179         gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
3180         gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
3181
3182         /* Call the proper vm helper */
3183
3184         // enforced by gcIsWriteBarrierCandidate
3185         noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
3186
3187         unsigned tgtAnywhere = 0;
3188         if ((tgt->gtOper == GT_IND) &&
3189             ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
3190         {
3191             tgtAnywhere = 1;
3192         }
3193
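        // Pick the helper from the table above: row 1 (the "checked" helpers) when the target may
        // not be in the GC heap, row 0 otherwise; the column is the register holding the new value.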
3194         int helper    = regToHelper[tgtAnywhere][reg];
3195         resultRegMask = genRegMask(reg);
3196
3197         gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
3198
3199         genEmitHelperCall(helper,
3200                           0,           // argSize
3201                           EA_PTRSIZE); // retSize
3202
3203         if (!trashOp1)
3204         {
3205             regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
3206             regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
3207         }
3208
3209         return resultRegMask;
3210
3211 #ifdef DEBUG
3212     }
3213     else
3214 #endif
3215 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3216
3217 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3218     {
3219         /*
3220             Generate the following code (or its equivalent on the given target):
3221
3222                     mov     arg1, srcReg
3223                     lea     arg0, tgt
3224                     call    write_barrier_helper
3225
3226             First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
3227          */
3228
3229         if (reg != REG_ARG_1)
3230         {
3231             // We may need to spill whatever is in the ARG_1 register
3232             //
3233             if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
3234             {
3235                 regSet.rsSpillReg(REG_ARG_1);
3236             }
3237
3238             inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
3239         }
3240         resultRegMask = RBM_ARG_1;
3241
3242         regTracker.rsTrackRegTrash(REG_ARG_1);
3243         gcInfo.gcMarkRegSetNpt(RBM_ARG_1);
3244         gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
3245
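        // Temporarily mark ARG_1 as used so the address computation below won't pick or spill it;
        // 'free_arg1' remembers whether we set the bit here so we can clear it again afterwards.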
3246         bool free_arg1 = false;
3247         if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
3248         {
3249             regSet.rsMaskUsed |= RBM_ARG_1;
3250             free_arg1 = true;
3251         }
3252
3253         // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
3254
3255         /* Generate "lea R0, [addr-mode]" */
3256
3257         noway_assert(tgt->gtType == TYP_REF);
3258         tgt->gtType = TYP_BYREF;
3259
3260         tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
3261
3262         // We may need to spill whatever is in the ARG_0 register
3263         //
3264         if (((tgtAddrReg & RBM_ARG_0) == 0) &&        // tgtAddrReg does not contain REG_ARG_0
3265             ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
3266             (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
3267         {
3268             regSet.rsSpillReg(REG_ARG_0);
3269         }
3270
3271         inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
3272
3273         /* Free up anything that was tied up by the LHS */
3274         genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3275
3276         regTracker.rsTrackRegTrash(REG_ARG_0);
3277         gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
3278         gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
3279
3280 #ifdef _TARGET_ARM_
3281 #if NOGC_WRITE_BARRIERS
3282         // Finally, we may be required to spill whatever is in the further argument registers
3283         // trashed by the call. The write barrier trashes some further registers --
3284         // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
3285
3286         regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
3287 #else
3288         regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
3289 #endif
3290         // Spill any other registers trashed by the write barrier call and currently in use.
3291         regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
3292         if (mustSpill)
3293             regSet.rsSpillRegs(mustSpill);
3294 #endif // _TARGET_ARM_
3295
3296         bool free_arg0 = false;
3297         if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
3298         {
3299             regSet.rsMaskUsed |= RBM_ARG_0;
3300             free_arg0 = true;
3301         }
3302
3303         // genEmitHelperCall might need to grab a register
3304         // so don't let it spill one of the arguments
3305         //
3306         regMaskTP reallyUsedRegs = RBM_NONE;
3307         regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
3308
3309         genGCWriteBarrier(tgt, wbf);
3310
3311         regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
3312         gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
3313
3314         if (free_arg0)
3315         {
3316             regSet.rsMaskUsed &= ~RBM_ARG_0;
3317         }
3318         if (free_arg1)
3319         {
3320             regSet.rsMaskUsed &= ~RBM_ARG_1;
3321         }
3322
3323         return resultRegMask;
3324     }
3325 #endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3326
3327 #else // !FEATURE_WRITE_BARRIER
3328
3329     NYI("FEATURE_WRITE_BARRIER unimplemented");
3330     return resultRegMask;
3331
3332 #endif // !FEATURE_WRITE_BARRIER
3333 }
3334
3335 #ifdef _TARGET_X86_
3336 /*****************************************************************************
3337  *
3338  *  Generate the appropriate conditional jump(s) right after the high 32 bits
3339  *  of two long values have been compared.
3340  */
3341
3342 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3343 {
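    // Every operator except GT_NE emits at least one jump to 'jumpFalse' below, so make sure
    // that block is flagged as a jump target.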
3344     if (cmp != GT_NE)
3345     {
3346         jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3347     }
3348
3349     switch (cmp)
3350     {
3351         case GT_EQ:
3352             inst_JMP(EJ_jne, jumpFalse);
3353             break;
3354
3355         case GT_NE:
3356             inst_JMP(EJ_jne, jumpTrue);
3357             break;
3358
3359         case GT_LT:
3360         case GT_LE:
3361             if (isUnsigned)
3362             {
3363                 inst_JMP(EJ_ja, jumpFalse);
3364                 inst_JMP(EJ_jb, jumpTrue);
3365             }
3366             else
3367             {
3368                 inst_JMP(EJ_jg, jumpFalse);
3369                 inst_JMP(EJ_jl, jumpTrue);
3370             }
3371             break;
3372
3373         case GT_GE:
3374         case GT_GT:
3375             if (isUnsigned)
3376             {
3377                 inst_JMP(EJ_jb, jumpFalse);
3378                 inst_JMP(EJ_ja, jumpTrue);
3379             }
3380             else
3381             {
3382                 inst_JMP(EJ_jl, jumpFalse);
3383                 inst_JMP(EJ_jg, jumpTrue);
3384             }
3385             break;
3386
3387         default:
3388             noway_assert(!"expected a comparison operator");
3389     }
3390 }
3391
3392 /*****************************************************************************
3393  *
3394  *  Generate the appropriate conditional jump(s) right after the low 32 bits
3395  *  of two long values have been compared.
3396  */
3397
3398 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3399 {
3400     switch (cmp)
3401     {
3402         case GT_EQ:
3403             inst_JMP(EJ_je, jumpTrue);
3404             break;
3405
3406         case GT_NE:
3407             inst_JMP(EJ_jne, jumpTrue);
3408             break;
3409
3410         case GT_LT:
3411             inst_JMP(EJ_jb, jumpTrue);
3412             break;
3413
3414         case GT_LE:
3415             inst_JMP(EJ_jbe, jumpTrue);
3416             break;
3417
3418         case GT_GE:
3419             inst_JMP(EJ_jae, jumpTrue);
3420             break;
3421
3422         case GT_GT:
3423             inst_JMP(EJ_ja, jumpTrue);
3424             break;
3425
3426         default:
3427             noway_assert(!"expected comparison");
3428     }
3429 }
3430 #elif defined(_TARGET_ARM_)
3431 /*****************************************************************************
3432 *
3433 *  Generate the appropriate conditional jump(s) right after the high 32 bits
3434 *  of two long values have been compared.
3435 */
3436
3437 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3438 {
3439     if (cmp != GT_NE)
3440     {
3441         jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3442     }
3443
3444     switch (cmp)
3445     {
3446         case GT_EQ:
3447             inst_JMP(EJ_ne, jumpFalse);
3448             break;
3449
3450         case GT_NE:
3451             inst_JMP(EJ_ne, jumpTrue);
3452             break;
3453
3454         case GT_LT:
3455         case GT_LE:
3456             if (isUnsigned)
3457             {
3458                 inst_JMP(EJ_hi, jumpFalse);
3459                 inst_JMP(EJ_lo, jumpTrue);
3460             }
3461             else
3462             {
3463                 inst_JMP(EJ_gt, jumpFalse);
3464                 inst_JMP(EJ_lt, jumpTrue);
3465             }
3466             break;
3467
3468         case GT_GE:
3469         case GT_GT:
3470             if (isUnsigned)
3471             {
3472                 inst_JMP(EJ_lo, jumpFalse);
3473                 inst_JMP(EJ_hi, jumpTrue);
3474             }
3475             else
3476             {
3477                 inst_JMP(EJ_lt, jumpFalse);
3478                 inst_JMP(EJ_gt, jumpTrue);
3479             }
3480             break;
3481
3482         default:
3483             noway_assert(!"expected a comparison operator");
3484     }
3485 }
3486
3487 /*****************************************************************************
3488 *
3489 *  Generate the appropriate conditional jump(s) right after the low 32 bits
3490 *  of two long values have been compared.
3491 */
3492
3493 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3494 {
3495     switch (cmp)
3496     {
3497         case GT_EQ:
3498             inst_JMP(EJ_eq, jumpTrue);
3499             break;
3500
3501         case GT_NE:
3502             inst_JMP(EJ_ne, jumpTrue);
3503             break;
3504
3505         case GT_LT:
3506             inst_JMP(EJ_lo, jumpTrue);
3507             break;
3508
3509         case GT_LE:
3510             inst_JMP(EJ_ls, jumpTrue);
3511             break;
3512
3513         case GT_GE:
3514             inst_JMP(EJ_hs, jumpTrue);
3515             break;
3516
3517         case GT_GT:
3518             inst_JMP(EJ_hi, jumpTrue);
3519             break;
3520
3521         default:
3522             noway_assert(!"expected comparison");
3523     }
3524 }
3525 #endif
3526 /*****************************************************************************
3527  *
3528  *  Called by genCondJump() for TYP_LONG.
3529  */
3530
3531 void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
3532 {
3533     noway_assert(jumpTrue && jumpFalse);
3534     noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0); // Done in genCondJump()
3535     noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
3536
3537     GenTreePtr op1 = cond->gtOp.gtOp1;
3538     GenTreePtr op2 = cond->gtOp.gtOp2;
3539     genTreeOps cmp = cond->OperGet();
3540
3541     regMaskTP addrReg;
3542
3543     /* Are we comparing against a constant? */
3544
3545     if (op2->gtOper == GT_CNS_LNG)
3546     {
3547         __int64   lval = op2->gtLngCon.gtLconVal;
3548         regNumber rTmp;
3549
3550         // We're "done" evaluating op2; let's strip any commas off op1 before we
3551         // evaluate it.
3552         op1 = genCodeForCommaTree(op1);
3553
3554         /* We can generate better code for some special cases */
3555         instruction ins              = INS_invalid;
3556         bool        useIncToSetFlags = false;
3557         bool        specialCaseCmp   = false;
3558
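        // Comparing a long against 0 or -1 doesn't need two separate word compares:
        // OR-ing the two halves yields zero only when the value is 0, and AND-ing them
        // yields all ones only when the value is -1 (which an 'inc' then turns into a
        // zero-flag test), so a single flags-setting instruction (plus the inc) suffices.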
3559         if (cmp == GT_EQ)
3560         {
3561             if (lval == 0)
3562             {
3563                 /* op1 == 0  */
3564                 ins              = INS_OR;
3565                 useIncToSetFlags = false;
3566                 specialCaseCmp   = true;
3567             }
3568             else if (lval == -1)
3569             {
3570                 /* op1 == -1 */
3571                 ins              = INS_AND;
3572                 useIncToSetFlags = true;
3573                 specialCaseCmp   = true;
3574             }
3575         }
3576         else if (cmp == GT_NE)
3577         {
3578             if (lval == 0)
3579             {
3580                 /* op1 != 0  */
3581                 ins              = INS_OR;
3582                 useIncToSetFlags = false;
3583                 specialCaseCmp   = true;
3584             }
3585             else if (lval == -1)
3586             {
3587                 /* op1 != -1 */
3588                 ins              = INS_AND;
3589                 useIncToSetFlags = true;
3590                 specialCaseCmp   = true;
3591             }
3592         }
3593
3594         if (specialCaseCmp)
3595         {
3596             /* Make the comparand addressable */
3597
3598             addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
3599
3600             regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
3601             insFlags  flags   = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
3602
3603             if (op1->InReg())
3604             {
3605                 regPairNo regPair = op1->gtRegPair;
3606                 regNumber rLo     = genRegPairLo(regPair);
3607                 regNumber rHi     = genRegPairHi(regPair);
3608                 if (tmpMask & genRegMask(rLo))
3609                 {
3610                     rTmp = rLo;
3611                 }
3612                 else if (tmpMask & genRegMask(rHi))
3613                 {
3614                     rTmp = rHi;
3615                     rHi  = rLo;
3616                 }
3617                 else
3618                 {
3619                     rTmp = regSet.rsGrabReg(tmpMask);
3620                     inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
3621                 }
3622
3623                 /* The register is now trashed */
3624                 regTracker.rsTrackRegTrash(rTmp);
3625
3626                 if (rHi != REG_STK)
3627                 {
3628                     /* Set the flags using INS_AND | INS_OR */
3629                     inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
3630                 }
3631                 else
3632                 {
3633                     /* Set the flags using INS_AND | INS_OR */
3634                     inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3635                 }
3636             }
3637             else // op1 is not in a register.
3638             {
3639                 rTmp = regSet.rsGrabReg(tmpMask);
3640
3641                 /* Load the low 32-bits of op1 */
3642                 inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
3643
3644                 /* The register is now trashed */
3645                 regTracker.rsTrackRegTrash(rTmp);
3646
3647                 /* Set the flags using INS_AND | INS_OR */
3648                 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3649             }
3650
3651             /* Free up the addrReg(s) if any */
3652             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
3653
3654             /* Comparisons against -1 also require an inc instruction */
3655             if (useIncToSetFlags)
3656             {
3657                 /* Make sure the inc will set the flags */
3658                 assert(cond->gtSetFlags());
3659                 genIncRegBy(rTmp, 1, cond, TYP_INT);
3660             }
3661
3662 #if FEATURE_STACK_FP_X87
3663             // We may need a transition block
3664             if (bFPTransition)
3665             {
3666                 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3667             }
3668 #endif
3669             emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
3670             inst_JMP(jmpKind, jumpTrue);
3671         }
3672         else // specialCaseCmp == false
3673         {
3674             /* Make the comparand addressable */
3675             addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
3676
3677             /* Compare the high part first */
3678
3679             int ival = (int)(lval >> 32);
3680
3681             /* Comparing a register against 0 is easier */
3682
3683             if (!ival && (op1->InReg()) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
3684             {
3685                 /* Generate 'test rTmp, rTmp' */
3686                 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3687             }
3688             else
3689             {
3690                 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3691                 {
3692                     /* Special case: comparison of two constants */
3693                     // Needed as gtFoldExpr() doesn't fold longs
3694
3695                     noway_assert(addrReg == 0);
3696                     int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
3697
3698                     /* Get the constant operand into a register */
3699                     rTmp = genGetRegSetToIcon(op1_hiword);
3700
3701                     /* Generate 'cmp rTmp, ival' */
3702
3703                     inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3704                 }
3705                 else
3706                 {
3707                     /* Generate 'cmp op1, ival' */
3708
3709                     inst_TT_IV(INS_cmp, op1, ival, 4);
3710                 }
3711             }
3712
3713 #if FEATURE_STACK_FP_X87
3714             // We may need a transition block
3715             if (bFPTransition)
3716             {
3717                 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3718             }
3719 #endif
3720             /* Generate the appropriate jumps */
3721
3722             if (cond->gtFlags & GTF_UNSIGNED)
3723                 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3724             else
3725                 genJccLongHi(cmp, jumpTrue, jumpFalse);
3726
3727             /* Compare the low part second */
3728
3729             ival = (int)lval;
3730
3731             /* Comparing a register against 0 is easier */
3732
3733             if (!ival && (op1->InReg()) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
3734             {
3735                 /* Generate 'test rTmp, rTmp' */
3736                 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3737             }
3738             else
3739             {
3740                 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3741                 {
3742                     /* Special case: comparison of two constants */
3743                     // Needed as gtFoldExpr() doesn't fold longs
3744
3745                     noway_assert(addrReg == 0);
3746                     int op1_loword = (int)op1->gtLngCon.gtLconVal;
3747
3748                     /* get the constant operand into a register */
3749                     rTmp = genGetRegSetToIcon(op1_loword);
3750
3751                     /* Generate 'cmp rTmp, ival' */
3752
3753                     inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3754                 }
3755                 else
3756                 {
3757                     /* Generate 'cmp op1, ival' */
3758
3759                     inst_TT_IV(INS_cmp, op1, ival, 0);
3760                 }
3761             }
3762
3763             /* Generate the appropriate jumps */
3764             genJccLongLo(cmp, jumpTrue, jumpFalse);
3765
3766             genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
3767         }
3768     }
3769     else // (op2->gtOper != GT_CNS_LNG)
3770     {
3771
3772         /* The operands would be reversed by physically swapping them */
3773
3774         noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
3775
3776         /* Generate the first operand into a register pair */
3777
3778         genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
3779         noway_assert(op1->InReg());
3780
3781 #if CPU_LOAD_STORE_ARCH
3782         /* Generate the second operand into a register pair */
3783         // Fix 388442 ARM JitStress WP7
3784         genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3785         noway_assert(op2->InReg());
3786         regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
3787 #else
3788         /* Make the second operand addressable */
3789
3790         addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3791 #endif
3792         /* Make sure the first operand hasn't been spilled */
3793
3794         genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
3795         noway_assert(op1->InReg());
3796
3797         regPairNo regPair = op1->gtRegPair;
3798
3799 #if !CPU_LOAD_STORE_ARCH
3800         /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
3801
3802         addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
3803 #endif
3804
3805 #if FEATURE_STACK_FP_X87
3806         // We may need a transition block
3807         if (bFPTransition)
3808         {
3809             jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3810         }
3811 #endif
3812
3813         /* Perform the comparison - high parts */
3814
3815         inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
3816
3817         if (cond->gtFlags & GTF_UNSIGNED)
3818             genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3819         else
3820             genJccLongHi(cmp, jumpTrue, jumpFalse);
3821
3822         /* Compare the low parts */
3823
3824         inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
3825         genJccLongLo(cmp, jumpTrue, jumpFalse);
3826
3827         /* Free up anything that was tied up by either operand */
3828         CLANG_FORMAT_COMMENT_ANCHOR;
3829
3830 #if CPU_LOAD_STORE_ARCH
3831
3832         // Fix 388442 ARM JitStress WP7
3833         regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
3834         genReleaseRegPair(op2);
3835 #else
3836         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
3837 #endif
3838         genReleaseRegPair(op1);
3839     }
3840 }
3841
3842 /*****************************************************************************
3843  *  gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
3844  *  Called by genCondJumpFlt() to generate the fcomp instruction appropriate
3845  *  to the architecture we're running on.
3846  *
3847  *  P5:
3848  *  gen_fcomp_FN:     fcomp ST(0), stk
3849  *  gen_fcomp_FS_TT:  fcomp ST(0), addr
3850  *  gen_fcompp_FS:    fcompp
3851  *    These are followed by fnstsw, sahf to get the flags in EFLAGS.
3852  *
3853  *  P6:
3854  *  gen_fcomp_FN:     fcomip ST(0), stk
3855  *  gen_fcomp_FS_TT:  fld addr, fcomip ST(0), ST(1), fstp ST(0)
3856  *      (and reverse the branch condition since addr comes first)
3857  *  gen_fcompp_FS:    fcomip, fstp
3858  *    These instructions will correctly set the EFLAGS register.
3859  *
3860  *  Return value:  These functions return true if the instruction has
3861  *    already placed its result in the EFLAGS register.
3862  */
3863
3864 bool CodeGen::genUse_fcomip()
3865 {
3866     return compiler->opts.compUseFCOMI;
3867 }
3868
3869 /*****************************************************************************
3870  *
3871  *  Sets the flag for the TYP_INT/TYP_REF comparison.
3872  *  We try to use the flags if they have already been set by a prior
3873  *  instruction.
3874  *  e.g. i++; if(i<0) {}  Here, the "i++;" will have set the sign flag. We don't
3875  *                        need to compare again with zero. Just use an "INS_js".
3876  *
3877  *  Returns the flags the following jump/set instruction should use.
3878  */
3879
3880 emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
3881 {
3882     noway_assert(cond->OperIsCompare());
3883     noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
3884
3885     GenTreePtr op1 = cond->gtOp.gtOp1;
3886     GenTreePtr op2 = cond->gtOp.gtOp2;
3887     genTreeOps cmp = cond->OperGet();
3888
3889     if (cond->gtFlags & GTF_REVERSE_OPS)
3890     {
3891         /* Don't forget to modify the condition as well */
3892
3893         cond->gtOp.gtOp1 = op2;
3894         cond->gtOp.gtOp2 = op1;
3895         cond->SetOper(GenTree::SwapRelop(cmp));
3896         cond->gtFlags &= ~GTF_REVERSE_OPS;
3897
3898         /* Get hold of the new values */
3899
3900         cmp = cond->OperGet();
3901         op1 = cond->gtOp.gtOp1;
3902         op2 = cond->gtOp.gtOp2;
3903     }
3904
3905     // Note that op1's type may get bashed. So save it early
3906
3907     var_types op1Type     = op1->TypeGet();
3908     bool      unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
3909     emitAttr  size        = EA_UNKNOWN;
3910
3911     regMaskTP    regNeed;
3912     regMaskTP    addrReg1 = RBM_NONE;
3913     regMaskTP    addrReg2 = RBM_NONE;
3914     emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
3915
3916     bool byteCmp;
3917     bool shortCmp;
3918
3919     regMaskTP newLiveMask;
3920     regNumber op1Reg;
3921
3922     /* Are we comparing against a constant? */
3923
3924     if (op2->IsCnsIntOrI())
3925     {
3926         ssize_t ival = op2->gtIntConCommon.IconValue();
3927
3928         /* Unsigned less-than comparisons with 1 ('< 1')
3929            should be transformed into '== 0' to potentially
3930            suppress a tst instruction.
3931         */
3932         if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
3933         {
3934             op2->gtIntCon.gtIconVal = ival = 0;
3935             cond->gtOper = cmp = GT_EQ;
3936         }
3937
3938         /* Comparisons against 0 can be easier */
3939
3940         if (ival == 0)
3941         {
3942             // if we can safely change the comparison to unsigned we do so
3943             if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
3944             {
3945                 unsignedCmp = true;
3946             }
3947
3948             /* unsigned comparisons with 0 should be transformed into
3949                '== 0' or '!= 0' to potentially suppress a tst instruction. */
3950
3951             if (unsignedCmp)
3952             {
3953                 if (cmp == GT_GT)
3954                     cond->gtOper = cmp = GT_NE;
3955                 else if (cmp == GT_LE)
3956                     cond->gtOper = cmp = GT_EQ;
3957             }
3958
3959             /* Is this a simple zero/non-zero test? */
3960
3961             if (cmp == GT_EQ || cmp == GT_NE)
3962             {
3963                 /* Is the operand an "AND" operation? */
3964
3965                 if (op1->gtOper == GT_AND)
3966                 {
3967                     GenTreePtr an1 = op1->gtOp.gtOp1;
3968                     GenTreePtr an2 = op1->gtOp.gtOp2;
3969
3970                     /* Check for the case "expr & icon" */
3971
3972                     if (an2->IsIntCnsFitsInI32())
3973                     {
3974                         int iVal = (int)an2->gtIntCon.gtIconVal;
3975
3976                         /* make sure that constant is not out of an1's range */
3977
3978                         switch (an1->gtType)
3979                         {
3980                             case TYP_BOOL:
3981                             case TYP_BYTE:
3982                                 if (iVal & 0xffffff00)
3983                                     goto NO_TEST_FOR_AND;
3984                                 break;
3985                             case TYP_CHAR:
3986                             case TYP_SHORT:
3987                                 if (iVal & 0xffff0000)
3988                                     goto NO_TEST_FOR_AND;
3989                                 break;
3990                             default:
3991                                 break;
3992                         }
3993
3994                         if (an1->IsCnsIntOrI())
3995                         {
3996                             // Special case - Both operands of AND are consts
3997                             genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
3998                             addrReg1 = genRegMask(an1->gtRegNum);
3999                         }
4000                         else
4001                         {
4002                             addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
4003                         }
4004 #if CPU_LOAD_STORE_ARCH
4005                         if ((an1->InReg()) == 0)
4006                         {
4007                             genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
4008                             if (arm_Valid_Imm_For_Alu(iVal))
4009                             {
4010                                 inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
4011                             }
4012                             else
4013                             {
4014                                 regNumber regTmp = regSet.rsPickFreeReg();
4015                                 instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
4016                                 inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
4017                             }
4018                             genReleaseReg(an1);
4019                             addrReg1 = RBM_NONE;
4020                         }
4021                         else
4022 #endif
4023                         {
4024 #ifdef _TARGET_XARCH_
4025                             // Check to see if we can use a smaller immediate.
4026                             if ((an1->InReg()) && ((iVal & 0x0000FFFF) == iVal))
4027                             {
4028                                 var_types testType =
4029                                     (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
4030 #if CPU_HAS_BYTE_REGS
4031                                 // if we don't have a byte-able register, switch to the 2-byte form
4032                                 if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
4033                                 {
4034                                     testType = TYP_USHORT;
4035                                 }
4036 #endif // CPU_HAS_BYTE_REGS
4037
4038                                 inst_TT_IV(INS_TEST, an1, iVal, testType);
4039                             }
4040                             else
4041 #endif // _TARGET_XARCH_
4042                             {
4043                                 inst_TT_IV(INS_TEST, an1, iVal);
4044                             }
4045                         }
4046
4047                         goto DONE;
4048
4049                     NO_TEST_FOR_AND:;
4050                     }
4051
4052                     // TODO: Check for other cases that can generate 'test',
4053                     // TODO: also check for a 64-bit integer zero test which
4054                     // TODO: could generate 'or lo, hi' followed by jz/jnz.
4055                 }
4056             }
4057
4058             // See what Jcc instruction we would use if we can take advantage of
4059             // the knowledge of EFLAGs.
4060
4061             if (unsignedCmp)
4062             {
4063                 /*
4064                     Unsigned comparison to 0. Using this table:
4065
4066                     ----------------------------------------------------
4067                     | Comparison | Flags Checked    | Instruction Used |
4068                     ----------------------------------------------------
4069                     |    == 0    | ZF = 1           |       je         |
4070                     ----------------------------------------------------
4071                     |    != 0    | ZF = 0           |       jne        |
4072                     ----------------------------------------------------
4073                     |     < 0    | always FALSE     |       N/A        |
4074                     ----------------------------------------------------
4075                     |    <= 0    | ZF = 1           |       je         |
4076                     ----------------------------------------------------
4077                     |    >= 0    | always TRUE      |       N/A        |
4078                     ----------------------------------------------------
4079                     |     > 0    | ZF = 0           |       jne        |
4080                     ----------------------------------------------------
4081                 */
4082                 switch (cmp)
4083                 {
4084 #ifdef _TARGET_ARM_
4085                     case GT_EQ:
4086                         jumpKind = EJ_eq;
4087                         break;
4088                     case GT_NE:
4089                         jumpKind = EJ_ne;
4090                         break;
4091                     case GT_LT:
4092                         jumpKind = EJ_NONE;
4093                         break;
4094                     case GT_LE:
4095                         jumpKind = EJ_eq;
4096                         break;
4097                     case GT_GE:
4098                         jumpKind = EJ_NONE;
4099                         break;
4100                     case GT_GT:
4101                         jumpKind = EJ_ne;
4102                         break;
4103 #elif defined(_TARGET_X86_)
4104                     case GT_EQ:
4105                         jumpKind = EJ_je;
4106                         break;
4107                     case GT_NE:
4108                         jumpKind = EJ_jne;
4109                         break;
4110                     case GT_LT:
4111                         jumpKind = EJ_NONE;
4112                         break;
4113                     case GT_LE:
4114                         jumpKind = EJ_je;
4115                         break;
4116                     case GT_GE:
4117                         jumpKind = EJ_NONE;
4118                         break;
4119                     case GT_GT:
4120                         jumpKind = EJ_jne;
4121                         break;
4122 #endif // TARGET
4123                     default:
4124                         noway_assert(!"Unexpected comparison OpCode");
4125                         break;
4126                 }
4127             }
4128             else
4129             {
4130                 /*
4131                     Signed comparison to 0. Using this table:
4132
4133                     -----------------------------------------------------
4134                     | Comparison | Flags Checked     | Instruction Used |
4135                     -----------------------------------------------------
4136                     |    == 0    | ZF = 1            |       je         |
4137                     -----------------------------------------------------
4138                     |    != 0    | ZF = 0            |       jne        |
4139                     -----------------------------------------------------
4140                     |     < 0    | SF = 1            |       js         |
4141                     -----------------------------------------------------
4142                     |    <= 0    |      N/A          |       N/A        |
4143                     -----------------------------------------------------
4144                     |    >= 0    | SF = 0            |       jns        |
4145                     -----------------------------------------------------
4146                     |     > 0    |      N/A          |       N/A        |
4147                     -----------------------------------------------------
4148                 */
4149
4150                 switch (cmp)
4151                 {
4152 #ifdef _TARGET_ARM_
4153                     case GT_EQ:
4154                         jumpKind = EJ_eq;
4155                         break;
4156                     case GT_NE:
4157                         jumpKind = EJ_ne;
4158                         break;
4159                     case GT_LT:
4160                         jumpKind = EJ_mi;
4161                         break;
4162                     case GT_LE:
4163                         jumpKind = EJ_NONE;
4164                         break;
4165                     case GT_GE:
4166                         jumpKind = EJ_pl;
4167                         break;
4168                     case GT_GT:
4169                         jumpKind = EJ_NONE;
4170                         break;
4171 #elif defined(_TARGET_X86_)
4172                     case GT_EQ:
4173                         jumpKind = EJ_je;
4174                         break;
4175                     case GT_NE:
4176                         jumpKind = EJ_jne;
4177                         break;
4178                     case GT_LT:
4179                         jumpKind = EJ_js;
4180                         break;
4181                     case GT_LE:
4182                         jumpKind = EJ_NONE;
4183                         break;
4184                     case GT_GE:
4185                         jumpKind = EJ_jns;
4186                         break;
4187                     case GT_GT:
4188                         jumpKind = EJ_NONE;
4189                         break;
4190 #endif // TARGET
4191                     default:
4192                         noway_assert(!"Unexpected comparison OpCode");
4193                         break;
4194                 }
4195                 assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
4196             }
4197             assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
4198
4199             /* Is the value a simple local variable? */
4200
4201             if (op1->gtOper == GT_LCL_VAR)
4202             {
4203                 /* Is the flags register set to the value? */
4204
4205                 if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
4206                 {
4207                     if (jumpKind != EJ_NONE)
4208                     {
4209                         addrReg1 = RBM_NONE;
4210                         genUpdateLife(op1);
4211                         goto DONE_FLAGS;
4212                     }
4213                 }
4214             }
4215
4216             /* Make the comparand addressable */
4217             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4218
4219             /* Are the condition flags set based on the value? */
4220
4221             unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
4222
4223             if (op1->InReg())
4224             {
4225                 if (genFlagsAreReg(op1->gtRegNum))
4226                 {
4227                     flags |= GTF_ZSF_SET;
4228                 }
4229             }
4230
4231             if (flags)
4232             {
4233                 if (jumpKind != EJ_NONE)
4234                 {
4235                     goto DONE_FLAGS;
4236                 }
4237             }
4238
4239             /* Is the value in a register? */
4240
4241             if (op1->InReg())
4242             {
4243                 regNumber reg = op1->gtRegNum;
4244
4245                 /* With a 'test' we can do any signed test or any test for equality */
4246
4247                 if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
4248                 {
4249                     emitAttr compareSize = emitTypeSize(op1->TypeGet());
4250
4251                     // If we have a GT_REG_VAR then the register will be properly sign/zero extended
4252                     // But only up to 4 bytes
4253                     if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
4254                     {
4255                         compareSize = EA_4BYTE;
4256                     }
4257
4258 #if CPU_HAS_BYTE_REGS
4259                     // Make sure if we require a byte compare that we have a byte-able register
4260                     if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
4261 #endif // CPU_HAS_BYTE_REGS
4262                     {
4263                         /* Generate 'test reg, reg' */
4264                         instGen_Compare_Reg_To_Zero(compareSize, reg);
4265                         goto DONE;
4266                     }
4267                 }
4268             }
4269         }
4270
4271         else // if (ival != 0)
4272         {
4273             bool smallOk = true;
4274
4275             /* Make sure that the constant is not out of op1's range;
4276                if it is, we need to perform an int-with-int comparison,
4277                so we set smallOk to false and op1 gets loaded
4278                into a register.
4279             */
4280
4281             /* If op1 is TYP_SHORT, and is followed by an unsigned
4282              * comparison, we can use smallOk. But we don't know which
4283              * flags will be needed. This probably doesn't happen often.
4284             */
4285             var_types gtType = op1->TypeGet();
4286
4287             switch (gtType)
4288             {
4289                 case TYP_BYTE:
4290                     if (ival != (signed char)ival)
4291                         smallOk = false;
4292                     break;
4293                 case TYP_BOOL:
4294                 case TYP_UBYTE:
4295                     if (ival != (unsigned char)ival)
4296                         smallOk = false;
4297                     break;
4298
4299                 case TYP_SHORT:
4300                     if (ival != (signed short)ival)
4301                         smallOk = false;
4302                     break;
4303                 case TYP_CHAR:
4304                     if (ival != (unsigned short)ival)
4305                         smallOk = false;
4306                     break;
4307
4308 #ifdef _TARGET_64BIT_
4309                 case TYP_INT:
4310                     if (!FitsIn<INT32>(ival))
4311                         smallOk = false;
4312                     break;
4313                 case TYP_UINT:
4314                     if (!FitsIn<UINT32>(ival))
4315                         smallOk = false;
4316                     break;
4317 #endif // _TARGET_64BIT_
4318
4319                 default:
4320                     break;
4321             }
4322
4323             if (smallOk &&                 // constant is in op1's range
4324                 !unsignedCmp &&            // signed comparison
4325                 varTypeIsSmall(gtType) &&  // smalltype var
4326                 varTypeIsUnsigned(gtType)) // unsigned type
4327             {
4328                 unsignedCmp = true;
4329             }
4330
4331             /* Make the comparand addressable */
4332             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
4333         }
4334
4335         /* Special case: comparison of two constants */
4336
4337         // Needed if Importer doesn't call gtFoldExpr()
4338
4339         if (!(op1->InReg()) && (op1->IsCnsIntOrI()))
4340         {
4341             // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
4342
4343             /* Workaround: get the constant operand into a register */
4344             genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4345
4346             noway_assert(addrReg1 == RBM_NONE);
4347             noway_assert(op1->InReg());
4348
4349             addrReg1 = genRegMask(op1->gtRegNum);
4350         }
4351
4352         /* Compare the operand against the constant */
4353
4354         if (op2->IsIconHandle())
4355         {
4356             inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
4357         }
4358         else
4359         {
4360             inst_TT_IV(INS_cmp, op1, ival);
4361         }
4362         goto DONE;
4363     }
4364
4365     //---------------------------------------------------------------------
4366     //
4367     // We reach here if op2 was not a GT_CNS_INT
4368     //
4369
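    // Use a small (byte/short) compare only when both operands have the same small type;
    // otherwise compare at the natural int width.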
4370     byteCmp  = false;
4371     shortCmp = false;
4372
4373     if (op1Type == op2->gtType)
4374     {
4375         shortCmp = varTypeIsShort(op1Type);
4376         byteCmp  = varTypeIsByte(op1Type);
4377     }
4378
4379     noway_assert(op1->gtOper != GT_CNS_INT);
4380
4381     if (op2->gtOper == GT_LCL_VAR)
4382         genMarkLclVar(op2);
4383
4384     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4385     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4386
4387     /* Are we comparing against a register? */
4388
4389     if (op2->InReg())
4390     {
4391         /* Make the comparands addressable and mark as used */
4392
4393         assert(addrReg1 == RBM_NONE);
4394         addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4395
4396         /* Is the size of the comparison byte/char/short ? */
4397
4398         if (varTypeIsSmall(op1->TypeGet()))
4399         {
4400             /* Is op2 sitting in an appropriate register? */
4401
4402             if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
4403                 goto NO_SMALL_CMP;
4404
4405             /* Is op2 of the right type for a small comparison */
4406
4407             if (op2->gtOper == GT_REG_VAR)
4408             {
4409                 if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
4410                     goto NO_SMALL_CMP;
4411             }
4412             else
4413             {
4414                 if (op1->gtType != op2->gtType)
4415                     goto NO_SMALL_CMP;
4416             }
4417
4418             if (varTypeIsUnsigned(op1->TypeGet()))
4419                 unsignedCmp = true;
4420         }
4421
4422         assert(addrReg2 == RBM_NONE);
4423
4424         genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4425         addrReg2 = genRegMask(op2->gtRegNum);
4426         addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
4427         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4428         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4429
4430         /* Compare against the register */
4431
4432         inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
4433
4434         goto DONE;
4435
4436     NO_SMALL_CMP:
4437
4438         // op1 has been made addressable and is marked as in use
4439         // op2 is un-generated
4440         assert(addrReg2 == 0);
4441
4442         if ((op1->InReg()) == 0)
4443         {
4444             regNumber reg1 = regSet.rsPickReg();
4445
4446             noway_assert(varTypeIsSmall(op1->TypeGet()));
4447             instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->InReg()) != 0);
4448
4449             // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
4450             // so we need to make sure it is still valid.  Note that at this point, reg1 is
4451             // *not* marked as in use, and it is possible for it to be used in the address
4452             // mode expression, but that is OK, because we are done with expression after
4453             // this.  We only need reg1.
4454             addrReg1 = genKeepAddressable(op1, addrReg1);
4455             inst_RV_TT(ins, reg1, op1);
4456             regTracker.rsTrackRegTrash(reg1);
4457
4458             genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4459             addrReg1 = 0;
4460
4461             genMarkTreeInReg(op1, reg1);
4462
4463             regSet.rsMarkRegUsed(op1);
4464             addrReg1 = genRegMask(op1->gtRegNum);
4465         }
4466
4467         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4468         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4469
4470         goto DONE_OP1;
4471     }
4472
4473     // We come here if op2 is not enregistered or not in a "good" register.
4474
4475     assert(addrReg1 == 0);
4476
4477     // Determine what registers go live between op1 and op2
4478     newLiveMask = genNewLiveRegMask(op1, op2);
4479
4480     // Setup regNeed with the set of register that we suggest for op1 to be in
4481     //
4482     regNeed = RBM_ALLINT;
4483
4484     // avoid selecting registers that get newly born in op2
4485     regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
4486
4487     // avoid selecting op2 reserved regs
4488     regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
4489
4490 #if CPU_HAS_BYTE_REGS
4491     // if necessary setup regNeed to select just the byte-able registers
4492     if (byteCmp)
4493         regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
4494 #endif // CPU_HAS_BYTE_REGS
4495
4496     // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
4497     //
4498     genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
4499     noway_assert(op1->InReg());
4500
4501     op1Reg = op1->gtRegNum;
4502
4503     // Setup regNeed with the set of register that we require for op1 to be in
4504     //
4505     regNeed = RBM_ALLINT;
4506
4507 #if CPU_HAS_BYTE_REGS
4508     // if necessary setup regNeed to select just the byte-able registers
4509     if (byteCmp)
4510         regNeed &= RBM_BYTE_REGS;
4511 #endif // CPU_HAS_BYTE_REGS
4512
4513     // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
4514     regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
4515
4516     // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
4517     regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
4518
4519     // Did we end up in an acceptable register?
4520     // and do we have an acceptable free register available to grab?
4521     //
4522     if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
4523     {
4524         // Grab an acceptable register
4525         regNumber newReg = regSet.rsGrabReg(regNeed);
4526
4527         noway_assert(op1Reg != newReg);
4528
4529         /* Update the value in the target register */
4530
4531         regTracker.rsTrackRegCopy(newReg, op1Reg);
4532
4533         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
4534
4535         /* The value has been transferred to 'reg' */
4536
4537         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
4538             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
4539
4540         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
4541
4542         /* The value is now in an appropriate register */
4543
4544         op1->gtRegNum = newReg;
4545     }
4546     noway_assert(op1->InReg());
4547     op1Reg = op1->gtRegNum;
4548
4549     genUpdateLife(op1);
4550
4551     /* Mark the register as 'used' */
4552     regSet.rsMarkRegUsed(op1);
4553
4554     addrReg1 = genRegMask(op1Reg);
4555
4556     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4557     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4558
4559 DONE_OP1:
4560
4561     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4562     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4563     noway_assert(op1->InReg());
4564
4565     // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
4566     // when byteCmp is true we will perform a byte sized cmp instruction
4567     // and that instruction requires that any registers used are byte-able ones.
4568     //
4569     regNeed = RBM_ALLINT;
4570
4571 #if CPU_HAS_BYTE_REGS
4572     // if necessary setup regNeed to select just the byte-able registers
4573     if (byteCmp)
4574         regNeed &= RBM_BYTE_REGS;
4575 #endif // CPU_HAS_BYTE_REGS
4576
4577     /* Make the comparand addressable */
4578     assert(addrReg2 == 0);
4579     addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
4580
4581     /*  Make sure the first operand is still in a register; if
4582         it's been spilled, we have to make sure it's reloaded
4583         into a byte-addressable register if needed.
4584         Pass keepReg=RegSet::KEEP_REG; otherwise we will get the pointer lifetimes wrong.
4585      */
4586
4587     assert(addrReg1 != 0);
4588     genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
4589
4590     noway_assert(op1->InReg());
4591     noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
4592
4593     addrReg1 = genRegMask(op1->gtRegNum);
4594     regSet.rsLockUsedReg(addrReg1);
4595
4596     /* Make sure that op2 is addressable. If we are going to do a
4597        byte-comparison, we need it to be in a byte register. */
4598
4599     if (byteCmp && (op2->InReg()))
4600     {
4601         genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
4602         addrReg2 = genRegMask(op2->gtRegNum);
4603     }
4604     else
4605     {
4606         addrReg2 = genKeepAddressable(op2, addrReg2);
4607     }
4608
4609     regSet.rsUnlockUsedReg(addrReg1);
4610
4611     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4612     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4613
4614     if (byteCmp || shortCmp)
4615     {
4616         size = emitTypeSize(op2->TypeGet());
4617         if (varTypeIsUnsigned(op1Type))
4618             unsignedCmp = true;
4619     }
4620     else
4621     {
4622         size = emitActualTypeSize(op2->TypeGet());
4623     }
4624
4625     /* Perform the comparison */
4626     inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
4627
4628 DONE:
4629
4630     jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
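    // genJumpKindForOper maps the relational operator plus the signedness of the compare to the
    // jump kind that tests the flags just set by the cmp (e.g. GT_LT becomes a signed 'jl'-style
    // branch when signed, or an unsigned 'jb'-style branch otherwise).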
4631
4632 DONE_FLAGS: // We have determined what jumpKind to use
4633
4634     genUpdateLife(cond);
4635
4636     /* The condition value is dead at the jump that follows */
4637
4638     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4639     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4640     genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4641     genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
4642
4643     noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
4644
4645     return jumpKind;
4646 }
4647
4648 /*****************************************************************************/
4649 /*****************************************************************************/
4650 /*****************************************************************************
4651  *
4652  *  Generate code to jump to the jump target of the current basic block if
4653  *  the given relational operator yields 'true'.
4654  */
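//
// For example, for an integer compare such as 'if (a < b)', genCondSetFlags emits the 'cmp' and returns
// the matching jump kind, and inst_JMP then emits the conditional branch to 'jumpTrue'. Long and
// floating-point compares are dispatched to the specialized helpers below.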
4655
4656 void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
4657 {
4658     BasicBlock* jumpTrue;
4659     BasicBlock* jumpFalse;
4660
4661     GenTreePtr op1 = cond->gtOp.gtOp1;
4662     GenTreePtr op2 = cond->gtOp.gtOp2;
4663     genTreeOps cmp = cond->OperGet();
4664
4665     if (destTrue)
4666     {
4667         jumpTrue  = destTrue;
4668         jumpFalse = destFalse;
4669     }
4670     else
4671     {
4672         noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
4673
4674         jumpTrue  = compiler->compCurBB->bbJumpDest;
4675         jumpFalse = compiler->compCurBB->bbNext;
4676     }
4677
4678     noway_assert(cond->OperIsCompare());
4679
4680     /* Make sure the more expensive operand is 'op1' */
4681     noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
4682
4683     if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
4684     {
4685         /* Don't forget to modify the condition as well */
4686
4687         cond->gtOp.gtOp1 = op2;
4688         cond->gtOp.gtOp2 = op1;
4689         cond->SetOper(GenTree::SwapRelop(cmp));
4690         cond->gtFlags &= ~GTF_REVERSE_OPS;
4691
4692         /* Get hold of the new values */
4693
4694         cmp = cond->OperGet();
4695         op1 = cond->gtOp.gtOp1;
4696         op2 = cond->gtOp.gtOp2;
4697     }
4698
4699     /* What is the type of the operand? */
4700
4701     switch (genActualType(op1->gtType))
4702     {
4703         case TYP_INT:
4704         case TYP_REF:
4705         case TYP_BYREF:
4706             emitJumpKind jumpKind;
4707
4708             // Check if we can use the currently set flags. Else set them
4709
4710             jumpKind = genCondSetFlags(cond);
4711
4712 #if FEATURE_STACK_FP_X87
4713             if (bStackFPFixup)
4714             {
4715                 genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
4716             }
4717             else
4718 #endif
4719             {
4720                 /* Generate the conditional jump */
4721                 inst_JMP(jumpKind, jumpTrue);
4722             }
4723
4724             return;
4725
4726         case TYP_LONG:
4727 #if FEATURE_STACK_FP_X87
4728             if (bStackFPFixup)
4729             {
4730                 genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
4731             }
4732             else
4733 #endif
4734             {
4735                 genCondJumpLng(cond, jumpTrue, jumpFalse);
4736             }
4737             return;
4738
4739         case TYP_FLOAT:
4740         case TYP_DOUBLE:
4741 #if FEATURE_STACK_FP_X87
4742             genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
4743 #else
4744             genCondJumpFloat(cond, jumpTrue, jumpFalse);
4745 #endif
4746             return;
4747
4748         default:
4749 #ifdef DEBUG
4750             compiler->gtDispTree(cond);
4751 #endif
4752             unreached(); // unexpected/unsupported 'jtrue' operands type
4753     }
4754 }
4755
4756 /*****************************************************************************
4757  *  Spill registers to check that callers can handle it.
4758  */
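//
// Under register stress this spills every spillable used register and then zeroes any free registers
// that have already been modified, so that callers which assume a register's contents survive a nested
// genCodeForTree() call are flushed out in DEBUG builds.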
4759
4760 #ifdef DEBUG
4761
4762 void CodeGen::genStressRegs(GenTreePtr tree)
4763 {
4764     if (regSet.rsStressRegs() < 2)
4765         return;
4766
4767     /* Spill as many registers as possible. Callers should be prepared
4768        to handle this case.
4769        But don't spill trees with no size (TYP_STRUCT comes to mind) */
4770
4771     {
4772         regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
4773         regNumber regNum;
4774         regMaskTP regBit;
4775
4776         for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
4777         {
4778             if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
4779                 (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
4780             {
4781                 regSet.rsSpillReg(regNum);
4782
4783                 spillRegs &= regSet.rsMaskUsed;
4784
4785                 if (!spillRegs)
4786                     break;
4787             }
4788         }
4789     }
4790
4791     regMaskTP trashRegs = regSet.rsRegMaskFree();
4792
4793     if (trashRegs == RBM_NONE)
4794         return;
4795
4796     /* It is sometimes reasonable to expect that calling genCodeForTree()
4797        on certain trees won't spill anything */
4798
4799     if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
4800         handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
4801     {
4802         trashRegs &= ~(RBM_EXCEPTION_OBJECT);
4803     }
4804
4805     // If genCodeForTree() effectively gets called a second time on the same tree
4806
4807     if (tree->InReg())
4808     {
4809         noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
4810         trashRegs &= ~genRegMask(tree->gtRegNum);
4811     }
4812
4813     if (tree->gtType == TYP_INT && tree->OperIsSimple())
4814     {
4815         GenTreePtr op1 = tree->gtOp.gtOp1;
4816         GenTreePtr op2 = tree->gtOp.gtOp2;
4817         if (op1 && (op1->InReg()))
4818             trashRegs &= ~genRegMask(op1->gtRegNum);
4819         if (op2 && (op2->InReg()))
4820             trashRegs &= ~genRegMask(op2->gtRegNum);
4821     }
4822
4823     if (compiler->compCurBB == compiler->genReturnBB)
4824     {
4825         if (compiler->info.compCallUnmanaged)
4826         {
4827             LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
4828             if (varDsc->lvRegister)
4829                 trashRegs &= ~genRegMask(varDsc->lvRegNum);
4830         }
4831     }
4832
4833     /* Now trash the registers. We only trash registers that are already in regSet.rsModifiedRegsMask;
4834        otherwise we would have to save/restore the register. We try to be as unintrusive
4835        as possible */
4836
4837     noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
4838     // This is obviously false for ARM, but this function is never called.
4839     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
4840     {
4841         regMaskTP regMask = genRegMask(reg);
4842
4843         if (regSet.rsRegsModified(regMask & trashRegs))
4844             genSetRegToIcon(reg, 0);
4845     }
4846 }
4847
4848 #endif // DEBUG
4849
4850 /*****************************************************************************
4851  *
4852  *  Generate code for a GTK_CONST tree
4853  */
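//
// A brief sketch of the common cases: a zero constant is typically materialized with an 'xor reg, reg'
// via genSetRegToIcon, a handle that needs a relocation is loaded with instGen_Set_Reg_To_Imm using
// EA_HANDLE_CNS_RELOC, and (with REDUNDANT_LOAD) a constant that is already tracked in some register is
// simply reused without emitting any code.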
4854
4855 void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
4856 {
4857     noway_assert(tree->IsCnsIntOrI());
4858
4859     ssize_t   ival    = tree->gtIntConCommon.IconValue();
4860     regMaskTP needReg = destReg;
4861     regNumber reg;
4862     bool      needReloc = compiler->opts.compReloc && tree->IsIconHandle();
4863
4864 #if REDUNDANT_LOAD
4865
4866     /* If we are targeting destReg and ival is zero           */
4867     /* we would rather xor needReg than copy another register */
4868
4869     if (!needReloc)
4870     {
4871         bool reuseConstantInReg = false;
4872
4873         if (destReg == RBM_NONE)
4874             reuseConstantInReg = true;
4875
4876 #ifdef _TARGET_ARM_
4877         // If we can set a register to a constant with a small encoding, then do that.
4878         // Assume we'll get a low register if needReg has low registers as options.
4879         if (!reuseConstantInReg &&
4880             !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
4881         {
4882             reuseConstantInReg = true;
4883         }
4884 #else
4885         if (!reuseConstantInReg && ival != 0)
4886             reuseConstantInReg = true;
4887 #endif
4888
4889         if (reuseConstantInReg)
4890         {
4891             /* Is the constant already in register? If so, use this register */
4892
4893             reg = regTracker.rsIconIsInReg(ival);
4894             if (reg != REG_NA)
4895                 goto REG_LOADED;
4896         }
4897     }
4898
4899 #endif // REDUNDANT_LOAD
4900
4901     reg = regSet.rsPickReg(needReg, bestReg);
4902
4903     /* If the constant is a handle, we need a reloc to be applied to it */
4904
4905     if (needReloc)
4906     {
4907         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
4908         regTracker.rsTrackRegTrash(reg);
4909     }
4910     else
4911     {
4912         genSetRegToIcon(reg, ival, tree->TypeGet());
4913     }
4914
4915 REG_LOADED:
4916
4917 #ifdef DEBUG
4918     /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
4919
4920     if (!genTempLiveChg)
4921     {
4922         VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
4923         genTempLiveChg = true;
4924     }
4925 #endif
4926
4927     gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (e.g., frozen strings)
4928     genCodeForTree_DONE(tree, reg);
4929 }
4930
4931 /*****************************************************************************
4932  *
4933  *  Generate code for a GTK_LEAF tree
4934  */
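//
// The leaf kinds handled here are GT_LCL_VAR, GT_LCL_FLD and GT_CLS_VAR (loaded from memory unless the
// value is already available in a register), GT_NO_OP, GT_END_LFIN, GT_CATCH_ARG, GT_JMP and
// GT_MEMORYBARRIER; GT_REG_VAR is expected to have been handled by the caller.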
4935
4936 void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
4937 {
4938     genTreeOps oper    = tree->OperGet();
4939     regNumber  reg     = DUMMY_INIT(REG_CORRUPT);
4940     regMaskTP  regs    = regSet.rsMaskUsed;
4941     regMaskTP  needReg = destReg;
4942     size_t     size;
4943
4944     noway_assert(tree->OperKind() & GTK_LEAF);
4945
4946     switch (oper)
4947     {
4948         case GT_REG_VAR:
4949             NO_WAY("GT_REG_VAR should have been caught above");
4950             break;
4951
4952         case GT_LCL_VAR:
4953
4954             /* Does the variable live in a register? */
4955
4956             if (genMarkLclVar(tree))
4957             {
4958                 genCodeForTree_REG_VAR1(tree);
4959                 return;
4960             }
4961
4962 #if REDUNDANT_LOAD
4963
4964             /* Is the local variable already in register? */
4965
4966             reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
4967
4968             if (reg != REG_NA)
4969             {
4970                 /* Use the register the variable happens to be in */
4971                 regMaskTP regMask = genRegMask(reg);
4972
4973                 // If the register that it was in isn't one of the needRegs
4974                 // then try to move it into a needReg register
4975
4976                 if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
4977                 {
4978                     regNumber rg2 = reg;
4979                     reg           = regSet.rsPickReg(needReg, bestReg);
4980                     if (reg != rg2)
4981                     {
4982                         regMask = genRegMask(reg);
4983                         inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
4984                     }
4985                 }
4986
4987                 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
4988                 regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
4989                 break;
4990             }
4991
4992 #endif
4993             goto MEM_LEAF;
4994
4995         case GT_LCL_FLD:
4996
4997             // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
4998             // to worry about it being enregistered.
4999             noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
5000             goto MEM_LEAF;
5001
5002         case GT_CLS_VAR:
5003
5004         MEM_LEAF:
5005
5006             /* Pick a register for the value */
5007
5008             reg = regSet.rsPickReg(needReg, bestReg);
5009
5010             /* Load the variable into the register */
5011
5012             size = genTypeSize(tree->gtType);
5013
5014             if (size < EA_4BYTE)
5015             {
5016                 instruction ins = ins_Move_Extend(tree->TypeGet(), tree->InReg());
5017                 inst_RV_TT(ins, reg, tree, 0);
5018
5019                 /* We've now "promoted" the tree-node to TYP_INT */
5020
5021                 tree->gtType = TYP_INT;
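                // For example, a TYP_BYTE or TYP_SHORT local is loaded with a sign- or zero-extending
                // move and is treated as a full TYP_INT value from this point on.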
5022             }
5023             else
5024             {
5025                 inst_RV_TT(INS_mov, reg, tree, 0);
5026             }
5027
5028             regTracker.rsTrackRegTrash(reg);
5029
5030             gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
5031
5032             switch (oper)
5033             {
5034                 case GT_CLS_VAR:
5035                     regTracker.rsTrackRegClsVar(reg, tree);
5036                     break;
5037                 case GT_LCL_VAR:
5038                     regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
5039                     break;
5040                 case GT_LCL_FLD:
5041                     break;
5042                 default:
5043                     noway_assert(!"Unexpected oper");
5044             }
5045
5046 #ifdef _TARGET_ARM_
5047             if (tree->gtFlags & GTF_IND_VOLATILE)
5048             {
5049                 // Emit a memory barrier instruction after the load
5050                 instGen_MemoryBarrier();
5051             }
5052 #endif
5053
5054             break;
5055
5056         case GT_NO_OP:
5057             instGen(INS_nop);
5058             reg = REG_STK;
5059             break;
5060
5061 #if !FEATURE_EH_FUNCLETS
5062         case GT_END_LFIN:
5063
5064             /* Have to clear the shadowSP of the nesting level which
5065                encloses the finally */
5066
5067             unsigned finallyNesting;
5068             finallyNesting = (unsigned)tree->gtVal.gtVal1;
5069             noway_assert(tree->gtVal.gtVal1 <
5070                          compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
5071             noway_assert(finallyNesting < compiler->compHndBBtabCount);
5072
5073             // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
5074             unsigned filterEndOffsetSlotOffs;
5075             PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
5076                           sizeof(void*)); // below doesn't underflow.
5077             filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
5078
5079             unsigned curNestingSlotOffs;
5080             curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
5081             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
5082             reg = REG_STK;
5083             break;
5084 #endif // !FEATURE_EH_FUNCLETS
5085
5086         case GT_CATCH_ARG:
5087
5088             noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
5089
5090             /* Catch arguments get passed in a register. genCodeForBBlist()
5091                would have marked it as holding a GC object, but not as used. */
5092
5093             noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
5094             reg = REG_EXCEPTION_OBJECT;
5095             break;
5096
5097         case GT_JMP:
5098             genCodeForTreeLeaf_GT_JMP(tree);
5099             return;
5100
5101         case GT_MEMORYBARRIER:
5102             // Emit the memory barrier instruction
5103             instGen_MemoryBarrier();
5104             reg = REG_STK;
5105             break;
5106
5107         default:
5108 #ifdef DEBUG
5109             compiler->gtDispTree(tree);
5110 #endif
5111             noway_assert(!"unexpected leaf");
5112     }
5113
5114     noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
5115     genCodeForTree_DONE(tree, reg);
5116 }
5117
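// Walk down a chain of GT_COMMA nodes, generating code for each op1 purely for its side effects, and
// return the final non-comma node, which the caller then evaluates for its value.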
5118 GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
5119 {
5120     while (tree->OperGet() == GT_COMMA)
5121     {
5122         GenTreePtr op1 = tree->gtOp.gtOp1;
5123         genCodeForTree(op1, RBM_NONE);
5124         gcInfo.gcMarkRegPtrVal(op1);
5125
5126         tree = tree->gtOp.gtOp2;
5127     }
5128     return tree;
5129 }
5130
5131 /*****************************************************************************
5132  *
5133  *  Generate code for a leaf node of type GT_JMP
5134  */
5135
5136 void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
5137 {
5138     noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
5139
5140 #ifdef PROFILING_SUPPORTED
5141     if (compiler->compIsProfilerHookNeeded())
5142     {
5143         /* fire the event at the call site */
5144         unsigned saveStackLvl2 = genStackLevel;
5145
5146         compiler->info.compProfilerCallback = true;
5147
5148 #ifdef _TARGET_X86_
5149         //
5150         // Push the profilerHandle
5151         //
5152         regMaskTP byrefPushedRegs;
5153         regMaskTP norefPushedRegs;
5154         regMaskTP pushedArgRegs =
5155             genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
5156                         &norefPushedRegs);
5157
5158         if (compiler->compProfilerMethHndIndirected)
5159         {
5160             getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
5161                                        (ssize_t)compiler->compProfilerMethHnd);
5162         }
5163         else
5164         {
5165             inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
5166         }
5167         genSinglePush();
5168
5169         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5170                           sizeof(int) * 1, // argSize
5171                           EA_UNKNOWN);     // retSize
5172
5173         //
5174         // Adjust the number of stack slots used by this managed method if necessary.
5175         //
5176         if (compiler->fgPtrArgCntMax < 1)
5177         {
5178             JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
5179             compiler->fgPtrArgCntMax = 1;
5180         }
5181
5182         genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
5183 #elif _TARGET_ARM_
5184         // For GT_JMP nodes under the ARM profiler, r0 has been added as a used register in order to evaluate the GT_JMP node.
5185         // To emit the tailcall callback we need r0 to pass the profiler handle; any free register could be used as the call target.
5186         regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
5187         noway_assert(argReg == REG_PROFILER_JMP_ARG);
5188         regSet.rsLockReg(RBM_PROFILER_JMP_USED);
5189
5190         if (compiler->compProfilerMethHndIndirected)
5191         {
5192             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
5193             regTracker.rsTrackRegTrash(argReg);
5194         }
5195         else
5196         {
5197             instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
5198         }
5199
5200         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5201                           0,           // argSize
5202                           EA_UNKNOWN); // retSize
5203
5204         regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
5205 #else
5206         NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
5207 #endif //_TARGET_X86_
5208
5209         /* Restore the stack level */
5210         SetStackLevel(saveStackLvl2);
5211     }
5212 #endif // PROFILING_SUPPORTED
5213
5214     /* This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
5215      * call genPInvokeMethodEpilog anywhere that we have a method return.  We should really
5216      * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
5217      */
5218
5219     if (compiler->info.compCallUnmanaged)
5220     {
5221         genPInvokeMethodEpilog();
5222     }
5223
5224     // Make sure register arguments are in their initial registers
5225     // and stack arguments are put back as well.
5226     //
5227     // This does not deal with circular dependencies of register
5228     // arguments, which is safe because RegAlloc prevents that by
5229     // not enregistering any RegArgs when a JMP opcode is used.
5230
5231     if (compiler->info.compArgsCount == 0)
5232     {
5233         return;
5234     }
5235
5236     unsigned   varNum;
5237     LclVarDsc* varDsc;
5238
5239     // First move any enregistered stack arguments back to the stack
5240     for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5241     {
5242         noway_assert(varDsc->lvIsParam);
5243         if (varDsc->lvIsRegArg || !varDsc->lvRegister)
5244             continue;
5245
5246         /* Argument was passed on the stack, but ended up in a register
5247          * Store it back to the stack */
5248         CLANG_FORMAT_COMMENT_ANCHOR;
5249
5250 #ifndef _TARGET_64BIT_
5251         if (varDsc->TypeGet() == TYP_LONG)
5252         {
5253             /* long - at least the low half must be enregistered */
5254
5255             getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
5256
5257             /* Is the upper half also enregistered? */
5258
5259             if (varDsc->lvOtherReg != REG_STK)
5260             {
5261                 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
5262             }
5263         }
5264         else
5265 #endif // _TARGET_64BIT_
5266         {
5267             getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
5268                                       varNum, 0);
5269         }
5270     }
5271
5272 #ifdef _TARGET_ARM_
5273     regMaskTP fixedArgsMask = RBM_NONE;
5274 #endif
5275
5276     // Next move any un-enregistered register arguments back to their register
5277     for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5278     {
5279         /* Is this variable a register arg? */
5280
5281         if (!varDsc->lvIsRegArg)
5282             continue;
5283
5284         /* Register argument */
5285
5286         noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5287         noway_assert(!varDsc->lvRegister);
5288
5289         /* Reload it from the stack */
5290         CLANG_FORMAT_COMMENT_ANCHOR;
5291
5292 #ifndef _TARGET_64BIT_
5293         if (varDsc->TypeGet() == TYP_LONG)
5294         {
5295             /* long - at least the low half must be enregistered */
5296
5297             getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
5298             regTracker.rsTrackRegTrash(varDsc->lvArgReg);
5299
5300             /* Assume that the upper half is also enregistered */
5301
5302             getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
5303                                       sizeof(int));
5304             regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
5305
5306 #ifdef _TARGET_ARM_
5307             fixedArgsMask |= genRegMask(varDsc->lvArgReg);
5308             fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
5309 #endif
5310         }
5311         else
5312 #endif // _TARGET_64BIT_
5313 #ifdef _TARGET_ARM_
5314             if (varDsc->lvIsHfaRegArg())
5315         {
5316             const var_types   elemType = varDsc->GetHfaType();
5317             const instruction loadOp   = ins_Load(elemType);
5318             const emitAttr    size     = emitTypeSize(elemType);
5319             regNumber         argReg   = varDsc->lvArgReg;
5320             const unsigned    maxSize  = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
5321
5322             for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5323             {
5324                 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5325                 assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
5326                 argReg = regNextOfType(argReg, elemType);
5327             }
5328         }
5329         else if (varDsc->TypeGet() == TYP_STRUCT)
5330         {
5331             const var_types   elemType = TYP_INT; // we pad everything out to at least 4 bytes
5332             const instruction loadOp   = ins_Load(elemType);
5333             const emitAttr    size     = emitTypeSize(elemType);
5334             regNumber         argReg   = varDsc->lvArgReg;
5335             const unsigned    maxSize  = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
5336
5337             for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5338             {
5339                 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5340                 regTracker.rsTrackRegTrash(argReg);
5341
5342                 fixedArgsMask |= genRegMask(argReg);
5343
5344                 argReg = genRegArgNext(argReg);
5345             }
5346         }
5347         else
5348 #endif //_TARGET_ARM_
5349         {
5350             var_types loadType = varDsc->TypeGet();
5351             regNumber argReg   = varDsc->lvArgReg; // incoming arg register
5352             bool      twoParts = false;
5353
5354             if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
5355             {
5356 #ifndef _TARGET_64BIT_
5357                 if (loadType == TYP_DOUBLE)
5358                     twoParts = true;
5359 #endif
5360                 loadType = TYP_I_IMPL;
5361                 assert(isValidIntArgReg(argReg));
5362             }
5363
5364             getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
5365             regTracker.rsTrackRegTrash(argReg);
5366
5367 #ifdef _TARGET_ARM_
5368             fixedArgsMask |= genRegMask(argReg);
5369 #endif
5370             if (twoParts)
5371             {
5372                 argReg = genRegArgNext(argReg);
5373                 assert(isValidIntArgReg(argReg));
5374
5375                 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
5376                 regTracker.rsTrackRegTrash(argReg);
5377
5378 #ifdef _TARGET_ARM_
5379                 fixedArgsMask |= genRegMask(argReg);
5380 #endif
5381             }
5382         }
5383     }
5384
5385 #ifdef _TARGET_ARM_
5386     // Check if we have any non-fixed args possibly in the arg registers.
5387     if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
5388     {
5389         noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
5390
5391         regNumber regDeclArgs = REG_ARG_FIRST;
5392
5393         // Skip the 'this' pointer.
5394         if (!compiler->info.compIsStatic)
5395         {
5396             regDeclArgs = REG_NEXT(regDeclArgs);
5397         }
5398
5399         // Skip the 'generic context.'
5400         if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5401         {
5402             regDeclArgs = REG_NEXT(regDeclArgs);
5403         }
5404
5405         // Skip any 'return buffer arg.'
5406         if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
5407         {
5408             regDeclArgs = REG_NEXT(regDeclArgs);
5409         }
5410
5411         // Skip the 'vararg cookie.'
5412         regDeclArgs = REG_NEXT(regDeclArgs);
5413
5414         // Also add offset for the vararg cookie.
5415         int offset = REGSIZE_BYTES;
5416
5417         // Load all the variable arguments in registers back to their registers.
5418         for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
5419         {
5420             if (!(fixedArgsMask & genRegMask(reg)))
5421             {
5422                 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
5423                 regTracker.rsTrackRegTrash(reg);
5424             }
5425             offset += REGSIZE_BYTES;
5426         }
5427     }
5428 #endif // _TARGET_ARM_
5429 }
5430
5431 /*****************************************************************************
5432  *
5433  *  Check if a variable is assigned to in a tree.  The variable number is
5434  *  passed in pCallBackData.  If the variable is assigned to, return
5435  *  Compiler::WALK_ABORT.  Otherwise return Compiler::WALK_CONTINUE.
5436  */
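//
// This is used below by genCodeForQmark (via fgWalkTreePre) to decide whether an enregistered variable
// that is live across a qmark colon must be spilled because the colon assigns to it.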
5437 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
5438 {
5439     GenTreePtr tree = *pTree;
5440     if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
5441         (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
5442     {
5443         return Compiler::WALK_ABORT;
5444     }
5445
5446     return Compiler::WALK_CONTINUE;
5447 }
5448
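// Return the register that holds the given GT_LCL_VAR node if the local is an enregistered
// non-floating-point variable; otherwise return REG_NA.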
5449 regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
5450 {
5451     unsigned   varNum;
5452     LclVarDsc* varDsc;
5453
5454     if (tree->gtOper == GT_LCL_VAR)
5455     {
5456         /* Does the variable live in a register? */
5457
5458         varNum = tree->gtLclVarCommon.gtLclNum;
5459         noway_assert(varNum < compiler->lvaCount);
5460         varDsc = compiler->lvaTable + varNum;
5461
5462         if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5463         {
5464             return varDsc->lvRegNum;
5465         }
5466     }
5467
5468     return REG_NA;
5469 }
5470
5471 // inline
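// Reload ('unspill') any registers that the saved liveness set 'ls' expects to hold live variables or
// GC/byref pointers but that are currently spilled; this is used to make both arms of a qmark leave the
// registers in the same state.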
5472 void CodeGen::unspillLiveness(genLivenessSet* ls)
5473 {
5474     // Only try to unspill the registers that are missing from the currentLiveRegs
5475     //
5476     regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5477     regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
5478     cannotSpillMask &= ~currentLiveRegs;
5479
5480     // Typically this will always be true and we will return
5481     //
5482     if (cannotSpillMask == 0)
5483         return;
5484
5485     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5486     {
5487         // Is this a register that we cannot leave in the spilled state?
5488         //
5489         if ((cannotSpillMask & genRegMask(reg)) == 0)
5490             continue;
5491
5492         RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
5493
5494         // Was it spilled, if not then skip it.
5495         //
5496         if (!spill)
5497             continue;
5498
5499         noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
5500
5501         regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5502     }
5503 }
5504
5505 /*****************************************************************************
5506  *
5507  *  Generate code for a qmark colon
5508  */
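//
// For example, for something like 'v = cond ? x : y' this emits the compare-and-branch shape sketched
// in the comment below (else part, jump over the then part, then part, join label), unless
// genCodeForQmarkWithCMOV can instead implement the whole thing with a single conditional move.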
5509
5510 void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
5511 {
5512     GenTreePtr op1 = tree->gtOp.gtOp1;
5513     GenTreePtr op2 = tree->gtOp.gtOp2;
5514     regNumber  reg;
5515     regMaskTP  regs    = regSet.rsMaskUsed;
5516     regMaskTP  needReg = destReg;
5517
5518     noway_assert(compiler->compQmarkUsed);
5519     noway_assert(tree->gtOper == GT_QMARK);
5520     noway_assert(op1->OperIsCompare());
5521     noway_assert(op2->gtOper == GT_COLON);
5522
5523     GenTreePtr thenNode = op2->AsColon()->ThenNode();
5524     GenTreePtr elseNode = op2->AsColon()->ElseNode();
5525
5526     /* If elseNode is a Nop node you must reverse the
5527        thenNode and elseNode prior to reaching here!
5528        (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
5529
5530     noway_assert(!elseNode->IsNothingNode());
5531
5532     /* Try to implement the qmark colon using a CMOV.  If we can't for
5533        whatever reason, this will return false and we will implement
5534        it using regular branching constructs. */
5535
5536     if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
5537         return;
5538
5539     /*
5540         This is a ?: operator; generate code like this:
5541
5542             condition_compare
5543             jmp_if_true lab_true
5544
5545         lab_false:
5546             op1 (false = 'else' part)
5547             jmp lab_done
5548
5549         lab_true:
5550             op2 (true = 'then' part)
5551
5552         lab_done:
5553
5554
5555         NOTE: If no 'then' part we do not generate the 'jmp lab_done'
5556             or the 'lab_done' label
5557     */
5558
5559     BasicBlock* lab_true;
5560     BasicBlock* lab_false;
5561     BasicBlock* lab_done;
5562
5563     genLivenessSet entryLiveness;
5564     genLivenessSet exitLiveness;
5565
5566     lab_true  = genCreateTempLabel();
5567     lab_false = genCreateTempLabel();
5568
5569 #if FEATURE_STACK_FP_X87
5570     /* Spill any register that hold partial values so that the exit liveness
5571        from sides is the same */
5572     CLANG_FORMAT_COMMENT_ANCHOR;
5573
5574 #ifdef DEBUG
5575     regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
5576
5577     // spillMask should be the whole FP stack
5578     noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
5579 #endif
5580
5581     SpillTempsStackFP(regSet.rsMaskUsedFloat);
5582     noway_assert(regSet.rsMaskUsedFloat == 0);
5583 #endif
5584
5585     /* Before we generate code for qmark, we spill all the currently used registers
5586        that conflict with the registers used in the qmark tree. This is to avoid
5587        introducing spills that only occur on either the 'then' or 'else' side of
5588        the tree, but not both identically. We need to be careful with enregistered
5589        variables that are used; see below.
5590     */
5591
5592     if (regSet.rsMaskUsed)
5593     {
5594         /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
5595            variable), then it may not get spilled. However, the variable may
5596            then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
5597            may get spilled from one side and not the other. So unmark regSet.rsMaskVars
5598            before spilling regSet.rsMaskUsed */
5599
5600         regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
5601         regMaskTP rsAdditional           = RBM_NONE;
5602
5603         // For each multi-use of an enregistered variable, we need to determine if
5604         // it can get spilled inside the qmark colon.  This can only happen if
5605         // its life ends somewhere in the qmark colon.  We have the following
5606         // cases:
5607         // 1) Variable is dead at the end of the colon -- needs to be spilled
5608         // 2) Variable is alive at the end of the colon -- needs to be spilled
5609         //    iff it is assigned to in the colon.  In order to determine that, we
5610         //    examine the GTF_ASG flag to see if any assignments were made in the
5611         //    colon.  If there are any, we need to do a tree walk to see if this
5612         //    variable is the target of an assignment.  This treewalk should not
5613         //    happen frequently.
5614         if (rsAdditionalCandidates)
5615         {
5616 #ifdef DEBUG
5617             if (compiler->verbose)
5618             {
5619                 Compiler::printTreeID(tree);
5620                 printf(": Qmark-Colon additional spilling candidates are ");
5621                 dspRegMask(rsAdditionalCandidates);
5622                 printf("\n");
5623             }
5624 #endif
5625
5626             // If any candidates are not alive at the GT_QMARK node, then they
5627             // need to be spilled
5628
5629             const VARSET_TP& rsLiveNow(compiler->compCurLife);
5630             VARSET_TP rsLiveAfter(compiler->fgUpdateLiveSet(compiler->compCurLife, compiler->compCurLifeTree, tree));
5631
5632             VARSET_TP regVarLiveNow(VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
5633
5634             VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
5635             while (iter.NextElem(&varIndex))
5636             {
5637                 // Find the variable in compiler->lvaTable
5638                 unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
5639                 LclVarDsc* varDsc = compiler->lvaTable + varNum;
5640
5641 #if !FEATURE_FP_REGALLOC
5642                 if (varDsc->IsFloatRegType())
5643                     continue;
5644 #endif
5645
5646                 noway_assert(varDsc->lvRegister);
5647
5648                 regMaskTP regBit;
5649
5650                 if (varTypeIsFloating(varDsc->TypeGet()))
5651                 {
5652                     regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
5653                 }
5654                 else
5655                 {
5656                     regBit = genRegMask(varDsc->lvRegNum);
5657
5658                     // For longs we may need to spill both regs
5659                     if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
5660                         regBit |= genRegMask(varDsc->lvOtherReg);
5661                 }
5662
5663                 // Is it one of our reg-use vars?  If not, we don't need to spill it.
5664                 regBit &= rsAdditionalCandidates;
5665                 if (!regBit)
5666                     continue;
5667
5668                 // Is the variable live at the end of the colon?
5669                 if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
5670                 {
5671                     // Variable is alive at the end of the colon.  Was it assigned
5672                     // to inside the colon?
5673
5674                     if (!(op2->gtFlags & GTF_ASG))
5675                         continue;
5676
5677                     if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
5678                         Compiler::WALK_ABORT)
5679                     {
5680                         // Variable was assigned to, so we need to spill it.
5681
5682                         rsAdditional |= regBit;
5683 #ifdef DEBUG
5684                         if (compiler->verbose)
5685                         {
5686                             Compiler::printTreeID(tree);
5687                             printf(": Qmark-Colon candidate ");
5688                             dspRegMask(regBit);
5689                             printf("\n");
5690                             printf("    is assigned to inside colon and will be spilled\n");
5691                         }
5692 #endif
5693                     }
5694                 }
5695                 else
5696                 {
5697                     // Variable is not alive at the end of the colon.  We need to spill it.
5698
5699                     rsAdditional |= regBit;
5700 #ifdef DEBUG
5701                     if (compiler->verbose)
5702                     {
5703                         Compiler::printTreeID(tree);
5704                         printf(": Qmark-Colon candidate ");
5705                         dspRegMask(regBit);
5706                         printf("\n");
5707                         printf("    is alive at end of colon and will be spilled\n");
5708                     }
5709 #endif
5710                 }
5711             }
5712
5713 #ifdef DEBUG
5714             if (compiler->verbose)
5715             {
5716                 Compiler::printTreeID(tree);
5717                 printf(": Qmark-Colon approved additional spilling candidates are ");
5718                 dspRegMask(rsAdditional);
5719                 printf("\n");
5720             }
5721 #endif
5722         }
5723
5724         noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
5725
5726         // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
5727         // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
5728         // we will have unbalanced spills and generate bad code.
5729         regMaskTP rsSpill =
5730             ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
5731
5732 #ifdef DEBUG
5733         // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
5734         // 'bad' registers, causing spills. So, just force all used registers to get spilled
5735         // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
5736         // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
5737         // unspilled while generating that same tree.
5738
5739         if (regSet.rsStressRegs() >= 1)
5740         {
5741             rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
5742         }
5743 #endif // DEBUG
5744
5745         if (rsSpill)
5746         {
5747             // Remember which registers hold pointers. We will spill
5748             // them, but the code that follows will fetch reg vars from
5749             // the registers, so we need that GC info.
5750             regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
5751             regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
5752
5753             // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
5754             // So, pretend there aren't any, and spill them anyway. This will only occur
5755             // if rsAdditional is non-empty.
5756             regMaskTP rsTemp = regSet.rsMaskVars;
5757             regSet.ClearMaskVars();
5758
5759             regSet.rsSpillRegs(rsSpill);
5760
5761             // Restore gc tracking masks.
5762             gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
5763             gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
5764
5765             // Set regSet.rsMaskVars back to normal
5766             regSet.rsMaskVars = rsTemp;
5767         }
5768     }
5769
5770     // Generate the conditional jump but without doing any StackFP fixups.
5771     genCondJump(op1, lab_true, lab_false, false);
5772
5773     /* Save the current liveness, register status, and GC pointers */
5774     /* This is the liveness information upon entry                 */
5775     /* to both the then and else parts of the qmark                */
5776
5777     saveLiveness(&entryLiveness);
5778
5779     /* Clear the liveness of any local variables that are dead upon   */
5780     /* entry to the else part.                                        */
5781
5782     /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
5783     /* from the "colon or op2" liveSet                                */
5784     genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
5785
5786     /* genCondJump() closes the current emitter block */
5787
5788     genDefineTempLabel(lab_false);
5789
5790 #if FEATURE_STACK_FP_X87
5791     // Store fpstate
5792
5793     QmarkStateStackFP tempFPState;
5794     bool              bHasFPUState = !compCurFPState.IsEmpty();
5795     genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
5796 #endif
5797
5798     /* Does the operator yield a value? */
5799
5800     if (tree->gtType == TYP_VOID)
5801     {
5802         /* Generate the code for the else part of the qmark */
5803
5804         genCodeForTree(elseNode, needReg, bestReg);
5805
5806         /* The type is VOID, so we shouldn't have computed a value */
5807
5808         noway_assert(!(elseNode->InReg()));
5809
5810         /* Save the current liveness, register status, and GC pointers               */
5811         /* This is the liveness information upon exit of the then part of the qmark  */
5812
5813         saveLiveness(&exitLiveness);
5814
5815         /* Is there a 'then' part? */
5816
5817         if (thenNode->IsNothingNode())
5818         {
5819 #if FEATURE_STACK_FP_X87
5820             if (bHasFPUState)
5821             {
5822                 // We had FP state on entry just after the condition, so potentially, the else
5823                 // node may have to do transition work.
5824                 lab_done = genCreateTempLabel();
5825
5826                 /* Generate jmp lab_done */
5827
5828                 inst_JMP(EJ_jmp, lab_done);
5829
5830                 /* No 'then' - just generate the 'lab_true' label */
5831
5832                 genDefineTempLabel(lab_true);
5833
5834                 // We need to do this after defining the lab_true label
5835                 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5836                 genQMarkAfterThenBlockStackFP(&tempFPState);
5837                 genDefineTempLabel(lab_done);
5838             }
5839             else
5840 #endif // FEATURE_STACK_FP_X87
5841             {
5842                 /* No 'then' - just generate the 'lab_true' label */
5843                 genDefineTempLabel(lab_true);
5844             }
5845         }
5846         else
5847         {
5848             lab_done = genCreateTempLabel();
5849
5850             /* Generate jmp lab_done */
5851
5852             inst_JMP(EJ_jmp, lab_done);
5853
5854             /* Restore the liveness that we had upon entry of the then part of the qmark */
5855
5856             restoreLiveness(&entryLiveness);
5857
5858             /* Clear the liveness of any local variables that are dead upon    */
5859             /* entry to the then part.                                         */
5860             genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5861
5862             /* Generate lab_true: */
5863
5864             genDefineTempLabel(lab_true);
5865 #if FEATURE_STACK_FP_X87
5866             // We need to do this after defining the lab_true label
5867             genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5868 #endif
5869             /* Enter the then part - trash all registers */
5870
5871             regTracker.rsTrackRegClr();
5872
5873             /* Generate the code for the then part of the qmark */
5874
5875             genCodeForTree(thenNode, needReg, bestReg);
5876
5877             /* The type is VOID, so we shouldn't have computed a value */
5878
5879             noway_assert(!(thenNode->InReg()));
5880
5881             unspillLiveness(&exitLiveness);
5882
5883             /* Verify that the exit liveness information is the same for the two parts of the qmark */
5884
5885             checkLiveness(&exitLiveness);
5886 #if FEATURE_STACK_FP_X87
5887             genQMarkAfterThenBlockStackFP(&tempFPState);
5888 #endif
5889             /* Define the "result" label */
5890
5891             genDefineTempLabel(lab_done);
5892         }
5893
5894         /* Join of the two branches - trash all registers */
5895
5896         regTracker.rsTrackRegClr();
5897
5898         /* We're just about done */
5899
5900         genUpdateLife(tree);
5901     }
5902     else
5903     {
5904         /* Generate code for a qmark that generates a value */
5905
5906         /* Generate the code for the else part of the qmark */
5907
5908         noway_assert(elseNode->IsNothingNode() == false);
5909
5910         /* Compute the elseNode into any free register */
5911         genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
5912         noway_assert(elseNode->InReg());
5913         noway_assert(elseNode->gtRegNum != REG_NA);
5914
5915         /* Record the chosen register */
5916         reg  = elseNode->gtRegNum;
5917         regs = genRegMask(reg);
5918
5919         /* Save the current liveness, register status, and GC pointers               */
5920         /* This is the liveness information upon exit of the else part of the qmark  */
5921
5922         saveLiveness(&exitLiveness);
5923
5924         /* Generate jmp lab_done */
5925         lab_done = genCreateTempLabel();
5926
5927 #ifdef DEBUG
5928         // We will use this to assert we don't emit instructions if we decide not to
5929         // do the jmp
5930         unsigned emittedInstructions = getEmitter()->emitInsCount;
5931         bool     bSkippedJump        = false;
5932 #endif
5933         // We would like to know here if the then node is really going to generate
5934         // code; if it isn't, the jump we are generating here is just a jump to the next instruction.
5935         // What you would really like is to be able to go back and remove the jump, but
5936         // we have no way of doing that right now.
5937
5938         if (
5939 #if FEATURE_STACK_FP_X87
5940             !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
5941 #endif
5942             genIsEnregisteredIntVariable(thenNode) == reg)
5943         {
5944 #ifdef DEBUG
5945             // For the moment, fix this easy case (enregistered then node), which
5946             // is the one that happens all the time.
5947
5948             bSkippedJump = true;
5949 #endif
5950         }
5951         else
5952         {
5953             inst_JMP(EJ_jmp, lab_done);
5954         }
5955
5956         /* Restore the liveness that we had upon entry of the else part of the qmark */
5957
5958         restoreLiveness(&entryLiveness);
5959
5960         /* Clear the liveness of any local variables that are dead upon    */
5961         /* entry to the then part.                                         */
5962         genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5963
5964         /* Generate lab_true: */
5965         genDefineTempLabel(lab_true);
5966 #if FEATURE_STACK_FP_X87
5967         // Store FP state
5968
5969         // We need to do this after defining the lab_true label
5970         genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5971 #endif
5972         /* Enter the then part - trash all registers */
5973
5974         regTracker.rsTrackRegClr();
5975
5976         /* Generate the code for the then part of the qmark */
5977
5978         noway_assert(thenNode->IsNothingNode() == false);
5979
5980         /* This must place a value into the chosen register */
5981         genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
5982
5983         noway_assert(thenNode->InReg());
5984         noway_assert(thenNode->gtRegNum == reg);
5985
5986         unspillLiveness(&exitLiveness);
5987
5988         /* Verify that the exit liveness information is the same for the two parts of the qmark */
5989         checkLiveness(&exitLiveness);
5990 #if FEATURE_STACK_FP_X87
5991         genQMarkAfterThenBlockStackFP(&tempFPState);
5992 #endif
5993
5994 #ifdef DEBUG
5995         noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
5996 #endif
5997
5998         /* Define the "result" label */
5999         genDefineTempLabel(lab_done);
6000
6001         /* Join of the two branches - trash all registers */
6002
6003         regTracker.rsTrackRegClr();
6004
6005         /* Check whether this subtree has freed up any variables */
6006
6007         genUpdateLife(tree);
6008
6009         genMarkTreeInReg(tree, reg);
6010     }
6011 }
6012
6013 /*****************************************************************************
6014  *
6015  *  Generate code for a qmark colon using the CMOV instruction.  It's OK
6016  *  to return false when we can't easily implement it using a cmov (leading
6017  *  genCodeForQmark to implement it using branches).
6018  */
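//
// The idea: set the condition flags first, materialize the 'always' value into the target register
// without disturbing the flags (a plain 'mov' for constants), and then emit a single CMOV that
// overwrites it with the 'predicate' value when the (possibly reversed) condition selects it, so no
// branches are needed.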
6019
6020 bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
6021 {
6022 #ifdef _TARGET_XARCH_
6023     GenTreePtr cond  = tree->gtOp.gtOp1;
6024     GenTreePtr colon = tree->gtOp.gtOp2;
6025     // Warning: this naming of the local vars is backwards!
6026     GenTreePtr thenNode = colon->gtOp.gtOp1;
6027     GenTreePtr elseNode = colon->gtOp.gtOp2;
6028     GenTreePtr alwaysNode, predicateNode;
6029     regNumber  reg;
6030     regMaskTP  needReg = destReg;
6031
6032     noway_assert(tree->gtOper == GT_QMARK);
6033     noway_assert(cond->OperIsCompare());
6034     noway_assert(colon->gtOper == GT_COLON);
6035
6036 #ifdef DEBUG
6037     if (JitConfig.JitNoCMOV())
6038     {
6039         return false;
6040     }
6041 #endif
6042
6043     /* Can only implement CMOV on processors that support it */
6044
6045     if (!compiler->opts.compUseCMOV)
6046     {
6047         return false;
6048     }
6049
6050     /* thenNode better be a local or a constant */
6051
6052     if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
6053     {
6054         return false;
6055     }
6056
6057     /* elseNode better be a local or a constant or nothing */
6058
6059     if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
6060     {
6061         return false;
6062     }
6063
6064     /* can't handle two constants here */
6065
6066     if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
6067     {
6068         return false;
6069     }
6070
6071     /* let's not handle comparisons of non-integer types */
6072
6073     if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
6074     {
6075         return false;
6076     }
6077
6078     /* Choose nodes for predicateNode and alwaysNode.  Swap cond if necessary.
6079        The biggest constraint is that cmov doesn't take an integer argument.
6080     */
6081
6082     bool reverseCond = false;
6083     if (elseNode->OperGet() == GT_CNS_INT)
6084     {
6085         // else node is a constant
6086
6087         alwaysNode    = elseNode;
6088         predicateNode = thenNode;
6089         reverseCond   = true;
6090     }
6091     else
6092     {
6093         alwaysNode    = thenNode;
6094         predicateNode = elseNode;
6095     }
6096
6097     // If the live set in alwaysNode is not the same as in tree, then
6098     // the variable in predicate node dies here.  This is a dangerous
6099     // case that we don't handle (genComputeReg could overwrite
6100     // the value of the variable in the predicate node).
6101
6102     // This assert is just paranoid (the checks above already guarantee it)
6103     assert(predicateNode->OperGet() == GT_LCL_VAR);
6104     if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
6105     {
6106         return false;
6107     }
6108
6109     // Past this point we are committing to use CMOV.
6110
6111     if (reverseCond)
6112     {
6113         compiler->gtReverseCond(cond);
6114     }
6115
6116     emitJumpKind jumpKind = genCondSetFlags(cond);
6117
6118     // Compute the always node into any free register.  If it's a constant,
6119     // we need to generate the mov instruction here (otherwise genComputeReg might
6120     // modify the flags, as in xor reg,reg).
6121
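         // Illustrative shape of the branch-free sequence being built here
         // (the condition has already been materialized in the flags):
         //     cmp    ..., ...        ; genCondSetFlags, above
         //     mov    reg, <constant> ; must not disturb the flags (so no "xor reg, reg")
         //     cmovcc reg, <local>    ; conditionally replace with the predicate value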
6122     if (alwaysNode->OperGet() == GT_CNS_INT)
6123     {
6124         reg = regSet.rsPickReg(needReg, bestReg);
6125         inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
6126         gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
6127         regTracker.rsTrackRegTrash(reg);
6128     }
6129     else
6130     {
6131         genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6132         noway_assert(alwaysNode->InReg());
6133         noway_assert(alwaysNode->gtRegNum != REG_NA);
6134
6135         // Record the chosen register
6136
6137         reg = alwaysNode->gtRegNum;
6138     }
6139
6140     regNumber regPredicate = REG_NA;
6141
6142     // Is predicateNode an enregistered variable?
6143
6144     if (genMarkLclVar(predicateNode))
6145     {
6146         // Variable lives in a register
6147
6148         regPredicate = predicateNode->gtRegNum;
6149     }
6150 #if REDUNDANT_LOAD
6151     else
6152     {
6153         // Check whether the variable happens to already be sitting in a register
6154
6155         regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
6156     }
6157 #endif
6158
6159     const static instruction EJtoCMOV[] = {INS_nop,    INS_nop,    INS_cmovo,  INS_cmovno, INS_cmovb,  INS_cmovae,
6160                                            INS_cmove,  INS_cmovne, INS_cmovbe, INS_cmova,  INS_cmovs,  INS_cmovns,
6161                                            INS_cmovpe, INS_cmovpo, INS_cmovl,  INS_cmovge, INS_cmovle, INS_cmovg};
6162
6163     noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
6164     instruction cmov_ins = EJtoCMOV[jumpKind];
6165
6166     noway_assert(insIsCMOV(cmov_ins));
6167
6168     if (regPredicate != REG_NA)
6169     {
6170         // regPredicate is in a register
6171
6172         inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
6173     }
6174     else
6175     {
6176         // regPredicate is in memory
6177
6178         inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
6179     }
6180     gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
6181     regTracker.rsTrackRegTrash(reg);
6182
6183     genUpdateLife(alwaysNode);
6184     genUpdateLife(predicateNode);
6185     genCodeForTree_DONE_LIFE(tree, reg);
6186     return true;
6187 #else
6188     return false;
6189 #endif
6190 }
6191
6192 #ifdef _TARGET_XARCH_
6193 void CodeGen::genCodeForMultEAX(GenTreePtr tree)
6194 {
6195     GenTreePtr op1  = tree->gtOp.gtOp1;
6196     GenTreePtr op2  = tree->gtGetOp2();
6197     bool       ovfl = tree->gtOverflow();
6198     regNumber  reg  = DUMMY_INIT(REG_CORRUPT);
6199     regMaskTP  addrReg;
6200
6201     noway_assert(tree->OperGet() == GT_MUL);
6202
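         // Rough shape of the sequence this helper produces (illustrative only):
         //     mov  eax, <op1>     ; op1 is forced into EAX
         //     mul  <op2>          ; or imulEAX when signed; 64-bit result lands in EDX:EAX
         // followed, when an overflow check is requested, by a conditional jump to the
         // overflow throw block.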
6203     /* We'll evaluate 'op1' first */
6204
6205     regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
6206
6207     /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
6208
6209     genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
6210     noway_assert(op1->InReg());
6211
6212     // If op2 is a constant, we need to load the constant into a register
6213     if (op2->OperKind() & GTK_CONST)
6214     {
6215         genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
6216         noway_assert(op2->InReg());
6217         regSet.rsMarkRegUsed(op2);
6218         addrReg = genRegMask(op2->gtRegNum);
6219     }
6220     else
6221     {
6222         /* Make the second operand addressable */
6223         // Try to avoid EAX.
6224         addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
6225     }
6226
6227     /* Make sure the first operand is still in a register */
6228     // op1 *must* go into EAX.
6229     genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
6230     noway_assert(op1->InReg());
6231
6232     reg = op1->gtRegNum;
6233
6234     // For 8 bit operations, we need to pick byte addressable registers
6235
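         // On x86 only EAX/EBX/ECX/EDX have byte-sized forms (AL/BL/CL/DL), so a value
         // sitting in ESI or EDI has to be copied into one of those registers first.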
6236     if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
6237     {
6238         regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6239
6240         inst_RV_RV(INS_mov, byteReg, reg);
6241
6242         regTracker.rsTrackRegTrash(byteReg);
6243         regSet.rsMarkRegFree(genRegMask(reg));
6244
6245         reg           = byteReg;
6246         op1->gtRegNum = reg;
6247         regSet.rsMarkRegUsed(op1);
6248     }
6249
6250     /* Make sure the operand is still addressable */
6251     addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
6252
6253     /* Free up the operand, if it's a regvar */
6254
6255     genUpdateLife(op2);
6256
6257     /* The register is about to be trashed */
6258
6259     regTracker.rsTrackRegTrash(reg);
6260
6261     // For overflow instructions, tree->TypeGet() is the accurate type,
6262     // and gives us the size for the operands.
6263
6264     emitAttr opSize = emitTypeSize(tree->TypeGet());
6265
6266     /* Compute the new value */
6267
6268     noway_assert(op1->gtRegNum == REG_EAX);
6269
6270     // Make sure Edx is free (unless used by op2 itself)
6271     bool op2Released = false;
6272
6273     if ((addrReg & RBM_EDX) == 0)
6274     {
6275         // op2 does not use Edx, so make sure no one else does either
6276         regSet.rsGrabReg(RBM_EDX);
6277     }
6278     else if (regSet.rsMaskMult & RBM_EDX)
6279     {
6280         /* Edx is used by op2 and some other trees.
6281            Spill the other trees besides op2. */
6282
6283         regSet.rsGrabReg(RBM_EDX);
6284         op2Released = true;
6285
6286         /* keepReg==RegSet::FREE_REG so that the other multi-used trees
6287            don't get marked as unspilled as well. */
6288         regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
6289     }
6290
6291     instruction ins;
6292
6293     if (tree->gtFlags & GTF_UNSIGNED)
6294         ins = INS_mulEAX;
6295     else
6296         ins = INS_imulEAX;
6297
6298     inst_TT(ins, op2, 0, 0, opSize);
6299
6300     /* Both EAX and EDX are now trashed */
6301
6302     regTracker.rsTrackRegTrash(REG_EAX);
6303     regTracker.rsTrackRegTrash(REG_EDX);
6304
6305     /* Free up anything that was tied up by the operand */
6306
6307     if (!op2Released)
6308         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
6309
6310     /* The result will be where the first operand is sitting */
6311
6312     /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
6313     genRecoverReg(op1, 0, RegSet::KEEP_REG);
6314
6315     reg = op1->gtRegNum;
6316     noway_assert(reg == REG_EAX);
6317
6318     genReleaseReg(op1);
6319
6320     /* Do we need an overflow check */
6321
6322     if (ovfl)
6323         genCheckOverflow(tree);
6324
6325     genCodeForTree_DONE(tree, reg);
6326 }
6327 #endif // _TARGET_XARCH_
6328
6329 #ifdef _TARGET_ARM_
6330 void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
6331 {
6332     GenTreePtr op1 = tree->gtOp.gtOp1;
6333     GenTreePtr op2 = tree->gtGetOp2();
6334
6335     noway_assert(tree->OperGet() == GT_MUL);
6336
6337     /* Generate the first operand into some register */
6338
6339     genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6340     noway_assert(op1->InReg());
6341
6342     /* Generate the second operand into some register */
6343
6344     genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6345     noway_assert(op2->InReg());
6346
6347     /* Make sure the first operand is still in a register */
6348     genRecoverReg(op1, 0, RegSet::KEEP_REG);
6349     noway_assert(op1->InReg());
6350
6351     /* Free up the operands */
6352     genUpdateLife(tree);
6353
6354     genReleaseReg(op1);
6355     genReleaseReg(op2);
6356
6357     regNumber regLo = regSet.rsPickReg(destReg, bestReg);
6358     regNumber regHi;
6359
6360     regSet.rsLockReg(genRegMask(regLo));
6361     regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
6362     regSet.rsUnlockReg(genRegMask(regLo));
6363
6364     instruction ins;
6365     if (tree->gtFlags & GTF_UNSIGNED)
6366         ins = INS_umull;
6367     else
6368         ins = INS_smull;
6369
6370     getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
6371     regTracker.rsTrackRegTrash(regHi);
6372     regTracker.rsTrackRegTrash(regLo);
6373
6374     /* Do we need an overflow check */
6375
6376     if (tree->gtOverflow())
6377     {
6378         // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
6379         //
6380         regSet.rsLockReg(genRegMask(regLo));
6381
6382         if (tree->gtFlags & GTF_MUL_64RSLT)
6383             regSet.rsLockReg(genRegMask(regHi));
6384
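             // Overflow-test sketch: for the unsigned case the product fits in 32 bits
             // iff regHi is zero.  For the signed case it fits iff the upper 33 bits all
             // equal the sign bit of regLo; "cmp regLo, #0x80000000" sets the carry flag
             // exactly when regLo's sign bit is set, and "adc regTmpHi, regHi, #0" folds
             // that carry into the high word, so regTmpHi ends up zero iff there was no
             // overflow.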
6385         regNumber regTmpHi = regHi;
6386         if ((tree->gtFlags & GTF_UNSIGNED) == 0)
6387         {
6388             getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
6389             regTmpHi = regSet.rsPickReg(RBM_ALLINT);
6390             getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
6391             regTracker.rsTrackRegTrash(regTmpHi);
6392         }
6393         getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
6394
6395         // Jump to the block which will throw the exception
6396         emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
6397         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
6398
6399         // Unlock regLo [and regHi] after generating code for the gtOverflow() case
6400         //
6401         regSet.rsUnlockReg(genRegMask(regLo));
6402
6403         if (tree->gtFlags & GTF_MUL_64RSLT)
6404             regSet.rsUnlockReg(genRegMask(regHi));
6405     }
6406
6407     genUpdateLife(tree);
6408
6409     if (tree->gtFlags & GTF_MUL_64RSLT)
6410         genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
6411     else
6412         genMarkTreeInReg(tree, regLo);
6413 }
6414 #endif // _TARGET_ARM_
6415
6416 /*****************************************************************************
6417  *
6418  *  Generate code for a simple binary arithmetic or logical operator.
6419  *  Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
6420  */
6421
6422 void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
6423 {
6424     instruction     ins;
6425     genTreeOps      oper     = tree->OperGet();
6426     const var_types treeType = tree->TypeGet();
6427     GenTreePtr      op1      = tree->gtOp.gtOp1;
6428     GenTreePtr      op2      = tree->gtGetOp2();
6429     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
6430     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
6431     regMaskTP       needReg  = destReg;
6432
6433     /* Figure out what instruction to generate */
6434
6435     bool isArith;
6436     switch (oper)
6437     {
6438         case GT_AND:
6439             ins     = INS_AND;
6440             isArith = false;
6441             break;
6442         case GT_OR:
6443             ins     = INS_OR;
6444             isArith = false;
6445             break;
6446         case GT_XOR:
6447             ins     = INS_XOR;
6448             isArith = false;
6449             break;
6450         case GT_ADD:
6451             ins     = INS_add;
6452             isArith = true;
6453             break;
6454         case GT_SUB:
6455             ins     = INS_sub;
6456             isArith = true;
6457             break;
6458         case GT_MUL:
6459             ins     = INS_MUL;
6460             isArith = true;
6461             break;
6462         default:
6463             unreached();
6464     }
6465
6466 #ifdef _TARGET_XARCH_
6467     /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
6468
6469     if ((oper == GT_MUL) &&
6470         op2->IsIntCnsFitsInI32() &&              // op2 is a constant that fits in a sign-extended 32-bit immediate
6471         !op1->IsCnsIntOrI() &&                   // op1 is not a constant
6472         (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
6473         !varTypeIsByte(treeType) &&              // No encoding for say "imul al,al,imm"
6474         !tree->gtOverflow())                     // 3 operand imul doesn't set flags
6475     {
6476         /* Make the first operand addressable */
6477
6478         regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
6479
6480         /* Grab a register for the target */
6481
6482         reg = regSet.rsPickReg(needReg, bestReg);
6483
6484 #if LEA_AVAILABLE
6485         /* Compute the value into the target: reg=op1*op2_icon */
6486         if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
6487         {
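                 // Strength-reduce *3, *5 and *9 to a single LEA:
                 //     lea reg, [src + src*2]    ; *3
                 //     lea reg, [src + src*4]    ; *5
                 //     lea reg, [src + src*8]    ; *9
                 // The (icon & -2) below yields the scale: 3->2, 5->4, 9->8.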
6488             regNumber regSrc;
6489             if (op1->InReg())
6490             {
6491                 regSrc = op1->gtRegNum;
6492             }
6493             else
6494             {
6495                 inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
6496                 regSrc = reg;
6497             }
6498             getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
6499                                         (op2->gtIntCon.gtIconVal & -2), 0);
6500         }
6501         else
6502 #endif // LEA_AVAILABLE
6503         {
6504             /* Compute the value into the target: reg=op1*op2_icon */
6505             inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
6506         }
6507
6508         /* The register has been trashed now */
6509
6510         regTracker.rsTrackRegTrash(reg);
6511
6512         /* The address is no longer live */
6513
6514         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
6515
6516         genCodeForTree_DONE(tree, reg);
6517         return;
6518     }
6519 #endif // _TARGET_XARCH_
6520
6521     bool ovfl = false;
6522
6523     if (isArith)
6524     {
6525         // We only reach here for GT_ADD, GT_SUB and GT_MUL.
6526         assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
6527
6528         ovfl = tree->gtOverflow();
6529
6530         /* We record the accurate (small) types in trees only when we need to
6531          * check for overflow. Otherwise we record genActualType()
6532          */
6533
6534         noway_assert(ovfl || (treeType == genActualType(treeType)));
6535
6536 #if LEA_AVAILABLE
6537
6538         /* Can we use an 'lea' to compute the result?
6539            Can't use 'lea' for overflow as it doesn't set flags
6540            Can't use 'lea' unless we have at least two free registers */
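              // e.g. an address-shaped add such as "a + b*4 + 12" can then be folded
              // into a single "lea reg, [a+b*4+12]" instead of a mov/shl/add sequence.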
6541         {
6542             bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
6543                                    genCountBits(regSet.rsMaskLock) +  // Locked registers
6544                                    2                                  // We will need two registers
6545                                <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
6546
6547             regMaskTP regs = RBM_NONE; // OUT argument
6548             if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
6549             {
6550                 emitAttr size;
6551
6552                 /* Is the value now computed in some register? */
6553
6554                 if (tree->InReg())
6555                 {
6556                     genCodeForTree_REG_VAR1(tree);
6557                     return;
6558                 }
6559
6560                 /* If we can reuse op1/2's register directly, and 'tree' is
6561                    a simple expression (ie. not in scaled index form),
6562                    might as well just use "add" instead of "lea" */
6563
6564                 // However, if we're in a context where we want to evaluate "tree" into a specific
6565                 // register different from the reg we'd use in this optimization, then it doesn't
6566                 // make sense to do the "add", since we'd also have to do a "mov."
6567                 if (op1->InReg())
6568                 {
6569                     reg = op1->gtRegNum;
6570
6571                     if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6572                     {
6573                         if (op2->InReg())
6574                         {
6575                             /* Simply add op2 to the register */
6576
6577                             inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
6578
6579                             if (tree->gtSetFlags())
6580                                 genFlagsEqualToReg(tree, reg);
6581
6582                             goto DONE_LEA_ADD;
6583                         }
6584                         else if (op2->OperGet() == GT_CNS_INT)
6585                         {
6586                             /* Simply add op2 to the register */
6587
6588                             genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
6589
6590                             goto DONE_LEA_ADD;
6591                         }
6592                     }
6593                 }
6594
6595                 if (op2->InReg())
6596                 {
6597                     reg = op2->gtRegNum;
6598
6599                     if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6600                     {
6601                         if (op1->InReg())
6602                         {
6603                             /* Simply add op1 to the register */
6604
6605                             inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
6606
6607                             if (tree->gtSetFlags())
6608                                 genFlagsEqualToReg(tree, reg);
6609
6610                             goto DONE_LEA_ADD;
6611                         }
6612                     }
6613                 }
6614
6615                 // The expression either requires a scaled-index form, or
6616                 // op1's or op2's register can't be targeted; this can happen
6617                 // when op1 or op2 are enregistered variables.
6618
6619                 reg  = regSet.rsPickReg(needReg, bestReg);
6620                 size = emitActualTypeSize(treeType);
6621
6622                 /* Generate "lea reg, [addr-mode]" */
6623
6624                 inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
6625
6626 #ifndef _TARGET_XARCH_
6627                 // Don't call genFlagsEqualToReg on x86/x64
6628                 //  as it does not set the flags
6629                 if (tree->gtSetFlags())
6630                     genFlagsEqualToReg(tree, reg);
6631 #endif
6632
6633             DONE_LEA_ADD:
6634                 /* The register has been trashed now */
6635                 regTracker.rsTrackRegTrash(reg);
6636
6637                 genDoneAddressable(tree, regs, RegSet::FREE_REG);
6638
6639                 /* The following could be an 'inner' pointer!!! */
6640
6641                 noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
6642
6643                 if (treeType == TYP_BYREF)
6644                 {
6645                     genUpdateLife(tree);
6646
6647                     gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
6648                     gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
6649                 }
6650
6651                 genCodeForTree_DONE(tree, reg);
6652                 return;
6653             }
6654         }
6655
6656 #endif // LEA_AVAILABLE
6657
6658         noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
6659     }
6660
6661     /* The following makes an assumption about gtSetEvalOrder(this) */
6662
6663     noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
6664
6665     /* Compute a useful register mask */
6666     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6667     needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6668
6669     // Determine what registers go live between op1 and op2
6670     // Don't bother checking if op1 is already in a register.
6671     // This is not just for efficiency; if it's already in a
6672     // register then it may already be considered "evaluated"
6673     // for the purposes of liveness, in which case genNewLiveRegMask
6674     // will assert
6675     if (!op1->InReg())
6676     {
6677         regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
6678         if (newLiveMask)
6679         {
6680             needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
6681         }
6682     }
6683
6684 #if CPU_HAS_BYTE_REGS
6685     /* 8-bit operations can only be done in the byte-regs */
6686     if (varTypeIsByte(treeType))
6687         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6688 #endif // CPU_HAS_BYTE_REGS
6689
6690     // Try selecting one of the 'bestRegs'
6691     needReg = regSet.rsNarrowHint(needReg, bestReg);
6692
6693     /* Special case: small_val & small_mask */
6694
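         // For example, "(short)x & 0xFF" can be handled by loading x with a
         // zero-extending load and then (only when the mask doesn't already cover
         // every bit of the small type) a single "and"; sketch:
         //     movzx eax, word ptr [x]
         //     and   eax, 0xFF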
6695     if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
6696     {
6697         size_t    and_val = op2->gtIntCon.gtIconVal;
6698         size_t    andMask;
6699         var_types typ = op1->TypeGet();
6700
6701         switch (typ)
6702         {
6703             case TYP_BOOL:
6704             case TYP_BYTE:
6705             case TYP_UBYTE:
6706                 andMask = 0x000000FF;
6707                 break;
6708             case TYP_SHORT:
6709             case TYP_CHAR:
6710                 andMask = 0x0000FFFF;
6711                 break;
6712             default:
6713                 noway_assert(!"unexpected type");
6714                 return;
6715         }
6716
6717         // Is the 'and_val' completely contained within the bits found in 'andMask'?
6718         if ((and_val & ~andMask) == 0)
6719         {
6720             // We must use unsigned instructions when loading op1
6721             if (varTypeIsByte(typ))
6722             {
6723                 op1->gtType = TYP_UBYTE;
6724             }
6725             else // varTypeIsShort(typ)
6726             {
6727                 assert(varTypeIsShort(typ));
6728                 op1->gtType = TYP_CHAR;
6729             }
6730
6731             /* Generate the first operand into a scratch register */
6732
6733             op1 = genCodeForCommaTree(op1);
6734             genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6735
6736             noway_assert(op1->InReg());
6737
6738             regNumber op1Reg = op1->gtRegNum;
6739
6740             // Did we end up in an acceptable register?
6741             // and do we have an acceptable free register available to grab?
6742             //
6743             if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6744             {
6745                 // See if we can pick a register from bestReg
6746                 bestReg &= needReg;
6747
6748                 // Grab an acceptable register
6749                 regNumber newReg;
6750                 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6751                     newReg = regSet.rsGrabReg(bestReg);
6752                 else
6753                     newReg = regSet.rsGrabReg(needReg);
6754
6755                 noway_assert(op1Reg != newReg);
6756
6757                 /* Update the value in the target register */
6758
6759                 regTracker.rsTrackRegCopy(newReg, op1Reg);
6760
6761                 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6762
6763                 /* The value has been transferred to 'newReg' */
6764
6765                 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6766                     gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6767
6768                 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6769
6770                 /* The value is now in an appropriate register */
6771
6772                 op1->gtRegNum = newReg;
6773             }
6774             noway_assert(op1->InReg());
6775             genUpdateLife(op1);
6776
6777             /* Mark the register as 'used' */
6778             regSet.rsMarkRegUsed(op1);
6779             reg = op1->gtRegNum;
6780
6781             if (and_val != andMask) // Does the "and" mask only cover some of the bits?
6782             {
6783                 /* "and" the value */
6784
6785                 inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
6786             }
6787
6788 #ifdef DEBUG
6789             /* Update the live set of register variables */
6790             if (compiler->opts.varNames)
6791                 genUpdateLife(tree);
6792 #endif
6793
6794             /* Now we can update the register pointer information */
6795
6796             genReleaseReg(op1);
6797             gcInfo.gcMarkRegPtrVal(reg, treeType);
6798
6799             genCodeForTree_DONE_LIFE(tree, reg);
6800             return;
6801         }
6802     }
6803
6804 #ifdef _TARGET_XARCH_
6805
6806     // Do we have to use the special "imul" instruction
6807     // which has eax as the implicit operand?
6808     //
6809     bool multEAX = false;
6810
6811     if (oper == GT_MUL)
6812     {
6813         if (tree->gtFlags & GTF_MUL_64RSLT)
6814         {
6815             /* Only multiplying with EAX will leave the 64-bit
6816              * result in EDX:EAX */
6817
6818             multEAX = true;
6819         }
6820         else if (ovfl)
6821         {
6822             if (tree->gtFlags & GTF_UNSIGNED)
6823             {
6824                 /* "mul reg/mem" always has EAX as default operand */
6825
6826                 multEAX = true;
6827             }
6828             else if (varTypeIsSmall(treeType))
6829             {
6830                 /* Only the "imul with EAX" encoding has the 'w' bit
6831                  * to specify the size of the operands */
6832
6833                 multEAX = true;
6834             }
6835         }
6836     }
6837
6838     if (multEAX)
6839     {
6840         noway_assert(oper == GT_MUL);
6841
6842         return genCodeForMultEAX(tree);
6843     }
6844 #endif // _TARGET_XARCH_
6845
6846 #ifdef _TARGET_ARM_
6847
6848     // Do we have to use the special 32x32 => 64 bit multiply
6849     //
6850     bool mult64 = false;
6851
6852     if (oper == GT_MUL)
6853     {
6854         if (tree->gtFlags & GTF_MUL_64RSLT)
6855         {
6856             mult64 = true;
6857         }
6858         else if (ovfl)
6859         {
6860             // We always must use the 32x32 => 64 bit multiply
6861             // to detect overflow
6862             mult64 = true;
6863         }
6864     }
6865
6866     if (mult64)
6867     {
6868         noway_assert(oper == GT_MUL);
6869
6870         return genCodeForMult64(tree, destReg, bestReg);
6871     }
6872 #endif // _TARGET_ARM_
6873
6874     /* Generate the first operand into a scratch register */
6875
6876     op1 = genCodeForCommaTree(op1);
6877     genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6878
6879     noway_assert(op1->InReg());
6880
6881     regNumber op1Reg = op1->gtRegNum;
6882
6883     // Setup needReg with the set of register that we require for op1 to be in
6884     //
6885     needReg = RBM_ALLINT;
6886
6887     /* Compute a useful register mask */
6888     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6889     needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6890
6891 #if CPU_HAS_BYTE_REGS
6892     /* 8-bit operations can only be done in the byte-regs */
6893     if (varTypeIsByte(treeType))
6894         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6895 #endif // CPU_HAS_BYTE_REGS
6896
6897     // Did we end up in an acceptable register?
6898     // and do we have an acceptable free register available to grab?
6899     //
6900     if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6901     {
6902         // See if we can pick a register from bestReg
6903         bestReg &= needReg;
6904
6905         // Grab an acceptable register
6906         regNumber newReg;
6907         if ((bestReg & regSet.rsRegMaskFree()) != 0)
6908             newReg = regSet.rsGrabReg(bestReg);
6909         else
6910             newReg = regSet.rsGrabReg(needReg);
6911
6912         noway_assert(op1Reg != newReg);
6913
6914         /* Update the value in the target register */
6915
6916         regTracker.rsTrackRegCopy(newReg, op1Reg);
6917
6918         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6919
6920         /* The value has been transferred to 'newReg' */
6921
6922         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6923             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6924
6925         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6926
6927         /* The value is now in an appropriate register */
6928
6929         op1->gtRegNum = newReg;
6930     }
6931     noway_assert(op1->InReg());
6932     op1Reg = op1->gtRegNum;
6933
6934     genUpdateLife(op1);
6935
6936     /* Mark the register as 'used' */
6937     regSet.rsMarkRegUsed(op1);
6938
6939     bool isSmallConst = false;
6940
6941 #ifdef _TARGET_ARM_
6942     if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
6943     {
6944         isSmallConst = true;
6945     }
6946 #endif
6947     /* Make the second operand addressable */
6948
6949     regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
6950
6951 #if CPU_LOAD_STORE_ARCH
6952     genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
6953 #else  // !CPU_LOAD_STORE_ARCH
6954     /* Is op1 spilled and op2 in a register? */
6955
6956     if ((op1->gtFlags & GTF_SPILLED) && (op2->InReg()) && (ins != INS_sub))
6957     {
6958         noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
6959
6960         // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
6961         noway_assert(op2->gtOper != GT_LCL_VAR ||
6962                      varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
6963
6964         reg               = op2->gtRegNum;
6965         regMaskTP regMask = genRegMask(reg);
6966
6967         /* Is the register holding op2 available? */
6968
6969         if (regMask & regSet.rsMaskVars)
6970         {
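                 // The register holding op2 also holds a live register variable, so we
                 // can't trash it with the in-place trick below; fall through to the
                 // general path instead.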
6971         }
6972         else
6973         {
6974             /* Get the temp we spilled into. */
6975
6976             TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
6977
6978             /* For 8-bit operations, we need to make sure that op2 is
6979                in a byte-addressable register */
6980
6981             if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
6982             {
6983                 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6984
6985                 inst_RV_RV(INS_mov, byteReg, reg);
6986                 regTracker.rsTrackRegTrash(byteReg);
6987
6988                 /* op2 couldn't have spilled as it was not sitting in
6989                    RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
6990                 noway_assert(op2->InReg());
6991
6992                 regSet.rsUnlockReg(regMask);
6993                 regSet.rsMarkRegFree(regMask);
6994
6995                 reg           = byteReg;
6996                 regMask       = genRegMask(reg);
6997                 op2->gtRegNum = reg;
6998                 regSet.rsMarkRegUsed(op2);
6999             }
7000
7001             inst_RV_ST(ins, reg, temp, 0, treeType);
7002
7003             regTracker.rsTrackRegTrash(reg);
7004
7005             /* Free the temp */
7006
7007             compiler->tmpRlsTemp(temp);
7008
7009             /* 'add'/'sub' set all CC flags, others only ZF */
7010
7011             /* If we need to check overflow, for small types, the
7012              * flags can't be used as we perform the arithmetic
7013              * operation (on small registers) and then sign extend it
7014              *
7015              * NOTE : If we ever don't need to sign-extend the result,
7016              * we can use the flags
7017              */
7018
7019             if (tree->gtSetFlags())
7020             {
7021                 genFlagsEqualToReg(tree, reg);
7022             }
7023
7024             /* The result is where the second operand is sitting. Mark result reg as free */
7025             regSet.rsMarkRegFree(genRegMask(reg));
7026
7027             gcInfo.gcMarkRegPtrVal(reg, treeType);
7028
7029             goto CHK_OVF;
7030         }
7031     }
7032 #endif // !CPU_LOAD_STORE_ARCH
7033
7034     /* Make sure the first operand is still in a register */
7035     regSet.rsLockUsedReg(addrReg);
7036     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7037     noway_assert(op1->InReg());
7038     regSet.rsUnlockUsedReg(addrReg);
7039
7040     reg = op1->gtRegNum;
7041
7042     // For 8 bit operations, we need to pick byte addressable registers
7043
7044     if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
7045     {
7046         regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7047
7048         inst_RV_RV(INS_mov, byteReg, reg);
7049
7050         regTracker.rsTrackRegTrash(byteReg);
7051         regSet.rsMarkRegFree(genRegMask(reg));
7052
7053         reg           = byteReg;
7054         op1->gtRegNum = reg;
7055         regSet.rsMarkRegUsed(op1);
7056     }
7057
7058     /* Make sure the operand is still addressable */
7059     addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
7060
7061     /* Free up the operand, if it's a regvar */
7062
7063     genUpdateLife(op2);
7064
7065     /* The register is about to be trashed */
7066
7067     regTracker.rsTrackRegTrash(reg);
7068
7069     {
7070         bool op2Released = false;
7071
7072         // For overflow instructions, tree->gtType is the accurate type,
7073         // and gives us the size for the operands.
7074
7075         emitAttr opSize = emitTypeSize(treeType);
7076
7077         /* Compute the new value */
7078
7079         if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
7080 #if !CPU_HAS_FP_SUPPORT
7081             && (treeType == TYP_INT || treeType == TYP_I_IMPL)
7082 #endif
7083                 )
7084         {
7085             ssize_t ival = op2->gtIntCon.gtIconVal;
7086
7087             if (oper == GT_ADD)
7088             {
7089                 genIncRegBy(reg, ival, tree, treeType, ovfl);
7090             }
7091             else if (oper == GT_SUB)
7092             {
7093                 if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
7094                              (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
7095                     // Therefore we can't use -ival.
7096                     )
7097                 {
7098                     /* For unsigned overflow, we have to use INS_sub to set
7099                     the flags correctly */
7100
7101                     genDecRegBy(reg, ival, tree);
7102                 }
7103                 else
7104                 {
7105                     /* Else, we simply add the negative of the value */
7106
7107                     genIncRegBy(reg, -ival, tree, treeType, ovfl);
7108                 }
7109             }
7110             else if (oper == GT_MUL)
7111             {
7112                 genMulRegBy(reg, ival, tree, treeType, ovfl);
7113             }
7114         }
7115         else
7116         {
7117             // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
7118             op2 = op2->gtEffectiveVal();
7119             if (varTypeIsByte(treeType) && op2->InReg())
7120             {
7121                 noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
7122
7123                 regNumber op2reg     = op2->gtRegNum;
7124                 regMaskTP op2regMask = genRegMask(op2reg);
7125
7126                 if (!(op2regMask & RBM_BYTE_REGS))
7127                 {
7128                     regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7129
7130                     inst_RV_RV(INS_mov, byteReg, op2reg);
7131                     regTracker.rsTrackRegTrash(byteReg);
7132
7133                     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7134                     op2Released = true;
7135
7136                     op2->gtRegNum = byteReg;
7137                 }
7138             }
7139
7140             inst_RV_TT(ins, reg, op2, 0, opSize, flags);
7141         }
7142
7143         /* Free up anything that was tied up by the operand */
7144
7145         if (!op2Released)
7146         {
7147             genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7148         }
7149     }
7150     /* The result will be where the first operand is sitting */
7151
7152     /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
7153     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7154
7155     reg = op1->gtRegNum;
7156
7157     /* 'add'/'sub' set all CC flags, others only ZF+SF */
7158
7159     if (tree->gtSetFlags())
7160         genFlagsEqualToReg(tree, reg);
7161
7162     genReleaseReg(op1);
7163
7164 #if !CPU_LOAD_STORE_ARCH
7165 CHK_OVF:
7166 #endif // !CPU_LOAD_STORE_ARCH
7167
7168     /* Do we need an overflow check */
7169
7170     if (ovfl)
7171         genCheckOverflow(tree);
7172
7173     genCodeForTree_DONE(tree, reg);
7174 }
7175
7176 /*****************************************************************************
7177  *
7178  *  Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7179  *  Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
7180  */
7181
7182 void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7183 {
7184     instruction      ins;
7185     const genTreeOps oper     = tree->OperGet();
7186     const var_types  treeType = tree->TypeGet();
7187     GenTreePtr       op1      = tree->gtOp.gtOp1;
7188     GenTreePtr       op2      = tree->gtGetOp2();
7189     insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
7190     regNumber        reg      = DUMMY_INIT(REG_CORRUPT);
7191     regMaskTP        needReg  = destReg;
7192     regMaskTP        addrReg;
7193
7194     /* Figure out what instruction to generate */
7195
7196     bool isArith;
7197     switch (oper)
7198     {
7199         case GT_ASG_AND:
7200             ins     = INS_AND;
7201             isArith = false;
7202             break;
7203         case GT_ASG_OR:
7204             ins     = INS_OR;
7205             isArith = false;
7206             break;
7207         case GT_ASG_XOR:
7208             ins     = INS_XOR;
7209             isArith = false;
7210             break;
7211         case GT_ASG_ADD:
7212             ins     = INS_add;
7213             isArith = true;
7214             break;
7215         case GT_ASG_SUB:
7216             ins     = INS_sub;
7217             isArith = true;
7218             break;
7219         default:
7220             unreached();
7221     }
7222
7223     bool ovfl = false;
7224
7225     if (isArith)
7226     {
7227         // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
7228
7229         ovfl = tree->gtOverflow();
7230
7231         // We can't use += with overflow if the value cannot be changed
7232         // in case of an overflow-exception which the "+" might cause
7233         noway_assert(!ovfl ||
7234                      ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
7235
7236         /* Do not allow overflow instructions with refs/byrefs */
7237
7238         noway_assert(!ovfl || !varTypeIsGC(treeType));
7239
7240         // We disallow overflow and byte-ops here as it is too much trouble
7241         noway_assert(!ovfl || !varTypeIsByte(treeType));
7242
7243         /* Is the second operand a constant? */
7244
7245         if (op2->IsIntCnsFitsInI32())
7246         {
7247             int ival = (int)op2->gtIntCon.gtIconVal;
7248
7249             /* What is the target of the assignment? */
7250
7251             switch (op1->gtOper)
7252             {
7253                 case GT_REG_VAR:
7254
7255                 REG_VAR4:
7256
7257                     reg = op1->gtRegVar.gtRegNum;
7258
7259                     /* No registers are needed for addressing */
7260
7261                     addrReg = RBM_NONE;
7262 #if !CPU_LOAD_STORE_ARCH
7263                 INCDEC_REG:
7264 #endif
7265                     /* We're adding a constant to a register */
7266
7267                     if (oper == GT_ASG_ADD)
7268                         genIncRegBy(reg, ival, tree, treeType, ovfl);
7269                     else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
7270                                       ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
7271                                                                                                  // 0x80000000.
7272                                                                                                  // Therefore we can't
7273                                                                                                  // use -ival.
7274                              )
7275                         /* For unsigned overflow, we have to use INS_sub to set
7276                             the flags correctly */
7277                         genDecRegBy(reg, ival, tree);
7278                     else
7279                         genIncRegBy(reg, -ival, tree, treeType, ovfl);
7280
7281                     break;
7282
7283                 case GT_LCL_VAR:
7284
7285                     /* Does the variable live in a register? */
7286
7287                     if (genMarkLclVar(op1))
7288                         goto REG_VAR4;
7289
7290                     __fallthrough;
7291
7292                 default:
7293
7294                     /* Make the target addressable for load/store */
7295                     addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
7296
7297 #if !CPU_LOAD_STORE_ARCH
7298                     // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7299
7300                     /* For small types with overflow check, we need to
7301                         sign/zero extend the result, so we need it in a reg */
7302
7303                     if (ovfl && genTypeSize(treeType) < sizeof(int))
7304 #endif // !CPU_LOAD_STORE_ARCH
7305                     {
7306                         // Load op1 into a reg
7307
7308                         reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
7309
7310                         inst_RV_TT(INS_mov, reg, op1);
7311
7312                         // Issue the add/sub and the overflow check
7313
7314                         inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
7315                         regTracker.rsTrackRegTrash(reg);
7316
7317                         if (ovfl)
7318                         {
7319                             genCheckOverflow(tree);
7320                         }
7321
7322                         /* Store the (sign/zero extended) result back to
7323                             the stack location of the variable */
7324
7325                         inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7326
7327                         break;
7328                     }
7329 #if !CPU_LOAD_STORE_ARCH
7330                     else
7331                     {
7332                         /* Add/subtract the new value into/from the target */
7333
7334                         if (op1->InReg())
7335                         {
7336                             reg = op1->gtRegNum;
7337                             goto INCDEC_REG;
7338                         }
7339
7340                         /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
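                              // e.g. "x += 1" becomes "inc dword ptr [x]" and "x -= 1"
                              // becomes "dec dword ptr [x]" when the heuristic allows it.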
7341                         if (!ovfl && (ival == 1 || ival == -1) &&
7342                             !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
7343                         {
7344                             noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
7345                             if (oper == GT_ASG_SUB)
7346                                 ival = -ival;
7347
7348                             ins = (ival > 0) ? INS_inc : INS_dec;
7349                             inst_TT(ins, op1);
7350                         }
7351                         else
7352                         {
7353                             inst_TT_IV(ins, op1, ival);
7354                         }
7355
7356                         if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
7357                         {
7358                             if (tree->gtSetFlags())
7359                                 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
7360                         }
7361
7362                         break;
7363                     }
7364 #endif        // !CPU_LOAD_STORE_ARCH
7365             } // end switch (op1->gtOper)
7366
7367             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7368
7369             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7370             return;
7371         } // end if (op2->IsIntCnsFitsInI32())
7372     }     // end if (isArith)
7373
7374     noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
7375
7376     /* Is the target a register or local variable? */
7377
7378     switch (op1->gtOper)
7379     {
7380         case GT_LCL_VAR:
7381
7382             /* Does the target variable live in a register? */
7383
7384             if (!genMarkLclVar(op1))
7385                 break;
7386
7387             __fallthrough;
7388
7389         case GT_REG_VAR:
7390
7391             /* Get hold of the target register */
7392
7393             reg = op1->gtRegVar.gtRegNum;
7394
7395             /* Make sure the target of the store is available */
7396
7397             if (regSet.rsMaskUsed & genRegMask(reg))
7398             {
7399                 regSet.rsSpillReg(reg);
7400             }
7401
7402             /* Make the RHS addressable */
7403
7404             addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
7405
7406             /* Compute the new value into the target register */
7407             CLANG_FORMAT_COMMENT_ANCHOR;
7408
7409 #if CPU_HAS_BYTE_REGS
7410
7411             // Fix 383833 X86 ILGEN
7412             regNumber reg2;
7413             if (op2->InReg())
7414             {
7415                 reg2 = op2->gtRegNum;
7416             }
7417             else
7418             {
7419                 reg2 = REG_STK;
7420             }
7421
7422             // We can only generate a byte ADD/SUB/OR/AND operation when reg and reg2 are both BYTE registers.
7423             // When op2 is in memory, reg2 == REG_STK and we will need to force op2 into a register.
7424             //
7425             if (varTypeIsByte(treeType) &&
7426                 (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
7427             {
7428                 // We will force op2 into a register (via sign/zero extending load)
7429                 // for the cases where op2 is in memory and thus could have
7430                 // an unmapped page just beyond its location
7431                 //
7432                 if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
7433                 {
7434                     genCodeForTree(op2, 0);
7435                     assert(op2->InReg());
7436                 }
7437
7438                 inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
7439
7440                 bool canOmit = false;
7441
7442                 if (varTypeIsUnsigned(treeType))
7443                 {
7444                     // When op2 is a byte sized constant we can omit the zero extend instruction
7445                     if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
7446                     {
7447                         canOmit = true;
7448                     }
7449                 }
7450                 else // treeType is signed
7451                 {
7452                     // When op2 is a positive 7-bit or smaller constant
7453                     // we can omit the sign extension sequence.
7454                     if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
7455                     {
7456                         canOmit = true;
7457                     }
7458                 }
7459
7460                 if (!canOmit)
7461                 {
7462                     // If reg is a byte reg then we can use a movzx/movsx instruction
7463                     //
7464                     if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
7465                     {
7466                         instruction extendIns = ins_Move_Extend(treeType, true);
7467                         inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
7468                     }
7469                     else // we can't encode a movzx/movsx instruction
7470                     {
7471                         if (varTypeIsUnsigned(treeType))
7472                         {
7473                             // otherwise, we must zero the upper 24 bits of 'reg'
7474                             inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
7475                         }
7476                         else // treeType is signed
7477                         {
7478                             // otherwise, we must sign extend the result in the non-byteable register 'reg'
7479                             // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
7480                             // then we do an arithmetic shift right by 24 bits, which propagates the sign bit correctly.
7481                             //
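                                  // Worked example: a byte result of 0xF0 (-16):
                                  //     shl reg, 24   ; reg = 0xF0000000
                                  //     sar reg, 24   ; reg = 0xFFFFFFF0, i.e. -16 sign-extended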
7482                             inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
7483                             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
7484                         }
7485                     }
7486                 }
7487             }
7488             else
7489 #endif // CPU_HAS_BYTE_REGS
7490             {
7491                 inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
7492             }
7493
7494             /* The zero flag is now equal to the register value */
7495
7496             if (tree->gtSetFlags())
7497                 genFlagsEqualToReg(tree, reg);
7498
7499             /* Remember that we trashed the target */
7500
7501             regTracker.rsTrackRegTrash(reg);
7502
7503             /* Free up anything that was tied up by the RHS */
7504
7505             genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7506
7507             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7508             return;
7509
7510         default:
7511             break;
7512     } // end switch (op1->gtOper)
7513
7514 #if !CPU_LOAD_STORE_ARCH
7515     /* Special case: "x ^= -1" is actually "not(x)" */
7516
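         // e.g. for a stack local this emits a single "not dword ptr [x]" instead of a
         // load / xor -1 / store sequence.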
7517     if (oper == GT_ASG_XOR)
7518     {
7519         if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
7520         {
7521             addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
7522             inst_TT(INS_NOT, op1);
7523             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7524
7525             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7526             return;
7527         }
7528     }
7529 #endif // !CPU_LOAD_STORE_ARCH
7530
7531     /* Setup target mask for op2 (byte-regs for small operands) */
7532
7533     unsigned needMask;
7534     needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
7535
7536     /* Is the second operand a constant? */
7537
7538     if (op2->IsIntCnsFitsInI32())
7539     {
7540         int ival = (int)op2->gtIntCon.gtIconVal;
7541
7542         /* Make the target addressable */
7543         addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
7544
7545         inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
7546
7547         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
7548
7549         genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7550         return;
7551     }
7552
7553     /* Is the value or the address to be computed first? */
7554
7555     if (tree->gtFlags & GTF_REVERSE_OPS)
7556     {
7557         /* Compute the new value into a register */
7558
7559         genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7560
7561         /* Make the target addressable for load/store */
7562         addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
7563         regSet.rsLockUsedReg(addrReg);
7564
7565 #if !CPU_LOAD_STORE_ARCH
7566         // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7567         /* For small types with overflow check, we need to
7568             sign/zero extend the result, so we need it in a reg */
7569
7570         if (ovfl && genTypeSize(treeType) < sizeof(int))
7571 #endif // !CPU_LOAD_STORE_ARCH
7572         {
7573             reg = regSet.rsPickReg();
7574             regSet.rsLockReg(genRegMask(reg));
7575
7576             noway_assert(genIsValidReg(reg));
7577
7578             /* Generate "ldr reg, [var]" */
7579
7580             inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
7581
7582             if (op1->gtOper == GT_LCL_VAR)
7583                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7584             else
7585                 regTracker.rsTrackRegTrash(reg);
7586
7587             /* Make sure the new value is in a register */
7588
7589             genRecoverReg(op2, 0, RegSet::KEEP_REG);
7590
7591             /* Compute the new value */
7592
7593             inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7594
7595             if (ovfl)
7596                 genCheckOverflow(tree);
7597
7598             /* Move the new value back to the variable */
7599             /* Generate "str reg, [var]" */
7600
7601             inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7602             regSet.rsUnlockReg(genRegMask(reg));
7603
7604             if (op1->gtOper == GT_LCL_VAR)
7605                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
7606         }
7607 #if !CPU_LOAD_STORE_ARCH
7608         else
7609         {
7610             /* Make sure the new value is in a register */
7611
7612             genRecoverReg(op2, 0, RegSet::KEEP_REG);
7613
7614             /* Add the new value into the target */
7615
7616             inst_TT_RV(ins, op1, op2->gtRegNum);
7617         }
7618 #endif // !CPU_LOAD_STORE_ARCH
7619         /* Free up anything that was tied up either side */
7620         regSet.rsUnlockUsedReg(addrReg);
7621         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7622         genReleaseReg(op2);
7623     }
7624     else
7625     {
7626         /* Make the target addressable */
7627
7628         addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
7629
7630         /* Compute the new value into a register */
7631
7632         genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7633         regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
7634
7635         /* Make sure the target is still addressable */
7636
7637         addrReg = genKeepAddressable(op1, addrReg);
7638         regSet.rsLockUsedReg(addrReg);
7639
7640 #if !CPU_LOAD_STORE_ARCH
7641         // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7642
7643         /* For small types with overflow check, we need to
7644             sign/zero extend the result, so we need it in a reg */
7645
7646         if (ovfl && genTypeSize(treeType) < sizeof(int))
7647 #endif // !CPU_LOAD_STORE_ARCH
7648         {
7649             reg = regSet.rsPickReg();
7650
7651             inst_RV_TT(INS_mov, reg, op1);
7652
7653             inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7654             regTracker.rsTrackRegTrash(reg);
7655
7656             if (ovfl)
7657                 genCheckOverflow(tree);
7658
7659             inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7660
7661             if (op1->gtOper == GT_LCL_VAR)
7662                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7663         }
7664 #if !CPU_LOAD_STORE_ARCH
7665         else
7666         {
7667             /* Add the new value into the target */
7668
7669             inst_TT_RV(ins, op1, op2->gtRegNum);
7670         }
7671 #endif
7672
7673         /* Free up anything that was tied up either side */
7674         regSet.rsUnlockUsedReg(addrReg);
7675         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7676
7677         regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
7678         genReleaseReg(op2);
7679     }
7680
7681     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7682 }
7683
7684 /*****************************************************************************
7685  *
7686  *  Generate code for GT_UMOD.
7687  */
7688
7689 void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7690 {
7691     assert(tree->OperGet() == GT_UMOD);
7692
7693     GenTreePtr      op1      = tree->gtOp.gtOp1;
7694     GenTreePtr      op2      = tree->gtOp.gtOp2;
7695     const var_types treeType = tree->TypeGet();
7696     regMaskTP       needReg  = destReg;
7697     regNumber       reg;
7698
7699     /* Is this a division by an integer constant? */
7700
7701     noway_assert(op2);
7702     if (compiler->fgIsUnsignedModOptimizable(op2))
7703     {
7704         /* Generate the operand into some register */
7705
7706         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7707         noway_assert(op1->InReg());
7708
7709         reg = op1->gtRegNum;
7710
7711         /* Generate the appropriate sequence */
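             // For an unsigned dividend, x % 2^n == x & (2^n - 1), so the whole
             // operation reduces to a single mask. For example (assuming op2 == 8):
             //
             //     and   reg, 7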
7712         size_t ival = op2->gtIntCon.gtIconVal - 1;
7713         inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
7714
7715         /* The register is now trashed */
7716
7717         regTracker.rsTrackRegTrash(reg);
7718
7719         genCodeForTree_DONE(tree, reg);
7720         return;
7721     }
7722
7723     genCodeForGeneralDivide(tree, destReg, bestReg);
7724 }
7725
7726 /*****************************************************************************
7727  *
7728  *  Generate code for GT_MOD.
7729  */
7730
7731 void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7732 {
7733     assert(tree->OperGet() == GT_MOD);
7734
7735     GenTreePtr      op1      = tree->gtOp.gtOp1;
7736     GenTreePtr      op2      = tree->gtOp.gtOp2;
7737     const var_types treeType = tree->TypeGet();
7738     regMaskTP       needReg  = destReg;
7739     regNumber       reg;
7740
7741     /* Is this a division by an integer constant? */
7742
7743     noway_assert(op2);
7744     if (compiler->fgIsSignedModOptimizable(op2))
7745     {
7746         ssize_t     ival = op2->gtIntCon.gtIconVal;
7747         BasicBlock* skip = genCreateTempLabel();
7748
7749         /* Generate the operand into some register */
7750
7751         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7752         noway_assert(op1->InReg());
7753
7754         reg = op1->gtRegNum;
7755
7756         /* Generate the appropriate sequence */
7757
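             // Rough sketch of the sequence emitted below (x86 flavor, assuming
             // ival == 8). The mask keeps the low remainder bits plus the sign
             // bit, so a negative dividend can be detected and fixed up:
             //
             //     and   reg, 0x80000007     ; sets flags
             //     jns   skip                ; non-negative dividend: done
             //     dec   reg
             //     or    reg, 0xFFFFFFF8     ; i.e. -ival
             //     inc   reg
             //   skip: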
7758         inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
7759
7760         /* The register is now trashed */
7761
7762         regTracker.rsTrackRegTrash(reg);
7763
7764         /* Check and branch for a positive value */
7765         emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7766         inst_JMP(jmpGEL, skip);
7767
7768         /* Generate the rest of the sequence and we're done */
7769
7770         genIncRegBy(reg, -1, NULL, treeType);
7771         ival = -ival;
7772         if ((treeType == TYP_LONG) && ((int)ival != ival))
7773         {
7774             regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
7775             instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
7776             inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
7777         }
7778         else
7779         {
7780             inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
7781         }
7782         genIncRegBy(reg, 1, NULL, treeType);
7783
7784         /* Define the 'skip' label and we're done */
7785
7786         genDefineTempLabel(skip);
7787
7788         genCodeForTree_DONE(tree, reg);
7789         return;
7790     }
7791
7792     genCodeForGeneralDivide(tree, destReg, bestReg);
7793 }
7794
7795 /*****************************************************************************
7796  *
7797  *  Generate code for GT_UDIV.
7798  */
7799
7800 void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7801 {
7802     assert(tree->OperGet() == GT_UDIV);
7803
7804     GenTreePtr      op1      = tree->gtOp.gtOp1;
7805     GenTreePtr      op2      = tree->gtOp.gtOp2;
7806     const var_types treeType = tree->TypeGet();
7807     regMaskTP       needReg  = destReg;
7808     regNumber       reg;
7809
7810     /* Is this a division by an integer constant? */
7811
7812     noway_assert(op2);
7813     if (compiler->fgIsUnsignedDivOptimizable(op2))
7814     {
7815         size_t ival = op2->gtIntCon.gtIconVal;
7816
7817         /* Division by 1 must be handled elsewhere */
7818
7819         noway_assert(ival != 1 || compiler->opts.MinOpts());
7820
7821         /* Generate the operand into some register */
7822
7823         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7824         noway_assert(op1->InReg());
7825
7826         reg = op1->gtRegNum;
7827
7828         /* Generate "shr reg, log2(value)" */
7829
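             // For example (assuming op2 == 8), this is a single logical shift:
             //
             //     shr   reg, 3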
7830         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
7831
7832         /* The register is now trashed */
7833
7834         regTracker.rsTrackRegTrash(reg);
7835
7836         genCodeForTree_DONE(tree, reg);
7837         return;
7838     }
7839
7840     genCodeForGeneralDivide(tree, destReg, bestReg);
7841 }
7842
7843 /*****************************************************************************
7844  *
7845  *  Generate code for GT_DIV.
7846  */
7847
7848 void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7849 {
7850     assert(tree->OperGet() == GT_DIV);
7851
7852     GenTreePtr      op1      = tree->gtOp.gtOp1;
7853     GenTreePtr      op2      = tree->gtOp.gtOp2;
7854     const var_types treeType = tree->TypeGet();
7855     regMaskTP       needReg  = destReg;
7856     regNumber       reg;
7857
7858     /* Is this a division by an integer constant? */
7859
7860     noway_assert(op2);
7861     if (compiler->fgIsSignedDivOptimizable(op2))
7862     {
7863         ssize_t ival_s = op2->gtIntConCommon.IconValue();
7864         assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
7865         size_t ival = static_cast<size_t>(ival_s);
7866
7867         /* Division by 1 must be handled elsewhere */
7868
7869         noway_assert(ival != 1);
7870
7871         BasicBlock* onNegDivisee = genCreateTempLabel();
7872
7873         /* Generate the operand into some register */
7874
7875         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7876         noway_assert(op1->InReg());
7877
7878         reg = op1->gtRegNum;
7879
7880         if (ival == 2)
7881         {
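                 // Rough sketch for this case (x86 flavor): the arithmetic shift
                 // rounds toward minus infinity, so for a negative dividend the
                 // shifted-out bit (now in the carry flag) is added back in to
                 // round toward zero:
                 //
                 //     sar   reg, 1          ; sets flags
                 //     jns   skip            ; non-negative dividend: no fixup
                 //     adc   reg, 0          ; add the shifted-out bit back
                 //   skip: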
7882             /* Generate "sar reg, log2(value)" */
7883
7884             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
7885
7886             // Check and branch for a positive value, skipping the INS_ADDC instruction
7887             emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7888             inst_JMP(jmpGEL, onNegDivisee);
7889
7890             // Add the carry flag to 'reg'
7891             inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
7892
7893             /* Define the 'onNegDivisee' label and we're done */
7894
7895             genDefineTempLabel(onNegDivisee);
7896
7897             /* The register is now trashed */
7898
7899             regTracker.rsTrackRegTrash(reg);
7900
7901             /* The result is the same as the operand */
7902
7903             reg = op1->gtRegNum;
7904         }
7905         else
7906         {
7907             /* Generate the following sequence */
7908             /*
7909             test    reg, reg
7910             jns     onNegDivisee
7911             add     reg, ival-1
7912             onNegDivisee:
7913             sar     reg, log2(ival)
7914             */
7915
7916             instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
7917
7918             // Check and branch for a positive value, skipping the INS_add instruction
7919             emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7920             inst_JMP(jmpGEL, onNegDivisee);
7921
7922             inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
7923
7924             /* Define the 'onNegDivisee' label and we're done */
7925
7926             genDefineTempLabel(onNegDivisee);
7927
7928             /* Generate "sar reg, log2(value)" */
7929
7930             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
7931
7932             /* The register is now trashed */
7933
7934             regTracker.rsTrackRegTrash(reg);
7935
7936             /* The result is the same as the operand */
7937
7938             reg = op1->gtRegNum;
7939         }
7940
7941         genCodeForTree_DONE(tree, reg);
7942         return;
7943     }
7944
7945     genCodeForGeneralDivide(tree, destReg, bestReg);
7946 }
7947
7948 /*****************************************************************************
7949  *
7950  *  Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
7951  *  (if op2 is not a power of 2 constant).
7952  */
7953
7954 void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7955 {
7956     assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
7957            tree->OperGet() == GT_DIV);
7958
7959     GenTreePtr      op1      = tree->gtOp.gtOp1;
7960     GenTreePtr      op2      = tree->gtOp.gtOp2;
7961     const var_types treeType = tree->TypeGet();
7962     regMaskTP       needReg  = destReg;
7963     regNumber       reg;
7964     instruction     ins;
7965     bool            gotOp1;
7966     regMaskTP       addrReg;
7967
7968 #if USE_HELPERS_FOR_INT_DIV
7969     noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
7970 #endif
7971
7972 #if defined(_TARGET_XARCH_)
7973
7974     /* Which operand are we supposed to evaluate first? */
7975
7976     if (tree->gtFlags & GTF_REVERSE_OPS)
7977     {
7978         /* We'll evaluate 'op2' first */
7979
7980         gotOp1 = false;
7981         destReg &= ~op1->gtRsvdRegs;
7982
7983         /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
7984         if (op1->gtOper == GT_LCL_VAR)
7985         {
7986             unsigned varNum = op1->gtLclVarCommon.gtLclNum;
7987             noway_assert(varNum < compiler->lvaCount);
7988             LclVarDsc* varDsc = compiler->lvaTable + varNum;
7989             if (varDsc->lvRegister)
7990             {
7991                 destReg &= ~genRegMask(varDsc->lvRegNum);
7992             }
7993         }
7994     }
7995     else
7996     {
7997         /* We'll evaluate 'op1' first */
7998
7999         gotOp1 = true;
8000
8001         regMaskTP op1Mask;
8002         if (RBM_EAX & op2->gtRsvdRegs)
8003             op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8004         else
8005             op1Mask = RBM_EAX; // EAX would be ideal
8006
8007         /* Generate the dividend into EAX and hold on to it. freeOnly=true */
8008
8009         genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8010     }
8011
8012     /* We want to avoid using EAX or EDX for the second operand */
8013
8014     destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
8015
8016     /* Make the second operand addressable */
8017     op2 = genCodeForCommaTree(op2);
8018
8019     /* Special case: if op2 is a local var we are done */
8020
8021     if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
8022     {
8023         if (!op2->InReg())
8024             addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
8025         else
8026             addrReg = 0;
8027     }
8028     else
8029     {
8030         genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8031
8032         noway_assert(op2->InReg());
8033         addrReg = genRegMask(op2->gtRegNum);
8034     }
8035
8036     /* Make sure we have the dividend in EAX */
8037
8038     if (gotOp1)
8039     {
8040         /* We've previously computed op1 into EAX */
8041
8042         genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
8043     }
8044     else
8045     {
8046         /* Compute op1 into EAX and hold on to it */
8047
8048         genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8049     }
8050
8051     noway_assert(op1->InReg());
8052     noway_assert(op1->gtRegNum == REG_EAX);
8053
8054     /* We can now safely (we think) grab EDX */
8055
8056     regSet.rsGrabReg(RBM_EDX);
8057     regSet.rsLockReg(RBM_EDX);
8058
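         // Rough sketch of the divide sequence emitted below:
         //
         //     xor   edx, edx      ; unsigned: zero-extend the dividend
         //       -or-
         //     cdq                 ; signed: sign-extend eax into edx:eax
         //     div / idiv  <op2>   ; quotient -> eax, remainder -> edx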
8059     /* Convert the integer in EAX into a signed/unsigned long in EDX:EAX */
8060
8061     const genTreeOps oper = tree->OperGet();
8062
8063     if (oper == GT_UMOD || oper == GT_UDIV)
8064         instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
8065     else
8066         instGen(INS_cdq);
8067
8068     /* Make sure the divisor is still addressable */
8069
8070     addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
8071
8072     /* Perform the division */
8073
8074     if (oper == GT_UMOD || oper == GT_UDIV)
8075         inst_TT(INS_UNSIGNED_DIVIDE, op2);
8076     else
8077         inst_TT(INS_SIGNED_DIVIDE, op2);
8078
8079     /* Free up anything tied up by the divisor's address */
8080
8081     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
8082
8083     /* Unlock and free EDX */
8084
8085     regSet.rsUnlockReg(RBM_EDX);
8086
8087     /* Free up op1 (which is in EAX) as well */
8088
8089     genReleaseReg(op1);
8090
8091     /* Both EAX and EDX are now trashed */
8092
8093     regTracker.rsTrackRegTrash(REG_EAX);
8094     regTracker.rsTrackRegTrash(REG_EDX);
8095
8096     /* Figure out which register the result is in */
8097
8098     reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
8099
8100     /* Don't forget to mark the first operand as using EAX and EDX */
8101
8102     op1->gtRegNum = reg;
8103
8104     genCodeForTree_DONE(tree, reg);
8105
8106 #elif defined(_TARGET_ARM_)
8107
8108     /* Which operand are we supposed to evaluate first? */
8109
8110     if (tree->gtFlags & GTF_REVERSE_OPS)
8111     {
8112         /* We'll evaluate 'op2' first */
8113
8114         gotOp1 = false;
8115         destReg &= ~op1->gtRsvdRegs;
8116
8117         /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8118         if (op1->gtOper == GT_LCL_VAR)
8119         {
8120             unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8121             noway_assert(varNum < compiler->lvaCount);
8122             LclVarDsc* varDsc = compiler->lvaTable + varNum;
8123             if (varDsc->lvRegister)
8124             {
8125                 destReg &= ~genRegMask(varDsc->lvRegNum);
8126             }
8127         }
8128     }
8129     else
8130     {
8131         /* We'll evaluate 'op1' first */
8132
8133         gotOp1            = true;
8134         regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8135
8136         /* Generate the dividend into a register and hold on to it. */
8137
8138         genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8139     }
8140
8141     /* Evaluate the second operand into a register and hold onto it. */
8142
8143     genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8144
8145     noway_assert(op2->InReg());
8146     addrReg = genRegMask(op2->gtRegNum);
8147
8148     if (gotOp1)
8149     {
8150         // Recover op1 if spilled
8151         genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
8152     }
8153     else
8154     {
8155         /* Compute op1 into any register and hold on to it */
8156         genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8157     }
8158     noway_assert(op1->InReg());
8159
8160     reg = regSet.rsPickReg(needReg, bestReg);
8161
8162     // Perform the division
8163
8164     const genTreeOps oper = tree->OperGet();
8165
8166     if (oper == GT_UMOD || oper == GT_UDIV)
8167         ins = INS_udiv;
8168     else
8169         ins = INS_sdiv;
8170
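         // Rough sketch of the emitted sequence:
         //
         //     sdiv/udiv  rD, rOp1, rOp2     ; quotient
         //     ; for GT_MOD / GT_UMOD the remainder is then rebuilt:
         //     mul        rD, rOp2, rD
         //     sub        rD, rOp1, rD       ; rD = op1 - (op1 / op2) * op2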
8171     getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
8172
8173     if (oper == GT_UMOD || oper == GT_MOD)
8174     {
8175         getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
8176         getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
8177     }
8178     /* Free up op1 and op2 */
8179     genReleaseReg(op1);
8180     genReleaseReg(op2);
8181
8182     genCodeForTree_DONE(tree, reg);
8183
8184 #else
8185 #error "Unknown _TARGET_"
8186 #endif
8187 }
8188
8189 /*****************************************************************************
8190  *
8191  *  Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
8192  */
8193
8194 void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8195 {
8196     assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
8197
8198     const genTreeOps oper     = tree->OperGet();
8199     GenTreePtr       op1      = tree->gtOp.gtOp1;
8200     GenTreePtr       op2      = tree->gtOp.gtOp2;
8201     const var_types  treeType = tree->TypeGet();
8202     insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8203     regMaskTP        needReg  = destReg;
8204     regNumber        reg;
8205     instruction      ins;
8206     regMaskTP        addrReg;
8207
8208     switch (oper)
8209     {
8210         case GT_ASG_LSH:
8211             ins = INS_SHIFT_LEFT_LOGICAL;
8212             break;
8213         case GT_ASG_RSH:
8214             ins = INS_SHIFT_RIGHT_ARITHM;
8215             break;
8216         case GT_ASG_RSZ:
8217             ins = INS_SHIFT_RIGHT_LOGICAL;
8218             break;
8219         default:
8220             unreached();
8221     }
8222
8223     noway_assert(!varTypeIsGC(treeType));
8224     noway_assert(op2);
8225
8226     /* Shifts by a constant amount are easier */
8227
8228     if (op2->IsCnsIntOrI())
8229     {
8230         /* Make the target addressable */
8231
8232         addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
8233
8234         /* Are we shifting a register left by 1 bit? */
8235
8236         if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && op1->InReg())
8237         {
8238             /* The target lives in a register */
8239
8240             reg = op1->gtRegNum;
8241
8242             /* "add reg, reg" is cheaper than "shl reg, 1" */
8243
8244             inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
8245         }
8246         else
8247         {
8248 #if CPU_LOAD_STORE_ARCH
8249             if (!op1->InReg())
8250             {
8251                 regSet.rsLockUsedReg(addrReg);
8252
8253                 // Load op1 into a reg
8254
8255                 reg = regSet.rsPickReg(RBM_ALLINT);
8256
8257                 inst_RV_TT(INS_mov, reg, op1);
8258
8259                 // Issue the shift
8260
8261                 inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
8262                 regTracker.rsTrackRegTrash(reg);
8263
8264                 /* Store the (sign/zero extended) result back to the stack location of the variable */
8265
8266                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
8267
8268                 regSet.rsUnlockUsedReg(addrReg);
8269             }
8270             else
8271 #endif // CPU_LOAD_STORE_ARCH
8272             {
8273                 /* Shift by the constant value */
8274
8275                 inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
8276             }
8277         }
8278
8279         /* If the target is a register, it has a new value */
8280
8281         if (op1->InReg())
8282             regTracker.rsTrackRegTrash(op1->gtRegNum);
8283
8284         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8285
8286         /* The zero flag is now equal to the target value */
8287         /* X86: But only if the shift count is != 0 */
8288
8289         if (op2->gtIntCon.gtIconVal != 0)
8290         {
8291             if (tree->gtSetFlags())
8292             {
8293                 if (op1->gtOper == GT_LCL_VAR)
8294                 {
8295                     genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
8296                 }
8297                 else if (op1->gtOper == GT_REG_VAR)
8298                 {
8299                     genFlagsEqualToReg(tree, op1->gtRegNum);
8300                 }
8301             }
8302         }
8303         else
8304         {
8305             // It is possible for the shift count to equal 0 with valid
8306             // IL, and not be optimized away, in the case where the node
8307             // is of a small type.  The sequence of instructions looks like
8308             // ldsfld, shr, stsfld and executed on a char field.  This will
8309             // never happen with code produced by our compilers, because the
8310             // compilers will insert a conv.u2 before the stsfld (which will
8311             // lead us down a different codepath in the JIT and optimize away
8312             // the shift by zero).  This case is not worth optimizing and we
8313             // will just make sure to generate correct code for it.
8314
8315             genFlagsEqualToNone();
8316         }
8317     }
8318     else
8319     {
8320         regMaskTP op2Regs = RBM_NONE;
8321         if (REG_SHIFT != REG_NA)
8322             op2Regs = RBM_SHIFT;
8323
8324         regMaskTP tempRegs;
8325
8326         if (tree->gtFlags & GTF_REVERSE_OPS)
8327         {
8328             tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
8329             genCodeForTree(op2, tempRegs);
8330             regSet.rsMarkRegUsed(op2);
8331
8332             tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
8333             addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8334
8335             genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
8336         }
8337         else
8338         {
8339             /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
8340             regMaskTP excludeMask = op2->gtRsvdRegs;
8341             if (REG_SHIFT != REG_NA)
8342                 excludeMask |= RBM_SHIFT;
8343
8344             tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
8345             addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8346
8347             /* Load the shift count into the necessary register */
8348             genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
8349         }
8350
8351         /* Make sure the address registers are still here */
8352         addrReg = genKeepAddressable(op1, addrReg, op2Regs);
8353
8354 #ifdef _TARGET_XARCH_
8355         /* Perform the shift */
8356         inst_TT_CL(ins, op1);
8357 #else
8358         /* Perform the shift */
8359         noway_assert(op2->InReg());
8360         op2Regs = genRegMask(op2->gtRegNum);
8361
8362         regSet.rsLockUsedReg(addrReg | op2Regs);
8363         inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
8364         regSet.rsUnlockUsedReg(addrReg | op2Regs);
8365 #endif
8366         /* Free the address registers */
8367         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8368
8369         /* If the value is in a register, it's now trash */
8370
8371         if (op1->InReg())
8372             regTracker.rsTrackRegTrash(op1->gtRegNum);
8373
8374         /* Release the op2 [RBM_SHIFT] operand */
8375
8376         genReleaseReg(op2);
8377     }
8378
8379     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
8380 }
8381
8382 /*****************************************************************************
8383  *
8384  *  Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
8385  */
8386
8387 void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8388 {
8389     assert(tree->OperIsShift());
8390
8391     const genTreeOps oper     = tree->OperGet();
8392     GenTreePtr       op1      = tree->gtOp.gtOp1;
8393     GenTreePtr       op2      = tree->gtOp.gtOp2;
8394     const var_types  treeType = tree->TypeGet();
8395     insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8396     regMaskTP        needReg  = destReg;
8397     regNumber        reg;
8398     instruction      ins;
8399
8400     switch (oper)
8401     {
8402         case GT_LSH:
8403             ins = INS_SHIFT_LEFT_LOGICAL;
8404             break;
8405         case GT_RSH:
8406             ins = INS_SHIFT_RIGHT_ARITHM;
8407             break;
8408         case GT_RSZ:
8409             ins = INS_SHIFT_RIGHT_LOGICAL;
8410             break;
8411         default:
8412             unreached();
8413     }
8414
8415     /* Is the shift count constant? */
8416     noway_assert(op2);
8417     if (op2->IsIntCnsFitsInI32())
8418     {
8419         // TODO: Check to see if we could generate a LEA instead!
8420
8421         /* Compute the left operand into any free register */
8422
8423         genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
8424
8425         noway_assert(op1->InReg());
8426         reg = op1->gtRegNum;
8427
8428         /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
8429
8430         // On ARM, until proven otherwise by performance numbers, just do the shift.
8431         // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
8432         // It's smaller than two "add reg, reg".
8433
8434         CLANG_FORMAT_COMMENT_ANCHOR;
8435
8436 #ifndef _TARGET_ARM_
8437         if (oper == GT_LSH)
8438         {
8439             emitAttr size = emitActualTypeSize(treeType);
8440             if (op2->gtIntConCommon.IconValue() == 1)
8441             {
8442                 /* "add reg, reg" is smaller and faster than "shl reg, 1" */
8443                 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8444             }
8445             else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
8446             {
8447                 /* two "add reg, reg" instructions are faster than "shl reg, 2" */
8448                 inst_RV_RV(INS_add, reg, reg, treeType);
8449                 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8450             }
8451             else
8452                 goto DO_SHIFT_BY_CNS;
8453         }
8454         else
8455 #endif // _TARGET_ARM_
8456         {
8457 #ifndef _TARGET_ARM_
8458         DO_SHIFT_BY_CNS:
8459 #endif // _TARGET_ARM_
8460             // If we are shifting 'reg' by zero bits and do not need the flags to be set
8461             // then we can just skip emitting the instruction as 'reg' is already correct.
8462             //
8463             if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
8464             {
8465                 /* Generate the appropriate shift instruction */
8466                 inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
8467             }
8468         }
8469     }
8470     else
8471     {
8472         /* Calculate a useful register mask for computing op1 */
8473         needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
8474         regMaskTP op2RegMask;
8475 #ifdef _TARGET_XARCH_
8476         op2RegMask = RBM_ECX;
8477 #else
8478         op2RegMask = RBM_NONE;
8479 #endif
8480         needReg = regSet.rsMustExclude(needReg, op2RegMask);
8481
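             // On XARCH a variable shift count has to end up in CL, so this path
             // roughly produces (for a hypothetical "x << n"):
             //
             //     mov   ecx, <n>
             //     shl   reg, cl
             //
             // On ARM the count may live in any register and the shift is a single
             // register-register instruction.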
8482         regMaskTP tempRegs;
8483
8484         /* Which operand are we supposed to evaluate first? */
8485         if (tree->gtFlags & GTF_REVERSE_OPS)
8486         {
8487             /* Load the shift count [into ECX on XARCH] */
8488             tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
8489             genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8490
8491             /* We must not target the register that is holding op2 */
8492             needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
8493
8494             /* Now evaluate 'op1' into a free register */
8495             genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8496
8497             /* Recover op2 into ECX */
8498             genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
8499         }
8500         else
8501         {
8502             /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
8503             tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
8504             genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8505
8506             /* Load the shift count [into ECX on XARCH] */
8507             genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8508         }
8509
8510         noway_assert(op2->InReg());
8511 #ifdef _TARGET_XARCH_
8512         noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
8513 #endif
8514         // Check for the case of op1 being spilled during the evaluation of op2
8515         if (op1->gtFlags & GTF_SPILLED)
8516         {
8517             // The register has been spilled -- reload it to any register except ECX
8518             regSet.rsLockUsedReg(op2RegMask);
8519             regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
8520             regSet.rsUnlockUsedReg(op2RegMask);
8521         }
8522
8523         noway_assert(op1->InReg());
8524         reg = op1->gtRegNum;
8525
8526 #ifdef _TARGET_ARM_
8527         /* Perform the shift */
8528         getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
8529 #else
8530         /* Perform the shift */
8531         inst_RV_CL(ins, reg);
8532 #endif
8533         genReleaseReg(op2);
8534     }
8535
8536     noway_assert(op1->InReg());
8537     noway_assert(reg == op1->gtRegNum);
8538
8539     /* The register is now trashed */
8540     genReleaseReg(op1);
8541     regTracker.rsTrackRegTrash(reg);
8542
8543     genCodeForTree_DONE(tree, reg);
8544 }
8545
8546 /*****************************************************************************
8547  *
8548  *  Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
8549  *  Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
8550  */
8551
8552 void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8553 {
8554     assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
8555            tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
8556
8557     const genTreeOps oper     = tree->OperGet();
8558     GenTreePtr       op1      = tree->gtOp.gtOp1;
8559     const var_types  treeType = tree->TypeGet();
8560     regMaskTP        needReg  = destReg;
8561     regNumber        reg;
8562
8563     // Longs and float comparisons are converted to "?:"
8564     noway_assert(!compiler->fgMorphRelopToQmark(op1));
8565
8566     // Check if we can use the currently set flags. Else set them
8567
8568     emitJumpKind jumpKind = genCondSetFlags(tree);
8569
8570     // Grab a register to materialize the bool value into
8571
8572     bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
8573
8574     // Check that the predictor did the right job
8575     noway_assert(bestReg);
8576
8577     // If needReg is in bestReg then use it
8578     if (needReg & bestReg)
8579         reg = regSet.rsGrabReg(needReg & bestReg);
8580     else
8581         reg = regSet.rsGrabReg(bestReg);
8582
8583 #if defined(_TARGET_ARM_)
8584
8585     // Generate:
8586     //      jump-if-true L_true
8587     //      mov reg, 0
8588     //      jmp L_end
8589     //    L_true:
8590     //      mov reg, 1
8591     //    L_end:
8592
8593     BasicBlock* L_true;
8594     BasicBlock* L_end;
8595
8596     L_true = genCreateTempLabel();
8597     L_end  = genCreateTempLabel();
8598
8599     inst_JMP(jumpKind, L_true);
8600     getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
8601     inst_JMP(EJ_jmp, L_end);
8602     genDefineTempLabel(L_true);
8603     getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
8604     genDefineTempLabel(L_end);
8605
8606     regTracker.rsTrackRegTrash(reg);
8607
8608 #elif defined(_TARGET_XARCH_)
8609     regMaskTP regs = genRegMask(reg);
8610     noway_assert(regs & RBM_BYTE_REGS);
8611
8612     // Set (lower byte of) reg according to the flags
8613
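         // In the general case the flags are materialized with a set/zero-extend
         // pair, roughly:
         //
         //     setcc al
         //     movzx eax, al
         //
         // The two carry-flag cases below avoid the setcc: "sbb reg, reg" leaves
         // -CF in reg, so negating it yields 1/0 for EJ_jb, while adding 1
         // yields 1/0 for EJ_jae.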
8614     /* Look for the special case where we just want to transfer the carry bit */
8615
8616     if (jumpKind == EJ_jb)
8617     {
8618         inst_RV_RV(INS_SUBC, reg, reg);
8619         inst_RV(INS_NEG, reg, TYP_INT);
8620         regTracker.rsTrackRegTrash(reg);
8621     }
8622     else if (jumpKind == EJ_jae)
8623     {
8624         inst_RV_RV(INS_SUBC, reg, reg);
8625         genIncRegBy(reg, 1, tree, TYP_INT);
8626         regTracker.rsTrackRegTrash(reg);
8627     }
8628     else
8629     {
8630         inst_SET(jumpKind, reg);
8631
8632         regTracker.rsTrackRegTrash(reg);
8633
8634         if (treeType == TYP_INT)
8635         {
8636             // Set the higher bytes to 0
8637             inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
8638         }
8639         else
8640         {
8641             noway_assert(treeType == TYP_BYTE);
8642         }
8643     }
8644 #else
8645     NYI("TARGET");
8646 #endif // _TARGET_XXX
8647
8648     genCodeForTree_DONE(tree, reg);
8649 }
8650
8651 //------------------------------------------------------------------------
8652 // genCodeForCopyObj: Generate code for a CopyObj node
8653 //
8654 // Arguments:
8655 //    tree    - The CopyObj node we are going to generate code for.
8656 //    destReg - The register mask for register(s), if any, that will be defined.
8657 //
8658 // Return Value:
8659 //    None
8660
8661 void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
8662 {
8663     // If the value class doesn't have any fields that are GC refs or
8664     // the target isn't on the GC-heap, we can merge it with CPBLK.
8665     // GC fields cannot be copied directly; instead we will
8666     // need to use a jit-helper for that.
8667     assert(tree->gtOper == GT_ASG);
8668     assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
8669
8670     GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
8671     assert(cpObjOp->HasGCPtr());
8672
8673 #ifdef _TARGET_ARM_
8674     if (cpObjOp->IsVolatile())
8675     {
8676         // Emit a memory barrier instruction before the CopyBlk
8677         instGen_MemoryBarrier();
8678     }
8679 #endif
8680     assert(tree->gtOp.gtOp2->OperIsIndir());
8681     GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
8682     GenTreePtr dstObj = cpObjOp->Addr();
8683
8684     noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
8685
8686 #ifdef DEBUG
8687     CORINFO_CLASS_HANDLE clsHnd       = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
8688     size_t               debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
8689
8690     // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
8691     // The EE currently does not allow this.  Let's assert it just to be safe.
8692     noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
8693 #endif
8694
8695     size_t   blkSize    = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
8696     unsigned slots      = cpObjOp->gtSlots;
8697     BYTE*    gcPtrs     = cpObjOp->gtGcPtrs;
8698     unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
8699     assert(blkSize == cpObjOp->gtBlkSize);
8700
8701     GenTreePtr treeFirst, treeSecond;
8702     regNumber  regFirst, regSecond;
8703
8704     // Check what order the object-ptrs have to be evaluated in
8705
8706     if (tree->gtFlags & GTF_REVERSE_OPS)
8707     {
8708         treeFirst  = srcObj;
8709         treeSecond = dstObj;
8710 #if CPU_USES_BLOCK_MOVE
8711         regFirst  = REG_ESI;
8712         regSecond = REG_EDI;
8713 #else
8714         regFirst  = REG_ARG_1;
8715         regSecond = REG_ARG_0;
8716 #endif
8717     }
8718     else
8719     {
8720         treeFirst  = dstObj;
8721         treeSecond = srcObj;
8722 #if CPU_USES_BLOCK_MOVE
8723         regFirst  = REG_EDI;
8724         regSecond = REG_ESI;
8725 #else
8726         regFirst  = REG_ARG_0;
8727         regSecond = REG_ARG_1;
8728 #endif
8729     }
8730
8731     bool     dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
8732     bool     srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
8733     emitAttr srcType      = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8734     emitAttr dstType      = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8735
8736 #if CPU_USES_BLOCK_MOVE
8737     // Materialize the trees in the order desired
8738
8739     genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8740     genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8741     genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8742
8743     // Grab ECX because it will be trashed by the helper
8744     //
8745     regSet.rsGrabReg(RBM_ECX);
8746
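         // The loop below walks the struct one pointer-sized slot at a time. As a
         // hypothetical example, for a layout of { object o; int a; int b; } with a
         // heap destination it emits one CORINFO_HELP_ASSIGN_BYREF call for the 'o'
         // slot followed by two movsp instructions for the plain slots.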
8747     while (blkSize >= TARGET_POINTER_SIZE)
8748     {
8749         if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
8750         {
8751             // Note that we can use movsd even if it is a GC pointer being transferred
8752             // because the value is not cached anywhere.  If we did this in two moves,
8753             // we would have to make certain we passed the appropriate GC info on to
8754             // the emitter.
8755             instGen(INS_movsp);
8756         }
8757         else
8758         {
8759             // This helper will act like a MOVSD
8760             //    -- inputs EDI and ESI are byrefs
8761             //    -- including incrementing of ESI and EDI by 4
8762             //    -- helper will trash ECX
8763             //
8764             regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8765             regSet.rsLockUsedReg(argRegs);
8766             genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8767                               0,           // argSize
8768                               EA_PTRSIZE); // retSize
8769             regSet.rsUnlockUsedReg(argRegs);
8770         }
8771
8772         blkSize -= TARGET_POINTER_SIZE;
8773     }
8774
8775     // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
8776
8777     regTracker.rsTrackRegTrash(REG_EDI);
8778     regTracker.rsTrackRegTrash(REG_ESI);
8779     regTracker.rsTrackRegTrash(REG_ECX);
8780
8781     gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
8782
8783     /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8784         it is an emitNoGChelper. However, we have to let the emitter know that
8785         the GC liveness has changed. We do this by creating a new label.
8786         */
8787
8788     noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8789
8790     genDefineTempLabel(&dummyBB);
8791
8792 #else //  !CPU_USES_BLOCK_MOVE
8793
8794 #ifndef _TARGET_ARM_
8795 // Currently only the ARM implementation is provided
8796 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
8797 #endif
8798
8799     // Materialize the trees in the order desired
8800     bool      helperUsed;
8801     regNumber regDst;
8802     regNumber regSrc;
8803     regNumber regTemp;
8804
8805     if ((gcPtrCount > 0) && !dstIsOnStack)
8806     {
8807         genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8808         genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8809         genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8810
8811         /* The helper is a Asm-routine that will trash R2,R3 and LR */
8812         {
8813             /* Spill any callee-saved registers which are being used */
8814             regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
8815
8816             if (spillRegs)
8817             {
8818                 regSet.rsSpillRegs(spillRegs);
8819             }
8820         }
8821
8822         // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
8823         // We will also use it as the temp register for our load/store sequences
8824         //
8825         assert(REG_R2 == REG_TMP_1);
8826         regTemp    = regSet.rsGrabReg(RBM_R2);
8827         helperUsed = true;
8828     }
8829     else
8830     {
8831         genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
8832         genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
8833         genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
8834
8835         // Grab any temp register to use for our load/store sequences
8836         //
8837         regTemp    = regSet.rsGrabReg(RBM_ALLINT);
8838         helperUsed = false;
8839     }
8840     assert(dstObj->InReg());
8841     assert(srcObj->InReg());
8842
8843     regDst = dstObj->gtRegNum;
8844     regSrc = srcObj->gtRegNum;
8845
8846     assert(regDst != regTemp);
8847     assert(regSrc != regTemp);
8848
8849     instruction loadIns  = ins_Load(TYP_I_IMPL);  // INS_ldr
8850     instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
8851
8852     size_t offset = 0;
8853     while (blkSize >= TARGET_POINTER_SIZE)
8854     {
8855         CorInfoGCType gcType;
8856         CorInfoGCType gcTypeNext = TYPE_GC_NONE;
8857         var_types     type       = TYP_I_IMPL;
8858
8859 #if FEATURE_WRITE_BARRIER
8860         gcType                   = (CorInfoGCType)(*gcPtrs++);
8861         if (blkSize > TARGET_POINTER_SIZE)
8862             gcTypeNext = (CorInfoGCType)(*gcPtrs);
8863
8864         if (gcType == TYPE_GC_REF)
8865             type = TYP_REF;
8866         else if (gcType == TYPE_GC_BYREF)
8867             type = TYP_BYREF;
8868
8869         if (helperUsed)
8870         {
8871             assert(regDst == REG_ARG_0);
8872             assert(regSrc == REG_ARG_1);
8873             assert(regTemp == REG_R2);
8874         }
8875 #else
8876         gcType = TYPE_GC_NONE;
8877 #endif // FEATURE_WRITE_BARRIER
8878
8879         blkSize -= TARGET_POINTER_SIZE;
8880
8881         emitAttr opSize = emitTypeSize(type);
8882
8883         if (!helperUsed || (gcType == TYPE_GC_NONE))
8884         {
8885             getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
8886             getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
8887             offset += TARGET_POINTER_SIZE;
8888
8889             if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
8890             {
8891                 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
8892                 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
8893                 offset = 0;
8894             }
8895         }
8896         else
8897         {
8898             assert(offset == 0);
8899
8900             // The helper will act like this:
8901             //    -- inputs R0 and R1 are byrefs
8902             //    -- helper will perform copy from *R1 into *R0
8903             //    -- helper will perform post increment of R0 and R1 by 4
8904             //    -- helper will trash R2
8905             //    -- helper will trash R3
8906             //    -- calling the helper implicitly trashes LR
8907             //
8908             assert(helperUsed);
8909             regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8910             regSet.rsLockUsedReg(argRegs);
8911             genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8912                               0,           // argSize
8913                               EA_PTRSIZE); // retSize
8914
8915             regSet.rsUnlockUsedReg(argRegs);
8916             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
8917         }
8918     }
8919
8920     regTracker.rsTrackRegTrash(regDst);
8921     regTracker.rsTrackRegTrash(regSrc);
8922     regTracker.rsTrackRegTrash(regTemp);
8923
8924     gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
8925
8926     /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8927         it is an emitNoGChelper. However, we have to let the emitter know that
8928         the GC liveness has changed. We do this by creating a new label.
8929         */
8930
8931     noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8932
8933     genDefineTempLabel(&dummyBB);
8934
8935 #endif //  !CPU_USES_BLOCK_MOVE
8936
8937     assert(blkSize == 0);
8938
8939     genReleaseReg(dstObj);
8940     genReleaseReg(srcObj);
8941
8942     genCodeForTree_DONE(tree, REG_NA);
8943
8944 #ifdef _TARGET_ARM_
8945     if (cpObjOp->IsVolatile())
8946     {
8947         // Emit a memory barrier instruction after the CopyBlk
8948         instGen_MemoryBarrier();
8949     }
8950 #endif
8951 }
8952
8953 //------------------------------------------------------------------------
8954 // genCodeForBlkOp: Generate code for a block copy or init operation
8955 //
8956 // Arguments:
8957 //    tree    - The block assignment
8958 //    destReg - The expected destination register
8959 //
8960 void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
8961 {
8962     genTreeOps oper    = tree->OperGet();
8963     GenTreePtr dest    = tree->gtOp.gtOp1;
8964     GenTreePtr src     = tree->gtGetOp2();
8965     regMaskTP  needReg = destReg;
8966     regMaskTP  regs    = regSet.rsMaskUsed;
8967     GenTreePtr opsPtr[3];
8968     regMaskTP  regsPtr[3];
8969     GenTreePtr destPtr;
8970     GenTreePtr srcPtrOrVal;
8971
8972     noway_assert(tree->OperIsBlkOp());
8973
8974     bool       isCopyBlk    = false;
8975     bool       isInitBlk    = false;
8976     bool       hasGCpointer = false;
8977     unsigned   blockSize    = dest->AsBlk()->gtBlkSize;
8978     GenTreePtr sizeNode     = nullptr;
8979     bool       sizeIsConst  = true;
8980     if (dest->gtOper == GT_DYN_BLK)
8981     {
8982         sizeNode    = dest->AsDynBlk()->gtDynamicSize;
8983         sizeIsConst = false;
8984     }
8985
8986     if (tree->OperIsCopyBlkOp())
8987     {
8988         isCopyBlk = true;
8989         if (dest->gtOper == GT_OBJ)
8990         {
8991             if (dest->AsObj()->gtGcPtrCount != 0)
8992             {
8993                 genCodeForCopyObj(tree, destReg);
8994                 return;
8995             }
8996         }
8997     }
8998     else
8999     {
9000         isInitBlk = true;
9001     }
9002
9003     // Ensure that we have an address in the CopyBlk case.
9004     if (isCopyBlk)
9005     {
9006         // TODO-1stClassStructs: Allow a lclVar here.
9007         assert(src->OperIsIndir());
9008         srcPtrOrVal = src->AsIndir()->Addr();
9009     }
9010     else
9011     {
9012         srcPtrOrVal = src;
9013     }
9014
9015 #ifdef _TARGET_ARM_
9016     if (dest->AsBlk()->IsVolatile())
9017     {
9018         // Emit a memory barrier instruction before the InitBlk/CopyBlk
9019         instGen_MemoryBarrier();
9020     }
9021 #endif
9022     {
9023         destPtr = dest->AsBlk()->Addr();
9024         noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
9025         noway_assert(
9026             (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
9027             (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
9028
9029         noway_assert(destPtr && srcPtrOrVal);
9030
9031 #if CPU_USES_BLOCK_MOVE
9032         regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
9033
9034         /* Some special code for block moves/inits for constant sizes */
9035
9036         //
9037         // Is this a fixed size COPYBLK?
9038         //      or a fixed size INITBLK with a constant init value?
9039         //
9040         if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
9041         {
9042             size_t      length  = blockSize;
9043             size_t      initVal = 0;
9044             instruction ins_P, ins_PR, ins_B;
9045
9046             if (isInitBlk)
9047             {
9048                 ins_P  = INS_stosp;
9049                 ins_PR = INS_r_stosp;
9050                 ins_B  = INS_stosb;
9051
9052                 /* Properly extend the init constant from a U1 to a U4 */
9053                 initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
9054
9055                 /* If it is a non-zero value we have to replicate      */
9056                 /* the byte value four times to form the DWORD,        */
9057                 /* then store this new value back into the tree node   */
9058
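                     // For example, an init value of 0xAB is replicated here to
                     // 0xABABABAB (and to 0xABABABABABABABAB on 64-bit targets
                     // when the block is larger than 4 bytes).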
9059                 if (initVal)
9060                 {
9061                     initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9062 #ifdef _TARGET_64BIT_
9063                     if (length > 4)
9064                     {
9065                         initVal             = initVal | (initVal << 32);
9066                         srcPtrOrVal->gtType = TYP_LONG;
9067                     }
9068                     else
9069                     {
9070                         srcPtrOrVal->gtType = TYP_INT;
9071                     }
9072 #endif // _TARGET_64BIT_
9073                 }
9074                 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9075             }
9076             else
9077             {
9078                 ins_P  = INS_movsp;
9079                 ins_PR = INS_r_movsp;
9080                 ins_B  = INS_movsb;
9081             }
9082
9083             // Determine if we will be using SSE2
9084             unsigned movqLenMin = 8;
9085             unsigned movqLenMax = 24;
9086
9087             bool bWillUseSSE2      = false;
9088             bool bWillUseOnlySSE2  = false;
9089             bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
9090
9091 #ifdef _TARGET_64BIT_
9092
9093 // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
9094 // there is no point in wasting space on the bigger instructions
9095
9096 #else // !_TARGET_64BIT_
9097
9098             if (compiler->opts.compCanUseSSE2)
9099             {
9100                 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
9101
9102                 /* Adjust for BB weight */
9103                 if (curBBweight == BB_ZERO_WEIGHT)
9104                 {
9105                     // Don't bother with this optimization in
9106                     // rarely run blocks
9107                     movqLenMax = movqLenMin = 0;
9108                 }
9109                 else if (curBBweight < BB_UNITY_WEIGHT)
9110                 {
9111                     // Be less aggressive when we are inside a conditional
9112                     movqLenMax = 16;
9113                 }
9114                 else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
9115                 {
9116                     // Be more aggressive when we are inside a loop
9117                     movqLenMax = 48;
9118                 }
9119
9120                 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
9121                 {
9122                     // Be more aggressive when optimizing for speed
9123                     // InitBlk uses fewer instructions
9124                     movqLenMax += 16;
9125                 }
9126
9127                 if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
9128                 {
9129                     bWillUseSSE2 = true;
9130
9131                     if ((length % 8) == 0)
9132                     {
9133                         bWillUseOnlySSE2 = true;
9134                         if (isInitBlk && (initVal == 0))
9135                         {
9136                             bNeedEvaluateCnst = false;
9137                             noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
9138                         }
9139                     }
9140                 }
9141             }
9142
9143 #endif // !_TARGET_64BIT_
9144
9145             const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
9146             /* Evaluate dest and src/val */
9147
9148             if (tree->gtFlags & GTF_REVERSE_OPS)
9149             {
9150                 if (bNeedEvaluateCnst)
9151                 {
9152                     genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9153                 }
9154                 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9155                 if (bNeedEvaluateCnst)
9156                 {
9157                     genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
9158                 }
9159             }
9160             else
9161             {
9162                 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9163                 if (bNeedEvaluateCnst)
9164                 {
9165                     genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9166                 }
9167                 genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
9168             }
9169
9170             bool bTrashedESI = false;
9171             bool bTrashedEDI = false;
9172
9173             if (bWillUseSSE2)
9174             {
9175                 int       blkDisp = 0;
9176                 regNumber xmmReg  = REG_XMM0;
9177
9178                 if (isInitBlk)
9179                 {
9180                     if (initVal)
9181                     {
9182                         getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9183                         getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9184                     }
9185                     else
9186                     {
9187                         getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9188                     }
9189                 }
9190
9191                 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9192                                        length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
9193
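                     // Rough sketch (hypothetical 16-byte copyblk): the loop below
                     // unrolls into
                     //
                     //     movq  xmm0, [esi]
                     //     movq  [edi], xmm0
                     //     movq  xmm0, [esi+8]
                     //     movq  [edi+8], xmm0
                     //
                     // For an initblk only the stores are emitted, with xmm0
                     // preloaded above.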
9194                 while (length > 7)
9195                 {
9196                     if (isInitBlk)
9197                     {
9198                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9199                     }
9200                     else
9201                     {
9202                         getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9203                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9204                     }
9205                     blkDisp += 8;
9206                     length -= 8;
9207                 }
9208
9209                 if (length > 0)
9210                 {
9211                     noway_assert(bNeedEvaluateCnst);
9212                     noway_assert(!bWillUseOnlySSE2);
9213
9214                     if (isCopyBlk)
9215                     {
9216                         inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9217                         bTrashedESI = true;
9218                     }
9219
9220                     inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9221                     bTrashedEDI = true;
9222
9223                     if (length >= REGSIZE_BYTES)
9224                     {
9225                         instGen(ins_P);
9226                         length -= REGSIZE_BYTES;
9227                     }
9228                 }
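                // If the size was not a multiple of 8, ESI/EDI have been advanced past the bytes
                // handled with movq above; the remaining 1..7 bytes are finished by the
                // pointer-sized movs/stos emitted here and by the trailing byte loop further down.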
9229             }
9230             else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9231             {
9232                     /* For small code, we can only use ins_DR to generate fast
9233                         and small code. We also can't use "rep movsb" because
9234                         we may not be atomically reading and writing the DWORD */
9235
9236                 noway_assert(bNeedEvaluateCnst);
9237
9238                 goto USE_DR;
9239             }
9240             else if (length <= 4 * REGSIZE_BYTES)
9241             {
9242                 noway_assert(bNeedEvaluateCnst);
9243
9244                 while (length >= REGSIZE_BYTES)
9245                 {
9246                     instGen(ins_P);
9247                     length -= REGSIZE_BYTES;
9248                 }
9249
9250                 bTrashedEDI = true;
9251                 if (isCopyBlk)
9252                     bTrashedESI = true;
9253             }
9254             else
9255             {
9256             USE_DR:
9257                 noway_assert(bNeedEvaluateCnst);
9258
9259                 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9260                 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9261
9262                 length &= (REGSIZE_BYTES - 1);
9263
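                // ins_PR is the rep-prefixed pointer-sized string instruction (presumably
                // "rep movsd" for a copyblk / "rep stosd" for an initblk on x86): it moves or
                // stores ECX pointer-sized words starting at [EDI] (and [ESI] for a copy),
                // advancing the pointer registers as it goes.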
9264                 instGen(ins_PR);
9265
9266                 regTracker.rsTrackRegTrash(REG_ECX);
9267
9268                 bTrashedEDI = true;
9269                 if (isCopyBlk)
9270                     bTrashedESI = true;
9271             }
9272
9273             /* Now take care of the remainder */
9274             CLANG_FORMAT_COMMENT_ANCHOR;
9275
9276 #ifdef _TARGET_64BIT_
9277             if (length > 4)
9278             {
9279                 noway_assert(bNeedEvaluateCnst);
9280                 noway_assert(length < 8);
9281
9282                 instGen((isInitBlk) ? INS_stosd : INS_movsd);
9283                 length -= 4;
9284
9285                 bTrashedEDI = true;
9286                 if (isCopyBlk)
9287                     bTrashedESI = true;
9288             }
9289
9290 #endif // _TARGET_64BIT_
9291
9292             if (length)
9293             {
9294                 noway_assert(bNeedEvaluateCnst);
9295
9296                 while (length--)
9297                 {
9298                     instGen(ins_B);
9299                 }
9300
9301                 bTrashedEDI = true;
9302                 if (isCopyBlk)
9303                     bTrashedESI = true;
9304             }
9305
9306             noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9307             if (bTrashedEDI)
9308                 regTracker.rsTrackRegTrash(REG_EDI);
9309             if (bTrashedESI)
9310                 regTracker.rsTrackRegTrash(REG_ESI);
9311             // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9312
9313             genReleaseReg(destPtr);
9314             if (bNeedEvaluateCnst)
9315                 genReleaseReg(srcPtrOrVal);
9316         }
9317         else
9318         {
9319             //
9320             // This is a variable-sized COPYBLK/INITBLK,
9321             //   or a fixed size INITBLK with a variable init value,
9322             //
9323
9324             // In what order should the Dest, Val/Src, and Size be evaluated?
9325
9326             compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
9327
9328             noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
9329             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9330             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9331             if (opsPtr[2] != nullptr)
9332             {
9333                 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9334             }
9335             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9336             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9337
9338             noway_assert((destPtr->InReg()) && // Dest
9339                          (destPtr->gtRegNum == REG_EDI));
9340
9341             noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9342                          (genRegMask(srcPtrOrVal->gtRegNum) == regs));
9343
9344             if (sizeIsConst)
9345             {
9346                 inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
9347             }
9348             else
9349             {
9350                 noway_assert((sizeNode->InReg()) && // Size
9351                              (sizeNode->gtRegNum == REG_ECX));
9352             }
9353
9354             if (isInitBlk)
9355                 instGen(INS_r_stosb);
9356             else
9357                 instGen(INS_r_movsb);
9358
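            // With dest in EDI, src/val in ESI/EAX and the byte count in ECX, "rep movsb"
            // copies ECX bytes while "rep stosb" stores AL into ECX bytes, advancing EDI
            // (and ESI) and leaving ECX at zero.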
9359             regTracker.rsTrackRegTrash(REG_EDI);
9360             regTracker.rsTrackRegTrash(REG_ECX);
9361
9362             if (isCopyBlk)
9363                 regTracker.rsTrackRegTrash(REG_ESI);
9364             // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9365
9366             genReleaseReg(opsPtr[0]);
9367             genReleaseReg(opsPtr[1]);
9368             if (opsPtr[2] != nullptr)
9369             {
9370                 genReleaseReg(opsPtr[2]);
9371             }
9372         }
9373
9374 #else // !CPU_USES_BLOCK_MOVE
9375
9376 #ifndef _TARGET_ARM_
9377 // Currently only the ARM implementation is provided
9378 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9379 #endif
9380         //
9381         // Is this a fixed size COPYBLK?
9382         //      or a fixed size INITBLK with a constant init value?
9383         //
9384         if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9385         {
9386             GenTreePtr dstOp          = destPtr;
9387             GenTreePtr srcOp          = srcPtrOrVal;
9388             unsigned   length         = blockSize;
9389             unsigned   fullStoreCount = length / TARGET_POINTER_SIZE;
9390             unsigned   initVal        = 0;
9391             bool       useLoop        = false;
9392
9393             if (isInitBlk)
9394             {
9395                 /* Properly extend the init constant from a U1 to a U4 */
9396                 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9397
9398                 /* If it is a non-zero value we have to replicate      */
9399                 /* the byte value four times to form the DWORD         */
9400                 /* Then we store this new value into the tree-node      */
9401
9402                 if (initVal != 0)
9403                 {
9404                     initVal                         = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9405                     srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9406                 }
9407             }
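            // For example, an init value of 0x5A is widened to 0x5A5A5A5A so that each
            // pointer-sized store below writes the byte pattern to four bytes at once.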
9408
9409             // Will we be using a loop to implement this INITBLK/COPYBLK?
9410             if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
9411             {
9412                 useLoop = true;
9413             }
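            // Below the loop threshold we simply unroll the stores; above it we emit a small
            // loop that processes two pointer-sized words per iteration (see pairStoreLoopCount).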
9414
9415             regMaskTP usedRegs;
9416             regNumber regDst;
9417             regNumber regSrc;
9418             regNumber regTemp;
9419
9420             /* Evaluate dest and src/val */
9421
9422             if (tree->gtFlags & GTF_REVERSE_OPS)
9423             {
9424                 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9425                 assert(srcOp->InReg());
9426
9427                 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9428                 assert(dstOp->InReg());
9429                 regDst = dstOp->gtRegNum;
9430
9431                 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9432                 regSrc = srcOp->gtRegNum;
9433             }
9434             else
9435             {
9436                 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9437                 assert(dstOp->InReg());
9438
9439                 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9440                 assert(srcOp->InReg());
9441                 regSrc = srcOp->gtRegNum;
9442
9443                 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9444                 regDst = dstOp->gtRegNum;
9445             }
9446             assert(dstOp->InReg());
9447             assert(srcOp->InReg());
9448
9449             regDst                = dstOp->gtRegNum;
9450             regSrc                = srcOp->gtRegNum;
9451             usedRegs              = (genRegMask(regSrc) | genRegMask(regDst));
9452             bool     dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9453             emitAttr dstType      = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9454             emitAttr srcType;
9455
9456             if (isCopyBlk)
9457             {
9458                 // Prefer a low register, but avoid one of the ones we've already grabbed
9459                 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9460                 usedRegs |= genRegMask(regTemp);
9461                 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9462                 srcType           = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9463             }
9464             else
9465             {
9466                 regTemp = REG_STK;
9467                 srcType = EA_PTRSIZE;
9468             }
9469
9470             instruction loadIns  = ins_Load(TYP_I_IMPL);  // INS_ldr
9471             instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
9472
9473             int finalOffset;
9474
9475             // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
9476             if (!useLoop)
9477             {
9478                 for (unsigned i = 0; i < fullStoreCount; i++)
9479                 {
9480                     if (isCopyBlk)
9481                     {
9482                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9483                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9484                         gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9485                         regTracker.rsTrackRegTrash(regTemp);
9486                     }
9487                     else
9488                     {
9489                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9490                     }
9491                 }
9492
9493                 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9494                 length -= finalOffset;
9495             }
9496             else // We will use a loop to implement this INITBLK/COPYBLK
9497             {
9498                 unsigned pairStoreLoopCount = fullStoreCount / 2;
9499
9500                 // We need a second temp register for CopyBlk
9501                 regNumber regTemp2 = REG_STK;
9502                 if (isCopyBlk)
9503                 {
9504                     // Prefer a low register, but avoid one of the ones we've already grabbed
9505                     regTemp2 =
9506                         regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9507                     usedRegs |= genRegMask(regTemp2);
9508                 }
9509
9510                 // Pick and initialize the loop counter register
9511                 regNumber regLoopIndex;
9512                 regLoopIndex =
9513                     regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9514                 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9515
9516                 // Create and define the Basic Block for the loop top
9517                 BasicBlock* loopTopBlock = genCreateTempLabel();
9518                 genDefineTempLabel(loopTopBlock);
9519
9520                 // The loop body
9521                 if (isCopyBlk)
9522                 {
9523                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9524                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9525                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9526                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9527                     getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9528                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9529                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9530                     regTracker.rsTrackRegTrash(regSrc);
9531                     regTracker.rsTrackRegTrash(regTemp);
9532                     regTracker.rsTrackRegTrash(regTemp2);
9533                 }
9534                 else // isInitBlk
9535                 {
9536                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9537                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
9538                 }
9539
9540                 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9541                 regTracker.rsTrackRegTrash(regDst);
9542                 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9543                 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9544                 inst_JMP(jmpGTS, loopTopBlock);
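                // The emitted loop looks roughly like this for a copyblk (register names are
                // whatever rsGrabReg picked):
                //     loopTop:
                //         ldr  rTmp1, [rSrc]
                //         ldr  rTmp2, [rSrc, #4]
                //         str  rTmp1, [rDst]
                //         str  rTmp2, [rDst, #4]
                //         add  rSrc, rSrc, #8
                //         add  rDst, rDst, #8
                //         subs rLoopIndex, rLoopIndex, #1
                //         bgt  loopTop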
9545
9546                 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9547
9548                 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
9549
9550                 if (length & TARGET_POINTER_SIZE)
9551                 {
9552                     if (isCopyBlk)
9553                     {
9554                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9555                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9556                     }
9557                     else
9558                     {
9559                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9560                     }
9561                     finalOffset = TARGET_POINTER_SIZE;
9562                     length -= TARGET_POINTER_SIZE;
9563                 }
9564                 else
9565                 {
9566                     finalOffset = 0;
9567                 }
9568             }
9569
9570             if (length & sizeof(short))
9571             {
9572                 loadIns  = ins_Load(TYP_USHORT);  // INS_ldrh
9573                 storeIns = ins_Store(TYP_USHORT); // INS_strh
9574
9575                 if (isCopyBlk)
9576                 {
9577                     getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9578                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9579                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9580                     regTracker.rsTrackRegTrash(regTemp);
9581                 }
9582                 else
9583                 {
9584                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9585                 }
9586                 length -= sizeof(short);
9587                 finalOffset += sizeof(short);
9588             }
9589
9590             if (length & sizeof(char))
9591             {
9592                 loadIns  = ins_Load(TYP_UBYTE);  // INS_ldrb
9593                 storeIns = ins_Store(TYP_UBYTE); // INS_strb
9594
9595                 if (isCopyBlk)
9596                 {
9597                     getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9598                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9599                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9600                     regTracker.rsTrackRegTrash(regTemp);
9601                 }
9602                 else
9603                 {
9604                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9605                 }
9606                 length -= sizeof(char);
9607             }
9608             assert(length == 0);
9609
9610             genReleaseReg(dstOp);
9611             genReleaseReg(srcOp);
9612         }
9613         else
9614         {
9615             //
9616             // This is a variable-sized COPYBLK/INITBLK,
9617             //   or a fixed size INITBLK with a variable init value,
9618             //
9619
9620             // In what order should the Dest, Val/Src, and Size be evaluated?
9621
9622             compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
9623
9624             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9625             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9626             if (opsPtr[2] != nullptr)
9627             {
9628                 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
9629             }
9630             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9631             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9632
9633             noway_assert((destPtr->InReg()) && // Dest
9634                          (destPtr->gtRegNum == REG_ARG_0));
9635
9636             noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9637                          (srcPtrOrVal->gtRegNum == REG_ARG_1));
9638
9639             if (sizeIsConst)
9640             {
9641                 inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
9642             }
9643             else
9644             {
9645                 noway_assert((sizeNode->InReg()) && // Size
9646                              (sizeNode->gtRegNum == REG_ARG_2));
9647             }
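            // The arguments are now set up to match the helper signatures: dest in REG_ARG_0,
            // src (memcpy) or the init value (memset) in REG_ARG_1, and the byte count in
            // REG_ARG_2.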
9648
9649             regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9650
9651             genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
9652                                         /* GT_INITBLK */
9653                                         : CORINFO_HELP_MEMSET,
9654                               0, EA_UNKNOWN);
9655
9656             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9657
9658             regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9659             genReleaseReg(opsPtr[0]);
9660             genReleaseReg(opsPtr[1]);
9661             if (opsPtr[2] != nullptr)
9662             {
9663                 genReleaseReg(opsPtr[2]);
9664             }
9665         }
9666
9667         if (isCopyBlk && dest->AsBlk()->IsVolatile())
9668         {
9669             // Emit a memory barrier instruction after the CopyBlk
9670             instGen_MemoryBarrier();
9671         }
9672 #endif // !CPU_USES_BLOCK_MOVE
9673     }
9674 }
9675 BasicBlock dummyBB;
9676
9677 #ifdef _PREFAST_
9678 #pragma warning(push)
9679 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
9680 #endif
9681 void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
9682 {
9683     const genTreeOps oper     = tree->OperGet();
9684     const var_types  treeType = tree->TypeGet();
9685     GenTreePtr       op1      = tree->gtOp.gtOp1;
9686     GenTreePtr       op2      = tree->gtGetOp2IfPresent();
9687     regNumber        reg      = DUMMY_INIT(REG_CORRUPT);
9688     regMaskTP        regs     = regSet.rsMaskUsed;
9689     regMaskTP        needReg  = destReg;
9690     insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9691     emitAttr         size;
9692     instruction      ins;
9693     regMaskTP        addrReg;
9694     GenTreePtr       opsPtr[3];
9695     regMaskTP        regsPtr[3];
9696
9697 #ifdef DEBUG
9698     addrReg = 0xDEADCAFE;
9699 #endif
9700
9701     noway_assert(tree->OperKind() & GTK_SMPOP);
9702
9703     switch (oper)
9704     {
9705         case GT_ASG:
9706             if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR)
9707             {
9708                 genCodeForBlkOp(tree, destReg);
9709             }
9710             else
9711             {
9712                 genCodeForTreeSmpOpAsg(tree);
9713             }
9714             return;
9715
9716         case GT_ASG_LSH:
9717         case GT_ASG_RSH:
9718         case GT_ASG_RSZ:
9719             genCodeForAsgShift(tree, destReg, bestReg);
9720             return;
9721
9722         case GT_ASG_AND:
9723         case GT_ASG_OR:
9724         case GT_ASG_XOR:
9725         case GT_ASG_ADD:
9726         case GT_ASG_SUB:
9727             genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9728             return;
9729
9730         case GT_CHS:
9731             addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9732 #ifdef _TARGET_XARCH_
9733             // Note that the specialCase here occurs when the treeType specifies a byte sized operation
9734             // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9735             //
9736             bool specialCase;
9737             specialCase = false;
9738             if (op1->gtOper == GT_REG_VAR)
9739             {
9740                 /* Get hold of the target register */
9741
9742                 reg = op1->gtRegVar.gtRegNum;
9743                 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9744                 {
9745                     regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9746
9747                     inst_RV_RV(INS_mov, byteReg, reg);
9748                     regTracker.rsTrackRegTrash(byteReg);
9749
9750                     inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9751                     var_types   op1Type     = op1->TypeGet();
9752                     instruction wideningIns = ins_Move_Extend(op1Type, true);
9753                     inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
9754                     regTracker.rsTrackRegTrash(reg);
9755                     specialCase = true;
9756                 }
9757             }
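            // For example, if a TYP_BYTE local lives in ESI (which has no byte-addressable form
            // in 32-bit mode), the emitted sequence is roughly:
            //     mov   eax, esi     ; copy into a byte-addressable register
            //     neg   al           ; negate the byte
            //     movsx esi, al      ; widen the result back into the home register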
9758
9759             if (!specialCase)
9760             {
9761                 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9762             }
9763 #else // not  _TARGET_XARCH_
9764             if (op1->InReg())
9765             {
9766                 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9767             }
9768             else
9769             {
9770                 // Fix 388382 ARM JitStress WP7
9771                 var_types op1Type = op1->TypeGet();
9772                 regNumber reg     = regSet.rsPickFreeReg();
9773                 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9774                 regTracker.rsTrackRegTrash(reg);
9775                 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9776                 inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
9777             }
9778 #endif
9779             if (op1->InReg())
9780                 regTracker.rsTrackRegTrash(op1->gtRegNum);
9781             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9782
9783             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9784             return;
9785
9786         case GT_AND:
9787         case GT_OR:
9788         case GT_XOR:
9789         case GT_ADD:
9790         case GT_SUB:
9791         case GT_MUL:
9792             genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9793             return;
9794
9795         case GT_UMOD:
9796             genCodeForUnsignedMod(tree, destReg, bestReg);
9797             return;
9798
9799         case GT_MOD:
9800             genCodeForSignedMod(tree, destReg, bestReg);
9801             return;
9802
9803         case GT_UDIV:
9804             genCodeForUnsignedDiv(tree, destReg, bestReg);
9805             return;
9806
9807         case GT_DIV:
9808             genCodeForSignedDiv(tree, destReg, bestReg);
9809             return;
9810
9811         case GT_LSH:
9812         case GT_RSH:
9813         case GT_RSZ:
9814             genCodeForShift(tree, destReg, bestReg);
9815             return;
9816
9817         case GT_NEG:
9818         case GT_NOT:
9819
9820             /* Generate the operand into some register */
9821
9822             genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9823             noway_assert(op1->InReg());
9824
9825             reg = op1->gtRegNum;
9826
9827             /* Negate/reverse the value in the register */
9828
9829             inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
9830
9831             /* The register is now trashed */
9832
9833             regTracker.rsTrackRegTrash(reg);
9834
9835             genCodeForTree_DONE(tree, reg);
9836             return;
9837
9838         case GT_IND:
9839         case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
9840
9841             /* Make sure the operand is addressable */
9842
9843             addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9844
9845             genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9846
9847             /* Figure out the size of the value being loaded */
9848
9849             size = EA_ATTR(genTypeSize(tree->gtType));
9850
9851             /* Pick a register for the value */
9852
9853             if (needReg == RBM_ALLINT && bestReg == 0)
9854             {
9855                 /* Absent a better suggestion, pick a useless register */
9856
9857                 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9858             }
9859
9860             reg = regSet.rsPickReg(needReg, bestReg);
9861
9862             if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9863             {
9864                 noway_assert(size == EA_PTRSIZE);
9865                 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
9866                                           (int)op1->gtIntCon.gtIconVal);
9867             }
9868             else
9869             {
9870                 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9871
9872                 inst_mov_RV_ST(reg, tree);
9873             }
9874
9875 #ifdef _TARGET_ARM_
9876             if (tree->gtFlags & GTF_IND_VOLATILE)
9877             {
9878                 // Emit a memory barrier instruction after the load
9879                 instGen_MemoryBarrier();
9880             }
9881 #endif
9882
9883             /* Note the new contents of the register we used */
9884
9885             regTracker.rsTrackRegTrash(reg);
9886
9887 #ifdef DEBUG
9888             /* Update the live set of register variables */
9889             if (compiler->opts.varNames)
9890                 genUpdateLife(tree);
9891 #endif
9892
9893             /* Now we can update the register pointer information */
9894
9895             // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9896             gcInfo.gcMarkRegPtrVal(reg, treeType);
9897
9898             genCodeForTree_DONE_LIFE(tree, reg);
9899             return;
9900
9901         case GT_CAST:
9902
9903             genCodeForNumericCast(tree, destReg, bestReg);
9904             return;
9905
9906         case GT_JTRUE:
9907
9908             /* Is this a test of a relational operator? */
9909
9910             if (op1->OperIsCompare())
9911             {
9912                 /* Generate the conditional jump */
9913
9914                 genCondJump(op1);
9915
9916                 genUpdateLife(tree);
9917                 return;
9918             }
9919
9920 #ifdef DEBUG
9921             compiler->gtDispTree(tree);
9922 #endif
9923             NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9924             break;
9925
9926         case GT_SWITCH:
9927             genCodeForSwitch(tree);
9928             return;
9929
9930         case GT_RETFILT:
9931             noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9932             if (op1 == 0) // endfinally
9933             {
9934                 reg = REG_NA;
9935
9936 #ifdef _TARGET_XARCH_
9937                 /* Return using a pop-jmp sequence. As the "try" block calls
9938                    the finally with a jmp, this leaves the x86 call-ret stack
9939                    balanced in the normal flow path. */
9940
9941                 noway_assert(isFramePointerRequired());
9942                 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9943                 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
9944 #elif defined(_TARGET_ARM_)
9945 // Nothing needed for ARM
9946 #else
9947                 NYI("TARGET");
9948 #endif
9949             }
9950             else // endfilter
9951             {
9952                 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9953                 noway_assert(op1->InReg());
9954                 noway_assert(op1->gtRegNum == REG_INTRET);
9955                 /* The return value has now been computed */
9956                 reg = op1->gtRegNum;
9957
9958                 /* Return */
9959                 instGen_Return(0);
9960             }
9961
9962             genCodeForTree_DONE(tree, reg);
9963             return;
9964
9965         case GT_RETURN:
9966
9967             // TODO: this should be done AFTER we have called the monitor exit, so that
9968             //       we are sure that we don't have to keep 'this' alive
9969
9970             if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9971             {
9972                 /* either it's an "empty" statement or the return statement
9973                    of a synchronized method
9974                  */
9975
9976                 genPInvokeMethodEpilog();
9977             }
9978
9979             /* Is there a return value and/or an exit statement? */
9980
9981             if (op1)
9982             {
9983                 if (op1->gtType == TYP_VOID)
9984                 {
9985                     // We're returning nothing, just generate the block (shared epilog calls).
9986                     genCodeForTree(op1, 0);
9987                 }
9988 #ifdef _TARGET_ARM_
9989                 else if (op1->gtType == TYP_STRUCT)
9990                 {
9991                     if (op1->gtOper == GT_CALL)
9992                     {
9993                         // We have a return call() because we failed to tail call.
9994                         // In any case, just generate the call and be done.
9995                         assert(compiler->IsHfa(op1));
9996                         genCodeForCall(op1->AsCall(), true);
9997                         genMarkTreeInReg(op1, REG_FLOATRET);
9998                     }
9999                     else
10000                     {
10001                         assert(op1->gtOper == GT_LCL_VAR);
10002                         assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
10003                         genLoadIntoFltRetRegs(op1);
10004                     }
10005                 }
10006                 else if (op1->TypeGet() == TYP_FLOAT)
10007                 {
10008                     // This can only occur when we are returning a non-HFA struct
10009                     // that is composed of a single float field and we performed
10010                     // struct promotion and enregistered the float field.
10011                     //
10012                     genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
10013                     getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
10014                 }
10015 #endif // _TARGET_ARM_
10016                 else
10017                 {
10018                     // we can now go through this code for compiler->genReturnBB.  I've regularized all the code.
10019
10020                     // noway_assert(compiler->compCurBB != compiler->genReturnBB);
10021
10022                     noway_assert(op1->gtType != TYP_VOID);
10023
10024                     /* Generate the return value into the return register */
10025
10026                     genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
10027
10028                     /* The result must now be in the return register */
10029
10030                     noway_assert(op1->InReg());
10031                     noway_assert(op1->gtRegNum == REG_INTRET);
10032                 }
10033
10034                 /* The return value has now been computed */
10035
10036                 reg = op1->gtRegNum;
10037
10038                 genCodeForTree_DONE(tree, reg);
10039             }
10040
10041 #ifdef PROFILING_SUPPORTED
10042             // The profiling hook does not trash registers, so it's safe to call after we emit the code for
10043             // the GT_RETURN tree.
10044
10045             if (compiler->compCurBB == compiler->genReturnBB)
10046             {
10047                 genProfilingLeaveCallback();
10048             }
10049 #endif
10050 #ifdef DEBUG
10051             if (compiler->opts.compStackCheckOnRet)
10052             {
10053                 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
10054                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
10055                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
10056                 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
10057
10058                 BasicBlock*  esp_check = genCreateTempLabel();
10059                 emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
10060                 inst_JMP(jmpEqual, esp_check);
10061                 getEmitter()->emitIns(INS_BREAKPOINT);
10062                 genDefineTempLabel(esp_check);
10063             }
10064 #endif
10065             return;
10066
10067         case GT_COMMA:
10068
10069             if (tree->gtFlags & GTF_REVERSE_OPS)
10070             {
10071                 if (tree->gtType == TYP_VOID)
10072                 {
10073                     genEvalSideEffects(op2);
10074                     genUpdateLife(op2);
10075                     genEvalSideEffects(op1);
10076                     genUpdateLife(tree);
10077                     return;
10078                 }
10079
10080                 // Generate op2
10081                 genCodeForTree(op2, needReg);
10082                 genUpdateLife(op2);
10083
10084                 noway_assert(op2->InReg());
10085
10086                 regSet.rsMarkRegUsed(op2);
10087
10088                 // Do side effects of op1
10089                 genEvalSideEffects(op1);
10090
10091                 // Recover op2 if spilled
10092                 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
10093
10094                 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
10095
10096                 // Set the GC info if we need to
10097                 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
10098
10099                 genUpdateLife(tree);
10100                 genCodeForTree_DONE(tree, op2->gtRegNum);
10101
10102                 return;
10103             }
10104             else
10105             {
10106                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
10107
10108                 /* Generate side effects of the first operand */
10109
10110                 genEvalSideEffects(op1);
10111                 genUpdateLife(op1);
10112
10113                 /* Is the value of the second operand used? */
10114
10115                 if (tree->gtType == TYP_VOID)
10116                 {
10117                     /* The right operand produces no result. The morpher is
10118                        responsible for resetting the type of GT_COMMA nodes
10119                        to TYP_VOID if op2 isn't meant to yield a result. */
10120
10121                     genEvalSideEffects(op2);
10122                     genUpdateLife(tree);
10123                     return;
10124                 }
10125
10126                 /* Generate the second operand, i.e. the 'real' value */
10127
10128                 genCodeForTree(op2, needReg);
10129                 noway_assert(op2->InReg());
10130
10131                 /* The result of 'op2' is also the final result */
10132
10133                 reg = op2->gtRegNum;
10134
10135                 /* Remember whether we set the flags */
10136
10137                 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
10138
10139                 genCodeForTree_DONE(tree, reg);
10140                 return;
10141             }
10142
10143         case GT_BOX:
10144             genCodeForTree(op1, needReg);
10145             noway_assert(op1->InReg());
10146
10147             /* The result of 'op1' is also the final result */
10148
10149             reg = op1->gtRegNum;
10150
10151             /* Remember whether we set the flags */
10152
10153             tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
10154
10155             genCodeForTree_DONE(tree, reg);
10156             return;
10157
10158         case GT_QMARK:
10159
10160             genCodeForQmark(tree, destReg, bestReg);
10161             return;
10162
10163         case GT_NOP:
10164
10165 #if OPT_BOOL_OPS
10166             if (op1 == NULL)
10167                 return;
10168 #endif
10169             __fallthrough;
10170
10171         case GT_INIT_VAL:
10172
10173             /* Generate the operand into some register */
10174
10175             genCodeForTree(op1, needReg);
10176
10177             /* The result is the same as the operand */
10178
10179             reg = op1->gtRegNum;
10180
10181             genCodeForTree_DONE(tree, reg);
10182             return;
10183
10184         case GT_INTRINSIC:
10185
10186             switch (tree->gtIntrinsic.gtIntrinsicId)
10187             {
10188                 case CORINFO_INTRINSIC_Round:
10189                 {
10190                     noway_assert(tree->gtType == TYP_INT);
10191
10192 #if FEATURE_STACK_FP_X87
10193                     genCodeForTreeFlt(op1);
10194
10195                     /* Store the FP value into the temp */
10196                     TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10197
10198                     FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10199                     FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10200                     inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10201
10202                     reg = regSet.rsPickReg(needReg, bestReg);
10203                     regTracker.rsTrackRegTrash(reg);
10204
10205                     inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10206
10207                     compiler->tmpRlsTemp(temp);
10208 #else
10209                     genCodeForTreeFloat(tree, needReg, bestReg);
10210                     return;
10211 #endif
10212                 }
10213                 break;
10214
10215                 default:
10216                     noway_assert(!"unexpected math intrinsic");
10217             }
10218
10219             genCodeForTree_DONE(tree, reg);
10220             return;
10221
10222         case GT_LCLHEAP:
10223
10224             reg = genLclHeap(op1);
10225             genCodeForTree_DONE(tree, reg);
10226             return;
10227
10228         case GT_EQ:
10229         case GT_NE:
10230         case GT_LT:
10231         case GT_LE:
10232         case GT_GE:
10233         case GT_GT:
10234             genCodeForRelop(tree, destReg, bestReg);
10235             return;
10236
10237         case GT_ADDR:
10238
10239             genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10240             return;
10241
10242 #ifdef _TARGET_XARCH_
10243         case GT_LOCKADD:
10244
10245             // This is for a locked add operation.  We know that the resulting value doesn't "go" anywhere.
10246             // For reference, op1 is the location.  op2 is the addend or the value.
10247             if (op2->OperIsConst())
10248             {
10249                 noway_assert(op2->TypeGet() == TYP_INT);
10250                 ssize_t cns = op2->gtIntCon.gtIconVal;
10251
10252                 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10253                 switch (cns)
10254                 {
10255                     case 1:
10256                         instGen(INS_lock);
10257                         instEmit_RM(INS_inc, op1, op1, 0);
10258                         break;
10259                     case -1:
10260                         instGen(INS_lock);
10261                         instEmit_RM(INS_dec, op1, op1, 0);
10262                         break;
10263                     default:
10264                         assert((int)cns == cns); // By test above for AMD64.
10265                         instGen(INS_lock);
10266                         inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
10267                         break;
10268                 }
10269                 genReleaseReg(op1);
10270             }
10271             else
10272             {
10273                 // A non-constant addend needs to go into a register.
10274                 ins = INS_add;
10275                 goto LockBinOpCommon;
10276             }
10277
10278             genFlagsEqualToNone(); // We didn't compute a result into a register.
10279             genUpdateLife(tree);   // We didn't compute an operand into anything.
10280             return;
10281
10282         case GT_XADD:
10283             ins = INS_xadd;
10284             goto LockBinOpCommon;
10285         case GT_XCHG:
10286             ins = INS_xchg;
10287             goto LockBinOpCommon;
10288         LockBinOpCommon:
10289         {
10290             // Compute the second operand into a register.  xadd and xchg are r/m32, r32.  So even if op2
10291             // is a constant, it needs to be in a register.  This should be the output register if
10292             // possible.
10293             //
10294             // For reference, gtOp1 is the location.  gtOp2 is the addend or the value.
10295
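            // The emitted form is "lock xadd [location], reg" or "xchg [location], reg" (xchg
            // locks implicitly); either way the register ends up holding the value that was
            // previously at the location, which becomes the result of the GT_XADD/GT_XCHG node.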
10296             GenTreePtr location = op1;
10297             GenTreePtr value    = op2;
10298
10299             // Again, a friendly reminder.  IL calling convention is left to right.
10300             if (tree->gtFlags & GTF_REVERSE_OPS)
10301             {
10302                 // The atomic operations destroy this argument, so force it into a scratch register
10303                 reg = regSet.rsPickFreeReg();
10304                 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10305
10306                 // Must evaluate location into a register
10307                 genCodeForTree(location, needReg, RBM_NONE);
10308                 assert(location->InReg());
10309                 regSet.rsMarkRegUsed(location);
10310                 regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10311                 genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10312                 regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10313
10314                 if (ins != INS_xchg)
10315                 {
10316                     // xchg implies the lock prefix, but xadd and add require it.
10317                     instGen(INS_lock);
10318                 }
10319                 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10320                 genReleaseReg(value);
10321                 regTracker.rsTrackRegTrash(reg);
10322                 genReleaseReg(location);
10323             }
10324             else
10325             {
10326                 regMaskTP addrReg;
10327                 if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
10328                                        needReg, RegSet::KEEP_REG, &addrReg))
10329                 {
10330                     genUpdateLife(location);
10331
10332                     reg = regSet.rsPickFreeReg();
10333                     genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10334                     addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10335
10336                     if (ins != INS_xchg)
10337                     {
10338                         // xchg implies the lock prefix, but xadd and add require it.
10339                         instGen(INS_lock);
10340                     }
10341
10342                     // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10343                     // inst_TT_RV(ins, location, reg);
10344                     sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10345
10346                     genReleaseReg(value);
10347                     regTracker.rsTrackRegTrash(reg);
10348                     genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10349                 }
10350                 else
10351                 {
10352                     // Must evaluate location into a register.
10353                     genCodeForTree(location, needReg, RBM_NONE);
10354                     assert(location->InReg());
10355                     regSet.rsMarkRegUsed(location);
10356
10357                     // xadd destroys this argument, so force it into a scratch register
10358                     reg = regSet.rsPickFreeReg();
10359                     genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10360                     regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10361                     genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10362                     regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10363
10364                     if (ins != INS_xchg)
10365                     {
10366                         // xchg implies the lock prefix, but xadd and add require it.
10367                         instGen(INS_lock);
10368                     }
10369
10370                     instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10371
10372                     genReleaseReg(value);
10373                     regTracker.rsTrackRegTrash(reg);
10374                     genReleaseReg(location);
10375                 }
10376             }
10377
10378             // The flags are equal to the target of the tree (i.e. the result of the add), not to the
10379             // result in the register.  If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10380             // that information to set the flags.  Doesn't seem like there is a good reason for that.
10381             // Therefore, trash the flags.
10382             genFlagsEqualToNone();
10383
10384             if (ins == INS_add)
10385             {
10386                 // If the operator was add, then we were called from the GT_LOCKADD
10387                 // case.  In that case we don't use the result, so we don't need to
10388                 // update anything.
10389                 genUpdateLife(tree);
10390             }
10391             else
10392             {
10393                 genCodeForTree_DONE(tree, reg);
10394             }
10395         }
10396             return;
10397
10398 #else // !_TARGET_XARCH_
10399
10400         case GT_LOCKADD:
10401         case GT_XADD:
10402         case GT_XCHG:
10403
10404             NYI_ARM("LOCK instructions");
10405 #endif
10406
10407         case GT_ARR_LENGTH:
10408         {
10409             // Make the corresponding ind(a + c) node, and do codegen for that.
10410             GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
10411                                                       compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10412             tree->SetOper(GT_IND);
10413             tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
10414             assert(tree->TypeGet() == TYP_INT);
10415             tree->gtOp.gtOp1 = addr;
10416             genCodeForTree(tree, destReg, bestReg);
10417             return;
10418         }
10419
10420         case GT_OBJ:
10421             // All GT_OBJ nodes must have been morphed prior to this.
10422             noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10423
10424         default:
10425 #ifdef DEBUG
10426             compiler->gtDispTree(tree);
10427 #endif
10428             noway_assert(!"unexpected unary/binary operator");
10429     } // end switch (oper)
10430
10431     unreached();
10432 }
10433 #ifdef _PREFAST_
10434 #pragma warning(pop) // End suppress PREFast warning about overly large function
10435 #endif
10436
10437 regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
10438 {
10439     instruction ins;
10440     emitAttr    size;
10441     bool        unsv;
10442     bool        andv = false;
10443     regNumber   reg;
10444     GenTreePtr  op1     = tree->gtOp.gtOp1->gtEffectiveVal();
10445     var_types   dstType = tree->CastToType();
10446     var_types   srcType = op1->TypeGet();
10447
10448     if (genTypeSize(srcType) < genTypeSize(dstType))
10449     {
10450         // Widening cast
10451
10452         /* we need the source size */
10453
10454         size = EA_ATTR(genTypeSize(srcType));
10455
10456         noway_assert(size < EA_PTRSIZE);
10457
10458         unsv = varTypeIsUnsigned(srcType);
10459         ins  = ins_Move_Extend(srcType, op1->InReg());
10460
10461         /*
10462             Special case: for a cast of byte to char we first
10463             have to expand the byte (w/ sign extension), then
10464             mask off the high bits.
10465             Use 'movsx' followed by 'and'
10466         */
10467         if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
10468         {
10469             noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
10470             andv = true;
10471         }
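        // For example, casting the byte value 0xFF (-1) to char first sign-extends it to
        // 0xFFFFFFFF with movsx and then masks with "and reg, 0xFFFF", yielding 0x0000FFFF.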
10472     }
10473     else
10474     {
10475         // Narrowing cast, or sign-changing cast
10476
10477         noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
10478
10479         size = EA_ATTR(genTypeSize(dstType));
10480
10481         unsv = varTypeIsUnsigned(dstType);
10482         ins  = ins_Move_Extend(dstType, op1->InReg());
10483     }
10484
10485     noway_assert(size < EA_PTRSIZE);
10486
10487     // Set bestReg to the same register as op1 if op1 is a regVar and its register is available
10488     if (op1->InReg())
10489     {
10490         regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
10491         if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
10492         {
10493             bestReg = op1RegMask;
10494         }
10495     }
10496
10497     /* Is the value sitting in a non-byte-addressable register? */
10498
10499     if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
10500     {
10501         if (unsv)
10502         {
10503             // For unsigned values we can use AND, so it need not be a byte register
10504
10505             reg = regSet.rsPickReg(needReg, bestReg);
10506
10507             ins = INS_AND;
10508         }
10509         else
10510         {
10511             /* Move the value into a byte register */
10512
10513             reg = regSet.rsGrabReg(RBM_BYTE_REGS);
10514         }
10515
10516         if (reg != op1->gtRegNum)
10517         {
10518             /* Move the value into that register */
10519
10520             regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
10521             inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
10522
10523             /* The value has a new home now */
10524
10525             op1->gtRegNum = reg;
10526         }
10527     }
10528     else
10529     {
10530         /* Pick a register for the value (general case) */
10531
10532         reg = regSet.rsPickReg(needReg, bestReg);
10533
10534         // if we (might) need to set the flags and the value is in the same register
10535         // and we have an unsigned value then use AND instead of MOVZX
10536         if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
10537         {
10538 #ifdef _TARGET_X86_
10539             noway_assert(ins == INS_movzx);
10540 #endif
10541             ins = INS_AND;
10542         }
10543     }
10544
10545     if (ins == INS_AND)
10546     {
10547         noway_assert(andv == false && unsv);
10548
10549         /* Generate "and reg, MASK" */
10550
10551         insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10552         inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
10553
10554         if (tree->gtSetFlags())
10555             genFlagsEqualToReg(tree, reg);
10556     }
10557     else
10558     {
10559 #ifdef _TARGET_XARCH_
10560         noway_assert(ins == INS_movsx || ins == INS_movzx);
10561 #endif
10562
10563         /* Generate "movsx/movzx reg, [addr]" */
10564
10565         inst_RV_ST(ins, size, reg, op1);
10566
10567         /* Mask off high bits for cast from byte to char */
10568
10569         if (andv)
10570         {
10571 #ifdef _TARGET_XARCH_
10572             noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
10573 #endif
10574             insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10575             inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
10576
10577             if (tree->gtSetFlags())
10578                 genFlagsEqualToReg(tree, reg);
10579         }
10580     }
10581
10582     regTracker.rsTrackRegTrash(reg);
10583     return reg;
10584 }
10585
10586 void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
10587 {
10588     GenTreePtr op1      = tree->gtOp.gtOp1;
10589     var_types  dstType  = tree->CastToType();
10590     var_types  baseType = TYP_INT;
10591     regNumber  reg      = DUMMY_INIT(REG_CORRUPT);
10592     regMaskTP  needReg  = destReg;
10593     regMaskTP  addrReg;
10594     emitAttr   size;
10595     BOOL       unsv;
10596
10597     /*
10598       * Constant casts should have been folded earlier
10599       * If the value is not finite, don't bother
10600       * We don't do this optimization for debug code or when optimizations are disabled
10601       */
10602
10603     noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
10604                  tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
10605                  !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
10606
10607     noway_assert(dstType != TYP_VOID);
10608
10609     /* What type are we casting from? */
10610
10611     switch (op1->TypeGet())
10612     {
10613         case TYP_LONG:
10614
10615             /* Special case: the long is generated via the mod of long
10616                with an int.  This is really an int and need not be
10617                converted to a reg pair. NOTE: the flag only indicates
10618                that this is a cast to TYP_INT, it hasn't actually
10619                verified the second operand of the MOD! */
10620
10621             if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
10622             {
10623
10624                 /* Verify that the op2 of the mod node is
10625                    1) An integer tree, or
10626                    2) A long constant that is small enough to fit in an integer
10627                 */
10628
10629                 GenTreePtr modop2 = op1->gtOp.gtOp2;
10630                 if ((genActualType(modop2->gtType) == TYP_INT) ||
10631                     ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
10632                 {
10633                     genCodeForTree(op1, destReg, bestReg);
10634
10635 #ifdef _TARGET_64BIT_
10636                     reg = op1->gtRegNum;
10637 #else  // _TARGET_64BIT_
10638                     reg = genRegPairLo(op1->gtRegPair);
10639 #endif //_TARGET_64BIT_
10640
10641                     genCodeForTree_DONE(tree, reg);
10642                     return;
10643                 }
10644             }
10645
10646             /* Make the operand addressable.  When gtOverflow() is true,
10647                hold on to the addrReg as we will need it to access the higher dword */
10648
10649             op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
10650                                             // twice!)
10651                                             // See, e.g., the TYP_INT case below...
10652
10653             addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
10654
10655             /* Load the lower half of the value into some register */
10656
10657             if (op1->InReg())
10658             {
10659                 /* Can we simply use the low part of the value? */
10660                 reg = genRegPairLo(op1->gtRegPair);
10661
10662                 if (tree->gtOverflow())
10663                     goto REG_OK;
10664
10665                 regMaskTP loMask;
10666                 loMask = genRegMask(reg);
10667                 if (loMask & regSet.rsRegMaskFree())
10668                     bestReg = loMask;
10669             }
10670
10671             // for cast overflow we need to preserve addrReg for testing the hiDword
10672             // so we lock it to prevent regSet.rsPickReg from picking it.
10673             if (tree->gtOverflow())
10674                 regSet.rsLockUsedReg(addrReg);
10675
10676             reg = regSet.rsPickReg(needReg, bestReg);
10677
10678             if (tree->gtOverflow())
10679                 regSet.rsUnlockUsedReg(addrReg);
10680
10681             noway_assert(genStillAddressable(op1));
10682
10683         REG_OK:
10684             if (!op1->InReg() || (reg != genRegPairLo(op1->gtRegPair)))
10685             {
10686                 /* Generate "mov reg, [addr-mode]" */
10687                 inst_RV_TT(ins_Load(TYP_INT), reg, op1);
10688             }
10689
10690             /* conv.ovf.i8i4, or conv.ovf.u8u4 */
10691
10692             if (tree->gtOverflow())
10693             {
10694                 regNumber hiReg = (op1->InReg()) ? genRegPairHi(op1->gtRegPair) : REG_NA;
10695
10696                 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10697                 emitJumpKind jmpLTS      = genJumpKindForOper(GT_LT, CK_SIGNED);
10698
10699                 switch (dstType)
10700                 {
10701                     case TYP_INT:
10702                         // conv.ovf.i8.i4
10703                         /*  Generate the following sequence
10704
10705                                 test loDWord, loDWord   // set flags
10706                                 jl neg
10707                            pos: test hiDWord, hiDWord   // set flags
10708                                 jne ovf
10709                                 jmp done
10710                            neg: cmp hiDWord, 0xFFFFFFFF
10711                                 jne ovf
10712                           done:
10713
10714                         */
10715
10716                         instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
10717                         if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4       (i4 > 0 and upper bits 0)
10718                         {
10719                             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
10720                             goto UPPER_BITS_ZERO;
10721                         }
10722
10723 #if CPU_LOAD_STORE_ARCH
10724                         // This is tricky.
10725                         // We will generate code like
10726                         // if (...)
10727                         // {
10728                         // ...
10729                         // }
10730                         // else
10731                         // {
10732                         // ...
10733                         // }
10734                         // We load the tree op1 into registers when we generate code for the if clause.
10735                         // When we generate the else clause, we see that the tree is already loaded into a
10736                         // register and start using it directly.
10737                         // But at run time we may execute the else clause without going through the if clause,
10738                         // so regenerate op1 here to make sure it really is in a register.
10739                         genCodeForTree(op1, 0);
10740 #endif
10741
10742                         BasicBlock* neg;
10743                         BasicBlock* done;
10744
10745                         neg  = genCreateTempLabel();
10746                         done = genCreateTempLabel();
10747
10748                         // Is the loDWord positive or negative
10749                         inst_JMP(jmpLTS, neg);
10750
10751                         // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
10752
10753                         if (hiReg < REG_STK)
10754                         {
10755                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
10756                         }
10757                         else
10758                         {
10759                             inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
10760                         }
10761
10762                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10763                         inst_JMP(EJ_jmp, done);
10764
10765                         // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
10766
10767                         genDefineTempLabel(neg);
10768
10769                         if (hiReg < REG_STK)
10770                         {
10771                             inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
10772                         }
10773                         else
10774                         {
10775                             inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
10776                         }
10777                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10778
10779                         // Done
10780
10781                         genDefineTempLabel(done);
10782
10783                         break;
10784
10785                     case TYP_UINT: // conv.ovf.u8u4
10786                     UPPER_BITS_ZERO:
10787                         // Just check that the upper DWord is 0
10788
10789                         if (hiReg < REG_STK)
10790                         {
10791                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
10792                         }
10793                         else
10794                         {
10795                             inst_TT_IV(INS_cmp, op1, 0, 4);
10796                         }
10797
10798                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10799                         break;
10800
10801                     default:
10802                         noway_assert(!"Unexpected dstType");
10803                         break;
10804                 }
10805
10806                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
10807             }
10808
10809             regTracker.rsTrackRegTrash(reg);
10810             genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
10811
10812             genCodeForTree_DONE(tree, reg);
10813             return;
10814
10815         case TYP_BOOL:
10816         case TYP_BYTE:
10817         case TYP_SHORT:
10818         case TYP_CHAR:
10819         case TYP_UBYTE:
10820             break;
10821
10822         case TYP_UINT:
10823         case TYP_INT:
10824             break;
10825
10826 #if FEATURE_STACK_FP_X87
10827         case TYP_FLOAT:
10828             NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
10829             break;
10830
10831         case TYP_DOUBLE:
10832             if (compiler->opts.compCanUseSSE2)
10833             {
10834                 // do the SSE2 based cast inline
10835                 // getting the fp operand
10836
10837                 regMaskTP addrRegInt = 0;
10838                 regMaskTP addrRegFlt = 0;
10839
10840                 // make the operand addressable
10841                 // We don't want to collapse constant doubles into floats, as the SSE2 instruction
10842                 // operates on doubles. Note that these (casts from constant doubles) usually get
10843                 // folded, but we don't do it for some cases (infinities, etc.). So essentially this
10844                 // shouldn't affect performance or size at all. We're fixing this for #336067
10845                 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
10846                 if (!addrRegFlt && !op1->IsRegVar())
10847                 {
10848                     // we have the address
10849
10850                     inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
10851                     genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10852                     genUpdateLife(op1);
10853
10854                     reg = regSet.rsPickReg(needReg);
10855                     getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10856
10857                     regTracker.rsTrackRegTrash(reg);
10858                     genCodeForTree_DONE(tree, reg);
10859                 }
10860                 else
10861                 {
10862                     // we will need to use a temp to get it into the xmm reg
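                          // The value is currently on the x87 stack (or in a float reg var), so spill it
                          // to a stack temp with fstp, reload it into XMM0 with movsd, and then convert
                          // with cvttsd2si (see the code below).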
10863                     var_types typeTemp = op1->TypeGet();
10864                     TempDsc*  temp     = compiler->tmpGetTemp(typeTemp);
10865
10866                     size = EA_ATTR(genTypeSize(typeTemp));
10867
10868                     if (addrRegFlt)
10869                     {
10870                         // On the fp stack; bring the reg to the top of the stack
10871
10872                         FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10873                     }
10874                     else
10875                     {
10876                         // op1->IsRegVar()
10877                         // pick a register
10878                         reg = regSet.PickRegFloat();
10879                         if (!op1->IsRegVarDeath())
10880                         {
10881                             // Load it on the fp stack
10882                             genLoadStackFP(op1, reg);
10883                         }
10884                         else
10885                         {
10886                             // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
10887                             genLoadStackFP(op1, reg);
10888                             FlatFPX87_MoveToTOS(&compCurFPState, reg);
10889                         }
10890                     }
10891
10892                     // pop it off the fp stack
10893                     compCurFPState.Pop();
10894
10895                     getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
10896                     // pick a reg
10897                     reg = regSet.rsPickReg(needReg);
10898
10899                     inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
10900                     getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10901
10902                     // done..release the temp
10903                     compiler->tmpRlsTemp(temp);
10904
10905                     // the reg is now trashed
10906                     regTracker.rsTrackRegTrash(reg);
10907                     genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10908                     genUpdateLife(op1);
10909                     genCodeForTree_DONE(tree, reg);
10910                 }
10911             }
10912 #else
10913         case TYP_FLOAT:
10914         case TYP_DOUBLE:
10915             genCodeForTreeFloat(tree, needReg, bestReg);
10916 #endif // FEATURE_STACK_FP_X87
10917             return;
10918
10919         default:
10920             noway_assert(!"unexpected cast type");
10921     }
10922
10923     if (tree->gtOverflow())
10924     {
10925         /* Compute op1 into a register, and free the register */
10926
10927         genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
10928         reg = op1->gtRegNum;
10929
10930         /* Do we need to compare the value, or just check masks */
10931
10932         ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
10933         ssize_t typeMask;
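              // typeMask holds the bits that must all be zero for an (unsigned) value to be in range;
              // typeMin/typeMax bound the signed range when an explicit compare is required instead.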
10934
10935         switch (dstType)
10936         {
10937             case TYP_BYTE:
10938                 typeMask = ssize_t((int)0xFFFFFF80);
10939                 typeMin  = SCHAR_MIN;
10940                 typeMax  = SCHAR_MAX;
10941                 unsv     = (tree->gtFlags & GTF_UNSIGNED);
10942                 break;
10943             case TYP_SHORT:
10944                 typeMask = ssize_t((int)0xFFFF8000);
10945                 typeMin  = SHRT_MIN;
10946                 typeMax  = SHRT_MAX;
10947                 unsv     = (tree->gtFlags & GTF_UNSIGNED);
10948                 break;
10949             case TYP_INT:
10950                 typeMask = ssize_t((int)0x80000000L);
10951 #ifdef _TARGET_64BIT_
10952                 unsv    = (tree->gtFlags & GTF_UNSIGNED);
10953                 typeMin = INT_MIN;
10954                 typeMax = INT_MAX;
10955 #else // _TARGET_64BIT_
10956                 noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
10957                 unsv     = true;
10958 #endif // _TARGET_64BIT_
10959                 break;
10960             case TYP_UBYTE:
10961                 unsv     = true;
10962                 typeMask = ssize_t((int)0xFFFFFF00L);
10963                 break;
10964             case TYP_CHAR:
10965                 unsv     = true;
10966                 typeMask = ssize_t((int)0xFFFF0000L);
10967                 break;
10968             case TYP_UINT:
10969                 unsv = true;
10970 #ifdef _TARGET_64BIT_
10971                 typeMask = 0xFFFFFFFF00000000LL;
10972 #else  // _TARGET_64BIT_
10973                 typeMask = 0x80000000L;
10974                 noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
10975 #endif // _TARGET_64BIT_
10976                 break;
10977             default:
10978                 NO_WAY("Unknown type");
10979                 return;
10980         }
10981
10982         // If we just have to check a mask, this must be
10983         // conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
10984         // or conv.i4u4.
10985
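              // For example, conv.ovf.u4u1 only needs "test reg, 0xFFFFFF00 / jne <throw>", while a
              // signed narrowing like conv.ovf.i4i1 needs explicit compares against SCHAR_MAX and SCHAR_MIN.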
10986         if (unsv)
10987         {
10988             inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
10989             emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10990             genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10991         }
10992         else
10993         {
10994             // Check the value is in range.
10995             // This must be conv.ovf.i4i1, etc.
10996
10997             // Compare with the MAX
10998
10999             noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
11000
11001             inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
11002             emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
11003             genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
11004
11005             // Compare with the MIN
11006
11007             inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
11008             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
11009             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
11010         }
11011
11012         genCodeForTree_DONE(tree, reg);
11013         return;
11014     }
11015
11016     /* Make the operand addressable */
11017
11018     addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
11019
11020     reg = genIntegerCast(tree, needReg, bestReg);
11021
11022     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11023
11024     genCodeForTree_DONE(tree, reg);
11025 }
11026
11027 /*****************************************************************************
11028  *
11029  *  Generate code for a leaf node of type GT_ADDR
11030  */
11031
11032 void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
11033 {
11034     genTreeOps      oper     = tree->OperGet();
11035     const var_types treeType = tree->TypeGet();
11036     GenTreePtr      op1;
11037     regNumber       reg;
11038     regMaskTP       needReg = destReg;
11039     regMaskTP       addrReg;
11040
11041 #ifdef DEBUG
11042     reg     = (regNumber)0xFEEFFAAF; // to detect uninitialized use
11043     addrReg = 0xDEADCAFE;
11044 #endif
11045
11046     // We should get here for ldloca, ldarga, ldsflda, ldelema,
11047     // or ldflda.
11048     if (oper == GT_ARR_ELEM)
11049     {
11050         op1 = tree;
11051     }
11052     else
11053     {
11054         op1 = tree->gtOp.gtOp1;
11055     }
11056
11057     // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11058     if (oper == GT_ARR_ELEM)
11059     {
11060         // To get the address of the array element,
11061         // we first call genMakeAddrArrElem to make the element addressable.
11062         //     (That is, for example, we first emit code to calculate EBX, and EAX.)
11063         // And then use lea to obtain the address.
11064         //     (That is, for example, we then emit
11065         //         lea EBX, bword ptr [EBX+4*EAX+36]
11066         //      to obtain the address of the array element.)
11067         addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11068     }
11069     else
11070     {
11071         addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11072     }
11073
11074     noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
11075
11076     // We want to reuse one of the scratch registers that were used
11077     // in forming the address mode as the target register for the lea.
11078     // If bestReg is unset or if it is set to one of the registers used to
11079     // form the address (i.e. addrReg), we calculate the scratch register
11080     // to use as the target register for the LEA
11081
11082     bestReg = regSet.rsUseIfZero(bestReg, addrReg);
11083     bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11084
11085     /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11086        it since keepReg==false.
11087        If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11088        So this is guaranteed not to spill addrReg */
11089
11090     reg = regSet.rsPickReg(needReg, bestReg);
11091
11092     // Slight workaround: force the inst routine to think that the
11093     // value being loaded is an int (since that is what LEA will
11094     // return); otherwise it would try to allocate
11095     // two registers for a long, etc.
11096     noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
11097     op1->gtType = treeType;
11098
11099     inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11100
11101     // The LEA emitted above had better not have caused the 'value' pointed to
11102     // by 'op1' to be loaded into a register; LEA cannot work on a register operand.
11103     noway_assert(!(op1->InReg()));
11104
11105     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11106     // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
11107     noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11108
11109     regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
11110     gcInfo.gcMarkRegPtrVal(reg, treeType);
11111
11112     genCodeForTree_DONE(tree, reg);
11113 }
11114
11115 #ifdef _TARGET_ARM_
11116
11117 /*****************************************************************************
11118  *
11119  * Move (load/store) between the float return registers and a promoted struct variable.
11120  *
11121  * varDsc - The struct variable to be loaded from or stored into.
11122  * isLoadIntoFlt - Perform a load operation if "true", a store if "false".
11123  *
11124  */
11125 void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
11126 {
11127     regNumber curReg = REG_FLOATRET;
11128
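          // Each promoted field consumes the next float return register(s): a TYP_FLOAT field uses one
          // register and a TYP_DOUBLE field uses a register pair (see how curReg advances at the bottom
          // of the loop).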
11129     unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11130     for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11131     {
11132         LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11133
11134         // Is the struct field promoted and sitting in a register?
11135         if (varDscFld->lvRegister)
11136         {
11137             // Move from the struct field into curReg if load
11138             // else move into struct field from curReg if store
11139             regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11140             regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
11141             if (srcReg != dstReg)
11142             {
11143                 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11144                 regTracker.rsTrackRegCopy(dstReg, srcReg);
11145             }
11146         }
11147         else
11148         {
11149             // This field is in memory, do a move between the field and float registers.
11150             emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11151             if (isLoadIntoFlt)
11152             {
11153                 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11154                 regTracker.rsTrackRegTrash(curReg);
11155             }
11156             else
11157             {
11158                 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11159             }
11160         }
11161
11162         // Advance the current reg.
11163         curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
11164     }
11165 }
11166
11167 void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
11168 {
11169     assert(tree->TypeGet() == TYP_STRUCT);
11170     assert(tree->gtOper == GT_LCL_VAR);
11171     LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11172     int        slots  = varDsc->lvSize() / REGSIZE_BYTES;
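          // On ARM, REGSIZE_BYTES is 4, so 'slots' is the number of single-precision register slots
          // the HFA occupies.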
11173     if (varDsc->lvPromoted)
11174     {
11175         genLdStFltRetRegsPromotedVar(varDsc, true);
11176     }
11177     else
11178     {
11179         if (slots <= 2)
11180         {
11181             // Use the load float/double instruction.
11182             inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
11183                        (slots == 1) ? EA_4BYTE : EA_8BYTE);
11184         }
11185         else
11186         {
11187             // Use the load store multiple instruction.
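                  // The vldm below loads slots * REGSIZE_BYTES bytes from the address in 'reg' into
                  // consecutive float registers starting at REG_FLOATRET.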
11188             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11189             inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11190             regTracker.rsTrackRegTrash(reg);
11191             getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11192         }
11193     }
11194     genMarkTreeInReg(tree, REG_FLOATRET);
11195 }
11196
11197 void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
11198 {
11199     assert(tree->TypeGet() == TYP_STRUCT);
11200     assert(tree->OperGet() == GT_ASG);
11201
11202     // LHS should be lcl var or fld.
11203     GenTreePtr op1 = tree->gtOp.gtOp1;
11204
11205     // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
11206     // handling multiple levels of inlined functions that return HFA on the right-hand-side.
11207     // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
11208     // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
11209     // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
11210     // as a regular assert().
11211     noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
11212     unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11213     assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
11214
11215     // The RHS should be a call.
11216     GenTreePtr op2 = tree->gtOp.gtOp2;
11217     assert(op2->gtOper == GT_CALL);
11218
11219     // Generate code for call and copy the return registers into the local.
11220     regMaskTP retMask = genCodeForCall(op2->AsCall(), true);
11221
11222     // The return mask should be set contiguously starting from s0 (up to s3) or from d0 (up to d3).
11223     CLANG_FORMAT_COMMENT_ANCHOR;
11224
11225 #ifdef DEBUG
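          // If retMask is a contiguous run of float registers starting at REG_FLOATRET, then
          // (retMask >> REG_FLOATRET) has the form 2^n - 1, so adding 1 gives a power of two;
          // the (mask & (mask - 1)) == 0 check below asserts exactly that.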
11226     regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
11227     assert((mask & (mask - 1)) == 0);
11228     assert(mask <= (1 << MAX_HFA_RET_SLOTS));
11229     assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
11230 #endif
11231
11232     int slots = genCountBits(retMask & RBM_ALLFLOAT);
11233
11234     LclVarDsc* varDsc = &compiler->lvaTable[varNum];
11235
11236     if (varDsc->lvPromoted)
11237     {
11238         genLdStFltRetRegsPromotedVar(varDsc, false);
11239     }
11240     else
11241     {
11242         if (slots <= 2)
11243         {
11244             inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
11245                        (slots == 1) ? EA_4BYTE : EA_8BYTE);
11246         }
11247         else
11248         {
11249             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11250             inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
11251             regTracker.rsTrackRegTrash(reg);
11252             getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11253         }
11254     }
11255 }
11256
11257 #endif // _TARGET_ARM_
11258
11259 /*****************************************************************************
11260  *
11261  *  Generate code for a GT_ASG tree
11262  */
11263
11264 #ifdef _PREFAST_
11265 #pragma warning(push)
11266 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11267 #endif
11268 void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
11269 {
11270     noway_assert(tree->gtOper == GT_ASG);
11271
11272     GenTreePtr  op1     = tree->gtOp.gtOp1;
11273     GenTreePtr  op2     = tree->gtOp.gtOp2;
11274     regMaskTP   needReg = RBM_ALLINT;
11275     regMaskTP   bestReg = RBM_CORRUPT;
11276     regMaskTP   addrReg = DUMMY_INIT(RBM_CORRUPT);
11277     bool        ovfl    = false; // Do we need an overflow check
11278     bool        volat   = false; // Is this a volatile store
11279     regMaskTP   regGC;
11280     instruction ins;
11281     unsigned    lclVarNum = compiler->lvaCount;
11282     unsigned    lclILoffs = DUMMY_INIT(0);
11283
11284 #ifdef _TARGET_ARM_
11285     if (tree->gtType == TYP_STRUCT)
11286     {
11287         // We use copy block to assign structs; however, to receive HFAs in registers
11288         // from a CALL, we use an assignment: var = (hfa) call();
11289         assert(compiler->IsHfa(tree));
11290         genStoreFromFltRetRegs(tree);
11291         return;
11292     }
11293 #endif
11294
11295 #ifdef DEBUG
11296     if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11297     {
11298         if (varTypeIsFloating(op1))
11299             assert(!"Bad IL: Illegal assignment of integer into float!");
11300         else
11301             assert(!"Bad IL: Illegal assignment of float into integer!");
11302     }
11303 #endif
11304
11305     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11306     {
11307         op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
11308     }
11309
11310     /* Is the target a register or local variable? */
11311     switch (op1->gtOper)
11312     {
11313         unsigned   varNum;
11314         LclVarDsc* varDsc;
11315
11316         case GT_LCL_VAR:
11317             varNum = op1->gtLclVarCommon.gtLclNum;
11318             noway_assert(varNum < compiler->lvaCount);
11319             varDsc = compiler->lvaTable + varNum;
11320
11321             /* For non-debuggable code, every definition of a lcl-var has
11322              * to be checked to see if we need to open a new scope for it.
11323              * Remember the local var info to call siCheckVarScope
11324              * AFTER code generation of the assignment.
11325              */
11326             if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11327             {
11328                 lclVarNum = varNum;
11329                 lclILoffs = op1->gtLclVar.gtLclILoffs;
11330             }
11331
11332             /* Check against dead store ? (with min opts we may have dead stores) */
11333
11334             noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11335
11336             /* Does this variable live in a register? */
11337
11338             if (genMarkLclVar(op1))
11339                 goto REG_VAR2;
11340
11341             break;
11342
11343         REG_VAR2:
11344
11345             /* Get hold of the target register */
11346
11347             regNumber op1Reg;
11348
11349             op1Reg = op1->gtRegVar.gtRegNum;
11350
11351 #ifdef DEBUG
11352             /* Compute the RHS (hopefully) into the variable's register.
11353                For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11354                as variables are kept alive everywhere. So we have to be
11355                careful if we want to compute the value directly into
11356                the variable's register. */
11357
11358             bool needToUpdateRegSetCheckLevel;
11359             needToUpdateRegSetCheckLevel = false;
11360 #endif
11361
11362             // We should only be accessing lvVarIndex if varDsc is tracked.
11363             assert(varDsc->lvTracked);
11364
11365             if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11366             {
11367                 noway_assert(compiler->opts.compDbgCode);
11368
11369                 /* The predictor might expect us to generate op2 directly
11370                    into the var's register. However, since the variable is
11371                    already alive, first kill it and its register. */
11372
11373                 if (rpCanAsgOperWithoutReg(op2, true))
11374                 {
11375                     genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11376                     needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11377 #ifdef DEBUG
11378                     needToUpdateRegSetCheckLevel = true;
11379 #endif
11380                 }
11381             }
11382             else
11383             {
11384                 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11385             }
11386
11387 #ifdef DEBUG
11388
11389             /* Special cases: op2 is a GT_CNS_INT */
11390
11391             if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11392             {
11393                 /* Save the old life status */
11394
11395                 VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11396                 VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11397
11398                 /* Set a flag to avoid printing the message
11399                    and remember that life was changed. */
11400
11401                 genTempLiveChg = false;
11402             }
11403 #endif
11404
11405 #ifdef DEBUG
11406             if (needToUpdateRegSetCheckLevel)
11407                 compiler->compRegSetCheckLevel++;
11408 #endif
11409             genCodeForTree(op2, needReg, genRegMask(op1Reg));
11410 #ifdef DEBUG
11411             if (needToUpdateRegSetCheckLevel)
11412                 compiler->compRegSetCheckLevel--;
11413             noway_assert(compiler->compRegSetCheckLevel >= 0);
11414 #endif
11415             noway_assert(op2->InReg());
11416
11417             /* Make sure the value ends up in the right place ... */
11418
11419             if (op2->gtRegNum != op1Reg)
11420             {
11421                 /* Make sure the target of the store is available */
11422
11423                 if (regSet.rsMaskUsed & genRegMask(op1Reg))
11424                     regSet.rsSpillReg(op1Reg);
11425
11426 #ifdef _TARGET_ARM_
11427                 if (op1->TypeGet() == TYP_FLOAT)
11428                 {
11429                     // This can only occur when we are returning a non-HFA struct
11430                     // that is composed of a single float field.
11431                     //
11432                     inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11433                 }
11434                 else
11435 #endif // _TARGET_ARM_
11436                 {
11437                     inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11438                 }
11439
11440                 /* The value has been transferred to 'op1Reg' */
11441
11442                 regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
11443
11444                 if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11445                     gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11446
11447                 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11448             }
11449             else
11450             {
11451                 // First we need to remove it from the original reg set mask (or else trigger an
11452                 // assert when we add it to the other reg set mask).
11453                 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11454                 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11455
11456                 // The emitter has logic that tracks the GCness of registers and asserts if you
11457                 // try to do bad things to a GC pointer (like lose its GCness).
11458
11459                 // An explicit cast of a GC pointer to an int (which is legal if the
11460                 // pointer is pinned) is encoded as an assignment of a GC source
11461                 // to an integer variable.  Unfortunately, if the source was the last
11462                 // use, and the source register gets reused by the destination, no
11463                 // code gets emitted (which is the case we are in right now).  The emitter
11464                 // thinks the register is a GC pointer (it did not see the cast).
11465                 // This causes asserts, as well as bad GC info since we will continue
11466                 // to report the register as a GC pointer even if we do arithmetic
11467                 // with it. So force the emitter to see the change in the type
11468                 // of variable by placing a label.
11469                 // We only have to do this check at this point because in the
11470                 // CAST morphing, we create a temp and assignment whenever we
11471                 // have a cast that loses its GCness.
11472
11473                 if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11474                 {
11475                     void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
11476                                                              gcInfo.gcRegByrefSetCur);
11477                 }
11478             }
11479
11480             addrReg = 0;
11481
11482             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11483             goto LExit;
11484
11485         case GT_LCL_FLD:
11486
11487             // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11488             // to worry about it being enregistered.
11489             noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11490             break;
11491
11492         case GT_CLS_VAR:
11493
11494             __fallthrough;
11495
11496         case GT_IND:
11497         case GT_NULLCHECK:
11498
11499             assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11500
11501             if (op1->gtFlags & GTF_IND_VOLATILE)
11502             {
11503                 volat = true;
11504             }
11505
11506             break;
11507
11508         default:
11509             break;
11510     }
11511
11512     /* Is the value being assigned a simple one? */
11513
11514     noway_assert(op2);
11515     switch (op2->gtOper)
11516     {
11517         case GT_LCL_VAR:
11518
11519             if (!genMarkLclVar(op2))
11520                 goto SMALL_ASG;
11521
11522             __fallthrough;
11523
11524         case GT_REG_VAR:
11525
11526             /* Is the target a byte/short/char value? */
11527
11528             if (varTypeIsSmall(op1->TypeGet()))
11529                 goto SMALL_ASG;
11530
11531             if (tree->gtFlags & GTF_REVERSE_OPS)
11532                 goto SMALL_ASG;
11533
11534             /* Make the target addressable */
11535
11536             op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
11537
11538             addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11539
11540             /* Does the write barrier helper do the assignment? */
11541
11542             regGC = WriteBarrier(op1, op2, addrReg);
11543
11544             // Was assignment done by the WriteBarrier
11545             if (regGC == RBM_NONE)
11546             {
11547 #ifdef _TARGET_ARM_
11548                 if (volat)
11549                 {
11550                     // Emit a memory barrier instruction before the store
11551                     instGen_MemoryBarrier();
11552                 }
11553 #endif
11554
11555                 /* Move the value into the target */
11556
11557                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11558
11559                 // This is done in WriteBarrier when (regGC != RBM_NONE)
11560
11561                 /* Free up anything that was tied up by the LHS */
11562                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11563             }
11564
11565             /* Free up the RHS */
11566             genUpdateLife(op2);
11567
11568             /* Remember that we've also touched the op2 register */
11569
11570             addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11571             break;
11572
11573         case GT_CNS_INT:
11574
11575             ssize_t ival;
11576             ival = op2->gtIntCon.gtIconVal;
11577             emitAttr size;
11578             size = emitTypeSize(tree->TypeGet());
11579
11580             ins = ins_Store(op1->TypeGet());
11581
11582             // If we are storing a constant into a local variable,
11583             // we extend the size of the store here;
11584             // this normally takes place in CodeGen::inst_TT_IV on x86.
11585             //
11586             if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11587             {
11588                 unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
11589                 LclVarDsc* varDsc = compiler->lvaTable + varNum;
11590
11591                 // Fix the immediate by sign extending if needed
11592                 if (!varTypeIsUnsigned(varDsc->TypeGet()))
11593                 {
11594                     if (size == EA_1BYTE)
11595                     {
11596                         if ((ival & 0x7f) != ival)
11597                             ival = ival | 0xffffff00;
11598                     }
11599                     else
11600                     {
11601                         assert(size == EA_2BYTE);
11602                         if ((ival & 0x7fff) != ival)
11603                             ival = ival | 0xffff0000;
11604                     }
11605                 }
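                      // For example, storing the constant 0xFF into a signed byte local widens the
                      // immediate to 0xFFFFFFFF, so the (now 4-byte) store still writes the value -1.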
11606
11607                 // A local stack slot is at least 4 bytes in size, regardless of
11608                 // what the local var is typed as, so auto-promote it here
11609                 // unless it is a field of a promoted struct
11610                 if (!varDsc->lvIsStructField)
11611                 {
11612                     size = EA_SET_SIZE(size, EA_4BYTE);
11613                     ins  = ins_Store(TYP_INT);
11614                 }
11615             }
11616
11617             /* Make the target addressable */
11618
11619             addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11620
11621 #ifdef _TARGET_ARM_
11622             if (volat)
11623             {
11624                 // Emit a memory barrier instruction before the store
11625                 instGen_MemoryBarrier();
11626             }
11627 #endif
11628
11629             /* Move the value into the target */
11630
11631             noway_assert(op1->gtOper != GT_REG_VAR);
11632             if (compiler->opts.compReloc && op2->IsIconHandle())
11633             {
11634                 /* The constant is actually a handle that may need relocation
11635                    applied to it.  genComputeReg will do the right thing (see
11636                    code in genCodeForTreeConst), so we'll just call it to load
11637                    the constant into a register. */
11638
11639                 genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11640                 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11641                 noway_assert(op2->InReg());
11642                 inst_TT_RV(ins, op1, op2->gtRegNum);
11643                 genReleaseReg(op2);
11644             }
11645             else
11646             {
11647                 regSet.rsLockUsedReg(addrReg);
11648
11649 #if REDUNDANT_LOAD
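                      // If the register tracker knows that some register already holds this constant,
                      // prefer storing from that register rather than encoding the immediate in the store.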
11650                 bool      copyIconFromReg = true;
11651                 regNumber iconReg         = REG_NA;
11652
11653 #ifdef _TARGET_ARM_
11654                 // Only if the constant can't be encoded in a small instruction,
11655                 // look for another register to copy the value from. (Assumes
11656                 // target is a small register.)
11657                 if ((op1->InReg()) && !isRegPairType(tree->gtType) &&
11658                     arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11659                 {
11660                     copyIconFromReg = false;
11661                 }
11662 #endif // _TARGET_ARM_
11663
11664                 if (copyIconFromReg)
11665                 {
11666                     iconReg = regTracker.rsIconIsInReg(ival);
11667                     if (iconReg == REG_NA)
11668                         copyIconFromReg = false;
11669                 }
11670
11671                 if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
11672                                         (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11673                 {
11674                     /* Move the value into the target */
11675
11676                     inst_TT_RV(ins, op1, iconReg, 0, size);
11677                 }
11678                 else
11679 #endif // REDUNDANT_LOAD
11680                 {
11681                     inst_TT_IV(ins, op1, ival, 0, size);
11682                 }
11683
11684                 regSet.rsUnlockUsedReg(addrReg);
11685             }
11686
11687             /* Free up anything that was tied up by the LHS */
11688
11689             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11690             break;
11691
11692         default:
11693
11694         SMALL_ASG:
11695
11696             bool             isWriteBarrier = false;
11697             regMaskTP        needRegOp1     = RBM_ALLINT;
11698             RegSet::ExactReg mustReg        = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11699
11700             /*  Is the LHS more complex than the RHS? */
11701
11702             if (tree->gtFlags & GTF_REVERSE_OPS)
11703             {
11704                 /* Is the target a byte/short/char value? */
11705
11706                 if (varTypeIsSmall(op1->TypeGet()))
11707                 {
11708                     noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
11709                                  // TODO: Why does this have to be true?
11710                                  compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11711                                  compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11712
11713                     if (op2->gtOper == GT_CAST && !op2->gtOverflow())
11714                     {
11715                         /* Special case: cast to small type */
11716
11717                         if (op2->CastToType() >= op1->gtType)
11718                         {
11719                             /* Make sure the cast operand is not > int */
11720
11721                             if (op2->CastFromType() <= TYP_INT)
11722                             {
11723                                 /* Cast via a non-smaller type */
11724
11725                                 op2 = op2->gtCast.CastOp();
11726                             }
11727                         }
11728                     }
11729
11730                     if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11731                     {
11732                         unsigned mask;
11733                         switch (op1->gtType)
11734                         {
11735                             case TYP_BYTE:
11736                                 mask = 0x000000FF;
11737                                 break;
11738                             case TYP_SHORT:
11739                                 mask = 0x0000FFFF;
11740                                 break;
11741                             case TYP_CHAR:
11742                                 mask = 0x0000FFFF;
11743                                 break;
11744                             default:
11745                                 goto SIMPLE_SMALL;
11746                         }
11747
11748                         if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11749                         {
11750                             /* Redundant AND */
11751
11752                             op2 = op2->gtOp.gtOp1;
11753                         }
11754                     }
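                          // For example, for a byte local the AND in "b = (byte)(x & 0xFF)" is redundant,
                          // because the byte-sized store only writes the low 8 bits anyway.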
11755
11756                 /* Must get the new value into a byte register */
11757
11758                 SIMPLE_SMALL:
11759                     if (varTypeIsByte(op1->TypeGet()))
11760                         genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11761                     else
11762                         goto NOT_SMALL;
11763                 }
11764                 else
11765                 {
11766                 NOT_SMALL:
11767                     /* Generate the RHS into a register */
11768
11769                     isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11770                     if (isWriteBarrier)
11771                     {
11772 #if NOGC_WRITE_BARRIERS
11773                         // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11774                         needReg = Target::exclude_WriteBarrierReg(needReg);
11775                         mustReg = RegSet::EXACT_REG;
11776 #else  // !NOGC_WRITE_BARRIERS
11777                         // This code should be generic across architectures.
11778
11779                         // For the standard JIT Helper calls
11780                         // op1 goes into REG_ARG_0 and
11781                         // op2 goes into REG_ARG_1
11782                         //
11783                         needRegOp1 = RBM_ARG_0;
11784                         needReg    = RBM_ARG_1;
11785 #endif // !NOGC_WRITE_BARRIERS
11786                     }
11787                     genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11788                 }
11789
11790                 noway_assert(op2->InReg());
11791
11792                 /* Make the target addressable */
11793
11794                 op1     = genCodeForCommaTree(op1); // Strip off any comma expressions.
11795                 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11796
11797                 /*  Make sure the RHS register hasn't been spilled;
11798                     keep the register marked as "used", otherwise
11799                     we might get the pointer lifetimes wrong.
11800                 */
11801
11802                 if (varTypeIsByte(op1->TypeGet()))
11803                     needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11804
11805                 genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11806                 noway_assert(op2->InReg());
11807
11808                 /* Lock the RHS temporarily (only registers already marked as used can be locked) */
11809
11810                 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11811
11812                 /* Make sure the LHS is still addressable */
11813
11814                 addrReg = genKeepAddressable(op1, addrReg);
11815
11816                 /* We can now unlock the RHS register (again, only already-used registers) */
11817
11818                 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11819
11820                 /* Does the write barrier helper do the assignment? */
11821
11822                 regGC = WriteBarrier(op1, op2, addrReg);
11823
11824                 if (regGC != 0)
11825                 {
11826                     // Yes, assignment done by the WriteBarrier
11827                     noway_assert(isWriteBarrier);
11828                 }
11829                 else
11830                 {
11831 #ifdef _TARGET_ARM_
11832                     if (volat)
11833                     {
11834                         // Emit a memory barrier instruction before the store
11835                         instGen_MemoryBarrier();
11836                     }
11837 #endif
11838
11839                     /* Move the value into the target */
11840
11841                     inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11842                 }
11843
11844 #ifdef DEBUG
11845                 /* Update the current liveness info */
11846                 if (compiler->opts.varNames)
11847                     genUpdateLife(tree);
11848 #endif
11849
11850                 // If op2 register is still in use, free it.  (Might not be in use, if
11851                 // a full-call write barrier was done, and the register was a caller-saved
11852                 // register.)
11853                 regMaskTP op2RM = genRegMask(op2->gtRegNum);
11854                 if (op2RM & regSet.rsMaskUsed)
11855                     regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
11856
11857                 // This is done in WriteBarrier when (regGC != 0)
11858                 if (regGC == 0)
11859                 {
11860                     /* Free up anything that was tied up by the LHS */
11861                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11862                 }
11863             }
11864             else
11865             {
11866                 /* Make the target addressable */
11867
11868                 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11869
11870                 if (isWriteBarrier)
11871                 {
11872 #if NOGC_WRITE_BARRIERS
11873                     /* Try to avoid RBM_TMP_0 */
11874                     needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
11875                     mustReg    = RegSet::EXACT_REG; // For op2
11876 #else                                               // !NOGC_WRITE_BARRIERS
11877                     // This code should be generic across architectures.
11878
11879                     // For the standard JIT Helper calls
11880                     // op1 goes into REG_ARG_0 and
11881                     // op2 goes into REG_ARG_1
11882                     //
11883                     needRegOp1 = RBM_ARG_0;
11884                     needReg    = RBM_ARG_1;
11885                     mustReg    = RegSet::EXACT_REG; // For op2
11886 #endif                                              // !NOGC_WRITE_BARRIERS
11887                 }
11888
11889                 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
11890
11891                 op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
11892
11893                 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11894
11895 #if CPU_HAS_BYTE_REGS
11896                 /* Is the target a byte value? */
11897                 if (varTypeIsByte(op1->TypeGet()))
11898                 {
11899                     /* Must get the new value into a byte register */
11900                     needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11901                     mustReg = RegSet::EXACT_REG;
11902
11903                     if (op2->gtType >= op1->gtType)
11904                         op2->gtFlags |= GTF_SMALL_OK;
11905                 }
11906 #endif
11907
11908 #if NOGC_WRITE_BARRIERS
11909                 /* For WriteBarrier we can't use REG_WRITE_BARRIER */
11910                 if (isWriteBarrier)
11911                     needReg = Target::exclude_WriteBarrierReg(needReg);
11912
11913                 /* Also avoid using the previously computed addrReg(s) */
11914                 bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
11915
11916                 /* If we have a reg available to grab then use bestReg */
11917                 if (bestReg & regSet.rsRegMaskCanGrab())
11918                     needReg = bestReg;
11919
11920                 mustReg = RegSet::EXACT_REG;
11921 #endif
11922
11923                 /* Generate the RHS into a register */
11924                 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11925                 noway_assert(op2->InReg());
11926
11927                 /* Make sure the target is still addressable */
11928                 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11929                 noway_assert(op2->InReg());
11930
11931                 /* Does the write barrier helper do the assignment? */
11932
11933                 regGC = WriteBarrier(op1, op2, addrReg);
11934
11935                 if (regGC != 0)
11936                 {
11937                     // Yes, assignment done by the WriteBarrier
11938                     noway_assert(isWriteBarrier);
11939                 }
11940                 else
11941                 {
11942                     assert(!isWriteBarrier);
11943
11944 #ifdef _TARGET_ARM_
11945                     if (volat)
11946                     {
11947                         // Emit a memory barrier instruction before the store
11948                         instGen_MemoryBarrier();
11949                     }
11950 #endif
11951
11952                     /* Move the value into the target */
11953
11954                     inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11955                 }
11956
11957                 /* The new value is no longer needed */
11958
11959                 genReleaseReg(op2);
11960
11961 #ifdef DEBUG
11962                 /* Update the current liveness info */
11963                 if (compiler->opts.varNames)
11964                     genUpdateLife(tree);
11965 #endif
11966
11967                 // This is done in WriteBarrier when (regGC != 0)
11968                 if (regGC == 0)
11969                 {
11970                     /* Free up anything that was tied up by the LHS */
11971                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11972                 }
11973             }
11974
11975             addrReg = RBM_NONE;
11976             break;
11977     }
11978
11979     noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
11980     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
11981
11982 LExit:
11983     /* For non-debuggable code, every definition of a lcl-var has
11984      * to be checked to see if we need to open a new scope for it.
11985      */
11986     if (lclVarNum < compiler->lvaCount)
11987         siCheckVarScope(lclVarNum, lclILoffs);
11988 }
11989 #ifdef _PREFAST_
11990 #pragma warning(pop)
11991 #endif
11992
11993 /*****************************************************************************
11994  *
11995  *  Generate code to complete the assignment operation
11996  */
11997
11998 void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
11999 {
12000     const var_types treeType = tree->TypeGet();
12001     GenTreePtr      op1      = tree->gtOp.gtOp1;
12002     GenTreePtr      op2      = tree->gtOp.gtOp2;
12003     noway_assert(op2);
12004
12005     if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
12006         genUpdateLife(op1);
12007     genUpdateLife(tree);
12008
12009 #if REDUNDANT_LOAD
12010
12011     if (op1->gtOper == GT_LCL_VAR)
12012         regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12013
12014     /* Have we just assigned a value that is in a register? */
12015
12016     if (op2->InReg() && tree->gtOper == GT_ASG)
12017     {
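        // Remember that op1's local now has the value sitting in op2's register, so a later
        // load of this local may be satisfied from that register (the REDUNDANT_LOAD optimization).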
12018         regTracker.rsTrackRegAssign(op1, op2);
12019     }
12020
12021 #endif
12022
12023     noway_assert(addrReg != 0xDEADCAFE);
12024
12025     gcInfo.gcMarkRegSetNpt(addrReg);
12026
12027     if (ovfl)
12028     {
12029         noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12030
12031         /* If it is not in a register and it is a small type, then
12032            we must have loaded it up from memory, done the increment,
12033            checked for overflow, and then stored it back to memory */
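        // Illustrative shape of that path (an assumption about the emitted code, not a verbatim
        // listing), e.g. for a byte-sized "x += y" with overflow checking:
        //
        //     movsx  eax, byte ptr [x]     ; load and widen
        //     add    eax, <y>              ; do the arithmetic in a full-size register
        //     ...                          ; overflow check (genCheckOverflow-style)
        //     mov    byte ptr [x], al      ; store the narrowed result back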
12034
12035         bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->InReg());
12036
12037         if (!ovfCheckDone)
12038         {
12039             // For small sizes, reg should be set as we sign/zero extend it.
12040
12041             noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
12042
12043             /* Currently we don't morph x=x+y into x+=y in try blocks
12044              * if we need an overflow check, as x+y may throw an exception.
12045              * We can do it if x is not live on entry to the catch block.
12046              */
12047             noway_assert(!compiler->compCurBB->hasTryIndex());
12048
12049             genCheckOverflow(tree);
12050         }
12051     }
12052 }
12053
12054 /*****************************************************************************
12055  *
12056  *  Generate code for a special op tree
12057  */
12058
12059 void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
12060 {
12061     genTreeOps oper = tree->OperGet();
12062     regNumber  reg  = DUMMY_INIT(REG_CORRUPT);
12063     regMaskTP  regs = regSet.rsMaskUsed;
12064
12065     noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
12066
12067     switch (oper)
12068     {
12069         case GT_CALL:
12070             regs = genCodeForCall(tree->AsCall(), true);
12071
12072             /* If the result is in a register, make sure it ends up in the right place */
12073
12074             if (regs != RBM_NONE)
12075             {
12076                 genMarkTreeInReg(tree, genRegNumFromMask(regs));
12077             }
12078
12079             genUpdateLife(tree);
12080             return;
12081
12082         case GT_FIELD:
12083             NO_WAY("should not see this operator in this phase");
12084             break;
12085
12086         case GT_ARR_BOUNDS_CHECK:
12087         {
12088 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
12089             // MUST NEVER CHECK-IN WITH THIS ENABLED.
12090             // This is just for convenience in doing performance investigations and requires x86ret builds
12091             if (!JitConfig.JitNoRngChk())
12092 #endif
12093                 genRangeCheck(tree);
12094         }
12095             return;
12096
12097         case GT_ARR_ELEM:
12098             genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
12099             return;
12100
12101         case GT_CMPXCHG:
12102         {
12103 #if defined(_TARGET_XARCH_)
12104             // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
12105
12106             // Since this is a "call", evaluate the operands from right to left.  Don't worry about spilling
12107             // right now, just get the trees evaluated.
12108
12109             // As a friendly reminder.  IL args are evaluated left to right.
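            // For reference, the x86 semantics being targeted: "lock cmpxchg [mem], reg" compares
            // EAX with [mem]; if they are equal, 'reg' is stored into [mem], otherwise [mem] is
            // loaded into EAX.  So the code below arranges comparand -> EAX, value -> some other
            // register, and the location as an addressable operand, then emits (roughly):
            //
            //     lock
            //     cmpxchg dword ptr [location], valueReg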
12110
12111             GenTreePtr location  = tree->gtCmpXchg.gtOpLocation;  // arg1
12112             GenTreePtr value     = tree->gtCmpXchg.gtOpValue;     // arg2
12113             GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
12114             regMaskTP  addrReg;
12115
12116             bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
12117                                              RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
12118
12119             if (!isAddr)
12120             {
12121                 genCodeForTree(location, RBM_NONE, RBM_NONE);
12122                 assert(location->InReg());
12123                 addrReg = genRegMask(location->gtRegNum);
12124                 regSet.rsMarkRegUsed(location);
12125             }
12126
12127             // We must have a reg for the Value, but it doesn't really matter which register.
12128
12129             // Try to avoid EAX and the address register if possible.
12130             genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
12131
12132 #ifdef DEBUG
12133             // cmpxchg uses EAX as an implicit operand to hold the comparand
12134             // We're going to destroy EAX in this operation, so we better not be keeping
12135             // We're going to destroy EAX in this operation, so we'd better not be keeping
12136             if (RBM_EAX & regSet.rsMaskVars)
12137             {
12138                 // We have a variable enregistered in EAX.  Make sure it goes dead in this tree.
12139                 for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
12140                 {
12141                     const LclVarDsc& varDesc = compiler->lvaTable[varNum];
12142                     if (!varDesc.lvIsRegCandidate())
12143                         continue;
12144                     if (!varDesc.lvRegister)
12145                         continue;
12146                     if (isFloatRegType(varDesc.lvType))
12147                         continue;
12148                     if (varDesc.lvRegNum != REG_EAX)
12149                         continue;
12150                     // We may need to check lvOtherReg.
12151
12152                     // If the variable isn't going dead during this tree, we've just trashed a local with
12153                     // cmpxchg.
12154                     noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
12155
12156                     break;
12157                 }
12158             }
12159 #endif
12160             genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
12161
12162             // By this point we've evaluated everything.  However the odds are that we've spilled something by
12163             // now.  Let's recover all the registers and force them to stay.
12164
12165             // Well, we just computed comparand, so it's still in EAX.
12166             noway_assert(comparand->gtRegNum == REG_EAX);
12167             regSet.rsLockUsedReg(RBM_EAX);
12168
12169             // Stick it anywhere other than EAX.
12170             genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
12171             reg = value->gtRegNum;
12172             noway_assert(reg != REG_EAX);
12173             regSet.rsLockUsedReg(genRegMask(reg));
12174
12175             if (isAddr)
12176             {
12177                 addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
12178             }
12179             else
12180             {
12181                 genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
12182             }
12183
12184             regSet.rsUnlockUsedReg(genRegMask(reg));
12185             regSet.rsUnlockUsedReg(RBM_EAX);
12186
12187             instGen(INS_lock);
12188             if (isAddr)
12189             {
12190                 sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
12191                 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
12192             }
12193             else
12194             {
12195                 instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
12196                 genReleaseReg(location);
12197             }
12198
12199             genReleaseReg(value);
12200             genReleaseReg(comparand);
12201
12202             // EAX and the value register are both trashed at this point.
12203             regTracker.rsTrackRegTrash(REG_EAX);
12204             regTracker.rsTrackRegTrash(reg);
12205
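            // Either way cmpxchg leaves the value that was originally in memory in EAX (unchanged
            // on a successful exchange, reloaded from [location] on a failed one), so EAX holds
            // the result of the intrinsic.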
12206             reg = REG_EAX;
12207
12208             genFlagsEqualToNone();
12209             break;
12210 #else // not defined(_TARGET_XARCH_)
12211             NYI("GT_CMPXCHG codegen");
12212             break;
12213 #endif
12214         }
12215
12216         default:
12217 #ifdef DEBUG
12218             compiler->gtDispTree(tree);
12219 #endif
12220             noway_assert(!"unexpected operator");
12221             NO_WAY("unexpected operator");
12222     }
12223
12224     noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
12225     genCodeForTree_DONE(tree, reg);
12226 }
12227
12228 /*****************************************************************************
12229  *
12230  *  Generate code for the given tree. tree->gtRegNum will be set to the
12231  *  register where the tree lives.
12232  *
12233  *  If 'destReg' is non-zero, we'll do our best to compute the value into a
12234  *  register that is in that register set.
12235  *  Use genComputeReg() if you need the tree in a specific register.
12236  *  Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12237  *  the register can only be used for read, but not for write.
12238  *  Use genMakeAddressable() if you only need the tree to be accessible
12239  *  using a complex addressing mode, and do not necessarily need the tree
12240  *  materialized in a register.
12241  *
12242  *  The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12243  *
12244  *  The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
12245  *  register will not be consumed right away and could possibly be spilled.
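 *
 *  Illustrative (hypothetical) call pattern, not taken from any particular caller:
 *
 *      genCodeForTree(op, RBM_ALLINT);    // compute 'op' into some integer register
 *      regNumber reg = op->gtRegNum;      // the register that now holds the value
 *      regSet.rsMarkRegUsed(op);          // mark it used if it won't be consumed right away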
12246  */
12247
12248 void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
12249 {
12250 #if 0
12251     if  (compiler->verbose)
12252     {
12253         printf("Generating code for tree ");
12254         Compiler::printTreeID(tree);
12255         printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12256     }
12257     genStressRegs(tree);
12258 #endif
12259
12260     noway_assert(tree);
12261     noway_assert(tree->gtOper != GT_STMT);
12262     assert(tree->IsNodeProperlySized());
12263
12264     // When assigning to an enregistered local variable we receive
12265     // a hint that we should target the register that is used to
12266     // hold the enregistered local variable.
12267     // When receiving this hint both destReg and bestReg masks are set
12268     // to the register that is used by the enregistered local variable.
12269     //
12270     // However it is possible for us to have a different local variable
12271     // targeting the same register to become alive (and later die)
12272     // as we descend the expression tree.
12273     //
12274     // To handle such cases we will remove any registers that are alive from
12275     // both the destReg and bestReg masks.
12276     //
12277     regMaskTP liveMask = genLiveMask(tree);
12278
12279     // This removes any registers used to hold enregistered locals
12280     // from the destReg and bestReg masks.
12281     // After this either mask could become 0
12282     //
12283     destReg &= ~liveMask;
12284     bestReg &= ~liveMask;
12285
12286     /* 'destReg' of 0 really means 'any' */
12287
12288     destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
12289
12290     if (destReg != RBM_ALL(tree->TypeGet()))
12291         bestReg = regSet.rsUseIfZero(bestReg, destReg);
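    // For example: if the caller passed destReg == 0 and bestReg == 0 for an int tree, destReg
    // becomes RBM_ALLINT and bestReg stays 0 ("no preference").  If the caller passed
    // destReg == RBM_ECX and no bestReg, then bestReg defaults to RBM_ECX as well.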
12292
12293     // Long, float, and double have their own codegen functions
12294     switch (tree->TypeGet())
12295     {
12296
12297         case TYP_LONG:
12298 #if !CPU_HAS_FP_SUPPORT
12299         case TYP_DOUBLE:
12300 #endif
12301             genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
12302             return;
12303
12304 #if CPU_HAS_FP_SUPPORT
12305         case TYP_FLOAT:
12306         case TYP_DOUBLE:
12307
12308             // For comma nodes, we'll get back here for the last node in the comma list.
12309             if (tree->gtOper != GT_COMMA)
12310             {
12311                 genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12312                 return;
12313             }
12314             break;
12315 #endif
12316
12317 #ifdef DEBUG
12318         case TYP_UINT:
12319         case TYP_ULONG:
12320             noway_assert(!"These types are only used as markers in GT_CAST nodes");
12321             break;
12322 #endif
12323
12324         default:
12325             break;
12326     }
12327
12328     /* Is the value already in a register? */
12329
12330     if (tree->InReg())
12331     {
12332         genCodeForTree_REG_VAR1(tree);
12333         return;
12334     }
12335
12336     /* We better not have a spilled value here */
12337
12338     noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12339
12340     /* Figure out what kind of a node we have */
12341
12342     unsigned kind = tree->OperKind();
12343
12344     if (kind & GTK_CONST)
12345     {
12346         /* Handle constant nodes */
12347
12348         genCodeForTreeConst(tree, destReg, bestReg);
12349     }
12350     else if (kind & GTK_LEAF)
12351     {
12352         /* Handle leaf nodes */
12353
12354         genCodeForTreeLeaf(tree, destReg, bestReg);
12355     }
12356     else if (kind & GTK_SMPOP)
12357     {
12358         /* Handle 'simple' unary/binary operators */
12359
12360         genCodeForTreeSmpOp(tree, destReg, bestReg);
12361     }
12362     else
12363     {
12364         /* Handle special operators */
12365
12366         genCodeForTreeSpecialOp(tree, destReg, bestReg);
12367     }
12368 }
12369
12370 /*****************************************************************************
12371  *
12372  *  Generate code for all the basic blocks in the function.
12373  */
12374
12375 #ifdef _PREFAST_
12376 #pragma warning(push)
12377 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12378 #endif
12379 void CodeGen::genCodeForBBlist()
12380 {
12381     unsigned   varNum;
12382     LclVarDsc* varDsc;
12383
12384     unsigned savedStkLvl;
12385
12386 #ifdef DEBUG
12387     genInterruptibleUsed = true;
12388     unsigned stmtNum     = 0;
12389     unsigned totalCostEx = 0;
12390     unsigned totalCostSz = 0;
12391
12392     // You have to be careful if you create basic blocks from now on
12393     compiler->fgSafeBasicBlockCreation = false;
12394
12395     // This stress mode is not compatible with fully interruptible GC
12396     if (genInterruptible && compiler->opts.compStackCheckOnCall)
12397     {
12398         compiler->opts.compStackCheckOnCall = false;
12399     }
12400
12401     // This stress mode is not compatible with fully interruptible GC
12402     if (genInterruptible && compiler->opts.compStackCheckOnRet)
12403     {
12404         compiler->opts.compStackCheckOnRet = false;
12405     }
12406 #endif
12407
12408     // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
12409     genPrepForEHCodegen();
12410
12411     assert(!compiler->fgFirstBBScratch ||
12412            compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12413
12414     /* Initialize the spill tracking logic */
12415
12416     regSet.rsSpillBeg();
12417
12418     /* Initialize the line# tracking logic */
12419
12420     if (compiler->opts.compScopeInfo)
12421     {
12422         siInit();
12423     }
12424
12425 #ifdef _TARGET_X86_
12426     if (compiler->compTailCallUsed)
12427     {
12428         noway_assert(isFramePointerUsed());
12429         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12430     }
12431 #endif
12432
12433     if (compiler->opts.compDbgEnC)
12434     {
12435         noway_assert(isFramePointerUsed());
12436         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12437     }
12438
12439     /* If we have any pinvoke calls, we might potentially trash everything */
12440
12441     if (compiler->info.compCallUnmanaged)
12442     {
12443         noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
12444         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12445     }
12446
12447     /* Initialize the pointer tracking code */
12448
12449     gcInfo.gcRegPtrSetInit();
12450     gcInfo.gcVarPtrSetInit();
12451
12452     /* If any arguments live in registers, mark those regs as such */
12453
12454     for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
12455     {
12456         /* Is this variable a parameter assigned to a register? */
12457
12458         if (!varDsc->lvIsParam || !varDsc->lvRegister)
12459             continue;
12460
12461         /* Is the argument live on entry to the method? */
12462
12463         if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12464             continue;
12465
12466 #if CPU_HAS_FP_SUPPORT
12467         /* Is this a floating-point argument? */
12468
12469         if (varDsc->IsFloatRegType())
12470             continue;
12471
12472         noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12473 #endif
12474
12475         /* Mark the register as holding the variable */
12476
12477         if (isRegPairType(varDsc->lvType))
12478         {
12479             regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12480
12481             if (varDsc->lvOtherReg != REG_STK)
12482                 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12483         }
12484         else
12485         {
12486             regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12487         }
12488     }
12489
12490     unsigned finallyNesting = 0;
12491
12492     // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12493     // allocation at the start of each basic block.
12494     VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12495
12496     /*-------------------------------------------------------------------------
12497      *
12498      *  Walk the basic blocks and generate code for each one
12499      *
12500      */
12501
12502     BasicBlock* block;
12503     BasicBlock* lblk; /* previous block */
12504
12505     for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
12506     {
12507 #ifdef DEBUG
12508         if (compiler->verbose)
12509         {
12510             printf("\n=============== Generating ");
12511             block->dspBlockHeader(compiler, true, true);
12512             compiler->fgDispBBLiveness(block);
12513         }
12514 #endif // DEBUG
12515
12516         VARSET_TP liveSet(VarSetOps::UninitVal());
12517
12518         regMaskTP gcrefRegs = 0;
12519         regMaskTP byrefRegs = 0;
12520
12521         /* Does any other block jump to this point ? */
12522
12523         if (block->bbFlags & BBF_JMP_TARGET)
12524         {
12525             /* Someone may jump here, so trash all regs */
12526
12527             regTracker.rsTrackRegClr();
12528
12529             genFlagsEqualToNone();
12530         }
12531         else
12532         {
12533             /* No jump, but pointers always need to get trashed for proper GC tracking */
12534
12535             regTracker.rsTrackRegClrPtr();
12536         }
12537
12538         /* No registers are used or locked on entry to a basic block */
12539
12540         regSet.rsMaskUsed = RBM_NONE;
12541         regSet.rsMaskMult = RBM_NONE;
12542         regSet.rsMaskLock = RBM_NONE;
12543
12544         // If we need to reserve registers so that they are not used
12545         // by CodeGen in this BasicBlock, we do so here.
12546         // On ARM, when we have large frame offsets for locals, RBM_R10 will be
12547         // in the regSet.rsMaskResvd set; additionally, if a LocAlloc or alloca
12548         // is used, RBM_R9 is also in regSet.rsMaskResvd. We lock these
12549         // registers here.
12550         //
12551         if (regSet.rsMaskResvd != RBM_NONE)
12552         {
12553             regSet.rsLockReg(regSet.rsMaskResvd);
12554             regSet.rsSetRegsModified(regSet.rsMaskResvd);
12555         }
12556
12557         /* Figure out which registers hold variables on entry to this block */
12558
12559         regMaskTP specialUseMask = regSet.rsMaskResvd;
12560
12561         specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
12562         regSet.ClearMaskVars();
12563         VarSetOps::ClearD(compiler, compiler->compCurLife);
12564         VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12565
12566 #if FEATURE_STACK_FP_X87
12567         VarSetOps::AssignNoCopy(compiler, genFPregVars,
12568                                 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12569         genFPregCnt     = VarSetOps::Count(compiler, genFPregVars);
12570         genFPdeadRegCnt = 0;
12571 #endif
12572         gcInfo.gcResetForBB();
12573
12574         genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12575 #if FEATURE_STACK_FP_X87
12576         VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12577 #endif
12578
12579         // We should never enregister variables in any of the specialUseMask registers
12580         noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12581
12582         VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
12583         while (iter.NextElem(&varIndex))
12584         {
12585             varNum = compiler->lvaTrackedToVarNum[varIndex];
12586             varDsc = compiler->lvaTable + varNum;
12587             assert(varDsc->lvTracked);
12588             /* Ignore the variable if it's not in a reg */
12589
12590             if (!varDsc->lvRegister)
12591                 continue;
12592             if (isFloatRegType(varDsc->lvType))
12593                 continue;
12594
12595             /* Get hold of the index and the bitmask for the variable */
12596             regNumber regNum  = varDsc->lvRegNum;
12597             regMaskTP regMask = genRegMask(regNum);
12598
12599             regSet.AddMaskVars(regMask);
12600
12601             if (varDsc->lvType == TYP_REF)
12602                 gcrefRegs |= regMask;
12603             else if (varDsc->lvType == TYP_BYREF)
12604                 byrefRegs |= regMask;
12605
12606             /* Mark the register holding the variable as such */
12607
12608             if (varTypeIsMultiReg(varDsc))
12609             {
12610                 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12611                 if (varDsc->lvOtherReg != REG_STK)
12612                 {
12613                     regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12614                     regMask |= genRegMask(varDsc->lvOtherReg);
12615                 }
12616             }
12617             else
12618             {
12619                 regTracker.rsTrackRegLclVar(regNum, varNum);
12620             }
12621         }
12622
12623         gcInfo.gcPtrArgCnt = 0;
12624
12625 #if FEATURE_STACK_FP_X87
12626
12627         regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
12628
12629         memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12630         memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12631
12632         // Setup fp state on block entry
12633         genSetupStateStackFP(block);
12634
12635 #ifdef DEBUG
12636         if (compiler->verbose)
12637         {
12638             JitDumpFPState();
12639         }
12640 #endif // DEBUG
12641 #endif // FEATURE_STACK_FP_X87
12642
12643         /* Make sure we keep track of what pointers are live */
12644
12645         noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12646         gcInfo.gcRegGCrefSetCur = gcrefRegs;
12647         gcInfo.gcRegByrefSetCur = byrefRegs;
12648
12649         /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12650            represent the exception object (TYP_REF).
12651            We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12652            to the block,  it will be the first thing evaluated
12653            (thanks to GTF_ORDER_SIDEEFF).
12654          */
12655
12656         if (handlerGetsXcptnObj(block->bbCatchTyp))
12657         {
12658             GenTreePtr firstStmt = block->FirstNonPhiDef();
12659             if (firstStmt != NULL)
12660             {
12661                 GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
12662                 if (compiler->gtHasCatchArg(firstTree))
12663                 {
12664                     gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12665                 }
12666             }
12667         }
12668
12669         /* Start a new code output block */
12670         CLANG_FORMAT_COMMENT_ANCHOR;
12671
12672 #if FEATURE_EH_FUNCLETS
12673 #if defined(_TARGET_ARM_)
12674         genInsertNopForUnwinder(block);
12675 #endif // defined(_TARGET_ARM_)
12676
12677         genUpdateCurrentFunclet(block);
12678 #endif // FEATURE_EH_FUNCLETS
12679
12680 #ifdef _TARGET_XARCH_
12681         if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12682         {
12683             getEmitter()->emitLoopAlign();
12684         }
12685 #endif
12686
12687 #ifdef DEBUG
12688         if (compiler->opts.dspCode)
12689             printf("\n      L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12690 #endif
12691
12692         block->bbEmitCookie = NULL;
12693
12694         if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
12695         {
12696             /* Mark a label and update the current set of live GC refs */
12697
12698             block->bbEmitCookie =
12699                 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
12700 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12701                                            /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
12702 #else
12703                                            FALSE
12704 #endif
12705                                            );
12706         }
12707
12708         if (block == compiler->fgFirstColdBlock)
12709         {
12710 #ifdef DEBUG
12711             if (compiler->verbose)
12712             {
12713                 printf("\nThis is the start of the cold region of the method\n");
12714             }
12715 #endif
12716             // We should never have a block that falls through into the Cold section
12717             noway_assert(!lblk->bbFallsThrough());
12718
12719             // We require the block that starts the Cold section to have a label
12720             noway_assert(block->bbEmitCookie);
12721             getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12722         }
12723
12724         /* Both stacks are always empty on entry to a basic block */
12725
12726         SetStackLevel(0);
12727 #if FEATURE_STACK_FP_X87
12728         genResetFPstkLevel();
12729 #endif // FEATURE_STACK_FP_X87
12730
12731         genAdjustStackLevel(block);
12732
12733         savedStkLvl = genStackLevel;
12734
12735         /* Tell everyone which basic block we're working on */
12736
12737         compiler->compCurBB = block;
12738
12739         siBeginBlock(block);
12740
12741         // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12742         if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12743             genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
12744
12745         bool firstMapping = true;
12746
12747         /*---------------------------------------------------------------------
12748          *
12749          *  Generate code for each statement-tree in the block
12750          *
12751          */
12752         CLANG_FORMAT_COMMENT_ANCHOR;
12753
12754 #if FEATURE_EH_FUNCLETS
12755         if (block->bbFlags & BBF_FUNCLET_BEG)
12756         {
12757             genReserveFuncletProlog(block);
12758         }
12759 #endif // FEATURE_EH_FUNCLETS
12760
12761         for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
12762         {
12763             noway_assert(stmt->gtOper == GT_STMT);
12764
12765             /* Do we have a new IL-offset ? */
12766
12767             if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12768             {
12769                 /* Create and append a new IP-mapping entry */
12770                 genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
12771                 firstMapping = false;
12772             }
12773
12774 #ifdef DEBUG
12775             if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
12776             {
12777                 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
12778                 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
12779                 {
12780                     while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
12781                     {
12782                         genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, ">    ");
12783                     }
12784                 }
12785             }
12786 #endif // DEBUG
12787
12788             /* Get hold of the statement tree */
12789             GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
12790
12791 #ifdef DEBUG
12792             stmtNum++;
12793             if (compiler->verbose)
12794             {
12795                 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
12796                 printf("Holding variables: ");
12797                 dspRegMask(regSet.rsMaskVars);
12798                 printf("\n\n");
12799                 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
12800                 printf("\n");
12801 #if FEATURE_STACK_FP_X87
12802                 JitDumpFPState();
12803 #endif
12804
12805                 printf("Execution Order:\n");
12806                 for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
12807                 {
12808                     compiler->gtDispTree(treeNode, 0, NULL, true);
12809                 }
12810                 printf("\n");
12811             }
12812             totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
12813             totalCostSz += stmt->gtCostSz;
12814 #endif // DEBUG
12815
12816             compiler->compCurStmt = stmt;
12817
12818             compiler->compCurLifeTree = NULL;
12819             switch (tree->gtOper)
12820             {
12821                 case GT_CALL:
12822                     // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
12823                     // reported as alive even though not used within the caller, for the managed debugger's sake.  So
12824                     // consider the return value of the method as used if generating debuggable code.
12825                     genCodeForCall(tree->AsCall(), compiler->opts.MinOpts() || compiler->opts.compDbgCode);
12826                     genUpdateLife(tree);
12827                     gcInfo.gcMarkRegSetNpt(RBM_INTRET);
12828                     break;
12829
12830                 case GT_IND:
12831                 case GT_NULLCHECK:
12832
12833                     // Just do the side effects
12834                     genEvalSideEffects(tree);
12835                     break;
12836
12837                 default:
12838                     /* Generate code for the tree */
12839
12840                     genCodeForTree(tree, 0);
12841                     break;
12842             }
12843
12844             regSet.rsSpillChk();
12845
12846             /* The value of the tree isn't used, unless it's a return stmt */
12847
12848             if (tree->gtOper != GT_RETURN)
12849                 gcInfo.gcMarkRegPtrVal(tree);
12850
12851 #if FEATURE_STACK_FP_X87
12852             genEndOfStatement();
12853 #endif
12854
12855 #ifdef DEBUG
12856             /* Make sure we didn't bungle pointer register tracking */
12857
12858             regMaskTP ptrRegs       = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
12859             regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
12860
12861             // If return is a GC-type, clear it.  Note that if a common
12862             // epilog is generated (compiler->genReturnBB) it has a void return
12863             // even though we might return a ref.  We can't use the compRetType
12864             // as the determiner because something we are tracking as a byref
12865             // might be used as a return value of an int function (which is legal)
12866             if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
12867                                               (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
12868             {
12869                 nonVarPtrRegs &= ~RBM_INTRET;
12870             }
12871
12872             // When profiling, the first statement in a catch block will be the
12873             // harmless "inc" instruction (does not interfere with the exception
12874             // object).
12875
12876             if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) &&
12877                 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
12878             {
12879                 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
12880             }
12881
12882             if (nonVarPtrRegs)
12883             {
12884                 printf("Regset after tree=");
12885                 Compiler::printTreeID(tree);
12886                 printf(" BB%02u gcr=", block->bbNum);
12887                 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12888                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12889                 printf(", byr=");
12890                 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12891                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12892                 printf(", regVars=");
12893                 printRegMaskInt(regSet.rsMaskVars);
12894                 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
12895                 printf("\n");
12896             }
12897
12898             noway_assert(nonVarPtrRegs == 0);
12899 #endif // DEBUG
12900
12901             noway_assert(stmt->gtOper == GT_STMT);
12902
12903             genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
12904
12905         } //-------- END-FOR each statement-tree of the current block ---------
12906
12907         if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
12908         {
12909             siEndBlock(block);
12910
12911             /* Is this the last block, and are there any open scopes left ? */
12912
12913             bool isLastBlockProcessed = (block->bbNext == NULL);
12914             if (block->isBBCallAlwaysPair())
12915             {
12916                 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
12917             }
12918
12919             if (isLastBlockProcessed && siOpenScopeList.scNext)
12920             {
12921                 /* This assert no longer holds, because we may insert a throw
12922                    block to demarcate the end of a try or finally region when they
12923                    are at the end of the method.  It would be nice if we could fix
12924                    our code so that this throw block will no longer be necessary. */
12925
12926                 // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
12927
12928                 siCloseAllOpenScopes();
12929             }
12930         }
12931
12932         SubtractStackLevel(savedStkLvl);
12933
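        // The registers that held enregistered GC locals on entry to this block are no longer
        // assumed to hold GC pointers.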
12934         gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
12935
12936         if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
12937             compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
12938
12939         /* Both stacks should always be empty on exit from a basic block */
12940
12941         noway_assert(genStackLevel == 0);
12942 #if FEATURE_STACK_FP_X87
12943         noway_assert(genGetFPstkLevel() == 0);
12944
12945         // Do the FPState matching that may have to be done
12946         genCodeForEndBlockTransitionStackFP(block);
12947 #endif
12948
12949         noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
12950
12951         /* Do we need to generate a jump or return? */
12952
12953         switch (block->bbJumpKind)
12954         {
12955             case BBJ_ALWAYS:
12956                 inst_JMP(EJ_jmp, block->bbJumpDest);
12957                 break;
12958
12959             case BBJ_RETURN:
12960                 genExitCode(block);
12961                 break;
12962
12963             case BBJ_THROW:
12964                 // If we have a throw at the end of a function or funclet, we need to emit another instruction
12965                 // afterwards to help the OS unwinder determine the correct context during unwind.
12966                 // We insert an unexecuted breakpoint instruction in several situations
12967                 // following a throw instruction:
12968                 // 1. If the throw is the last instruction of the function or funclet. This helps
12969                 //    the OS unwinder determine the correct context during an unwind from the
12970                 //    thrown exception.
12971                 // 2. If this is the last block of the hot section.
12972                 // 3. If the subsequent block is a special throw block.
12973                 if ((block->bbNext == NULL)
12974 #if FEATURE_EH_FUNCLETS
12975                     || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
12976 #endif // FEATURE_EH_FUNCLETS
12977                     || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
12978                     block->bbNext == compiler->fgFirstColdBlock)
12979                 {
12980                     instGen(INS_BREAKPOINT); // This should never get executed
12981                 }
12982
12983                 break;
12984
12985             case BBJ_CALLFINALLY:
12986
12987 #if defined(_TARGET_X86_)
12988
12989                 /* If we are about to invoke a finally locally from a try block,
12990                    we have to set the hidden slot corresponding to the finally's
12991                    nesting level. When invoked in response to an exception, the
12992                    EE usually does it.
12993
12994                    We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
12995
12996                    This code depends on this order not being messed up.
12997                    We will emit :
12998                         mov [ebp-(n+1)],0
12999                         mov [ebp-  n  ],0xFC
13000                         push &step
13001                         jmp  finallyBlock
13002
13003                   step: mov [ebp-  n  ],0
13004                         jmp leaveTarget
13005                   leaveTarget:
13006                  */
13007
13008                 noway_assert(isFramePointerUsed());
13009
13010                 // Get the nesting level which contains the finally
13011                 compiler->fgGetNestingLevel(block, &finallyNesting);
13012
13013                 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13014                 unsigned filterEndOffsetSlotOffs;
13015                 filterEndOffsetSlotOffs =
13016                     (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
13017
13018                 unsigned curNestingSlotOffs;
13019                 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
13020
13021                 // Zero out the slot for the next nesting level
13022                 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
13023                                            curNestingSlotOffs - sizeof(void*));
13024
13025                 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
13026                                            curNestingSlotOffs);
13027
13028                 // Now push the address of where the finally funclet should
13029                 // return to directly.
13030                 if (!(block->bbFlags & BBF_RETLESS_CALL))
13031                 {
13032                     assert(block->isBBCallAlwaysPair());
13033                     getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13034                 }
13035                 else
13036                 {
13037                     // EE expects a DWORD, so we give it 0
13038                     inst_IV(INS_push_hide, 0);
13039                 }
13040
13041                 // Jump to the finally BB
13042                 inst_JMP(EJ_jmp, block->bbJumpDest);
13043
13044 #elif defined(_TARGET_ARM_)
13045
13046                 // Now set REG_LR to the address of where the finally funclet should
13047                 // return to directly.
13048
13049                 BasicBlock* bbFinallyRet;
13050                 bbFinallyRet = NULL;
13051
13052                 // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13053                 // we would have otherwise created retless calls.
13054                 assert(block->isBBCallAlwaysPair());
13055
13056                 assert(block->bbNext != NULL);
13057                 assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
13058                 assert(block->bbNext->bbJumpDest != NULL);
13059                 assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13060
13061                 bbFinallyRet = block->bbNext->bbJumpDest;
13062                 bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13063
13064                 // Load the address where the finally funclet should return into LR.
13065                 // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13066                 // the return.
13067                 genMov32RelocatableDisplacement(bbFinallyRet, REG_LR);
13068                 regTracker.rsTrackRegTrash(REG_LR);
13069
13070                 // Jump to the finally BB
13071                 inst_JMP(EJ_jmp, block->bbJumpDest);
13072 #else
13073                 NYI("TARGET");
13074 #endif
13075
13076                 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13077                 // jump target using bbJumpDest - that is already used to point
13078                 // to the finally block. So just skip past the BBJ_ALWAYS unless the
13079                 // block is RETLESS.
13080                 if (!(block->bbFlags & BBF_RETLESS_CALL))
13081                 {
13082                     assert(block->isBBCallAlwaysPair());
13083
13084                     lblk  = block;
13085                     block = block->bbNext;
13086                 }
13087                 break;
13088
13089 #ifdef _TARGET_ARM_
13090
13091             case BBJ_EHCATCHRET:
13092                 // set r0 to the address the VM should return to after the catch
13093                 genMov32RelocatableDisplacement(block->bbJumpDest, REG_R0);
13094                 regTracker.rsTrackRegTrash(REG_R0);
13095
13096                 __fallthrough;
13097
13098             case BBJ_EHFINALLYRET:
13099             case BBJ_EHFILTERRET:
13100                 genReserveFuncletEpilog(block);
13101                 break;
13102
13103 #else // _TARGET_ARM_
13104
13105             case BBJ_EHFINALLYRET:
13106             case BBJ_EHFILTERRET:
13107             case BBJ_EHCATCHRET:
13108                 break;
13109
13110 #endif // _TARGET_ARM_
13111
13112             case BBJ_NONE:
13113             case BBJ_COND:
13114             case BBJ_SWITCH:
13115                 break;
13116
13117             default:
13118                 noway_assert(!"Unexpected bbJumpKind");
13119                 break;
13120         }
13121
13122 #ifdef DEBUG
13123         compiler->compCurBB = 0;
13124 #endif
13125
13126     } //------------------ END-FOR each block of the method -------------------
13127
13128     /* Nothing is live at this point */
13129     genUpdateLife(VarSetOps::MakeEmpty(compiler));
13130
13131     /* Finalize the spill  tracking logic */
13132
13133     regSet.rsSpillEnd();
13134
13135     /* Finalize the temp   tracking logic */
13136
13137     compiler->tmpEnd();
13138
13139 #ifdef DEBUG
13140     if (compiler->verbose)
13141     {
13142         printf("\n# ");
13143         printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
13144         printf("%s\n", compiler->info.compFullName);
13145     }
13146 #endif
13147 }
13148 #ifdef _PREFAST_
13149 #pragma warning(pop)
13150 #endif
13151
13152 /*****************************************************************************
13153  *
13154  *  Generate code for a long operation.
13155  *  needReg is a recommendation of which registers to use for the tree.
13156  *  For partially enregistered longs, the tree will be marked as in a register
13157  *    without loading the stack part into a register. Note that only leaf
13158  *    nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13159  *    enregistered so that we can know the memory location of the other half.
13160  */
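 *
 *  Illustrative example (hypothetical): a partially enregistered long LCL_VAR could have
 *  genRegPairLo(tree->gtRegPair) == REG_ESI and genRegPairHi(tree->gtRegPair) == REG_STK;
 *  because the node is a leaf, callers can still address the high 32 bits at the variable's
 *  stack home + 4.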
13161
13162 #ifdef _PREFAST_
13163 #pragma warning(push)
13164 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
13165 #endif
13166 void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
13167 {
13168     genTreeOps oper;
13169     unsigned   kind;
13170
13171     regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13172     regMaskTP addrReg;
13173     regNumber regLo;
13174     regNumber regHi;
13175
13176     noway_assert(tree);
13177     noway_assert(tree->gtOper != GT_STMT);
13178     noway_assert(genActualType(tree->gtType) == TYP_LONG);
13179
13180     /* Figure out what kind of a node we have */
13181
13182     oper = tree->OperGet();
13183     kind = tree->OperKind();
13184
13185     if (tree->InReg())
13186     {
13187     REG_VAR_LONG:
13188         regPair = tree->gtRegPair;
13189
13190         gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13191
13192         goto DONE;
13193     }
13194
13195     /* Is this a constant node? */
13196
13197     if (kind & GTK_CONST)
13198     {
13199         __int64 lval;
13200
13201         /* Pick a register pair for the value */
13202
13203         regPair = regSet.rsPickRegPair(needReg);
13204
13205         /* Load the value into the registers */
13206         CLANG_FORMAT_COMMENT_ANCHOR;
13207
13208 #if !CPU_HAS_FP_SUPPORT
13209         if (oper == GT_CNS_DBL)
13210         {
13211             noway_assert(sizeof(__int64) == sizeof(double));
13212
13213             noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
13214
13215             lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13216         }
13217         else
13218 #endif
13219         {
13220             noway_assert(oper == GT_CNS_LNG);
13221
13222             lval = tree->gtLngCon.gtLconVal;
13223         }
13224
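        // For example, lval == 0x1122334455667788 puts 0x55667788 in the lo register and
        // 0x11223344 in the hi register.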
13225         genSetRegToIcon(genRegPairLo(regPair), int(lval));
13226         genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13227         goto DONE;
13228     }
13229
13230     /* Is this a leaf node? */
13231
13232     if (kind & GTK_LEAF)
13233     {
13234         switch (oper)
13235         {
13236             case GT_LCL_VAR:
13237
13238 #if REDUNDANT_LOAD
13239
13240                 /*  This case has to handle an int64 LCL_VAR that may both be
13241                  *  enregistered and also have a cached copy of itself in a
13242                  *  different set of registers.
13243                  *  We want to return the registers that have the most in common
13244                  *  with the needReg mask.
13245                  */
13246
13247                 /*  Does the var have a copy of itself in the cached registers?
13248                  *  And are these cached registers both free?
13249                  *  If so use these registers if they match any needReg.
13250                  */
13251
13252                 regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13253
13254                 if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13255                     ((genRegPairMask(regPair) & needReg) != RBM_NONE))
13256                 {
13257                     goto DONE;
13258                 }
13259
13260                 /*  Does the variable live in a register?
13261                  *  If so use these registers.
13262                  */
13263                 if (genMarkLclVar(tree))
13264                     goto REG_VAR_LONG;
13265
13266                 /*  If tree is not an enregistered variable then
13267                  *  be sure to use any cached registers that contain
13268                  *  a copy of this local variable
13269                  */
13270                 if (regPair != REG_PAIR_NONE)
13271                 {
13272                     goto DONE;
13273                 }
13274 #endif
13275                 goto MEM_LEAF;
13276
13277             case GT_LCL_FLD:
13278
13279                 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13280                 // to worry about it being enregistered.
13281                 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13282                 goto MEM_LEAF;
13283
13284             case GT_CLS_VAR:
13285             MEM_LEAF:
13286
13287                 /* Pick a register pair for the value */
13288
13289                 regPair = regSet.rsPickRegPair(needReg);
13290
13291                 /* Load the value into the registers */
13292
13293                 instruction loadIns;
13294
13295                 loadIns = ins_Load(TYP_INT); // INS_ldr
13296                 regLo   = genRegPairLo(regPair);
13297                 regHi   = genRegPairHi(regPair);
13298
13299 #if CPU_LOAD_STORE_ARCH
13300                 {
13301                     regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13302                     inst_RV_TT(INS_lea, regAddr, tree, 0);
13303                     regTracker.rsTrackRegTrash(regAddr);
13304
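                    // If regLo happens to be the same register as regAddr, loading the low half
                    // first would clobber the address before the high half is loaded, so in that
                    // case load the high half first.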
13305                     if (regLo != regAddr)
13306                     {
13307                         // assert(regLo != regAddr);  // forced by if statement
13308                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13309                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13310                     }
13311                     else
13312                     {
13313                         // assert(regHi != regAddr);  // implied by regpair property and the if statement
13314                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13315                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13316                     }
13317                 }
13318 #else
13319                 inst_RV_TT(loadIns, regLo, tree, 0);
13320                 inst_RV_TT(loadIns, regHi, tree, 4);
13321 #endif
13322
13323 #ifdef _TARGET_ARM_
13324                 if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13325                 {
13326                     // Emit a memory barrier instruction after the load
13327                     instGen_MemoryBarrier();
13328                 }
13329 #endif
13330
13331                 regTracker.rsTrackRegTrash(regLo);
13332                 regTracker.rsTrackRegTrash(regHi);
13333
13334                 goto DONE;
13335
13336             default:
13337 #ifdef DEBUG
13338                 compiler->gtDispTree(tree);
13339 #endif
13340                 noway_assert(!"unexpected leaf");
13341         }
13342     }
13343
13344     /* Is it a 'simple' unary/binary operator? */
13345
13346     if (kind & GTK_SMPOP)
13347     {
13348         instruction insLo;
13349         instruction insHi;
13350         bool        doLo;
13351         bool        doHi;
13352         bool        setCarry = false;
13353         int         helper;
13354
13355         GenTreePtr op1 = tree->gtOp.gtOp1;
13356         GenTreePtr op2 = tree->gtGetOp2IfPresent();
13357
13358         switch (oper)
13359         {
13360             case GT_ASG:
13361             {
13362                 unsigned lclVarNum    = compiler->lvaCount;
13363                 unsigned lclVarILoffs = DUMMY_INIT(0);
13364
13365                 /* Is the target a local ? */
13366
13367                 if (op1->gtOper == GT_LCL_VAR)
13368                 {
13369                     unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
13370                     LclVarDsc* varDsc;
13371
13372                     noway_assert(varNum < compiler->lvaCount);
13373                     varDsc = compiler->lvaTable + varNum;
13374
13375                     // No dead stores (with min opts we may have dead stores)
13376                     noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13377
13378                     /* For non-debuggable code, every definition of a lcl-var has
13379                      * to be checked to see if we need to open a new scope for it.
13380                      * Remember the local var info to call siCheckVarScope
13381                      * AFTER codegen of the assignment.
13382                      */
13383                     if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
13384                         (compiler->info.compVarScopesCount > 0))
13385                     {
13386                         lclVarNum    = varNum;
13387                         lclVarILoffs = op1->gtLclVar.gtLclILoffs;
13388                     }
13389
13390                     /* Has the variable been assigned to a register (pair) ? */
13391
13392                     if (genMarkLclVar(op1))
13393                     {
13394                         noway_assert(op1->InReg());
13395                         regPair = op1->gtRegPair;
13396                         regLo   = genRegPairLo(regPair);
13397                         regHi   = genRegPairHi(regPair);
13398                         noway_assert(regLo != regHi);
13399
13400                         /* Is the value being assigned a constant? */
13401
13402                         if (op2->gtOper == GT_CNS_LNG)
13403                         {
13404                             /* Move the value into the target */
13405
13406                             genMakeRegPairAvailable(regPair);
13407
13408                             instruction ins;
13409                             if (regLo == REG_STK)
13410                             {
13411                                 ins = ins_Store(TYP_INT);
13412                             }
13413                             else
13414                             {
13415                                 // Always do the stack half first (that way, if it grabs a
13416                                 // register, it can't clobber regLo)
13417                                 if (regHi == REG_STK)
13418                                 {
13419                                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13420                                 }
13421                                 ins = INS_mov;
13422                             }
13423                             inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
13424
13425                             // The REG_STK case has already been handled
13426                             if (regHi != REG_STK)
13427                             {
13428                                 ins = INS_mov;
13429                                 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13430                             }
13431
13432                             goto DONE_ASSG_REGS;
13433                         }
13434
13435                         /* Compute the RHS into desired register pair */
13436
13437                         if (regHi != REG_STK)
13438                         {
13439                             genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13440                             noway_assert(op2->InReg());
13441                             noway_assert(op2->gtRegPair == regPair);
13442                         }
13443                         else
13444                         {
13445                             regPairNo curPair;
13446                             regNumber curLo;
13447                             regNumber curHi;
13448
13449                             genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13450
13451                             noway_assert(op2->InReg());
13452
13453                             curPair = op2->gtRegPair;
13454                             curLo   = genRegPairLo(curPair);
13455                             curHi   = genRegPairHi(curPair);
13456
13457                             /* Move the high half first; the target's high half lives on the stack */
13458                             inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13459
13460                             if (regLo != curLo)
13461                             {
13462                                 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13463                                     regSet.rsSpillReg(regLo);
13464                                 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13465                                 regTracker.rsTrackRegCopy(regLo, curLo);
13466                             }
13467                         }
13468
13469                         genReleaseRegPair(op2);
13470                         goto DONE_ASSG_REGS;
13471                     }
13472                 }
13473
13474                 /* Is the value being assigned a constant? */
13475
13476                 if (op2->gtOper == GT_CNS_LNG)
13477                 {
13478                     /* Make the target addressable */
13479
13480                     addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13481
13482                     /* Move the value into the target */
13483
13484                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
13485                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13486
13487                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13488
13489                     goto LAsgExit;
13490                 }
13491
13492 #if 0
13493                 /* Catch a case where we can avoid generating "op reg, mem". We get better
13494                  * pairing from
13495                  *     mov regHi, mem
13496                  *     op  regHi, reg
13497                  *
13498                  * To avoid problems with order of evaluation, only do this if op2 is
13499                  * a non-enregistered local variable.
13500                  */
13501
13502                 if (GenTree::OperIsCommutative(oper) &&
13503                     op1->gtOper == GT_LCL_VAR &&
13504                     op2->gtOper == GT_LCL_VAR)
13505                 {
13506                     regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13507
13508                     /* Is op2 a non-enregistered local variable? */
13509                     if (regPair == REG_PAIR_NONE)
13510                     {
13511                         regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13512
13513                         /* Is op1 an enregistered local variable? */
13514                         if (regPair != REG_PAIR_NONE)
13515                         {
13516                             /* Swap the operands */
13517                             GenTreePtr op = op1;
13518                             op1 = op2;
13519                             op2 = op;
13520                         }
13521                     }
13522                 }
13523 #endif
13524
13525                 /* Eliminate worthless assignment "lcl = lcl" */
13526
13527                 if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
13528                     op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
13529                 {
13530                     genUpdateLife(op2);
13531                     goto LAsgExit;
13532                 }
13533
13534                 if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
13535                     // op1,op2 need to be materialized in the correct order.
13536                     (tree->gtFlags & GTF_REVERSE_OPS))
13537                 {
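                    // A sketch of the pattern handled here (names illustrative): for
                    //     long l = (uint)i;    // zero-extending cast of a 32-bit value
                    // when the RHS must be evaluated first (GTF_REVERSE_OPS), the 32-bit operand is
                    // computed into a register, stored into the low DWORD of the target, and the
                    // high DWORD is simply set to zero, so no 64-bit register pair has to be
                    // materialized for the RHS.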
13538                     /* Generate the small RHS into a register pair */
13539
13540                     GenTreePtr smallOpr = op2->gtOp.gtOp1;
13541
13542                     genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13543
13544                     /* Make the target addressable */
13545
13546                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13547
13548                     /* Make sure everything is still addressable */
13549
13550                     genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13551                     noway_assert(smallOpr->InReg());
13552                     regHi   = smallOpr->gtRegNum;
13553                     addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13554
13555                     // conv.ovf.u8 could overflow if the original number was negative
13556                     if (op2->gtOverflow())
13557                     {
13558                         noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
13559                                      0);                              // conv.ovf.u8.un should be bashed to conv.u8.un
13560                         instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
13561                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13562                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13563                     }
13564
13565                     /* Move the value into the target */
13566
13567                     inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13568                     inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
13569
13570                     /* Free up anything that was tied up by either side */
13571
13572                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13573                     genReleaseReg(smallOpr);
13574
13575 #if REDUNDANT_LOAD
13576                     if (op1->gtOper == GT_LCL_VAR)
13577                     {
13578                         /* clear this local from reg table */
13579                         regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13580
13581                         /* mark RHS registers as containing the local var */
13582                         regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13583                     }
13584 #endif
13585                     goto LAsgExit;
13586                 }
13587
13588                 /* Is the LHS more complex than the RHS? */
13589
13590                 if (tree->gtFlags & GTF_REVERSE_OPS)
13591                 {
13592                     /* Generate the RHS into a register pair */
13593
13594                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13595                     noway_assert(op2->InReg());
13596
13597                     /* Make the target addressable */
13598                     op1     = genCodeForCommaTree(op1);
13599                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13600
13601                     /* Make sure the RHS register hasn't been spilled */
13602
13603                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13604                 }
13605                 else
13606                 {
13607                     /* Make the target addressable */
13608
13609                     op1     = genCodeForCommaTree(op1);
13610                     addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13611
13612                     /* Generate the RHS into a register pair */
13613
13614                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13615                 }
13616
13617                 /* Lock 'op2' and make sure 'op1' is still addressable */
13618
13619                 noway_assert(op2->InReg());
13620                 regPair = op2->gtRegPair;
13621
13622                 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13623
13624                 /* Move the value into the target */
13625
13626                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13627                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13628
13629                 /* Free up anything that was tied up by either side */
13630
13631                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13632                 genReleaseRegPair(op2);
13633
13634             DONE_ASSG_REGS:
13635
13636 #if REDUNDANT_LOAD
13637
13638                 if (op1->gtOper == GT_LCL_VAR)
13639                 {
13640                     /* Clear this local from reg table */
13641
13642                     regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13643
13644                     if ((op2->InReg()) &&
13645                         /* constant has precedence over local */
13646                         //                    rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13647                         tree->gtOper == GT_ASG)
13648                     {
13649                         regNumber regNo;
13650
13651                         /* mark RHS registers as containing the local var */
13652
13653                         regNo = genRegPairLo(op2->gtRegPair);
13654                         if (regNo != REG_STK)
13655                             regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13656
13657                         regNo = genRegPairHi(op2->gtRegPair);
13658                         if (regNo != REG_STK)
13659                         {
13660                             /* For partially enregistered longs, we might have
13661                                stomped on op2's hiReg */
13662                             if (!(op1->InReg()) || regNo != genRegPairLo(op1->gtRegPair))
13663                             {
13664                                 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13665                             }
13666                         }
13667                     }
13668                 }
13669 #endif
13670
13671             LAsgExit:
13672
13673                 genUpdateLife(op1);
13674                 genUpdateLife(tree);
13675
13676                 /* For non-debuggable code, every definition of a lcl-var has
13677                  * to be checked to see if we need to open a new scope for it.
13678                  */
13679                 if (lclVarNum < compiler->lvaCount)
13680                     siCheckVarScope(lclVarNum, lclVarILoffs);
13681             }
13682                 return;
13683
13684             case GT_SUB:
13685                 insLo    = INS_sub;
13686                 insHi    = INS_SUBC;
13687                 setCarry = true;
13688                 goto BINOP_OVF;
13689             case GT_ADD:
13690                 insLo    = INS_add;
13691                 insHi    = INS_ADDC;
13692                 setCarry = true;
13693                 goto BINOP_OVF;
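                // 64-bit add/sub is split into two 32-bit operations: the low halves use an
                // instruction that sets the carry flag and the high halves consume it.  A rough
                // x86-flavored sketch for "a + b" (registers illustrative):
                //     add regLoA, regLoB      ; low 32 bits, sets carry
                //     adc regHiA, regHiB      ; high 32 bits, adds the carry in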
13694
13695                 bool ovfl;
13696
13697             BINOP_OVF:
13698                 ovfl = tree->gtOverflow();
13699                 goto _BINOP;
13700
13701             case GT_AND:
13702                 insLo = insHi = INS_AND;
13703                 goto BINOP;
13704             case GT_OR:
13705                 insLo = insHi = INS_OR;
13706                 goto BINOP;
13707             case GT_XOR:
13708                 insLo = insHi = INS_XOR;
13709                 goto BINOP;
13710
13711             BINOP:
13712                 ovfl = false;
13713                 goto _BINOP;
13714
13715             _BINOP:
13716
13717                 /* The following makes an assumption about gtSetEvalOrder(this) */
13718
13719                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13720
13721                 /* Special case: check for "(long(intval) << 32) | longval" */
13722
13723                 if (oper == GT_OR && op1->gtOper == GT_LSH)
13724                 {
13725                     GenTreePtr lshLHS = op1->gtOp.gtOp1;
13726                     GenTreePtr lshRHS = op1->gtOp.gtOp2;
13727
13728                     if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13729                         genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13730                     {
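                        // A sketch of the source pattern recognized here (names illustrative):
                        //     result = ((long)hi32 << 32) | loVal;
                        // The cast and the shift-by-32 are discarded: 'hi32' becomes the high
                        // DWORD of the result directly, so no real 64-bit shift is emitted.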
13731
13732                         /* Throw away the cast of the shift operand. */
13733
13734                         op1 = lshLHS->gtCast.CastOp();
13735
13736                         /* Special case: check op2 for "ulong(intval)" */
13737                         if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
13738                             genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
13739                         {
13740                             /* Throw away the cast of the second operand. */
13741
13742                             op2 = op2->gtCast.CastOp();
13743                             goto SIMPLE_OR_LONG;
13744                         }
13745                         /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
13746                         else if (op2->gtOper == GT_AND)
13747                         {
13748                             GenTreePtr andLHS;
13749                             andLHS = op2->gtOp.gtOp1;
13750                             GenTreePtr andRHS;
13751                             andRHS = op2->gtOp.gtOp2;
13752
13753                             if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13754                                 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13755                                 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13756                             {
13757                                 /* Throw away the cast of the second operand. */
13758
13759                                 op2 = andLHS->gtCast.CastOp();
13760
13761                             SIMPLE_OR_LONG:
13762                                 // Load the high DWORD, i.e. op1
13763
13764                                 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13765
13766                                 noway_assert(op1->InReg());
13767                                 regHi = op1->gtRegNum;
13768                                 regSet.rsMarkRegUsed(op1);
13769
13770                                 // Load the low DWORD, i.e. op2
13771
13772                                 genCodeForTree(op2, needReg & ~genRegMask(regHi));
13773
13774                                 noway_assert(op2->InReg());
13775                                 regLo = op2->gtRegNum;
13776
13777                                 /* Make sure regHi is still around. Also, force
13778                                    regLo to be excluded in case regLo==regHi */
13779
13780                                 genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
13781                                 regHi = op1->gtRegNum;
13782
13783                                 regPair = gen2regs2pair(regLo, regHi);
13784                                 goto DONE;
13785                             }
13786                         }
13787
13788                         /*  Generate the following sequence:
13789                                Prepare op1 (discarding shift)
13790                                Compute op2 into some regpair
13791                                OR regpairhi, op1
13792                          */
13793
13794                         /* First, make op1 addressable */
13795
13796                         /* tempReg must avoid needReg, op2->gtRsvdRegs and regSet.rsMaskResvd.
13797
13798                            It may seem wrong to exclude needReg, since we do not ensure that the reg pair
13799                            into which the long value is computed comes from needReg.  But at this point the
13800                            safest fix is to also exclude regSet.rsMaskResvd.
13801
13802                            Note that needReg could be the set of free registers (excluding the reserved
13803                            ones).  If we did not exclude regSet.rsMaskResvd, the expression below could end
13804                            up trying to pick a register from the reserved set, which is bound to fail.  To
13805                            prevent that we avoid regSet.rsMaskResvd.
13806                          */
13807                         regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
13808
13809                         addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
13810
13811                         genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
13812
13813                         noway_assert(op2->InReg());
13814                         regPair = op2->gtRegPair;
13815                         regHi   = genRegPairHi(regPair);
13816
13817                         /* The operand might have interfered with the address */
13818
13819                         addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13820
13821                         /* Now compute the result */
13822
13823                         inst_RV_TT(insHi, regHi, op1, 0);
13824
13825                         regTracker.rsTrackRegTrash(regHi);
13826
13827                         /* Free up anything that was tied up by the LHS */
13828
13829                         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13830
13831                         /* The result is where the second operand is sitting */
13832
13833                         genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
13834
13835                         regPair = op2->gtRegPair;
13836                         goto DONE;
13837                     }
13838                 }
13839
13840                 /* Special case: check for "longval | (long(intval) << 32)" */
13841
13842                 if (oper == GT_OR && op2->gtOper == GT_LSH)
13843                 {
13844                     GenTreePtr lshLHS = op2->gtOp.gtOp1;
13845                     GenTreePtr lshRHS = op2->gtOp.gtOp2;
13846
13847                     if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13848                         genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13849
13850                     {
13851                         /* We throw away the cast of the shift operand. */
13852
13853                         op2 = lshLHS->gtCast.CastOp();
13854
13855                         /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
13856
13857                         if (op1->gtOper == GT_AND)
13858                         {
13859                             GenTreePtr andLHS = op1->gtOp.gtOp1;
13860                             GenTreePtr andRHS = op1->gtOp.gtOp2;
13861
13862                             if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13863                                 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13864                                 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13865                             {
13866                                 /* Throw away the cast of the first operand. */
13867
13868                                 op1 = andLHS->gtCast.CastOp();
13869
13870                                 // Load the low DWORD, i.e. op1
13871
13872                                 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13873
13874                                 noway_assert(op1->InReg());
13875                                 regLo = op1->gtRegNum;
13876                                 regSet.rsMarkRegUsed(op1);
13877
13878                                 // Load the high DWORD, i.e. op2
13879
13880                                 genCodeForTree(op2, needReg & ~genRegMask(regLo));
13881
13882                                 noway_assert(op2->InReg());
13883                                 regHi = op2->gtRegNum;
13884
13885                                 /* Make sure regLo is still around. Also, force
13886                                    regHi to be excluded in case regLo==regHi */
13887
13888                                 genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
13889                                 regLo = op1->gtRegNum;
13890
13891                                 regPair = gen2regs2pair(regLo, regHi);
13892                                 goto DONE;
13893                             }
13894                         }
13895
13896                         /*  Generate the following sequence:
13897                               Compute op1 into some regpair
13898                               Make op2 (ignoring shift) addressable
13899                               OR regPairHi, op2
13900                          */
13901
13902                         // First, generate the first operand into some register
13903
13904                         genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13905                         noway_assert(op1->InReg());
13906
13907                         /* Make the second operand addressable */
13908
13909                         addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
13910
13911                         /* Make sure the result is in a free register pair */
13912
13913                         genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13914                         regPair = op1->gtRegPair;
13915                         regHi   = genRegPairHi(regPair);
13916
13917                         /* The operand might have interfered with the address */
13918
13919                         addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
13920
13921                         /* Compute the new value */
13922
13923                         inst_RV_TT(insHi, regHi, op2, 0);
13924
13925                         /* The value in the high register has been trashed */
13926
13927                         regTracker.rsTrackRegTrash(regHi);
13928
13929                         goto DONE_OR;
13930                     }
13931                 }
13932
13933                 /* Generate the first operand into registers */
13934
13935                 if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13936                     ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
13937                 {
13938                     regPair = regSet.rsPickRegPair(needReg);
13939                     genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13940                 }
13941                 else
13942                 {
13943                     genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13944                 }
13945                 noway_assert(op1->InReg());
13946                 regMaskTP op1Mask;
13947                 regPair = op1->gtRegPair;
13948                 op1Mask = genRegPairMask(regPair);
13949
13950                 /* Make the second operand addressable */
13951                 regMaskTP needReg2;
13952                 needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
13953                 addrReg  = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
13954
13955                 // TODO: If 'op1' got spilled and 'op2' happens to be
13956                 // TODO: in a register, and we have add/mul/and/or/xor,
13957                 // TODO: reverse the operands since we can perform the
13958                 // TODO: operation directly with the spill temp, e.g.
13959                 // TODO: 'add regHi, [temp]'.
13960
13961                 /* Make sure the result is in a free register pair */
13962
13963                 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13964                 regPair = op1->gtRegPair;
13965                 op1Mask = genRegPairMask(regPair);
13966
13967                 regLo = genRegPairLo(regPair);
13968                 regHi = genRegPairHi(regPair);
13969
13970                 /* Make sure that we don't spill regLo/regHi below */
13971                 regSet.rsLockUsedReg(op1Mask);
13972
13973                 /* The operand might have interfered with the address */
13974
13975                 addrReg = genKeepAddressable(op2, addrReg);
13976
13977                 /* The value in the register pair is about to be trashed */
13978
13979                 regTracker.rsTrackRegTrash(regLo);
13980                 regTracker.rsTrackRegTrash(regHi);
13981
13982                 /* Compute the new value */
13983
13984                 doLo = true;
13985                 doHi = true;
13986
13987                 if (op2->gtOper == GT_CNS_LNG)
13988                 {
13989                     __int64 icon = op2->gtLngCon.gtLconVal;
13990
13991                     /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
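                    // With a constant RHS whole halves of the operation can often be skipped.
                    // For example (a sketch): "x & 0xFFFFFFFF00000000" emits no AND at all; the
                    // low register is just set to zero and the high half is left untouched, while
                    // an OR or XOR with a zero half is dropped for that half entirely.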
13992
13993                     switch (oper)
13994                     {
13995                         case GT_AND:
13996                             if ((int)(icon) == -1)
13997                                 doLo = false;
13998                             if ((int)(icon >> 32) == -1)
13999                                 doHi = false;
14000
14001                             if (!(icon & I64(0x00000000FFFFFFFF)))
14002                             {
14003                                 genSetRegToIcon(regLo, 0);
14004                                 doLo = false;
14005                             }
14006
14007                             if (!(icon & I64(0xFFFFFFFF00000000)))
14008                             {
14009                                 /* Ensure the low half is always set first */
14010
14011                                 if (doLo)
14012                                 {
14013                                     inst_RV_TT(insLo, regLo, op2, 0);
14014                                     doLo = false;
14015                                 }
14016                                 genSetRegToIcon(regHi, 0);
14017                                 doHi = false;
14018                             }
14019
14020                             break;
14021
14022                         case GT_OR:
14023                         case GT_XOR:
14024                             if (!(icon & I64(0x00000000FFFFFFFF)))
14025                                 doLo = false;
14026                             if (!(icon & I64(0xFFFFFFFF00000000)))
14027                                 doHi = false;
14028                             break;
14029                         default:
14030                             break;
14031                     }
14032                 }
14033
14034                 // Fix 383813 X86/ARM ILGEN
14035                 // Fix 383793 ARM ILGEN
14036                 // Fix 383911 ARM ILGEN
14037                 regMaskTP newMask;
14038                 newMask = addrReg & ~op1Mask;
14039                 regSet.rsLockUsedReg(newMask);
14040
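                // Flag requirements: the low-half instruction must set the carry flag when the
                // high half consumes it (64-bit add/sub), and the high-half instruction must set
                // the flags when an overflow check follows.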
14041                 if (doLo)
14042                 {
14043                     insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14044                     inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14045                 }
14046                 if (doHi)
14047                 {
14048                     insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14049                     inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14050                 }
14051
14052                 regSet.rsUnlockUsedReg(newMask);
14053                 regSet.rsUnlockUsedReg(op1Mask);
14054
14055             DONE_OR:
14056
14057                 /* Free up anything that was tied up by the LHS */
14058
14059                 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14060
14061                 /* The result is where the first operand is sitting */
14062
14063                 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14064
14065                 regPair = op1->gtRegPair;
14066
14067                 if (ovfl)
14068                     genCheckOverflow(tree);
14069
14070                 goto DONE;
14071
14072             case GT_UMOD:
14073
14074                 regPair = genCodeForLongModInt(tree, needReg);
14075                 goto DONE;
14076
14077             case GT_MUL:
14078
14079                 /* Special case: both operands promoted from int */
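                // Both operands are casts from 32-bit values (GTF_MUL_64RSLT), so the full 64-bit
                // product comes from a single 32x32->64 multiply.  On x86 this uses the one-operand
                // imul/mul form, which leaves the result in EDX:EAX; a rough, illustrative sketch:
                //     mov  eax, op1_32
                //     imul op2_32        ; EDX:EAX = EAX * op2 (mul is used for the unsigned case)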
14080
14081                 assert(tree->gtIsValid64RsltMul());
14082
14083                 /* Change to an integer multiply temporarily */
14084
14085                 tree->gtType = TYP_INT;
14086
14087                 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14088                 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14089                 tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14090
14091                 assert(tree->gtFlags & GTF_MUL_64RSLT);
14092
14093 #if defined(_TARGET_X86_)
14094                 // imul on x86 requires EDX:EAX
14095                 genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14096                 noway_assert(tree->InReg());
14097                 noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
14098 #elif defined(_TARGET_ARM_)
14099                 genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14100                 noway_assert(tree->InReg());
14101 #else
14102                 assert(!"Unsupported target for 64-bit multiply codegen");
14103 #endif
14104
14105                 /* Restore gtType, op1 and op2 from the change above */
14106
14107                 tree->gtType     = TYP_LONG;
14108                 tree->gtOp.gtOp1 = op1;
14109                 tree->gtOp.gtOp2 = op2;
14110
14111 #if defined(_TARGET_X86_)
14112                 /* The result is now in EDX:EAX */
14113                 regPair = REG_PAIR_EAXEDX;
14114 #elif defined(_TARGET_ARM_)
14115                 regPair = tree->gtRegPair;
14116 #endif
14117                 goto DONE;
14118
14119             case GT_LSH:
14120                 helper = CORINFO_HELP_LLSH;
14121                 goto SHIFT;
14122             case GT_RSH:
14123                 helper = CORINFO_HELP_LRSH;
14124                 goto SHIFT;
14125             case GT_RSZ:
14126                 helper = CORINFO_HELP_LRSZ;
14127                 goto SHIFT;
14128
14129             SHIFT:
14130
14131                 noway_assert(op1->gtType == TYP_LONG);
14132                 noway_assert(genActualType(op2->gtType) == TYP_INT);
14133
14134                 /* Is the second operand a constant? */
14135
14136                 if (op2->gtOper == GT_CNS_INT)
14137                 {
14138                     unsigned int count = op2->gtIntCon.gtIconVal;
14139
14140                     /* Compute the left operand into a free register pair */
14141
14142                     genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14143                     noway_assert(op1->InReg());
14144
14145                     regPair = op1->gtRegPair;
14146                     regLo   = genRegPairLo(regPair);
14147                     regHi   = genRegPairHi(regPair);
14148
14149                     /* Assume the value in the register pair is trashed. In some cases, though,
14150                        a register might be set to zero, and we can use that information to improve
14151                        some code generation.
14152                     */
14153
14154                     regTracker.rsTrackRegTrash(regLo);
14155                     regTracker.rsTrackRegTrash(regHi);
14156
14157                     /* Generate the appropriate shift instructions */
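                    // Rough shape of the constant-shift sequences emitted below (x86 flavor,
                    // registers illustrative).  For "x << n" with 0 < n < 32:
                    //     shld regHi, regLo, n    ; bits shifted out of the low half enter the high half
                    //     shl  regLo, n
                    // For n >= 32 the low DWORD is moved into the high DWORD (the reverse for right
                    // shifts), any remaining (n - 32) shift is applied there, and the vacated half
                    // is zeroed, or sign-filled for arithmetic right shifts.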
14158
14159                     switch (oper)
14160                     {
14161                         case GT_LSH:
14162                             if (count == 0)
14163                             {
14164                                 // regHi, regLo are correct
14165                             }
14166                             else if (count < 32)
14167                             {
14168 #if defined(_TARGET_XARCH_)
14169                                 inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14170 #elif defined(_TARGET_ARM_)
14171                                 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14172                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
14173                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14174 #else  // _TARGET_*
14175                                 NYI("INS_shld");
14176 #endif // _TARGET_*
14177                                 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14178                             }
14179                             else // count >= 32
14180                             {
14181                                 assert(count >= 32);
14182                                 if (count < 64)
14183                                 {
14184 #if defined(_TARGET_ARM_)
14185                                     if (count == 32)
14186                                     {
14187                                         // mov low dword into high dword (i.e. shift left by 32 bits)
14188                                         inst_RV_RV(INS_mov, regHi, regLo);
14189                                     }
14190                                     else
14191                                     {
14192                                         assert(count > 32 && count < 64);
14193                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
14194                                                                     count - 32);
14195                                     }
14196 #else  // _TARGET_*
14197                                     // mov low dword into high dword (i.e. shift left by 32 bits)
14198                                     inst_RV_RV(INS_mov, regHi, regLo);
14199                                     if (count > 32)
14200                                     {
14201                                         // Shift high dword left by count - 32
14202                                         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14203                                     }
14204 #endif // _TARGET_*
14205                                 }
14206                                 else // count >= 64
14207                                 {
14208                                     assert(count >= 64);
14209                                     genSetRegToIcon(regHi, 0);
14210                                 }
14211                                 genSetRegToIcon(regLo, 0);
14212                             }
14213                             break;
14214
14215                         case GT_RSH:
14216                             if (count == 0)
14217                             {
14218                                 // regHi, regLo are correct
14219                             }
14220                             else if (count < 32)
14221                             {
14222 #if defined(_TARGET_XARCH_)
14223                                 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14224 #elif defined(_TARGET_ARM_)
14225                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14226                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14227                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14228 #else  // _TARGET_*
14229                                 NYI("INS_shrd");
14230 #endif // _TARGET_*
14231                                 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14232                             }
14233                             else // count >= 32
14234                             {
14235                                 assert(count >= 32);
14236                                 if (count < 64)
14237                                 {
14238 #if defined(_TARGET_ARM_)
14239                                     if (count == 32)
14240                                     {
14241                                         // mov high dword into low dword (i.e. shift right by 32 bits)
14242                                         inst_RV_RV(INS_mov, regLo, regHi);
14243                                     }
14244                                     else
14245                                     {
14246                                         assert(count > 32 && count < 64);
14247                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
14248                                                                     count - 32);
14249                                     }
14250 #else  // _TARGET_*
14251                                     // mov high dword into low dword (i.e. shift right by 32 bits)
14252                                     inst_RV_RV(INS_mov, regLo, regHi);
14253                                     if (count > 32)
14254                                     {
14255                                         // Shift low dword right by count - 32
14256                                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14257                                     }
14258 #endif // _TARGET_*
14259                                 }
14260
14261                                 // Propagate sign bit in high dword
14262                                 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14263
14264                                 if (count >= 64)
14265                                 {
14266                                     // Propagate the sign from the high dword
14267                                     inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14268                                 }
14269                             }
14270                             break;
14271
14272                         case GT_RSZ:
14273                             if (count == 0)
14274                             {
14275                                 // regHi, regLo are correct
14276                             }
14277                             else if (count < 32)
14278                             {
14279 #if defined(_TARGET_XARCH_)
14280                                 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14281 #elif defined(_TARGET_ARM_)
14282                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14283                                 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14284                                                               INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14285 #else  // _TARGET_*
14286                                 NYI("INS_shrd");
14287 #endif // _TARGET_*
14288                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14289                             }
14290                             else // count >= 32
14291                             {
14292                                 assert(count >= 32);
14293                                 if (count < 64)
14294                                 {
14295 #if defined(_TARGET_ARM_)
14296                                     if (count == 32)
14297                                     {
14298                                         // mov high dword into low dword (i.e. shift right by 32 bits)
14299                                         inst_RV_RV(INS_mov, regLo, regHi);
14300                                     }
14301                                     else
14302                                     {
14303                                         assert(count > 32 && count < 64);
14304                                         getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
14305                                                                     count - 32);
14306                                     }
14307 #else  // _TARGET_*
14308                                     // mov high dword into low dword (i.e. shift right by 32 bits)
14309                                     inst_RV_RV(INS_mov, regLo, regHi);
14310                                     if (count > 32)
14311                                     {
14312                                         // Shift low dword right by count - 32
14313                                         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14314                                     }
14315 #endif // _TARGET_*
14316                                 }
14317                                 else // count >= 64
14318                                 {
14319                                     assert(count >= 64);
14320                                     genSetRegToIcon(regLo, 0);
14321                                 }
14322                                 genSetRegToIcon(regHi, 0);
14323                             }
14324                             break;
14325
14326                         default:
14327                             noway_assert(!"Illegal oper for long shift");
14328                             break;
14329                     }
14330
14331                     goto DONE_SHF;
14332                 }
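                /* Non-constant shift counts are not expanded inline.  The value is loaded into
                   the REG_LNGARG_0 pair and the count into REG_SHIFT_LNG, and one of the JIT
                   helpers selected above (CORINFO_HELP_LLSH / LRSH / LRSZ) performs the shift. */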
14333
14334                 /* Which operand are we supposed to compute first? */
14335
14336                 assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
14337
14338                 if (tree->gtFlags & GTF_REVERSE_OPS)
14339                 {
14340                     /* The second operand can't be a constant */
14341
14342                     noway_assert(op2->gtOper != GT_CNS_INT);
14343
14344                     /* Load the shift count, hopefully into RBM_SHIFT */
14345                     RegSet::ExactReg exactReg;
14346                     if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14347                         exactReg = RegSet::EXACT_REG;
14348                     else
14349                         exactReg = RegSet::ANY_REG;
14350                     genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14351
14352                     /* Compute the left operand into REG_LNGARG_0 */
14353
14354                     genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14355                     noway_assert(op1->InReg());
14356
14357                     /* Lock op1 so that it doesn't get trashed */
14358
14359                     regSet.rsLockUsedReg(RBM_LNGARG_0);
14360
14361                     /* Make sure the shift count wasn't displaced */
14362
14363                     genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14364
14365                     /* Lock op2 */
14366
14367                     regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14368                 }
14369                 else
14370                 {
14371                     /* Compute the left operand into REG_LNGARG_0 */
14372
14373                     genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14374                     noway_assert(op1->InReg());
14375
14376                     /* Compute the shift count into RBM_SHIFT */
14377
14378                     genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14379
14380                     /* Lock op2 */
14381
14382                     regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14383
14384                     /* Make sure the value hasn't been displaced */
14385
14386                     genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14387
14388                     /* Lock op1 so that it doesn't get trashed */
14389
14390                     regSet.rsLockUsedReg(RBM_LNGARG_0);
14391                 }
14392
14393 #ifndef _TARGET_X86_
14394                 /* The generic helper is a C-routine and so it follows the full ABI */
14395                 {
14396                     /* Spill any callee-saved registers which are being used */
14397                     regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14398
14399                     /* But do not spill our argument registers. */
14400                     spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14401
14402                     if (spillRegs)
14403                     {
14404                         regSet.rsSpillRegs(spillRegs);
14405                     }
14406                 }
14407 #endif // !_TARGET_X86_
14408
14409                 /* Perform the shift by calling a helper function */
14410
14411                 noway_assert(op1->gtRegPair == REG_LNGARG_0);
14412                 noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14413                 noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14414
14415                 genEmitHelperCall(helper,
14416                                   0,         // argSize
14417                                   EA_8BYTE); // retSize
14418
14419 #ifdef _TARGET_X86_
14420                 /* The value in the register pair is trashed */
14421
14422                 regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14423                 regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14424 #else  // _TARGET_X86_
14425                 /* The generic helper is a C-routine and so it follows the full ABI */
14426                 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14427 #endif // _TARGET_X86_
14428
14429                 /* Release both operands */
14430
14431                 regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14432                 genReleaseRegPair(op1);
14433                 genReleaseReg(op2);
14434
14435             DONE_SHF:
14436
14437                 noway_assert(op1->InReg());
14438                 regPair = op1->gtRegPair;
14439                 goto DONE;
14440
14441             case GT_NEG:
14442             case GT_NOT:
14443
14444                 /* Generate the operand into some register pair */
14445
14446                 genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14447                 noway_assert(op1->InReg());
14448
14449                 regPair = op1->gtRegPair;
14450
14451                 /* Figure out which registers the value is in */
14452
14453                 regLo = genRegPairLo(regPair);
14454                 regHi = genRegPairHi(regPair);
14455
14456                 /* The value in the register pair is about to be trashed */
14457
14458                 regTracker.rsTrackRegTrash(regLo);
14459                 regTracker.rsTrackRegTrash(regHi);
14460
14461                 /* Unary "neg": negate the value  in the register pair */
14462                 if (oper == GT_NEG)
14463                 {
14464 #ifdef _TARGET_ARM_
14465
14466                     // ARM doesn't have an opcode that sets the carry bit like
14467                     // x86, so we can't use neg/addc/neg.  Instead we use subtract
14468                     // with carry.  Too bad this uses an extra register.
14469
14470                     // Lock regLo and regHi so we don't pick them, and then pick
14471                     // a third register to be our 0.
14472                     regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14473                     regSet.rsLockReg(regPairMask);
14474                     regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14475                     regNumber regZero = genGetRegSetToIcon(0, regBest);
14476                     regSet.rsUnlockReg(regPairMask);
14477
14478                     inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14479                     getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
14480
14481 #elif defined(_TARGET_XARCH_)
14482
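                    // Two's-complement negation across the register pair:
                    //     neg regLo        ; regLo = -regLo; carry is set iff regLo was non-zero
                    //     adc regHi, 0     ; fold the borrow into the high half
                    //     neg regHi        ; regHi = -(regHi + carry)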
14483                     inst_RV(INS_NEG, regLo, TYP_LONG);
14484                     inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14485                     inst_RV(INS_NEG, regHi, TYP_LONG);
14486 #else
14487                     NYI("GT_NEG on TYP_LONG");
14488 #endif
14489                 }
14490                 else
14491                 {
14492                     /* Unary "not": flip all the bits in the register pair */
14493
14494                     inst_RV(INS_NOT, regLo, TYP_LONG);
14495                     inst_RV(INS_NOT, regHi, TYP_LONG);
14496                 }
14497
14498                 goto DONE;
14499
14500             case GT_IND:
14501             case GT_NULLCHECK:
14502             {
14503                 regMaskTP tmpMask;
14504                 int       hiFirst;
14505
14506                 regMaskTP availMask = RBM_ALLINT & ~needReg;
14507
14508                 /* Make sure the operand is addressable */
14509
14510                 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14511
14512                 GenTreePtr addr = oper == GT_IND ? op1 : tree;
14513
14514                 /* Pick a register for the value */
14515
14516                 regPair = regSet.rsPickRegPair(needReg);
14517                 tmpMask = genRegPairMask(regPair);
14518
14519                 /* Is there any overlap between the register pair and the address? */
14520
14521                 hiFirst = FALSE;
14522
14523                 if (tmpMask & addrReg)
14524                 {
14525                     /* Does one or both of the target registers overlap? */
14526
14527                     if ((tmpMask & addrReg) != tmpMask)
14528                     {
14529                         /* Only one register overlaps */
14530
14531                         noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14532
14533                         /* If the low register overlaps, load the upper half first */
14534
14535                         if (addrReg & genRegMask(genRegPairLo(regPair)))
14536                             hiFirst = TRUE;
14537                     }
14538                     else
14539                     {
14540                         regMaskTP regFree;
14541
14542                         /* The register completely overlaps with the address */
14543
14544                         noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14545
14546                         /* Can we pick another pair easily? */
14547
14548                         regFree = regSet.rsRegMaskFree() & ~addrReg;
14549                         if (needReg)
14550                             regFree &= needReg;
14551
14552                         /* More than one free register available? */
14553
14554                         if (regFree && !genMaxOneBit(regFree))
14555                         {
14556                             regPair = regSet.rsPickRegPair(regFree);
14557                             tmpMask = genRegPairMask(regPair);
14558                         }
14559                         else
14560                         {
14561                             // printf("Overlap: needReg = %08X\n", needReg);
14562
14563                             // Reg-prediction won't allow this
14564                             noway_assert((regSet.rsMaskVars & addrReg) == 0);
14565
14566                             // Grab one fresh reg, and use any one of addrReg
14567
14568                             if (regFree) // Try to follow 'needReg'
14569                                 regLo = regSet.rsGrabReg(regFree);
14570                             else // Pick any reg besides addrReg
14571                                 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14572
14573                             unsigned  regBit = 0x1;
14574                             regNumber regNo;
14575
14576                             for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14577                             {
14578                                 // Found one of addrReg. Use it.
14579                                 if (regBit & addrReg)
14580                                     break;
14581                             }
14582                             noway_assert(genIsValidReg(regNo)); // Should have found regNo
14583
14584                             regPair = gen2regs2pair(regLo, regNo);
14585                             tmpMask = genRegPairMask(regPair);
14586                         }
14587                     }
14588                 }
14589
14590                 /* Make sure the value is still addressable */
14591
14592                 noway_assert(genStillAddressable(tree));
14593
14594                 /* Figure out which registers the value is in */
14595
14596                 regLo = genRegPairLo(regPair);
14597                 regHi = genRegPairHi(regPair);
14598
14599                 /* The value in the register pair is about to be trashed */
14600
14601                 regTracker.rsTrackRegTrash(regLo);
14602                 regTracker.rsTrackRegTrash(regHi);
14603
14604                 /* Load the target registers from where the value is */
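                /* Two 4-byte loads.  The order chosen above guarantees that a destination register
                   which is also part of the address is written last, e.g. (illustrative) when the
                   address is [regLo + disp]:
                       mov regHi, [regLo + disp + 4]   ; load the high half first
                       mov regLo, [regLo + disp]       ; regLo may now be overwritten
                 */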
14605
14606                 if (hiFirst)
14607                 {
14608                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14609                     regSet.rsLockReg(genRegMask(regHi));
14610                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14611                     regSet.rsUnlockReg(genRegMask(regHi));
14612                 }
14613                 else
14614                 {
14615                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14616                     regSet.rsLockReg(genRegMask(regLo));
14617                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14618                     regSet.rsUnlockReg(genRegMask(regLo));
14619                 }
14620
14621 #ifdef _TARGET_ARM_
14622                 if (tree->gtFlags & GTF_IND_VOLATILE)
14623                 {
14624                     // Emit a memory barrier instruction after the load
14625                     instGen_MemoryBarrier();
14626                 }
14627 #endif
14628
14629                 genUpdateLife(tree);
14630                 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14631             }
14632                 goto DONE;
14633
14634             case GT_CAST:
14635
14636                 /* What are we casting from? */
14637
14638                 switch (op1->gtType)
14639                 {
14640                     case TYP_BOOL:
14641                     case TYP_BYTE:
14642                     case TYP_CHAR:
14643                     case TYP_SHORT:
14644                     case TYP_INT:
14645                     case TYP_UBYTE:
14646                     case TYP_BYREF:
14647                     {
14648                         regMaskTP hiRegMask;
14649                         regMaskTP loRegMask;
14650
14651                         // For an unsigned cast we don't need to sign-extend the 32 bit value
14652                         if (tree->gtFlags & GTF_UNSIGNED)
14653                         {
14654                             // Does needReg have exactly two bits on, and thus
14655                             // specify the exact register pair that we want to use?
14656                             if (!genMaxOneBit(needReg))
14657                             {
14658                                 regPair = regSet.rsFindRegPairNo(needReg);
14659                                 if (needReg != genRegPairMask(regPair))
14660                                     goto ANY_FREE_REG_UNSIGNED;
14661                                 loRegMask = genRegMask(genRegPairLo(regPair));
14662                                 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14663                                     goto ANY_FREE_REG_UNSIGNED;
14664                                 hiRegMask = genRegMask(genRegPairHi(regPair));
14665                             }
14666                             else
14667                             {
14668                             ANY_FREE_REG_UNSIGNED:
14669                                 loRegMask = needReg;
14670                                 hiRegMask = needReg;
14671                             }
14672
14673                             genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14674                             noway_assert(op1->InReg());
14675
14676                             regLo     = op1->gtRegNum;
14677                             loRegMask = genRegMask(regLo);
14678                             regSet.rsLockUsedReg(loRegMask);
14679                             regHi = regSet.rsPickReg(hiRegMask);
14680                             regSet.rsUnlockUsedReg(loRegMask);
14681
14682                             regPair = gen2regs2pair(regLo, regHi);
14683
14684                             // Move 0 to the higher word of the ULong
14685                             genSetRegToIcon(regHi, 0, TYP_INT);
14686
14687                             /* We can now free up the operand */
14688                             genReleaseReg(op1);
14689
14690                             goto DONE;
14691                         }
14692 #ifdef _TARGET_XARCH_
14693                         /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
14694                            and we need the result to be in those registers.
14695                            cdq is smaller so we use it for SMALL_CODE
14696                         */
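                              // For example: if the value is already in EAX and EDX is free, the one-byte
                              //     cdq                ; EDX:EAX = sign-extend(EAX)
                              // suffices, whereas the generic pair path below needs two instructions:
                              //     mov edx, <lo-reg>
                              //     sar edx, 31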
14697
14698                         if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
14699                             (regSet.rsRegMaskFree() & RBM_EDX))
14700                         {
14701                             genCodeForTree(op1, RBM_EAX);
14702                             regSet.rsMarkRegUsed(op1);
14703
14704                             /* If we have to spill EDX, might as well use the faster
14705                                sar as the spill will increase code size anyway */
14706
14707                             if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
14708                             {
14709                                 hiRegMask = regSet.rsRegMaskFree();
14710                                 goto USE_SAR_FOR_CAST;
14711                             }
14712
14713                             regSet.rsGrabReg(RBM_EDX);
14714                             regTracker.rsTrackRegTrash(REG_EDX);
14715
14716                             /* Convert the int in EAX into a long in EDX:EAX */
14717
14718                             instGen(INS_cdq);
14719
14720                             /* The result is in EDX:EAX */
14721
14722                             regPair = REG_PAIR_EAXEDX;
14723                         }
14724                         else
14725 #endif
14726                         {
14727                             /* use the sar instruction to sign-extend a 32-bit integer */
14728
14729                             // Does needReg have exactly two bits on, and thus
14730                             // specify the exact register pair that we want to use?
14731                             if (!genMaxOneBit(needReg))
14732                             {
14733                                 regPair = regSet.rsFindRegPairNo(needReg);
14734                                 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14735                                     goto ANY_FREE_REG_SIGNED;
14736                                 loRegMask = genRegMask(genRegPairLo(regPair));
14737                                 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14738                                     goto ANY_FREE_REG_SIGNED;
14739                                 hiRegMask = genRegMask(genRegPairHi(regPair));
14740                             }
14741                             else
14742                             {
14743                             ANY_FREE_REG_SIGNED:
14744                                 loRegMask = needReg;
14745                                 hiRegMask = RBM_NONE;
14746                             }
14747
14748                             genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14749 #ifdef _TARGET_XARCH_
14750                         USE_SAR_FOR_CAST:
14751 #endif
14752                             noway_assert(op1->InReg());
14753
14754                             regLo     = op1->gtRegNum;
14755                             loRegMask = genRegMask(regLo);
14756                             regSet.rsLockUsedReg(loRegMask);
14757                             regHi = regSet.rsPickReg(hiRegMask);
14758                             regSet.rsUnlockUsedReg(loRegMask);
14759
14760                             regPair = gen2regs2pair(regLo, regHi);
14761
14762 #ifdef _TARGET_ARM_
14763                             /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14764                             // Use one instruction instead of two
14765                             getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
14766 #else
14767                             /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14768                             inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
14769                             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14770 #endif
14771
14772                             /* The value in the upper register is trashed */
14773
14774                             regTracker.rsTrackRegTrash(regHi);
14775                         }
14776
14777                         /* We can now free up the operand */
14778                         genReleaseReg(op1);
14779
14780                         // conv.ovf.u8 could overflow if the original number was negative
14781                         if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
14782                         {
14783                             regNumber hiReg = genRegPairHi(regPair);
14784                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14785                             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14786                             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14787                         }
14788                     }
14789                         goto DONE;
14790
14791                     case TYP_FLOAT:
14792                     case TYP_DOUBLE:
14793
14794 #if 0
14795                 /* Load the FP value onto the coprocessor stack */
14796
14797                 genCodeForTreeFlt(op1);
14798
14799                 /* Allocate a temp for the long value */
14800
14801                 temp = compiler->tmpGetTemp(TYP_LONG);
14802
14803                 /* Store the FP value into the temp */
14804
14805                 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
14806                 genFPstkLevel--;
14807
14808                 /* Pick a register pair for the value */
14809
14810                 regPair  = regSet.rsPickRegPair(needReg);
14811
14812                 /* Figure out which registers the value is in */
14813
14814                 regLo = genRegPairLo(regPair);
14815                 regHi = genRegPairHi(regPair);
14816
14817                 /* The value in the register pair is about to be trashed */
14818
14819                 regTracker.rsTrackRegTrash(regLo);
14820                 regTracker.rsTrackRegTrash(regHi);
14821
14822                 /* Load the converted value into the registers */
14823
14824                 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
14825                 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
14826
14827                 /* We no longer need the temp */
14828
14829                 compiler->tmpRlsTemp(temp);
14830                 goto DONE;
14831 #else
14832                         NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
14833                         break;
14834 #endif
14835                     case TYP_LONG:
14836                     case TYP_ULONG:
14837                     {
14838                         noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
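                              // In both cases overflow occurs exactly when the sign bit of the upper 32
                              // bits is set, so testing the high half and throwing on a signed "less than
                              // zero" below is sufficient.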
14839
14840                         genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
14841                         regPair = op1->gtRegPair;
14842
14843                         // Do we need to set the sign flag here, or could we check whether it is
14844                         // already set and skip this "test" if so?
14845
14846                         if (op1->InReg())
14847                         {
14848                             regNumber hiReg = genRegPairHi(op1->gtRegPair);
14849                             noway_assert(hiReg != REG_STK);
14850                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14851                         }
14852                         else
14853                         {
14854                             inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
14855                         }
14856
14857                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14858                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14859                     }
14860                         goto DONE;
14861
14862                     default:
14863 #ifdef DEBUG
14864                         compiler->gtDispTree(tree);
14865 #endif
14866                         NO_WAY("unexpected cast to long");
14867                 }
14868                 break;
14869
14870             case GT_RETURN:
14871
14872                 /* TODO:
14873                  * This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
14874                  * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement.  We should really
14875                  * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
14876                  */
14877
14878                 // TODO: this should be done AFTER we called exit mon so that
14879                 //       we are sure that we don't have to keep 'this' alive
14880
14881                 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
14882                 {
14883                     /* either it's an "empty" statement or the return statement
14884                        of a synchronized method
14885                      */
14886
14887                     genPInvokeMethodEpilog();
14888                 }
14889
14890 #if CPU_LONG_USES_REGPAIR
14891                 /* There must be a long return value */
14892
14893                 noway_assert(op1);
14894
14895                 /* Evaluate the return value into EDX:EAX */
14896
14897                 genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
14898
14899                 noway_assert(op1->InReg());
14900                 noway_assert(op1->gtRegPair == REG_LNGRET);
14901
14902 #else
14903                 NYI("64-bit return");
14904 #endif
14905
14906 #ifdef PROFILING_SUPPORTED
14907                 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
14908                 // the GT_RETURN tree.
14909
14910                 if (compiler->compCurBB == compiler->genReturnBB)
14911                 {
14912                     genProfilingLeaveCallback();
14913                 }
14914 #endif
14915                 return;
14916
14917             case GT_QMARK:
14918                 noway_assert(!"inliner-generated ?: for longs NYI");
14919                 NO_WAY("inliner-generated ?: for longs NYI");
14920                 break;
14921
14922             case GT_COMMA:
14923
14924                 if (tree->gtFlags & GTF_REVERSE_OPS)
14925                 {
14926                     // Generate op2
14927                     genCodeForTreeLng(op2, needReg, avoidReg);
14928                     genUpdateLife(op2);
14929
14930                     noway_assert(op2->InReg());
14931
14932                     regSet.rsMarkRegPairUsed(op2);
14933
14934                     // Do side effects of op1
14935                     genEvalSideEffects(op1);
14936
14937                     // Recover op2 if spilled
14938                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
14939
14940                     genReleaseRegPair(op2);
14941
14942                     genUpdateLife(tree);
14943
14944                     regPair = op2->gtRegPair;
14945                 }
14946                 else
14947                 {
14948                     noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
14949
14950                     /* Generate side effects of the first operand */
14951
14952                     genEvalSideEffects(op1);
14953                     genUpdateLife(op1);
14954
14955                     /* Is the value of the second operand used? */
14956
14957                     if (tree->gtType == TYP_VOID)
14958                     {
14959                         /* The right operand produces no result */
14960
14961                         genEvalSideEffects(op2);
14962                         genUpdateLife(tree);
14963                         return;
14964                     }
14965
14966                     /* Generate the second operand, i.e. the 'real' value */
14967
14968                     genCodeForTreeLng(op2, needReg, avoidReg);
14969
14970                     /* The result of 'op2' is also the final result */
14971
14972                     regPair = op2->gtRegPair;
14973                 }
14974
14975                 goto DONE;
14976
14977             case GT_BOX:
14978             {
14979                 /* Generate the operand, i.e. the 'real' value */
14980
14981                 genCodeForTreeLng(op1, needReg, avoidReg);
14982
14983                 /* The result of 'op1' is also the final result */
14984
14985                 regPair = op1->gtRegPair;
14986             }
14987
14988                 goto DONE;
14989
14990             case GT_NOP:
14991                 if (op1 == NULL)
14992                     return;
14993
14994                 genCodeForTreeLng(op1, needReg, avoidReg);
14995                 regPair = op1->gtRegPair;
14996                 goto DONE;
14997
14998             default:
14999                 break;
15000         }
15001
15002 #ifdef DEBUG
15003         compiler->gtDispTree(tree);
15004 #endif
15005         noway_assert(!"unexpected 64-bit operator");
15006     }
15007
15008     /* See what kind of a special operator we have here */
15009
15010     switch (oper)
15011     {
15012         regMaskTP retMask;
15013         case GT_CALL:
15014             retMask = genCodeForCall(tree->AsCall(), true);
15015             if (retMask == RBM_NONE)
15016                 regPair = REG_PAIR_NONE;
15017             else
15018                 regPair = regSet.rsFindRegPairNo(retMask);
15019             break;
15020
15021         default:
15022 #ifdef DEBUG
15023             compiler->gtDispTree(tree);
15024 #endif
15025             NO_WAY("unexpected long operator");
15026     }
15027
15028 DONE:
15029
15030     genUpdateLife(tree);
15031
15032     /* Here we've computed the value of 'tree' into 'regPair' */
15033
15034     noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15035
15036     genMarkTreeInRegPair(tree, regPair);
15037 }
15038 #ifdef _PREFAST_
15039 #pragma warning(pop)
15040 #endif
15041
15042 /*****************************************************************************
15043  *
15044  *  Generate code for a mod of a long by an int.
15045  */
15046
15047 regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
15048 {
15049 #ifdef _TARGET_X86_
15050
15051     regPairNo regPair;
15052     regMaskTP addrReg;
15053
15054     genTreeOps oper = tree->OperGet();
15055     GenTreePtr op1  = tree->gtOp.gtOp1;
15056     GenTreePtr op2  = tree->gtOp.gtOp2;
15057
15058     /* Codegen only for Unsigned MOD */
15059     noway_assert(oper == GT_UMOD);
15060
15061     /* op2 must be a long constant in the range 2 to 0x3fffffff */
15062
15063     noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
15064                  (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15065     int val = (int)op2->gtLngCon.gtLconVal;
15066
15067     op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15068
15069     op2->gtType             = TYP_INT;
15070     op2->gtIntCon.gtIconVal = val;
15071
15072     /* Which operand are we supposed to compute first? */
15073
15074     if (tree->gtFlags & GTF_REVERSE_OPS)
15075     {
15076         /* Compute the second operand into a scratch register, other
15077            than EAX or EDX */
15078
15079         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15080
15081         /* Special case: if op2 is a local var we are done */
15082
15083         if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15084         {
15085             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15086         }
15087         else
15088         {
15089             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15090
15091             noway_assert(op2->InReg());
15092             addrReg = genRegMask(op2->gtRegNum);
15093         }
15094
15095         /* Compute the first operand into EAX:EDX */
15096
15097         genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15098         noway_assert(op1->InReg());
15099         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15100
15101         /* And recover the second argument while locking the first one */
15102
15103         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15104     }
15105     else
15106     {
15107         /* Compute the first operand into EAX:EDX */
15108
15109         genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15110         noway_assert(op1->InReg());
15111         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15112
15113         /* Compute the second operand into a scratch register, other
15114            than EAX or EDX */
15115
15116         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15117
15118         /* Special case: if op2 is a local var we are done */
15119
15120         if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15121         {
15122             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15123         }
15124         else
15125         {
15126             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15127
15128             noway_assert(op2->InReg());
15129             addrReg = genRegMask(op2->gtRegNum);
15130         }
15131
15132         /* Recover the first argument */
15133
15134         genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15135
15136         /* And recover the second argument while locking the first one */
15137
15138         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15139     }
15140
15141     /* At this point, EDX:EAX contains the 64-bit dividend and op2->gtRegNum
15142        contains the 32-bit divisor.  We want to generate the following code:
15143
15144        ==========================
15145        Unsigned (GT_UMOD)
15146
15147        cmp edx, op2->gtRegNum
15148        jb  lab_no_overflow
15149
15150        mov temp, eax
15151        mov eax, edx
15152        xor edx, edx
15153        div op2->gtRegNum
15154        mov eax, temp
15155
15156        lab_no_overflow:
15157        div op2->gtRegNum
15158        ==========================
15159        This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
15160     */
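      // For example (a small-number sketch): with divisor c = 7, a dividend whose high
      // word is 12 would fault in a single 32-bit 'div', because 12 >= 7 means the
      // quotient does not fit in 32 bits.  Reducing the high word first (12 % 7 = 5)
      // cannot change the remainder, by the identity above.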
15161
15162     BasicBlock* lab_no_overflow = genCreateTempLabel();
15163
15164     // grab a temporary register other than eax, edx, and op2->gtRegNum
15165
15166     regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15167
15168     // EAX and tempReg will be trashed by the mov instructions.  Doing
15169     // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15170
15171     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15172     regTracker.rsTrackRegTrash(tempReg);
15173
15174     inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15175     inst_JMP(EJ_jb, lab_no_overflow);
15176
15177     inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15178     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15179     genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15180     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15181     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15182
15183     // Jump point for no overflow divide
15184
15185     genDefineTempLabel(lab_no_overflow);
15186
15187     // Issue the divide instruction
15188
15189     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15190
15191     /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15192
15193     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15194     regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
15195     regTracker.rsTrackRegTrash(tempReg);
15196     regTracker.rsTrackRegTrash(op2->gtRegNum);
15197
15198     if (tree->gtFlags & GTF_MOD_INT_RESULT)
15199     {
15200         /* We don't need to normalize the result, because the caller wants
15201            an int (in edx) */
15202
15203         regPair = REG_PAIR_TMP_REVERSE;
15204     }
15205     else
15206     {
15207         /* The result is now in EDX, we now have to normalize it, i.e. we have
15208            to issue:
15209            mov eax, edx; xor edx, edx (for UMOD)
15210         */
15211
15212         inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15213
15214         genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15215
15216         regPair = REG_PAIR_TMP;
15217     }
15218
15219     genReleaseRegPair(op1);
15220     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15221
15222     return regPair;
15223
15224 #else // !_TARGET_X86_
15225
15226     NYI("codegen for LongModInt");
15227
15228     return REG_PAIR_NONE;
15229
15230 #endif // !_TARGET_X86_
15231 }
15232
15233 // Given a tree, return the number of registers that are currently
15234 // used to hold integer enregistered local variables.
15235 // Note that an enregistered TYP_LONG can take 1 or 2 registers.
15236 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
15237 {
15238     unsigned regCount = 0;
15239
15240     VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
15241     while (iter.NextElem(&varNum))
15242     {
15243         unsigned   lclNum = compiler->lvaTrackedToVarNum[varNum];
15244         LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
15245
15246         if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15247         {
15248             ++regCount;
15249
15250             if (varTypeIsLong(varDsc->TypeGet()))
15251             {
15252                 // For enregistered LONG/ULONG, the lower half should always be in a register.
15253                 noway_assert(varDsc->lvRegNum != REG_STK);
15254
15255                 // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
15256                 // well.
15257                 if (varDsc->lvOtherReg != REG_STK)
15258                 {
15259                     ++regCount;
15260                 }
15261             }
15262         }
15263     }
15264
15265     return regCount;
15266 }
15267
15268 /*****************************************************************************/
15269 /*****************************************************************************/
15270 #if CPU_HAS_FP_SUPPORT
15271 /*****************************************************************************
15272  *
15273  *  Generate code for a floating-point operation.
15274  */
15275
15276 void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
15277                                 regMaskTP  needReg, /* = RBM_ALLFLOAT */
15278                                 regMaskTP  bestReg) /* = RBM_NONE */
15279 {
15280     genCodeForTreeFloat(tree, needReg, bestReg);
15281
15282     if (tree->OperGet() == GT_RETURN)
15283     {
15284         // Make sure to get ALL THE EPILOG CODE
15285
15286         // TODO: this should be done AFTER we called exit mon so that
15287         //       we are sure that we don't have to keep 'this' alive
15288
15289         if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15290         {
15291             /* either it's an "empty" statement or the return statement
15292                of a synchronized method
15293              */
15294
15295             genPInvokeMethodEpilog();
15296         }
15297
15298 #ifdef PROFILING_SUPPORTED
15299         // The profiling hook does not trash registers, so it's safe to call after we emit the code for
15300         // the GT_RETURN tree.
15301
15302         if (compiler->compCurBB == compiler->genReturnBB)
15303         {
15304             genProfilingLeaveCallback();
15305         }
15306 #endif
15307     }
15308 }
15309
15310 /*****************************************************************************/
15311 #endif // CPU_HAS_FP_SUPPORT
15312
15313 /*****************************************************************************
15314  *
15315  *  Generate a table switch - the switch value (0-based) is in register 'reg'.
15316  */
15317
15318 void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
15319 {
15320     unsigned jmpTabBase;
15321
15322     if (jumpCnt == 1)
15323     {
15324         // In debug code, we don't optimize away the trivial switch statements.  So we can get here with a
15325         // BBJ_SWITCH with only a default case.  Therefore, don't generate the switch table.
15326         noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15327         inst_JMP(EJ_jmp, jumpTab[0]);
15328         return;
15329     }
15330
15331     noway_assert(jumpCnt >= 2);
15332
15333     /* Is the number of cases right for a test and jump switch? */
15334
15335     const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15336     const bool fDefaultFollows   = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15337     const bool fHaveScratchReg   = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
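      // fHaveScratchReg means the switch value's own register is free to be trashed, so
      // the compare-and-branch sequences below can decrement it in place instead of first
      // copying it to a scratch register.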
15338
15339     unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15340
15341     // This means really just a single cmp/jcc (aka a simple if/else)
15342     if (fFirstCaseFollows || fDefaultFollows)
15343         minSwitchTabJumpCnt++;
15344
15345 #ifdef _TARGET_ARM_
15346     // On the ARM for small switch tables we will
15347     // generate a sequence of compare and branch instructions
15348     // because the code to load the base of the switch
15349     // table is huge and hideous due to the relocation... :(
15350     //
15351     minSwitchTabJumpCnt++;
15352     if (fHaveScratchReg)
15353         minSwitchTabJumpCnt++;
15354
15355 #endif // _TARGET_ARM_
15356
15357     if (jumpCnt < minSwitchTabJumpCnt)
15358     {
15359         /* Does the first case label follow? */
15360         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15361
15362         if (fFirstCaseFollows)
15363         {
15364             /* Check for the default case */
15365             inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15366             emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15367             inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15368
15369             /* No need to jump to the first case */
15370
15371             jumpCnt -= 2;
15372             jumpTab += 1;
15373
15374             /* Generate a series of "sub reg, 1 ; jump-if-equal label" */
15375
15376             // Make sure that we can trash the register so
15377             // that we can generate a series of compares and jumps
15378             //
15379             if ((jumpCnt > 0) && !fHaveScratchReg)
15380             {
15381                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15382                 inst_RV_RV(INS_mov, tmpReg, reg);
15383                 regTracker.rsTrackRegTrash(tmpReg);
15384                 reg = tmpReg;
15385             }
15386
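              // e.g. with three remaining cases this loop emits (x86 flavor):
              //     sub reg, 1
              //     je  case1
              //     sub reg, 1
              //     je  case2
              //     sub reg, 1
              //     je  case3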
15387             while (jumpCnt > 0)
15388             {
15389                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15390                 inst_JMP(jmpEqual, *jumpTab++);
15391                 jumpCnt--;
15392             }
15393         }
15394         else
15395         {
15396             /* Check for case0 first */
15397             instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15398             inst_JMP(jmpEqual, *jumpTab);
15399
15400             /* No need to jump to the first case or the default */
15401
15402             jumpCnt -= 2;
15403             jumpTab += 1;
15404
15405             /* Generate a series of "sub reg, 1 ; jump-if-equal label" */
15406
15407             // Make sure that we can trash the register so
15408             // that we can generate a series of compares and jumps
15409             //
15410             if ((jumpCnt > 0) && !fHaveScratchReg)
15411             {
15412                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15413                 inst_RV_RV(INS_mov, tmpReg, reg);
15414                 regTracker.rsTrackRegTrash(tmpReg);
15415                 reg = tmpReg;
15416             }
15417
15418             while (jumpCnt > 0)
15419             {
15420                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15421                 inst_JMP(jmpEqual, *jumpTab++);
15422                 jumpCnt--;
15423             }
15424
15425             if (!fDefaultFollows)
15426             {
15427                 inst_JMP(EJ_jmp, *jumpTab);
15428             }
15429         }
15430
15431         if ((fFirstCaseFollows || fDefaultFollows) &&
15432             compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15433         {
15434             inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
15435         }
15436
15437         return;
15438     }
15439
15440     /* First take care of the default case */
15441
15442     inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15443     emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15444     inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
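      // The unsigned compare against (jumpCnt - 1) sends any out-of-range value,
      // including values that would be negative as a signed int, to the default case.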
15445
15446     /* Generate the jump table contents */
15447
15448     jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15449
15450 #ifdef DEBUG
15451     if (compiler->opts.dspCode)
15452         printf("\n      J_M%03u_DS%02u LABEL   DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15453 #endif
15454
15455     for (unsigned index = 0; index < jumpCnt - 1; index++)
15456     {
15457         BasicBlock* target = jumpTab[index];
15458
15459         noway_assert(target->bbFlags & BBF_JMP_TARGET);
15460
15461 #ifdef DEBUG
15462         if (compiler->opts.dspCode)
15463             printf("            DD      L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15464 #endif
15465
15466         getEmitter()->emitDataGenData(index, target);
15467     }
15468
15469     getEmitter()->emitDataGenEnd();
15470
15471 #ifdef _TARGET_ARM_
15472     // We need to load the address of the table into a register.
15473     // The data section might get placed a long distance away, so we
15474     // can't safely do a PC-relative ADR. :(
15475     // Pick any register except the index register.
15476     //
15477     regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
15478     genMov32RelocatableDataLabel(jmpTabBase, regTabBase);
15479     regTracker.rsTrackRegTrash(regTabBase);
15480
15481     // LDR PC, [regTabBase + reg * 4] (encoded as LDR PC, [regTabBase, reg, LSL 2])
15482     getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15483
15484 #else // !_TARGET_ARM_
15485
15486     getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15487
15488 #endif
15489 }
15490
15491 /*****************************************************************************
15492  *
15493  *  Generate code for a switch statement.
15494  */
15495
15496 void CodeGen::genCodeForSwitch(GenTreePtr tree)
15497 {
15498     unsigned     jumpCnt;
15499     BasicBlock** jumpTab;
15500
15501     GenTreePtr oper;
15502     regNumber  reg;
15503
15504     noway_assert(tree->gtOper == GT_SWITCH);
15505     oper = tree->gtOp.gtOp1;
15506     noway_assert(genActualTypeIsIntOrI(oper->gtType));
15507
15508     /* Get hold of the jump table */
15509
15510     noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15511
15512     jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15513     jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15514
15515     /* Compute the switch value into some register */
15516
15517     genCodeForTree(oper, 0);
15518
15519     /* Get hold of the register the value is in */
15520
15521     noway_assert(oper->InReg());
15522     reg = oper->gtRegNum;
15523
15524 #if FEATURE_STACK_FP_X87
15525     if (!compCurFPState.IsEmpty())
15526     {
15527         return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15528     }
15529     else
15530 #endif // FEATURE_STACK_FP_X87
15531     {
15532         return genTableSwitch(reg, jumpCnt, jumpTab);
15533     }
15534 }
15535
15536 /*****************************************************************************/
15537 /*****************************************************************************
15538  *  Emit a call to a helper function.
15539  */
15540
15541 // inline
15542 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
15543 {
15544     // Can we call the helper function directly?
15545
15546     void *addr = NULL, **pAddr = NULL;
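      // compGetHelperFtn either returns the helper's address directly in 'addr', or returns
      // NULL and fills in 'pAddr' with the address of a cell that holds it; in the latter
      // case the call below is made indirectly through that cell.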
15547
15548 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
15549     // Don't ask VM if it hasn't requested ELT hooks
15550     if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
15551         (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15552          helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15553     {
15554         addr = compiler->compProfilerMethHnd;
15555     }
15556     else
15557 #endif
15558     {
15559         addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15560     }
15561
15562 #ifdef _TARGET_ARM_
15563     if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15564     {
15565         // Load the address into a register and call through that register
15566         regNumber indCallReg =
15567             regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
15568         if (addr)
15569         {
15570             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
15571         }
15572         else
15573         {
15574             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15575             regTracker.rsTrackRegTrash(indCallReg);
15576         }
15577
15578         getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
15579                                    INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
15580                                    argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15581                                    gcInfo.gcRegByrefSetCur,
15582                                    BAD_IL_OFFSET, // ilOffset
15583                                    indCallReg,    // ireg
15584                                    REG_NA, 0, 0,  // xreg, xmul, disp
15585                                    false,         // isJump
15586                                    emitter::emitNoGChelper(helper),
15587                                    (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15588     }
15589     else
15590     {
15591         getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
15592                                    INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
15593                                    gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15594                                    0,     /* ilOffset, ireg, xreg, xmul, disp */
15595                                    false, /* isJump */
15596                                    emitter::emitNoGChelper(helper),
15597                                    (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15598     }
15599 #else
15600
15601     {
15602         emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
15603
15604         if (!addr)
15605         {
15606             callType = emitter::EC_FUNC_TOKEN_INDIR;
15607             addr     = pAddr;
15608         }
15609
15610         getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
15611                                    argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15612                                    gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15613                                    0,     /* ilOffset, ireg, xreg, xmul, disp */
15614                                    false, /* isJump */
15615                                    emitter::emitNoGChelper(helper));
15616     }
15617 #endif
15618
15619     regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15620     regTracker.rsTrashRegsForGCInterruptability();
15621 }
15622
15623 /*****************************************************************************
15624  *
15625  *  Push the given argument list, right to left; returns the total amount of
15626  *  stuff pushed.
15627  */
15628
15629 #if !FEATURE_FIXED_OUT_ARGS
15630 #ifdef _PREFAST_
15631 #pragma warning(push)
15632 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
15633 #endif
15634 size_t CodeGen::genPushArgList(GenTreeCall* call)
15635 {
15636     GenTreeArgList* regArgs = call->gtCallLateArgs;
15637     size_t          size    = 0;
15638     regMaskTP       addrReg;
15639
15640     GenTreeArgList* args;
15641     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
15642     // so we can iterate over this argument list more uniformly.
15643     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
15644     GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCallArgs);
15645     if (call->gtCallObjp == NULL)
15646     {
15647         args = call->gtCallArgs;
15648     }
15649     else
15650     {
15651         firstForObjp.Current() = call->gtCallObjp;
15652         args                   = &firstForObjp;
15653     }
15654
15655     GenTreePtr curr;
15656     var_types  type;
15657     size_t     opsz;
15658
15659     for (; args; args = args->Rest())
15660     {
15661         addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
15662
15663         /* Get hold of the next argument value */
15664         curr = args->Current();
15665
15666         if (curr->IsArgPlaceHolderNode())
15667         {
15668             assert(curr->gtFlags & GTF_LATE_ARG);
15669
15670             addrReg = 0;
15671             continue;
15672         }
15673
15674         // If we have a comma expression, eval the non-last, then deal with the last.
15675         if (!(curr->gtFlags & GTF_LATE_ARG))
15676             curr = genCodeForCommaTree(curr);
15677
15678         /* See what type of a value we're passing */
15679         type = curr->TypeGet();
15680
15681         opsz = genTypeSize(genActualType(type));
15682
15683         switch (type)
15684         {
15685             case TYP_BOOL:
15686             case TYP_BYTE:
15687             case TYP_SHORT:
15688             case TYP_CHAR:
15689             case TYP_UBYTE:
15690
15691                 /* Don't want to push a small value, make it a full word */
15692
15693                 genCodeForTree(curr, 0);
15694
15695                 __fallthrough; // now the value should be in a register ...
15696
15697             case TYP_INT:
15698             case TYP_REF:
15699             case TYP_BYREF:
15700 #if !CPU_HAS_FP_SUPPORT
15701             case TYP_FLOAT:
15702 #endif
15703
15704                 if (curr->gtFlags & GTF_LATE_ARG)
15705                 {
15706                     assert(curr->gtOper == GT_ASG);
15707                     /* one more argument will be passed in a register */
15708                     noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
15709
15710                     /* arg is passed in the register, nothing on the stack */
15711
15712                     opsz = 0;
15713                 }
15714
15715                 /* Is this value a handle? */
15716
15717                 if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
15718                 {
15719                     /* Emit a fixup for the push instruction */
15720
15721                     inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
15722                     genSinglePush();
15723
15724                     addrReg = 0;
15725                     break;
15726                 }
15727
15728                 /* Is the value a constant? */
15729
15730                 if (curr->gtOper == GT_CNS_INT)
15731                 {
15732
15733 #if REDUNDANT_LOAD
15734                     regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
15735
15736                     if (reg != REG_NA)
15737                     {
15738                         inst_RV(INS_push, reg, TYP_INT);
15739                     }
15740                     else
15741 #endif
15742                     {
15743                         inst_IV(INS_push, curr->gtIntCon.gtIconVal);
15744                     }
15745
15746                     /* If the type is TYP_REF, then this must be a "null". So we can
15747                        treat it as a TYP_INT as we don't need to report it as a GC ptr */
15748
15749                     noway_assert(curr->TypeGet() == TYP_INT ||
15750                                  (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
15751
15752                     genSinglePush();
15753
15754                     addrReg = 0;
15755                     break;
15756                 }
15757
15758                 if (curr->gtFlags & GTF_LATE_ARG)
15759                 {
15760                     /* This must be a register arg temp assignment */
15761
15762                     noway_assert(curr->gtOper == GT_ASG);
15763
15764                     /* Evaluate it to the temp */
15765
15766                     genCodeForTree(curr, 0);
15767
15768                     /* Increment the current argument register counter */
15769
15770                     intRegState.rsCurRegArgNum++;
15771
15772                     addrReg = 0;
15773                 }
15774                 else
15775                 {
15776                     /* This is a 32-bit integer non-register argument */
15777
15778                     addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
15779                     inst_TT(INS_push, curr);
15780                     genSinglePush();
15781                     genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
15782                 }
15783                 break;
15784
15785             case TYP_LONG:
15786 #if !CPU_HAS_FP_SUPPORT
15787             case TYP_DOUBLE:
15788 #endif
15789
15790                 /* Is the value a constant? */
15791
15792                 if (curr->gtOper == GT_CNS_LNG)
15793                 {
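                      // Push the upper 32 bits first; the following push of the lower half then
                      // ends up at the lower stack address, giving the value little-endian slot order.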
15794                     inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
15795                     genSinglePush();
15796                     inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
15797                     genSinglePush();
15798
15799                     addrReg = 0;
15800                 }
15801                 else
15802                 {
15803                     addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
15804
15805                     inst_TT(INS_push, curr, sizeof(int));
15806                     genSinglePush();
15807                     inst_TT(INS_push, curr);
15808                     genSinglePush();
15809                 }
15810                 break;
15811
15812 #if CPU_HAS_FP_SUPPORT
15813             case TYP_FLOAT:
15814             case TYP_DOUBLE:
15815 #endif
15816 #if FEATURE_STACK_FP_X87
15817                 addrReg = genPushArgumentStackFP(curr);
15818 #else
15819                 NYI("FP codegen");
15820                 addrReg = 0;
15821 #endif
15822                 break;
15823
15824             case TYP_VOID:
15825
15826                 /* Is this a nothing node, deferred register argument? */
15827
15828                 if (curr->gtFlags & GTF_LATE_ARG)
15829                 {
15830                     GenTree* arg = curr;
15831                     if (arg->gtOper == GT_COMMA)
15832                     {
15833                         while (arg->gtOper == GT_COMMA)
15834                         {
15835                             GenTreePtr op1 = arg->gtOp.gtOp1;
15836                             genEvalSideEffects(op1);
15837                             genUpdateLife(op1);
15838                             arg = arg->gtOp.gtOp2;
15839                         }
15840                         if (!arg->IsNothingNode())
15841                         {
15842                             genEvalSideEffects(arg);
15843                             genUpdateLife(arg);
15844                         }
15845                     }
15846
15847                     /* increment the register count and continue with the next argument */
15848
15849                     intRegState.rsCurRegArgNum++;
15850
15851                     noway_assert(opsz == 0);
15852
15853                     addrReg = 0;
15854                     break;
15855                 }
15856
15857                 __fallthrough;
15858
15859             case TYP_STRUCT:
15860             {
15861                 GenTree* arg = curr;
15862                 while (arg->gtOper == GT_COMMA)
15863                 {
15864                     GenTreePtr op1 = arg->gtOp.gtOp1;
15865                     genEvalSideEffects(op1);
15866                     genUpdateLife(op1);
15867                     arg = arg->gtOp.gtOp2;
15868                 }
15869
15870                 noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
15871                 noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
15872                 noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
15873
15874                 if (arg->gtOper == GT_MKREFANY)
15875                 {
15876                     GenTreePtr op1 = arg->gtOp.gtOp1;
15877                     GenTreePtr op2 = arg->gtOp.gtOp2;
15878
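                      // For GT_MKREFANY, op1 is the data pointer and op2 is the type handle; op2 is
                      // pushed first and op1 second, so the pointer ends up in the lower of the two
                      // stack slots (the first field of the TypedReference being passed).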
15879                     addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
15880
15881                     /* Is this value a handle? */
15882                     if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
15883                     {
15884                         /* Emit a fixup for the push instruction */
15885
15886                         inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
15887                         genSinglePush();
15888                     }
15889                     else
15890                     {
15891                         regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
15892                         inst_TT(INS_push, op2);
15893                         genSinglePush();
15894                         genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
15895                     }
15896                     addrReg = genKeepAddressable(op1, addrReg);
15897                     inst_TT(INS_push, op1);
15898                     genSinglePush();
15899                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
15900
15901                     opsz = 2 * TARGET_POINTER_SIZE;
15902                 }
15903                 else
15904                 {
15905                     noway_assert(arg->gtOper == GT_OBJ);
15906
15907                     if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
15908                     {
15909                         GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
15910                         unsigned   structLclNum    = structLocalTree->gtLclVarCommon.gtLclNum;
15911                         LclVarDsc* varDsc          = &compiler->lvaTable[structLclNum];
15912
15913                         // As much as we would like this to be a noway_assert, we can't because
15914                         // there are some weird casts out there, and backwards compatibility
15915                         // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
15916                         // lvPromoted in general currently do not require the local to be
15917                         // TYP_STRUCT, so this assert is really more about how we wish the world
15918                         // was than about some JIT invariant.
15919                         assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
15920
15921                         Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
15922
15923                         if (varDsc->lvPromoted &&
15924                             promotionType ==
15925                                 Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
15926                         {
15927                             assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
15928
15929                             addrReg = 0;
15930
15931                             // Get the number of BYTES to copy to the stack
15932                             opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
15933                             size_t bytesToBeCopied = opsz;
15934
15935                             // postponedFields is true if we have any postponed fields
15936                             //   Any field that does not start on a 4-byte boundary is a postponed field
15937                             //   Such a field is required to be a short or a byte
15938                             //
15939                             // postponedRegKind records the kind of scratch register we will
15940                             //   need to process the postponed fields
15941                             //   RBM_NONE means that we don't need a register
15942                             //
15943                             // expectedAlignedOffset records the aligned offset that
15944                             //   has to exist for a push to cover the postponed fields.
15945                             //   Since all promoted structs have the tightly packed property
15946                             //   we are guaranteed that we will have such a push
15947                             //
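                              // For example, in a tightly packed struct a 2-byte field at offset 6 does not
                              // start on a 4-byte boundary; it is a postponed field, and the 4-byte push for
                              // the aligned field at offset 4 is the push expected to cover it.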
15948                             bool      postponedFields       = false;
15949                             regMaskTP postponedRegKind      = RBM_NONE;
15950                             size_t    expectedAlignedOffset = UINT_MAX;
15951
15952                             VARSET_TP* deadVarBits = NULL;
15953                             compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
15954
15955                             // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
15956                             //
15957                             for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
15958                                  varNum >= (int)varDsc->lvFieldLclStart; varNum--)
15959                             {
15960                                 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
15961 #ifdef DEBUG
15962                                 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
15963                                 {
15964                                     noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
15965                                     noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
15966                                 }
15967 #endif
15968                                 // Whenever we see a stack-aligned fieldVarDsc we use 4-byte push instruction(s).
15969                                 // For packed structs we will go back and store the unaligned bytes and shorts
15970                                 // in the next loop.
15971                                 //
15972                                 if (fieldVarDsc->lvStackAligned())
15973                                 {
15974                                     if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
15975                                         fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
15976                                     {
15977                                         // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
15978                                         // Just push some junk (i.e. EAX) on the stack.
15979                                         inst_RV(INS_push, REG_EAX, TYP_INT);
15980                                         genSinglePush();
15981
15982                                         bytesToBeCopied -= sizeof(void*);
15983                                     }
15984
15985                                     // If we have an expectedAlignedOffset make sure that this push instruction
15986                                     // is what we expect to cover the postponedFields
15987                                     //
15988                                     if (expectedAlignedOffset != UINT_MAX)
15989                                     {
15990                                         // This push must be for a small field
15991                                         noway_assert(fieldVarDsc->lvExactSize < 4);
15992                                         // The fldOffset for this push should be equal to the expectedAlignedOffset
15993                                         noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
15994                                         expectedAlignedOffset = UINT_MAX;
15995                                     }
15996
15997                                     // Push the "upper half" of LONG var first
15998
15999                                     if (isRegPairType(fieldVarDsc->lvType))
16000                                     {
16001                                         if (fieldVarDsc->lvOtherReg != REG_STK)
16002                                         {
16003                                             inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
16004                                             genSinglePush();
16005
16006                                             // Prepare the set of vars to be cleared from gcref/gcbyref set
16007                                             // in case they become dead after genUpdateLife.
16008                                             // genDoneAddressable() will remove dead gc vars by calling
16009                                             // gcInfo.gcMarkRegSetNpt.
16010                                             // Although it is not addrReg, we just borrow the name here.
16011                                             addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16012                                         }
16013                                         else
16014                                         {
16015                                             getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
16016                                             genSinglePush();
16017                                         }
16018
16019                                         bytesToBeCopied -= sizeof(void*);
16020                                     }
16021
16022                                     // Push the "upper half" of DOUBLE var if it is not enregistered.
16023
16024                                     if (fieldVarDsc->lvType == TYP_DOUBLE)
16025                                     {
16026                                         if (!fieldVarDsc->lvRegister)
16027                                         {
16028                                             getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
16029                                             genSinglePush();
16030                                         }
16031
16032                                         bytesToBeCopied -= sizeof(void*);
16033                                     }
16034
16035                                     //
16036                                     // Push the field local.
16037                                     //
16038
16039                                     if (fieldVarDsc->lvRegister)
16040                                     {
16041                                         if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16042                                         {
16043                                             inst_RV(INS_push, fieldVarDsc->lvRegNum,
16044                                                     genActualType(fieldVarDsc->TypeGet()));
16045                                             genSinglePush();
16046
16047                                             // Prepare the set of vars to be cleared from gcref/gcbyref set
16048                                             // in case they become dead after genUpdateLife.
16049                                             // genDoneAddressable() will remove dead gc vars by calling
16050                                             // gcInfo.gcMarkRegSetNpt.
16051                                             // Although it is not addrReg, we just borrow the name here.
16052                                             addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16053                                         }
16054                                         else
16055                                         {
16056                                             // Must be TYP_FLOAT or TYP_DOUBLE
16057                                             noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16058
16059                                             noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
16060                                                          fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
16061
16062                                             inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16063
16064                                             genSinglePush();
16065                                             if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
16066                                             {
16067                                                 genSinglePush();
16068                                             }
16069
16070 #if FEATURE_STACK_FP_X87
16071                                             GenTree* fieldTree = new (compiler, GT_REG_VAR)
16072                                                 GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16073                                             fieldTree->gtOper            = GT_REG_VAR;
16074                                             fieldTree->gtRegNum          = fieldVarDsc->lvRegNum;
16075                                             fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16076                                             if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
16077                                             {
16078                                                 if (fieldVarDsc->lvTracked &&
16079                                                     (deadVarBits == NULL ||
16080                                                      VarSetOps::IsMember(compiler, *deadVarBits,
16081                                                                          fieldVarDsc->lvVarIndex)))
16082                                                 {
16083                                                     fieldTree->gtFlags |= GTF_VAR_DEATH;
16084                                                 }
16085                                             }
16086                                             genCodeForTreeStackFP_Leaf(fieldTree);
16087
16088                                             // Take reg to top of stack
16089
16090                                             FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16091
16092                                             // Pop it off to stack
16093                                             compCurFPState.Pop();
16094
16095                                             getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
16096                                                                        REG_NA, REG_SPBASE, 0);
16097 #else
16098                                             NYI_FLAT_FP_X87("FP codegen");
16099 #endif
16100                                         }
16101                                     }
16102                                     else
16103                                     {
16104                                         getEmitter()->emitIns_S(INS_push,
16105                                                                 (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
16106                                                                                                     : EA_4BYTE,
16107                                                                 varNum, 0);
16108                                         genSinglePush();
16109                                     }
16110
16111                                     bytesToBeCopied -= sizeof(void*);
16112                                 }
16113                                 else // not stack aligned
16114                                 {
16115                                     noway_assert(fieldVarDsc->lvExactSize < 4);
16116
16117                                     // We will need to use a store byte or store word
16118                                     // to set this unaligned location
16119                                     postponedFields = true;
16120
16121                                     if (expectedAlignedOffset != UINT_MAX)
16122                                     {
16123                                         // This should never change until it is set back to UINT_MAX by an aligned
16124                                         // offset
16125                                         noway_assert(expectedAlignedOffset ==
16126                                                      roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
16127                                     }
16128
16129                                     expectedAlignedOffset =
16130                                         roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
16131
16132                                     noway_assert(expectedAlignedOffset < bytesToBeCopied);
16133
16134                                     if (fieldVarDsc->lvRegister)
16135                                     {
16136                                         // Do we need to use a byte-able register?
16137                                         if (fieldVarDsc->lvExactSize == 1)
16138                                         {
16139                                             // Did we enregister fieldVarDsc in a non byte-able register?
16140                                             if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16141                                             {
16142                                                 // then we will need to grab a byte-able register
16143                                                 postponedRegKind = RBM_BYTE_REGS;
16144                                             }
16145                                         }
16146                                     }
16147                                     else // not enregistered
16148                                     {
16149                                         if (fieldVarDsc->lvExactSize == 1)
16150                                         {
16151                                             // We will need to grab a byte-able register
16152                                             postponedRegKind = RBM_BYTE_REGS;
16153                                         }
16154                                         else
16155                                         {
16156                                             // We will need to grab any scratch register
16157                                             if (postponedRegKind != RBM_BYTE_REGS)
16158                                                 postponedRegKind = RBM_ALLINT;
16159                                         }
16160                                     }
16161                                 }
16162                             }
16163
16164                             // Now we've pushed all of the aligned fields.
16165                             //
16166                             // We should have pushed a number of bytes equal to the size of the entire struct
16167                             noway_assert(bytesToBeCopied == 0);
16168
16169                             // We should have seen a push that covers every postponed field
16170                             noway_assert(expectedAlignedOffset == UINT_MAX);
16171
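                            // Second pass (forward, lowest field offset first): the postponed byte and
                            // short fields are now stored into the slots pushed above, using a byte-able
                            // scratch register when needed.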
16172                             // Did we have any postponed fields?
16173                             if (postponedFields)
16174                             {
16175                                 regNumber regNum = REG_STK; // means no register
16176
16177                                 // If we needed a scratch register then grab it here
16178
16179                                 if (postponedRegKind != RBM_NONE)
16180                                     regNum = regSet.rsGrabReg(postponedRegKind);
16181
16182                                 // Forward loop, starts from the lowest field offset
16183                                 //
16184                                 for (unsigned varNum = varDsc->lvFieldLclStart;
16185                                      varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
16186                                 {
16187                                     LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
16188
16189                                     // All stack aligned fields have already been pushed
16190                                     if (fieldVarDsc->lvStackAligned())
16191                                         continue;
16192
16193                                     // We have a postponed field
16194
16195                                     // It must be a byte or a short
16196                                     noway_assert(fieldVarDsc->lvExactSize < 4);
16197
16198                                     // Is the field enregistered?
16199                                     if (fieldVarDsc->lvRegister)
16200                                     {
16201                                         // Frequently we can just use that register
16202                                         regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16203
16204                                         // Do we need to use a byte-able register?
16205                                         if (fieldVarDsc->lvExactSize == 1)
16206                                         {
16207                                             // Did we enregister the field in a non byte-able register?
16208                                             if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16209                                             {
16210                                                 // then we will need to use the byte-able register 'regNum'
16211                                                 noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16212
16213                                                 // Copy the register that contains fieldVarDsc into 'regNum'
16214                                                 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
16215                                                                           fieldVarDsc->lvRegNum);
16216                                                 regTracker.rsTrackRegLclVar(regNum, varNum);
16217
16218                                                 // tmpRegNum is the register that we will extract the byte value from
16219                                                 tmpRegNum = regNum;
16220                                             }
16221                                             noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
16222                                         }
16223
16224                                         getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16225                                                                    (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
16226                                                                    REG_SPBASE, fieldVarDsc->lvFldOffset);
16227                                     }
16228                                     else // not enregistered
16229                                     {
16230                                         // We will copy the non-enregistered fieldVar into our scratch register 'regNum'
16231
16232                                         noway_assert(regNum != REG_STK);
16233                                         getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
16234                                                                   (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
16235                                                                   0);
16236
16237                                         regTracker.rsTrackRegLclVar(regNum, varNum);
16238
16239                                         // Store the value (byte or short) into the stack
16240
16241                                         getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16242                                                                    (emitAttr)fieldVarDsc->lvExactSize, regNum,
16243                                                                    REG_SPBASE, fieldVarDsc->lvFldOffset);
16244                                     }
16245                                 }
16246                             }
16247                             genUpdateLife(structLocalTree);
16248
16249                             break;
16250                         }
16251                     }
16252
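                    // General case: evaluate the address of the struct (the GT_OBJ's operand) into a
                    // register, then copy the struct onto the stack either with SSE2 movq transfers
                    // or with a series of pushes (see below).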
16253                     genCodeForTree(arg->gtObj.gtOp1, 0);
16254                     noway_assert(arg->gtObj.gtOp1->InReg());
16255                     regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16256                     // Get the number of DWORDS to copy to the stack
16257                     opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
16258                     unsigned slots = (unsigned)(opsz / sizeof(void*));
16259
16260                     BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16261
16262                     compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16263
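                    // Scan the GC layout to see whether the struct contains any GC pointers; the
                    // movq fast path below can only be used when it does not.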
16264                     BOOL bNoneGC = TRUE;
16265                     for (int i = slots - 1; i >= 0; --i)
16266                     {
16267                         if (gcLayout[i] != TYPE_GC_NONE)
16268                         {
16269                             bNoneGC = FALSE;
16270                             break;
16271                         }
16272                     }
16273
16274                     /* passing large structures using movq instead of pushes does not increase codesize very much */
16275                     unsigned movqLenMin  = 8;
16276                     unsigned movqLenMax  = 64;
16277                     unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16278
16279                     if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16280                     {
16281                         // Don't bother with this optimization in
16282                         // rarely run blocks or when optimizing for size
16283                         movqLenMax = movqLenMin = 0;
16284                     }
16285                     else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16286                     {
16287                         // Be more aggressive when optimizing for speed
16288                         movqLenMax *= 2;
16289                     }
16290
16291                     /* Adjust for BB weight */
16292                     if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
16293                     {
16294                         // Be more aggressive when we are inside a loop
16295                         movqLenMax *= 2;
16296                     }
16297
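                    // With the thresholds above, a struct of 8..64 bytes with no GC fields qualifies
                    // for the movq path when SSE2 is available; FAST_CODE doubles the upper bound and
                    // a hot (loop) block doubles it again.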
16298                     if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
16299                     {
16300                         JITLOG_THIS(compiler, (LL_INFO10000,
16301                                                "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16302                                                opsz, compiler->info.compFullName));
16303
16304                         int       stkDisp = (int)(unsigned)opsz;
16305                         int       curDisp = 0;
16306                         regNumber xmmReg  = REG_XMM0;
16307
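                        // If the size is not a multiple of 8, push the odd trailing 4 bytes first so
                        // that the rest can be copied as whole 8-byte movq transfers.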
16308                         if (opsz & 0x4)
16309                         {
16310                             stkDisp -= sizeof(void*);
16311                             getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
16312                             genSinglePush();
16313                         }
16314
16315                         inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16316                         AddStackLevel(stkDisp);
16317
16318                         while (curDisp < stkDisp)
16319                         {
16320                             getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16321                             getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16322                             curDisp += 2 * sizeof(void*);
16323                         }
16324                         noway_assert(curDisp == stkDisp);
16325                     }
16326                     else
16327                     {
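                        // No movq fast path: push the struct one pointer-sized slot at a time, from
                        // the highest slot down, telling the emitter which slots hold GC refs/byrefs.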
16328                         for (int i = slots - 1; i >= 0; --i)
16329                         {
16330                             emitAttr fieldSize;
16331                             if (gcLayout[i] == TYPE_GC_NONE)
16332                                 fieldSize = EA_4BYTE;
16333                             else if (gcLayout[i] == TYPE_GC_REF)
16334                                 fieldSize = EA_GCREF;
16335                             else
16336                             {
16337                                 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16338                                 fieldSize = EA_BYREF;
16339                             }
16340                             getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
16341                             genSinglePush();
16342                         }
16343                     }
16344                     gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
16345                 }
16346
16347                 addrReg = 0;
16348                 break;
16349             }
16350
16351             default:
16352                 noway_assert(!"unhandled/unexpected arg type");
16353                 NO_WAY("unhandled/unexpected arg type");
16354         }
16355
16356         /* Update the current set of live variables */
16357
16358         genUpdateLife(curr);
16359
16360         /* Update the current set of register pointers */
16361
16362         noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
16363         genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16364
16365         /* Remember how much stuff we've pushed on the stack */
16366
16367         size += opsz;
16368
16369         /* Update the current argument stack offset */
16370
16371         /* Continue with the next argument, if any more are present */
16372
16373     } // while args
16374
16375     /* Move the deferred arguments to registers */
16376
16377     for (args = regArgs; args; args = args->Rest())
16378     {
16379         curr = args->Current();
16380
16381         assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
16382
16383         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16384         assert(curArgTabEntry);
16385         regNumber regNum = curArgTabEntry->regNum;
16386
16387         noway_assert(isRegParamType(curr->TypeGet()));
16388         noway_assert(curr->gtType != TYP_VOID);
16389
16390         /* Evaluate the argument to a register [pair] */
16391
16392         if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16393         {
16394             /* Check if this is the guess area for the resolve interface call
16395              * Pass a size of EA_OFFSET */
16396             if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16397             {
16398                 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
16399                 regTracker.rsTrackRegTrash(regNum);
16400
16401                 /* The value is now in the appropriate register */
16402
16403                 genMarkTreeInReg(curr, regNum);
16404             }
16405             else
16406             {
16407                 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16408             }
16409
16410             noway_assert(curr->gtRegNum == regNum);
16411
16412             /* If the register is already marked as used, it will become
16413                multi-used. However, since it is a callee-trashed register,
16414                we will have to spill it before the call anyway. So do it now */
16415
16416             if (regSet.rsMaskUsed & genRegMask(regNum))
16417             {
16418                 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16419                 regSet.rsSpillReg(regNum);
16420             }
16421
16422             /* Mark the register as 'used' */
16423
16424             regSet.rsMarkRegUsed(curr);
16425         }
16426         else
16427         {
16428             noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16429         }
16430     }
16431
16432     /* If any of the previously loaded arguments were spilled - reload them */
16433
16434     for (args = regArgs; args; args = args->Rest())
16435     {
16436         curr = args->Current();
16437         assert(curr);
16438
16439         if (curr->gtFlags & GTF_SPILLED)
16440         {
16441             if (isRegPairType(curr->gtType))
16442             {
16443                 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16444             }
16445             else
16446             {
16447                 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16448             }
16449         }
16450     }
16451
16452     /* Return the total size pushed */
16453
16454     return size;
16455 }
16456 #ifdef _PREFAST_
16457 #pragma warning(pop)
16458 #endif
16459
16460 #else // FEATURE_FIXED_OUT_ARGS
16461
16462 //
16463 // ARM and AMD64 use this method to pass the stack-based args
16464 //
16465 // returns size pushed (always zero)
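// Unlike the push-based version above, nothing is physically pushed here: each stack argument
// is stored into the outgoing argument area (compiler->lvaOutgoingArgSpaceVar) at the slot
// offset recorded in its fgArgTabEntry, which is why the size returned is always zero.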
16466 size_t CodeGen::genPushArgList(GenTreeCall* call)
16467 {
16468     GenTreeArgList* lateArgs = call->gtCallLateArgs;
16469     GenTreePtr      curr;
16470     var_types       type;
16471     int             argSize;
16472
16473     GenTreeArgList* args;
16474     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16475     // so we can iterate over this argument list more uniformly.
16476     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16477     GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCallArgs);
16478     if (call->gtCallObjp == NULL)
16479     {
16480         args = call->gtCallArgs;
16481     }
16482     else
16483     {
16484         objpArgList.Current() = call->gtCallObjp;
16485         args                  = &objpArgList;
16486     }
16487
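    // Walk the argument list, including the 'this' pointer (gtCallObjp) if present.  Stack
    // arguments are evaluated and stored into their outgoing arg slots here; GTF_LATE_ARG
    // nodes only have their side effects evaluated now and are handled by SetupLateArgs.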
16488     for (; args; args = args->Rest())
16489     {
16490         /* Get hold of the next argument value */
16491         curr = args->Current();
16492
16493         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16494         assert(curArgTabEntry);
16495         regNumber regNum    = curArgTabEntry->regNum;
16496         int       argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
16497
16498         /* See what type of a value we're passing */
16499         type = curr->TypeGet();
16500
16501         if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
16502         {
16503             type = TYP_VOID;
16504         }
16505
16506         // This holds the set of registers corresponding to enregistered promoted struct field variables
16507         // that go dead after this use of the variable in the argument list.
16508         regMaskTP deadFieldVarRegs = RBM_NONE;
16509
16510         argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
16511
16512         if (curr->IsArgPlaceHolderNode())
16513         {
16514             assert(curr->gtFlags & GTF_LATE_ARG);
16515             goto DEFERRED;
16516         }
16517
16518         if (varTypeIsSmall(type))
16519         {
16520             // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
16521             // Normalize 'type'; it represents the item that we will be storing in the Outgoing Args
16522         }
16523
16524         switch (type)
16525         {
16526
16527             case TYP_DOUBLE:
16528             case TYP_LONG:
16529
16530 #if defined(_TARGET_ARM_)
16531
16532                 argSize = (TARGET_POINTER_SIZE * 2);
16533
16534                 /* Is the value a constant? */
16535
16536                 if (curr->gtOper == GT_CNS_LNG)
16537                 {
16538                     assert((curr->gtFlags & GTF_LATE_ARG) == 0);
16539
16540                     int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
16541                     int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
16542
16543                     instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
16544
16545                     instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
16546                                                argOffset + 4);
16547
16548                     break;
16549                 }
16550                 else
16551                 {
16552                     genCodeForTree(curr, 0);
16553
16554                     if (curr->gtFlags & GTF_LATE_ARG)
16555                     {
16556                         // The arg was assigned into a temp and
16557                         // will be moved to the correct register or slot later
16558
16559                         argSize = 0; // nothing is passed on the stack
16560                     }
16561                     else
16562                     {
16563                         // The arg is passed in the outgoing argument area of the stack frame
16564                         //
16565                         assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16566                         assert(curr->InReg());          // should be enregistered after genCodeForTree(curr, 0)
16567
16568                         if (type == TYP_LONG)
16569                         {
16570                             regNumber regLo = genRegPairLo(curr->gtRegPair);
16571                             regNumber regHi = genRegPairHi(curr->gtRegPair);
16572
16573                             assert(regLo != REG_STK);
16574                             inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
16575                             if (regHi == REG_STK)
16576                             {
16577                                 regHi = regSet.rsPickFreeReg();
16578                                 inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
16579                                 regTracker.rsTrackRegTrash(regHi);
16580                             }
16581                             inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
16582                         }
16583                         else // (type == TYP_DOUBLE)
16584                         {
16585                             inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16586                         }
16587                     }
16588                 }
16589                 break;
16590
16591 #elif defined(_TARGET_64BIT_)
16592                 __fallthrough;
16593 #else
16594 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
16595 #endif
16596
16597             case TYP_REF:
16598             case TYP_BYREF:
16599
16600             case TYP_FLOAT:
16601             case TYP_INT:
16602                 /* Is the value a constant? */
16603
16604                 if (curr->gtOper == GT_CNS_INT)
16605                 {
16606                     assert(!(curr->gtFlags & GTF_LATE_ARG));
16607
16608 #if REDUNDANT_LOAD
16609                     regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16610
16611                     if (reg != REG_NA)
16612                     {
16613                         inst_SA_RV(ins_Store(type), argOffset, reg, type);
16614                     }
16615                     else
16616 #endif
16617                     {
16618                         bool     needReloc = compiler->opts.compReloc && curr->IsIconHandle();
16619                         emitAttr attr      = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
16620                         instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
16621                                                    compiler->lvaOutgoingArgSpaceVar, argOffset);
16622                     }
16623                     break;
16624                 }
16625
16626                 /* This is passed as a pointer-sized integer argument */
16627
16628                 genCodeForTree(curr, 0);
16629
16630                 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
16631                 if (curr->gtFlags & GTF_LATE_ARG)
16632                 {
16633 #ifdef _TARGET_ARM_
16634                     argSize = 0; // nothing is passed on the stack
16635 #endif
16636                 }
16637                 else
16638                 {
16639                     // The arg is passed in the outgoing argument area of the stack frame
16640
16641                     assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16642                     assert(curr->InReg());          // should be enregistered after genCodeForTree(curr, 0)
16643                     inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16644
16645                     if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
16646                         gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
16647                 }
16648                 break;
16649
16650             case TYP_VOID:
16651                 /* Is this a nothing node, deferred register argument? */
16652
16653                 if (curr->gtFlags & GTF_LATE_ARG)
16654                 {
16655                 /* Handle side-effects */
16656                 DEFERRED:
16657                     if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
16658                     {
16659 #ifdef _TARGET_ARM_
16660                         {
16661                             GenTreePtr curArgNode    = curArgTabEntry->node;
16662                             var_types  curRegArgType = curArgNode->gtType;
16663                             assert(curRegArgType != TYP_UNDEF);
16664
16665                             if (curRegArgType == TYP_STRUCT)
16666                             {
16667                                 // If the RHS of the COPYBLK is a promoted struct local, then the use of that
16668                                 // is an implicit use of all its field vars.  If these are last uses, remember that,
16669                                 // so we can later update the GC info.
16670                                 if (curr->OperIsCopyBlkOp())
16671                                     deadFieldVarRegs |= genFindDeadFieldRegs(curr);
16672                             }
16673                         }
16674 #endif // _TARGET_ARM_
16675
16676                         genCodeForTree(curr, 0);
16677                     }
16678                     else
16679                     {
16680                         assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
16681                     }
16682
16683 #if defined(_TARGET_ARM_)
16684                     argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
16685 #endif
16686                 }
16687                 else
16688                 {
16689                     for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
16690                     {
16691                         GenTreePtr op1 = arg->gtOp.gtOp1;
16692
16693                         genEvalSideEffects(op1);
16694                         genUpdateLife(op1);
16695                     }
16696                 }
16697                 break;
16698
16699 #ifdef _TARGET_ARM_
16700
16701             case TYP_STRUCT:
16702             {
16703                 GenTree* arg = curr;
16704                 while (arg->gtOper == GT_COMMA)
16705                 {
16706                     GenTreePtr op1 = arg->gtOp.gtOp1;
16707                     genEvalSideEffects(op1);
16708                     genUpdateLife(op1);
16709                     arg = arg->gtOp.gtOp2;
16710                 }
16711                 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
16712
16713                 CORINFO_CLASS_HANDLE clsHnd;
16714                 unsigned             argAlign;
16715                 unsigned             slots;
16716                 BYTE*                gcLayout = NULL;
16717
16718                 // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
16719                 // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
16720                 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
16721                 // table entry for the promoted struct local.  As we fill slots with the contents of a
16722                 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
16723                 // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
16724                 // variable number of the next field variable to be copied.
16725                 LclVarDsc* promotedStructLocalVarDesc           = NULL;
16726                 GenTreePtr structLocalTree                      = NULL;
16727                 unsigned   bytesOfNextSlotOfCurPromotedStruct   = TARGET_POINTER_SIZE; // Size of slot.
16728                 unsigned   nextPromotedStructFieldVar           = BAD_VAR_NUM;
16729                 unsigned   promotedStructOffsetOfFirstStackSlot = 0;
16730                 unsigned   argOffsetOfFirstStackSlot            = UINT32_MAX; // Indicates uninitialized.
16731
16732                 if (arg->OperGet() == GT_OBJ)
16733                 {
16734                     clsHnd                = arg->gtObj.gtClass;
16735                     unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
16736                     argAlign =
16737                         roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
16738                     argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
16739
16740                     slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
16741
16742                     gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16743
16744                     compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
16745
16746                     // Are we loading a promoted struct local var?
16747                     if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16748                     {
16749                         structLocalTree         = arg->gtObj.gtOp1->gtOp.gtOp1;
16750                         unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
16751                         LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];
16752
16753                         // As much as we would like this to be a noway_assert, we can't because
16754                         // there are some weird casts out there, and backwards compatibility
16755                         // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16756                         // lvPromoted in general currently do not require the local to be
16757                         // TYP_STRUCT, so this assert is really more about how we wish the world
16758                         // was than some JIT invariant.
16759                         assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16760
16761                         Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
16762
16763                         if (varDsc->lvPromoted &&
16764                             promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
16765                                                                                    // on stack.
16766                         {
16767                             assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16768                             promotedStructLocalVarDesc = varDsc;
16769                             nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
16770                         }
16771                     }
16772                 }
16773                 else
16774                 {
16775                     noway_assert(arg->OperGet() == GT_MKREFANY);
16776
16777                     clsHnd   = NULL;
16778                     argAlign = TARGET_POINTER_SIZE;
16779                     argSize  = 2 * TARGET_POINTER_SIZE;
16780                     slots    = 2;
16781                 }
16782
16783                 // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
16784                 noway_assert(regNum == REG_STK);
16785
16786                 // This code passes a TYP_STRUCT by value using the outgoing arg space var
16787                 //
16788                 if (arg->OperGet() == GT_OBJ)
16789                 {
16790                     regNumber regSrc = REG_STK;
16791                     regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
16792                     int       cStackSlots = 0;
16793
16794                     if (promotedStructLocalVarDesc == NULL)
16795                     {
16796                         genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
16797                         noway_assert(arg->gtObj.gtOp1->InReg());
16798                         regSrc = arg->gtObj.gtOp1->gtRegNum;
16799                     }
16800
16801                     // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
16802                     int extraArgOffset = 0;
16803
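                    // Copy the struct one pointer-sized slot at a time: promoted locals are copied
                    // field by field via genFillSlotFromPromotedStruct; otherwise each slot is loaded
                    // from [regSrc] into a temp register and stored into the outgoing arg area.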
16804                     for (unsigned i = 0; i < slots; i++)
16805                     {
16806                         emitAttr fieldSize;
16807                         if (gcLayout[i] == TYPE_GC_NONE)
16808                             fieldSize = EA_PTRSIZE;
16809                         else if (gcLayout[i] == TYPE_GC_REF)
16810                             fieldSize = EA_GCREF;
16811                         else
16812                         {
16813                             noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16814                             fieldSize = EA_BYREF;
16815                         }
16816
16817                         // Pass the argument using the lvaOutgoingArgSpaceVar
16818
16819                         if (promotedStructLocalVarDesc != NULL)
16820                         {
16821                             if (argOffsetOfFirstStackSlot == UINT32_MAX)
16822                                 argOffsetOfFirstStackSlot = argOffset;
16823
16824                             regNumber maxRegArg       = regNumber(MAX_REG_ARG);
16825                             bool      filledExtraSlot = genFillSlotFromPromotedStruct(
16826                                 arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
16827                                 &bytesOfNextSlotOfCurPromotedStruct,
16828                                 /*pCurRegNum*/ &maxRegArg,
16829                                 /*argOffset*/ argOffset + extraArgOffset,
16830                                 /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
16831                                 argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
16832                             extraArgOffset += TARGET_POINTER_SIZE;
16833                             // If we filled an extra slot with an 8-byte value, skip a slot.
16834                             if (filledExtraSlot)
16835                             {
16836                                 i++;
16837                                 cStackSlots++;
16838                                 extraArgOffset += TARGET_POINTER_SIZE;
16839                             }
16840                         }
16841                         else
16842                         {
16843                             if (regTmp == REG_STK)
16844                             {
16845                                 regTmp = regSet.rsPickFreeReg();
16846                             }
16847
16848                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
16849                                                        i * TARGET_POINTER_SIZE);
16850
16851                             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
16852                                                       compiler->lvaOutgoingArgSpaceVar,
16853                                                       argOffset + cStackSlots * TARGET_POINTER_SIZE);
16854                             regTracker.rsTrackRegTrash(regTmp);
16855                         }
16856                         cStackSlots++;
16857                     }
16858
16859                     if (promotedStructLocalVarDesc == NULL)
16860                     {
16861                         regSet.rsMarkRegFree(genRegMask(regSrc));
16862                     }
16863                     if (structLocalTree != NULL)
16864                         genUpdateLife(structLocalTree);
16865                 }
16866                 else
16867                 {
16868                     assert(arg->OperGet() == GT_MKREFANY);
16869                     PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
16870                     argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
16871                 }
16872             }
16873             break;
16874 #endif // _TARGET_ARM_
16875
16876             default:
16877                 assert(!"unhandled/unexpected arg type");
16878                 NO_WAY("unhandled/unexpected arg type");
16879         }
16880
16881         /* Update the current set of live variables */
16882
16883         genUpdateLife(curr);
16884
16885         // Now, if some copied field locals were enregistered, and they're now dead, update the set of
16886         // register holding gc pointers.
16887         if (deadFieldVarRegs != 0)
16888             gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
16889
16890         /* Update the current argument stack offset */
16891
16892         argOffset += argSize;
16893
16894         /* Continue with the next argument, if any more are present */
16895     } // while (args)
16896
16897     if (lateArgs)
16898     {
16899         SetupLateArgs(call);
16900     }
16901
16902     /* Return the total size pushed */
16903
16904     return 0;
16905 }
16906
16907 #ifdef _TARGET_ARM_
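/*****************************************************************************
 *
 *  Fill one slot of a struct argument from the field variable(s) of an
 *  independently promoted struct local.  The field value is copied either into
 *  the current argument register (*pCurRegNum) or into the outgoing arg area
 *  at 'argOffset'.  Returns true when an 8-byte field also consumed the
 *  following slot, so the caller should skip that slot.
 */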
16908 bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr       arg,
16909                                             fgArgTabEntryPtr curArgTabEntry,
16910                                             LclVarDsc*       promotedStructLocalVarDesc,
16911                                             emitAttr         fieldSize,
16912                                             unsigned*        pNextPromotedStructFieldVar,
16913                                             unsigned*        pBytesOfNextSlotOfCurPromotedStruct,
16914                                             regNumber*       pCurRegNum,
16915                                             int              argOffset,
16916                                             int              fieldOffsetOfFirstStackSlot,
16917                                             int              argOffsetOfFirstStackSlot,
16918                                             regMaskTP*       deadFieldVarRegs,
16919                                             regNumber*       pRegTmp)
16920 {
16921     unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
16922     unsigned limitPromotedStructFieldVar =
16923         promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
16924     unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
16925
16926     regNumber curRegNum       = *pCurRegNum;
16927     regNumber regTmp          = *pRegTmp;
16928     bool      filledExtraSlot = false;
16929
16930     if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
16931     {
16932         // We've already finished; just return.
16933         // We can reach this because the calling loop computes a # of slots based on the size of the struct.
16934         // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
16935         // the fourth slot, even though we've copied all the fields.
16936         return false;
16937     }
16938
16939     LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
16940
16941     // Does this field fill an entire slot, and does it go at the start of the slot?
16942     // If so, things are easier...
16943
16944     bool oneFieldFillsSlotFromStart =
16945         (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
16946         && ((fieldVarDsc->lvFldOffset % 4) == 0)                        // at the start of the slot, and...
16947         && (nextPromotedStructFieldVar + 1 ==
16948                 limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
16949             || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
16950
16951     // Compute the proper size.
16952     if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
16953     {
16954         switch (fieldVarDsc->lvExactSize)
16955         {
16956             case 1:
16957                 fieldSize = EA_1BYTE;
16958                 break;
16959             case 2:
16960                 fieldSize = EA_2BYTE;
16961                 break;
16962             case 8:
16963                 // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
16964                 // in which case we should not have promoted the struct variable.
16965                 noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
16966
16967                 // If the current reg number is not aligned, align it, and return to the calling loop, which will
16968                 // consider that a filled slot and move on to the next argument register.
16969                 if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
16970                 {
16971                     // We must update the slot target, however!
16972                     bytesOfNextSlotOfCurPromotedStruct += 4;
16973                     *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
16974                     return false;
16975                 }
16976                 // Dest is an aligned pair of arg regs, if the struct type demands it.
16977                 noway_assert((curRegNum % 2) == 0);
16978                 // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
16979                 break;
16980             default:
16981                 assert(fieldVarDsc->lvExactSize == 4);
16982                 break;
16983         }
16984     }
16985     else
16986     {
16987         // If the gc layout said it's a GC ref or byref, then the field size must be 4.
16988         noway_assert(fieldVarDsc->lvExactSize == 4);
16989     }
16990
16991     // We may need the type of the field to influence instruction selection.
16992     // If we have a TYP_LONG we can use TYP_I_IMPL, doing two loads/stores.
16993     // If the fieldVarDsc is an enregistered float we must use the field's exact type;
16994     // however, if it is in memory we can use the integer type TYP_I_IMPL.
16995     //
16996     var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
16997     if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
16998     {
16999         fieldTypeForInstr = TYP_I_IMPL;
17000     }
17001
17002     // If we have an HFA, then it is a much simpler deal -- HFAs are completely enregistered.
17003     if (curArgTabEntry->isHfaRegArg)
17004     {
17005         assert(oneFieldFillsSlotFromStart);
17006
17007         // Is the field variable enregistered?
17008         if (fieldVarDsc->lvRegister)
17009         {
17010             // Move the field var living in a register to dst, if they are different registers.
17011             regNumber srcReg = fieldVarDsc->lvRegNum;
17012             regNumber dstReg = curRegNum;
17013             if (srcReg != dstReg)
17014             {
17015                 inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
17016                 assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
17017             }
17018         }
17019         else
17020         {
17021             // Move the field var living on the stack to dst.
17022             getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
17023                                       fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
17024                                       nextPromotedStructFieldVar, 0);
17025             assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
17026         }
17027
17028         // Mark the arg as used and using reg val.
17029         genMarkTreeInReg(arg, curRegNum);
17030         regSet.SetUsedRegFloat(arg, true);
17031
17032         // Advance for double.
17033         if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
17034         {
17035             bytesOfNextSlotOfCurPromotedStruct += 4;
17036             curRegNum     = REG_NEXT(curRegNum);
17037             arg->gtRegNum = curRegNum;
17038             regSet.SetUsedRegFloat(arg, true);
17039             filledExtraSlot = true;
17040         }
17041         arg->gtRegNum = curArgTabEntry->regNum;
17042
17043         // Advance.
17044         bytesOfNextSlotOfCurPromotedStruct += 4;
17045         nextPromotedStructFieldVar++;
17046     }
17047     else
17048     {
17049         if (oneFieldFillsSlotFromStart)
17050         {
17051             // If we write to the stack, offset in outgoing args at which we'll write.
17052             int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17053             assert(fieldArgOffset >= 0);
17054
17055             // Is the source a register or memory?
17056             if (fieldVarDsc->lvRegister)
17057             {
17058                 if (fieldTypeForInstr == TYP_DOUBLE)
17059                 {
17060                     fieldSize = EA_8BYTE;
17061                 }
17062
17063                 // Are we writing to a register or to the stack?
17064                 if (curRegNum != MAX_REG_ARG)
17065                 {
17066                     // Source is register and Dest is register.
17067
17068                     instruction insCopy = INS_mov;
17069
17070                     if (varTypeIsFloating(fieldTypeForInstr))
17071                     {
17072                         if (fieldTypeForInstr == TYP_FLOAT)
17073                         {
17074                             insCopy = INS_vmov_f2i;
17075                         }
17076                         else
17077                         {
17078                             assert(fieldTypeForInstr == TYP_DOUBLE);
17079                             insCopy = INS_vmov_d2i;
17080                         }
17081                     }
17082
17083                     // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers.  Record the second
17084                     // register (which may become a tmp register, if it's held in the argument register that the first
17085                     // register to be copied will overwrite).
17086                     regNumber otherRegNum = REG_STK;
17087                     if (fieldVarDsc->lvType == TYP_LONG)
17088                     {
17089                         otherRegNum = fieldVarDsc->lvOtherReg;
17090                         // Are we about to overwrite?
17091                         if (otherRegNum == curRegNum)
17092                         {
17093                             if (regTmp == REG_STK)
17094                             {
17095                                 regTmp = regSet.rsPickFreeReg();
17096                             }
17097                             // Copy the second register to the temp reg.
17098                             getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
17099                             regTracker.rsTrackRegCopy(regTmp, otherRegNum);
17100                             otherRegNum = regTmp;
17101                         }
17102                     }
17103
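                    // A TYP_DOUBLE held in a floating-point register is moved into an (even-aligned)
                    // pair of core argument registers with a single vmov_d2i; other types are copied
                    // one register at a time below.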
17104                     if (fieldVarDsc->lvType == TYP_DOUBLE)
17105                     {
17106                         assert(curRegNum <= REG_R2);
17107                         getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
17108                                                     fieldVarDsc->lvRegNum);
17109                         regTracker.rsTrackRegTrash(curRegNum);
17110                         regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
17111                     }
17112                     else
17113                     {
17114                         // Now do the first register.
17115                         // It might be the case that it's already in the desired register; if so do nothing.
17116                         if (curRegNum != fieldVarDsc->lvRegNum)
17117                         {
17118                             getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
17119                             regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
17120                         }
17121                     }
17122
17123                     // In either case, mark the arg register as used.
17124                     regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17125
17126                     // Is there a second half of the value?
17127                     if (fieldVarDsc->lvExactSize == 8)
17128                     {
17129                         curRegNum = genRegArgNext(curRegNum);
17130                         // The second dest reg must also be an argument register.
17131                         noway_assert(curRegNum < MAX_REG_ARG);
17132
17133                         // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17134                         if (fieldVarDsc->lvType == TYP_LONG)
17135                         {
17136                             // Copy the second register into the next argument register
17137
17138                             // If it's a register variable for a TYP_LONG value, then otherRegNum should now
17139                             //  hold the second register, or REG_STK if that half lives on the stack.
17140                             if (otherRegNum == REG_STK)
17141                             {
17142                                 // Apparently when we partially enregister, we allocate stack space for the full
17143                                 // 8 bytes, and enregister the low half.  Hence the final TARGET_POINTER_SIZE offset
17144                                 // parameter, which fetches the high half.
17145                                 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
17146                                                           nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17147                                 regTracker.rsTrackRegTrash(curRegNum);
17148                             }
17149                             else
17150                             {
17151                                 // The other half is in a register.
17152                                 // Again, it might be the case that it's already in the desired register; if so do
17153                                 // nothing.
17154                                 if (curRegNum != otherRegNum)
17155                                 {
17156                                     getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
17157                                     regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
17158                                 }
17159                             }
17160                         }
17161
17162                         // Also mark the 2nd arg register as used.
17163                         regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
17164                         // Record the fact that we filled in an extra register slot
17165                         filledExtraSlot = true;
17166                     }
17167                 }
17168                 else
17169                 {
17170                     // Source is register and Dest is memory (OutgoingArgSpace).
17171
17172                     // Now write the srcReg into the right location in the outgoing argument list.
17173                     getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17174                                               compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17175
17176                     if (fieldVarDsc->lvExactSize == 8)
17177                     {
17178                         // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17179                         if (fieldVarDsc->lvType == TYP_LONG)
17180                         {
17181                             if (fieldVarDsc->lvOtherReg == REG_STK)
17182                             {
17183                                 // Source is stack.
17184                                 if (regTmp == REG_STK)
17185                                 {
17186                                     regTmp = regSet.rsPickFreeReg();
17187                                 }
17188                                 // Apparently if we partially enregister, we allocate stack space for the full
17189                                 // 8 bytes, and enregister the low half.  Hence the final TARGET_POINTER_SIZE offset
17190                                 // parameter, which fetches the high half.
17191                                 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17192                                                           nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17193                                 regTracker.rsTrackRegTrash(regTmp);
17194                                 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17195                                                           compiler->lvaOutgoingArgSpaceVar,
17196                                                           fieldArgOffset + TARGET_POINTER_SIZE);
17197                             }
17198                             else
17199                             {
17200                                 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
17201                                                           compiler->lvaOutgoingArgSpaceVar,
17202                                                           fieldArgOffset + TARGET_POINTER_SIZE);
17203                             }
17204                         }
17205                         // Record the fact that we filled in an extra register slot
17206                         filledExtraSlot = true;
17207                     }
17208                 }
17209                 assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
17210                 // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
17211                 if (arg->gtFlags & GTF_VAR_DEATH)
17212                 {
17213                     *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
17214                     // We don't bother with the second reg of a register pair, since if it has one,
17215                     // it obviously doesn't hold a pointer.
17216                 }
17217             }
17218             else
17219             {
17220                 // Source is in memory.
17221
17222                 if (curRegNum != MAX_REG_ARG)
17223                 {
17224                     // Dest is reg.
17225                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
17226                                               nextPromotedStructFieldVar, 0);
17227                     regTracker.rsTrackRegTrash(curRegNum);
17228
17229                     regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17230
17231                     if (fieldVarDsc->lvExactSize == 8)
17232                     {
17233                         noway_assert(fieldSize == EA_4BYTE);
17234                         curRegNum = genRegArgNext(curRegNum);
17235                         noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
17236                         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
17237                                                   nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17238                         regTracker.rsTrackRegTrash(curRegNum);
17239                         regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17240                         // Record the fact that we filled in an extra register slot
17241                         filledExtraSlot = true;
17242                     }
17243                 }
17244                 else
17245                 {
17246                     // Dest is stack.
17247                     if (regTmp == REG_STK)
17248                     {
17249                         regTmp = regSet.rsPickFreeReg();
17250                     }
17251                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17252                                               nextPromotedStructFieldVar, 0);
17253
17254                     // Now write regTmp into the right location in the outgoing argument list.
17255                     getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17256                                               compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17257                     // We overwrote "regTmp", so erase any previous value we recorded that it contained.
17258                     regTracker.rsTrackRegTrash(regTmp);
17259
17260                     if (fieldVarDsc->lvExactSize == 8)
17261                     {
17262                         getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17263                                                   nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17264
17265                         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17266                                                   compiler->lvaOutgoingArgSpaceVar,
17267                                                   fieldArgOffset + TARGET_POINTER_SIZE);
17268                         // Record the fact that we filled in an extra stack slot
17269                         filledExtraSlot = true;
17270                     }
17271                 }
17272             }
17273
17274             // Bump up the following if we filled in an extra slot
17275             // Bump up the slot boundary if we filled in an extra slot
17276                 bytesOfNextSlotOfCurPromotedStruct += 4;
17277
17278             // Go to the next field.
17279             nextPromotedStructFieldVar++;
17280             if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17281             {
17282                 fieldVarDsc = NULL;
17283             }
17284             else
17285             {
17286                 // The next field should have the same parent variable, and we should have put the field vars in order
17287                 // sorted by offset.
17288                 assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
17289                        fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
17290                        fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17291                 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17292             }
17293             bytesOfNextSlotOfCurPromotedStruct += 4;
17294         }
17295         else // oneFieldFillsSlotFromStart == false
17296         {
17297             // The current slot should contain more than one field.
17298             // We'll construct a word in memory for the slot, then load it into a register.
17299             // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
17300             // slot, in which case we'll just skip this loop altogether.)
17301             while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
17302             {
17303                 // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
17304                 // whose fields have their natural alignment, and alignment == size on ARM).
17305                 noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
17306
17307                 // If the argument goes to the stack, this is its offset in the outgoing arg area.
17308                 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17309                 noway_assert(argOffset == INT32_MAX ||
17310                              (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
17311
17312                 if (fieldVarDsc->lvRegister)
17313                 {
17314                     if (curRegNum != MAX_REG_ARG)
17315                     {
17316                         noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17317
17318                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17319                                                   compiler->lvaPromotedStructAssemblyScratchVar,
17320                                                   fieldVarDsc->lvFldOffset % 4);
17321                     }
17322                     else
17323                     {
17324                         // Dest is stack; write directly.
17325                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17326                                                   compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17327                     }
17328                 }
17329                 else
17330                 {
17331                     // Source is in memory.
17332
17333                     // Make sure we have a temporary register to use...
17334                     if (regTmp == REG_STK)
17335                     {
17336                         regTmp = regSet.rsPickFreeReg();
17337                     }
17338                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17339                                               nextPromotedStructFieldVar, 0);
17340                     regTracker.rsTrackRegTrash(regTmp);
17341
17342                     if (curRegNum != MAX_REG_ARG)
17343                     {
17344                         noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17345
17346                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17347                                                   compiler->lvaPromotedStructAssemblyScratchVar,
17348                                                   fieldVarDsc->lvFldOffset % 4);
17349                     }
17350                     else
17351                     {
17352                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17353                                                   compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17354                     }
17355                 }
17356                 // Go to the next field.
17357                 nextPromotedStructFieldVar++;
17358                 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17359                 {
17360                     fieldVarDsc = NULL;
17361                 }
17362                 else
17363                 {
17364                     // The next field should have the same parent variable, and we should have put the field vars in
17365                     // order sorted by offset.
17366                     noway_assert(fieldVarDsc->lvIsStructField &&
17367                                  compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
17368                                  fieldVarDsc->lvParentLcl ==
17369                                      compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
17370                                  fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17371                     fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17372                 }
17373             }
17374             // Now, if we were accumulating into the promoted-struct assembly scratch word in order to
17375             // write to an argument register, do that write now.
17376             if (curRegNum != MAX_REG_ARG)
17377             {
17378                 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17379
17380                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
17381                                           compiler->lvaPromotedStructAssemblyScratchVar, 0);
17382                 regTracker.rsTrackRegTrash(curRegNum);
17383                 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17384             }
17385             // We've finished a slot; set the goal of the next slot.
17386             bytesOfNextSlotOfCurPromotedStruct += 4;
17387         }
17388     }
17389
17390     // Write back the updates.
17391     *pNextPromotedStructFieldVar         = nextPromotedStructFieldVar;
17392     *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17393     *pCurRegNum                          = curRegNum;
17394     *pRegTmp                             = regTmp;
17395
17396     return filledExtraSlot;
17397 }
17398 #endif // _TARGET_ARM_
17399
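// genFindDeadFieldRegs: Given a copy-block node whose source is a promoted struct local that dies at this
// use, return the set of registers holding its enregistered field variables, so that GC tracking for those
// registers can be updated.  Returns RBM_NONE if the source is not such a local.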
17400 regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
17401 {
17402     noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
17403     GenTreePtr rhs = cpBlk->gtOp.gtOp1;
17404     regMaskTP  res = 0;
17405     if (rhs->OperIsIndir())
17406     {
17407         GenTree* addr = rhs->AsIndir()->Addr();
17408         if (addr->gtOper == GT_ADDR)
17409         {
17410             rhs = addr->gtOp.gtOp1;
17411         }
17412     }
17413     if (rhs->OperGet() == GT_LCL_VAR)
17414     {
17415         LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
17416         if (rhsDsc->lvPromoted)
17417         {
17418             // It is promoted; iterate over its field vars.
17419             unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
17420             for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
17421             {
17422                 LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
17423                 // Did the variable go dead, and is it enregistered?
17424                 if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
17425                 {
17426                     // Add the register number to the set of registers holding field vars that are going dead.
17427                     res |= genRegMask(fieldVarDsc->lvRegNum);
17428                 }
17429             }
17430         }
17431     }
17432     return res;
17433 }
17434
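// SetupLateArgs: Generate code to move a call's "late" arguments into their assigned argument registers
// (or the outgoing argument area).  Argument registers that are already in use are spilled up front, and
// any previously loaded late arguments that get spilled along the way are reloaded at the end.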
17435 void CodeGen::SetupLateArgs(GenTreeCall* call)
17436 {
17437     GenTreeArgList* lateArgs;
17438     GenTreePtr      curr;
17439
17440     /* Generate the code to move the late arguments into registers */
17441
17442     for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
17443     {
17444         curr = lateArgs->Current();
17445         assert(curr);
17446
17447         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
17448         assert(curArgTabEntry);
17449         regNumber regNum    = curArgTabEntry->regNum;
17450         unsigned  argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
17451
17452         assert(isRegParamType(curr->TypeGet()));
17453         assert(curr->gtType != TYP_VOID);
17454
17455         /* If the register is already marked as used, it will become
17456            multi-used. However, since it is a callee-trashed register,
17457            we will have to spill it before the call anyway. So do it now */
17458
17459         {
17460             // Remember which registers hold pointers. We will spill
17461             // them, but the code that follows will fetch reg vars from
17462             // the registers, so we need that GC info.
17463             // Also regSet.rsSpillReg doesn't like to spill enregistered
17464             // variables, but if this is their last use that is *exactly*
17465             // what we need to do, so we have to temporarily pretend
17466             // they are no longer live.
17467             // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
17468             // when their last use is about to occur?
17469             // It is because this is the second operand to be evaluated
17470             // of some parent binary op, and the first operand is
17471             // live across this tree, and it thought it could re-use the
17472             // variable's register (like a GT_REG_VAR). This is probably
17473             // caused by RegAlloc assuming the first operand would
17474             // evaluate into another register.
17475             regMaskTP rsTemp          = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
17476             regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
17477             regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
17478             regSet.RemoveMaskVars(rsTemp);
17479
17480             regNumber regNum2 = regNum;
17481             for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
17482             {
17483                 if (regSet.rsMaskUsed & genRegMask(regNum2))
17484                 {
17485                     assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
17486                     regSet.rsSpillReg(regNum2);
17487                 }
17488                 regNum2 = genRegArgNext(regNum2);
17489                 assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
17490             }
17491
17492             // Restore gc tracking masks.
17493             gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
17494             gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
17495
17496             // Set maskvars back to normal
17497             regSet.AddMaskVars(rsTemp);
17498         }
17499
17500         /* Evaluate the argument to a register */
17501
17502         /* Check if this is the guess area for the resolve interface call.
17503          * Pass a size of EA_OFFSET. */
17504         if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
17505         {
17506             getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
17507             regTracker.rsTrackRegTrash(regNum);
17508
17509             /* The value is now in the appropriate register */
17510
17511             genMarkTreeInReg(curr, regNum);
17512
17513             regSet.rsMarkRegUsed(curr);
17514         }
17515 #ifdef _TARGET_ARM_
17516         else if (curr->gtType == TYP_STRUCT)
17517         {
17518             GenTree* arg = curr;
17519             while (arg->gtOper == GT_COMMA)
17520             {
17521                 GenTreePtr op1 = arg->gtOp.gtOp1;
17522                 genEvalSideEffects(op1);
17523                 genUpdateLife(op1);
17524                 arg = arg->gtOp.gtOp2;
17525             }
17526             noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
17527                          (arg->OperGet() == GT_MKREFANY));
17528
17529             // This code passes a TYP_STRUCT by value using
17530             // the argument registers first and
17531             // then the lvaOutgoingArgSpaceVar area.
17532             //
17533
17534             // We prefer to choose low registers here to reduce code bloat
17535             regMaskTP regNeedMask    = RBM_LOW_REGS;
17536             unsigned  firstStackSlot = 0;
17537             unsigned  argAlign       = TARGET_POINTER_SIZE;
17538             size_t    originalSize   = InferStructOpSizeAlign(arg, &argAlign);
17539
17540             unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
17541             assert(slots > 0);
17542
17543             if (regNum == REG_STK)
17544             {
17545                 firstStackSlot = 0;
17546             }
17547             else
17548             {
17549                 if (argAlign == (TARGET_POINTER_SIZE * 2))
17550                 {
17551                     assert((regNum & 1) == 0);
17552                 }
17553
17554                 // firstStackSlot is an index of the first slot of the struct
17555                 // that is on the stack, in the range [0,slots]. If it is 'slots',
17556                 // then the entire struct is in registers. It is also equal to
17557                 // the number of slots of the struct that are passed in registers.
17558
17559                 if (curArgTabEntry->isHfaRegArg)
17560                 {
17561                     // An HFA argument that has been assigned to registers is guaranteed to fit in the FP register space.
17562                     assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
17563                     assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
17564                            "HFA argument doesn't fit entirely in FP argument registers");
17565                     firstStackSlot = slots;
17566                 }
17567                 else if (regNum + slots > MAX_REG_ARG)
17568                 {
17569                     firstStackSlot = MAX_REG_ARG - regNum;
17570                     assert(firstStackSlot > 0);
17571                 }
17572                 else
17573                 {
17574                     firstStackSlot = slots;
17575                 }
17576
17577                 if (curArgTabEntry->isHfaRegArg)
17578                 {
17579                     // Mask out the registers used by an HFA arg from the ones used to compute tree into.
17580                     for (unsigned i = regNum; i < regNum + slots; i++)
17581                     {
17582                         regNeedMask &= ~genRegMask(regNumber(i));
17583                     }
17584                 }
17585             }
17586
17587             // This holds the set of registers corresponding to enregistered promoted struct field variables
17588             // that go dead after this use of the variable in the argument list.
17589             regMaskTP deadFieldVarRegs = RBM_NONE;
17590
17591             // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
17592             // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
17593             // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
17594             // table entry for the promoted struct local.  As we fill slots with the contents of a
17595             // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
17596             // that indicate another filled slot (if we have a 12-byte struct, it has 3 four-byte slots; when we're
17597             // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the byte count at which
17598             // that slot is complete), and "nextPromotedStructFieldVar" will be the local variable number of the next
17599             // field variable to be copied.
17600             LclVarDsc* promotedStructLocalVarDesc         = NULL;
17601             unsigned   bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
17602             unsigned   nextPromotedStructFieldVar         = BAD_VAR_NUM;
17603             GenTreePtr structLocalTree                    = NULL;
17604
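            // The struct argument handled below comes in one of three shapes: a GT_OBJ (possibly of a
            // promoted local), a GT_LCL_VAR, or a GT_MKREFANY.  For the non-promoted cases we compute the
            // struct's address into "regSrc" and fetch its GC layout into "gcLayout".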
17605             BYTE*     gcLayout = NULL;
17606             regNumber regSrc   = REG_NA;
17607             if (arg->gtOper == GT_OBJ)
17608             {
17609                 // Are we loading a promoted struct local var?
17610                 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17611                 {
17612                     structLocalTree         = arg->gtObj.gtOp1->gtOp.gtOp1;
17613                     unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17614                     LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];
17615
17616                     Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17617
17618                     if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17619                                                                                                      // guaranteed to
17620                                                                                                      // live on stack.
17621                     {
17622                         // Fix 388395 ARM JitStress WP7
17623                         noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17624
17625                         assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17626                         promotedStructLocalVarDesc = varDsc;
17627                         nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17628                     }
17629                 }
17630
17631                 if (promotedStructLocalVarDesc == NULL)
17632                 {
17633                     // If it's not a promoted struct variable, set "regSrc" to the address
17634                     // of the struct local.
17635                     genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
17636                     noway_assert(arg->gtObj.gtOp1->InReg());
17637                     regSrc = arg->gtObj.gtOp1->gtRegNum;
17638                     // Remove this register from the set of registers that we pick from, unless slots equals 1
17639                     if (slots > 1)
17640                         regNeedMask &= ~genRegMask(regSrc);
17641                 }
17642
17643                 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17644                 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
17645             }
17646             else if (arg->gtOper == GT_LCL_VAR)
17647             {
17648                 // Move the address of the LCL_VAR in arg into reg
17649
17650                 unsigned varNum = arg->gtLclVarCommon.gtLclNum;
17651
17652                 // Are we loading a promoted struct local var?
17653                 structLocalTree         = arg;
17654                 unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17655                 LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];
17656
17657                 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17658
17659                 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17660
17661                 if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17662                                                                                                  // guaranteed to live
17663                                                                                                  // on stack.
17664                 {
17665                     assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17666                     promotedStructLocalVarDesc = varDsc;
17667                     nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17668                 }
17669
17670                 if (promotedStructLocalVarDesc == NULL)
17671                 {
17672                     regSrc = regSet.rsPickFreeReg(regNeedMask);
17673                     // Remove this register from the set of registers that we pick from, unless slots equals 1
17674                     if (slots > 1)
17675                         regNeedMask &= ~genRegMask(regSrc);
17676
17677                     getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
17678                     regTracker.rsTrackRegTrash(regSrc);
17679                     gcLayout = compiler->lvaGetGcLayout(varNum);
17680                 }
17681             }
17682             else if (arg->gtOper == GT_MKREFANY)
17683             {
17684                 assert(slots == 2);
17685                 assert((firstStackSlot == 1) || (firstStackSlot == 2));
17686                 assert(argOffset == 0); // ???
17687                 PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
17688
17689                 // Adjust argOffset if part of this guy was pushed onto the stack
17690                 if (firstStackSlot < slots)
17691                 {
17692                     argOffset += TARGET_POINTER_SIZE;
17693                 }
17694
17695                 // Skip the copy loop below because we have already placed the argument in the right place
17696                 slots    = 0;
17697                 gcLayout = NULL;
17698             }
17699             else
17700             {
17701                 assert(!"Unsupported TYP_STRUCT arg kind");
17702                 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17703             }
17704
17705             if (promotedStructLocalVarDesc != NULL)
17706             {
17707                 // We must do the stack parts first, since those might need values
17708                 // from argument registers that will be overwritten in the portion of the
17709                 // loop that writes into the argument registers.
17710                 bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
17711                 // Now find the var number of the first field that starts in the first stack slot.
17712                 unsigned fieldVarLim =
17713                     promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
17714                 while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
17715                            (firstStackSlot * TARGET_POINTER_SIZE) &&
17716                        nextPromotedStructFieldVar < fieldVarLim)
17717                 {
17718                     nextPromotedStructFieldVar++;
17719                 }
17720                 // We reach the limit (meaning there is no field that goes even partly in the stack) only if the
17721                 // first stack slot is after the last slot.
17722                 assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
17723             }
17724
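            // Copy the struct in two passes: first the slots that go to the outgoing argument area, then
            // the slots that go in argument registers.  The stack pass runs first because it may still need
            // to read values from argument registers that the register pass will overwrite.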
17725             if (slots > 0) // the mkref case may have set "slots" to zero.
17726             {
17727                 // First pass the stack portion of the struct (if any)
17728                 //
17729                 int argOffsetOfFirstStackSlot = argOffset;
17730                 for (unsigned i = firstStackSlot; i < slots; i++)
17731                 {
17732                     emitAttr fieldSize;
17733                     if (gcLayout[i] == TYPE_GC_NONE)
17734                         fieldSize = EA_PTRSIZE;
17735                     else if (gcLayout[i] == TYPE_GC_REF)
17736                         fieldSize = EA_GCREF;
17737                     else
17738                     {
17739                         noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17740                         fieldSize = EA_BYREF;
17741                     }
17742
17743                     regNumber maxRegArg = regNumber(MAX_REG_ARG);
17744                     if (promotedStructLocalVarDesc != NULL)
17745                     {
17746                         regNumber regTmp = REG_STK;
17747
17748                         bool filledExtraSlot =
17749                             genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
17750                                                           &nextPromotedStructFieldVar,
17751                                                           &bytesOfNextSlotOfCurPromotedStruct,
17752                                                           /*pCurRegNum*/ &maxRegArg, argOffset,
17753                                                           /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
17754                                                               TARGET_POINTER_SIZE,
17755                                                           argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
17756                         if (filledExtraSlot)
17757                         {
17758                             i++;
17759                             argOffset += TARGET_POINTER_SIZE;
17760                         }
17761                     }
17762                     else // (promotedStructLocalVarDesc == NULL)
17763                     {
17764                         // When slots > 1, we perform multiple loads/stores, so regTmp cannot be equal to regSrc;
17765                         // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
17766                         // to regSet.rsPickFreeReg, so we need to be a little more forceful.
17767                         // Otherwise, just re-use the same register.
17768                         //
17769                         regNumber regTmp = regSrc;
17770                         if (slots != 1)
17771                         {
17772                             regMaskTP regSrcUsed;
17773                             regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
17774
17775                             regTmp = regSet.rsPickFreeReg(regNeedMask);
17776
17777                             noway_assert(regTmp != regSrc);
17778
17779                             regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
17780                         }
17781
17782                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
17783                                                    i * TARGET_POINTER_SIZE);
17784
17785                         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17786                                                   compiler->lvaOutgoingArgSpaceVar, argOffset);
17787                         regTracker.rsTrackRegTrash(regTmp);
17788                     }
17789                     argOffset += TARGET_POINTER_SIZE;
17790                 }
17791
17792                 // Now pass the register portion of the struct
17793                 //
17794
17795                 bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
17796                 if (promotedStructLocalVarDesc != NULL)
17797                     nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17798
17799                 // Create a nested loop here so that the first time through the loop
17800                 // we set up all of the regArg registers except, possibly,
17801                 // the one that would overwrite regSrc.  Then in the final pass
17802                 // (if necessary) we just set up that remaining regArg from regSrc.
17803                 //
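                // (The overwriteRegSrc/needOverwriteRegSrc flags drive this: the first pass skips the
                // register that aliases regSrc and notes that a second pass is needed; the second pass
                // then handles only that remaining register.)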
17804                 bool overwriteRegSrc     = false;
17805                 bool needOverwriteRegSrc = false;
17806                 do
17807                 {
17808                     if (needOverwriteRegSrc)
17809                         overwriteRegSrc = true;
17810
17811                     for (unsigned i = 0; i < firstStackSlot; i++)
17812                     {
17813                         regNumber regArg = (regNumber)(regNum + i);
17814
17815                         if (overwriteRegSrc == false)
17816                         {
17817                             if (regArg == regSrc)
17818                             {
17819                                 needOverwriteRegSrc = true;
17820                                 continue;
17821                             }
17822                         }
17823                         else
17824                         {
17825                             if (regArg != regSrc)
17826                                 continue;
17827                         }
17828
17829                         emitAttr fieldSize;
17830                         if (gcLayout[i] == TYPE_GC_NONE)
17831                             fieldSize = EA_PTRSIZE;
17832                         else if (gcLayout[i] == TYPE_GC_REF)
17833                             fieldSize = EA_GCREF;
17834                         else
17835                         {
17836                             noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17837                             fieldSize = EA_BYREF;
17838                         }
17839
17840                         regNumber regTmp = REG_STK;
17841                         if (promotedStructLocalVarDesc != NULL)
17842                         {
17843                             bool filledExtraSlot =
17844                                 genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
17845                                                               fieldSize, &nextPromotedStructFieldVar,
17846                                                               &bytesOfNextSlotOfCurPromotedStruct,
17847                                                               /*pCurRegNum*/ &regArg,
17848                                                               /*argOffset*/ INT32_MAX,
17849                                                               /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
17850                                                               /*argOffsetOfFirstStackSlot*/ INT32_MAX,
17851                                                               &deadFieldVarRegs, &regTmp);
17852                             if (filledExtraSlot)
17853                                 i++;
17854                         }
17855                         else
17856                         {
17857                             getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
17858                                                        fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
17859                         }
17860                         regTracker.rsTrackRegTrash(regArg);
17861                     }
17862                 } while (needOverwriteRegSrc != overwriteRegSrc);
17863             }
17864
17865             if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
17866             {
17867                 regSet.rsMarkRegFree(genRegMask(regSrc));
17868             }
17869
17870             if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
17871                                                                          // used.
17872             {
17873                 arg->SetInReg();
17874                 for (unsigned i = 1; i < firstStackSlot; i++)
17875                 {
17876                     arg->gtRegNum = (regNumber)(regNum + i);
17877                     curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17878                 }
17879                 arg->gtRegNum = regNum;
17880                 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17881             }
17882
17883             // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
17884             // so update liveness.
17885             genUpdateLife(arg);
17886
17887             // Now, if some copied field locals were enregistered, and they're now dead, update the set of
17888             // registers holding GC pointers.
17889             if (deadFieldVarRegs != RBM_NONE)
17890                 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
17891         }
17892         else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
17893         {
17894             if (curArgTabEntry->regNum == REG_STK)
17895             {
17896                 // The arg is passed in the outgoing argument area of the stack frame
17897                 genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
17898                 assert(curr->InReg()); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
17899
17900                 inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
17901                 inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
17902             }
17903             else
17904             {
17905                 assert(regNum < REG_ARG_LAST);
17906                 regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
17907                 genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
17908                 assert(curr->gtRegPair == regPair);
17909                 regSet.rsMarkRegPairUsed(curr);
17910             }
17911         }
17912 #endif // _TARGET_ARM_
17913         else if (curArgTabEntry->regNum == REG_STK)
17914         {
17915             // The arg is passed in the outgoing argument area of the stack frame
17916             //
17917             genCodeForTree(curr, 0);
17918             assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
17919
17920             inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
17921
17922             if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
17923                 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
17924         }
17925         else
17926         {
17927             if (!varTypeIsFloating(curr->gtType))
17928             {
17929                 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
17930                 assert(curr->gtRegNum == regNum);
17931                 regSet.rsMarkRegUsed(curr);
17932             }
17933             else // varTypeIsFloating(curr->gtType)
17934             {
17935                 if (genIsValidFloatReg(regNum))
17936                 {
17937                     genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
17938                                   false);
17939                     assert(curr->gtRegNum == regNum);
17940                     regSet.rsMarkRegUsed(curr);
17941                 }
17942                 else
17943                 {
17944                     genCodeForTree(curr, 0);
17945                     // If we are loading a floating point type into integer registers
17946                     // then it must be for varargs.
17947                     // genCodeForTree will load it into a floating point register,
17948                     // now copy it into the correct integer register(s)
17949                     if (curr->TypeGet() == TYP_FLOAT)
17950                     {
17951                         assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
17952                         regSet.rsSpillRegIfUsed(regNum);
17953 #ifdef _TARGET_ARM_
17954                         getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
17955 #else
17956 #error "Unsupported target"
17957 #endif
17958                         regTracker.rsTrackRegTrash(regNum);
17959
17960                         curr->gtType   = TYP_INT; // Change this to TYP_INT in case we need to spill this register
17961                         curr->gtRegNum = regNum;
17962                         regSet.rsMarkRegUsed(curr);
17963                     }
17964                     else
17965                     {
17966                         assert(curr->TypeGet() == TYP_DOUBLE);
17967                         regNumber intRegNumLo = regNum;
17968                         curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
17969 #ifdef _TARGET_ARM_
17970                         regNumber intRegNumHi = regNumber(intRegNumLo + 1);
17971                         assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
17972                         assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
17973                         regSet.rsSpillRegIfUsed(intRegNumHi);
17974                         regSet.rsSpillRegIfUsed(intRegNumLo);
17975
17976                         getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
17977                         regTracker.rsTrackRegTrash(intRegNumLo);
17978                         regTracker.rsTrackRegTrash(intRegNumHi);
17979                         curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
17980                         regSet.rsMarkRegPairUsed(curr);
17981 #else
17982 #error "Unsupported target"
17983 #endif
17984                     }
17985                 }
17986             }
17987         }
17988     }
17989
17990     /* If any of the previously loaded arguments were spilled - reload them */
17991
17992     for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
17993     {
17994         curr = lateArgs->Current();
17995         assert(curr);
17996
17997         if (curr->gtFlags & GTF_SPILLED)
17998         {
17999             if (isRegPairType(curr->gtType))
18000             {
18001                 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
18002             }
18003             else
18004             {
18005                 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
18006             }
18007         }
18008     }
18009 }
18010
18011 #ifdef _TARGET_ARM_
18012
18013 // 'Push' a single GT_MKREFANY argument onto a call's argument list
18014 // The argument is passed as described by the fgArgTabEntry
18015 // If any part of the struct is to be passed in a register, the
18016 // regNum value will be equal to the register used to pass
18017 // the first part of the struct.
18018 // If any part is to go onto the stack, we first generate the
18019 // value into a register specified by 'regNeedMask' and
18020 // then store it to the outgoing argument area.
18021 // When this method returns, both parts of the TypedReference have
18022 // been pushed onto the stack, but *no* registers have been marked
18023 // as 'in-use'; that is the responsibility of the caller.
18024 //
18025 void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
18026 {
18027     regNumber regNum = curArgTabEntry->regNum;
18028     regNumber regNum2;
18029     assert(mkRefAnyTree->gtOper == GT_MKREFANY);
18030     regMaskTP arg1RegMask = 0;
18031     int       argOffset   = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18032
18033     // Construct the TypedReference directly into the argument list of the call by
18034     // 'pushing' the first field of the typed reference: the pointer.
18035     // Do this by directly generating it into the argument register or outgoing arg area of the stack.
18036     // Mark it as used so we don't trash it while generating the second field.
18037     //
18038     if (regNum == REG_STK)
18039     {
18040         genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18041         noway_assert(mkRefAnyTree->gtOp.gtOp1->InReg());
18042         regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
18043         inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
18044         regTracker.rsTrackRegTrash(tmpReg1);
18045         argOffset += TARGET_POINTER_SIZE;
18046         regNum2 = REG_STK;
18047     }
18048     else
18049     {
18050         assert(regNum <= REG_ARG_LAST);
18051         arg1RegMask = genRegMask(regNum);
18052         genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
18053         regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
18054     }
18055
18056     // Now 'push' the second field of the typed reference: the method table.
18057     if (regNum2 == REG_STK)
18058     {
18059         genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18060         noway_assert(mkRefAnyTree->gtOp.gtOp2->InReg());
18061         regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
18062         inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
18063         regTracker.rsTrackRegTrash(tmpReg2);
18064     }
18065     else
18066     {
18067         assert(regNum2 <= REG_ARG_LAST);
18068         // We don't have to mark this register as being in use here because it will
18069         // be done by the caller, and we don't want to double-count it.
18070         genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
18071     }
18072
18073     // Now that we are done generating the second part of the TypedReference, we can mark
18074     // the first register as free.
18075     // The caller, in the shared path, will re-mark all registers used by this argument
18076     // as being used, so we don't want to double-count this one.
18077     if (arg1RegMask != 0)
18078     {
18079         GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
18080         if (op1->gtFlags & GTF_SPILLED)
18081         {
18082             /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
18083
18084             regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
18085         }
18086         else
18087         {
18088             regSet.rsMarkRegFree(arg1RegMask);
18089         }
18090     }
18091 }
18092 #endif // _TARGET_ARM_
18093
18094 #endif // FEATURE_FIXED_OUT_ARGS
18095
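// genLoadIndirectCallTarget: Make the target address of a CT_INDIRECT call addressable, preserving the
// argument registers that have already been loaded; any of those arguments that get spilled while the
// target is loaded are reloaded afterwards.  Returns the mask of registers holding the call target.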
18096 regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreeCall* call)
18097 {
18098     assert((gtCallTypes)call->gtCallType == CT_INDIRECT);
18099
18100     regMaskTP fptrRegs;
18101
18102     /* Loading the indirect call target might cause one or more of the previously
18103        loaded argument registers to be spilled. So, we save information about all
18104        the argument registers, and unspill any of them that get spilled, after
18105        the call target is loaded.
18106     */
18107     struct
18108     {
18109         GenTreePtr node;
18110         union {
18111             regNumber regNum;
18112             regPairNo regPair;
18113         };
18114     } regArgTab[MAX_REG_ARG];
18115
18116     /* Record the previously loaded arguments, if any */
18117
18118     unsigned  regIndex;
18119     regMaskTP prefRegs = regSet.rsRegMaskFree();
18120     regMaskTP argRegs  = RBM_NONE;
18121     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18122     {
18123         regMaskTP  mask;
18124         regNumber  regNum        = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18125         GenTreePtr argTree       = regSet.rsUsedTree[regNum];
18126         regArgTab[regIndex].node = argTree;
18127         if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
18128         {
18129             assert(argTree->InReg());
18130             if (isRegPairType(argTree->gtType))
18131             {
18132                 regPairNo regPair = argTree->gtRegPair;
18133                 assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
18134                 regArgTab[regIndex].regPair = regPair;
18135                 mask                        = genRegPairMask(regPair);
18136             }
18137             else
18138             {
18139                 assert(regNum == argTree->gtRegNum);
18140                 regArgTab[regIndex].regNum = regNum;
18141                 mask                       = genRegMask(regNum);
18142             }
18143             assert(!(prefRegs & mask));
18144             argRegs |= mask;
18145         }
18146     }
18147
18148     /* Record the register(s) used for the indirect call func ptr */
18149     fptrRegs = genMakeRvalueAddressable(call->gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18150
18151     /* If any of the previously loaded arguments were spilled, reload them */
18152
18153     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18154     {
18155         GenTreePtr argTree = regArgTab[regIndex].node;
18156         if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18157         {
18158             assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18159             if (isRegPairType(argTree->gtType))
18160             {
18161                 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18162             }
18163             else
18164             {
18165                 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18166             }
18167         }
18168     }
18169
18170     /* Make sure the target is still addressable while avoiding the argument registers */
18171
18172     fptrRegs = genKeepAddressable(call->gtCallAddr, fptrRegs, argRegs);
18173
18174     return fptrRegs;
18175 }
18176
18177 /*****************************************************************************
18178  *
18179  *  Generate code for a call. If the call returns a value in register(s), the
18180  *  register mask that describes where the result will be found is returned;
18181  *  otherwise, RBM_NONE is returned.
18182  */
18183
18184 #ifdef _PREFAST_
18185 #pragma warning(push)
18186 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
18187 #endif
18188 regMaskTP CodeGen::genCodeForCall(GenTreeCall* call, bool valUsed)
18189 {
18190     emitAttr              retSize;
18191     size_t                argSize;
18192     size_t                args;
18193     regMaskTP             retVal;
18194     emitter::EmitCallType emitCallType;
18195
18196     unsigned saveStackLvl;
18197
18198     BasicBlock* returnLabel   = DUMMY_INIT(NULL);
18199     LclVarDsc*  frameListRoot = NULL;
18200
18201     unsigned savCurIntArgReg;
18202     unsigned savCurFloatArgReg;
18203
18204     unsigned areg;
18205
18206     regMaskTP fptrRegs = RBM_NONE;
18207     regMaskTP vptrMask = RBM_NONE;
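    // fptrRegs: registers holding the indirect call target (CT_INDIRECT only);
    // vptrMask: register grabbed for the vtable indirection on virtual calls.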
18208
18209 #ifdef DEBUG
18210     unsigned stackLvl = getEmitter()->emitCurStackLvl;
18211
18212     if (compiler->verbose)
18213     {
18214         printf("\t\t\t\t\t\t\tBeg call ");
18215         Compiler::printTreeID(call);
18216         printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18217     }
18218 #endif
18219
18220 #ifdef _TARGET_ARM_
18221     if (compiler->opts.ShouldUsePInvokeHelpers() && (call->gtFlags & GTF_CALL_UNMANAGED) &&
18222         ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_NONVIRT))
18223     {
18224         (void)genPInvokeCallProlog(nullptr, 0, (CORINFO_METHOD_HANDLE) nullptr, nullptr);
18225     }
18226 #endif
18227
18228     gtCallTypes callType = (gtCallTypes)call->gtCallType;
18229     IL_OFFSETX  ilOffset = BAD_IL_OFFSET;
18230
18231     CORINFO_SIG_INFO* sigInfo = nullptr;
18232
18233     if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18234     {
18235         (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18236     }
18237
18238     /* Make some sanity checks on the call node */
18239
18240     // "this" only makes sense for user functions
18241     noway_assert(call->gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18242     // tailcalls are not done for helper calls or caller-pop args; also check that
18243     // the global flag (compTailCallUsed) is set
18244     noway_assert(!call->IsTailCall() ||
18245                  (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18246
18247 #ifdef DEBUG
18248     // Pass the call signature information down into the emitter so the emitter can associate
18249     // native call sites with the signatures they were generated from.
18250     if (callType != CT_HELPER)
18251     {
18252         sigInfo = call->callSig;
18253     }
18254 #endif // DEBUG
18255
18256     unsigned pseudoStackLvl = 0;
18257
18258     if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18259     {
18260         noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18261
18262         pseudoStackLvl = genStackLevel;
18263
18264         noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18265                       "so fgAddCodeRef() should have set isFramePointerRequired()");
18266     }
18267
18268     /* Mark the current stack level and list of pointer arguments */
18269
18270     saveStackLvl = genStackLevel;
18271
18272     /*-------------------------------------------------------------------------
18273      *  Set up the registers and arguments
18274      */
18275
18276     /* We'll keep track of how much we've pushed on the stack */
18277
18278     argSize = 0;
18279
18280     /* We need to get a label for the return address with the proper stack depth. */
18281     /* For the callee pops case (the default) that is before the args are pushed. */
18282
18283     if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
18284     {
18285         returnLabel = genCreateTempLabel();
18286     }
18287
18288     /*
18289         Make sure to save the current argument register status
18290         in case we have nested calls.
18291      */
18292
18293     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18294     savCurIntArgReg              = intRegState.rsCurRegArgNum;
18295     savCurFloatArgReg            = floatRegState.rsCurRegArgNum;
18296     intRegState.rsCurRegArgNum   = 0;
18297     floatRegState.rsCurRegArgNum = 0;
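    // Zeroing the counters lets the argument setup for this call start from the first argument register.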
18298
18299     /* Pass the arguments */
18300
18301     if ((call->gtCallObjp != NULL) || (call->gtCallArgs != NULL))
18302     {
18303         argSize += genPushArgList(call);
18304     }
18305
18306     /* We need to get a label for the return address with the proper stack depth. */
18307     /* For the caller pops case (cdecl) that is after the args are pushed. */
18308
18309     if (call->gtFlags & GTF_CALL_UNMANAGED)
18310     {
18311         if (call->gtFlags & GTF_CALL_POP_ARGS)
18312             returnLabel = genCreateTempLabel();
18313
18314         /* Make sure that we now have a label */
18315         noway_assert(returnLabel != DUMMY_INIT(NULL));
18316     }
18317
18318     if (callType == CT_INDIRECT)
18319     {
18320         fptrRegs = genLoadIndirectCallTarget(call);
18321     }
18322
18323     /* Make sure any callee-trashed registers are saved */
18324
18325     regMaskTP calleeTrashedRegs = RBM_NONE;
18326
18327 #if GTF_CALL_REG_SAVE
18328     if (call->gtFlags & GTF_CALL_REG_SAVE)
18329     {
18330         /* The return value reg(s) will definitely be trashed */
18331
18332         switch (call->gtType)
18333         {
18334             case TYP_INT:
18335             case TYP_REF:
18336             case TYP_BYREF:
18337 #if !CPU_HAS_FP_SUPPORT
18338             case TYP_FLOAT:
18339 #endif
18340                 calleeTrashedRegs = RBM_INTRET;
18341                 break;
18342
18343             case TYP_LONG:
18344 #if !CPU_HAS_FP_SUPPORT
18345             case TYP_DOUBLE:
18346 #endif
18347                 calleeTrashedRegs = RBM_LNGRET;
18348                 break;
18349
18350             case TYP_VOID:
18351 #if CPU_HAS_FP_SUPPORT
18352             case TYP_FLOAT:
18353             case TYP_DOUBLE:
18354 #endif
18355                 calleeTrashedRegs = 0;
18356                 break;
18357
18358             default:
18359                 noway_assert(!"unhandled/unexpected type");
18360         }
18361     }
18362     else
18363 #endif
18364     {
18365         calleeTrashedRegs = RBM_CALLEE_TRASH;
18366     }
18367
18368     /* Spill any callee-saved registers which are being used */
18369
18370     regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18371
18372     /* We would otherwise need to save all GC registers to the InlinedCallFrame;
18373        instead, just spill them to temps. */
18374
18375     if (call->gtFlags & GTF_CALL_UNMANAGED)
18376         spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18377
18378     // Ignore fptrRegs as it is needed only to perform the indirect call
18379
18380     spillRegs &= ~fptrRegs;
18381
18382     /* Do not spill the argument registers.
18383        Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18384
18385     noway_assert((regSet.rsMaskMult & call->gtCallRegUsedMask) == 0);
18386     spillRegs &= ~call->gtCallRegUsedMask;
18387
18388     if (spillRegs)
18389     {
18390         regSet.rsSpillRegs(spillRegs);
18391     }
18392
18393 #if FEATURE_STACK_FP_X87
18394     // Spill fp stack
18395     SpillForCallStackFP();
18396
18397     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18398     {
18399         // Pick up a reg
18400         regNumber regReturn = regSet.PickRegFloat();
18401
18402         // Assign reg to tree
18403         genMarkTreeInReg(call, regReturn);
18404
18405         // Mark as used
18406         regSet.SetUsedRegFloat(call, true);
18407
18408         // Update fp state
18409         compCurFPState.Push(regReturn);
18410     }
18411 #else
18412     SpillForCallRegisterFP(call->gtCallRegUsedMask);
18413 #endif
18414
18415     /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
18416
18417     retSize = EA_PTRSIZE;
18418
18419     if (valUsed)
18420     {
18421         if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
18422         {
18423             retSize = EA_GCREF;
18424         }
18425         else if (call->gtType == TYP_BYREF)
18426         {
18427             retSize = EA_BYREF;
18428         }
18429     }
18430
18431     /*-------------------------------------------------------------------------
18432      * For caller-pop calls, the GC info will report the arguments as pending
18433        arguments, since the caller explicitly pops them. They should also be
18434        reported as non-GC arguments, as they effectively go dead at the
18435        call site (the callee owns them).
18436      */
18437
18438     args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
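    // A negative value marks this as a caller-pop call site for the emitter/GC info.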
18439
18440 #ifdef PROFILING_SUPPORTED
18441
18442     /*-------------------------------------------------------------------------
18443      *  Generate the profiling hooks for the call
18444      */
18445
18446     /* Treat special cases first */
18447
18448     /* fire the event at the call site */
18449     /* alas, right now I can only handle calls via a method handle */
18450     if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->IsTailCall())
18451     {
18452         unsigned saveStackLvl2 = genStackLevel;
18453
18454         //
18455         // Push the profilerHandle
18456         //
18457         CLANG_FORMAT_COMMENT_ANCHOR;
18458
18459 #ifdef _TARGET_X86_
18460         regMaskTP byrefPushedRegs;
18461         regMaskTP norefPushedRegs;
18462         regMaskTP pushedArgRegs = genPushRegs(call->gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
18463
18464         if (compiler->compProfilerMethHndIndirected)
18465         {
18466             getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
18467                                        (ssize_t)compiler->compProfilerMethHnd);
18468         }
18469         else
18470         {
18471             inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
18472         }
18473         genSinglePush();
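        // Keep the tracked stack level in sync with the push of the profiler handle above.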
18474
18475         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18476                           sizeof(int) * 1, // argSize
18477                           EA_UNKNOWN);     // retSize
18478
18479         //
18480         // Adjust the number of stack slots used by this managed method if necessary.
18481         //
18482         if (compiler->fgPtrArgCntMax < 1)
18483         {
18484             JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
18485             compiler->fgPtrArgCntMax = 1;
18486         }
18487
18488         genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
18489 #elif _TARGET_ARM_
18490         // We need r0 (to pass the profiler handle) and another register (the call target) to emit a tailcall callback.
18491         // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail-prefixed calls.
18492         // Here we grab a register to temporarily store r0, and restore r0 after we have emitted the callback.
18493         //
18494         // By the time we reach this point the argument registers have been set up (by genPushArgList()), so we don't want
18495         // to disturb them; hence the argument registers are locked here.
18496         regMaskTP usedMask = RBM_NONE;
18497         regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
18498
18499         regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
18500         regSet.rsLockReg(genRegMask(scratchReg));
18501
18502         emitAttr attr = EA_UNKNOWN;
18503         if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
18504         {
18505             attr = EA_GCREF;
18506             gcInfo.gcMarkRegSetGCref(scratchReg);
18507         }
18508         else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
18509         {
18510             attr = EA_BYREF;
18511             gcInfo.gcMarkRegSetByref(scratchReg);
18512         }
18513         else
18514         {
18515             attr = EA_4BYTE;
18516         }
18517
18518         getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
18519         regTracker.rsTrackRegTrash(scratchReg);
18520
18521         if (compiler->compProfilerMethHndIndirected)
18522         {
18523             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18524             regTracker.rsTrackRegTrash(REG_R0);
18525         }
18526         else
18527         {
18528             instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18529         }
18530
18531         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18532                           0,           // argSize
18533                           EA_UNKNOWN); // retSize
18534
18535         // Restore back to the state that existed before profiler callback
18536         gcInfo.gcMarkRegSetNpt(scratchReg);
18537         getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
18538         regTracker.rsTrackRegTrash(REG_R0);
18539         regSet.rsUnlockReg(genRegMask(scratchReg));
18540         regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
18541 #else
18542         NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
18543 #endif //_TARGET_X86_
18544
18545         /* Restore the stack level */
18546         SetStackLevel(saveStackLvl2);
18547     }
18548
18549 #endif // PROFILING_SUPPORTED
18550
18551 #ifdef DEBUG
18552     /*-------------------------------------------------------------------------
18553      *  Generate an ESP check for the call
18554      */
18555
18556     if (compiler->opts.compStackCheckOnCall
18557 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
18558         // check the stacks as frequently as possible
18559         && !call->IsHelperCall()
18560 #else
18561         && call->gtCallType == CT_USER_FUNC
18562 #endif
18563             )
18564     {
18565         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
18566                      compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
18567                      compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
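        // Save the current SP into lvaCallEspCheck so it can be compared against SP after the call returns.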
18568         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
18569     }
18570 #endif
18571
18572     /*-------------------------------------------------------------------------
18573      *  Generate the call
18574      */
18575
18576     bool            fPossibleSyncHelperCall = false;
18577     CorInfoHelpFunc helperNum               = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
18578
18579     bool fTailCallTargetIsVSD = false;
18580
18581     bool fTailCall = (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
18582
18583     /* Check for Delegate.Invoke. If so, we inline it. We get the
18584        target-object and target-function from the delegate-object, and do
18585        an indirect call.
18586      */
18587
18588     if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
18589     {
18590         noway_assert(call->gtCallType == CT_USER_FUNC);
18591
18592         assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
18593                 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
18594                (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
18595
18596         /* Find the offsets of the 'this' pointer and new target */
18597
18598         CORINFO_EE_INFO* pInfo;
18599         unsigned         instOffs;     // offset of new 'this' pointer
18600         unsigned         firstTgtOffs; // offset of first target to invoke
18601         const regNumber  regThis = genGetThisArgReg(call);
18602
18603         pInfo        = compiler->eeGetEEInfo();
18604         instOffs     = pInfo->offsetOfDelegateInstance;
18605         firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
18606
18607 #ifdef _TARGET_ARM_
18608         if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18609         {
18610             getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, compiler->virtualStubParamInfo->GetReg(), regThis,
18611                                         pInfo->offsetOfSecureDelegateIndirectCell);
18612             regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18613         }
18614 #endif // _TARGET_ARM_
18615
18616         // Grab an available register to use for the CALL indirection
18617         regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
18618
18619         //  Save the invoke-target-function in indCallReg
18620         //  'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
18621         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
18622         regTracker.rsTrackRegTrash(indCallReg);
18623
18624         /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
18625
18626         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
18627         regTracker.rsTrackRegTrash(regThis);
18628         noway_assert(instOffs < 127);
18629
18630         /* Call through indCallReg */
18631
18632         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18633                                    NULL,                                // methHnd
18634                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18635                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18636                                    gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
18637     }
18638     else
18639
18640         /*-------------------------------------------------------------------------
18641          *  Virtual and interface calls
18642          */
18643
18644         switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
18645         {
18646             case GTF_CALL_VIRT_STUB:
18647             {
18648                 regSet.rsSetRegsModified(compiler->virtualStubParamInfo->GetRegMask());
18649
18650                 // An x86 JIT which uses full stub dispatch must generate only
18651                 // the following stub dispatch calls:
18652                 //
18653                 // (1) isCallRelativeIndirect:
18654                 //        call dword ptr [rel32]  ;  FF 15 ---rel32----
18655                 // (2) isCallRelative:
18656                 //        call abc                ;     E8 ---rel32----
18657                 // (3) isCallRegisterIndirect:
18658                 //     3-byte nop                 ;
18659                 //     call dword ptr [eax]       ;     FF 10
18660                 //
18661                 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18662                 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18663
18664                 //
18665                 // Please do not insert any Random NOPs while constructing this VSD call
18666                 //
18667                 getEmitter()->emitDisableRandomNops();
18668
18669                 if (!fTailCall)
18670                 {
18671                     // This is code to set up an indirect call to a stub address computed
18672                     // via dictionary lookup.  However the dispatch stub receivers aren't set up
18673                     // to accept such calls at the moment.
18674                     if (callType == CT_INDIRECT)
18675                     {
18676                         regNumber indReg;
18677
18678                         // -------------------------------------------------------------------------
18679                         // The importer decided we needed a stub call via a computed
18680                         // stub dispatch address, i.e. an address which came from a dictionary lookup.
18681                         //   - The dictionary lookup produces an indirected address, suitable for call
18682                         //     via "call [virtualStubParamInfo.reg]"
18683                         //
18684                         // This combination will only be generated for shared generic code and when
18685                         // stub dispatch is active.
18686
18687                         // No need to null check the this pointer - the dispatch code will deal with this.
18688
18689                         noway_assert(genStillAddressable(call->gtCallAddr));
18690
18691                         // Now put the address in virtualStubParamInfo.reg.
18692                         // This is typically a nop when the register used for
18693                         // the gtCallAddr is virtualStubParamInfo.reg
18694                         //
18695                         inst_RV_TT(INS_mov, compiler->virtualStubParamInfo->GetReg(), call->gtCallAddr);
18696                         regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18697
18698 #if defined(_TARGET_X86_)
18699                         // Emit enough bytes of nops so that this sequence can be distinguished
18700                         // from other virtual stub dispatch calls.
18701                         //
18702                         // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18703                         //        vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18704                         //
18705                         getEmitter()->emitIns_Nop(3);
18706
18707                         // Make the virtual stub call:
18708                         //     call   [virtualStubParamInfo.reg]
18709                         //
18710                         emitCallType = emitter::EC_INDIR_ARD;
18711
18712                         indReg = compiler->virtualStubParamInfo->GetReg();
18713                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18714
18715 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
18716
18717                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18718
18719                         // Make the virtual stub call:
18720                         //     ldr   indReg, [virtualStubParamInfo.reg]
18721                         //     call  indReg
18722                         //
18723                         emitCallType = emitter::EC_INDIR_R;
18724
18725                         // Now dereference [virtualStubParamInfo.reg] and put it in a new temp register 'indReg'
18726                         //
18727                         indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18728                         assert(call->gtCallAddr->InReg());
18729                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18730                                                     compiler->virtualStubParamInfo->GetReg(), 0);
18731                         regTracker.rsTrackRegTrash(indReg);
18732
18733 #else
18734 #error "Unknown target for VSD call"
18735 #endif
18736
18737                         getEmitter()->emitIns_Call(emitCallType,
18738                                                    NULL,                                // methHnd
18739                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18740                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18741                                                    gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18742                     }
18743                     else
18744                     {
18745                         // -------------------------------------------------------------------------
18746                         // Check for a direct stub call.
18747                         //
18748
18749                         // Get stub addr. This will return NULL if virtual call stubs are not active
18750                         void* stubAddr = NULL;
18751
18752                         stubAddr = (void*)call->gtStubCallStubAddr;
18753
18754                         noway_assert(stubAddr != NULL);
18755
18756                         // -------------------------------------------------------------------------
18757                         // Direct stub calls, though the stubAddr itself may still need to be
18758                         // accessed via an indirection.
18759                         //
18760
18761                         // No need to null check - the dispatch code will deal with null this.
18762
18763                         emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
18764                         void*                 addr             = stubAddr;
18765                         int                   disp             = 0;
18766                         regNumber             callReg          = REG_NA;
18767
18768                         if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
18769                         {
18770 #if CPU_LOAD_STORE_ARCH
18771                             callReg = regSet.rsGrabReg(compiler->virtualStubParamInfo->GetRegMask());
18772                             noway_assert(callReg == compiler->virtualStubParamInfo->GetReg());
18773
18774                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, compiler->virtualStubParamInfo->GetReg(),
18775                                                    (ssize_t)stubAddr);
18776                             // The stub will write-back to this register, so don't track it
18777                             regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18778                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
18779                                                         compiler->virtualStubParamInfo->GetReg(), 0);
18780                             regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
18781                             callTypeStubAddr = emitter::EC_INDIR_R;
18782                             getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18783                                                        NULL,                                // methHnd
18784                                                        INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18785                                                        args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18786                                                        gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
18787
18788 #else
18789                             // emit an indirect call
18790                             callTypeStubAddr = emitter::EC_INDIR_C;
18791                             addr             = 0;
18792                             disp             = (ssize_t)stubAddr;
18793 #endif
18794                         }
18795 #if CPU_LOAD_STORE_ARCH
18796                         if (callTypeStubAddr != emitter::EC_INDIR_R)
18797 #endif
18798                         {
18799                             getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCallMethHnd,
18800                                                        INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
18801                                                        gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18802                                                        gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
18803                         }
18804                     }
18805                 }
18806                 else // tailCall is true
18807                 {
18808
18809 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
18810 // moves the this pointer out of its usual place and into the argument list.
18811 #ifdef _TARGET_X86_
18812
18813                     // Generate "cmp ECX, [ECX]" to trap null pointers
18814                     const regNumber regThis = genGetThisArgReg(call);
18815                     getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18816
18817 #endif // _TARGET_X86_
18818
18819                     if (callType == CT_INDIRECT)
18820                     {
18821                         noway_assert(genStillAddressable(call->gtCallAddr));
18822
18823                         // Now put the address in EAX.
18824                         inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
18825                         regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
18826
18827                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18828                     }
18829                     else
18830                     {
18831                         // importer/EE should guarantee the indirection
18832                         noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
18833
18834                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
18835                                                ssize_t(call->gtStubCallStubAddr));
18836                     }
18837
18838                     fTailCallTargetIsVSD = true;
18839                 }
18840
18841                 //
18842                 // OK to start inserting random NOPs again
18843                 //
18844                 getEmitter()->emitEnableRandomNops();
18845             }
18846             break;
18847
18848             case GTF_CALL_VIRT_VTABLE:
18849                 // stub dispatching is off or this is not a virtual call (could be a tailcall)
18850                 {
18851                     regNumber vptrReg;
18852                     unsigned  vtabOffsOfIndirection;
18853                     unsigned  vtabOffsAfterIndirection;
18854
18855                     noway_assert(callType == CT_USER_FUNC);
18856
18857                     vptrReg =
18858                         regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
18859                     vptrMask = genRegMask(vptrReg);
18860
18861                     /* The register no longer holds a live pointer value */
18862                     gcInfo.gcMarkRegSetNpt(vptrMask);
18863
18864                     // MOV vptrReg, [REG_CALL_THIS + offs]
18865                     getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
18866                                                VPTR_OFFS);
18867                     regTracker.rsTrackRegTrash(vptrReg);
18868
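                    // The grabbed vtable register must not be one of the registers already carrying call arguments.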
18869                     noway_assert(vptrMask & ~call->gtCallRegUsedMask);
18870
18871                     /* Get hold of the vtable offset (note: this might be expensive) */
18872
18873                     compiler->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
18874                                                                       &vtabOffsAfterIndirection);
18875
18876                     /* The register no longer holds a live pointer value */
18877                     gcInfo.gcMarkRegSetNpt(vptrMask);
18878
18879                     /* Get the appropriate vtable chunk */
18880
18881                     if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
18882                     {
18883                         // MOV vptrReg, [vptrReg + vtabOffsOfIndirection]
18884                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18885                                                    vtabOffsOfIndirection);
18886                     }
18887
18888                     /* Call through the appropriate vtable slot */
18889
18890                     if (fTailCall)
18891                     {
18892                         /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
18893
18894                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
18895                                                    vtabOffsAfterIndirection);
18896                     }
18897                     else
18898                     {
18899 #if CPU_LOAD_STORE_ARCH
18900                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18901                                                    vtabOffsAfterIndirection);
18902
18903                         getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCallMethHnd,
18904                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18905                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18906                                                    gcInfo.gcRegByrefSetCur, ilOffset,
18907                                                    vptrReg); // ireg
18908 #else
18909                         getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCallMethHnd,
18910                                                    INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18911                                                    args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18912                                                    gcInfo.gcRegByrefSetCur, ilOffset,
18913                                                    vptrReg,                   // ireg
18914                                                    REG_NA,                    // xreg
18915                                                    0,                         // xmul
18916                                                    vtabOffsAfterIndirection); // disp
18917 #endif // CPU_LOAD_STORE_ARCH
18918                     }
18919                 }
18920                 break;
18921
18922             case GTF_CALL_NONVIRT:
18923             {
18924                 //------------------------ Non-virtual/Indirect calls -------------------------
18925                 // Lots of cases follow
18926                 //    - Direct P/Invoke calls
18927                 //    - Indirect calls to P/Invoke functions via the P/Invoke stub
18928                 //    - Direct Helper calls
18929                 //    - Indirect Helper calls
18930                 //    - Direct calls to known addresses
18931                 //    - Direct calls where address is accessed by one or two indirections
18932                 //    - Indirect calls to computed addresses
18933                 //    - Tailcall versions of all of the above
18934
18935                 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
18936
18937                 //------------------------------------------------------
18938                 // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
18939                 //
18940                 // For (final and private) functions which were called with
18941                 //  invokevirtual, but which we call directly, we need to
18942                 //  dereference the object pointer to make sure it's not NULL.
18943                 //
18944
18945                 if (call->gtFlags & GTF_CALL_NULLCHECK)
18946                 {
18947                     /* Generate "cmp ECX, [ECX]" to trap null pointers */
18948                     const regNumber regThis = genGetThisArgReg(call);
18949 #if CPU_LOAD_STORE_ARCH
18950                     regNumber indReg =
18951                         regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
18952                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
18953                     regTracker.rsTrackRegTrash(indReg);
18954 #else
18955                     getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18956 #endif
18957                 }
18958
18959                 if (call->gtFlags & GTF_CALL_UNMANAGED)
18960                 {
18961                     //------------------------------------------------------
18962                     // Non-virtual/Indirect calls: PInvoke calls.
18963
18964                     noway_assert(compiler->info.compCallUnmanaged != 0);
18965
18966                     /* args shouldn't be greater than 64K */
18967
18968                     noway_assert((argSize & 0xffff0000) == 0);
18969
18970                     /* Remember the varDsc for the callsite-epilog */
18971
18972                     frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
18973
18974                     // exact codegen is required
18975                     getEmitter()->emitDisableRandomNops();
18976
18977                     int nArgSize = 0;
18978
18979                     regNumber indCallReg = REG_NA;
18980
18981                     if (callType == CT_INDIRECT)
18982                     {
18983                         noway_assert(genStillAddressable(call->gtCallAddr));
18984
18985                         if (call->gtCallAddr->InReg())
18986                             indCallReg = call->gtCallAddr->gtRegNum;
18987
18988                         nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
18989                         methHnd  = 0;
18990                     }
18991                     else
18992                     {
18993                         noway_assert(callType == CT_USER_FUNC);
18994                     }
18995
18996                     regNumber tcbReg = REG_NA;
18997
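                    // Unless P/Invoke helpers are used, emit the inlined P/Invoke frame setup; tcbReg is left
                    // holding the thread (TCB) pointer if one was loaded into a register.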
18998                     if (!compiler->opts.ShouldUsePInvokeHelpers())
18999                     {
19000                         tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19001                     }
19002
19003                     void* addr = NULL;
19004
19005                     if (callType == CT_INDIRECT)
19006                     {
19007                         /* Double check that the callee didn't use/trash the
19008                            registers holding the call target.
19009                         */
19010                         noway_assert(tcbReg != indCallReg);
19011
19012                         if (indCallReg == REG_NA)
19013                         {
19014                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19015                                                                        // indirection
19016
19017                             /* Please note that this even works with tcbReg == REG_EAX.
19018                             tcbReg contains an interesting value only if frameListRoot is
19019                             an enregistered local that stays alive across the call
19020                             (certainly not EAX). If frameListRoot has been moved into
19021                             EAX, we can trash it since it won't survive across the call
19022                             anyway.
19023                             */
19024
19025                             inst_RV_TT(INS_mov, indCallReg, call->gtCallAddr);
19026                             regTracker.rsTrackRegTrash(indCallReg);
19027                         }
19028
19029                         emitCallType = emitter::EC_INDIR_R;
19030                     }
19031                     else
19032                     {
19033                         noway_assert(callType == CT_USER_FUNC);
19034
19035                         void* pAddr;
19036                         addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
19037                         if (addr != NULL)
19038                         {
19039 #if CPU_LOAD_STORE_ARCH
19040                             // Load the address into a register, indirect it and call through a register
19041                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19042                                                                        // indirection
19043                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19044                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19045                             regTracker.rsTrackRegTrash(indCallReg);
19046                             // Now make the call "call indCallReg"
19047
19048                             getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19049                                                        methHnd,                       // methHnd
19050                                                        INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19051                                                        NULL,                          // addr
19052                                                        args,
19053                                                        retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19054                                                        gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
19055
19056                             emitCallType = emitter::EC_INDIR_R;
19057                             break;
19058 #else
19059                             emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19060                             indCallReg   = REG_NA;
19061 #endif
19062                         }
19063                         else
19064                         {
19065                             // Double-indirection. Load the address into a register
19066                             // and call indirectly through a register
19067                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19068                                                                        // indirection
19069
19070 #if CPU_LOAD_STORE_ARCH
19071                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
19072                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19073                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19074                             regTracker.rsTrackRegTrash(indCallReg);
19075
19076                             emitCallType = emitter::EC_INDIR_R;
19077
19078 #else
19079                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
19080                             regTracker.rsTrackRegTrash(indCallReg);
19081                             emitCallType = emitter::EC_INDIR_ARD;
19082
19083 #endif // CPU_LOAD_STORE_ARCH
19084                         }
19085                     }
19086
19087                     getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
19088                                                INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19089                                                gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
19090                                                ilOffset, indCallReg);
19091
19092                     if (callType == CT_INDIRECT)
19093                         genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19094
19095                     getEmitter()->emitEnableRandomNops();
19096
19097                     // Done with PInvoke calls
19098                     break;
19099                 }
19100
19101                 if (callType == CT_INDIRECT)
19102                 {
19103                     noway_assert(genStillAddressable(call->gtCallAddr));
19104
19105                     if (call->gtCallCookie)
19106                     {
19107                         //------------------------------------------------------
19108                         // Non-virtual indirect calls via the P/Invoke stub
19109
19110                         GenTreePtr cookie = call->gtCallCookie;
19111                         GenTreePtr target = call->gtCallAddr;
19112
19113                         noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19114
19115                         noway_assert(cookie->gtOper == GT_CNS_INT ||
19116                                      (cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT));
19117
19118                         noway_assert(args == argSize);
19119
19120 #if defined(_TARGET_X86_)
19121                         /* load eax with the real target */
19122
19123                         inst_RV_TT(INS_mov, REG_EAX, target);
19124                         regTracker.rsTrackRegTrash(REG_EAX);
19125
19126                         if (cookie->gtOper == GT_CNS_INT)
19127                             inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19128                         else
19129                             inst_TT(INS_push, cookie);
19130
19131                         /* Keep track of ESP for EBP-less frames */
19132                         genSinglePush();
19133
19134                         argSize += sizeof(void*);
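                        // The pushed VASigCookie counts as an extra outgoing argument to the CALLI helper.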
19135
19136 #elif defined(_TARGET_ARM_)
19137
19138                         // Ensure that we spill these registers (if caller saved) in the prolog
19139                         regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19140
19141                         // ARM: load r12 with the real target
19142                         // X64: load r10 with the real target
19143                         inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19144                         regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19145
19146                         // ARM: load r4  with the pinvoke VASigCookie
19147                         // X64: load r11 with the pinvoke VASigCookie
19148                         if (cookie->gtOper == GT_CNS_INT)
19149                             inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
19150                                        EA_HANDLE_CNS_RELOC);
19151                         else
19152                             inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19153                         regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19154
19155                         noway_assert(args == argSize);
19156
19157                         // Ensure that we don't trash any of these registers if we have to load
19158                         // the helper call target into a register to invoke it.
19159                         regMaskTP regsUsed;
19160                         regSet.rsLockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | RBM_PINVOKE_COOKIE_PARAM,
19161                                          &regsUsed);
19162 #else
19163                         NYI("Non-virtual indirect calls via the P/Invoke stub");
19164 #endif
19165
19166                         args = argSize;
19167                         noway_assert((size_t)(int)args == args);
19168
19169                         genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19170
19171 #if defined(_TARGET_ARM_)
19172                         regSet.rsUnlockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
19173                                                RBM_PINVOKE_COOKIE_PARAM,
19174                                            regsUsed);
19175 #endif
19176
19177 #ifdef _TARGET_ARM_
19178                         // genEmitHelperCall doesn't record all registers a helper call would trash.
19179                         regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19180 #endif
19181                     }
19182                     else
19183                     {
19184                         //------------------------------------------------------
19185                         // Non-virtual indirect calls
19186
19187                         if (fTailCall)
19188                         {
19189                             inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
19190                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19191                         }
19192                         else
19193                             instEmit_indCall(call, args, retSize);
19194                     }
19195
19196                     genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19197
19198                     // Done with indirect calls
19199                     break;
19200                 }
19201
19202                 //------------------------------------------------------
19203                 // Non-virtual direct/indirect calls: Work out if the address of the
19204                 // call is known at JIT time (if not, it is either an indirect call
19205                 // or the address must be accessed via a single or double indirection)
19206
19207                 noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19208
19209                 void*          addr;
19210                 InfoAccessType accessType;
19211
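                // eeGetHelperNum returns CORINFO_HELP_UNDEF for anything that is not a JIT helper.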
19212                 helperNum = compiler->eeGetHelperNum(methHnd);
19213
19214                 if (callType == CT_HELPER)
19215                 {
19216                     noway_assert(helperNum != CORINFO_HELP_UNDEF);
19217
19218 #ifdef FEATURE_READYTORUN_COMPILER
19219                     if (call->gtEntryPoint.addr != NULL)
19220                     {
19221                         accessType = call->gtEntryPoint.accessType;
19222                         addr       = call->gtEntryPoint.addr;
19223                     }
19224                     else
19225 #endif // FEATURE_READYTORUN_COMPILER
19226                     {
19227                         void* pAddr;
19228
19229                         accessType = IAT_VALUE;
19230                         addr       = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19231
19232                         if (!addr)
19233                         {
19234                             accessType = IAT_PVALUE;
19235                             addr       = pAddr;
19236                         }
19237                     }
19238                 }
19239                 else
19240                 {
19241                     noway_assert(helperNum == CORINFO_HELP_UNDEF);
19242
19243                     CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
19244
19245                     if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19246                         aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19247
19248                     if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19249                         aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19250
19251 #ifdef FEATURE_READYTORUN_COMPILER
19252                     if (call->gtEntryPoint.addr != NULL)
19253                     {
19254                         accessType = call->gtEntryPoint.accessType;
19255                         addr       = call->gtEntryPoint.addr;
19256                     }
19257                     else
19258 #endif // FEATURE_READYTORUN_COMPILER
19259                     {
19260                         CORINFO_CONST_LOOKUP addrInfo;
19261                         compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19262
19263                         accessType = addrInfo.accessType;
19264                         addr       = addrInfo.addr;
19265                     }
19266                 }
19267
19268                 if (fTailCall)
19269                 {
19270                     noway_assert(callType == CT_USER_FUNC);
19271
19272                     switch (accessType)
19273                     {
19274                         case IAT_VALUE:
19275                             //------------------------------------------------------
19276                             // Non-virtual direct calls to known addresses
19277                             //
19278                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19279                             break;
19280
19281                         case IAT_PVALUE:
19282                             //------------------------------------------------------
19283                             // Non-virtual direct calls to addresses accessed by
19284                             // a single indirection.
19285                             //
19286                             // For tailcalls we place the target address in REG_TAILCALL_ADDR
19287                             CLANG_FORMAT_COMMENT_ANCHOR;
19288
19289 #if CPU_LOAD_STORE_ARCH
19290                             {
19291                                 regNumber indReg = REG_TAILCALL_ADDR;
19292                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19293                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19294                                 regTracker.rsTrackRegTrash(indReg);
19295                             }
19296 #else
19297                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19298                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19299 #endif
19300                             break;
19301
19302                         case IAT_PPVALUE:
19303                             //------------------------------------------------------
19304                             // Non-virtual direct calls to addresses accessed by
19305                             // a double indirection.
19306                             //
19307                             // For tailcalls we place the target address in REG_TAILCALL_ADDR
19308                             CLANG_FORMAT_COMMENT_ANCHOR;
19309
19310 #if CPU_LOAD_STORE_ARCH
19311                             {
19312                                 regNumber indReg = REG_TAILCALL_ADDR;
19313                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19314                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19315                                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19316                                 regTracker.rsTrackRegTrash(indReg);
19317                             }
19318 #else
19319                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19320                             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19321                                                        REG_TAILCALL_ADDR, 0);
19322                             regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19323 #endif
19324                             break;
19325
19326                         default:
19327                             noway_assert(!"Bad accessType");
19328                             break;
19329                     }
19330                 }
19331                 else
19332                 {
19333                     switch (accessType)
19334                     {
19335                         regNumber indCallReg;
19336
19337                         case IAT_VALUE:
19338                         {
19339                             //------------------------------------------------------
19340                             // Non-virtual direct calls to known addresses
19341                             //
19342                             // The vast majority of calls end up here....  Wouldn't
19343                             // it be nice if they all did!
19344                             CLANG_FORMAT_COMMENT_ANCHOR;
19345 #ifdef _TARGET_ARM_
19346                             // We may use a direct call for some recursive calls,
19347                             // as we can safely estimate the distance from the call site to the top of the method
19348                             const int codeOffset = MAX_PROLOG_SIZE_BYTES +           // prolog size
19349                                                    getEmitter()->emitCurCodeOffset + // offset of the current IG
19350                                                    getEmitter()->emitCurIGsize +     // size of the current IG
19351                                                    4;                                // size of the jump instruction
19352                                                                                      // that we are now emitting
19353                             if (compiler->gtIsRecursiveCall(call) && codeOffset <= -CALL_DIST_MAX_NEG)
19354                             {
19355                                 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19356                                                            INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19357                                                            args, retSize, gcInfo.gcVarPtrSetCur,
19358                                                            gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19359                                                            REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp
19360                                                            false,                // isJump
19361                                                            emitter::emitNoGChelper(helperNum));
19362                             }
19363                             else if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19364                             {
19365                                 // Load the address into a register and call through a register
19366                                 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
19367                                                                            // CALL indirection
19368                                 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19369
19370                                 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
19371                                                            INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19372                                                            args, retSize, gcInfo.gcVarPtrSetCur,
19373                                                            gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19374                                                            indCallReg,   // ireg
19375                                                            REG_NA, 0, 0, // xreg, xmul, disp
19376                                                            false,        // isJump
19377                                                            emitter::emitNoGChelper(helperNum));
19378                             }
19379                             else
19380 #endif
19381                             {
19382                                 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19383                                                            INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19384                                                            gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19385                                                            gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
19386                                                            0,     /* ireg, xreg, xmul, disp */
19387                                                            false, /* isJump */
19388                                                            emitter::emitNoGChelper(helperNum));
19389                             }
19390                         }
19391                         break;
19392
19393                         case IAT_PVALUE:
19394                             //------------------------------------------------------
19395                             // Non-virtual direct calls to addresses accessed by
19396                             // a single indirection.
19397                             //
19398
19399                             // Load the address into a register, load indirect, and call through a register
19400                             CLANG_FORMAT_COMMENT_ANCHOR;
19401 #if CPU_LOAD_STORE_ARCH
19402                             indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19403                                                                        // indirection
19404
19405                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19406                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19407                             regTracker.rsTrackRegTrash(indCallReg);
19408
19409                             emitCallType = emitter::EC_INDIR_R;
19410                             addr         = NULL;
19411
19412 #else
19413                             emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19414                             indCallReg   = REG_NA;
19415
19416 #endif // CPU_LOAD_STORE_ARCH
19417
19418                             getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
19419                                                        retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19420                                                        gcInfo.gcRegByrefSetCur, ilOffset,
19421                                                        indCallReg,   // ireg
19422                                                        REG_NA, 0, 0, // xreg, xmul, disp
19423                                                        false,        /* isJump */
19424                                                        emitter::emitNoGChelper(helperNum));
19425                             break;
19426
19427                         case IAT_PPVALUE:
19428                         {
19429                             //------------------------------------------------------
19430                             // Non-virtual direct calls to addresses accessed by
19431                             // a double indirection.
19432                             //
19433                             // Double-indirection. Load the address into a register
19434                             // and call indirectly through the register
19435
19436                             noway_assert(helperNum == CORINFO_HELP_UNDEF);
19437
19438                             // Grab an available register to use for the CALL indirection
19439                             indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19440
19441 #if CPU_LOAD_STORE_ARCH
19442                             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19443                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19444                             getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19445                             regTracker.rsTrackRegTrash(indCallReg);
19446
19447                             emitCallType = emitter::EC_INDIR_R;
19448
19449 #else
19450
19451                             getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19452                             regTracker.rsTrackRegTrash(indCallReg);
19453
19454                             emitCallType = emitter::EC_INDIR_ARD;
19455
19456 #endif // CPU_LOAD_STORE_ARCH
19457
19458                             getEmitter()->emitIns_Call(emitCallType, methHnd,
19459                                                        INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19460                                                        args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19461                                                        gcInfo.gcRegByrefSetCur, ilOffset,
19462                                                        indCallReg,   // ireg
19463                                                        REG_NA, 0, 0, // xreg, xmul, disp
19464                                                        false,        // isJump
19465                                                        emitter::emitNoGChelper(helperNum));
19466                         }
19467                         break;
19468
19469                         default:
19470                             noway_assert(!"Bad accessType");
19471                             break;
19472                     }
19473
19474                     // tracking of region protected by the monitor in synchronized methods
19475                     if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
19476                     {
19477                         fPossibleSyncHelperCall = true;
19478                     }
19479                 }
19480             }
19481             break;
19482
19483             default:
19484                 noway_assert(!"strange call type");
19485                 break;
19486         }
19487
19488     /*-------------------------------------------------------------------------
19489      *  For tailcalls, REG_INTRET contains the address of the target function,
19490      *  enregistered args are in the correct registers, and the stack arguments
19491      *  have been pushed on the stack. Now call the stub-sliding helper
19492      */
19493
19494     if (fTailCall)
19495     {
19496
19497         if (compiler->info.compCallUnmanaged)
19498             genPInvokeMethodEpilog();
19499
19500 #ifdef _TARGET_X86_
19501         noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
19502
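        // A sketch of what the pushes below build up for the tailcall helper, from the
        // deepest new slot to the top of the stack: the count of incoming stack args,
        // the count of outgoing stack args, the callee-saved register info flags, and
        // finally the target address.
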
19503         // Push the count of the incoming stack arguments
19504
19505         unsigned nOldStkArgs =
19506             (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
19507         getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
19508         genSinglePush(); // Keep track of ESP for EBP-less frames
19509         args += sizeof(void*);
19510
19511         // Push the count of the outgoing stack arguments
19512
19513         getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
19514         genSinglePush(); // Keep track of ESP for EBP-less frames
19515         args += sizeof(void*);
19516
19517         // Push info about the callee-saved registers to be restored
19518         // For now, we always spill all registers if compiler->compTailCallUsed
19519
19520         DWORD calleeSavedRegInfo = 1 |                                 // always restore EDI,ESI,EBX
19521                                    (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
19522         getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
19523         genSinglePush(); // Keep track of ESP for EBP-less frames
19524         args += sizeof(void*);
19525
19526         // Push the address of the target function
19527
19528         getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
19529         genSinglePush(); // Keep track of ESP for EBP-less frames
19530         args += sizeof(void*);
19531
19532 #else // _TARGET_X86_
19533
19534         args    = 0;
19535         retSize = EA_UNKNOWN;
19536
19537 #endif // _TARGET_X86_
19538
19539         if (compiler->getNeedsGSSecurityCookie())
19540         {
19541             genEmitGSCookieCheck(true);
19542         }
19543
19544         // The TailCall helper does not poll for GC. An explicit GC poll
19545         // should have been placed when we morphed this into a tail call.
19546         noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
19547
19548         // Now call the helper
19549
19550         genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
19551     }
19552
19553     /*-------------------------------------------------------------------------
19554      *  Done with call.
19555      *  Trash registers, pop arguments if needed, etc
19556      */
19557
19558     /* Mark the argument registers as free */
19559
19560     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19561
19562     for (areg = 0; areg < MAX_REG_ARG; areg++)
19563     {
19564         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
19565
19566         // Is this one of the used argument registers?
19567         if ((curArgMask & call->gtCallRegUsedMask) == 0)
19568             continue;
19569
19570 #ifdef _TARGET_ARM_
19571         if (regSet.rsUsedTree[areg] == NULL)
19572         {
19573             noway_assert(areg % 2 == 1 &&
19574                          (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
19575                           (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
19576             continue;
19577         }
19578 #endif
19579
19580         regSet.rsMarkRegFree(curArgMask);
19581
19582         // We keep regSet.rsMaskVars current during codegen, so we have to remove any
19583         // that have been copied into arg regs.
19584
19585         regSet.RemoveMaskVars(curArgMask);
19586         gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
19587         gcInfo.gcRegByrefSetCur &= ~(curArgMask);
19588     }
19589
19590 #if !FEATURE_STACK_FP_X87
19591     //-------------------------------------------------------------------------
19592     // free up the FP args
19593
19594     for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
19595     {
19596         regNumber argRegNum  = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
19597         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
19598
19599         // Is this one of the used argument registers?
19600         if ((curArgMask & call->gtCallRegUsedMask) == 0)
19601             continue;
19602
19603         regSet.rsMaskUsed &= ~curArgMask;
19604         regSet.rsUsedTree[argRegNum] = NULL;
19605     }
19606 #endif // !FEATURE_STACK_FP_X87
19607
19608     /* restore the old argument register status */
19609
19610     intRegState.rsCurRegArgNum   = savCurIntArgReg;
19611     floatRegState.rsCurRegArgNum = savCurFloatArgReg;
19612
19613     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19614
19615     /* Mark all trashed registers as such */
19616
19617     if (calleeTrashedRegs)
19618         regTracker.rsTrashRegSet(calleeTrashedRegs);
19619
19620     regTracker.rsTrashRegsForGCInterruptability();
19621
19622 #ifdef DEBUG
19623
19624     if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19625     {
19626         if (compiler->verbose)
19627         {
19628             printf("\t\t\t\t\t\t\tEnd call ");
19629             Compiler::printTreeID(call);
19630             printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
19631         }
19632         noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
19633     }
19634
19635 #endif
19636
19637 #if FEATURE_STACK_FP_X87
19638     /* All float temps must be spilled around function calls */
19639     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19640     {
19641         noway_assert(compCurFPState.m_uStackSize == 1);
19642     }
19643     else
19644     {
19645         noway_assert(compCurFPState.m_uStackSize == 0);
19646     }
19647 #else
19648     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19649     {
19650 #ifdef _TARGET_ARM_
19651         if (call->IsVarargs() || compiler->opts.compUseSoftFP)
19652         {
19653             // The return value for vararg methods is in r0, r1, but our callers
19654             // expect it in s0, s1 because of the floating-point return type. Do the move now.
19655             if (call->gtType == TYP_FLOAT)
19656             {
19657                 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
19658             }
19659             else
19660             {
19661                 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
19662             }
19663         }
19664 #endif
19665         genMarkTreeInReg(call, REG_FLOATRET);
19666     }
19667 #endif
19668
19669     /* The function will pop all arguments before returning */
19670
19671     SetStackLevel(saveStackLvl);
19672
19673     /* No trashed registers may possibly hold a pointer at this point */
19674     CLANG_FORMAT_COMMENT_ANCHOR;
19675
19676 #ifdef DEBUG
19677
19678     regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
19679                         ~regSet.rsMaskVars & ~vptrMask;
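    // i.e. callee-trashed integer registers that the GC tracking sets still claim hold
    // GC refs or byrefs, excluding enregistered locals and the vtable-pointer register.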
19680     if (ptrRegs)
19681     {
19682         // A reg may be dead already.  The assertion is too strong.
19683         LclVarDsc* varDsc;
19684         unsigned   varNum;
19685
19686         // use compiler->compCurLife
19687         for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
19688         {
19689             /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
19690
19691             if (!varDsc->lvTracked)
19692                 continue;
19693             if (!varDsc->lvRegister)
19694                 continue;
19695             if (varDsc->IsFloatRegType())
19696                 continue;
19697
19698             /* Get hold of the index and the bitmask for the variable */
19699
19700             unsigned varIndex = varDsc->lvVarIndex;
19701
19702             /* Is this variable live currently? */
19703
19704             if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
19705             {
19706                 regNumber regNum  = varDsc->lvRegNum;
19707                 regMaskTP regMask = genRegMask(regNum);
19708
19709                 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
19710                     ptrRegs &= ~regMask;
19711             }
19712         }
19713         if (ptrRegs)
19714         {
19715             printf("Bad call handling for ");
19716             Compiler::printTreeID(call);
19717             printf("\n");
19718             noway_assert(!"A callee trashed reg is holding a GC pointer");
19719         }
19720     }
19721 #endif
19722
19723 #if defined(_TARGET_X86_)
19724     //-------------------------------------------------------------------------
19725     // Create a label for tracking of region protected by the monitor in synchronized methods.
19726     // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
19727     // so the GC state vars have been updated before creating the label.
19728
19729     if (fPossibleSyncHelperCall)
19730     {
19731         switch (helperNum)
19732         {
19733             case CORINFO_HELP_MON_ENTER:
19734             case CORINFO_HELP_MON_ENTER_STATIC:
19735                 noway_assert(compiler->syncStartEmitCookie == NULL);
19736                 compiler->syncStartEmitCookie =
19737                     getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19738                 noway_assert(compiler->syncStartEmitCookie != NULL);
19739                 break;
19740             case CORINFO_HELP_MON_EXIT:
19741             case CORINFO_HELP_MON_EXIT_STATIC:
19742                 noway_assert(compiler->syncEndEmitCookie == NULL);
19743                 compiler->syncEndEmitCookie =
19744                     getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19745                 noway_assert(compiler->syncEndEmitCookie != NULL);
19746                 break;
19747             default:
19748                 break;
19749         }
19750     }
19751 #endif // _TARGET_X86_
19752
19753     if (call->gtFlags & GTF_CALL_UNMANAGED)
19754     {
19755         genDefineTempLabel(returnLabel);
19756
19757 #ifdef _TARGET_X86_
19758         if (getInlinePInvokeCheckEnabled())
19759         {
19760             noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
19761             BasicBlock* esp_check;
19762
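            // The sequence below verifies that the current ESP matches the ESP recorded
            // in the inlined P/Invoke frame at the call site (biased by argSize when the
            // callee is expected to pop its arguments); a mismatch hits a breakpoint.
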
19763             CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19764             /* mov   ecx, dword ptr [frame.callSiteTracker] */
19765
19766             getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
19767                                       pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19768             regTracker.rsTrackRegTrash(REG_ARG_0);
19769
19770             /* Generate the conditional jump */
19771
19772             if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19773             {
19774                 if (argSize)
19775                 {
19776                     getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
19777                 }
19778             }
19779             /* cmp   ecx, esp */
19780
19781             getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
19782
19783             esp_check = genCreateTempLabel();
19784
19785             emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
19786             inst_JMP(jmpEqual, esp_check);
19787
19788             getEmitter()->emitIns(INS_BREAKPOINT);
19789
19790             /* genCondJump() closes the current emitter block */
19791
19792             genDefineTempLabel(esp_check);
19793         }
19794 #endif
19795     }
19796
19797     /* Are we supposed to pop the arguments? */
19798     CLANG_FORMAT_COMMENT_ANCHOR;
19799
19800 #if defined(_TARGET_X86_)
19801     if (call->gtFlags & GTF_CALL_UNMANAGED)
19802     {
19803         if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) ||
19804             compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
19805         {
19806             // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
19807             // take care of the cdecl argument popping here as well but the stack depth tracking logic
19808             // makes this very hard, i.e. it needs to "see" the actual pop.
19809
19810             CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19811
19812             if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
19813             {
19814                 /* mov   esp, dword ptr [frame.callSiteTracker] */
19815                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
19816                                           compiler->lvaInlinedPInvokeFrameVar,
19817                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19818             }
19819             else
19820             {
19821                 /* mov   ecx, dword ptr [frame.callSiteTracker] */
19822                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
19823                                           compiler->lvaInlinedPInvokeFrameVar,
19824                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19825                 regTracker.rsTrackRegTrash(REG_ARG_0);
19826
19827                 /* lea   esp, [ecx + argSize] */
19828                 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
19829             }
19830         }
19831     }
19832 #endif // _TARGET_X86_
19833
19834     if (call->gtFlags & GTF_CALL_POP_ARGS)
19835     {
19836         noway_assert(args == (size_t) - (int)argSize);
19837
19838         if (argSize)
19839         {
19840             genAdjustSP(argSize);
19841         }
19842     }
19843
19844     if (pseudoStackLvl)
19845     {
19846         noway_assert(call->gtType == TYP_VOID);
19847
19848         /* Generate NOP */
19849
19850         instGen(INS_nop);
19851     }
19852
19853     /* What does the function return? */
19854
19855     retVal = RBM_NONE;
19856
19857     switch (call->gtType)
19858     {
19859         case TYP_REF:
19860         case TYP_ARRAY:
19861         case TYP_BYREF:
19862             gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
19863
19864             __fallthrough;
19865
19866         case TYP_INT:
19867 #if !CPU_HAS_FP_SUPPORT
19868         case TYP_FLOAT:
19869 #endif
19870             retVal = RBM_INTRET;
19871             break;
19872
19873 #ifdef _TARGET_ARM_
19874         case TYP_STRUCT:
19875         {
19876             assert(call->gtRetClsHnd != NULL);
19877             assert(compiler->IsHfa(call->gtRetClsHnd));
19878             int retSlots = compiler->GetHfaCount(call->gtRetClsHnd);
19879             assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
19880             assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
19881             retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
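            // e.g. retSlots == 2 yields a mask of two consecutive floating-point
            // registers starting at REG_FLOATRET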
19882         }
19883         break;
19884 #endif
19885
19886         case TYP_LONG:
19887 #if !CPU_HAS_FP_SUPPORT
19888         case TYP_DOUBLE:
19889 #endif
19890             retVal = RBM_LNGRET;
19891             break;
19892
19893 #if CPU_HAS_FP_SUPPORT
19894         case TYP_FLOAT:
19895         case TYP_DOUBLE:
19896
19897             break;
19898 #endif
19899
19900         case TYP_VOID:
19901             break;
19902
19903         default:
19904             noway_assert(!"unexpected/unhandled fn return type");
19905     }
19906
19907     // We now have to generate the "call epilog" (if it was a call to unmanaged code).
19908     /* if it is a call to unmanaged code, frameListRoot must be set */
19909
19910     noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
19911
19912     if (frameListRoot)
19913         genPInvokeCallEpilog(frameListRoot, retVal);
19914
19915     if (frameListRoot && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
19916     {
19917         if (frameListRoot->lvRegister)
19918         {
19919             bool isBorn  = false;
19920             bool isDying = true;
19921             genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
19922         }
19923     }
19924
19925 #ifdef DEBUG
19926     if (compiler->opts.compStackCheckOnCall
19927 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
19928         // check the stack as frequently as possible
19929         && !call->IsHelperCall()
19930 #else
19931         && call->gtCallType == CT_USER_FUNC
19932 #endif
19933             )
19934     {
19935         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
19936                      compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
19937                      compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
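        // Compare the current ESP (biased down by argSize when arguments were pushed for
        // this call) against the ESP value saved in lvaCallEspCheck; emit a breakpoint on
        // a mismatch.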
19938         if (argSize > 0)
19939         {
19940             getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
19941             getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
19942             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
19943             regTracker.rsTrackRegTrash(REG_ARG_0);
19944         }
19945         else
19946             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
19947
19948         BasicBlock*  esp_check = genCreateTempLabel();
19949         emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
19950         inst_JMP(jmpEqual, esp_check);
19951         getEmitter()->emitIns(INS_BREAKPOINT);
19952         genDefineTempLabel(esp_check);
19953     }
19954 #endif // DEBUG
19955
19956 #if FEATURE_STACK_FP_X87
19957     UnspillRegVarsStackFp();
19958 #endif // FEATURE_STACK_FP_X87
19959
19960     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19961     {
19962         // Restore return node if necessary
19963         if (call->gtFlags & GTF_SPILLED)
19964         {
19965             UnspillFloat(call);
19966         }
19967
19968 #if FEATURE_STACK_FP_X87
19969         // Mark as free
19970         regSet.SetUsedRegFloat(call, false);
19971 #endif
19972     }
19973
19974 #if FEATURE_STACK_FP_X87
19975 #ifdef DEBUG
19976     if (compiler->verbose)
19977     {
19978         JitDumpFPState();
19979     }
19980 #endif
19981 #endif
19982
19983     return retVal;
19984 }
19985 #ifdef _PREFAST_
19986 #pragma warning(pop)
19987 #endif
19988
19989 /*****************************************************************************
19990  *
19991  *  Create and record GC Info for the function.
19992  */
19993 #ifdef JIT32_GCENCODER
19994 void*
19995 #else
19996 void
19997 #endif
19998 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
19999 {
20000 #ifdef JIT32_GCENCODER
20001     return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
20002 #else
20003     genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
20004 #endif
20005 }
20006
20007 #ifdef JIT32_GCENCODER
20008 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
20009                                             unsigned prologSize,
20010                                             unsigned epilogSize DEBUGARG(void* codePtr))
20011 {
20012     BYTE    headerBuf[64];
20013     InfoHdr header;
20014
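    // First pass: encode the header into the scratch buffer just to measure its size;
    // the real info block is written out below, after it has been allocated.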
20015     int s_cached;
20016 #ifdef DEBUG
20017     size_t headerSize =
20018 #endif
20019         compiler->compInfoBlkSize =
20020             gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
20021
20022     size_t argTabOffset = 0;
20023     size_t ptrMapSize   = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
20024
20025 #if DISPLAY_SIZES
20026
20027     if (genInterruptible)
20028     {
20029         gcHeaderISize += compiler->compInfoBlkSize;
20030         gcPtrMapISize += ptrMapSize;
20031     }
20032     else
20033     {
20034         gcHeaderNSize += compiler->compInfoBlkSize;
20035         gcPtrMapNSize += ptrMapSize;
20036     }
20037
20038 #endif // DISPLAY_SIZES
20039
20040     compiler->compInfoBlkSize += ptrMapSize;
20041
20042     /* Allocate the info block for the method */
20043
20044     compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
20045
20046 #if 0 // VERBOSE_SIZES
20047     // TODO-Review: 'dataSize', below, is not defined
20048
20049 //  if  (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
20050     {
20051         printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
20052                compiler->info.compILCodeSize,
20053                compiler->compInfoBlkSize,
20054                codeSize + dataSize,
20055                codeSize + dataSize - prologSize - epilogSize,
20056                100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
20057                100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
20058                compiler->info.compClassName,
20059                compiler->info.compMethodName);
20060     }
20061
20062 #endif
20063
20064     /* Fill in the info block and return it to the caller */
20065
20066     void* infoPtr = compiler->compInfoBlkAddr;
20067
20068     /* Create the method info block: header followed by GC tracking tables */
20069
20070     compiler->compInfoBlkAddr +=
20071         gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
20072
20073     assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
20074     compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
20075     assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
20076
20077 #ifdef DEBUG
20078
20079     if (0)
20080     {
20081         BYTE*    temp = (BYTE*)infoPtr;
20082         unsigned size = compiler->compInfoBlkAddr - temp;
20083         BYTE*    ptab = temp + headerSize;
20084
20085         noway_assert(size == headerSize + ptrMapSize);
20086
20087         printf("Method info block - header [%u bytes]:", headerSize);
20088
20089         for (unsigned i = 0; i < size; i++)
20090         {
20091             if (temp == ptab)
20092             {
20093                 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
20094                 printf("\n    %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
20095             }
20096             else
20097             {
20098                 if (!(i % 16))
20099                     printf("\n    %04X: ", i);
20100             }
20101
20102             printf("%02X ", *temp++);
20103         }
20104
20105         printf("\n");
20106     }
20107
20108 #endif // DEBUG
20109
20110 #if DUMP_GC_TABLES
20111
20112     if (compiler->opts.dspGCtbls)
20113     {
20114         const BYTE* base = (BYTE*)infoPtr;
20115         unsigned    size;
20116         unsigned    methodSize;
20117         InfoHdr     dumpHeader;
20118
20119         printf("GC Info for method %s\n", compiler->info.compFullName);
20120         printf("GC info size = %3u\n", compiler->compInfoBlkSize);
20121
20122         size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
20123         // printf("size of header encoding is %3u\n", size);
20124         printf("\n");
20125
20126         if (compiler->opts.dspGCtbls)
20127         {
20128             base += size;
20129             size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
20130             // printf("size of pointer table is %3u\n", size);
20131             printf("\n");
20132             noway_assert(compiler->compInfoBlkAddr == (base + size));
20133         }
20134     }
20135
20136 #ifdef DEBUG
20137     if (jitOpts.testMask & 128)
20138     {
20139         for (unsigned offs = 0; offs < codeSize; offs++)
20140         {
20141             gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
20142         }
20143     }
20144 #endif // DEBUG
20145 #endif // DUMP_GC_TABLES
20146
20147     /* Make sure we ended up generating the expected number of bytes */
20148
20149     noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
20150
20151     return infoPtr;
20152 }
20153
20154 #else // JIT32_GCENCODER
20155
20156 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
20157 {
20158     IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
20159     GcInfoEncoder* gcInfoEncoder  = new (compiler, CMK_GC)
20160         GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20161     assert(gcInfoEncoder);
20162
20163     // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20164     gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20165
20166     // We keep the call count for the second call to gcMakeRegPtrTable() below.
20167     unsigned callCnt = 0;
20168     // First we figure out the encoder ID's for the stack slots and registers.
20169     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
20170     // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20171     gcInfoEncoder->FinalizeSlotIds();
20172     // Now we can actually use those slot ID's to declare live ranges.
20173     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
20174
20175     gcInfoEncoder->Build();
20176
20177     // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
20178     // let's save the values anyway for debugging purposes
20179     compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20180     compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
20181 }
20182 #endif
20183
20184 /*****************************************************************************
20185  *  For CEE_LOCALLOC
20186  */
20187
20188 regNumber CodeGen::genLclHeap(GenTreePtr size)
20189 {
20190     noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
20191
20192     // regCnt is a register used to hold both
20193     //              the amount to stack alloc (either in bytes or pointer-sized words)
20194     //          and the final stack alloc address to return as the result
20195     //
20196     regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
20197     var_types type   = genActualType(size->gtType);
20198     emitAttr  easz   = emitTypeSize(type);
20199
20200 #ifdef DEBUG
20201     // Verify ESP
20202     if (compiler->opts.compStackCheckOnRet)
20203     {
20204         noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20205                      compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20206                      compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20207         getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20208
20209         BasicBlock*  esp_check = genCreateTempLabel();
20210         emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
20211         inst_JMP(jmpEqual, esp_check);
20212         getEmitter()->emitIns(INS_BREAKPOINT);
20213         genDefineTempLabel(esp_check);
20214     }
20215 #endif
20216
20217     noway_assert(isFramePointerUsed());
20218     noway_assert(genStackLevel == 0); // Can't have anything on the stack
20219
20220     BasicBlock* endLabel = NULL;
20221 #if FEATURE_FIXED_OUT_ARGS
20222     bool stackAdjusted = false;
20223 #endif
20224
20225     if (size->IsCnsIntOrI())
20226     {
20227 #if FEATURE_FIXED_OUT_ARGS
20228         // If we have an outgoing arg area then we must adjust the SP
20229         // essentially popping off the outgoing arg area,
20230         // We will restore it right before we return from this method
20231         //
20232         if (compiler->lvaOutgoingArgSpaceSize > 0)
20233         {
20234             assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20235                    0); // This must be true for the stack to remain aligned
20236             inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20237             stackAdjusted = true;
20238         }
20239 #endif
20240         size_t amount = size->gtIntCon.gtIconVal;
20241
20242         // Round 'amount' up to a multiple of STACK_ALIGN and convert it to a count of pointer-sized words
20243         amount += (STACK_ALIGN - 1);
20244         amount &= ~(STACK_ALIGN - 1);
20245         amount >>= STACK_ALIGN_SHIFT;      // amount is number of pointer-sized words to locAlloc
20246         size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
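        // For example, assuming STACK_ALIGN == 4 and STACK_ALIGN_SHIFT == 2 (as on x86),
        // a request for 10 bytes becomes (10 + 3) & ~3 == 12 bytes, i.e. 3 pointer-sized words.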
20247
20248         /* If amount is zero then return null in RegCnt */
20249         if (amount == 0)
20250         {
20251             regCnt = regSet.rsGrabReg(RBM_ALLINT);
20252             instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20253             goto DONE;
20254         }
20255
20256         /* For small allocations we will generate up to six 'push 0' instructions inline */
20257         if (amount <= 6)
20258         {
20259             regCnt = regSet.rsGrabReg(RBM_ALLINT);
20260 #if CPU_LOAD_STORE_ARCH
20261             regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20262             // Set 'regZero' to zero
20263             instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
20264 #endif
20265
20266             while (amount != 0)
20267             {
20268 #if CPU_LOAD_STORE_ARCH
20269                 inst_IV(INS_push, (unsigned)genRegMask(regZero));
20270 #else
20271                 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
20272 #endif
20273                 amount--;
20274             }
20275
20276             regTracker.rsTrackRegTrash(regCnt);
20277             // --- move regCnt, ESP
20278             inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20279             goto DONE;
20280         }
20281         else
20282         {
20283             if (!compiler->info.compInitMem)
20284             {
20285                 // Re-bias amount to be number of bytes to adjust the SP
20286                 amount <<= STACK_ALIGN_SHIFT;
20287                 size->gtIntCon.gtIconVal = amount;      // update the GT_CNS value in the node
20288                 if (amount < compiler->eeGetPageSize()) // must be < not <=
20289                 {
20290                     // Since the size is a page or less, simply adjust ESP
20291
20292                     // ESP might already be in the guard page, must touch it BEFORE
20293                     // the alloc, not after.
20294                     regCnt = regSet.rsGrabReg(RBM_ALLINT);
20295                     inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20296 #if CPU_LOAD_STORE_ARCH
20297                     regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20298                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
20299                     regTracker.rsTrackRegTrash(regTmp);
20300 #else
20301                     getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20302 #endif
20303                     inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
20304                     inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
20305                     regTracker.rsTrackRegTrash(regCnt);
20306                     goto DONE;
20307                 }
20308             }
20309         }
20310     }
20311
20312     // Compute the size of the block to allocate
20313     genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
20314     noway_assert(size->InReg());
20315     regCnt = size->gtRegNum;
20316
20317 #if FEATURE_FIXED_OUT_ARGS
20318     // If we have an outgoing arg area then we must adjust the SP
20319     // essentially popping off the outgoing arg area,
20320     // We will restore it right before we return from this method
20321     //
20322     if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
20323     {
20324         assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20325                0); // This must be true for the stack to remain aligned
20326         inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20327         stackAdjusted = true;
20328     }
20329 #endif
20330
20331     //  Perform alignment if we don't have a GT_CNS size
20332     //
20333     if (!size->IsCnsIntOrI())
20334     {
20335         endLabel = genCreateTempLabel();
20336
20337         // If 0 we bail out
20338         instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
20339         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20340         inst_JMP(jmpEqual, endLabel);
20341
20342         // Align to STACK_ALIGN
20343         inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
20344
20345         if (compiler->info.compInitMem)
20346         {
20347 #if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
20348             // regCnt will be the number of pointer-sized words to locAlloc
20349             // If the shift right won't do the 'and', do it here
20350             inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20351 #endif
20352             // --- shr regCnt, 2 ---
20353             inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
20354         }
20355         else
20356         {
20357             // regCnt will be the total number of bytes to locAlloc
20358
20359             inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20360         }
20361     }
20362
20363     BasicBlock* loop;
20364     loop = genCreateTempLabel();
20365
20366     if (compiler->info.compInitMem)
20367     {
20368         // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
20369
20370         /* Since we have to zero out the allocated memory AND ensure that
20371            ESP is always valid by tickling the pages, we will just push 0's
20372            on the stack */
20373         CLANG_FORMAT_COMMENT_ANCHOR;
20374
20375 #if defined(_TARGET_ARM_)
20376         regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20377         regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
20378         // Set 'regZero1' and 'regZero2' to zero
20379         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
20380         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
20381 #endif
20382
20383         // Loop:
20384         genDefineTempLabel(loop);
20385
20386 #if defined(_TARGET_X86_)
20387
20388         inst_IV(INS_push_hide, 0); // --- push 0
20389         // Are we done?
20390         inst_RV(INS_dec, regCnt, type);
20391
20392 #elif defined(_TARGET_ARM_)
20393
20394         inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
20395         // Are we done?
20396         inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
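        // (on ARM each iteration pushes two pointer-sized zeros, hence the decrement by 2)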
20397
20398 #else
20399         assert(!"Codegen missing");
20400 #endif // TARGETS
20401
20402         emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
20403         inst_JMP(jmpNotEqual, loop);
20404
20405         // Move the final value of ESP into regCnt
20406         inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
20407         regTracker.rsTrackRegTrash(regCnt);
20408     }
20409     else
20410     {
20411         // At this point 'regCnt' is set to the total number of bytes to locAlloc
20412
20413         /* We don't need to zero out the allocated memory. However, we do have
20414            to tickle the pages to ensure that ESP is always valid and is
20415            in sync with the "stack guard page".  Note that in the worst
20416            case ESP is on the last byte of the guard page.  Thus you must
20417            touch ESP+0 first, not ESP+0x1000.
20418
20419            Another subtlety is that you don't want ESP to land exactly on the
20420            boundary of the guard page, because PUSH pre-decrements ESP; call
20421            setup would then not touch the guard page but the page just beyond it */
20422
20423         /* Note that we go through a few hoops so that ESP never points to
20424            illegal pages at any time during the ticking process
20425
20426                   neg   REG
20427                   add   REG, ESP         // reg now holds ultimate ESP
20428                   jb    loop             // result is smaller than original ESP (no wrap around)
20429                   xor   REG, REG,        // Overflow, pick lowest possible number
20430              loop:
20431                   test  ESP, [ESP+0]     // X86 - tickle the page
20432                   ldr   REGH,[ESP+0]     // ARM - tickle the page
20433                   mov   REGH, ESP
20434                   sub   REGH, GetOsPageSize()
20435                   mov   ESP, REGH
20436                   cmp   ESP, REG
20437                   jae   loop
20438
20439                   mov   ESP, REG
20440              end:
20441           */
20442         CLANG_FORMAT_COMMENT_ANCHOR;
20443
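        // On ARM the 'neg REG' / 'add REG, ESP' pair from the pseudocode above is folded
        // into a single flag-setting subtract (regCnt = SP - regCnt), and the no-wrap-around
        // check uses an 'hs' branch rather than 'jb'.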
20444 #ifdef _TARGET_ARM_
20445
20446         inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
20447         inst_JMP(EJ_hs, loop);
20448 #else
20449         inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
20450         inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
20451         inst_JMP(EJ_jb, loop);
20452 #endif
20453         regTracker.rsTrackRegTrash(regCnt);
20454
20455         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20456
20457         genDefineTempLabel(loop);
20458
20459         // This is a workaround to avoid the emitter trying to track the
20460         // decrement of the ESP - we do the subtraction in another reg
20461         // instead of adjusting ESP directly.
20462
20463         regNumber regTemp = regSet.rsPickReg();
20464
20465         // Tickle the decremented value and move it back into ESP.
20466         // Note that this has to be done BEFORE the update of ESP, since
20467         // ESP might already be on the guard page.  It is OK to leave
20468         // the final value of ESP on the guard page.
20469         CLANG_FORMAT_COMMENT_ANCHOR;
20470
20471 #if CPU_LOAD_STORE_ARCH
20472         getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
20473 #else
20474         getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20475 #endif
20476
20477         inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
20478         regTracker.rsTrackRegTrash(regTemp);
20479
20480         inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
20481         inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
20482
20483         genRecoverReg(size, RBM_ALLINT,
20484                       RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
20485         noway_assert(size->InReg());
20486         regCnt = size->gtRegNum;
20487         inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
20488         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
20489         inst_JMP(jmpGEU, loop);
20490
20491         // Move the final value to ESP
20492         inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
20493     }
20494     regSet.rsMarkRegFree(genRegMask(regCnt));
20495
20496 DONE:
20497
20498     noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
20499
20500     if (endLabel != NULL)
20501         genDefineTempLabel(endLabel);
20502
20503 #if FEATURE_FIXED_OUT_ARGS
20504     // If we have an outgoing arg area then we must readjust the SP
20505     //
20506     if (stackAdjusted)
20507     {
20508         assert(compiler->lvaOutgoingArgSpaceSize > 0);
20509         assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20510                0); // This must be true for the stack to remain aligned
20511         inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20512     }
20513 #endif
20514
20515     /* Write the lvaLocAllocSPvar stack frame slot */
20516     if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
20517     {
20518         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
20519     }
20520
20521 #if STACK_PROBES
20522     // We don't think the codegen complexity is worth embedding this
20523     // in each of the customized alloca paths, even though it would be possible.
20524     if (compiler->opts.compNeedStackProbes)
20525     {
20526         genGenerateStackProbe();
20527     }
20528 #endif
20529
20530 #ifdef DEBUG
20531     // Update new ESP
20532     if (compiler->opts.compStackCheckOnRet)
20533     {
20534         noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20535                      compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20536                      compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20537         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20538     }
20539 #endif
20540
20541     return regCnt;
20542 }
20543
20544 /*****************************************************************************
20545  *
20546  *  Return non-zero if the given register is free after the given tree is
20547  *  evaluated (i.e. the register is either not used at all, or it holds a
20548  *  register variable which is not live after the given node).
20549  *  This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
20550  *  constant operand, and one that's in a register.  Thus, the only thing we
20551  *  need to determine is whether the register holding op1 is dead.
20552  */
20553 bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
20554 {
20555     regMaskTP vars;
20556     regMaskTP mask = genRegMask(reg);
20557
20558     if (regSet.rsMaskUsed & mask)
20559         return false;
20560
20561     assert(tree->gtOper == GT_ADD);
20562     GenTreePtr regValTree = tree->gtOp.gtOp1;
20563     if (!tree->gtOp.gtOp2->IsCnsIntOrI())
20564     {
20565         regValTree = tree->gtOp.gtOp2;
20566         assert(tree->gtOp.gtOp1->IsCnsIntOrI());
20567     }
20568     assert(regValTree->InReg());
20569
20570     /* At this point, the only way that the register will remain live
20571      * is if it is itself a register variable that isn't dying.
20572      */
20573     assert(regValTree->gtRegNum == reg);
20574     if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
20575         return false;
20576     else
20577         return true;
20578 }
20579
20580 /*****************************************************************************/
20581 //
20582 // This method calculates the USE and DEF values for a statement.
20583 // It also calls fgSetRngChkTarget for the statement.
20584 //
20585 // We refactored this code out of fgPerBlockLocalVarLiveness
20586 // and added QMARK logic to it.
20587 //
20588 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20589 //
20590 // The usage of this method is very limited.
20591 // We should only call it for the first node in the statement or
20592 // for the node after the GTF_RELOP_QMARK node.
20593 //
20594 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20595
20596 /*
20597        Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
20598        when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
20599        from both trees.
20600
20601        Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
20602        linked by gtNext.
20603
20604        The algorithm we use is:
20605        (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
20606        (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
20607            We cache copies of current fgCurDefSet and fgCurUseSet.
20608            (The fact that it is recursively calling itself is for nested QMARK case,
20609             where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
20610        (3) We walk the thenTree.
20611        (4) When we see the GT_COLON node, we know that we just finished the thenTree.
20612            We then make a copy of the current fgCurDefSet and fgCurUseSet,
20613            restore them to the ones before the thenTree, and then continue walking
20614            the elseTree.
20615        (5) When we see the GT_QMARK node, we know we just finished the elseTree.
20616            So we combine the results from the thenTree and elseTree and then return.
20617
20618
20619                                  +--------------------+
20620                                  |      GT_QMARK    11|
20621                                  +----------+---------+
20622                                             |
20623                                             *
20624                                            / \
20625                                          /     \
20626                                        /         \
20627                   +---------------------+       +--------------------+
20628                   |      GT_<cond>    3 |       |     GT_COLON     7 |
20629                   |  w/ GTF_RELOP_QMARK |       |  w/ GTF_COLON_COND |
20630                   +----------+----------+       +---------+----------+
20631                              |                            |
20632                              *                            *
20633                             / \                          / \
20634                           /     \                      /     \
20635                         /         \                  /         \
20636                        2           1          thenTree 6       elseTree 10
20637                                   x               |                |
20638                                  /                *                *
20639      +----------------+        /                 / \              / \
20640      |prevExpr->gtNext+------/                 /     \          /     \
20641      +----------------+                      /         \      /         \
20642                                             5           4    9           8
20643
20644
20645 */
20646
20647 GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
20648                                                           GenTreePtr relopNode) // The node before the startNode.
20649                                                                                 // (It should either be NULL or
20650                                                                                 // a GTF_RELOP_QMARK node.)
20651 {
20652     GenTreePtr tree;
20653
20654     VARSET_TP defSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurDefSet)); // Store the current fgCurDefSet and
20655                                                                           // fgCurUseSet so
20656     VARSET_TP useSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurUseSet)); // we can restore them before entering the
20657                                                                           // elseTree.
20658
20659     MemoryKindSet memoryUse_BeforeSplit   = fgCurMemoryUse;
20660     MemoryKindSet memoryDef_BeforeSplit   = fgCurMemoryDef;
20661     MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
20662
20663     VARSET_TP defSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // These two variables will store
20664                                                                 // the USE and DEF sets after
20665     VARSET_TP useSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
20666
20667     MemoryKindSet memoryUse_AfterThenTree   = fgCurMemoryUse;
20668     MemoryKindSet memoryDef_AfterThenTree   = fgCurMemoryDef;
20669     MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20670
20671     // relopNode is either NULL or a GTF_RELOP_QMARK node.
20672     assert(!relopNode || ((relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK)));
20673
20674     // If relopNode is NULL, then the startNode must be the 1st node of the statement.
20675     // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
20676     assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
20677            (relopNode && startNode == relopNode->gtNext));
20678
20679     for (tree = startNode; tree; tree = tree->gtNext)
20680     {
20681         switch (tree->gtOper)
20682         {
20683
20684             case GT_QMARK:
20685
20686                 // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
20687                 noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
20688
20689                 // By the time we see a GT_QMARK, we must have finished processing the elseTree.
20690                 // So it's time to combine the results
20691                 // from the thenTree and the elseTree, and then return.
20692
20693                 VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
20694                 VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
20695
20696                 fgCurMemoryDef   = fgCurMemoryDef & memoryDef_AfterThenTree;
20697                 fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
20698                 fgCurMemoryUse   = fgCurMemoryUse | memoryUse_AfterThenTree;
20699
20700                 // Return the GT_QMARK node itself so the caller can continue from there.
20701                 // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
20702                 // in the "for" statement.
20703                 goto _return;
20704
20705             case GT_COLON:
20706                 // By the time we see GT_COLON, we must have just walked the thenTree.
20707                 // So we need to do two things here.
20708                 // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
20709                 //     with the result from the elseTree.
20710                 // (2) Restore fgCurDefSet and fgCurUseSet to their values from before the thenTree was walked,
20711                 //     and then continue walking the elseTree.
20712                 VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
20713                 VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
20714
20715                 memoryDef_AfterThenTree   = fgCurMemoryDef;
20716                 memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20717                 memoryUse_AfterThenTree   = fgCurMemoryUse;
20718
20719                 VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
20720                 VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
20721
20722                 fgCurMemoryDef   = memoryDef_BeforeSplit;
20723                 fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
20724                 fgCurMemoryUse   = memoryUse_BeforeSplit;
20725
20726                 break;
20727
20728             case GT_LCL_VAR:
20729             case GT_LCL_FLD:
20730             case GT_LCL_VAR_ADDR:
20731             case GT_LCL_FLD_ADDR:
20732             case GT_STORE_LCL_VAR:
20733             case GT_STORE_LCL_FLD:
20734                 fgMarkUseDef(tree->AsLclVarCommon());
20735                 break;
20736
20737             case GT_CLS_VAR:
20738                 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20739                 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20740                 // This models Volatile reads as a def-then-use of the heap,
20741                 // and allows for a CSE of a subsequent non-volatile read.
20742                 if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
20743                 {
20744                     // For any Volatile indirection, we must handle it as a
20745                     // definition of GcHeap/ByrefExposed
20746                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20747                 }
20748                 // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def when we get to
20749                 // the assignment.
20750                 // Otherwise, we treat it as a use here.
20751                 if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
20752                 {
20753                     fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20754                 }
20755                 break;
20756
20757             case GT_IND:
20758                 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20759                 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20760                 // This models Volatile reads as a def-then-use of the heap,
20761                 // and allows for a CSE of a subsequent non-volatile read.
20762                 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
20763                 {
20764                     // For any Volatile indirection, we must handle it as a
20765                     // definition of GcHeap/ByrefExposed
20766                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20767                 }
20768
20769                 // If the GT_IND is the lhs of an assignment, we'll handle it
20770                 // as a heap/byref def when we get to the assignment.
20771                 // Otherwise, we treat it as a use here.
20772                 if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
20773                 {
20774                     GenTreeLclVarCommon* dummyLclVarTree = NULL;
20775                     bool                 dummyIsEntire   = false;
20776                     GenTreePtr           addrArg         = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
20777                     if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
20778                     {
20779                         fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20780                     }
20781                     else
20782                     {
20783                         // Defines a local addr
20784                         assert(dummyLclVarTree != nullptr);
20785                         fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
20786                     }
20787                 }
20788                 break;
20789
20790             // These should have been morphed away to become GT_INDs:
20791             case GT_FIELD:
20792             case GT_INDEX:
20793                 unreached();
20794                 break;
20795
20796             // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
20797             case GT_LOCKADD:
20798             case GT_XADD:
20799             case GT_XCHG:
20800             case GT_CMPXCHG:
20801                 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20802                 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20803                 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20804                 break;
20805
20806             case GT_MEMORYBARRIER:
20807                 // Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
20808                 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20809                 break;
20810
20811             // For now, all calls read/write GcHeap/ByrefExposed, with writes covering them in their entirety.  Might tighten this case
20812             // later.
20813             case GT_CALL:
20814             {
20815                 GenTreeCall* call    = tree->AsCall();
20816                 bool         modHeap = true;
20817                 if (call->gtCallType == CT_HELPER)
20818                 {
20819                     CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
20820
20821                     if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
20822                     {
20823                         modHeap = false;
20824                     }
20825                 }
20826                 if (modHeap)
20827                 {
20828                     fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20829                     fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20830                     fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20831                 }
20832             }
20833
20834                 // If this is a p/invoke unmanaged call or if this is a tail-call
20835                 // and we have an unmanaged p/invoke call in the method,
20836                 // then we're going to run the p/invoke epilog.
20837                 // So we mark the FrameRoot as used by this instruction.
20838                 // This ensures that the block->bbVarUse will contain
20839                 // the FrameRoot local var if it is a tracked variable.
20840
20841                 if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
20842                 {
20843                     /* Get the TCB local and mark it as used */
20844
20845                     noway_assert(info.compLvFrameListRoot < lvaCount);
20846
20847                     LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
20848
20849                     if (varDsc->lvTracked)
20850                     {
20851                         if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
20852                         {
20853                             VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
20854                         }
20855                     }
20856                 }
20857
20858                 break;
20859
20860             default:
20861
20862                 // Determine what memory kinds it defines.
20863                 if (tree->OperIsAssignment() || tree->OperIsBlkOp())
20864                 {
20865                     GenTreeLclVarCommon* dummyLclVarTree = NULL;
20866                     if (tree->DefinesLocal(this, &dummyLclVarTree))
20867                     {
20868                         if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
20869                         {
20870                             fgCurMemoryDef |= memoryKindSet(ByrefExposed);
20871
20872                             // We've found a store that modifies ByrefExposed
20873                             // memory but not GcHeap memory, so track their
20874                             // states separately.
20875                             byrefStatesMatchGcHeapStates = false;
20876                         }
20877                     }
20878                     else
20879                     {
20880                         // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
20881                         fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20882                     }
20883                 }
20884
20885                 // Are we seeing a GT_<cond> for a GT_QMARK node?
20886                 if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
20887                 {
20888                     // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
20889                     // Recursively call fgLegacyPerStatementLocalVarLiveness.
20890                     // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of
20891                     // the current fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and
20892                     // useSet_BeforeSplit.
20893                     // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
20894                     // node.
20895                     tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
20896
20897                     // The recursive call must have returned a GT_QMARK node.
20898                     noway_assert(tree->gtOper == GT_QMARK);
20899                 }
20900
20901                 break;
20902         }
20903     }
20904
20905 _return:
20906     return tree;
20907 }
20908
20909 /*****************************************************************************/
20910
20911 /*****************************************************************************
20912  * Initialize the TCB local and the NDirect stub; afterwards, "push"
20913  * the hoisted NDirect stub.
20914  *
20915  * 'initRegs' is the set of registers which will be zeroed out by the prolog;
20916  *             typically initRegs is zero.
20917  *
20918  * The layout of the NDirect Inlined Call Frame is as follows:
20919  * (see VM/frames.h and VM/JITInterface.cpp for more information)
20920  *
20921  *   offset     field name                        when set
20922  *  --------------------------------------------------------------
20923  *    +00h      vptr for class InlinedCallFrame   method prolog
20924  *    +04h      m_Next                            method prolog
20925  *    +08h      m_Datum                           call site
20926  *    +0ch      m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
20927  *    +10h      m_pCallerReturnAddress            call site
20928  *    +14h      m_pCalleeSavedRegisters           not set by JIT
20929  *    +18h      JIT retval spill area (int)       before call_gc
20930  *    +1ch      JIT retval spill area (long)      before call_gc
20931  *    +20h      Saved value of EBP                method prolog
20932  */
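
/* An illustrative C++ sketch of the x86 layout described in the table above (a sketch only;
   the authoritative definition is InlinedCallFrame in VM/frames.h, and the retval spill
   field names below are approximations):

       struct InlinedCallFrameSketch
       {
           void* vptr;                    // +00h  method prolog
           void* m_Next;                  // +04h  method prolog
           void* m_Datum;                 // +08h  call site
           void* m_pCallSiteTracker;      // +0ch  call site; zeroed in method prolog
           void* m_pCallerReturnAddress;  // +10h  call site
           void* m_pCalleeSavedRegisters; // +14h  not set by JIT
           int   retValSpillLo;           // +18h  JIT retval spill area (int)
           int   retValSpillHi;           // +1ch  JIT retval spill area (long, upper half)
           void* savedEBP;                // +20h  method prolog
       };
*/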
20933
20934 regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
20935 {
20936     assert(compiler->compGeneratingProlog);
20937     noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
20938     noway_assert(compiler->info.compCallUnmanaged);
20939
20940     CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
20941     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
20942
20943     /* let's find out if compLvFrameListRoot is enregistered */
20944
20945     LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
20946
20947     noway_assert(!varDsc->lvIsParam);
20948     noway_assert(varDsc->lvType == TYP_I_IMPL);
20949
20950     DWORD threadTlsIndex, *pThreadTlsIndex;
20951
20952     threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
20953 #if defined(_TARGET_X86_)
20954     if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
20955 #else
20956     if (true)
20957 #endif
20958     {
20959         // Instead of calling GetThread(), and getting GS cookie and
20960         // InlinedCallFrame vptr through indirections, we'll call only one helper.
20961         // The helper takes the frame address in REG_PINVOKE_FRAME, returns the TCB in REG_PINVOKE_TCB,
20962         // and uses REG_PINVOKE_SCRATCH as a scratch register.
20963         getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
20964                                   pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
20965         regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
20966
20967         // We're about to trash REG_PINVOKE_TCB; it had better not be in use!
20968         assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
20969
20970         // Don't use the argument registers (including the special argument in
20971         // REG_PINVOKE_FRAME) for computing the target address.
20972         regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
20973
20974         genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
20975
20976         regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
20977
20978         if (varDsc->lvRegister)
20979         {
20980             regNumber regTgt = varDsc->lvRegNum;
20981
20982             // We are about to initialize it, so turn the bit off in initRegs to prevent
20983             // the prolog from reinitializing it.
20984             initRegs &= ~genRegMask(regTgt);
20985
20986             if (regTgt != REG_PINVOKE_TCB)
20987             {
20988                 // move the TCB to its register if necessary
20989                 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
20990                 regTracker.rsTrackRegTrash(regTgt);
20991             }
20992         }
20993         else
20994         {
20995             // move TCB to its stack location
20996             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
20997                                       compiler->info.compLvFrameListRoot, 0);
20998         }
20999
21000         // We are done; the rest of this function deals with the inlined case.
21001         return initRegs;
21002     }
21003
21004     regNumber regTCB;
21005
21006     if (varDsc->lvRegister)
21007     {
21008         regTCB = varDsc->lvRegNum;
21009
21010         // We are about to initialize it, so turn the bit off in initRegs to prevent
21011         // the prolog from reinitializing it.
21012         initRegs &= ~genRegMask(regTCB);
21013     }
21014     else // varDsc is allocated on the Stack
21015     {
21016         regTCB = REG_PINVOKE_TCB;
21017     }
21018
21019 #if !defined(_TARGET_ARM_)
21020 #define WIN_NT_TLS_OFFSET (0xE10)
21021 #define WIN_NT5_TLS_HIGHOFFSET (0xf94)
21022
21023     /* get TCB,  mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
21024
21025     // TODO-ARM-CQ: should we inline TlsGetValue here?
21026
21027     if (threadTlsIndex < 64)
21028     {
21029         //  mov  reg, FS:[0xE10+threadTlsIndex*4]
21030         getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
21031                                   WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
21032         regTracker.rsTrackRegTrash(regTCB);
21033     }
21034     else
21035     {
21036         DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
21037         threadTlsIndex -= 64;
21038
21039         // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
21040         // mov reg, [reg+threadTlsIndex*4]
21041
21042         getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
21043         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
21044         regTracker.rsTrackRegTrash(regTCB);
21045     }
21046 #endif
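
    /* For reference, the two load sequences above correspond to the following C sketch,
       assuming the classic 32-bit Windows TEB layout implied by WIN_NT_TLS_OFFSET and
       WIN_NT5_TLS_HIGHOFFSET ('teb' is the FS segment base; NULL checks omitted):

           void* GetTcbFromTls(char* teb, DWORD tlsIndex)
           {
               void** slots          = (void**)(teb + 0xE10);   // TEB->TlsSlots
               void** expansionSlots = *(void***)(teb + 0xF94); // TEB->TlsExpansionSlots
               return (tlsIndex < 64) ? slots[tlsIndex] : expansionSlots[tlsIndex - 64];
           }
    */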
21047
21048     /* save TCB in local var if not enregistered */
21049
21050     if (!varDsc->lvRegister)
21051     {
21052         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
21053     }
21054
21055     /* set frame's vptr */
21056
21057     const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
21058     inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
21059     noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
21060
21061     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
21062                                compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
21063                                REG_PINVOKE_SCRATCH);
21064
21065     // Set the GSCookie
21066     GSCookie gsCookie, *pGSCookie;
21067     compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
21068     noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
21069
21070     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
21071                                pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
21072
21073     /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
21074        set next field in frame */
21075
21076     getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
21077                                pInfo->offsetOfThreadFrame);
21078     regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21079
21080     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
21081                               compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21082
21083     noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
21084
21085     /* set EBP value in frame */
21086     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
21087                               compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
21088
21089     /* reset track field in frame */
21090     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
21091                                pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
21092
21093     /* get address of our frame */
21094
21095     getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
21096                               pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21097     regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21098
21099     /* now "push" our N/direct frame */
21100
21101     getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
21102                                pInfo->offsetOfThreadFrame);
21103
21104     return initRegs;
21105 }
21106
21107 /*****************************************************************************
21108  *  Unchain the InlinedCallFrame.
21109  *  Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
21110  *  or tail call.
21111  */
21112 void CodeGen::genPInvokeMethodEpilog()
21113 {
21114     if (compiler->opts.ShouldUsePInvokeHelpers())
21115         return;
21116
21117     noway_assert(compiler->info.compCallUnmanaged);
21118     noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21119     noway_assert(compiler->compCurBB == compiler->genReturnBB ||
21120                  (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
21121                  (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
21122
21123     CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21124     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21125
21126     getEmitter()->emitDisableRandomNops();
21127     // debug check to make sure that we're not using ESI and/or EDI across this call, except for
21128     // compLvFrameListRoot.
21129     unsigned regTrashCheck = 0;
21130
21131     /* XXX Tue 5/29/2007
21132      * We explicitly add interference for these in CodeGen::rgPredictRegUse.  If you change the code
21133      * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
21134      */
21135     LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21136     regNumber  reg;
21137     regNumber  reg2 = REG_PINVOKE_FRAME;
21138
21139     //
21140     // Two cases for epilog invocation:
21141     //
21142     // 1. Return
21143     //    We can trash the ESI/EDI registers.
21144     //
21145     // 2. Tail call
21146     //    When tail called, we'd like to preserve enregistered args
21147     //    in ESI/EDI so we can pass them to the callee.
21148     //
21149     // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
21150     // Instead use the reserved local variable slot.
21151     //
21152     if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21153     {
21154         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21155         {
21156 #if FEATURE_FIXED_OUT_ARGS
21157             // Save the register in the reserved local var slot.
21158             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
21159                                       compiler->lvaPInvokeFrameRegSaveVar, 0);
21160 #else
21161             inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
21162 #endif
21163         }
21164         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21165         {
21166 #if FEATURE_FIXED_OUT_ARGS
21167             // Save the register in the reserved local var slot.
21168             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
21169                                       compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21170 #else
21171             inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
21172 #endif
21173         }
21174     }
21175
21176     if (varDsc->lvRegister)
21177     {
21178         reg = varDsc->lvRegNum;
21179         if (reg == reg2)
21180             reg2 = REG_PINVOKE_TCB;
21181
21182         regTrashCheck |= genRegMask(reg2);
21183     }
21184     else
21185     {
21186         /* mov esi, [tcb address]    */
21187
21188         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
21189                                   0);
21190         regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21191         reg = REG_PINVOKE_TCB;
21192
21193         regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
21194     }
21195
21196     /* mov edi, [ebp-frame.next] */
21197
21198     getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
21199                               pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21200     regTracker.rsTrackRegTrash(reg2);
21201
21202     /* mov [esi+offsetOfThreadFrame], edi */
21203
21204     getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
21205
21206     noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
21207
21208     if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
21209         compiler->lvaTable[compiler->genReturnLocal].lvRegister)
21210     {
21211         // really make sure we're not clobbering compiler->genReturnLocal.
21212         noway_assert(
21213             !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
21214               ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
21215     }
21216
21217     (void)regTrashCheck;
21218
21219     // Restore the registers ESI and EDI.
21220     if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21221     {
21222         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21223         {
21224 #if FEATURE_FIXED_OUT_ARGS
21225             // Restore the register from the reserved local var slot.
21226             getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
21227                                       compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21228 #else
21229             inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
21230 #endif
21231             regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21232         }
21233         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21234         {
21235 #if FEATURE_FIXED_OUT_ARGS
21236             // Restore the register from the reserved local var slot.
21237             getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
21238                                       compiler->lvaPInvokeFrameRegSaveVar, 0);
21239 #else
21240             inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
21241 #endif
21242             regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21243         }
21244     }
21245     getEmitter()->emitEnableRandomNops();
21246 }
21247
21248 /*****************************************************************************
21249     This function emits the call-site prolog for direct calls to unmanaged code.
21250     It does all the necessary setup of the InlinedCallFrame.
21251     frameListRoot specifies the local containing the thread control block.
21252     argSize or methodToken is the value to be copied into the m_datum
21253             field of the frame (methodToken may be indirected & have a reloc)
21254     The function returns the register now containing the thread control block
21255     (it could be either enregistered or loaded into one of the scratch registers).
21256 */
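
/* For example, the two legal call shapes look like this (hypothetical argument values;
   per the noway_assert later in the function, at most one of argSize / methodToken is
   meaningful):

       genPInvokeCallProlog(frameListRoot, 0x10, NULL, returnLabel);     // unmanaged calli: m_datum = stack arg size
       genPInvokeCallProlog(frameListRoot, 0, methodToken, returnLabel); // P/Invoke: m_datum = method handle
*/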
21257
21258 regNumber CodeGen::genPInvokeCallProlog(LclVarDsc*            frameListRoot,
21259                                         int                   argSize,
21260                                         CORINFO_METHOD_HANDLE methodToken,
21261                                         BasicBlock*           returnLabel)
21262 {
21263     // Some stack locals might be 'cached' in registers; we need to trash them
21264     // from the regTracker *and* also ensure the gc tracker does not consider
21265     // them live (see the next assert).  However, they might be live reg vars
21266     // that are non-pointers CSE'd from pointers.
21267     // That means the register will be live in rsMaskVars, so we can't just
21268     // call gcMarkSetNpt().
21269     {
21270         regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
21271         gcInfo.gcRegGCrefSetCur &= ~deadRegs;
21272         gcInfo.gcRegByrefSetCur &= ~deadRegs;
21273
21274 #ifdef DEBUG
21275         deadRegs &= regSet.rsMaskVars;
21276         if (deadRegs)
21277         {
21278             for (LclVarDsc* varDsc = compiler->lvaTable;
21279                  ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
21280             {
21281                 if (!varDsc->lvTracked || !varDsc->lvRegister)
21282                     continue;
21283
21284                 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
21285                     continue;
21286
21287                 regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
21288                 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
21289                     varRegMask |= genRegMask(varDsc->lvOtherReg);
21290
21291                 if (varRegMask & deadRegs)
21292                 {
21293                     // We found the enregistered var that should not be live if it
21294                     // was a GC pointer.
21295                     noway_assert(!varTypeIsGC(varDsc));
21296                     deadRegs &= ~varRegMask;
21297                 }
21298             }
21299         }
21300 #endif // DEBUG
21301     }
21302
21303     /* Since we are using the InlinedCallFrame, we should have spilled all
21304        GC pointers to it - even from callee-saved registers */
21305
21306     noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
21307
21308     /* must specify only one of these parameters */
21309     noway_assert((argSize == 0) || (methodToken == NULL));
21310
21311     /* We are about to call unmanaged code directly.
21312        Before we can do that we have to emit the following sequence:
21313
21314        mov  dword ptr [frame.callTarget], MethodToken
21315        mov  dword ptr [frame.callSiteTracker], esp
21316        mov  reg, dword ptr [tcb_address]
21317        mov  byte  ptr [tcb+offsetOfGcState], 0
21318
21319      */
21320
21321     CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21322
21323     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21324
21325 #ifdef _TARGET_ARM_
21326     if (compiler->opts.ShouldUsePInvokeHelpers())
21327     {
21328         regNumber baseReg;
21329         int       adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, true, &baseReg, 0);
21330
21331         getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
21332         genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_BEGIN,
21333                           0,           // argSize
21334                           EA_UNKNOWN); // retSize
21335         regTracker.rsTrackRegTrash(REG_ARG_0);
21336         return REG_ARG_0;
21337     }
21338 #endif
21339
21340     /* mov   dword ptr [frame.callSiteTarget], value */
21341
21342     if (methodToken == NULL)
21343     {
21344         /* mov   dword ptr [frame.callSiteTarget], argSize */
21345         instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
21346                                    pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21347     }
21348     else
21349     {
21350         void *embedMethHnd, *pEmbedMethHnd;
21351
21352         embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
21353
21354         noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
21355
21356         if (embedMethHnd != NULL)
21357         {
21358             /* mov   dword ptr [frame.callSiteTarget], "MethodDesc" */
21359
21360             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
21361                                        compiler->lvaInlinedPInvokeFrameVar,
21362                                        pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21363         }
21364         else
21365         {
21366             /* mov   reg, dword ptr [MethodDescIndir]
21367                mov   dword ptr [frame.callSiteTarget], reg */
21368
21369             regNumber reg = regSet.rsPickFreeReg();
21370
21371 #if CPU_LOAD_STORE_ARCH
21372             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
21373             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
21374 #else  // !CPU_LOAD_STORE_ARCH
21375             getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
21376 #endif // !CPU_LOAD_STORE_ARCH
21377             regTracker.rsTrackRegTrash(reg);
21378             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
21379                                       pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21380         }
21381     }
21382
21383     regNumber tcbReg = REG_NA;
21384
21385     if (frameListRoot->lvRegister)
21386     {
21387         tcbReg = frameListRoot->lvRegNum;
21388     }
21389     else
21390     {
21391         tcbReg = regSet.rsGrabReg(RBM_ALLINT);
21392
21393         /* mov reg, dword ptr [tcb address]    */
21394
21395         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
21396                                   (unsigned)(frameListRoot - compiler->lvaTable), 0);
21397         regTracker.rsTrackRegTrash(tcbReg);
21398     }
21399
21400 #ifdef _TARGET_X86_
21401     /* mov   dword ptr [frame.callSiteTracker], esp */
21402
21403     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
21404                               pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
21405 #endif // _TARGET_X86_
21406
21407 #if CPU_LOAD_STORE_ARCH
21408     regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
21409     getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
21410     regTracker.rsTrackRegTrash(tmpReg);
21411     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
21412                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21413 #else  // !CPU_LOAD_STORE_ARCH
21414     /* mov   dword ptr [frame.callSiteReturnAddress], label */
21415
21416     getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
21417                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21418 #endif // !CPU_LOAD_STORE_ARCH
21419
21420 #if CPU_LOAD_STORE_ARCH
21421     instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
21422
21423     noway_assert(tmpReg != tcbReg);
21424
21425     getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
21426 #else  // !CPU_LOAD_STORE_ARCH
21427     /* mov   byte  ptr [tcbReg+offsetOfGcState], 0 */
21428
21429     getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
21430 #endif // !CPU_LOAD_STORE_ARCH
21431
21432     return tcbReg;
21433 }
21434
21435 /*****************************************************************************
21436  *
21437    First we have to mark in the hoisted NDirect stub that we are back
21438    in managed code. Then we have to check (via a global flag) whether a GC is
21439    pending or not. If so, we just call into a jit-helper.
21440    Right now we have this call always inlined, i.e. we always skip around
21441    the jit-helper call.
21442    Note:
21443    The tcb address is a regular local (initialized in the prolog), so it is either
21444    enregistered or in the frame:
21445
21446         tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
21447         mov  byte ptr[tcb_reg+offsetOfGcState], 1
21448         cmp  'global GC pending flag', 0
21449         je   @f
21450         [mov  ECX, tcb_reg]  OR [ecx was setup above]     ; we pass the tcb value to callGC
21451         [mov  [EBP+spill_area+0], eax]                    ; spill the int  return value if any
21452         [mov  [EBP+spill_area+4], edx]                    ; spill the long return value if any
21453         call @callGC
21454         [mov  eax, [EBP+spill_area+0] ]                   ; reload the int  return value if any
21455         [mov  edx, [EBP+spill_area+4] ]                   ; reload the long return value if any
21456     @f:
21457  */
21458
21459 void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
21460 {
21461 #ifdef _TARGET_ARM_
21462     if (compiler->opts.ShouldUsePInvokeHelpers())
21463     {
21464         noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21465
21466         regNumber baseReg;
21467         int       adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, true, &baseReg, 0);
21468
21469         getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
21470         genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_END,
21471                           0,           // argSize
21472                           EA_UNKNOWN); // retSize
21473         regTracker.rsTrackRegTrash(REG_ARG_0);
21474         return;
21475     }
21476 #endif
21477
21478     BasicBlock*      clab_nostop;
21479     CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21480     regNumber        reg2;
21481     regNumber        reg3;
21482
21483 #ifdef _TARGET_ARM_
21484     reg3 = REG_R3;
21485 #else
21486     reg3     = REG_EDX;
21487 #endif
21488
21489     getEmitter()->emitDisableRandomNops();
21490
21491     if (frameListRoot->lvRegister)
21492     {
21493         /* make sure that register is live across the call */
21494
21495         reg2 = frameListRoot->lvRegNum;
21496         noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
21497     }
21498     else
21499     {
21500         /* mov   reg2, dword ptr [tcb address]    */
21501         CLANG_FORMAT_COMMENT_ANCHOR;
21502
21503 #ifdef _TARGET_ARM_
21504         reg2 = REG_R2;
21505 #else
21506         reg2 = REG_ECX;
21507 #endif
21508
21509         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
21510                                   (unsigned)(frameListRoot - compiler->lvaTable), 0);
21511         regTracker.rsTrackRegTrash(reg2);
21512     }
21513
21514 #ifdef _TARGET_ARM_
21515     /* mov   r3, 1 */
21516     /* strb  [r2+offsetOfGcState], r3 */
21517     instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
21518     getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
21519 #else
21520     /* mov   byte ptr [tcb+offsetOfGcState], 1 */
21521     getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
21522 #endif
21523
21524     /* test global flag (we return to managed code) */
21525
21526     LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
21527
21528     addrOfCaptureThreadGlobal =
21529         compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
21530     noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
21531
21532     // Can we directly use addrOfCaptureThreadGlobal?
21533
21534     if (addrOfCaptureThreadGlobal)
21535     {
21536 #ifdef _TARGET_ARM_
21537         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
21538         getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21539         regTracker.rsTrackRegTrash(reg3);
21540         getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
21541 #else
21542         getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
21543 #endif
21544     }
21545     else
21546     {
21547 #ifdef _TARGET_ARM_
21548         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
21549         getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21550         regTracker.rsTrackRegTrash(reg3);
21551         getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21552         getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
21553 #else // !_TARGET_ARM_
21554
21555         getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
21556                                    (ssize_t)pAddrOfCaptureThreadGlobal);
21557         regTracker.rsTrackRegTrash(REG_ECX);
21558
21559         getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
21560
21561 #endif // !_TARGET_ARM_
21562     }
21563
21564     /* */
21565     clab_nostop = genCreateTempLabel();
21566
21567     /* Generate the conditional jump */
21568     emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
21569     inst_JMP(jmpEqual, clab_nostop);
21570
21571 #ifdef _TARGET_ARM_
21572 // The helper preserves the return value on ARM
21573 #else
21574     /* save return value (if necessary) */
21575     if (retVal != RBM_NONE)
21576     {
21577         if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
21578         {
21579             /* push eax */
21580
21581             inst_RV(INS_push, REG_INTRET, TYP_INT);
21582
21583             if (retVal == RBM_LNGRET)
21584             {
21585                 /* push edx */
21586
21587                 inst_RV(INS_push, REG_EDX, TYP_INT);
21588             }
21589         }
21590     }
21591 #endif
21592
21593     /* emit the call to the EE-helper that stops for GC (or other reasons) */
21594
21595     genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
21596                       EA_UNKNOWN);                 /* retSize */
21597
21598 #ifdef _TARGET_ARM_
21599 // The helper preserves the return value on ARM
21600 #else
21601     /* restore return value (if necessary) */
21602
21603     if (retVal != RBM_NONE)
21604     {
21605         if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
21606         {
21607             if (retVal == RBM_LNGRET)
21608             {
21609                 /* pop edx */
21610
21611                 inst_RV(INS_pop, REG_EDX, TYP_INT);
21612                 regTracker.rsTrackRegTrash(REG_EDX);
21613             }
21614
21615             /* pop eax */
21616
21617             inst_RV(INS_pop, REG_INTRET, TYP_INT);
21618             regTracker.rsTrackRegTrash(REG_INTRET);
21619         }
21620     }
21621 #endif
21622
21623     /* genCondJump() closes the current emitter block */
21624
21625     genDefineTempLabel(clab_nostop);
21626
21627     // This marks the InlinedCallFrame as "inactive".  In fully interruptible code, this is not atomic with
21628     // the above code.  So the process is:
21629     // 1) Return to cooperative mode
21630     // 2) Check to see if we need to stop for GC
21631     // 3) Return from the p/invoke (as far as the stack walker is concerned).
21632
21633     /* mov  dword ptr [frame.callSiteTracker], 0 */
21634
21635     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
21636                                pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21637
21638     getEmitter()->emitEnableRandomNops();
21639 }
21640
21641 /*****************************************************************************/
21642
21643 /*****************************************************************************
21644 *           TRACKING OF FLAGS
21645 *****************************************************************************/
21646
21647 void CodeGen::genFlagsEqualToNone()
21648 {
21649     genFlagsEqReg = REG_NA;
21650     genFlagsEqVar = (unsigned)-1;
21651     genFlagsEqLoc.Init();
21652 }
21653
21654 /*****************************************************************************
21655  *
21656  *  Record the fact that the flags register has a value that reflects the
21657  *  contents of the given register.
21658  */
21659
21660 void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
21661 {
21662     genFlagsEqLoc.CaptureLocation(getEmitter());
21663     genFlagsEqReg = reg;
21664
21665     /* previous setting of flags by a var becomes invalid */
21666
21667     genFlagsEqVar = 0xFFFFFFFF;
21668
21669     /* Set appropriate flags on the tree */
21670
21671     if (tree)
21672     {
21673         tree->gtFlags |= GTF_ZSF_SET;
21674         assert(tree->gtSetFlags());
21675     }
21676 }
21677
21678 /*****************************************************************************
21679  *
21680  *  Record the fact that the flags register has a value that reflects the
21681  *  contents of the given local variable.
21682  */
21683
21684 void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
21685 {
21686     genFlagsEqLoc.CaptureLocation(getEmitter());
21687     genFlagsEqVar = var;
21688
21689     /* previous setting of flags by a register becomes invalid */
21690
21691     genFlagsEqReg = REG_NA;
21692
21693     /* Set appropriate flags on the tree */
21694
21695     if (tree)
21696     {
21697         tree->gtFlags |= GTF_ZSF_SET;
21698         assert(tree->gtSetFlags());
21699     }
21700 }
21701
21702 /*****************************************************************************
21703  *
21704  *  Return an indication of whether the flags register is set to the current
21705  *  value of the given register/variable. The return value is as follows:
21706  *
21707  *      false  ..  nothing
21708  *      true   ..  the zero flag (ZF) and sign flag (SF) are set
21709  */
21710
21711 bool CodeGen::genFlagsAreReg(regNumber reg)
21712 {
21713     if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21714     {
21715         return true;
21716     }
21717
21718     return false;
21719 }
21720
21721 bool CodeGen::genFlagsAreVar(unsigned var)
21722 {
21723     if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21724     {
21725         return true;
21726     }
21727
21728     return false;
21729 }
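
/* A minimal sketch of the intended use of this tracking (hypothetical caller; 'reg',
   'jmpEqual' and 'skipLabel' are placeholders): before emitting "test reg, reg" ahead of
   a conditional jump, a caller can ask whether the flags already reflect 'reg' and elide
   the test when they do:

       if (!genFlagsAreReg(reg))
       {
           inst_RV_RV(INS_test, reg, reg, TYP_INT); // flags are stale -- set ZF/SF from 'reg'
       }
       inst_JMP(jmpEqual, skipLabel);
*/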
21730
21731 /*****************************************************************************
21732  * This utility function returns true iff the execution path from "from"
21733  * (inclusive) to "to" (exclusive) contains a death of the given var
21734  */
21735 bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
21736 {
21737     GenTreePtr tree;
21738     for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
21739     {
21740         if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
21741         {
21742             unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
21743             if (dyingVarNum == varNum)
21744                 return true;
21745             LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
21746             if (varDsc->lvPromoted)
21747             {
21748                 assert(varDsc->lvType == TYP_STRUCT);
21749                 unsigned firstFieldNum = varDsc->lvFieldLclStart;
21750                 if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
21751                 {
21752                     return true;
21753                 }
21754             }
21755         }
21756     }
21757     assert(tree != NULL);
21758     return false;
21759 }
21760
21761 #endif // LEGACY_BACKEND