1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                           CodeGenerator                                   XX
9 XX                                                                           XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 */
13 #include "jitpch.h"
14 #ifdef _MSC_VER
15 #pragma hdrstop
16 #endif
17 #include "codegen.h"
18
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
20
21 #ifdef _TARGET_AMD64_
22 #error AMD64 must be !LEGACY_BACKEND
23 #endif
24
25 #ifdef _TARGET_ARM64_
26 #error ARM64 must be !LEGACY_BACKEND
27 #endif
28
29 #include "gcinfo.h"
30 #include "emit.h"
31
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
34 #endif
35
36
37 /*****************************************************************************
38  *
39  *  Determine what variables die between beforeSet and afterSet, and
40  *  update the liveness globals accordingly:
41  *  compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
42  */
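/*
 *  Illustrative walk-through (the variable and register here are hypothetical):
 *  if tracked variable V03 is in 'beforeSet' but not in 'afterSet', it lands in
 *  'deadSet' below.  The loop then removes V03 from compiler->compCurLife and
 *  gcInfo.gcVarPtrSetCur, and, if V03 is enregistered in (say) ESI, clears ESI
 *  from regSet.rsMaskVars and drops its GC tracking unless the register is
 *  still multi-used (i.e. still in regSet.rsMaskUsed).
 */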
43
44 void                CodeGen::genDyingVars(VARSET_VALARG_TP   beforeSet,
45                                            VARSET_VALARG_TP   afterSet)
46 {
47     unsigned        varNum;
48     LclVarDsc   *   varDsc;
49     regMaskTP       regBit;
50     VARSET_TP       VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
51
52     if (VarSetOps::IsEmpty(compiler, deadSet))
53         return;
54
55     /* iterate through the dead variables */
56
57     VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
58     while (iter.NextElem(compiler, &varIndex))
59     {
60         varNum = compiler->lvaTrackedToVarNum[varIndex];
61         varDsc = compiler->lvaTable + varNum;
62
63         /* Remove this variable from the current live set (compCurLife) */
64
65         noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
66
67         VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
68
69         noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
70                      VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
71
72         VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
73
74         /* We are done if the variable is not enregistered */
75
76         if (!varDsc->lvRegister)
77         {
78 #ifdef  DEBUG
79             if (compiler->verbose)
80             {
81                 printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
82             }
83 #endif
84             continue;
85         }
86
87 #if !FEATURE_FP_REGALLOC
88         // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND 
89         if (!varDsc->IsFloatRegType())
90 #endif
91         {
92             /* Get hold of the appropriate register bit(s) */
93
94             if (varTypeIsFloating(varDsc->TypeGet()))
95             {
96                 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
97             }
98             else
99             {
100                 regBit = genRegMask(varDsc->lvRegNum);
101                 if  (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
102                     regBit |= genRegMask(varDsc->lvOtherReg);
103             }
104
105 #ifdef  DEBUG
106             if (compiler->verbose)
107             {
108                 printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex, compiler->compRegVarName(varDsc->lvRegNum));
109             }
110 #endif
111             noway_assert((regSet.rsMaskVars &  regBit) != 0);
112
113             regSet.RemoveMaskVars(regBit);
114
115             // Remove GC tracking if any for this register
116
117             if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
118                 gcInfo.gcMarkRegSetNpt(regBit);
119         }
120     }
121 }
122
123 /*****************************************************************************
124  *
125  *  Change the given enregistered local variable node to a register variable node
126  */
127
128 void                CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned     varNum,
129                                                     LclVarDsc* varDsc)
130 {
131     noway_assert(tree->gtOper == GT_LCL_VAR);
132     noway_assert(varDsc->lvRegister);
133
134     if  (isRegPairType(varDsc->lvType))
135     {
136         /* Check for the case of a variable that was narrowed to an int */
137
138         if  (isRegPairType(tree->gtType))
139         {
140             genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
141             return;
142         }
143
144         noway_assert(tree->gtFlags & GTF_VAR_CAST);
145         noway_assert(tree->gtType == TYP_INT);
146     }
147     else
148     {
149         noway_assert(!isRegPairType(tree->gtType));
150     }
151
152     /* It's a register variable -- modify the node */
153
154     unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
155
156     ValueNumPair vnp = tree->gtVNPair;  // Save the ValueNumPair
157     tree->SetOper(GT_REG_VAR);
158     tree->gtVNPair = vnp;               // Preserve the ValueNumPair, as SetOper will clear it.
159
160     tree->gtFlags             |= livenessFlags;
161     tree->gtFlags             |= GTF_REG_VAL;
162     tree->gtRegNum             = varDsc->lvRegNum;
163     tree->gtRegVar.gtRegNum    = varDsc->lvRegNum;
164     tree->gtRegVar.SetLclNum(varNum);
165
166 }
167
168 // inline
169 void CodeGen::saveLiveness(genLivenessSet * ls)
170 {
171     VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
172     VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
173     ls->maskVars  = (regMaskSmall)regSet.rsMaskVars;
174     ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
175     ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
176 }
177
178 // inline
179 void CodeGen::restoreLiveness(genLivenessSet * ls)
180 {
181     VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
182     VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
183     regSet.rsMaskVars       = ls->maskVars;
184     gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
185     gcInfo.gcRegByrefSetCur = ls->byRefRegs;
186 }
187
188 // inline
189 void CodeGen::checkLiveness(genLivenessSet * ls)
190 {
191     assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
192     assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
193     assert(regSet.rsMaskVars       == ls->maskVars);
194     assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
195     assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
196 }
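// Illustrative sketch of how the three helpers above are intended to pair up
// (not lifted from a specific call site; 'snapshot' is a hypothetical local):
//
//     genLivenessSet snapshot;
//     saveLiveness(&snapshot);        // capture live sets and register masks
//     /* ... generate one arm of a conditional ... */
//     restoreLiveness(&snapshot);     // rewind before generating the other arm
//     /* ... generate the other arm ... */
//     checkLiveness(&snapshot);       // DEBUG: both arms must leave the same liveness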
197
198 // inline
199 bool                CodeGenInterface::genMarkLclVar(GenTreePtr tree)
200 {
201     unsigned        varNum;
202     LclVarDsc   *   varDsc;
203
204     assert(tree->gtOper == GT_LCL_VAR);
205
206     /* Does the variable live in a register? */
207
208     varNum = tree->gtLclVarCommon.gtLclNum;
209     assert(varNum < compiler->lvaCount);
210     varDsc = compiler->lvaTable + varNum;
211
212     if  (varDsc->lvRegister)
213     {
214         genBashLclVar(tree, varNum, varDsc);
215         return true;
216     }
217     else
218     {
219         return false;
220     }
221 }
222
223 // inline
224 GenTreePtr          CodeGen::genGetAddrModeBase(GenTreePtr tree)
225 {
226     bool            rev;
227     unsigned        mul;
228     unsigned        cns;
229     GenTreePtr      adr;
230     GenTreePtr      idx;
231
232     if  (genCreateAddrMode(tree,        // address
233                            0,           // mode
234                            false,       // fold
235                            RBM_NONE,    // reg mask
236                            &rev,        // reverse ops
237                            &adr,        // base addr
238                            &idx,        // index val
239 #if SCALED_ADDR_MODES
240                            &mul,        // scaling
241 #endif
242                            &cns,        // displacement
243                            true))       // don't generate code
244         return  adr;
245     else
246         return  NULL;
247 }
248
249 // inline
250 void                CodeGen::genSinglePush()
251 {
252     genStackLevel += sizeof(void*);
253 }
254
255 // inline
256 void                CodeGen::genSinglePop()
257 {
258     genStackLevel -= sizeof(void*);
259 }
260
261
262 #if FEATURE_STACK_FP_X87
263 // inline
264 void        CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
265 {
266     genFPstkLevel = newValue;
267 }
268
269 // inline
270 unsigned    CodeGenInterface::genGetFPstkLevel()
271 {
272     return genFPstkLevel;
273 }
274
275 // inline
276 void        CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
277 {
278     noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
279     genFPstkLevel += inc;
280 }
281
282 // inline
283 void        CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
284 {
285     noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
286     genFPstkLevel -= dec;
287 }
288
289 #endif // FEATURE_STACK_FP_X87
290
291 /*****************************************************************************
292  *
293  *  Generate code that will set the given register to the integer constant.
294  */
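/*
 *  Illustrative example (register numbers are hypothetical): if ECX is already
 *  tracked as holding 0x1000 and the caller asks for 0x1004 in EAX, the x86 path
 *  below notices that the delta (4) fits in a byte and emits "lea eax, [ecx+4]"
 *  instead of materializing the full immediate; a request for zero goes through
 *  instGen_Set_Reg_To_Zero instead.
 */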
295
296 void                CodeGen::genSetRegToIcon(regNumber     reg,
297                                              ssize_t       val,
298                                              var_types     type,
299                                              insFlags      flags)
300 {
301     noway_assert(type != TYP_REF || val == NULL);
302
303     /* Does the reg already hold this constant? */
304
305     if  (!regTracker.rsIconIsInReg(val, reg))
306     {
307         if  (val == 0)
308         {
309             instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
310         }
311 #ifdef _TARGET_ARM_
312         // If we can set a register to a constant with a small encoding, then do that.
313         else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
314         {
315             instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
316         }
317 #endif
318         else
319         {
320             /* See if some register already holds the value, or a value close to it */
321             bool      constantLoaded = false;
322             ssize_t   delta;
323             regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
324
325             if (srcReg != REG_NA)
326             {
327                 if (delta == 0)
328                 {
329                     inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
330                     constantLoaded = true;
331                 }
332                 else
333                 {
334 #if defined(_TARGET_XARCH_)
335                     /* delta should fit inside a byte */
336                     if (delta == (signed char)delta)
337                     {
338                         /* use an lea instruction to set reg */
339                         getEmitter()->emitIns_R_AR (INS_lea,
340                                                   emitTypeSize(type),
341                                                   reg,
342                                                   srcReg,
343                                                   (int)delta);
344                         constantLoaded = true;
345                     }
346 #elif defined(_TARGET_ARM_)
347                     /* We found a register 'regS' that has the value we need, modulo a small delta.
348                        That is, the value we need is 'regS + delta'.
349                        We want to generate one of the following instructions, listed in order of preference:
350
351                             adds  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and 0<=delta<=255
352                             subs  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and -255<=delta<=0
353                             adds  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
354                             subs  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
355                             mov   regD, icon         ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
356                             movw  regD, icon         ; 4 bytes. 0<=icon<=65535
357                             add.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
358                             sub.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
359                             addw  regD, regS, delta  ; 4 bytes. 0<=delta<=4095
360                             subw  regD, regS, delta  ; 4 bytes. -4095<=delta<=0
361
362                        If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
363                        than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
364                        can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
365                        0x80000000. In this case, delta will be 1.
366                     */
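                    /* Worked example (values are illustrative): if regS holds 0x1000 and we
                       need 0x1003, then delta == 3; when regD and regS are both low registers
                       and we are allowed to set flags, the 2-byte "adds regD, regS, #3" form
                       wins.  If no delta form or mov/movw immediate applies, we fall through
                       to the general immediate load below. */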
367
368                     bool useAdd = false;
369                     regMaskTP regMask    = genRegMask(reg);
370                     regMaskTP srcRegMask = genRegMask(srcReg);
371
372                     if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) && (unsigned_abs(delta) <= 255))
373                     {
374                         useAdd = true;
375                     }
376                     else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) && (unsigned_abs(delta) <= 7))
377                     {
378                         useAdd = true;
379                     }
380                     else if (arm_Valid_Imm_For_Mov(val))
381                     {
382                         // fall through to general "!constantLoaded" case below
383                     }
384                     else if (arm_Valid_Imm_For_Add(delta, flags))
385                     {
386                         useAdd = true;
387                     }
388
389                     if (useAdd)
390                     {
391                         getEmitter()->emitIns_R_R_I (INS_add,
392                                                    EA_4BYTE,
393                                                    reg,
394                                                    srcReg,
395                                                    delta, 
396                                                    flags);
397                         constantLoaded = true;
398                     }
399 #else
400                     assert(!"Codegen missing");
401 #endif
402                 }
403             }
404
405             if (!constantLoaded) // Have we loaded it yet?
406             {
407 #ifdef _TARGET_XARCH_
408                 if (val == -1)
409                 {
410                     /* or reg,-1 takes 3 bytes */
411                     inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
412                 }
413 #ifdef _TARGET_X86_
414                 else
415                 /* For SMALL_CODE it is smaller to push a small immediate and
416                    then pop it into the dest register */
417                 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) &&
418                     val == (signed char)val)
419                 {
420                     /* "mov" has no s(sign)-bit and so always takes 6 bytes,
421                        whereas push+pop takes 2+1 bytes */
422
423                     inst_IV(INS_push, val);
424                     genSinglePush();
425
426                     inst_RV(INS_pop, reg, type);
427                     genSinglePop();
428                 }
429 #endif  // _TARGET_X86_
430                 else
431 #endif  // _TARGET_XARCH_
432                 {
433                     instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
434                 }
435             }
436         }
437     }
438     regTracker.rsTrackRegIntCns(reg, val);
439     gcInfo.gcMarkRegPtrVal(reg, type);
440 }
441
442 /*****************************************************************************
443  *
444  *  Find an existing register set to the given integer constant, or
445  *  pick a register and generate code that will set it to the integer constant.
446  *
447  *  If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
448  *  to pick some register to set.  NOTE that this means the returned regNumber
449  *  might *not* be in regBest.  It also implies that you should lock any registers
450  *  you don't want spilled (not just mark as used).
451  *
452  */
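/*
 *  Illustrative caller-side sketch (the register and mask names are hypothetical,
 *  not taken from a particular call site): since regSet.rsPickReg may ignore
 *  'regBest' and may spill, a caller that must preserve 'baseReg' should lock it
 *  first, along the lines of:
 *
 *      regMaskTP lockedUsed;
 *      regSet.rsLockReg(genRegMask(baseReg), &lockedUsed);
 *      regNumber cnsReg = genGetRegSetToIcon(4, RBM_ALLINT & ~genRegMask(baseReg));
 *      regSet.rsUnlockReg(genRegMask(baseReg), lockedUsed);
 */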
453
454 regNumber           CodeGen::genGetRegSetToIcon(ssize_t        val,
455                                                 regMaskTP      regBest /* = 0 */,
456                                                 var_types      type    /* = TYP_INT */)
457 {
458     regNumber regCns;
459 #if REDUNDANT_LOAD
460
461     // Is there already a register holding this constant that we can use?
462     regCns = regTracker.rsIconIsInReg(val);
463
464     if  (regCns == REG_NA)
465 #endif
466     {
467         // If not, grab a register to hold the constant, preferring
468         // any register besides RBM_TMP_0 so it can hopefully be re-used
469         regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
470
471         // Now set the constant
472         genSetRegToIcon(regCns, val, type);
473     }
474
475     // NOTE: there is no guarantee that regCns is in regBest's mask
476     return regCns;
477 }
478
479
480
481 /*****************************************************************************/
482 /*****************************************************************************
483  *
484  *  Add the given constant to the specified register.
485  *  'tree' is the resulting tree
486  */
487
488 void                CodeGen::genIncRegBy(regNumber     reg,
489                                          ssize_t       ival,
490                                          GenTreePtr    tree,
491                                          var_types     dstType,
492                                          bool          ovfl)
493 {
494     bool setFlags = (tree!=NULL) && tree->gtSetFlags();
495
496 #ifdef _TARGET_XARCH_
497     /* First check to see if we can generate inc or dec instruction(s) */
498     /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
499     if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
500     {
501         emitAttr    size = emitTypeSize(dstType);
502
503         switch (ival)
504         {
505         case 2:
506             inst_RV(INS_inc, reg, dstType, size);
507             __fallthrough;
508         case 1:
509             inst_RV(INS_inc, reg, dstType, size);
510
511             goto UPDATE_LIVENESS;
512
513         case -2:
514             inst_RV(INS_dec, reg, dstType, size);
515             __fallthrough;
516         case -1:
517             inst_RV(INS_dec, reg, dstType, size);
518
519             goto UPDATE_LIVENESS;
520         }
521     }
522 #endif
523
524     insFlags  flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
525     inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
526
527 #ifdef _TARGET_XARCH_
528 UPDATE_LIVENESS:
529 #endif
530
531     if  (setFlags)
532         genFlagsEqualToReg(tree, reg);
533
534     regTracker.rsTrackRegTrash(reg);
535
536     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
537
538     if (tree != NULL)
539     {
540         if (!tree->OperIsAssignment())
541         {
542             genMarkTreeInReg(tree, reg);
543             if (varTypeIsGC(tree->TypeGet()))
544                 gcInfo.gcMarkRegSetByref(genRegMask(reg));
545         }
546     }
547 }
548
549
550 /*****************************************************************************
551  *
552  *  Subtract the given constant from the specified register.
553  *  Should only be used for unsigned subtraction with overflow checking;
554  *  otherwise genIncRegBy() can be used with -ival. We must not use genIncRegBy()
555  *  for these cases because the flags are set differently, and the subsequent
556  *  check for overflow won't work correctly.
557  *  'tree' is the resulting tree.
558  */
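/*
 *  Concrete illustration of the flags difference (x86 semantics): for an unsigned
 *  5 - 6, "sub reg, 6" sets the carry/borrow flag, which the overflow check tests;
 *  "add reg, -6" computes 5 + 0xFFFFFFFA and leaves carry clear, so routing this
 *  through genIncRegBy(-ival) would miss the overflow.
 */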
559
560 void                CodeGen::genDecRegBy(regNumber     reg,
561                                          ssize_t       ival,
562                                          GenTreePtr    tree)
563 {
564     noway_assert((tree->gtFlags & GTF_OVERFLOW) && ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
565     noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
566
567     regTracker.rsTrackRegTrash(reg);
568
569     noway_assert(!varTypeIsGC(tree->TypeGet()));
570     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
571
572     insFlags  flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
573     inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
574
575     if (tree->gtSetFlags())
576         genFlagsEqualToReg(tree, reg);
577
578     if (tree)
579     {
580         genMarkTreeInReg(tree, reg);
581     }
582 }
583
584 /*****************************************************************************
585  *
586  *  Multiply the specified register by the given value.
587  *  'tree' is the resulting tree
588  */
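/*
 *  Example (illustrative): a multiply by 8 with no overflow check takes the shift
 *  path below and becomes a single left shift by 3; a multiply by a non-power-of-two
 *  such as 10 falls through to the imul (x86) / mul path.
 */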
589
590 void                CodeGen::genMulRegBy(regNumber     reg,
591                                          ssize_t       ival,
592                                          GenTreePtr    tree,
593                                          var_types     dstType,
594                                          bool          ovfl)
595 {
596     noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
597
598     regTracker.rsTrackRegTrash(reg);
599
600     if (tree)
601     {
602         genMarkTreeInReg(tree, reg);
603     }
604
605     bool        use_shift = false;
606     unsigned    shift_by  = 0;
607
608     if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival-1)) == 0))
609     {
610         use_shift = true;
611         BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
612     }
613
614     if (use_shift)
615     {
616         if (shift_by != 0)
617         {
618             insFlags  flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
619             inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
620             if  (tree->gtSetFlags())
621                 genFlagsEqualToReg(tree, reg);
622         }
623     }
624     else
625     {
626         instruction    ins;
627 #ifdef _TARGET_XARCH_
628         ins = getEmitter()->inst3opImulForReg(reg);
629 #else
630         ins = INS_mul;
631 #endif
632
633         inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
634     }
635 }
636
637 /*****************************************************************************/
638 /*****************************************************************************/
639 /*****************************************************************************
640  *
641  *  Compute the value 'tree' into a register that's in 'needReg' 
642  *  (or any free register if 'needReg' is RBM_NONE).
643  *
644  *  Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
645  *  If keepReg==RegSet::KEEP_REG, we mark the register as being used.
646  *
647  *  If you require that the register returned is trashable, pass true for 'freeOnly'.
648  */
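/*
 *  Illustrative calls (hypothetical, using x86 register names; not lifted from a
 *  particular call site):
 *
 *      // force the value into EAX exactly and keep it marked as used
 *      genComputeReg(op, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
 *
 *      // any register acceptable, but the result must land in a trashable one
 *      genComputeReg(op, RBM_NONE, RegSet::ANY_REG, RegSet::FREE_REG, true);
 */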
649
650 void                CodeGen::genComputeReg(GenTreePtr       tree,
651                                            regMaskTP        needReg,
652                                            RegSet::ExactReg mustReg,
653                                            RegSet::KeepReg  keepReg,
654                                            bool             freeOnly)
655 {
656     noway_assert(tree->gtType != TYP_VOID);
657     
658     regNumber       reg;
659     regNumber       rg2;
660
661 #if FEATURE_STACK_FP_X87
662     noway_assert(genActualType(tree->gtType) == TYP_INT    ||
663                  genActualType(tree->gtType) == TYP_I_IMPL ||
664                  genActualType(tree->gtType) == TYP_REF    ||
665                                tree->gtType  == TYP_BYREF);
666 #elif defined(_TARGET_ARM_)
667     noway_assert(genActualType(tree->gtType) == TYP_INT    ||
668                  genActualType(tree->gtType) == TYP_I_IMPL ||
669                  genActualType(tree->gtType) == TYP_REF    ||
670                                tree->gtType  == TYP_BYREF  ||
671                  genActualType(tree->gtType) == TYP_FLOAT  ||
672                  genActualType(tree->gtType) == TYP_DOUBLE ||
673                  genActualType(tree->gtType) == TYP_STRUCT);
674 #else
675     noway_assert(genActualType(tree->gtType) == TYP_INT    ||
676                  genActualType(tree->gtType) == TYP_I_IMPL ||
677                  genActualType(tree->gtType) == TYP_REF    ||
678                                tree->gtType  == TYP_BYREF  ||
679                  genActualType(tree->gtType) == TYP_FLOAT  ||
680                  genActualType(tree->gtType) == TYP_DOUBLE);
681 #endif
682
683     /* Generate the value, hopefully into the right register */
684
685     genCodeForTree(tree, needReg);
686     noway_assert(tree->gtFlags & GTF_REG_VAL);
687
688     // There is a workaround in genCodeForTreeLng() that changes the type of the
689     // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
690     // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
691     // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
692     // running the rest of this code, because anything looking at gtRegNum on ARM or
693     // attempting to move from EAX/EDX will be wrong.
694     if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
695         goto REG_OK;
696
697     reg = tree->gtRegNum;
698
699     /* Did the value end up in an acceptable register? */
700
701     if  ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
702     {
703         /* Not good enough to satisfy the caller's orders */
704
705         if (varTypeIsFloating(tree))
706         {
707             RegSet::RegisterPreference pref(needReg, RBM_NONE);
708             rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
709         }
710         else
711         {
712             rg2 = regSet.rsGrabReg(needReg);
713         }
714     }
715     else
716     {
717         /* Do we have to end up with a free register? */
718
719         if  (!freeOnly)
720             goto REG_OK;
721
722         /* Did we luck out and the value got computed into an unused reg? */
723
724         if  (genRegMask(reg) & regSet.rsRegMaskFree())
725             goto REG_OK;
726
727         /* Register already in use, so spill previous value */
728
729         if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
730         {
731             rg2 = regSet.rsGrabReg(needReg);
732             if (rg2 == reg)
733             {
734                 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
735                 tree->gtRegNum = reg;
736                 goto REG_OK;
737             }
738         }
739         else
740         {
741             /* OK, let's find a trashable home for the value */
742
743             regMaskTP   rv1RegUsed;
744
745             regSet.rsLockReg  (genRegMask(reg), &rv1RegUsed);
746             rg2 = regSet.rsPickReg(needReg);
747             regSet.rsUnlockReg(genRegMask(reg),  rv1RegUsed);
748         }
749     }
750
751     noway_assert(reg != rg2);
752
753     /* Update the value in the target register */
754
755     regTracker.rsTrackRegCopy(rg2, reg);
756
757     inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
758
759     /* The value has been transferred to 'rg2' */
760
761     if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
762         gcInfo.gcMarkRegSetNpt(genRegMask(reg));
763
764     gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
765
766     /* The value is now in an appropriate register */
767
768     tree->gtRegNum = rg2;
769
770 REG_OK:
771
772     /* Does the caller want us to mark the register as used? */
773
774     if  (keepReg == RegSet::KEEP_REG)
775     {
776         /* In case we're computing a value into a register variable */
777
778         genUpdateLife(tree);
779
780         /* Mark the register as 'used' */
781
782         regSet.rsMarkRegUsed(tree);
783     }
784 }
785
786 /*****************************************************************************
787  *
788  *  Same as genComputeReg(), the only difference being that the result is
789  *  guaranteed to end up in a trashable register.
790  */
791
792 // inline
793 void                CodeGen::genCompIntoFreeReg(GenTreePtr   tree,
794                                                 regMaskTP    needReg,
795                                                 RegSet::KeepReg      keepReg)
796 {
797     genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
798 }
799
800 /*****************************************************************************
801  *
802  *  The value 'tree' was earlier computed into a register; free up that
803  *  register (but also make sure the value is presently in a register).
804  */
805
806 void                CodeGen::genReleaseReg(GenTreePtr    tree)
807 {
808     if  (tree->gtFlags & GTF_SPILLED)
809     {
810         /* The register has been spilled -- reload it */
811
812         regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
813         return;
814     }
815
816     regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
817 }
818
819 /*****************************************************************************
820  *
821  *  The value 'tree' was earlier computed into a register. Check whether that
822  *  register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
823  *  free the register. The caller shouldn't need to be setting GCness of the register
824  *  where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
825  */
826
827 void                CodeGen::genRecoverReg(GenTreePtr    tree,
828                                            regMaskTP     needReg,
829                                            RegSet::KeepReg       keepReg)
830 {
831     if  (tree->gtFlags & GTF_SPILLED)
832     {
833         /* The register has been spilled -- reload it */
834
835         regSet.rsUnspillReg(tree, needReg, keepReg);
836         return;
837     }
838     else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
839     {
840         /* We need the tree in another register. So move it there */
841
842         noway_assert(tree->gtFlags & GTF_REG_VAL);
843         regNumber   oldReg  = tree->gtRegNum;
844
845         /* Pick an acceptable register */
846
847         regNumber   reg     = regSet.rsGrabReg(needReg);
848
849         /* Copy the value */
850
851         inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
852         tree->gtRegNum      = reg;
853
854         gcInfo.gcMarkRegPtrVal(tree);
855         regSet.rsMarkRegUsed(tree);
856         regSet.rsMarkRegFree(oldReg, tree);
857
858         regTracker.rsTrackRegCopy(reg, oldReg);
859     }
860
861     /* Free the register if the caller desired so */
862
863     if  (keepReg == RegSet::FREE_REG)
864     {
865         regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
866         // Can't use RegSet::FREE_REG on a GC type
867         noway_assert(!varTypeIsGC(tree->gtType));
868     }
869     else
870     {
871         noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
872     }
873 }
874
875
876 /*****************************************************************************
877  *
878  * Move one half of a register pair into the corresponding half of its new pair.
879  */
880
881 // inline
882 void               CodeGen::genMoveRegPairHalf(GenTreePtr  tree,
883                                                regNumber   dst,
884                                                regNumber   src,
885                                                int         off)
886 {
887     if  (src == REG_STK)
888     {
889         // handle long to unsigned long overflow casts
890         while (tree->gtOper == GT_CAST)
891         {
892             noway_assert(tree->gtType == TYP_LONG);
893             tree = tree->gtCast.CastOp();
894         }
895         noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
896         noway_assert(tree->gtType == TYP_LONG);
897         inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
898         regTracker.rsTrackRegTrash(dst);
899     }
900     else
901     {
902         regTracker.rsTrackRegCopy(dst, src);
903         inst_RV_RV(INS_mov, dst, src, TYP_INT);
904     }
905 }
906
907 /*****************************************************************************
908  *
909  *  The given long value is in a register pair, but it's not an acceptable
910  *  one. We have to move the value into a register pair in 'needReg' (if
911  *  non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
912  *
913  *  Important note: if 'needReg' is non-zero, we assume the current pair
914  *  has not been marked as free. If, OTOH, 'newPair' is specified, we
915  *  assume that the current register pair is marked as used and free it.
916  */
917
918 void                CodeGen::genMoveRegPair(GenTreePtr  tree,
919                                             regMaskTP   needReg,
920                                             regPairNo   newPair)
921 {
922     regPairNo       oldPair;
923
924     regNumber       oldLo;
925     regNumber       oldHi;
926     regNumber       newLo;
927     regNumber       newHi;
928
929     /* Either a target set or a specific pair may be requested */
930
931     noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
932
933     /* Get hold of the current pair */
934
935     oldPair = tree->gtRegPair; noway_assert(oldPair != newPair);
936
937     /* Are we supposed to move to a specific pair? */
938
939     if  (newPair != REG_PAIR_NONE)
940     {
941         regMaskTP  oldMask = genRegPairMask(oldPair);
942         regMaskTP  loMask  = genRegMask(genRegPairLo(newPair));
943         regMaskTP  hiMask  = genRegMask(genRegPairHi(newPair));
944         regMaskTP  overlap = oldMask & (loMask|hiMask);
945
946         /* First lock any registers that are in both pairs */
947
948         noway_assert((regSet.rsMaskUsed &  overlap) == overlap);
949         noway_assert((regSet.rsMaskLock &  overlap) == 0);
950                 regSet.rsMaskLock |= overlap;
951
952         /* Make sure any additional registers we need are free */
953
954         if  ((loMask & regSet.rsMaskUsed) != 0 &&
955              (loMask & oldMask   ) == 0)
956         {
957             regSet.rsGrabReg(loMask);
958         }
959
960         if  ((hiMask & regSet.rsMaskUsed) != 0 &&
961              (hiMask & oldMask   ) == 0)
962         {
963             regSet.rsGrabReg(hiMask);
964         }
965
966         /* Unlock those registers we have temporarily locked */
967
968         noway_assert((regSet.rsMaskUsed &  overlap) == overlap);
969         noway_assert((regSet.rsMaskLock &  overlap) == overlap);
970                 regSet.rsMaskLock -= overlap;
971
972         /* We can now free the old pair */
973
974         regSet.rsMarkRegFree(oldMask);
975     }
976     else
977     {
978         /* Pick the new pair based on the caller's stated preference */
979
980         newPair = regSet.rsGrabRegPair(needReg);
981     }
982
983     // If the grabbed pair is the same as the old one, we're done
984     if (newPair==oldPair)
985     {
986         noway_assert(
987             (oldLo = genRegPairLo(oldPair),
988              oldHi = genRegPairHi(oldPair),
989              newLo = genRegPairLo(newPair),
990              newHi = genRegPairHi(newPair),
991              newLo != REG_STK && newHi != REG_STK));
992         return;
993     }
994
995
996     /* Move the values from the old pair into the new one */
997
998     oldLo = genRegPairLo(oldPair);
999     oldHi = genRegPairHi(oldPair);
1000     newLo = genRegPairLo(newPair);
1001     newHi = genRegPairHi(newPair);
1002
1003     noway_assert(newLo != REG_STK && newHi != REG_STK);
1004
1005     /* Careful - the register pairs might overlap */
1006
1007     if  (newLo == oldLo)
1008     {
1009         /* The low registers are identical, just move the upper half */
1010
1011         noway_assert(newHi != oldHi);
1012         genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1013     }
1014     else
1015     {
1016         /* The low registers are different, are the upper ones the same? */
1017
1018         if  (newHi == oldHi)
1019         {
1020             /* Just move the lower half, then */
1021             genMoveRegPairHalf(tree, newLo, oldLo, 0);
1022         }
1023         else
1024         {
1025             /* Both sets are different - is there an overlap? */
1026
1027             if  (newLo == oldHi)
1028             {
1029                 /* Are high and low simply swapped ? */
1030
1031                 if  (newHi == oldLo)
1032                 {
1033 #ifdef _TARGET_ARM_
1034                     regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegPairMask(oldPair) & ~genRegPairMask(newPair));
1035                     inst_RV_RV(INS_mov, regTmp, oldLo);
1036                     inst_RV_RV(INS_mov, oldLo, oldHi);
1037                     inst_RV_RV(INS_mov, oldHi, regTmp);
1038                     regTracker.rsTrackRegTrash(regTmp);
1039 #else
1040                     inst_RV_RV(INS_xchg, oldHi, oldLo);
1041 #endif
1042                     regTracker.rsTrackRegSwap(oldHi, oldLo);
1043                 }
1044                 else
1045                 {
1046                     /* New lower == old higher, so move higher half first */
1047
1048                     noway_assert(newHi != oldLo);
1049                     genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1050                     genMoveRegPairHalf(tree, newLo, oldLo, 0);
1051                 }
1052             }
1053             else
1054             {
1055                 /* Move lower half first */
1056                 genMoveRegPairHalf(tree, newLo, oldLo, 0);
1057                 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1058             }
1059         }
1060     }
1061
1062     /* Record the fact that we're switching to another pair */
1063
1064     tree->gtRegPair   = newPair;
1065 }
1066
1067 /*****************************************************************************
1068  *
1069  *  Compute the value 'tree' into the register pair specified by 'needRegPair'.
1070  *  If 'needRegPair' is REG_PAIR_NONE, use any free register pair, avoiding
1071  *  those in 'avoidReg'.
1072  *  If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1073  *  value ends up in as being used.
1074  */
1075
1076 void                CodeGen::genComputeRegPair(GenTreePtr      tree,
1077                                                regPairNo       needRegPair,
1078                                                regMaskTP       avoidReg,
1079                                                RegSet::KeepReg keepReg,
1080                                                bool            freeOnly)
1081 {
1082     regMaskTP       regMask;
1083     regPairNo       regPair;
1084     regMaskTP       tmpMask;
1085     regMaskTP       tmpUsedMask;
1086     regNumber       rLo;
1087     regNumber       rHi;
1088
1089     noway_assert(isRegPairType(tree->gtType));
1090
1091     if (needRegPair == REG_PAIR_NONE)
1092     {
1093         if (freeOnly)
1094         {
1095             regMask = regSet.rsRegMaskFree() & ~avoidReg;
1096             if (genMaxOneBit(regMask))
1097                 regMask = regSet.rsRegMaskFree();
1098         }
1099         else
1100         {
1101             regMask = RBM_ALLINT & ~avoidReg;
1102         }
1103
1104         if (genMaxOneBit(regMask))
1105             regMask = regSet.rsRegMaskCanGrab();
1106     }
1107     else
1108     {
1109         regMask = genRegPairMask(needRegPair);
1110     }
1111
1112     /* Generate the value, hopefully into the right register pair */
1113
1114     genCodeForTreeLng(tree, regMask, avoidReg);
1115
1116     noway_assert(tree->gtFlags & GTF_REG_VAL);
1117
1118     regPair = tree->gtRegPair;
1119     tmpMask = genRegPairMask(regPair);
1120
1121     rLo     = genRegPairLo(regPair);
1122     rHi     = genRegPairHi(regPair);
1123
1124     /* At least one half is in a real register */
1125
1126     noway_assert(rLo != REG_STK || rHi != REG_STK);
1127
1128     /* Did the value end up in an acceptable register pair? */
1129
1130     if  (needRegPair != REG_PAIR_NONE)
1131     {
1132         if  (needRegPair != regPair)
1133         {
1134             /* This is a workaround: if we specify a regPair for genMoveRegPair, */
1135             /* it expects the source pair to be marked as used */
1136             regSet.rsMarkRegPairUsed(tree);
1137             genMoveRegPair(tree, 0, needRegPair);
1138         }
1139     }
1140     else if  (freeOnly)
1141     {
1142         /* Do we have to end up with a free register pair?
1143            Something might have gotten freed up above */
1144         bool mustMoveReg=false;
1145
1146         regMask = regSet.rsRegMaskFree() & ~avoidReg;
1147
1148         if (genMaxOneBit(regMask))
1149             regMask = regSet.rsRegMaskFree();
1150
1151         if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
1152         {
1153             /* Note that we must call genMoveRegPair if one of our registers
1154                comes from the used mask, so that it will be properly spilled. */
1155
1156             mustMoveReg = true;
1157         }
1158
1159         if (genMaxOneBit(regMask))
1160             regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
1161
1162         if (genMaxOneBit(regMask))
1163             regMask |= regSet.rsRegMaskCanGrab();
1164
1165         /* Did the value end up in a free register pair? */
1166
1167         if  (mustMoveReg)
1168         {
1169             /* We'll have to move the value to a free (trashable) pair */
1170             genMoveRegPair(tree, regMask, REG_PAIR_NONE);
1171         }
1172     }
1173     else
1174     {
1175         noway_assert(needRegPair == REG_PAIR_NONE);
1176         noway_assert(!freeOnly);
1177
1178         /* It is possible for tmpMask to overlap regSet.rsMaskUsed */
1179         tmpUsedMask  = tmpMask & regSet.rsMaskUsed;
1180         tmpMask     &= ~regSet.rsMaskUsed;
1181
1182         /* Make sure that the value is in "real" registers */
1183         if (rLo == REG_STK)
1184         {
1185             /* Get one of the desired registers, but exclude rHi */
1186
1187             regSet.rsLockReg(tmpMask);
1188             regSet.rsLockUsedReg(tmpUsedMask);
1189
1190             regNumber reg = regSet.rsPickReg(regMask);
1191
1192             regSet.rsUnlockUsedReg(tmpUsedMask);
1193             regSet.rsUnlockReg(tmpMask);
1194
1195             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
1196
1197             tree->gtRegPair = gen2regs2pair(reg, rHi);
1198
1199             regTracker.rsTrackRegTrash(reg);
1200             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1201         }
1202         else if (rHi == REG_STK)
1203         {
1204             /* Get one of the desired registers, but exclude rLo */
1205
1206             regSet.rsLockReg(tmpMask);
1207             regSet.rsLockUsedReg(tmpUsedMask);
1208
1209             regNumber reg = regSet.rsPickReg(regMask);
1210
1211             regSet.rsUnlockUsedReg(tmpUsedMask);
1212             regSet.rsUnlockReg(tmpMask);
1213
1214             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
1215
1216             tree->gtRegPair = gen2regs2pair(rLo, reg);
1217
1218             regTracker.rsTrackRegTrash(reg);
1219             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1220         }
1221     }
1222
1223     /* Does the caller want us to mark the register as used? */
1224
1225     if  (keepReg == RegSet::KEEP_REG)
1226     {
1227         /* In case we're computing a value into a register variable */
1228
1229         genUpdateLife(tree);
1230
1231         /* Mark the register as 'used' */
1232
1233         regSet.rsMarkRegPairUsed(tree);
1234     }
1235 }
1236
1237 /*****************************************************************************
1238  *
1239  *  Same as genComputeRegPair(), the only difference being that the result
1240  *  is guaranteed to end up in a trashable register pair.
1241  */
1242
1243 // inline
1244 void                CodeGen::genCompIntoFreeRegPair(GenTreePtr      tree,
1245                                                     regMaskTP       avoidReg,
1246                                                     RegSet::KeepReg keepReg)
1247 {
1248     genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
1249 }
1250
1251 /*****************************************************************************
1252  *
1253  *  The value 'tree' was earlier computed into a register pair; free up that
1254  *  register pair (but also make sure the value is presently in a register
1255  *  pair).
1256  */
1257
1258 void                CodeGen::genReleaseRegPair(GenTreePtr    tree)
1259 {
1260     if  (tree->gtFlags & GTF_SPILLED)
1261     {
1262         /* The register has been spilled -- reload it */
1263
1264         regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
1265         return;
1266     }
1267
1268     regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1269 }
1270
1271 /*****************************************************************************
1272  *
1273  *  The value 'tree' was earlier computed into a register pair. Check whether
1274  *  either register of that pair has been spilled (and reload it if so), and
1275  *  if 'keepReg' is RegSet::FREE_REG, free the register pair.
1276  */
1277
1278 void                CodeGen::genRecoverRegPair(GenTreePtr       tree,
1279                                                regPairNo        regPair,
1280                                                RegSet::KeepReg  keepReg)
1281 {
1282     if  (tree->gtFlags & GTF_SPILLED)
1283     {
1284         regMaskTP regMask;
1285
1286         if (regPair == REG_PAIR_NONE)
1287             regMask = RBM_NONE;
1288         else
1289             regMask = genRegPairMask(regPair);
1290
1291         /* The register pair has been spilled -- reload it */
1292
1293         regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1294     }
1295
1296     /* Does the caller insist on the value being in a specific place? */
1297
1298     if  (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1299     {
1300         /* No good -- we'll have to move the value to a new place */
1301
1302         genMoveRegPair(tree, 0, regPair);
1303
1304         /* Mark the pair as used if appropriate */
1305
1306         if  (keepReg == RegSet::KEEP_REG)
1307             regSet.rsMarkRegPairUsed(tree);
1308
1309         return;
1310     }
1311
1312     /* Free the register pair if the caller desired so */
1313
1314     if  (keepReg == RegSet::FREE_REG)
1315         regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1316 }
1317
1318 /*****************************************************************************
1319  *
1320  *  Compute the given long value into the specified register pair; don't mark
1321  *  the register pair as used.
1322  */
1323
1324 // inline
1325 void         CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
1326 {
1327     genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
1328     genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
1329 }
1330
1331 /*****************************************************************************
1332  *  This helper makes sure that the regpair target of an assignment is
1333  *  available for use.  This needs to be called in genCodeForTreeLng just before
1334  *  a long assignment, but must not be called until everything has been
1335  *  evaluated, or else we might try to spill enregistered variables.
1336  *
1337  */
1338
1339 // inline
1340 void         CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1341 {
1342     /* Make sure the target of the store is available */
1343
1344     regNumber regLo   = genRegPairLo(regPair);
1345     regNumber regHi   = genRegPairHi(regPair);
1346
1347     if  ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1348         regSet.rsSpillReg(regHi);
1349
1350     if  ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1351         regSet.rsSpillReg(regLo);
1352 }
1353
1354 /*****************************************************************************/
1355 /*****************************************************************************
1356  *
1357  *  Return true if the given tree 'addr' can be computed via an addressing mode,
1358  *  such as "[ebx+esi*4+20]". If the expression isn't an address mode already,
1359  *  try to make it so (but we don't try 'too hard' to accomplish this).
1360  *
1361  *  If we end up needing a register (or two registers) to hold some part(s) of the
1362  *  address, we return the use register mask via '*useMaskPtr'.
1363  *
1364  *  If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as 
1365  *  in use. The caller would then be responsible for calling
1366  *  regSet.rsMarkRegFree(*useMaskPtr).
1367  *
1368  *  If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
1369  *  calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
1370  */
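/*
 *  Illustrative caller-side sketch (hypothetical; follows the contract described
 *  above for an indirection 'ind' whose address operand is ind->gtOp.gtOp1):
 *
 *      regMaskTP addrRegs;
 *      if (genMakeIndAddrMode(ind->gtOp.gtOp1, ind, false, RBM_ALLINT,
 *                             RegSet::KEEP_REG, &addrRegs, false))
 *      {
 *          // ... emit the access using the formed address mode ...
 *          regSet.rsMarkRegFree(addrRegs);
 *      }
 */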
1371
1372 bool                CodeGen::genMakeIndAddrMode(GenTreePtr   addr,
1373                                                  GenTreePtr   oper,
1374                                                  bool         forLea,
1375                                                  regMaskTP    regMask,
1376                                                  RegSet::KeepReg      keepReg,
1377                                                  regMaskTP *  useMaskPtr,
1378                                                  bool         deferOK)
1379 {
1380     if (addr->gtOper == GT_ARR_ELEM)
1381     {
1382         regMaskTP   regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1383         *useMaskPtr = regs;
1384         return true;
1385     }
1386
1387     bool            rev;
1388     GenTreePtr      rv1;
1389     GenTreePtr      rv2;
1390     bool            operIsArrIndex; // is oper an array index
1391     GenTreePtr      scaledIndex;    // If scaled addressing mode can't be used
1392
1393     regMaskTP       anyMask = RBM_ALLINT;
1394
1395     unsigned        cns;
1396     unsigned        mul;
1397
1398     GenTreePtr      tmp;
1399     int            ixv = INT_MAX; // unset value
1400     
1401     GenTreePtr      scaledIndexVal;
1402
1403     regMaskTP       newLiveMask;
1404     regMaskTP       rv1Mask;
1405     regMaskTP       rv2Mask;
1406
1407     /* Deferred address mode forming NYI for x86 */
1408
1409     
1410     noway_assert(deferOK == false);
1411
1412     noway_assert(oper == NULL 
1413                  || ((oper->OperIsIndir() || oper->OperIsAtomicOp())
1414                      && 
1415                      ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr)
1416                       || oper->gtOp.gtOp1 == addr)));
1417     operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
1418
1419     if (addr->gtOper == GT_LEA)
1420     {
1421         rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1422         GenTreeAddrMode * lea = addr->AsAddrMode();
1423         rv1 = lea->Base();
1424         rv2 = lea->Index();
1425         mul = lea->gtScale;
1426         cns = lea->gtOffset;
1427
1428         if (rv1 != NULL &&
1429             rv2 == NULL &&
1430             cns == 0    &&
1431             (rv1->gtFlags & GTF_REG_VAL) != 0)
1432         {
1433             scaledIndex = NULL;
1434             goto YES;
1435         }
1436     }
1437     else
1438     {
1439     // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1440     // EASIER TO MERGE
1441
1442     /* Is the complete address already sitting in a register? */
1443
1444     if ((addr->gtFlags & GTF_REG_VAL) ||
1445         (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1446     {
1447         genUpdateLife(addr);
1448
1449         rv1 = addr;
1450         rv2 = scaledIndex = 0;
1451         cns = 0;
1452
1453         goto YES;
1454     }
1455
1456     /* Is it an absolute address */
1457
1458     if (addr->IsCnsIntOrI())
1459     {
1460         rv1 = rv2 = scaledIndex = 0;
1461         // along this code path cns is never used, so place a BOGUS value in it as proof
1462         // cns = addr->gtIntCon.gtIconVal;
1463         cns = UINT_MAX;
1464
1465         goto YES;
1466     }
1467
1468     /* Is there a chance of forming an address mode? */
1469
1470     if  (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1471     {
1472         /* This better not be an array index */
1473         noway_assert(!operIsArrIndex);
1474
1475         return  false;
1476     }
1477     // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1478     }
1479
1480    /*  For scaled array access, RV2 may not be pointing to the index of the
1481        array if the CPU does not support the needed scaling factor.  We will
1482        make it point to the actual index, and scaledIndex will point to
1483        the scaled value */
1484
1485     scaledIndex = NULL;
1486     scaledIndexVal = NULL;
1487
1488     if  (operIsArrIndex && rv2 != NULL 
1489          && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) 
1490          && rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1491     {
1492         scaledIndex = rv2;
1493         compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1494
1495         noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1496     }
1497
1498     /* Has the address already been computed? */
1499
1500     if  (addr->gtFlags & GTF_REG_VAL)
1501     {
1502         if  (forLea)
1503             return  true;
1504
1505         rv1         = addr;
1506         rv2         = NULL;
1507         scaledIndex = NULL;
1508         genUpdateLife(addr);
1509         goto YES;
1510     }
1511
1512     /*
1513         Here we have the following operands:
1514
1515             rv1     .....       base address
1516             rv2     .....       offset value        (or NULL)
1517             mul     .....       multiplier for rv2  (or 0)
1518             cns     .....       additional constant (or 0)
1519
1520         The first operand must be present (and be an address) unless we're
1521         computing an expression via 'LEA'. The scaled operand is optional,
1522         but must not be a pointer if present.
1523      */
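    /* Concrete mapping for the "[ebx+esi*4+20]" example from the function header
       (register names are x86 and purely illustrative): rv1 is the tree computed
       into EBX (base), rv2 is the tree computed into ESI (index), mul == 4, and
       cns == 20. */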
1524
1525     noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1526
1527     /*-------------------------------------------------------------------------
1528      *
1529      * Make sure both rv1 and rv2 (if present) are in registers
1530      *
1531      */
1532
1533     // Trivial case : Is either rv1 or rv2 a NULL ?
1534
1535     if  (!rv2)
1536     {
1537         /* A single operand, make sure it's in a register */
1538
1539         if (cns != 0)
1540         {
1541             // If there's a non-zero constant to add, there's no reason to force "rv1" into a
1542             // register from "regMask" yet: the constant can be folded in later with an LEA,
1543             // so any register will do for now.
1544             genCodeForTree(rv1, RBM_NONE);
1545         }
1546         else
1547         {
1548             genCodeForTree(rv1, regMask);
1549         }
1550         goto DONE_REGS;
1551     }
1552     else if (!rv1)
1553     {
1554         /* A single (scaled) operand, make sure it's in a register */
1555
1556         genCodeForTree(rv2, 0);
1557         goto DONE_REGS;
1558     }
1559
1560     /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1561        they are in registers */
1562
1563     noway_assert(rv1 && rv2);
1564
1565
1566     /*  If we have to check a constant array index, compare it against
1567         the array dimension (see below) but then fold the index with a
1568         scaling factor (if any) and additional offset (if any).
1569      */
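    // For instance (values purely illustrative): with a constant index ixv=3, a
    // scale mul=4, and an existing offset cns=8, the code below drops rv2
    // entirely and folds everything into cns = 8 + 3*4 = 20.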
1570
1571     if  (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1572     {
1573         if (scaledIndex != NULL)
1574         {
1575             assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1576             rv2 = scaledIndexVal;
1577         }
1578         /* We must have a range-checked index operation */
1579
1580         noway_assert(operIsArrIndex);
1581
1582         /* Get hold of the index value and see if it's a constant */
1583
1584         if  (rv2->IsIntCnsFitsInI32())
1585         {
1586             ixv = (int)rv2->gtIntCon.gtIconVal;
1587             // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1588             if (scaledIndex != NULL)
1589             {
1590                 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal);  // If this truncates, that's OK -- multiple of 2^6.
1591                 if (mul == 0)
1592                 {
1593                     mul = scale;
1594                 }
1595                 else
1596                 {
1597                     mul *= scale;
1598                 }
1599             }
1600             rv2 = scaledIndex = NULL;
1601
1602             /* Add the scaled index into the added value */
1603
1604             if  (mul)
1605                 cns += ixv * mul;
1606             else
1607                 cns += ixv;
1608
1609             /* Make sure 'rv1' is in a register */
1610
1611             genCodeForTree(rv1, regMask);
1612
1613             goto DONE_REGS;
1614         }
1615     }
1616
1617     if      (rv1->gtFlags & GTF_REG_VAL)
1618     {
1619         /* op1 already in register - how about op2? */
1620
1621         if  (rv2->gtFlags & GTF_REG_VAL)
1622         {
1623             /* Great - both operands are in registers already. Just update
1624                the liveness and we are done. */
1625
1626             if (rev)
1627             {
1628                 genUpdateLife(rv2);
1629                 genUpdateLife(rv1);
1630             }
1631             else
1632             {
1633                 genUpdateLife(rv1);
1634                 genUpdateLife(rv2);
1635             }
1636
1637             goto DONE_REGS;
1638         }
1639
1640         /* rv1 is in a register, but rv2 isn't */
1641
1642         if (!rev)
1643         {
1644             /* rv1 is already materialized in a register. Just update liveness
1645                to rv1 and generate code for rv2 */
1646
1647             genUpdateLife(rv1);
1648             regSet.rsMarkRegUsed(rv1, oper);
1649         }
1650
1651         goto GEN_RV2;
1652     }
1653     else if (rv2->gtFlags & GTF_REG_VAL)
1654     {
1655         /* rv2 is in a register, but rv1 isn't */
1656
1657         noway_assert(rv2->gtOper == GT_REG_VAR);
1658
1659         if (rev)
1660         {
1661             /* rv2 is already materialized in a register. Update liveness
1662                to after rv2 and then hang on to rv2 */
1663
1664             genUpdateLife(rv2);
1665             regSet.rsMarkRegUsed(rv2, oper);
1666         }
1667
1668         /* Generate code for the first operand */
1669
1670         genCodeForTree(rv1, regMask);
1671
1672         if (rev)
1673         {
1674             // Free up rv2 in the right fashion (it might be re-marked if keepReg)
1675             regSet.rsMarkRegUsed(rv1, oper);
1676             regSet.rsLockUsedReg  (genRegMask(rv1->gtRegNum));
1677             genReleaseReg(rv2);
1678             regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1679             genReleaseReg(rv1);
1680         }
1681         else
1682         {
1683             /* We have evaluated rv1, and now we just need to update liveness
1684                to rv2 which was already in a register */
1685
1686             genUpdateLife(rv2);
1687         }
1688
1689         goto DONE_REGS;
1690     }
1691
1692     if  (forLea && !cns)
1693         return  false;
1694
1695     /* Make sure we preserve the correct operand order */
1696
1697     if  (rev)
1698     {
1699         /* Generate the second operand first */
1700
1701         // Determine what registers go live between rv2 and rv1
1702         newLiveMask = genNewLiveRegMask(rv2, rv1);
1703
1704         rv2Mask = regMask & ~newLiveMask; 
1705         rv2Mask &= ~rv1->gtRsvdRegs;
1706
1707         if (rv2Mask == RBM_NONE)
1708         {
1709             // The regMask hint cannot be honored
1710             // We probably have a call that trashes the register(s) in regMask
1711             // so ignore the regMask hint, but try to avoid using
1712             // the registers in newLiveMask and the rv1->gtRsvdRegs
1713             //
1714             rv2Mask = RBM_ALLINT & ~newLiveMask; 
1715             rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1716         }
1717
1718         genCodeForTree(rv2, rv2Mask);
1719         regMask &= ~genRegMask(rv2->gtRegNum);
1720
1721         regSet.rsMarkRegUsed(rv2, oper);
1722
1723         /* Generate the first operand second */
1724
1725         genCodeForTree(rv1, regMask);
1726         regSet.rsMarkRegUsed(rv1, oper);
1727
1728         /* Free up both operands in the right order (they might be
1729            re-marked as used below)
1730         */
1731         regSet.rsLockUsedReg  (genRegMask(rv1->gtRegNum));
1732         genReleaseReg(rv2);
1733         regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1734         genReleaseReg(rv1);
1735     }
1736     else
1737     {
1738         /* Get the first operand into a register */
1739
1740         // Determine what registers go live between rv1 and rv2
1741         newLiveMask = genNewLiveRegMask(rv1, rv2);
1742
1743         rv1Mask  = regMask & ~newLiveMask; 
1744         rv1Mask &= ~rv2->gtRsvdRegs;
1745  
1746         if (rv1Mask == RBM_NONE)
1747         {
1748             // The regMask hint cannot be honored
1749             // We probably have a call that trashes the register(s) in regMask
1750             // so ignore the regMask hint, but try to avoid using
1751             // the registers in newLiveMask and the rv2->gtRsvdRegs
1752             //
1753             rv1Mask = RBM_ALLINT & ~newLiveMask;
1754             rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1755         }
1756
1757         genCodeForTree(rv1, rv1Mask);
1758         regSet.rsMarkRegUsed(rv1, oper);
1759
1760     GEN_RV2:
1761
1762         /* Here, we need to get rv2 in a register. We have either already
1763            materialized rv1 into a register, or it was already in one */
1764
1765         noway_assert(rv1->gtFlags & GTF_REG_VAL);
1766         noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1767
1768         /* Generate the second operand as well */
1769
1770         regMask &= ~genRegMask(rv1->gtRegNum);
1771         genCodeForTree(rv2, regMask);
1772
1773         if (rev)
1774         {
1775             /* rev==true means the evaluation order is rv2,rv1. We just
1776                evaluated rv2, and rv1 was already in a register. Just
1777                update liveness to rv1 and we are done. */
1778
1779             genUpdateLife(rv1);
1780         }
1781         else
1782         {
1783             /* We have evaluated rv1 and rv2. Free up both operands in
1784                the right order (they might be re-marked as used below) */
1785
1786             /* Even though we have not explicitly marked rv2 as used,
1787                rv2->gtRegNum may be used if rv2 is a multi-use or
1788                an enregistered variable. */
1789             regMaskTP   rv2Used;
1790             regSet.rsLockReg  (genRegMask(rv2->gtRegNum), &rv2Used);
1791
1792             /* Check for special case both rv1 and rv2 are the same register */
1793             if (rv2Used != genRegMask(rv1->gtRegNum))
1794             {
1795                 genReleaseReg(rv1);
1796                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum),  rv2Used);
1797             }
1798             else
1799             {
1800                 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum),  rv2Used);
1801                 genReleaseReg(rv1);
1802             }
1803         }
1804     }
1805
1806     /*-------------------------------------------------------------------------
1807      *
1808      * At this point, both rv1 and rv2 (if present) are in registers
1809      *
1810      */
1811
1812 DONE_REGS:
1813
1814     /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1815
1816     if  (rv1 && !(rv1->gtFlags & GTF_REG_VAL)) return false;
1817     if  (rv2 && !(rv2->gtFlags & GTF_REG_VAL)) return false;
1818
1819 YES:
1820
1821     // *(intVar1+intVar1) causes problems as we
1822     // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1823     // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1824     // that currently as we return a single mask in useMaskPtr.
1825
1826     if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 &&
1827         (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
1828     {
1829         if (rv1->gtRegNum == rv2->gtRegNum)
1830         {
1831             noway_assert(!operIsArrIndex);
1832             return false;
1833         }
1834     }
1835
1836     /* Check either register operand to see if it needs to be saved */
1837
1838     if  (rv1)
1839     {
1840         noway_assert(rv1->gtFlags & GTF_REG_VAL);
1841
1842         if (keepReg == RegSet::KEEP_REG)
1843         {
1844             regSet.rsMarkRegUsed(rv1, oper);
1845         }
1846         else
1847         {
1848             /* If the register holds an address, mark it */
1849
1850             gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1851         }
1852     }
1853
1854     if  (rv2)
1855     {
1856         noway_assert(rv2->gtFlags & GTF_REG_VAL);
1857
1858         if (keepReg == RegSet::KEEP_REG)
1859             regSet.rsMarkRegUsed(rv2, oper);
1860     }
1861
1862     if  (deferOK)
1863     {
1864         noway_assert(!scaledIndex);
1865         return  true;
1866     }
1867
1868     /* Compute the set of registers the address depends on */
1869
1870     regMaskTP  useMask = RBM_NONE;
1871
1872     if (rv1)
1873     {
1874         if  (rv1->gtFlags & GTF_SPILLED)
1875             regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1876
1877         noway_assert(rv1->gtFlags & GTF_REG_VAL);
1878         useMask |= genRegMask(rv1->gtRegNum);
1879     }
1880
1881     if (rv2)
1882     {
1883         if  (rv2->gtFlags & GTF_SPILLED)
1884         {
1885             if (rv1)
1886             {
1887                 regMaskTP   lregMask = genRegMask(rv1->gtRegNum);
1888                 regMaskTP   used;
1889                 
1890                 regSet.rsLockReg(lregMask, &used);
1891                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1892                 regSet.rsUnlockReg(lregMask, used);
1893             }
1894             else
1895                 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1896         }
1897         noway_assert(rv2->gtFlags & GTF_REG_VAL);
1898         useMask |= genRegMask(rv2->gtRegNum);
1899     }
1900
1901     /* Tell the caller which registers we need to hang on to */
1902
1903     *useMaskPtr = useMask;
1904
1905     return true;
1906 }
1907
1908 /*****************************************************************************
1909  *
1910  *  'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
1911  */
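//
// Illustrative x86 shape of the check emitted below (register names are
// placeholders): an unsigned compare of the index against the array length,
// followed by a jump to the range-check failure block:
//
//     cmp  <indexReg>, dword ptr [<arrRefReg> + LenOffs]
//     jae  RNGCHK_FAIL
//
// The constant-index and constant-length cases below specialize or fold this
// comparison instead.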
1912
1913 void                CodeGen::genRangeCheck(GenTreePtr  oper)
1914 {
1915     noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1916     GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1917
1918     GenTreePtr arrLen = bndsChk->gtArrLen;
1919     GenTreePtr arrRef = NULL;
1920     int lenOffset = 0;
1921
1922     // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
1923     // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
1924     // a register.
1925
1926     if (arrLen->OperGet() == GT_ARR_LENGTH)
1927     {
1928         GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
1929         lenOffset = arrLenExact->ArrLenOffset();
1930
1931         // We always load the length into a register on ARM and x64.
1932 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1933         // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
1934         // lengths, but the index expression *can* be native int (64-bits)
1935         arrRef = arrLenExact->ArrRef();
1936         genCodeForTree(arrRef, RBM_ALLINT);
1937         noway_assert(arrRef->gtFlags & GTF_REG_VAL);
1938         regSet.rsMarkRegUsed(arrRef);
1939         noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1940 #endif
1941     }
1942 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1943     // This is another form in which we have an array reference and a constant length.  Don't use
1944     // on LOAD_STORE or 64BIT.
1945     else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
1946     {
1947         genCodeForTree(arrRef, RBM_ALLINT);
1948         noway_assert(arrRef->gtFlags & GTF_REG_VAL);
1949         regSet.rsMarkRegUsed(arrRef);
1950         noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1951     }
1952 #endif
1953
1954     // If we didn't find one of the special forms above, generate code to evaluate the array length into a register.
1955     if (arrRef == NULL)
1956     {
1957         // (Unless it's a constant.)
1958         if (!arrLen->IsCnsIntOrI())
1959         {
1960             genCodeForTree(arrLen, RBM_ALLINT);
1961             regSet.rsMarkRegUsed(arrLen);
1962
1963             noway_assert(arrLen->gtFlags & GTF_REG_VAL);
1964             noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
1965         }
1966     }
1967
1968     /* Is the array index a constant value? */
1969     GenTreePtr index = bndsChk->gtIndex;
1970     if  (!index->IsCnsIntOrI())
1971     {
1972         // No, it's not a constant.
1973         genCodeForTree(index, RBM_ALLINT);
1974         regSet.rsMarkRegUsed(index);
1975
1976         // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
1977         // from its register, get it back in a register.
1978         if (arrRef != NULL)
1979             genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
1980         else if (!arrLen->IsCnsIntOrI())
1981             genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
1982
1983         /* Make sure we have the values we expect */
1984         noway_assert(index->gtFlags & GTF_REG_VAL);
1985         noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
1986
1987         noway_assert(index->TypeGet() == TYP_I_IMPL || (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
1988         var_types indxType = index->TypeGet();
1989         if (indxType != TYP_I_IMPL) indxType = TYP_INT;
1990
1991         if (arrRef != NULL)
1992         {   // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1993
1994             /* Generate "cmp index, [arrRef+LenOffs]" */
1995             inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
1996         }
1997         else if (arrLen->IsCnsIntOrI())
1998         {
1999             ssize_t len = arrLen->AsIntConCommon()->IconValue();
2000             inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
2001         }
2002         else
2003         {
2004             inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
2005         }
2006
2007         /* Generate "jae <fail_label>" */
2008
2009         noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
2010         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2011         genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2012     }
2013     else
2014     {
2015         /* Generate "cmp [rv1+LenOffs], cns" */
2016
2017         bool indIsInt = true;
2018 #ifdef _TARGET_64BIT_
2019         int ixv = 0;
2020         ssize_t ixvFull = index->AsIntConCommon()->IconValue();
2021         if (ixvFull > INT32_MAX)
2022         {
2023             indIsInt = false;
2024         }
2025         else
2026         {
2027             ixv = (int)ixvFull;
2028         }
2029 #else
2030         ssize_t ixvFull = index->AsIntConCommon()->IconValue();
2031         int ixv = (int)ixvFull;
2032 #endif
2033         if (arrRef != NULL && indIsInt)
2034         {   // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
2035             /* Generate "cmp [arrRef+LenOffs], ixv" */
2036             inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
2037             // Generate "jbe <fail_label>"
2038             emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
2039             genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2040         }
2041         else if (arrLen->IsCnsIntOrI())
2042         {
2043             ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
2044             // Both are constants; decide at compile time.
2045             if (!(0 <= ixvFull && ixvFull < lenv))
2046             {
2047                 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2048             }
2049         }
2050         else if (!indIsInt)
2051         {
2052             genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2053         }
2054         else
2055         {
2056              /* Generate "cmp arrLen, ixv" */
2057             inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
2058             // Generate "jbe <fail_label>"
2059             emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
2060             genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2061         }
2062     }
2063
2064     // Free the registers that were used.
2065     if (arrRef != NULL)
2066     {
2067         regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
2068     }
2069     else if (!arrLen->IsCnsIntOrI())
2070     {
2071         regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
2072     }
2073
2074     if  (!index->IsCnsIntOrI())
2075     {
2076         regSet.rsMarkRegFree(index->gtRegNum, index);
2077     }
2078 }
2079
2080 /*****************************************************************************
2081  *
2082  * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2083  * Otherwise, check if the rvalue is already in a register. If so, mark it. Then
2084  * call genMakeAddressable(). Needed because genMakeAddressable is used
2085  * for both lvalues and rvalues, and we can only do this for rvalues.
2086  */
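//
// For example (variable names hypothetical): after codegen for "x = y + 1" the
// register that was stored into x may still be tracked as holding x's value; a
// later rvalue use of x can then be marked as living in that register via
// findStkLclInReg()/genMarkTreeInReg() below, instead of being re-loaded from
// the stack frame.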
2087
2088 // inline
2089 regMaskTP           CodeGen::genMakeRvalueAddressable(GenTreePtr      tree,
2090                                                       regMaskTP       needReg,
2091                                                       RegSet::KeepReg keepReg,
2092                                                       bool            forLoadStore,
2093                                                       bool            smallOK)
2094 {
2095     regNumber reg;
2096
2097 #if REDUNDANT_LOAD
2098
2099     if (tree->gtOper == GT_LCL_VAR)
2100     {
2101         reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
2102
2103         if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2104         {
2105             noway_assert(!isRegPairType(tree->gtType));
2106
2107             genMarkTreeInReg(tree, reg);
2108         }
2109     }
2110
2111 #endif
2112
2113     return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2114 }
2115
2116 /*****************************************************************************/
2117
2118
2119 bool CodeGen::genIsLocalLastUse    (GenTreePtr     tree)
2120 {
2121     const LclVarDsc * varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2122
2123     noway_assert(tree->OperGet() == GT_LCL_VAR);
2124     noway_assert(varDsc->lvTracked);
2125
2126     return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2127 }
2128
2129
2130 /*****************************************************************************
2131  *
2132  *  This is genMakeAddressable(GT_ARR_ELEM).
2133  *  Makes the array-element addressable and returns the addressability registers.
2134  *  It also marks them as used if keepReg==RegSet::KEEP_REG.
2135  *  tree is the dependent tree.
2136  *
2137  *  Note that an array-element needs 2 registers to be addressable: the
2138  *  array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2139  *  with the 2 registers so that other functions (like instGetAddrMode()) know
2140  *  where to look for the offset to use.
2141  */
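//
// As a sketch only: for a rank-2 access a[i,j], the loop below leaves the array
// object in one register and an accumulator register holding
//
//     (i - lowerBound0) * dimLength1 + (j - lowerBound1)
//
// which is then scaled by gtArrElemSize, either through the final address mode
// or by the explicit MUL emitted when the element size is not a valid scale.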
2142
2143 regMaskTP           CodeGen::genMakeAddrArrElem(GenTreePtr      arrElem,
2144                                                 GenTreePtr      tree,
2145                                                 regMaskTP       needReg,
2146                                                 RegSet::KeepReg keepReg)
2147 {
2148     noway_assert(arrElem->gtOper == GT_ARR_ELEM);
2149     noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
2150
2151     /* Evaluate all the operands. We don't evaluate them into registers yet
2152        as GT_ARR_ELEM does not reorder the evaluation of the operands, and
2153        hence may use a sub-optimal ordering. We try to improve this
2154        situation somewhat by accessing the operands in stages
2155        (genMakeAddressable2 + genComputeAddressable and
2156        genCompIntoFreeReg + genRecoverReg).
2157
2158        Note: we compute operands into free regs to avoid multiple uses of
2159        the same register. Multi-use would cause problems when we free
2160        registers in FIFO order instead of the assumed LIFO order that
2161        applies to all type of tree nodes except for GT_ARR_ELEM.
2162      */
2163
2164     GenTreePtr  arrObj   = arrElem->gtArrElem.gtArrObj;
2165     unsigned    rank     = arrElem->gtArrElem.gtArrRank;
2166     var_types   elemType = arrElem->gtArrElem.gtArrElemType;
2167     regMaskTP   addrReg  = RBM_NONE;
2168     regMaskTP   regNeed  = RBM_ALLINT;
2169
2170 #if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
2171     // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
2172     // since the arrObj participates in the lea/add instruction 
2173     // that computes ARG_0 we should avoid putting it in ARG_1
2174     //
2175     if (varTypeIsGC(elemType))
2176     {
2177         regNeed &= ~RBM_ARG_1;
2178     }
2179 #endif
2180
2181     // Strip off any comma expression.
2182     arrObj = genCodeForCommaTree(arrObj);
2183
2184     // Having generated the code for the comma, we don't care about it anymore.
2185     arrElem->gtArrElem.gtArrObj = arrObj;
2186
2187     // If the array ref is a stack var that's dying here we have to move it
2188     // into a register (regalloc already accounts for this), as if it's a GC pointer
2189     // it can be collected from here on. This is not an issue for locals that are
2190     // in a register, as they get marked as used and will be tracked.
2191     // The bug that caused this is #100776. (untracked vars?)
2192     if (arrObj->OperGet() == GT_LCL_VAR &&
2193         compiler->optIsTrackedLocal(arrObj) &&
2194         genIsLocalLastUse(arrObj) &&
2195         !genMarkLclVar(arrObj))
2196     {
2197         genCodeForTree(arrObj, regNeed);
2198         regSet.rsMarkRegUsed(arrObj, 0);
2199         addrReg = genRegMask(arrObj->gtRegNum);
2200     }
2201     else
2202     {
2203         addrReg = genMakeAddressable2(arrObj,
2204                                       regNeed,
2205                                       RegSet::KEEP_REG,
2206                                       true,       // forLoadStore
2207                                       false,      // smallOK
2208                                       false,      // deferOK
2209                                       true);      // evalSideEffs
2210     }
2211
2212     unsigned dim;
2213     for (dim = 0; dim < rank; dim++)
2214         genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
2215
2216     /* Ensure that the array-object is in a register */
2217
2218     addrReg = genKeepAddressable(arrObj, addrReg);
2219     genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
2220
2221     regNumber   arrReg     = arrObj->gtRegNum;
2222     regMaskTP   arrRegMask = genRegMask(arrReg);
2223     regMaskTP   indRegMask = RBM_ALLINT & ~arrRegMask;
2224     regSet.rsLockUsedReg(arrRegMask);
2225
2226     /* Now process all the indices, do the range check, and compute
2227        the offset of the element */
2228
2229     regNumber   accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
2230
2231     for (dim = 0; dim < rank; dim++)
2232     {
2233         GenTreePtr  index = arrElem->gtArrElem.gtArrInds[dim];
2234
2235         /* Get the index into a free register (other than the register holding the array) */
2236
2237         genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
2238
2239         /* Subtract the lower bound, and do the range check */
2240
2241 #if CPU_LOAD_STORE_ARCH
2242         regNumber   valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
2243         getEmitter()->emitIns_R_AR(
2244                         INS_ldr, EA_4BYTE,
2245                         valueReg,
2246                         arrReg,
2247                         compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2248         regTracker.rsTrackRegTrash(valueReg);
2249         getEmitter()->emitIns_R_R(
2250                         INS_sub, EA_4BYTE,
2251                         index->gtRegNum,
2252                         valueReg);
2253         regTracker.rsTrackRegTrash(index->gtRegNum);
2254
2255         getEmitter()->emitIns_R_AR(
2256                         INS_ldr, EA_4BYTE,
2257                         valueReg,
2258                         arrReg,
2259                         compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2260         getEmitter()->emitIns_R_R(
2261                         INS_cmp, EA_4BYTE,
2262                         index->gtRegNum,
2263                         valueReg);
2264 #else
2265         getEmitter()->emitIns_R_AR(
2266                         INS_sub, EA_4BYTE,
2267                         index->gtRegNum,
2268                         arrReg,
2269                         compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2270         regTracker.rsTrackRegTrash(index->gtRegNum);
2271
2272         getEmitter()->emitIns_R_AR(
2273                         INS_cmp, EA_4BYTE,
2274                         index->gtRegNum,
2275                         arrReg,
2276                         compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2277 #endif
2278         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2279         genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
2280
2281         if (dim == 0)
2282         {
2283             /* Hang on to the register of the first index */
2284
2285             noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
2286             accReg = index->gtRegNum;
2287             noway_assert(accReg != arrReg);
2288             regSet.rsLockUsedReg(genRegMask(accReg));
2289         }
2290         else
2291         {
2292             /* Evaluate accReg = accReg*dim_size + index */
2293
2294             noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
2295 #if CPU_LOAD_STORE_ARCH
2296             getEmitter()->emitIns_R_AR(
2297                             INS_ldr, EA_4BYTE,
2298                             valueReg,
2299                             arrReg,
2300                             compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2301             regTracker.rsTrackRegTrash(valueReg);
2302             getEmitter()->emitIns_R_R(
2303                             INS_MUL, EA_4BYTE,
2304                             accReg,
2305                             valueReg);
2306 #else
2307             getEmitter()->emitIns_R_AR(
2308                         INS_MUL, EA_4BYTE,
2309                         accReg,
2310                         arrReg,
2311                         compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2312 #endif
2313
2314             inst_RV_RV(INS_add, accReg, index->gtRegNum);
2315             regSet.rsMarkRegFree(index->gtRegNum, index);
2316             regTracker.rsTrackRegTrash(accReg);
2317         }
2318     }
2319
2320     if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
2321     {
2322         regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
2323
2324         getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
2325         regTracker.rsTrackRegTrash(accReg);
2326     }
2327
2328     regSet.rsUnlockUsedReg(genRegMask(arrReg));
2329     regSet.rsUnlockUsedReg(genRegMask(accReg));
2330
2331     regSet.rsMarkRegFree(genRegMask(arrReg));
2332     regSet.rsMarkRegFree(genRegMask(accReg));
2333
2334     if (keepReg == RegSet::KEEP_REG)
2335     {
2336         /* We mark the addressability registers on arrObj and gtArrInds[0].
2337            instGetAddrMode() knows to work with this. */
2338
2339         regSet.rsMarkRegUsed(arrObj,                          tree);
2340         regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
2341     }
2342
2343     return genRegMask(arrReg) | genRegMask(accReg);
2344 }
2345
2346 /*****************************************************************************
2347  *
2348  *  Make sure the given tree is addressable.  'needReg' is a mask that indicates
2349  *  the set of registers we would prefer the destination tree to be computed
2350  *  into (RBM_NONE means no preference).
2351  *
2352  *  'tree' can subsequently be used with the inst_XX_TT() family of functions.
2353  *
2354  *  If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2355  *  on as used, and return the mask for that register set (if no registers
2356  *  are marked as used, RBM_NONE is returned).
2357  *
2358  *  If 'smallOK' is not true and the datatype being addressed is a byte or short,
2359  *  then the tree is forced into a register.  This is useful when the machine
2360  *  instruction being emitted does not have a byte or short version.
2361  *
2362  *  The "deferOK" parameter indicates the mode of operation - when it's false,
2363  *  upon returning an actual address mode must have been formed (i.e. it must
2364  *  be possible to immediately call one of the inst_TT methods to operate on
2365  *  the value). When "deferOK" is true, we do whatever it takes to be ready
2366  *  to form the address mode later - for example, if an index address mode on
2367  *  a particular CPU requires the use of a specific register, we usually don't
2368  *  want to immediately grab that register for an address mode that will only
2369  *  be needed later. The convention is to call genMakeAddressable() with
2370  *  "deferOK" equal to true, do whatever work is needed to prepare the other
2371  *  operand, call genMakeAddressable() with "deferOK" equal to false, and
2372  *  finally call one of the inst_TT methods right after that.
2373  *
2374  *  If we do any other codegen after genMakeAddressable(tree) which can
2375  *  potentially spill the addressability registers, genKeepAddressable()
2376  *  needs to be called before accessing the tree again.
2377  *
2378  *  genDoneAddressable() needs to be called when we are done with the tree
2379  *  to free the addressability registers.
2380  */
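//
// A minimal sketch of the calling convention described above (the operand
// names and the final inst_TT_RV arguments are hypothetical, not taken from
// any particular caller):
//
//     addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG,
//                                  false /*smallOK*/, true  /*deferOK*/);
//     // ... prepare the other operand; if that codegen could spill, call
//     // addrReg = genKeepAddressable(op1, addrReg); before continuing ...
//     addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG,
//                                  false /*smallOK*/, false /*deferOK*/);
//     inst_TT_RV(INS_add, op1, otherReg);
//     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);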
2381
2382 regMaskTP           CodeGen::genMakeAddressable(GenTreePtr      tree,
2383                                                 regMaskTP       needReg,
2384                                                 RegSet::KeepReg keepReg,
2385                                                 bool            smallOK,
2386                                                 bool            deferOK)
2387 {
2388     GenTreePtr      addr = NULL;
2389     regMaskTP       regMask;
2390
2391     /* Is the value simply sitting in a register? */
2392
2393     if  (tree->gtFlags & GTF_REG_VAL)
2394     {
2395         genUpdateLife(tree);
2396
2397         goto GOT_VAL;
2398     }
2399
2400     // TODO: If the value is for example a cast of float -> int, compute
2401     // TODO: the converted value into a stack temp, and leave it there,
2402     // TODO: since stack temps are always addressable. This would require
2403     // TODO: recording the fact that a particular tree is in a stack temp.
2404
2405
2406     /* byte/char/short operand -- is this acceptable to the caller? */
2407
2408     if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
2409         goto EVAL_TREE;
2410
2411     // Evaluate non-last elements of comma expressions, to get to the last.
2412     tree = genCodeForCommaTree(tree);
2413
2414     switch (tree->gtOper)
2415     {
2416     case GT_LCL_FLD:
2417
2418         // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
2419         // to worry about it being enregistered.
2420         noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
2421
2422         genUpdateLife(tree);
2423         return 0;
2424
2425
2426     case GT_LCL_VAR:
2427
2428         if (!genMarkLclVar(tree))
2429         {
2430             genUpdateLife(tree);
2431             return 0;
2432         }
2433
2434         __fallthrough; // it turns out the variable lives in a register
2435
2436     case GT_REG_VAR:
2437
2438         genUpdateLife(tree);
2439
2440         goto GOT_VAL;
2441
2442     case GT_CLS_VAR:
2443
2444         return 0;
2445
2446     case GT_CNS_INT:
2447 #ifdef _TARGET_64BIT_
2448         // Non-relocs will be sign extended, so we don't have to enregister
2449         // constants that are equivalent to a sign-extended int.
2450         // Relocs can be left alone if they are RIP-relative.
2451         if ((genTypeSize(tree->TypeGet()) > 4) && (!tree->IsIntCnsFitsInI32() || 
2452                 (tree->IsIconHandle() && 
2453                     (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
2454         {
2455             break;
2456         }
2457 #endif // _TARGET_64BIT_
2458         __fallthrough;
2459
2460     case GT_CNS_LNG:
2461     case GT_CNS_DBL:
2462         // For MinOpts, we don't do constant folding, so we have
2463         // constants showing up in places we don't like.
2464         // Force them into a register now to prevent that.
2465         if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
2466             return 0;
2467         break;
2468
2469
2470     case GT_IND:
2471     case GT_NULLCHECK:
2472
2473         /* Try to make the address directly addressable */
2474
2475         if  (genMakeIndAddrMode(tree->gtOp.gtOp1,
2476                                 tree,
2477                                 false, /* not for LEA */
2478                                 needReg,
2479                                 keepReg,
2480                                 &regMask,
2481                                 deferOK))
2482         {
2483             genUpdateLife(tree);
2484             return regMask;
2485         }
2486
2487         /* No good, we'll have to load the address into a register */
2488
2489         addr = tree;
2490         tree = tree->gtOp.gtOp1;
2491         break;
2492
2493     default:
2494         break;
2495     }
2496
2497 EVAL_TREE:
2498
2499     /* Here we need to compute the value 'tree' into a register */
2500
2501     genCodeForTree(tree, needReg);
2502
2503 GOT_VAL:
2504
2505     noway_assert(tree->gtFlags & GTF_REG_VAL);
2506
2507     if  (isRegPairType(tree->gtType))
2508     {
2509         /* Are we supposed to hang on to the register? */
2510
2511         if (keepReg == RegSet::KEEP_REG)
2512             regSet.rsMarkRegPairUsed(tree);
2513
2514         regMask = genRegPairMask(tree->gtRegPair);
2515     }
2516     else
2517     {
2518         /* Are we supposed to hang on to the register? */
2519
2520         if (keepReg == RegSet::KEEP_REG)
2521             regSet.rsMarkRegUsed(tree, addr);
2522
2523         regMask = genRegMask(tree->gtRegNum);
2524     }
2525
2526     return  regMask;
2527 }
2528
2529 /*****************************************************************************
2530  *  Compute a tree (which was previously made addressable using
2531  *  genMakeAddressable()) into a register.
2532  *  needReg - mask of preferred registers.
2533  *  keepReg - should the computed register be marked as used by the tree
2534  *  freeOnly - target register needs to be a scratch register
2535  */
2536
2537 void        CodeGen::genComputeAddressable(GenTreePtr      tree,
2538                                            regMaskTP       addrReg,
2539                                            RegSet::KeepReg keptReg,
2540                                            regMaskTP       needReg,
2541                                            RegSet::KeepReg keepReg,
2542                                            bool            freeOnly)
2543 {
2544     noway_assert(genStillAddressable(tree));
2545     noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
2546
2547     genDoneAddressable(tree, addrReg, keptReg);
2548
2549     regNumber   reg;
2550
2551     if (tree->gtFlags & GTF_REG_VAL)
2552     {
2553         reg = tree->gtRegNum;
2554
2555         if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
2556             goto MOVE_REG;
2557     }
2558     else
2559     {
2560         if (tree->OperIsConst())
2561         {
2562             /* Need to handle consts separately as we don't want to emit
2563               "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
2564               handles consts better for SMALL_CODE */
2565
2566             noway_assert(tree->IsCnsIntOrI());
2567             reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
2568         }
2569         else
2570         {
2571         MOVE_REG:
2572             reg = regSet.rsPickReg(needReg);
2573
2574             inst_RV_TT(INS_mov, reg, tree);
2575             regTracker.rsTrackRegTrash(reg);
2576         }
2577     }
2578
2579     genMarkTreeInReg(tree, reg);
2580
2581     if (keepReg == RegSet::KEEP_REG)
2582         regSet.rsMarkRegUsed(tree);
2583     else
2584         gcInfo.gcMarkRegPtrVal(tree);
2585 }
2586
2587 /*****************************************************************************
2588  *  Should be similar to genMakeAddressable() but gives more control.
2589  */
2590
2591 regMaskTP       CodeGen::genMakeAddressable2(GenTreePtr      tree,
2592                                              regMaskTP       needReg,
2593                                              RegSet::KeepReg keepReg,
2594                                              bool            forLoadStore,
2595                                              bool            smallOK,
2596                                              bool            deferOK,
2597                                              bool            evalSideEffs)
2598
2599 {
2600     bool evalToReg = false;
2601
2602     if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2603         evalToReg = true;
2604
2605 #if CPU_LOAD_STORE_ARCH
2606     if (!forLoadStore)
2607         evalToReg = true;
2608 #endif
2609
2610     if (evalToReg)
2611     {
2612         genCodeForTree(tree, needReg);
2613
2614         noway_assert(tree->gtFlags & GTF_REG_VAL);
2615
2616         if  (isRegPairType(tree->gtType))
2617         {
2618             /* Are we supposed to hang on to the register? */
2619
2620             if (keepReg == RegSet::KEEP_REG)
2621                 regSet.rsMarkRegPairUsed(tree);
2622
2623             return genRegPairMask(tree->gtRegPair);
2624         }
2625         else
2626         {
2627             /* Are we supposed to hang on to the register? */
2628
2629             if (keepReg == RegSet::KEEP_REG)
2630                 regSet.rsMarkRegUsed(tree);
2631
2632             return genRegMask(tree->gtRegNum);
2633         }
2634     }
2635     else
2636     {
2637         return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2638     }
2639 }
2640
2641 /*****************************************************************************
2642  *
2643  *  The given tree was previously passed to genMakeAddressable(); return
2644  *  'true' if the operand is still addressable.
2645  */
2646
2647 // inline
2648 bool                CodeGen::genStillAddressable(GenTreePtr tree)
2649 {
2650     /* Has the value (or one or more of its sub-operands) been spilled? */
2651
2652     if  (tree->gtFlags & (GTF_SPILLED|GTF_SPILLED_OPER))
2653         return  false;
2654
2655     return  true;
2656 }
2657
2658 /*****************************************************************************
2659  *
2660  *  Recursive helper to restore complex address modes. The 'lockPhase'
2661  *  argument indicates whether we're in the 'lock' or 'reload' phase.
2662  */
2663
2664 regMaskTP           CodeGen::genRestoreAddrMode(GenTreePtr   addr,
2665                                                 GenTreePtr   tree,
2666                                                 bool         lockPhase)
2667 {
2668     regMaskTP  regMask = RBM_NONE;
2669
2670     /* Have we found a spilled value? */
2671
2672     if  (tree->gtFlags & GTF_SPILLED)
2673     {
2674         /* Do nothing if we're locking, otherwise reload and lock */
2675
2676         if  (!lockPhase)
2677         {
2678             /* Unspill the register */
2679
2680             regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
2681
2682             /* The value should now be sitting in a register */
2683
2684             noway_assert(tree->gtFlags & GTF_REG_VAL);
2685             regMask = genRegMask(tree->gtRegNum);
2686
2687             /* Mark the register as used for the address */
2688
2689             regSet.rsMarkRegUsed(tree, addr);
2690
2691             /* Lock the register until we're done with the entire address */
2692
2693             regSet.rsMaskLock |= regMask;
2694         }
2695
2696         return  regMask;
2697     }
2698
2699     /* Is this sub-tree sitting in a register? */
2700
2701     if  (tree->gtFlags & GTF_REG_VAL)
2702     {
2703         regMask = genRegMask(tree->gtRegNum);
2704
2705         /* Lock the register if we're in the locking phase */
2706
2707         if  (lockPhase)
2708             regSet.rsMaskLock |= regMask;
2709     }
2710     else
2711     {
2712         /* Process any sub-operands of this node */
2713
2714         unsigned        kind = tree->OperKind();
2715
2716         if  (kind & GTK_SMPOP)
2717         {
2718             /* Unary/binary operator */
2719
2720             if  (tree->gtOp.gtOp1)
2721                 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
2722             if  (tree->gtGetOp2())
2723                 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
2724         }
2725         else if (tree->gtOper == GT_ARR_ELEM)
2726         {
2727             /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
2728                which holds the offset-calculation */
2729
2730             regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj,     lockPhase);
2731             regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
2732         }
2733         else if (tree->gtOper == GT_CMPXCHG)
2734         {
2735             regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
2736         }
2737         else
2738         {
2739             /* Must be a leaf/constant node */
2740
2741             noway_assert(kind & (GTK_LEAF|GTK_CONST));
2742         }
2743     }
2744
2745     return  regMask;
2746 }
2747
2748 /*****************************************************************************
2749  *
2750  *  The given tree was previously passed to genMakeAddressable, but since then
2751  *  some of its registers are known to have been spilled; do whatever it takes
2752  *  to make the operand addressable again (typically by reloading any spilled
2753  *  registers).
2754  */
2755
2756 regMaskTP           CodeGen::genRestAddressable(GenTreePtr tree,
2757                                                 regMaskTP  addrReg,
2758                                                 regMaskTP  lockMask)
2759 {
2760     noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2761
2762     /* Is this a 'simple' register spill? */
2763
2764     if  (tree->gtFlags & GTF_SPILLED)
2765     {
2766         /* The mask must match the original register/regpair */
2767
2768         if  (isRegPairType(tree->gtType))
2769         {
2770             noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
2771
2772             regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2773
2774             addrReg = genRegPairMask(tree->gtRegPair);
2775         }
2776         else
2777         {
2778             noway_assert(addrReg == genRegMask(tree->gtRegNum));
2779
2780             regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2781
2782             addrReg = genRegMask(tree->gtRegNum);
2783         }
2784
2785         noway_assert((regSet.rsMaskLock &  lockMask) == lockMask);
2786                       regSet.rsMaskLock -= lockMask;
2787
2788         return  addrReg;
2789     }
2790
2791     /* We have a complex address mode with some of its sub-operands spilled */
2792
2793     noway_assert((tree->gtFlags & GTF_REG_VAL     ) == 0);
2794     noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
2795
2796     /*
2797         We'll proceed in several phases:
2798
2799          1. Lock any registers that are part of the address mode and
2800             have not been spilled. This prevents these registers from
2801             getting spilled in step 2.
2802
2803          2. Reload any registers that have been spilled; lock each
2804             one right after it is reloaded.
2805
2806          3. Unlock all the registers.
2807      */
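    // For example (registers purely illustrative): if the address mode was
    // [rv1 + rv2*4] and only rv2 was spilled, phase 1 locks rv1's register,
    // phase 2 reloads rv2 into some free register and locks that one too, and
    // phase 3 unlocks them both.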
2808
2809     addrReg   = genRestoreAddrMode(tree, tree,  true);
2810     addrReg  |= genRestoreAddrMode(tree, tree, false);
2811
2812     /* Unlock all registers that the address mode uses */
2813
2814     lockMask |= addrReg;
2815
2816     noway_assert((regSet.rsMaskLock &  lockMask) == lockMask);
2817                   regSet.rsMaskLock -= lockMask;
2818
2819     return  addrReg;
2820 }
2821
2822 /*****************************************************************************
2823  *
2824  *  The given tree was previously passed to genMakeAddressable, but since then
2825  *  some of its registers might have been spilled ('addrReg' is the set of
2826  *  registers used by the address). This function makes sure the operand is
2827  *  still addressable (while avoiding any of the registers in 'avoidMask'),
2828  *  and returns the (possibly modified) set of registers that are used by
2829  *  the address (these will be marked as used on exit).
2830  */
2831
2832 regMaskTP           CodeGen::genKeepAddressable(GenTreePtr   tree,
2833                                                 regMaskTP    addrReg,
2834                                                 regMaskTP    avoidMask)
2835 {
2836     /* Is the operand still addressable? */
2837
2838     tree = tree->gtEffectiveVal(/*commaOnly*/true);  // Strip off commas for this purpose.
2839
2840     if  (!genStillAddressable(tree))
2841     {
2842         if (avoidMask)
2843         {
2844             // Temporarily lock 'avoidMask' while we restore addressability
2845             // genRestAddressable will unlock the 'avoidMask' for us
2846             // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2847             // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
2848             //
2849             regSet.rsLockUsedReg(avoidMask);
2850         }
2851
2852         addrReg = genRestAddressable(tree, addrReg, avoidMask);
2853
2854         noway_assert((regSet.rsMaskLock &  avoidMask) == 0);
2855     }
2856
2857     return  addrReg;
2858 }
2859
2860 /*****************************************************************************
2861  *
2862  *  After we're finished with the given operand (which was previously marked
2863  *  by calling genMakeAddressable), this function must be called to free any
2864  *  registers that may have been used by the address.
2865  *  keptReg indicates if the addressability registers were marked as used
2866  *  by genMakeAddressable().
2867  */
2868
2869 void                CodeGen::genDoneAddressable(GenTreePtr tree,
2870                                                 regMaskTP  addrReg,
2871                                                 RegSet::KeepReg    keptReg)
2872 {
2873     if (keptReg == RegSet::FREE_REG)
2874     {
2875         // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2876         // ie. There may be a pending use in a higher-up tree.
2877
2878         addrReg &= ~regSet.rsMaskUsed;
2879         
2880         /* addrReg was not marked as used. So just reset its GC info */
2881         if (addrReg)
2882         {
2883             gcInfo.gcMarkRegSetNpt(addrReg);
2884         }
2885     }
2886     else
2887     {
2888         /* addrReg was marked as used. So we need to free it up (which
2889            will also reset its GC info) */
2890
2891         regSet.rsMarkRegFree(addrReg);
2892     }
2893 }
2894
2895 /*****************************************************************************/
2896 /*****************************************************************************
2897  *
2898  *  Make sure the given floating point value is addressable, and return a tree
2899  *  that will yield the value as an addressing mode (this tree may differ from
2900  *  the one passed in, BTW). If the only way to make the value addressable is
2901  *  to evaluate into the FP stack, we do this and return zero.
2902  */
2903
2904 GenTreePtr          CodeGen::genMakeAddrOrFPstk(GenTreePtr   tree,
2905                                                 regMaskTP *  regMaskPtr,
2906                                                 bool         roundResult)
2907 {
2908     *regMaskPtr = 0;
2909
2910     switch (tree->gtOper)
2911     {
2912     case GT_LCL_VAR:
2913     case GT_LCL_FLD:
2914     case GT_CLS_VAR:
2915         return tree;
2916
2917     case GT_CNS_DBL:
2918         if (tree->gtType == TYP_FLOAT)
2919         {
2920             float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
2921             return  genMakeConst(&f, TYP_FLOAT, tree, false);
2922         }
2923         return  genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
2924
2925     case GT_IND:
2926     case GT_NULLCHECK:
2927
2928         /* Try to make the address directly addressable */
2929
2930         if  (genMakeIndAddrMode(tree->gtOp.gtOp1,
2931                                 tree,
2932                                 false, /* not for LEA */
2933                                 0,
2934                                 RegSet::FREE_REG,
2935                                 regMaskPtr,
2936                                 false))
2937         {
2938             genUpdateLife(tree);
2939             return tree;
2940         }
2941
2942         break;
2943
2944     default:
2945         break;
2946     }
2947 #if FEATURE_STACK_FP_X87
2948     /* We have no choice but to compute the value 'tree' onto the FP stack */
2949
2950     genCodeForTreeFlt(tree);
2951 #endif
2952     return 0;
2953 }
2954
2955
2956 /*****************************************************************************/
2957 /*****************************************************************************
2958  *
2959  *  Display a string literal value (debug only).
2960  */
2961
2962 #ifdef  DEBUG
2963 #endif
2964
2965 /*****************************************************************************
2966  *
2967  *   Generate code to check that the GS cookie wasn't trashed by a buffer
2968  *   overrun.  If pushReg is true, preserve all registers around the code sequence.
2969  *   Otherwise, ECX may be modified.
2970  *
2971  *   TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
2972  */
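//
// On x86, the JIT (non-NGen) case below boils down to roughly the following
// (illustrative only):
//
//     cmp  dword ptr [<gsCookieFrameSlot>], <gsGlobalSecurityCookieVal>
//     je   L_cookieOk
//     call CORINFO_HELP_FAIL_FAST
//   L_cookieOk:
//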
2973 void                CodeGen::genEmitGSCookieCheck(bool pushReg)
2974 {
2975     // Make sure that EAX didn't die in the return expression
2976     if (!pushReg && (compiler->info.compRetType == TYP_REF))
2977         gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2978
2979     // Add cookie check code for unsafe buffers
2980     BasicBlock  *gsCheckBlk;
2981     regMaskTP byrefPushedRegs = RBM_NONE;
2982     regMaskTP norefPushedRegs = RBM_NONE;
2983     regMaskTP pushedRegs = RBM_NONE;
2984
2985     noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
2986
2987     if (compiler->gsGlobalSecurityCookieAddr == NULL)
2988     {
2989         // JIT case
2990 #if CPU_LOAD_STORE_ARCH
2991         regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
2992         getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE,
2993                                 reg,
2994                                 compiler->lvaGSSecurityCookie, 0);
2995         regTracker.rsTrackRegTrash(reg);
2996
2997         if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) || 
2998             arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
2999         {
3000             getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE,
3001                                     reg,
3002                                     compiler->gsGlobalSecurityCookieVal);
3003         }
3004         else
3005         {
3006             // Load CookieVal into a register
3007             regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
3008             instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
3009             getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE,
3010                                     reg,  immReg);
3011         }
3012 #else
3013         getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE,
3014                                 compiler->lvaGSSecurityCookie, 0, 
3015                                 (int)compiler->gsGlobalSecurityCookieVal);
3016 #endif
3017     }
3018     else
3019     {
3020         regNumber regGSCheck;
3021         regMaskTP regMaskGSCheck;
3022 #if CPU_LOAD_STORE_ARCH
3023         regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
3024         regMaskGSCheck = genRegMask(regGSCheck);
3025 #else
3026         // Don't pick the 'this' register
3027         if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
3028             (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
3029         {
3030             regGSCheck = REG_EDX;
3031             regMaskGSCheck = RBM_EDX;
3032         }
3033         else
3034         {
3035             regGSCheck = REG_ECX;
3036             regMaskGSCheck = RBM_ECX;
3037         }
3038
3039         // NGen case
3040         if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock)))
3041         {
3042             pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
3043         }
3044         else
3045         {
3046             noway_assert((regMaskGSCheck & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock)) == 0);
3047         }
3048 #endif
3049 #if defined(_TARGET_ARM_)
3050         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
3051         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck,0);
3052 #else
3053         getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
3054 #endif // !_TARGET_ARM_
3055         regTracker.rsTrashRegSet(regMaskGSCheck);
3056 #ifdef _TARGET_ARM_
3057         regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
3058         getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
3059         regTracker.rsTrackRegTrash(regTmp);
3060         getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
3061 #else
3062         getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
3063 #endif
3064     }
3065
3066     gsCheckBlk = genCreateTempLabel();
3067     emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3068     inst_JMP(jmpEqual, gsCheckBlk);
3069     genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
3070     genDefineTempLabel(gsCheckBlk);
3071
3072     genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
3073 }
3074
3075
3076 /*****************************************************************************
3077  *
3078  *  Generate any side effects within the given expression tree.
3079  */
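//
// Sketch of the strategy used below:
//  - If the tree has GTF_SIDE_EFFECT and is an indirection, we only form its address;
//    when the indir can fault (GTF_EXCEPT) or is volatile we still emit a compare/load
//    against that address so the implicit null check (and, on ARM, any required memory
//    barrier) happens, but the loaded value itself is discarded.
//  - Any other side-effecting tree is simply evaluated via genCodeForTree and its
//    result thrown away.
//  - Trees without side effects are only walked so that liveness of dying values is
//    updated correctly.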
3080
3081 void                CodeGen::genEvalSideEffects(GenTreePtr tree)
3082 {
3083     genTreeOps      oper;
3084     unsigned        kind;
3085
3086 AGAIN:
3087
3088     /* Does this sub-tree contain any side-effects? */
3089     if  (tree->gtFlags & GTF_SIDE_EFFECT)
3090     {
3091 #if FEATURE_STACK_FP_X87
3092         /* Remember the current FP stack level */
3093         int iTemps = genNumberTemps();
3094 #endif
3095         if (tree->OperIsIndir())
3096         {
3097             regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
3098
3099             if  (tree->gtFlags & GTF_REG_VAL)
3100             {
3101                 gcInfo.gcMarkRegPtrVal(tree);
3102                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3103             }
3104             // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
3105             // do not need an additional null-check
3106             /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
3107             else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 &&
3108                      (tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)))
3109             {
3110                 /* Compare against any register to do null-check */
3111 #if defined(_TARGET_XARCH_)
3112                 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
3113                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3114 #elif CPU_LOAD_STORE_ARCH
3115                 if (varTypeIsFloating(tree->TypeGet()))
3116                 {
3117                     genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT, RegSet::FREE_REG);
3118                 }
3119                 else
3120                 {
3121                     genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
3122                 }
3123 #ifdef _TARGET_ARM_
3124                 if (tree->gtFlags & GTF_IND_VOLATILE)
3125                 {
3126                     // Emit a memory barrier instruction after the load 
3127                     instGen_MemoryBarrier();
3128                 }
3129 #endif
3130 #else
3131                 NYI("TARGET");
3132 #endif
3133             }
3134             else
3135             {
3136                 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3137             }
3138         }
3139         else
3140         {
3141             /* Generate the expression and throw it away */
3142             genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3143             if  (tree->gtFlags & GTF_REG_VAL)
3144             {
3145                 gcInfo.gcMarkRegPtrVal(tree);
3146             }
3147         }
3148 #if FEATURE_STACK_FP_X87
3149         /* If the tree computed a value on the FP stack, pop the stack */
3150         if (genNumberTemps() > iTemps)
3151         {
3152             noway_assert(genNumberTemps() == iTemps+1);
3153             genDiscardStackFP(tree);
3154         }
3155 #endif
3156         return;
3157     }
3158
3159     noway_assert(tree->gtOper != GT_ASG);
3160
3161     /* Walk the tree, just to mark any dead values appropriately */
3162
3163     oper = tree->OperGet();
3164     kind = tree->OperKind();
3165
3166     /* Is this a constant or leaf node? */
3167
3168     if  (kind & (GTK_CONST|GTK_LEAF))
3169     {
3170 #if FEATURE_STACK_FP_X87
3171         if (tree->IsRegVar() && isFloatRegType(tree->gtType) &&
3172             tree->IsRegVarDeath())
3173         {
3174             genRegVarDeathStackFP(tree);
3175             FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3176         }
3177 #endif
3178         genUpdateLife(tree);
3179         gcInfo.gcMarkRegPtrVal (tree);
3180         return;
3181     }
3182
3183     /* Must be a 'simple' unary/binary operator */
3184
3185     noway_assert(kind & GTK_SMPOP);
3186
3187     if  (tree->gtGetOp2())
3188     {
3189         genEvalSideEffects(tree->gtOp.gtOp1);
3190
3191         tree = tree->gtOp.gtOp2;
3192         goto AGAIN;
3193     }
3194     else
3195     {
3196         tree = tree->gtOp.gtOp1;
3197         if  (tree)
3198             goto AGAIN;
3199     }
3200 }
3201
3202 /*****************************************************************************
3203  *
3204  *  A persistent pointer value is being overwritten, record it for the GC.
3205  *
3206  *  tgt        : the destination being written to
3207  *  assignVal  : the value being assigned (the source). It must currently be in a register.
3208  *  tgtAddrReg : the set of registers being used by "tgt"
3209  *
3210  *  Returns    : the mask of the scratch register that was used.
3211  *               RBM_NONE if a write-barrier is not needed.
3212  */
3213
3214 regMaskTP           CodeGen::WriteBarrier(GenTreePtr tgt,
3215                                           GenTreePtr assignVal,
3216                                           regMaskTP  tgtAddrReg)
3217 {
3218     noway_assert(assignVal->gtFlags & GTF_REG_VAL);
3219
3220     GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3221     if  (wbf == GCInfo::WBF_NoBarrier)
3222         return RBM_NONE;
3223
3224     regMaskTP  resultRegMask = RBM_NONE;
3225
3226 #if FEATURE_WRITE_BARRIER
3227
3228     regNumber  reg = assignVal->gtRegNum;
3229
3230 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3231 #ifdef DEBUG
3232     if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
3233     {
3234 #endif
3235     const static int regToHelper[2][8] =
3236     {
3237         // If the target is known to be in managed memory
3238         {
3239             CORINFO_HELP_ASSIGN_REF_EAX,
3240             CORINFO_HELP_ASSIGN_REF_ECX,
3241             -1,
3242             CORINFO_HELP_ASSIGN_REF_EBX,
3243             -1,
3244             CORINFO_HELP_ASSIGN_REF_EBP,
3245             CORINFO_HELP_ASSIGN_REF_ESI,
3246             CORINFO_HELP_ASSIGN_REF_EDI,
3247         },
3248
3249         // Don't know if the target is in managed memory
3250         {
3251             CORINFO_HELP_CHECKED_ASSIGN_REF_EAX,
3252             CORINFO_HELP_CHECKED_ASSIGN_REF_ECX,
3253             -1,
3254             CORINFO_HELP_CHECKED_ASSIGN_REF_EBX,
3255             -1,
3256             CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
3257             CORINFO_HELP_CHECKED_ASSIGN_REF_ESI,
3258             CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
3259         },
3260     };
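    // regToHelper is indexed as [tgtAnywhere][srcReg]: the first index is 0 when the
    // target is known to be in the GC heap and 1 when it may point anywhere (so the
    // checked helpers must be used); the second index is the register holding the value
    // being stored.  The -1 slots are the registers that can never hold that value:
    // ESP, and EDX, which is reserved as the write-barrier address register
    // (see the asserts below).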
3261
3262     noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
3263     noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
3264     noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
3265     noway_assert(regToHelper[0][REG_ESP] == -1                 );
3266     noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
3267     noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
3268     noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
3269
3270     noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
3271     noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
3272     noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
3273     noway_assert(regToHelper[1][REG_ESP] == -1                     );
3274     noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
3275     noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
3276     noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
3277
3278     noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
3279
3280     /*
3281         Generate the following code:
3282
3283                 lea     edx, tgt
3284                 call    write_barrier_helper_reg
3285
3286         First grab the RBM_WRITE_BARRIER register for the target address.
3287      */
3288
3289     regNumber  rg1;
3290     bool       trashOp1;
3291
3292     if  ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
3293     {
3294         rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
3295
3296         regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
3297         regSet.rsMaskLock |= RBM_WRITE_BARRIER;
3298
3299         trashOp1 = false;
3300     }
3301     else
3302     {
3303         rg1 = REG_WRITE_BARRIER;
3304
3305         trashOp1 = true;
3306     }
3307
3308     noway_assert(rg1 == REG_WRITE_BARRIER);
3309
3310     /* Generate "lea EDX, [addr-mode]" */
3311
3312     noway_assert(tgt->gtType == TYP_REF);
3313     tgt->gtType = TYP_BYREF;
3314     inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
3315
3316     /* Free up anything that was tied up by the LHS */
3317     genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3318
3319     // In case "tgt" was a comma:
3320     tgt = tgt->gtEffectiveVal();
3321
3322     regTracker.rsTrackRegTrash(rg1);
3323     gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
3324     gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
3325
3326
3327     /* Call the proper vm helper */
3328
3329     // enforced by gcIsWriteBarrierCandidate
3330     noway_assert(tgt->gtOper == GT_IND ||
3331                  tgt->gtOper == GT_CLS_VAR);
3332
3333     unsigned    tgtAnywhere = 0;
3334     if ((tgt->gtOper == GT_IND) &&
3335         ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
3336     {
3337         tgtAnywhere = 1;
3338     }
3339
3340     int helper = regToHelper[tgtAnywhere][reg];
3341     resultRegMask = genRegMask(reg);
3342
3343     gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER);          // byref EDX is killed in the call
3344
3345     genEmitHelperCall(helper,
3346                       0,               // argSize
3347                       EA_PTRSIZE);     // retSize
3348
3349     if  (!trashOp1)
3350     {
3351         regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
3352         regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
3353     }
3354
3355     return resultRegMask;
3356
3357 #ifdef DEBUG
3358     }
3359     else
3360 #endif
3361 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3362
3363 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3364     {
3365     /*
3366         Generate the following code (or its equivalent on the given target):
3367
3368                 mov     arg1, srcReg
3369                 lea     arg0, tgt
3370                 call    write_barrier_helper
3371
3372         First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
3373      */
3374
3375     if (reg != REG_ARG_1)
3376     {
3377         // We may need to spill whatever is in the ARG_1 register
3378         //
3379         if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
3380         {
3381             regSet.rsSpillReg(REG_ARG_1);
3382         }
3383
3384         inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
3385     }
3386     resultRegMask = RBM_ARG_1;
3387
3388     regTracker.rsTrackRegTrash(REG_ARG_1);
3389     gcInfo.gcMarkRegSetNpt(RBM_ARG_1);
3390     gcInfo.gcMarkRegSetGCref(RBM_ARG_1);        // gcref in ARG_1 
3391
3392     bool free_arg1 = false;
3393     if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
3394     {
3395         regSet.rsMaskUsed |= RBM_ARG_1;
3396         free_arg1 = true;
3397     }
3398
3399     // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
3400
3401     /* Generate "lea R0, [addr-mode]" */
3402
3403     noway_assert(tgt->gtType == TYP_REF);
3404     tgt->gtType = TYP_BYREF;
3405
3406     tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
3407
3408     // We may need to spill whatever is in the ARG_0 register
3409     //
3410     if (((tgtAddrReg & RBM_ARG_0) == 0) &&    // tgtAddrReg does not contain REG_ARG_0
3411         ((regSet.rsMaskUsed & RBM_ARG_0) != 0) &&    // and regSet.rsMaskUsed contains REG_ARG_0
3412         (reg != REG_ARG_0))                   // unless REG_ARG_0 contains the REF value being written, which we're finished with.
3413     {
3414         regSet.rsSpillReg(REG_ARG_0);
3415     }
3416
3417     inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
3418
3419     /* Free up anything that was tied up by the LHS */
3420     genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3421
3422     regTracker.rsTrackRegTrash(REG_ARG_0);
3423     gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
3424     gcInfo.gcMarkRegSetByref(RBM_ARG_0);        // byref in ARG_0 
3425
3426 #ifdef _TARGET_ARM_
3427     // Finally, we may need to spill any additional registers that the write barrier
3428     // call trashes and that are currently in use: either the standard volatile
3429     // (callee-trash) set, or, if we're using the assembly write barriers, a more specialized set.
3430 #if NOGC_WRITE_BARRIERS
3431     regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
3432 #else
3433     regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
3434 #endif
3435     // Spill any other registers trashed by the write barrier call and currently in use.
3436     regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0|RBM_ARG_1));
3437     if (mustSpill) regSet.rsSpillRegs(mustSpill);
3438 #endif // _TARGET_ARM_
3439
3440     bool free_arg0 = false;
3441     if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
3442     {
3443         regSet.rsMaskUsed |= RBM_ARG_0;
3444         free_arg0 = true;
3445     }
3446
3447     // genEmitHelperCall might need to grab a register
3448     // so don't let it spill one of the arguments
3449     //
3450     regMaskTP reallyUsedRegs = RBM_NONE;
3451     regSet.rsLockReg(RBM_ARG_0|RBM_ARG_1, &reallyUsedRegs);
3452
3453     genGCWriteBarrier(tgt, wbf);
3454
3455     regSet.rsUnlockReg(RBM_ARG_0|RBM_ARG_1, reallyUsedRegs);
3456     gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1);   // byref ARG_0 and reg ARG_1 are killed by the call
3457
3458     if (free_arg0)
3459     {
3460         regSet.rsMaskUsed &= ~RBM_ARG_0;
3461     }
3462     if (free_arg1)
3463     {
3464         regSet.rsMaskUsed &= ~RBM_ARG_1;
3465     }
3466
3467     return resultRegMask; 
3468     }
3469 #endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3470
3471 #else  // !FEATURE_WRITE_BARRIER
3472
3473     NYI("FEATURE_WRITE_BARRIER unimplemented");
3474     return resultRegMask; 
3475
3476 #endif // !FEATURE_WRITE_BARRIER
3477
3478 }
3479
3480 #ifdef _TARGET_X86_
3481 /*****************************************************************************
3482  *
3483  *  Generate the appropriate conditional jump(s) right after the high 32 bits
3484  *  of two long values have been compared.
3485  */
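//
// For example, for a signed GT_LT the jumps emitted after the high-word compare are:
//
//          jg      jumpFalse       ; high(op1) >  high(op2)  =>  result is false
//          jl      jumpTrue        ; high(op1) <  high(op2)  =>  result is true
//          ; fall through: high words are equal, the low-word compare decides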
3486
3487 void                CodeGen::genJccLongHi(genTreeOps   cmp,
3488                                           BasicBlock * jumpTrue,
3489                                           BasicBlock * jumpFalse,
3490                                           bool         isUnsigned )
3491 {
3492     if (cmp != GT_NE)
3493     {
3494         jumpFalse->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
3495     }
3496
3497     switch (cmp)
3498     {
3499     case GT_EQ:
3500         inst_JMP(EJ_jne, jumpFalse);
3501         break;
3502
3503     case GT_NE:
3504         inst_JMP(EJ_jne, jumpTrue);
3505         break;
3506
3507     case GT_LT:
3508     case GT_LE:
3509         if (isUnsigned)
3510         {
3511             inst_JMP(EJ_ja , jumpFalse);
3512             inst_JMP(EJ_jb , jumpTrue);
3513         }
3514         else
3515         {
3516             inst_JMP(EJ_jg , jumpFalse);
3517             inst_JMP(EJ_jl , jumpTrue);
3518         }
3519         break;
3520
3521     case GT_GE:
3522     case GT_GT:
3523         if (isUnsigned)
3524         {
3525             inst_JMP(EJ_jb , jumpFalse);
3526             inst_JMP(EJ_ja , jumpTrue);
3527         }
3528         else
3529         {
3530             inst_JMP(EJ_jl , jumpFalse);
3531             inst_JMP(EJ_jg , jumpTrue);
3532         }
3533         break;
3534
3535     default:
3536         noway_assert(!"expected a comparison operator");
3537     }
3538 }
3539
3540 /*****************************************************************************
3541  *
3542  *  Generate the appropriate conditional jump(s) right after the low 32 bits
3543  *  of two long values have been compared.
3544  */
3545
3546 void            CodeGen::genJccLongLo(genTreeOps  cmp,
3547                                       BasicBlock* jumpTrue,
3548                                       BasicBlock* jumpFalse)
3549 {
3550     switch (cmp)
3551     {
3552     case GT_EQ:
3553         inst_JMP(EJ_je , jumpTrue);
3554         break;
3555
3556     case GT_NE:
3557         inst_JMP(EJ_jne, jumpTrue);
3558         break;
3559
3560     case GT_LT:
3561         inst_JMP(EJ_jb , jumpTrue);
3562         break;
3563
3564     case GT_LE:
3565         inst_JMP(EJ_jbe, jumpTrue);
3566         break;
3567
3568     case GT_GE:
3569         inst_JMP(EJ_jae, jumpTrue);
3570         break;
3571
3572     case GT_GT:
3573         inst_JMP(EJ_ja , jumpTrue);
3574         break;
3575
3576     default:
3577         noway_assert(!"expected comparison");
3578     }
3579 }
3580 #elif defined(_TARGET_ARM_)
3581 /*****************************************************************************
3582 *
3583 *  Generate the appropriate conditional jump(s) right after the high 32 bits
3584 *  of two long values have been compared.
3585 */
3586
3587 void                CodeGen::genJccLongHi(genTreeOps   cmp,
3588                                           BasicBlock * jumpTrue,
3589                                           BasicBlock * jumpFalse,
3590                                           bool         isUnsigned)
3591 {
3592     if (cmp != GT_NE)
3593     {
3594         jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3595     }
3596
3597     switch (cmp)
3598     {
3599     case GT_EQ:
3600         inst_JMP(EJ_ne, jumpFalse);
3601         break;
3602
3603     case GT_NE:
3604         inst_JMP(EJ_ne, jumpTrue);
3605         break;
3606
3607     case GT_LT:
3608     case GT_LE:
3609         if (isUnsigned)
3610         {
3611             inst_JMP(EJ_hi, jumpFalse);
3612             inst_JMP(EJ_lo, jumpTrue);
3613         }
3614         else
3615         {
3616             inst_JMP(EJ_gt, jumpFalse);
3617             inst_JMP(EJ_lt, jumpTrue);
3618         }
3619         break;
3620
3621     case GT_GE:
3622     case GT_GT:
3623         if (isUnsigned)
3624         {
3625             inst_JMP(EJ_lo, jumpFalse);
3626             inst_JMP(EJ_hi, jumpTrue);
3627         }
3628         else
3629         {
3630             inst_JMP(EJ_lt, jumpFalse);
3631             inst_JMP(EJ_gt, jumpTrue);
3632         }
3633         break;
3634
3635     default:
3636         noway_assert(!"expected a comparison operator");
3637     }
3638 }
3639
3640 /*****************************************************************************
3641 *
3642 *  Generate the appropriate conditional jump(s) right after the low 32 bits
3643 *  of two long values have been compared.
3644 */
3645
3646 void            CodeGen::genJccLongLo(genTreeOps  cmp,
3647                                       BasicBlock* jumpTrue,
3648                                       BasicBlock* jumpFalse)
3649 {
3650     switch (cmp)
3651     {
3652     case GT_EQ:
3653         inst_JMP(EJ_eq, jumpTrue);
3654         break;
3655
3656     case GT_NE:
3657         inst_JMP(EJ_ne, jumpTrue);
3658         break;
3659
3660     case GT_LT:
3661         inst_JMP(EJ_lo, jumpTrue);
3662         break;
3663
3664     case GT_LE:
3665         inst_JMP(EJ_ls, jumpTrue);
3666         break;
3667
3668     case GT_GE:
3669         inst_JMP(EJ_hs, jumpTrue);
3670         break;
3671
3672     case GT_GT:
3673         inst_JMP(EJ_hi, jumpTrue);
3674         break;
3675
3676     default:
3677         noway_assert(!"expected comparison");
3678     }
3679 }
3680 #endif
3681 /*****************************************************************************
3682  *
3683  *  Called by genCondJump() for TYP_LONG.
3684  */
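//
// A TYP_LONG compare is split into two 32-bit compares: the high words are compared
// first and genJccLongHi emits jumps that settle the result whenever they differ;
// if they are equal we fall through, compare the low words, and genJccLongLo emits
// the final (unsigned) jumps.  Equality tests against the constants 0 and -1 are
// special-cased below by OR-ing / AND-ing the two halves instead.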
3685
3686 void                CodeGen::genCondJumpLng(GenTreePtr     cond,
3687                                             BasicBlock*    jumpTrue,
3688                                             BasicBlock*    jumpFalse,
3689                                             bool           bFPTransition)
3690 {
3691     noway_assert(jumpTrue && jumpFalse);
3692     noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0); // Done in genCondJump()
3693     noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
3694
3695     GenTreePtr      op1       = cond->gtOp.gtOp1;
3696     GenTreePtr      op2       = cond->gtOp.gtOp2;
3697     genTreeOps      cmp       = cond->OperGet();
3698
3699     regMaskTP       addrReg;
3700
3701     /* Are we comparing against a constant? */
3702
3703     if  (op2->gtOper == GT_CNS_LNG)
3704     {
3705         __int64    lval = op2->gtLngCon.gtLconVal;
3706         regNumber  rTmp;
3707
3708         // We're "done" evaluating op2; let's strip any commas off op1 before we
3709         // evaluate it.
3710         op1 = genCodeForCommaTree(op1);
3711
3712         /* We can generate better code for some special cases */
3713         instruction     ins              = INS_invalid;
3714         bool            useIncToSetFlags = false;
3715         bool            specialCaseCmp   = false;
3716
3717         if (cmp == GT_EQ)
3718         {
3719             if (lval == 0)
3720             {
3721                 /* op1 == 0  */
3722                 ins = INS_OR;
3723                 useIncToSetFlags = false;
3724                 specialCaseCmp   = true;
3725             }
3726             else if (lval == -1)
3727             {
3728                 /* op1 == -1 */
3729                 ins = INS_AND;
3730                 useIncToSetFlags = true;
3731                 specialCaseCmp   = true;
3732             }
3733         }
3734         else if (cmp == GT_NE)
3735         {
3736             if (lval == 0)
3737             {
3738                 /* op1 != 0  */
3739                 ins = INS_OR;
3740                 useIncToSetFlags = false;
3741                 specialCaseCmp   = true;
3742             }
3743             else if (lval == -1)
3744             {
3745                 /* op1 != -1 */
3746                 ins = INS_AND;
3747                 useIncToSetFlags = true;
3748                 specialCaseCmp   = true;
3749             }
3750         }
3751
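        // Rationale for the special cases: a 64-bit value is 0 exactly when the OR of
        // its two halves is 0, and it is -1 exactly when the AND of its two halves is
        // -1 (all bits set).  In the -1 case we additionally increment the AND result
        // ("useIncToSetFlags"), turning -1 into 0, so the ordinary ZF-based jump can
        // be used for both cases.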
3752         if (specialCaseCmp)
3753         {
3754             /* Make the comparand addressable */
3755
3756             addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
3757
3758             regMaskTP tmpMask = regSet.rsRegMaskCanGrab();    
3759             insFlags  flags   = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
3760
3761             if (op1->gtFlags & GTF_REG_VAL)
3762             {
3763                 regPairNo regPair = op1->gtRegPair;
3764                 regNumber rLo     = genRegPairLo(regPair);
3765                 regNumber rHi     = genRegPairHi(regPair);
3766                 if (tmpMask & genRegMask(rLo))
3767                 {
3768                     rTmp = rLo;
3769                 }
3770                 else if (tmpMask & genRegMask(rHi))
3771                 {
3772                     rTmp = rHi;
3773                     rHi  = rLo;
3774                 }
3775                 else
3776                 {
3777                     rTmp = regSet.rsGrabReg(tmpMask);
3778                     inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
3779                 }
3780
3781                 /* The register is now trashed */
3782                 regTracker.rsTrackRegTrash(rTmp);
3783
3784                 if (rHi != REG_STK)
3785                 {
3786                     /* Set the flags using INS_AND | INS_OR */
3787                     inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
3788                 }
3789                 else
3790                 {
3791                     /* Set the flags using INS_AND | INS_OR */
3792                     inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3793                 }
3794
3795             }
3796             else  // op1 is not GTF_REG_VAL
3797             {
3798                 rTmp = regSet.rsGrabReg(tmpMask);
3799
3800                 /* Load the low 32-bits of op1 */
3801                 inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
3802
3803                 /* The register is now trashed */
3804                 regTracker.rsTrackRegTrash(rTmp);
3805
3806                 /* Set the flags using INS_AND | INS_OR */
3807                 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3808             }
3809
3810             /* Free up the addrReg(s) if any */
3811             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
3812
3813             /* Comparing against -1 also requires an inc instruction */
3814             if (useIncToSetFlags)
3815             {
3816                 /* Make sure the inc will set the flags */
3817                 assert(cond->gtSetFlags());
3818                 genIncRegBy(rTmp, 1, cond, TYP_INT);
3819             }
3820
3821 #if FEATURE_STACK_FP_X87
3822             // We may need a transition block
3823             if (bFPTransition)
3824             {
3825                 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3826             }
3827 #endif
3828             emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
3829             inst_JMP(jmpKind, jumpTrue);
3830         }
3831         else // specialCaseCmp == false
3832         {
3833             /* Make the comparand addressable */
3834             addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
3835
3836             /* Compare the high part first */
3837
3838             int  ival = (int)(lval >> 32);
3839
3840             /* Comparing a register against 0 is easier */
3841
3842             if  (!ival && (op1->gtFlags & GTF_REG_VAL)
3843                  && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK )
3844             {
3845                 /* Generate 'test rTmp, rTmp' */
3846                 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3847             }
3848             else
3849             {
3850                 if  (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
3851                 {
3852                     /* Special case: comparison of two constants */
3853                     // Needed as gtFoldExpr() doesn't fold longs
3854
3855                     noway_assert(addrReg == 0);
3856                     int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
3857
3858                     /* Get the constant operand into a register */
3859                     rTmp = genGetRegSetToIcon(op1_hiword);
3860
3861                     /* Generate 'cmp rTmp, ival' */
3862
3863                     inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3864                 }
3865                 else
3866                 {
3867                     /* Generate 'cmp op1, ival' */
3868
3869                     inst_TT_IV(INS_cmp, op1, ival, 4);
3870                 }
3871             }
3872
3873 #if FEATURE_STACK_FP_X87
3874             // We may need a transition block
3875             if (bFPTransition)
3876             {
3877                 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3878             } 
3879 #endif
3880             /* Generate the appropriate jumps */
3881
3882             if  (cond->gtFlags & GTF_UNSIGNED)
3883                  genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3884             else
3885                  genJccLongHi(cmp, jumpTrue, jumpFalse);
3886
3887             /* Compare the low part second */
3888
3889             ival = (int)lval;
3890
3891             /* Comparing a register against 0 is easier */
3892
3893             if  (!ival && (op1->gtFlags & GTF_REG_VAL)
3894                  && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
3895             {
3896                 /* Generate 'test rTmp, rTmp' */
3897                 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3898             }
3899             else
3900             {
3901                 if  (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
3902                 {
3903                     /* Special case: comparison of two constants */
3904                     // Needed as gtFoldExpr() doesn't fold longs
3905
3906                     noway_assert(addrReg == 0);
3907                     int op1_loword = (int) op1->gtLngCon.gtLconVal;
3908
3909                     /* get the constant operand into a register */
3910                     rTmp = genGetRegSetToIcon(op1_loword);
3911
3912                     /* Generate 'cmp rTmp, ival' */
3913
3914                     inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3915                 }
3916                 else
3917                 {
3918                     /* Generate 'cmp op1, ival' */
3919
3920                     inst_TT_IV(INS_cmp, op1, ival, 0);
3921                 }
3922             }
3923
3924             /* Generate the appropriate jumps */
3925             genJccLongLo(cmp, jumpTrue, jumpFalse);
3926
3927             genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
3928         }
3929     }
3930     else // (op2->gtOper != GT_CNS_LNG)
3931     {
3932
3933         /* The operands would be reversed by physically swapping them */
3934
3935         noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
3936
3937         /* Generate the first operand into a register pair */
3938
3939         genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
3940         noway_assert(op1->gtFlags & GTF_REG_VAL);
3941
3942 #if CPU_LOAD_STORE_ARCH
3943         /* Generate the second operand into a register pair */
3944         // Fix 388442 ARM JitStress WP7
3945         genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3946         noway_assert(op2->gtFlags & GTF_REG_VAL);
3947         regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
3948 #else
3949         /* Make the second operand addressable */
3950
3951         addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3952 #endif
3953         /* Make sure the first operand hasn't been spilled */
3954
3955         genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
3956         noway_assert(op1->gtFlags & GTF_REG_VAL);
3957
3958         regPairNo regPair = op1->gtRegPair;
3959
3960 #if !CPU_LOAD_STORE_ARCH
3961         /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
3962
3963         addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
3964 #endif
3965
3966 #if FEATURE_STACK_FP_X87
3967         // We may need a transition block
3968         if (bFPTransition)
3969         {
3970             jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3971         }
3972 #endif
3973
3974         /* Perform the comparison - high parts */
3975
3976         inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
3977
3978         if  (cond->gtFlags & GTF_UNSIGNED)
3979             genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3980         else
3981             genJccLongHi(cmp, jumpTrue, jumpFalse);
3982
3983         /* Compare the low parts */
3984
3985         inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
3986         genJccLongLo(cmp, jumpTrue, jumpFalse);
3987
3988         /* Free up anything that was tied up by either operand */
3989
3990 #if CPU_LOAD_STORE_ARCH
3991         // Fix 388442 ARM JitStress WP7
3992         regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
3993         genReleaseRegPair(op2);
3994 #else
3995         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
3996 #endif
3997         genReleaseRegPair(op1);
3998     }
3999 }
4000
4001
4002 /*****************************************************************************
4003  *  gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
4004  *  Called by genCondJumpFlt() to generate the fcomp instruction appropriate
4005  *  to the architecture we're running on.
4006  *
4007  *  P5:
4008  *  gen_fcomp_FN:     fcomp ST(0), stk
4009  *  gen_fcomp_FS_TT:  fcomp ST(0), addr
4010  *  gen_fcompp_FS:    fcompp
4011  *    These are followed by fnstsw, sahf to get the flags in EFLAGS.
4012  *
4013  *  P6:
4014  *  gen_fcomp_FN:     fcomip ST(0), stk
4015  *  gen_fcomp_FS_TT:  fld addr, fcomip ST(0), ST(1), fstp ST(0)
4016  *      (and reverse the branch condition since addr comes first)
4017  *  gen_fcompp_FS:    fcomip, fstp
4018  *    These instructions will correctly set the EFLAGS register.
4019  *
4020  *  Return value:  These functions return true if the instruction has
4021  *    already placed its result in the EFLAGS register.
4022  */
4023
4024 bool                CodeGen::genUse_fcomip()
4025 {
4026     return compiler->opts.compUseFCOMI;
4027 }
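// genUse_fcomip() simply reflects the compUseFCOMI option: when it returns true the
// P6-style fcomi/fcomip forms (which set EFLAGS directly) are used; otherwise the
// P5-style fcomp + fnstsw/sahf sequence described above is emitted.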
4028
4029 /*****************************************************************************
4030  *
4031  *  Sets the flag for the TYP_INT/TYP_REF comparison.
4032  *  We try to use the flags if they have already been set by a prior
4033  *  instruction.
4034  *  e.g. i++; if (i < 0) {}  Here, the "i++;" will have set the sign flag. We don't
4035  *                       need to compare against zero again. Just use an "INS_js".
4036  *
4037  *  Returns the flags the following jump/set instruction should use.
4038  */
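//
// In broad strokes, the code below:
//  - normalizes the comparison (undoes GTF_REVERSE_OPS; rewrites unsigned "< 1" as
//    "== 0", and unsigned ">" / "<=" against 0 as "!= 0" / "== 0"),
//  - special-cases "(x & icon) ==/!= 0" into a single TEST instruction,
//  - reuses EFLAGS that already reflect the value (GTF_ZSF_SET, genFlagsAreReg,
//    genFlagsAreVar) instead of emitting a compare, and
//  - otherwise makes the operands addressable and emits the cmp/test, returning the
//    emitJumpKind the caller should branch on.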
4039
4040 emitJumpKind            CodeGen::genCondSetFlags(GenTreePtr cond)
4041 {
4042     noway_assert(cond->OperIsCompare());
4043     noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
4044
4045     GenTreePtr      op1       = cond->gtOp.gtOp1;
4046     GenTreePtr      op2       = cond->gtOp.gtOp2;
4047     genTreeOps      cmp       = cond->OperGet();
4048
4049     if  (cond->gtFlags & GTF_REVERSE_OPS)
4050     {
4051         /* Don't forget to modify the condition as well */
4052
4053         cond->gtOp.gtOp1 = op2;
4054         cond->gtOp.gtOp2 = op1;
4055         cond->SetOper     (GenTree::SwapRelop(cmp));
4056         cond->gtFlags   &= ~GTF_REVERSE_OPS;
4057
4058         /* Get hold of the new values */
4059
4060         cmp  = cond->OperGet();
4061         op1  = cond->gtOp.gtOp1;
4062         op2  = cond->gtOp.gtOp2;
4063     }
4064
4065     // Note that op1's type may get bashed. So save it early
4066
4067     var_types     op1Type     = op1->TypeGet();
4068     bool          unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
4069     emitAttr      size        = EA_UNKNOWN;
4070
4071     regMaskTP     regNeed;
4072     regMaskTP     addrReg1 = RBM_NONE;
4073     regMaskTP     addrReg2 = RBM_NONE;
4074     emitJumpKind  jumpKind = EJ_COUNT;   // Initialize with an invalid value
4075
4076     bool  byteCmp;
4077     bool  shortCmp;
4078                   
4079     regMaskTP newLiveMask;
4080     regNumber op1Reg;
4081
4082     /* Are we comparing against a constant? */
4083
4084     if  (op2->IsCnsIntOrI())
4085     {
4086         ssize_t         ival = op2->gtIntConCommon.IconValue();
4087
4088         /* unsigned less than comparisons with 1 ('< 1' )
4089            should be transformed into '== 0' to potentially
4090            suppress a tst instruction.
4091         */
4092         if  ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
4093         {
4094             op2->gtIntCon.gtIconVal = ival = 0;
4095             cond->gtOper            = cmp  = GT_EQ;
4096         }
4097
4098         /* Comparisons against 0 can be easier */
4099
4100         if  (ival == 0)
4101         {
4102             // if we can safely change the comparison to unsigned we do so
4103             if  (!unsignedCmp                       &&
4104                  varTypeIsSmall(op1->TypeGet())     &&
4105                  varTypeIsUnsigned(op1->TypeGet()))
4106             {
4107                 unsignedCmp = true;
4108             }
4109
4110             /* unsigned comparisons with 0 should be transformed into
4111                '==0' or '!= 0' to potentially suppress a tst instruction. */
4112
4113             if (unsignedCmp)
4114             {
4115                 if (cmp == GT_GT)
4116                     cond->gtOper = cmp = GT_NE;
4117                 else if (cmp == GT_LE)
4118                     cond->gtOper = cmp = GT_EQ;
4119             }
4120
4121             /* Is this a simple zero/non-zero test? */
4122
4123             if  (cmp == GT_EQ || cmp == GT_NE)
4124             {
4125                 /* Is the operand an "AND" operation? */
4126
4127                 if  (op1->gtOper == GT_AND)
4128                 {
4129                     GenTreePtr      an1 = op1->gtOp.gtOp1;
4130                     GenTreePtr      an2 = op1->gtOp.gtOp2;
4131
4132                     /* Check for the case "expr & icon" */
4133
4134                     if  (an2->IsIntCnsFitsInI32())
4135                     {
4136                         int iVal = (int)an2->gtIntCon.gtIconVal;
4137
4138                         /* make sure that constant is not out of an1's range */
4139
4140                         switch (an1->gtType)
4141                         {
4142                         case TYP_BOOL:
4143                         case TYP_BYTE:
4144                             if (iVal & 0xffffff00)
4145                                 goto NO_TEST_FOR_AND;
4146                             break;
4147                         case TYP_CHAR:
4148                         case TYP_SHORT:
4149                             if (iVal & 0xffff0000)
4150                                 goto NO_TEST_FOR_AND;
4151                             break;
4152                         default:
4153                             break;
4154                         }
4155
4156                         if (an1->IsCnsIntOrI())
4157                         {
4158                             // Special case - Both operands of AND are consts
4159                             genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
4160                             addrReg1 = genRegMask(an1->gtRegNum);
4161                         }
4162                         else
4163                         {
4164                             addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
4165                         }
4166 #if CPU_LOAD_STORE_ARCH
4167                         if ((an1->gtFlags & GTF_REG_VAL) == 0)
4168                         {
4169                             genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
4170                             if (arm_Valid_Imm_For_Alu(iVal))
4171                             {
4172                                 inst_RV_IV(INS_TEST,an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
4173                             }
4174                             else
4175                             {
4176                                 regNumber regTmp = regSet.rsPickFreeReg();
4177                                 instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
4178                                 inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
4179                             }
4180                             genReleaseReg(an1);
4181                             addrReg1 = RBM_NONE;
4182                         }
4183                         else
4184 #endif
4185                         {
4186 #ifdef _TARGET_XARCH_
4187                             // Check to see if we can use a smaller immediate.
4188                             if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
4189                             {
4190                                 var_types testType = (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
4191 #if CPU_HAS_BYTE_REGS
4192                                 // if we don't have byte-able register, switch to the 2-byte form
4193                                 if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
4194                                 {
4195                                     testType = TYP_USHORT;
4196                                 }
4197 #endif // CPU_HAS_BYTE_REGS
4198
4199                                 inst_TT_IV(INS_TEST, an1, iVal, testType);
4200                             }
4201                             else
4202 #endif // _TARGET_XARCH_
4203                             {
4204                                 inst_TT_IV(INS_TEST, an1, iVal);
4205                             }
4206                         }
4207
4208
4209                         goto DONE;
4210
4211                     NO_TEST_FOR_AND:
4212                         ;
4213
4214                     }
4215
4216                     // TODO: Check for other cases that can generate 'test',
4217                     // TODO: also check for a 64-bit integer zero test which
4218                     // TODO: could generate 'or lo, hi' followed by jz/jnz.
4219                 }
4220             }
4221
4222             // See what Jcc instruction we would use if we can take advantage of
4223             // the knowledge of EFLAGs.
4224
4225             if (unsignedCmp)
4226             {
4227                 /*
4228                     Unsigned comparison to 0. Using this table:
4229                 
4230                     ----------------------------------------------------
4231                     | Comparison | Flags Checked    | Instruction Used |
4232                     ----------------------------------------------------
4233                     |    == 0    | ZF = 1           |       je         |
4234                     ----------------------------------------------------
4235                     |    != 0    | ZF = 0           |       jne        |
4236                     ----------------------------------------------------
4237                     |     < 0    | always FALSE     |       N/A        |
4238                     ----------------------------------------------------
4239                     |    <= 0    | ZF = 1           |       je         |
4240                     ----------------------------------------------------
4241                     |    >= 0    | always TRUE      |       N/A        |
4242                     ----------------------------------------------------
4243                     |     > 0    | ZF = 0           |       jne        |
4244                     ----------------------------------------------------
4245                 */   
4246                 switch (cmp)
4247                 {
4248 #ifdef _TARGET_ARM_
4249                 case GT_EQ: jumpKind = EJ_eq;      break;
4250                 case GT_NE: jumpKind = EJ_ne;      break;
4251                 case GT_LT: jumpKind = EJ_NONE;    break;
4252                 case GT_LE: jumpKind = EJ_eq;      break;
4253                 case GT_GE: jumpKind = EJ_NONE;    break;
4254                 case GT_GT: jumpKind = EJ_ne;      break;
4255 #elif defined(_TARGET_X86_)
4256                 case GT_EQ: jumpKind = EJ_je;      break;
4257                 case GT_NE: jumpKind = EJ_jne;     break;
4258                 case GT_LT: jumpKind = EJ_NONE;    break;
4259                 case GT_LE: jumpKind = EJ_je;      break;
4260                 case GT_GE: jumpKind = EJ_NONE;    break;
4261                 case GT_GT: jumpKind = EJ_jne;     break;
4262 #endif // TARGET
4263                 default:
4264                     noway_assert(!"Unexpected comparison OpCode");
4265                     break;
4266                 }
4267             }
4268             else
4269             {
4270                 /*
4271                     Signed comparison to 0. Using this table:
4272                 
4273                     -----------------------------------------------------
4274                     | Comparison | Flags Checked     | Instruction Used |
4275                     -----------------------------------------------------
4276                     |    == 0    | ZF = 1            |       je         |
4277                     -----------------------------------------------------
4278                     |    != 0    | ZF = 0            |       jne        |
4279                     -----------------------------------------------------
4280                     |     < 0    | SF = 1            |       js         |
4281                     -----------------------------------------------------
4282                     |    <= 0    |      N/A          |       N/A        |
4283                     -----------------------------------------------------
4284                     |    >= 0    | SF = 0            |       jns        |
4285                     -----------------------------------------------------
4286                     |     > 0    |      N/A          |       N/A        |
4287                     -----------------------------------------------------
4288                 */
4289
4290                 switch (cmp)
4291                 {
4292 #ifdef _TARGET_ARM_
4293                 case GT_EQ: jumpKind = EJ_eq;      break;
4294                 case GT_NE: jumpKind = EJ_ne;      break;
4295                 case GT_LT: jumpKind = EJ_mi;      break;
4296                 case GT_LE: jumpKind = EJ_NONE;    break;
4297                 case GT_GE: jumpKind = EJ_pl;      break;
4298                 case GT_GT: jumpKind = EJ_NONE;    break;
4299 #elif defined(_TARGET_X86_)
4300                 case GT_EQ: jumpKind = EJ_je;      break;
4301                 case GT_NE: jumpKind = EJ_jne;     break;
4302                 case GT_LT: jumpKind = EJ_js;      break;
4303                 case GT_LE: jumpKind = EJ_NONE;    break;
4304                 case GT_GE: jumpKind = EJ_jns;     break;
4305                 case GT_GT: jumpKind = EJ_NONE;    break;
4306 #endif // TARGET
4307                 default:
4308                     noway_assert(!"Unexpected comparison OpCode");
4309                     break;
4310                 }
4311                 assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
4312             }
4313             assert(jumpKind != EJ_COUNT);   // Ensure that it was assigned a valid value above
4314
4315             /* Is the value a simple local variable? */
4316
4317             if  (op1->gtOper == GT_LCL_VAR)
4318             {
4319                 /* Is the flags register set to the value? */
4320
4321                 if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
4322                 {
4323                     if (jumpKind != EJ_NONE)
4324                     {
4325                         addrReg1 = RBM_NONE;
4326                         genUpdateLife(op1);
4327                         goto DONE_FLAGS;
4328                     }
4329                 }
4330             }
4331
4332             /* Make the comparand addressable */
4333             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4334
4335             /* Are the condition flags set based on the value? */
4336
4337             unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
4338
4339             if (op1->gtFlags & GTF_REG_VAL)
4340             {
4341                 if (genFlagsAreReg(op1->gtRegNum))
4342                 {
4343                     flags |= GTF_ZSF_SET;
4344                 }
4345             }
4346
4347             if  (flags)
4348             {
4349                 if (jumpKind != EJ_NONE)
4350                 {
4351                     goto DONE_FLAGS;
4352                 }
4353             }
4354
4355             /* Is the value in a register? */
4356
4357             if  (op1->gtFlags & GTF_REG_VAL)
4358             {
4359                 regNumber       reg = op1->gtRegNum;
4360
4361                 /* With a 'test' we can do any signed test or any test for equality */
4362
4363                 if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
4364                 {
4365                     emitAttr compareSize = emitTypeSize(op1->TypeGet());
4366
4367                     // If we have an GT_REG_VAR then the register will be properly sign/zero extended
4368                     // But only up to 4 bytes
4369                     if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
4370                     {
4371                         compareSize = EA_4BYTE;
4372                     }
4373
4374 #if CPU_HAS_BYTE_REGS
4375                     // Make sure if we require a byte compare that we have a byte-able register
4376                     if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
4377 #endif // CPU_HAS_BYTE_REGS
4378                     {
4379                         /* Generate 'test reg, reg' */
4380                         instGen_Compare_Reg_To_Zero(compareSize, reg);
4381                         goto DONE;
4382                     }
4383                 }
4384             }
4385         }
4386
4387         else // if (ival != 0)
4388         {
4389             bool smallOk = true;
4390
4391
4392             /* make sure that constant is not out of op1's range
4393                if it is, we need to perform an int with int comparison
4394                and therefore, we set smallOk to false, so op1 gets loaded
4395                into a register
4396             */
4397
4398             /* If op1 is TYP_SHORT, and is followed by an unsigned
4399              * comparison, we can use smallOk. But we don't know which
4400              * flags will be needed. This probably doesn't happen often.
4401             */
4402             var_types gtType=op1->TypeGet();
4403
4404             switch (gtType)
4405             {
4406             case TYP_BYTE:  if (ival != (signed   char )ival) smallOk = false; break;
4407             case TYP_BOOL:
4408             case TYP_UBYTE: if (ival != (unsigned char )ival) smallOk = false; break;
4409
4410             case TYP_SHORT: if (ival != (signed   short)ival) smallOk = false; break;
4411             case TYP_CHAR:  if (ival != (unsigned short)ival) smallOk = false; break;
4412
4413 #ifdef _TARGET_64BIT_
4414             case TYP_INT:   if (!FitsIn<INT32>(ival))         smallOk = false; break;
4415             case TYP_UINT:  if (!FitsIn<UINT32>(ival))        smallOk = false; break;
4416 #endif // _TARGET_64BIT_
4417
4418             default:                                                           break;
4419             }
4420
4421             if (smallOk                     &&      // constant is in op1's range
4422                 !unsignedCmp                &&      // signed comparison
4423                 varTypeIsSmall(gtType)      &&      // smalltype var
4424                 varTypeIsUnsigned(gtType))          // unsigned type
4425             {
4426                 unsignedCmp = true;
4427             }
4428
4429             /* Make the comparand addressable */
4430             addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
4431
4432         }
4433
4434 // #if defined(DEBUGGING_SUPPORT)
4435
4436         /* Special case: comparison of two constants */
4437
4438         // Needed if Importer doesn't call gtFoldExpr()
4439
4440         if  (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
4441         {
4442             // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
4443
4444             /* Workaround: get the constant operand into a register */
4445             genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4446
4447             noway_assert(addrReg1 == RBM_NONE);
4448             noway_assert(op1->gtFlags & GTF_REG_VAL);
4449
4450             addrReg1 = genRegMask(op1->gtRegNum);
4451         }
4452
4453 // #endif
4454
4455         /* Compare the operand against the constant */
4456
4457         if (op2->IsIconHandle())
4458         {
4459             inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
4460         }
4461         else
4462         {
4463             inst_TT_IV(INS_cmp, op1, ival);
4464         }
4465         goto DONE;
4466     }
4467
4468     //---------------------------------------------------------------------
4469     //
4470     // We reach here if op2 was not a GT_CNS_INT
4471     //
4472
4473     byteCmp  = false;
4474     shortCmp = false;
4475
4476     if (op1Type == op2->gtType)
4477     {
4478         shortCmp = varTypeIsShort(op1Type);
4479         byteCmp  = varTypeIsByte(op1Type);
4480     }
4481
4482     noway_assert(op1->gtOper != GT_CNS_INT);
4483
4484     if  (op2->gtOper == GT_LCL_VAR)
4485         genMarkLclVar(op2);
4486
4487     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4488     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4489
4490     /* Are we comparing against a register? */
4491
4492     if  (op2->gtFlags & GTF_REG_VAL)
4493     {
4494         /* Make the comparands addressable and mark as used */
4495
4496         assert(addrReg1 == RBM_NONE);
4497         addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4498
4499         /* Is the size of the comparison byte/char/short ? */
4500
4501         if  (varTypeIsSmall(op1->TypeGet()))
4502         {
4503             /* Is op2 sitting in an appropriate register? */
4504
4505             if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
4506                 goto NO_SMALL_CMP;
4507
4508             /* Is op2 of the right type for a small comparison */
4509
4510             if (op2->gtOper == GT_REG_VAR)
4511             {
4512                 if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
4513                     goto NO_SMALL_CMP;
4514             }
4515             else
4516             {
4517                 if (op1->gtType != op2->gtType)
4518                     goto NO_SMALL_CMP;
4519             }
4520
4521             if (varTypeIsUnsigned(op1->TypeGet()))
4522                 unsignedCmp = true;
4523         }
4524
4525         assert(addrReg2 == RBM_NONE);
4526
4527         genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4528         addrReg2 = genRegMask(op2->gtRegNum);
4529         addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
4530         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4531         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4532
4533         /* Compare against the register */
4534
4535         inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
4536
4537         goto DONE;
4538
4539 NO_SMALL_CMP:
4540
4541         // op1 has been made addressable and is marked as in use
4542         // op2 is un-generated
4543         assert(addrReg2 == 0);
4544
4545         if ((op1->gtFlags & GTF_REG_VAL) == 0)
4546         {
4547             regNumber reg1 = regSet.rsPickReg();
4548
4549             noway_assert(varTypeIsSmall(op1->TypeGet()));
4550             instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL)!=0);
4551
4552             // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
4553             // so we need to make sure it is still valid.  Note that at this point, reg1 is
4554             // *not* marked as in use, and it is possible for it to be used in the address
4555             // mode expression, but that is OK, because we are done with the expression after
4556             // this.  We only need reg1.
4557             addrReg1 = genKeepAddressable(op1, addrReg1);
4558             inst_RV_TT(ins, reg1, op1);
4559             regTracker.rsTrackRegTrash(reg1);
4560
4561             genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4562             addrReg1 = 0;
4563
4564             genMarkTreeInReg(op1, reg1);
4565
4566             regSet.rsMarkRegUsed(op1);
4567             addrReg1 = genRegMask(op1->gtRegNum);
4568         }
4569
4570         assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4571         assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4572
4573         goto DONE_OP1;
4574     }
4575
4576     // We come here if op2 is not enregistered or not in a "good" register.
4577     
4578     assert(addrReg1 == 0);  
4579
4580     // Determine what registers go live between op1 and op2
4581     newLiveMask = genNewLiveRegMask(op1, op2);
4582
4583     // Set up regNeed with the set of registers that we suggest for op1 to be in
4584     //
4585     regNeed = RBM_ALLINT;
4586
4587     // avoid selecting registers that get newly born in op2
4588     regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
4589
4590     // avoid selecting op2 reserved regs
4591     regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
4592
4593 #if CPU_HAS_BYTE_REGS
4594     // if necessary, set up regNeed to select just the byte-able registers
4595     if  (byteCmp)
4596         regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
4597 #endif // CPU_HAS_BYTE_REGS
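    // Note: rsNarrowHint only narrows the preference; the rsMustExclude calls below,
    // made after op1 has been computed, impose the hard requirements.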
4598
4599     // Compute the first comparand into some register; regNeed here is simply a hint because RegSet::ANY_REG is used.
4600     // 
4601     genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
4602     noway_assert(op1->gtFlags & GTF_REG_VAL);
4603
4604     op1Reg = op1->gtRegNum;
4605
4606     // Set up regNeed with the set of registers that we require op1 to be in
4607     //
4608     regNeed = RBM_ALLINT;
4609
4610 #if CPU_HAS_BYTE_REGS
4611     // if necessary, set up regNeed to select just the byte-able registers
4612     if  (byteCmp)
4613         regNeed &= RBM_BYTE_REGS;
4614 #endif // CPU_HAS_BYTE_REGS
4615
4616     // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
4617     regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
4618
4619     // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
4620     regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
4621
4622     // Did we end up in an acceptable register,
4623     // and do we have an acceptable free register available to grab?
4624     //
4625     if ( ((genRegMask(op1Reg) & regNeed) == 0) &&          
4626          ((regSet.rsRegMaskFree()    & regNeed) != 0)    )
4627     {
4628         // Grab an acceptable register 
4629         regNumber newReg = regSet.rsGrabReg(regNeed);
4630
4631         noway_assert(op1Reg != newReg);
4632
4633         /* Update the value in the target register */
4634
4635         regTracker.rsTrackRegCopy(newReg, op1Reg);
4636
4637         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
4638
4639         /* The value has been transferred to 'newReg' */
4640
4641         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
4642             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
4643
4644         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
4645
4646         /* The value is now in an appropriate register */
4647
4648         op1->gtRegNum = newReg;
4649     }
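    // Note: if no acceptable free register was available, op1 may still be in a
    // non-preferred register here; the genRecoverReg call below (with the same regNeed)
    // will reload it into an acceptable register if necessary.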
4650     noway_assert(op1->gtFlags & GTF_REG_VAL);
4651     op1Reg = op1->gtRegNum;
4652
4653     genUpdateLife(op1);
4654
4655     /* Mark the register as 'used' */
4656     regSet.rsMarkRegUsed(op1);
4657     
4658     addrReg1 = genRegMask(op1Reg);
4659
4660     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4661     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4662
4663 DONE_OP1:
4664
4665     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4666     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4667     noway_assert(op1->gtFlags & GTF_REG_VAL);
4668
4669     // Set up regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset;
4670     // when byteCmp is true we will perform a byte-sized cmp instruction,
4671     // and that instruction requires that any registers used are byte-able ones.
4672     //
4673     regNeed = RBM_ALLINT;
4674
4675 #if CPU_HAS_BYTE_REGS
4676     // if necessary, set up regNeed to select just the byte-able registers
4677     if  (byteCmp)
4678         regNeed &= RBM_BYTE_REGS;
4679 #endif // CPU_HAS_BYTE_REGS
4680
4681     /* Make the comparand addressable */
4682     assert(addrReg2 == 0);
4683     addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
4684
4685     /*  Make sure the first operand is still in a register; if
4686         it's been spilled, we have to make sure it's reloaded
4687         into a byte-addressable register if needed.
4688         Pass keepReg=RegSet::KEEP_REG; otherwise we get the pointer lifetimes wrong.
4689      */
4690
4691     assert(addrReg1 != 0);
4692     genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
4693
4694     noway_assert(op1->gtFlags & GTF_REG_VAL);
4695     noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
4696
4697     addrReg1 = genRegMask(op1->gtRegNum);
4698     regSet.rsLockUsedReg(addrReg1);
4699
4700     /* Make sure that op2 is addressable. If we are going to do a
4701        byte-comparison, we need it to be in a byte register. */
4702
4703     if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
4704     {
4705         genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
4706         addrReg2 = genRegMask(op2->gtRegNum);
4707     }
4708     else
4709     {
4710         addrReg2 = genKeepAddressable(op2, addrReg2);
4711     }
4712
4713     regSet.rsUnlockUsedReg(addrReg1);
4714
4715     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4716     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4717
4718     if (byteCmp || shortCmp)
4719     {
4720         size = emitTypeSize(op2->TypeGet());
4721         if (varTypeIsUnsigned(op1Type))
4722             unsignedCmp = true;
4723     }
4724     else
4725     {
4726         size = emitActualTypeSize(op2->TypeGet());
4727     }
4728
4729     /* Perform the comparison */
4730     inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
4731
4732 DONE:
4733     
4734     jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
4735
4736 DONE_FLAGS: // We have determined what jumpKind to use
4737
4738     genUpdateLife(cond);
4739
4740     /* The condition value is dead at the jump that follows */
4741
4742     assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4743     assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4744     genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4745     genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
4746
4747     noway_assert(jumpKind != EJ_COUNT);   // Ensure that it was assigned a valid value
4748
4749     return jumpKind;
4750 }
4751
4752 /*****************************************************************************/
4753 /*****************************************************************************/
4754 /*****************************************************************************
4755  *
4756  *  Generate code to jump to the jump target of the current basic block if
4757  *  the given relational operator yields 'true'.
4758  */
4759
4760 void                CodeGen::genCondJump(GenTreePtr cond,
4761                                          BasicBlock *destTrue,
4762                                          BasicBlock *destFalse,
4763                                          bool bStackFPFixup
4764                                          )
4765 {
4766     BasicBlock  *   jumpTrue;
4767     BasicBlock  *   jumpFalse;
4768
4769     GenTreePtr      op1       = cond->gtOp.gtOp1;
4770     GenTreePtr      op2       = cond->gtOp.gtOp2;
4771     genTreeOps      cmp       = cond->OperGet();
4772
4773     if  (destTrue)
4774     {
4775         jumpTrue  = destTrue;
4776         jumpFalse = destFalse;
4777     }
4778     else
4779     {
4780         noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
4781
4782         jumpTrue  = compiler->compCurBB->bbJumpDest;
4783         jumpFalse = compiler->compCurBB->bbNext;
4784     }
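    // jumpTrue/jumpFalse now hold the targets for the taken and not-taken cases
    // (for a BBJ_COND block these are bbJumpDest and bbNext respectively).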
4785
4786     noway_assert(cond->OperIsCompare());
4787
4788     /* Make sure the more expensive operand is 'op1' */
4789     noway_assert( (cond->gtFlags & GTF_REVERSE_OPS) == 0 );
4790
4791     if  (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
4792     {
4793         /* Don't forget to modify the condition as well */
4794
4795         cond->gtOp.gtOp1 = op2;
4796         cond->gtOp.gtOp2 = op1;
4797         cond->SetOper     (GenTree::SwapRelop(cmp));
4798         cond->gtFlags   &= ~GTF_REVERSE_OPS;
4799
4800         /* Get hold of the new values */
4801
4802         cmp  = cond->OperGet();
4803         op1  = cond->gtOp.gtOp1;
4804         op2  = cond->gtOp.gtOp2;
4805     }
4806
4807     /* What is the type of the operand? */
4808
4809     switch (genActualType(op1->gtType))
4810     {
4811     case TYP_INT:
4812     case TYP_REF:
4813     case TYP_BYREF:
4814         emitJumpKind    jumpKind;
4815
4816         // Check if we can use the currently set flags. Else set them
4817
4818         jumpKind = genCondSetFlags(cond);
4819
4820 #if FEATURE_STACK_FP_X87
4821         if (bStackFPFixup)
4822         {
4823             genCondJmpInsStackFP(jumpKind,
4824                                 jumpTrue,
4825                                 jumpFalse);
4826         }
4827         else
4828 #endif
4829         {
4830             /* Generate the conditional jump */
4831             inst_JMP(jumpKind, jumpTrue);
4832         }
4833
4834         return;
4835
4836     case TYP_LONG:
4837 #if FEATURE_STACK_FP_X87
4838         if (bStackFPFixup)
4839         {
4840             genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
4841         }
4842         else
4843 #endif
4844         {
4845             genCondJumpLng(cond, jumpTrue, jumpFalse);
4846         }
4847         return;
4848
4849     case TYP_FLOAT:
4850     case TYP_DOUBLE:
4851 #if FEATURE_STACK_FP_X87
4852         genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
4853 #else
4854         genCondJumpFloat(cond, jumpTrue, jumpFalse);
4855 #endif
4856         return;
4857
4858     default:
4859 #ifdef DEBUG
4860         compiler->gtDispTree(cond);
4861 #endif
4862         unreached(); // unexpected/unsupported 'jtrue' operands type
4863     }
4864 }
4865
4866 /*****************************************************************************
4867  *  Spill registers to check that callers can handle it.
4868  */
4869
4870 #ifdef DEBUG
4871
4872 void                CodeGen::genStressRegs(GenTreePtr tree)
4873 {
4874     if (regSet.rsStressRegs() < 2)
4875         return;
4876
4877     /* Spill as many registers as possible. Callers should be prepared
4878        to handle this case.
4879        But don't spill trees with no size (TYP_STRUCT comes to mind) */
4880
4881     {
4882         regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
4883         regNumber regNum;
4884         regMaskTP regBit;
4885
4886         for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
4887         {
4888             if  ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) && (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
4889             {
4890                 regSet.rsSpillReg(regNum);
4891
4892                 spillRegs &= regSet.rsMaskUsed;
4893
4894                 if  (!spillRegs)
4895                     break;
4896             }
4897         }
4898     }
4899
4900     regMaskTP trashRegs = regSet.rsRegMaskFree();
4901
4902     if (trashRegs == RBM_NONE)
4903         return;
4904
4905     /* It is sometimes reasonable to expect that calling genCodeForTree()
4906        on certain trees won't spill anything */
4907
4908     if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) &&
4909         (compiler->compCurBB->bbCatchTyp) && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
4910     {
4911         trashRegs &= ~(RBM_EXCEPTION_OBJECT);
4912     }
4913
4914     // If genCodeForTree() effectively gets called a second time on the same tree
4915
4916     if (tree->gtFlags & GTF_REG_VAL)
4917     {
4918         noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
4919         trashRegs &= ~genRegMask(tree->gtRegNum);
4920     }
4921
4922     if (tree->gtType == TYP_INT && tree->OperIsSimple())
4923     {
4924         GenTreePtr  op1 = tree->gtOp.gtOp1;
4925         GenTreePtr  op2 = tree->gtOp.gtOp2;
4926         if (op1 && (op1->gtFlags & GTF_REG_VAL))
4927             trashRegs &= ~genRegMask(op1->gtRegNum);
4928         if (op2 && (op2->gtFlags & GTF_REG_VAL))
4929             trashRegs &= ~genRegMask(op2->gtRegNum);
4930     }
4931
4932     if (compiler->compCurBB == compiler->genReturnBB)
4933     {
4934         if (compiler->info.compCallUnmanaged)
4935         {
4936             LclVarDsc * varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
4937             if (varDsc->lvRegister)
4938                 trashRegs &= ~genRegMask(varDsc->lvRegNum);
4939         }
4940     }
4941
4942     /* Now trash the registers. We use regSet.rsModifiedRegsMask, otherwise we would have
4943        to save/restore the register. We try to be as unintrusive
4944        as possible */
4945
4946     noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
4947     // This is obviously false for ARM, but this function is never called there.
4948     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
4949     {
4950         regMaskTP   regMask = genRegMask(reg);
4951
4952         if (regSet.rsRegsModified(regMask & trashRegs))
4953             genSetRegToIcon(reg, 0);
4954     }
4955 }
4956
4957 #endif // DEBUG
4958
4959
4960 /*****************************************************************************
4961  *
4962  *  Generate code for a GTK_CONST tree
4963  */
4964
4965 void                CodeGen::genCodeForTreeConst(GenTreePtr tree,
4966                                                  regMaskTP  destReg,
4967                                                  regMaskTP  bestReg)
4968 {
4969     noway_assert(tree->IsCnsIntOrI());
4970
4971     ssize_t         ival = tree->gtIntConCommon.IconValue();
4972     regMaskTP       needReg = destReg;
4973     regNumber       reg;
4974     bool            needReloc = compiler->opts.compReloc && tree->IsIconHandle();
4975
4976 #if REDUNDANT_LOAD
4977
4978     /* If we are targeting destReg and ival is zero, we would rather */
4979     /* xor needReg than copy the value from another register         */
4980
4981     if (!needReloc)
4982     {
4983         bool reuseConstantInReg = false;
4984
4985         if (destReg == RBM_NONE)
4986             reuseConstantInReg = true;
4987
4988 #ifdef _TARGET_ARM_
4989         // If we can set a register to a constant with a small encoding, then do that.
4990         // Assume we'll get a low register if needReg has low registers as options.
4991         if (!reuseConstantInReg &&
4992             !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
4993         {
4994             reuseConstantInReg = true;
4995         }
4996 #else
4997         if (!reuseConstantInReg && ival != 0)
4998             reuseConstantInReg = true;
4999 #endif
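        // Note: on non-ARM targets, a zero constant aimed at a specific destReg keeps
        // reuseConstantInReg false, since zeroing the register directly (see the xor
        // comment above) is preferred over copying it from another register.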
5000
5001         if (reuseConstantInReg)
5002         {
5003             /* Is the constant already in register? If so, use this register */
5004
5005             reg = regTracker.rsIconIsInReg(ival);
5006             if  (reg != REG_NA)
5007                 goto REG_LOADED;
5008         }
5009     }
5010
5011 #endif // REDUNDANT_LOAD
5012
5013     reg   = regSet.rsPickReg(needReg, bestReg);
5014
5015     /* If the constant is a handle, we need a reloc to be applied to it */
5016
5017     if (needReloc)
5018     {
5019         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
5020         regTracker.rsTrackRegTrash(reg);
5021     }
5022     else
5023     {
5024         genSetRegToIcon(reg, ival, tree->TypeGet());
5025     }
5026
5027 REG_LOADED:
5028
5029 #ifdef  DEBUG
5030     /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
5031
5032     if  (!genTempLiveChg)
5033     {
5034         VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
5035         genTempLiveChg = true;
5036     }
5037 #endif
5038
5039     gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (e.g., frozen strings)
5040     genCodeForTree_DONE(tree, reg);
5041 }
5042
5043
5044 /*****************************************************************************
5045  *
5046  *  Generate code for a GTK_LEAF tree
5047  */
5048
5049 void                CodeGen::genCodeForTreeLeaf(GenTreePtr tree,
5050                                                 regMaskTP  destReg,
5051                                                 regMaskTP  bestReg)
5052 {
5053     genTreeOps      oper    = tree->OperGet();
5054     regNumber       reg     = DUMMY_INIT(REG_CORRUPT);
5055     regMaskTP       regs    = regSet.rsMaskUsed;
5056     regMaskTP       needReg = destReg;
5057     size_t          size;
5058
5059     noway_assert(tree->OperKind() & GTK_LEAF);
5060
5061     switch (oper)
5062     {
5063     case GT_REG_VAR:
5064         NO_WAY("GT_REG_VAR should have been caught above");
5065         break;
5066
5067     case GT_LCL_VAR:
5068
5069         /* Does the variable live in a register? */
5070
5071         if  (genMarkLclVar(tree))
5072         {
5073             genCodeForTree_REG_VAR1(tree);
5074             return;
5075         }
5076
5077 #if REDUNDANT_LOAD
5078
5079         /* Is the local variable already in register? */
5080
5081         reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
5082
5083         if (reg != REG_NA)
5084         {
5085             /* Use the register the variable happens to be in */
5086             regMaskTP regMask = genRegMask(reg);
5087
5088             // If the register that it was in isn't one of the needRegs
5089             // then try to move it into a needReg register
5090
5091             if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
5092             {
5093                 regNumber rg2 = reg;
5094                 reg = regSet.rsPickReg(needReg, bestReg);
5095                 if (reg != rg2)
5096                 {
5097                     regMask = genRegMask(reg);
5098                     inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
5099                 }
5100             }
5101
5102             gcInfo.gcMarkRegPtrVal (reg, tree->TypeGet());
5103             regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
5104             break;
5105         }
5106
5107 #endif
5108         goto MEM_LEAF;
5109
5110     case GT_LCL_FLD:
5111
5112         // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
5113         // to worry about it being enregistered.
5114         noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
5115         goto MEM_LEAF;
5116
5117     case GT_CLS_VAR:
5118
5119     MEM_LEAF:
5120
5121         /* Pick a register for the value */
5122
5123         reg = regSet.rsPickReg(needReg, bestReg);
5124
5125         /* Load the variable into the register */
5126
5127         size = genTypeSize(tree->gtType);
5128
5129         if  (size < EA_4BYTE)
5130         {
5131             instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL)!=0);
5132             inst_RV_TT(ins, reg, tree, 0);
5133
5134             /* We've now "promoted" the tree-node to TYP_INT */
5135
5136             tree->gtType = TYP_INT;
5137         }
5138         else
5139         {
5140             inst_RV_TT(INS_mov, reg, tree, 0);
5141         }
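        // For small types, ins_Move_Extend produces a widening (sign- or zero-extending)
        // load, so the register now holds a full 32-bit value; that is why the node was
        // retyped to TYP_INT above.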
5142
5143         regTracker.rsTrackRegTrash(reg);
5144
5145         gcInfo.gcMarkRegPtrVal (reg, tree->TypeGet());
5146
5147         switch (oper)
5148         {
5149         case GT_CLS_VAR:
5150             regTracker.rsTrackRegClsVar(reg, tree);
5151             break;
5152         case GT_LCL_VAR:
5153             regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
5154             break;
5155         case GT_LCL_FLD:
5156             break;
5157         default: noway_assert(!"Unexpected oper");
5158         }
5159
5160 #ifdef _TARGET_ARM_
5161         if (tree->gtFlags & GTF_IND_VOLATILE)
5162         {
5163              // Emit a memory barrier instruction after the load 
5164              instGen_MemoryBarrier();
5165         }
5166 #endif
5167
5168         break;
5169
5170     case GT_NO_OP:
5171         if (tree->gtFlags & GTF_NO_OP_NO)
5172         {
5173             // The VM does certain things with actual NOP instructions
5174             // so generate something small that has no effect, but isn't
5175             // a typical NOP
5176 #ifdef _TARGET_XARCH_
5177             // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
5178             instGen(INS_nop);
5179             instGen(INS_nop);
5180 #elif defined (_TARGET_ARM_)
5181             // The VM isn't checking yet; when it does, hopefully it will
5182             // get fooled by the wider variant.
5183             instGen(INS_nopw);
5184 #else
5185             NYI("Non-nop NO_OP");
5186 #endif            
5187         }
5188         else
5189         {
5190             instGen(INS_nop);
5191         }
5192         reg = REG_STK;
5193         break;
5194
5195 #if !FEATURE_EH_FUNCLETS
5196     case GT_END_LFIN:
5197
5198         /* Have to clear the shadowSP slot of the nesting level which
5199            encloses the finally */
5200
5201         unsigned    finallyNesting;
5202         finallyNesting = (unsigned)tree->gtVal.gtVal1;
5203         noway_assert(tree->gtVal.gtVal1 < compiler->compHndBBtabCount); //assert we didn't truncate with the cast above.
5204         noway_assert(finallyNesting < compiler->compHndBBtabCount);
5205
5206         // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
5207         unsigned filterEndOffsetSlotOffs;
5208         PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > sizeof(void*)); //below doesn't underflow.
5209         filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
5210         
5211         unsigned curNestingSlotOffs;
5212         curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
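        // The shadow SP slot offsets shrink as the nesting level grows: level 0 uses the
        // slot just below the reserved last slot, level 1 the one below that, and so on.
        // Storing zero below clears the slot for the nesting level enclosing this finally.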
5213         instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, 
5214                                    compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
5215         reg = REG_STK;
5216         break;
5217 #endif // !FEATURE_EH_FUNCLETS
5218
5219     case GT_CATCH_ARG:
5220
5221         noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
5222
5223         /* Catch arguments get passed in a register. genCodeForBBlist()
5224            would have marked it as holding a GC object, but not as used. */
5225
5226         noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
5227         reg = REG_EXCEPTION_OBJECT;
5228         break;
5229
5230     case GT_JMP:
5231         genCodeForTreeLeaf_GT_JMP(tree);
5232         return;
5233
5234     case GT_MEMORYBARRIER:
5235         // Emit the memory barrier instruction
5236         instGen_MemoryBarrier();
5237         reg = REG_STK;
5238         break;
5239
5240     default:
5241 #ifdef DEBUG
5242         compiler->gtDispTree(tree);
5243 #endif
5244         noway_assert(!"unexpected leaf");
5245     }
5246
5247     noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
5248     genCodeForTree_DONE(tree, reg);
5249 }
5250
5251
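/*****************************************************************************
 *
 *  Generate code for (and discard the values of) the leading operands of a
 *  GT_COMMA chain, returning the final non-comma operand for the caller.
 */
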
5252 GenTreePtr          CodeGen::genCodeForCommaTree (GenTreePtr     tree)
5253 {
5254     while (tree->OperGet() == GT_COMMA)
5255     {
5256         GenTreePtr op1 = tree->gtOp.gtOp1;
5257         genCodeForTree(op1, RBM_NONE);
5258         gcInfo.gcMarkRegPtrVal(op1);
5259
5260         tree = tree->gtOp.gtOp2;
5261     }
5262     return tree;
5263 }
5264
5265 /*****************************************************************************
5266  *
5267  *  Generate code for a leaf node of type GT_JMP
5268  */
5269
5270 void                CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
5271 {
5272     noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
5273
5274 #ifdef PROFILING_SUPPORTED
5275     if (compiler->compIsProfilerHookNeeded())
5276     {
5277         /* fire the event at the call site */
5278         unsigned         saveStackLvl2 = genStackLevel;
5279
5280         compiler->info.compProfilerCallback = true;
5281
5282 #ifdef _TARGET_X86_
5283         //
5284         // Push the profilerHandle
5285         //
5286         regMaskTP byrefPushedRegs;
5287         regMaskTP norefPushedRegs;
5288         regMaskTP pushedArgRegs = genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock), &byrefPushedRegs, &norefPushedRegs);
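        // Any argument registers that are currently in use are saved across the helper
        // call below and restored by the matching genPopRegs.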
5289
5290         if (compiler->compProfilerMethHndIndirected)
5291         {
5292             getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
5293         }
5294         else
5295         {
5296             inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
5297         }
5298         genSinglePush();
5299
5300         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5301                           sizeof(int) * 1,  // argSize
5302                           EA_UNKNOWN);      // retSize
5303
5304         //
5305         // Adjust the number of stack slots used by this managed method if necessary.
5306         //
5307         if (compiler->fgPtrArgCntMax < 1)
5308         {
5309             compiler->fgPtrArgCntMax = 1;
5310         }
5311
5312         genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
5313 #elif _TARGET_ARM_
5314         // For GT_JMP nodes under the ARM profiler, we have added r0 as a used register in order to evaluate the GT_JMP node.
5315         // To emit the tailcall callback we need r0 to pass the profiler handle. Any free register could be used as the call target.
5316         regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
5317         noway_assert(argReg == REG_PROFILER_JMP_ARG);
5318         regSet.rsLockReg(RBM_PROFILER_JMP_USED);
5319
5320         if (compiler->compProfilerMethHndIndirected)
5321         {
5322             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
5323             regTracker.rsTrackRegTrash(argReg);
5324         }
5325         else
5326         {            
5327             instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
5328         }
5329
5330         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5331                           0,                // argSize
5332                           EA_UNKNOWN);      // retSize
5333
5334         regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
5335 #else 
5336         NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
5337 #endif  //_TARGET_X86_
5338
5339         /* Restore the stack level */
5340         genStackLevel = saveStackLvl2;
5341     }
5342 #endif // PROFILING_SUPPORTED
5343
5344     /* This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
5345      * call genPInvokeMethodEpilog anywhere that we have a method return.  We should really
5346      * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
5347      */
5348
5349     if (compiler->info.compCallUnmanaged)
5350     {
5351         genPInvokeMethodEpilog();
5352     }
5353
5354     // Make sure register arguments are in their initial registers
5355     // and stack arguments are put back as well.
5356     //
5357     // This does not deal with circular dependencies of register
5358     // arguments, which is safe because RegAlloc prevents that by
5359     // not enregistering any RegArgs when a JMP opcode is used.
5360
5361     if  (compiler->info.compArgsCount == 0)
5362     {
5363         return;
5364     }
5365
5366     unsigned        varNum;
5367     LclVarDsc   *   varDsc;
5368
5369     // First move any enregistered stack arguments back to the stack
5370     for (varNum = 0, varDsc = compiler->lvaTable;
5371          varNum < compiler->info.compArgsCount;
5372          varNum++  , varDsc++)
5373     {
5374         noway_assert(varDsc->lvIsParam);
5375         if (varDsc->lvIsRegArg || !varDsc->lvRegister)
5376             continue;
5377
5378         /* Argument was passed on the stack, but ended up in a register
5379          * Store it back to the stack */
5380
5381 #ifndef _TARGET_64BIT_
5382         if (varDsc->TypeGet() == TYP_LONG)
5383         {
5384             /* long - at least the low half must be enregistered */
5385
5386             getEmitter()->emitIns_S_R(ins_Store(TYP_INT),
5387                                     EA_4BYTE,
5388                                     varDsc->lvRegNum,
5389                                     varNum,
5390                                     0);
5391
5392             /* Is the upper half also enregistered? */
5393
5394             if (varDsc->lvOtherReg != REG_STK)
5395             {
5396                 getEmitter()->emitIns_S_R(ins_Store(TYP_INT),
5397                                         EA_4BYTE,
5398                                         varDsc->lvOtherReg,
5399                                         varNum,
5400                                         sizeof(int));
5401             }
5402         }
5403         else
5404 #endif // _TARGET_64BIT_
5405         {
5406             getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()),
5407                                     emitTypeSize(varDsc->TypeGet()),
5408                                     varDsc->lvRegNum,
5409                                     varNum,
5410                                     0);
5411         }
5412     }
5413
5414 #ifdef _TARGET_ARM_
5415     regMaskTP fixedArgsMask = RBM_NONE;
5416 #endif
5417
5418     // Next move any un-enregistered register arguments back to their register
5419     for (varNum = 0, varDsc = compiler->lvaTable;
5420          varNum < compiler->info.compArgsCount;
5421          varNum++  , varDsc++)
5422     {
5423         /* Is this variable a register arg? */
5424
5425         if  (!varDsc->lvIsRegArg)
5426             continue;
5427
5428         /* Register argument */
5429
5430         noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5431         noway_assert(!varDsc->lvRegister);
5432
5433         /* Reload it from the stack */
5434
5435 #ifndef _TARGET_64BIT_
5436         if (varDsc->TypeGet() == TYP_LONG)
5437         {
5438             /* long - at least the low half must be enregistered */
5439
5440             getEmitter()->emitIns_R_S(ins_Load(TYP_INT),
5441                                     EA_4BYTE,
5442                                     varDsc->lvArgReg,
5443                                     varNum,
5444                                     0);
5445             regTracker.rsTrackRegTrash(varDsc->lvArgReg);
5446
5447             /* Assume that the upper half is also enregistered */
5448
5449             getEmitter()->emitIns_R_S(ins_Load(TYP_INT),
5450                                     EA_4BYTE,
5451                                     genRegArgNext(varDsc->lvArgReg),
5452                                     varNum,
5453                                     sizeof(int));
5454             regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
5455
5456 #ifdef _TARGET_ARM_
5457             fixedArgsMask |= genRegMask(varDsc->lvArgReg);
5458             fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
5459 #endif
5460         }
5461         else
5462 #endif // _TARGET_64BIT_
5463 #ifdef _TARGET_ARM_
5464         if (varDsc->lvIsHfaRegArg())
5465         {
5466             const var_types   elemType = varDsc->GetHfaType();
5467             const instruction loadOp   = ins_Load(elemType);
5468             const emitAttr    size     = emitTypeSize(elemType);
5469             regNumber         argReg   = varDsc->lvArgReg;
5470             const unsigned    maxSize  = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
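            // maxSize is capped at the last FP argument register, so only the portion of
            // the HFA that was passed in FP registers is reloaded here.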
5471
5472             for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5473             {
5474                 getEmitter()->emitIns_R_S(loadOp,
5475                                     size,
5476                                     argReg,
5477                                     varNum,
5478                                     ofs);
5479                 assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
5480                 argReg = regNextOfType(argReg, elemType);
5481             }
5482         }
5483         else if (varDsc->TypeGet() == TYP_STRUCT)
5484         {
5485             const var_types   elemType = TYP_INT; // we pad everything out to at least 4 bytes
5486             const instruction loadOp   = ins_Load(elemType);
5487             const emitAttr    size     = emitTypeSize(elemType);
5488             regNumber         argReg   = varDsc->lvArgReg;
5489             const unsigned    maxSize  = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
5490
5491             for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5492             {
5493                 getEmitter()->emitIns_R_S(loadOp,
5494                                     size,
5495                                     argReg,
5496                                     varNum,
5497                                     ofs);
5498                 regTracker.rsTrackRegTrash(argReg);
5499
5500                 fixedArgsMask |= genRegMask(argReg);
5501
5502                 argReg = genRegArgNext(argReg);
5503             }
5504         }
5505         else
5506 #endif //_TARGET_ARM_
5507         {
5508             var_types  loadType  = varDsc->TypeGet();
5509             regNumber  argReg    = varDsc->lvArgReg;    // incoming arg register
5510             bool       twoParts  = false;
5511
5512             if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
5513             {
5514 #ifndef _TARGET_64BIT_
5515                 if (loadType == TYP_DOUBLE)
5516                     twoParts = true;
5517 #endif
5518                 loadType = TYP_I_IMPL;
5519                 assert(isValidIntArgReg(argReg));
5520             }
5521
5522             getEmitter()->emitIns_R_S(ins_Load(loadType),
5523                                     emitTypeSize(loadType),
5524                                     argReg,
5525                                     varNum,
5526                                     0);
5527             regTracker.rsTrackRegTrash(argReg);
5528
5529 #ifdef _TARGET_ARM_
5530             fixedArgsMask |= genRegMask(argReg);
5531 #endif
5532             if (twoParts)
5533             {
5534                 argReg = genRegArgNext(argReg);
5535                 assert(isValidIntArgReg(argReg));
5536
5537                 getEmitter()->emitIns_R_S(ins_Load(loadType),
5538                                         emitTypeSize(loadType),
5539                                         argReg,
5540                                         varNum,
5541                                         REGSIZE_BYTES);
5542                 regTracker.rsTrackRegTrash(argReg);
5543
5544 #ifdef _TARGET_ARM_
5545                 fixedArgsMask |= genRegMask(argReg);
5546 #endif
5547             }
5548         }
5549     }
5550
5551 #ifdef _TARGET_ARM_
5552     // Check if we have any non-fixed args possibly in the arg registers.
5553     if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
5554     {
5555         noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
5556
5557         regNumber regDeclArgs = REG_ARG_FIRST;
5558
5559         // Skip the 'this' pointer.
5560         if (!compiler->info.compIsStatic)
5561         {
5562             regDeclArgs = REG_NEXT(regDeclArgs);
5563         }
5564
5565         // Skip the 'generic context.'
5566         if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5567         {
5568             regDeclArgs = REG_NEXT(regDeclArgs);
5569         }
5570
5571         // Skip any 'return buffer arg.'
5572         if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
5573         {
5574             regDeclArgs = REG_NEXT(regDeclArgs);
5575         }
5576
5577         // Skip the 'vararg cookie.'
5578         regDeclArgs = REG_NEXT(regDeclArgs);
5579
5580         // Also add offset for the vararg cookie. 
5581         int offset = REGSIZE_BYTES;
5582
5583         // Load all the variable arguments that were passed in registers back into their registers.
5584         for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
5585         {
5586             if (!(fixedArgsMask & genRegMask(reg)))
5587             {
5588                 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
5589                 regTracker.rsTrackRegTrash(reg);
5590             }
5591             offset += REGSIZE_BYTES;
5592         }
5593     }
5594 #endif // _TARGET_ARM_
5595 }
5596
5597 /*****************************************************************************
5598  *
5599  *  Check if a variable is assigned to in a tree.  The variable number is
5600  *  passed in pCallbackData.  If the variable is assigned to, return
5601  *  Compiler::WALK_ABORT.  Otherwise return Compiler::WALK_CONTINUE.
5602  */
5603 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr *pTree, Compiler::fgWalkData *data)
5604 {
5605     GenTreePtr tree = *pTree;
5606     if ((tree->OperIsAssignment())                                         &&
5607         (tree->gtOp.gtOp1->OperGet()         == GT_LCL_VAR)                &&
5608         (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned) (size_t)data->pCallbackData))
5609     {
5610         return Compiler::WALK_ABORT;
5611     }
5612
5613     return Compiler::WALK_CONTINUE;
5614 }
5615
5616
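/*****************************************************************************
 *
 *  If 'tree' is a GT_LCL_VAR that lives in an integer register, return that
 *  register; otherwise return REG_NA.
 */
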
5617 regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
5618 {
5619     unsigned        varNum;
5620     LclVarDsc   *   varDsc;
5621
5622     if (tree->gtOper == GT_LCL_VAR)
5623     {
5624         /* Does the variable live in a register? */
5625
5626         varNum = tree->gtLclVarCommon.gtLclNum;
5627         noway_assert(varNum < compiler->lvaCount);
5628         varDsc = compiler->lvaTable + varNum;
5629
5630         if  (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5631         {
5632             return varDsc->lvRegNum;
5633         }
5634     }
5635
5636     return REG_NA;
5637 }
5638
5639 // inline
5640 void CodeGen::unspillLiveness(genLivenessSet * ls)
5641 {
5642     // Only try to unspill the registers that are missing from the currentLiveRegs
5643     //
5644     regMaskTP  cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5645     regMaskTP  currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
5646     cannotSpillMask &= ~currentLiveRegs;
5647
5648     // Typically this will always be true and we will return
5649     //
5650     if (cannotSpillMask == 0)
5651         return;
5652
5653     for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5654     {
5655         // Is this a register that we cannot leave in the spilled state?
5656         //
5657         if ((cannotSpillMask & genRegMask(reg)) == 0)
5658             continue;
5659
5660         RegSet::SpillDsc *  spill = regSet.rsSpillDesc[reg];
5661         
5662         // Was it spilled? If not, then skip it.
5663         //
5664         if (!spill)
5665             continue;
5666
5667         noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
5668
5669         regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5670     }
5671 }
5672
5673 /*****************************************************************************
5674  *
5675  *  Generate code for a qmark colon
5676  */
5677
5678 void                CodeGen::genCodeForQmark(GenTreePtr tree,
5679                                              regMaskTP  destReg,
5680                                              regMaskTP  bestReg)
5681 {
5682     GenTreePtr      op1      = tree->gtOp.gtOp1;
5683     GenTreePtr      op2      = tree->gtOp.gtOp2;
5684     regNumber       reg;
5685     regMaskTP       regs     = regSet.rsMaskUsed;
5686     regMaskTP       needReg  = destReg;
5687
5688     noway_assert(compiler->compQmarkUsed);
5689     noway_assert(tree->gtOper == GT_QMARK);
5690     noway_assert(op1->OperIsCompare());
5691     noway_assert(op2->gtOper == GT_COLON);
5692
5693     GenTreePtr      thenNode = op2->AsColon()->ThenNode();
5694     GenTreePtr      elseNode = op2->AsColon()->ElseNode();
5695
5696     /* If elseNode is a Nop node you must reverse the
5697        thenNode and elseNode prior to reaching here!
5698        (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
5699
5700     noway_assert(!elseNode->IsNothingNode());
5701
5702     /* Try to implement the qmark colon using a CMOV.  If we can't for
5703        whatever reason, this will return false and we will implement
5704        it using regular branching constructs. */
5705
5706     if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
5707         return;
5708
5709     /*
5710         This is a ?: operator; generate code like this:
5711
5712             condition_compare
5713             jmp_if_true lab_true
5714
5715         lab_false:
5716             op1 (false = 'else' part)
5717             jmp lab_done
5718
5719         lab_true:
5720             op2 (true = 'then' part)
5721
5722         lab_done:
5723
5724
5725         NOTE: If no 'then' part we do not generate the 'jmp lab_done'
5726             or the 'lab_done' label
5727     */
5728
5729     BasicBlock *    lab_true;
5730     BasicBlock *    lab_false;
5731     BasicBlock *    lab_done;
5732
5733     genLivenessSet  entryLiveness; 
5734     genLivenessSet  exitLiveness;
5735
5736     lab_true  = genCreateTempLabel();
5737     lab_false = genCreateTempLabel();
5738
5739 #if FEATURE_STACK_FP_X87
5740     /* Spill any registers that hold partial values so that the exit liveness
5741        from both sides is the same */
5742 #ifdef DEBUG
5743     regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
5744
5745     // spillMask should be the whole FP stack      
5746     noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
5747 #endif
5748
5749     SpillTempsStackFP(regSet.rsMaskUsedFloat);
5750     noway_assert(regSet.rsMaskUsedFloat == 0);
5751 #endif
5752
5753     /* Before we generate code for qmark, we spill all the currently used registers
5754        that conflict with the registers used in the qmark tree. This is to avoid
5755        introducing spills that only occur on either the 'then' or 'else' side of
5756        the tree, but not both identically. We need to be careful with enregistered
5757        variables that are used; see below.
5758     */
5759
5760     if (regSet.rsMaskUsed)
5761     {
5762         /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
5763            variable), then it may not get spilled. However, the variable may
5764            then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
5765            may get spilled from one side and not the other. So unmark regSet.rsMaskVars
5766            before spilling regSet.rsMaskUsed */
5767
5768         regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
5769         regMaskTP rsAdditional = RBM_NONE;
5770
5771         // For each multi-use of an enregistered variable, we need to determine if
5772         // it can get spilled inside the qmark colon.  This can only happen if
5773         // its life ends somewhere in the qmark colon.  We have the following
5774         // cases:
5775         // 1) Variable is dead at the end of the colon -- needs to be spilled
5776         // 2) Variable is alive at the end of the colon -- needs to be spilled
5777         //    iff it is assigned to in the colon.  In order to determine that, we
5778         //    examine the GTF_ASG flag to see if any assignments were made in the
5779         //    colon.  If there are any, we need to do a tree walk to see if this
5780         //    variable is the target of an assignment.  This treewalk should not
5781         //    happen frequently.
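        // (Case 1 arises, for example, when an enregistered variable's last use is inside
        // one arm of the colon; spilling it up front keeps the register state of the two
        // arms consistent.)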
5782         if (rsAdditionalCandidates)
5783         {
5784 #ifdef DEBUG
5785             if (compiler->verbose)
5786             {
5787                 Compiler::printTreeID(tree);
5788                 printf(": Qmark-Colon additional spilling candidates are ");
5789                 dspRegMask(rsAdditionalCandidates); printf("\n");
5790             }
5791 #endif
5792
5793             // If any candidates are not alive at the GT_QMARK node, then they
5794             // need to be spilled
5795
5796             VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
5797             VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
5798                                                                                 compiler->compCurLifeTree,
5799                                                                                 tree));
5800
5801             VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow, VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
5802
5803             VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
5804             while (iter.NextElem(compiler, &varIndex))
5805             {
5806                 // Find the variable in compiler->lvaTable
5807                 unsigned  varNum  = compiler->lvaTrackedToVarNum[varIndex];
5808                 LclVarDsc *varDsc = compiler->lvaTable + varNum;
5809
5810 #if !FEATURE_FP_REGALLOC
5811                 if (varDsc->IsFloatRegType())
5812                     continue;
5813 #endif
5814
5815                 noway_assert(varDsc->lvRegister);
5816
5817                 regMaskTP regBit;
5818
5819                 if (varTypeIsFloating(varDsc->TypeGet()))
5820                 {
5821                     regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
5822                 }
5823                 else
5824                 {
5825                     regBit = genRegMask(varDsc->lvRegNum);
5826
5827                     // For longs we may need to spill both regs
5828                     if  (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
5829                         regBit |= genRegMask(varDsc->lvOtherReg);
5830                 }
5831
5832                 // Is it one of our reg-use vars?  If not, we don't need to spill it.
5833                 regBit &= rsAdditionalCandidates;
5834                 if (!regBit)
5835                     continue;
5836
5837                 // Is the variable live at the end of the colon?
5838                 if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
5839                 {
5840                     // Variable is alive at the end of the colon.  Was it assigned
5841                     // to inside the colon?
5842
5843                     if (!(op2->gtFlags & GTF_ASG))
5844                         continue;
5845
5846                     if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void *)(size_t)varNum) == Compiler::WALK_ABORT)
5847                     {
5848                         // Variable was assigned to, so we need to spill it.
5849
5850                         rsAdditional |= regBit;
5851 #ifdef DEBUG
5852                         if (compiler->verbose)
5853                         {
5854                             Compiler::printTreeID(tree);
5855                             printf(": Qmark-Colon candidate ");
5856                             dspRegMask(regBit); printf("\n");
5857                             printf("    is assigned to inside colon and will be spilled\n");
5858                         }
5859 #endif
5860                     }
5861                 }
5862                 else
5863                 {
5864                     // Variable is not alive at the end of the colon.  We need to spill it.
5865
5866                     rsAdditional |= regBit;
5867 #ifdef DEBUG
5868                     if (compiler->verbose)
5869                     {
5870                         Compiler::printTreeID(tree);
5871                         printf(": Qmark-Colon candidate ");
5872                         dspRegMask(regBit); printf("\n");
5873                         printf("    is alive at end of colon and will be spilled\n");
5874                     }
5875 #endif
5876                 }
5877             }
5878
5879 #ifdef DEBUG
5880             if (compiler->verbose)
5881             {
5882                 Compiler::printTreeID(tree);
5883                 printf(": Qmark-Colon approved additional spilling candidates are ");
5884                 dspRegMask(rsAdditional); printf("\n");
5885             }
5886 #endif
5887
5888         }
5889
5890         noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
5891
5892         // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
5893         // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
5894         // we will have unbalanced spills and generate bad code.
5895         regMaskTP rsSpill = ((regSet.rsMaskUsed & ~(regSet.rsMaskVars|regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
5896
5897 #ifdef DEBUG
5898         // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
5899         // 'bad' registers, causing spills. So, just force all used registers to get spilled
5900         // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
5901         // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
5902         // unspilled while generating that same tree.
5903
5904         if (regSet.rsStressRegs() >= 1)
5905         {
5906             rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
5907         }
5908 #endif // DEBUG
5909
5910         if (rsSpill)
5911         {
5912             // Remember which registers hold pointers. We will spill
5913             // them, but the code that follows will fetch reg vars from
5914             // the registers, so we need that GC info.
5915             regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
5916             regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
5917
5918             // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
5919             // So, pretend there aren't any, and spill them anyway. This will only occur
5920             // if rsAdditional is non-empty.
5921             regMaskTP   rsTemp = regSet.rsMaskVars;
5922             regSet.ClearMaskVars();
5923
5924             regSet.rsSpillRegs(rsSpill);
5925
5926             // Restore gc tracking masks.
5927             gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
5928             gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
5929
5930             // Set regSet.rsMaskVars back to normal
5931             regSet.rsMaskVars = rsTemp;
5932         }
5933     }
5934
5935
5936     // Generate the conditional jump but without doing any StackFP fixups.
5937     genCondJump(op1, lab_true, lab_false, false);
5938
5939
5940     /* Save the current liveness, register status, and GC pointers */
5941     /* This is the liveness information upon entry                 */
5942     /* to both the then and else parts of the qmark                */
5943
5944     saveLiveness(&entryLiveness);
5945
5946     /* Clear the liveness of any local variables that are dead upon   */
5947     /* entry to the else part.                                        */
5948
5949     /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
5950     /* from the "colon or op2" liveSet                                */
5951     genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
5952
5953     /* genCondJump() closes the current emitter block */
5954
5955     genDefineTempLabel(lab_false);
5956
5957 #if FEATURE_STACK_FP_X87
5958     // Store fpstate
5959
5960     QmarkStateStackFP  tempFPState;
5961     bool bHasFPUState = !compCurFPState.IsEmpty();
5962     genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
5963 #endif
5964
5965     /* Does the operator yield a value? */
5966
5967     if  (tree->gtType == TYP_VOID)
5968     {
5969         /* Generate the code for the else part of the qmark */
5970
5971         genCodeForTree(elseNode, needReg, bestReg);
5972
5973         /* The type is VOID, so we shouldn't have computed a value */
5974
5975         noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
5976
5977         /* Save the current liveness, register status, and GC pointers               */
5978         /* This is the liveness information upon exit of the then part of the qmark  */
5979
5980         saveLiveness(&exitLiveness);
5981
5982         /* Is there a 'then' part? */
5983
5984         if  (thenNode->IsNothingNode())
5985         {
5986 #if FEATURE_STACK_FP_X87
5987             if (bHasFPUState)
5988             {
5989                 // We had FP state on entry just after the condition, so potentially, the else
5990                 // node may have to do transition work.
5991                 lab_done  = genCreateTempLabel();
5992
5993                 /* Generate jmp lab_done */
5994
5995                 inst_JMP  (EJ_jmp, lab_done);
5996
5997                 /* No 'then' - just generate the 'lab_true' label */
5998
5999                 genDefineTempLabel(lab_true);
6000
6001                 // We need to do this after defining the lab_false label
6002                 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6003                 genQMarkAfterThenBlockStackFP(&tempFPState);
6004                 genDefineTempLabel(lab_done);
6005             }
6006             else
6007 #endif // FEATURE_STACK_FP_X87
6008             {
6009                 /* No 'then' - just generate the 'lab_true' label */
6010                 genDefineTempLabel(lab_true);
6011             }
6012         }
6013         else
6014         {
6015             lab_done  = genCreateTempLabel();
6016
6017             /* Generate jmp lab_done */
6018
6019             inst_JMP  (EJ_jmp, lab_done);
6020
6021             /* Restore the liveness that we had upon entry of the then part of the qmark */
6022
6023             restoreLiveness(&entryLiveness);
6024
6025             /* Clear the liveness of any local variables that are dead upon    */
6026             /* entry to the then part.                                         */
6027             genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
6028
6029             /* Generate lab_true: */
6030
6031             genDefineTempLabel(lab_true);
6032 #if FEATURE_STACK_FP_X87
6033             // We need to do this after defining the lab_false label
6034             genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6035 #endif
6036             /* Enter the then part - trash all registers */
6037
6038             regTracker.rsTrackRegClr();
6039
6040             /* Generate the code for the then part of the qmark */
6041
6042             genCodeForTree(thenNode, needReg, bestReg);
6043
6044             /* The type is VOID, so we shouldn't have computed a value */
6045
6046             noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
6047
6048             unspillLiveness(&exitLiveness);
6049
6050             /* Verify that the exit liveness information is the same for the two parts of the qmark */
6051
6052             checkLiveness(&exitLiveness);
6053 #if FEATURE_STACK_FP_X87
6054             genQMarkAfterThenBlockStackFP(&tempFPState);
6055 #endif
6056             /* Define the "result" label */
6057
6058             genDefineTempLabel(lab_done);
6059         }
6060
6061         /* Join of the two branches - trash all registers */
6062
6063         regTracker.rsTrackRegClr();
6064
6065         /* We're just about done */
6066
6067         genUpdateLife(tree);
6068     }
6069     else
6070     {
6071         /* Generate code for a qmark that generates a value */
6072
6073         /* Generate the code for the else part of the qmark */
6074
6075         noway_assert(elseNode->IsNothingNode() == false);
6076
6077         /* Compute the elseNode into any free register */
6078         genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6079         noway_assert(elseNode->gtFlags & GTF_REG_VAL);
6080         noway_assert(elseNode->gtRegNum != REG_NA);
6081
6082         /* Record the chosen register */
6083         reg  = elseNode->gtRegNum;
6084         regs = genRegMask(reg);
6085
6086         /* Save the current liveness, register status, and GC pointers               */
6087         /* This is the liveness information upon exit of the else part of the qmark  */
6088
6089         saveLiveness(&exitLiveness);
6090
6091         /* Generate jmp lab_done */
6092         lab_done  = genCreateTempLabel();
6093
6094         // We would like to know here whether the code we are jumping over will actually
6095         // emit any instructions; if it won't, the jump below is just a jump to the next
6096         // instruction.  What we would really like is to be able to go back and remove the
6097         // jump, but we have no way of doing that right now.
6098
6099 #ifdef DEBUG
6100         // We will use this to assert we don't emit instructions if we decide not to
6101         // do the jmp
6102         unsigned emittedInstructions = getEmitter()->emitInsCount;
6103         bool bSkippedJump = false;
6104 #endif
6105         if (
6106 #if FEATURE_STACK_FP_X87
6107             !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
6108 #endif
6109              genIsEnregisteredIntVariable(thenNode) == reg)
6110         {
6111             // For the moment, handle just this easy case (the variable is already
6112             // enregistered in the target register), which is the one that happens all the time.
6113 #ifdef DEBUG
6114             bSkippedJump = true;
6115 #endif
6116         }
6117         else
6118         {
6119             inst_JMP  (EJ_jmp, lab_done);
6120         }
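        // Illustrative sketch (nothing extra is generated here): for something like
        //     v = cond ? v : expr;        // 'v' is an enregistered int living in, say, ESI
        // the else value 'expr' is computed into ESI above, while the then part is just the
        // register variable 'v' already sitting in ESI and so emits no code at all; the
        // "jmp lab_done" would merely jump to the next instruction, so we skip it, and the
        // DEBUG bookkeeping (emittedInstructions) asserts that nothing was in fact emitted.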
6121
6122         /* Restore the liveness that we had upon entry of the else part of the qmark */
6123
6124         restoreLiveness(&entryLiveness);
6125
6126         /* Clear the liveness of any local variables that are dead upon    */
6127         /* entry to the then part.                                         */
6128         genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
6129
6130         /* Generate lab_true: */
6131         genDefineTempLabel(lab_true);
6132 #if FEATURE_STACK_FP_X87
6133         // Store FP state
6134
6135         // We need to do this after defining the lab_true label
6136         genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6137 #endif
6138         /* Enter the then part - trash all registers */
6139
6140         regTracker.rsTrackRegClr();
6141
6142         /* Generate the code for the then part of the qmark */
6143
6144         noway_assert(thenNode->IsNothingNode() == false);
6145
6146         /* This must place a value into the chosen register */
6147         genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
6148
6149         noway_assert(thenNode->gtFlags & GTF_REG_VAL);
6150         noway_assert(thenNode->gtRegNum == reg);
6151
6152         unspillLiveness(&exitLiveness);
6153
6154         /* Verify that the exit liveness information is the same for the two parts of the qmark */
6155         checkLiveness(&exitLiveness);
6156 #if FEATURE_STACK_FP_X87
6157         genQMarkAfterThenBlockStackFP(&tempFPState);
6158 #endif
6159
6160 #ifdef DEBUG
6161         noway_assert(bSkippedJump == false ||
6162                      getEmitter()->emitInsCount == emittedInstructions);
6163 #endif
6164
6165         /* Define the "result" label */
6166         genDefineTempLabel(lab_done);
6167
6168         /* Join of the two branches - trash all registers */
6169
6170         regTracker.rsTrackRegClr();
6171
6172         /* Check whether this subtree has freed up any variables */
6173
6174         genUpdateLife(tree);
6175
6176         genMarkTreeInReg(tree, reg);
6177
6178     }
6179 }
6180
6181
6182 /*****************************************************************************
6183  *
6184  *  Generate code for a qmark colon using the CMOV instruction.  It's OK
6185  *  to return false when we can't easily implement it using a cmov (leading
6186  *  genCodeForQmark to implement it using branches).
6187  */
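// A minimal sketch of the shape of code this routine tries to produce (register names,
// operands and the exact condition code are illustrative; the compare may be reversed
// below before the flags are set):
//
//     ; v = (a < b) ? t : e          -- one arm is a constant or local ("alwaysNode"),
//     ;                                 the other a local variable ("predicateNode")
//     cmp     a, b                   ; genCondSetFlags(cond)
//     mov     reg, <alwaysNode>      ; unconditionally materialize one arm
//     cmovcc  reg, <predicateNode>   ; conditionally overwrite it with the other arm
//
// Both arms are evaluated (one of them unconditionally), so only constants and locals
// are accepted below.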
6188
6189 bool                CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree,
6190                                                      regMaskTP  destReg,
6191                                                      regMaskTP  bestReg)
6192 {
6193 #ifdef _TARGET_XARCH_
6194     GenTreePtr      cond     = tree->gtOp.gtOp1;
6195     GenTreePtr      colon    = tree->gtOp.gtOp2;
6196     // Warning: this naming of the local vars is backwards!
6197     GenTreePtr      thenNode = colon->gtOp.gtOp1;
6198     GenTreePtr      elseNode = colon->gtOp.gtOp2;
6199     GenTreePtr      alwaysNode, predicateNode;
6200     regNumber       reg;
6201     regMaskTP       needReg  = destReg;
6202
6203     noway_assert(tree->gtOper == GT_QMARK);
6204     noway_assert(cond->OperIsCompare());
6205     noway_assert(colon->gtOper == GT_COLON);
6206
6207 #ifdef DEBUG
6208     if (JitConfig.JitNoCMOV())
6209     {
6210         return false;
6211     }
6212 #endif
6213
6214     /* Can only implement CMOV on processors that support it */
6215
6216     if (!compiler->opts.compUseCMOV)
6217     {
6218         return false;
6219     }
6220
6221     /* thenNode better be a local or a constant */
6222
6223     if ((thenNode->OperGet() != GT_CNS_INT) &&
6224         (thenNode->OperGet() != GT_LCL_VAR))
6225     {
6226         return false;
6227     }
6228
6229     /* elseNode better be a local or a constant or nothing */
6230
6231     if ((elseNode->OperGet() != GT_CNS_INT) &&
6232         (elseNode->OperGet() != GT_LCL_VAR))
6233     {
6234         return false;
6235     }
6236
6237     /* can't handle two constants here */
6238
6239     if ((thenNode->OperGet() == GT_CNS_INT) &&
6240         (elseNode->OperGet() == GT_CNS_INT))
6241     {
6242         return false;
6243     }
6244
6245     /* let's not handle comparisons of non-integer types */
6246
6247     if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
6248     {
6249         return false;
6250     }
6251
6252     /* Choose nodes for predicateNode and alwaysNode.  Swap cond if necessary.
6253        The biggest constraint is that cmov doesn't take an integer argument.
6254     */
6255
6256     bool reverseCond = false;
6257     if (elseNode->OperGet() == GT_CNS_INT)
6258     {
6259         // else node is a constant
6260
6261         alwaysNode    = elseNode;
6262         predicateNode = thenNode;
6263         reverseCond    = true;
6264     }
6265     else
6266     {
6267         alwaysNode    = thenNode;
6268         predicateNode = elseNode;
6269     }
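    // For example (illustrative only): for "x = cond ? y : 7" the constant cannot be the
    // cmov source (cmov has no immediate form), so 7 becomes alwaysNode (materialized into
    // the destination register unconditionally) and the local y becomes predicateNode (the
    // cmov operand), and the compare is reversed so the conditional move still selects the
    // arm that the original 'cond' asks for.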
6270
6271     // If the live set in alwaysNode is not the same as in tree, then
6272     // the variable in predicate node dies here.  This is a dangerous
6273     // case that we don't handle (genComputeReg could overwrite
6274     // the value of the variable in the predicate node).
6275
6276     // This assert is just paranoid (we've already asserted it above)
6277     assert (predicateNode->OperGet() == GT_LCL_VAR);
6278     if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
6279     {
6280         return false;
6281     }
6282
6283     // Past this point we are committing to use CMOV.
6284
6285     if (reverseCond)
6286     {
6287         compiler->gtReverseCond(cond);
6288     }
6289
6290     emitJumpKind jumpKind = genCondSetFlags(cond);
6291
6292     // Compute the always node into any free register.  If it's a constant,
6293     // we need to generate the mov instruction here (otherwise genComputeReg might
6294     // modify the flags, as in xor reg,reg).
6295
6296     if (alwaysNode->OperGet() == GT_CNS_INT)
6297     {
6298         reg = regSet.rsPickReg(needReg, bestReg);
6299         inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
6300         gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
6301         regTracker.rsTrackRegTrash(reg);
6302     }
6303     else
6304     {
6305         genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6306         noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
6307         noway_assert(alwaysNode->gtRegNum != REG_NA);
6308
6309         // Record the chosen register
6310
6311         reg  = alwaysNode->gtRegNum;
6312     }
6313
6314     regNumber regPredicate = REG_NA;
6315
6316     // Is predicateNode an enregistered variable?
6317
6318     if (genMarkLclVar(predicateNode))
6319     {
6320         // Variable lives in a register
6321
6322         regPredicate = predicateNode->gtRegNum;
6323     }
6324 #if REDUNDANT_LOAD
6325     else
6326     {
6327         // Checks if the variable happens to be in any of the registers
6328
6329         regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
6330     }
6331 #endif
6332
6333     const static
6334     instruction EJtoCMOV[] =
6335     {
6336         INS_nop,
6337         INS_nop,
6338         INS_cmovo,
6339         INS_cmovno,
6340         INS_cmovb,
6341         INS_cmovae,
6342         INS_cmove,
6343         INS_cmovne,
6344         INS_cmovbe,
6345         INS_cmova,
6346         INS_cmovs,
6347         INS_cmovns,
6348         INS_cmovpe,
6349         INS_cmovpo,
6350         INS_cmovl,
6351         INS_cmovge,
6352         INS_cmovle,
6353         INS_cmovg
6354     };
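    // For instance (illustrative):
    //     emitJumpKind jk = genCondSetFlags(cond);   // say this yields the "jne" kind
    //     instruction  mv = EJtoCMOV[jk];            // -> INS_cmovne
    // The leading INS_nop entries pad jump kinds that should never reach here; the
    // insIsCMOV() assert below catches any such case.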
6355
6356     noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
6357     instruction cmov_ins = EJtoCMOV[jumpKind];
6358
6359     noway_assert(insIsCMOV(cmov_ins));
6360
6361     if (regPredicate != REG_NA)
6362     {
6363         // regPredicate is in a register
6364
6365         inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
6366     }
6367     else
6368     {
6369         // regPredicate is in memory
6370
6371         inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
6372     }
6373     gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
6374     regTracker.rsTrackRegTrash(reg);
6375
6376     genUpdateLife(alwaysNode);
6377     genUpdateLife(predicateNode);
6378     genCodeForTree_DONE_LIFE(tree, reg);
6379     return true;
6380 #else
6381     return false;
6382 #endif
6383 }
6384
6385
6386 #ifdef _TARGET_XARCH_
6387 void                CodeGen::genCodeForMultEAX(GenTreePtr tree)
6388 {
6389     GenTreePtr  op1      = tree->gtOp.gtOp1;
6390     GenTreePtr  op2      = tree->gtGetOp2();
6391     bool        ovfl     = tree->gtOverflow();
6392     regNumber   reg      = DUMMY_INIT(REG_CORRUPT);
6393     regMaskTP   addrReg;
6394
6395     noway_assert(tree->OperGet() == GT_MUL);
6396
6397     /* We'll evaluate 'op1' first */
6398     
6399     regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
6400     
6401     /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
6402     
6403     genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
6404     noway_assert(op1->gtFlags & GTF_REG_VAL);
6405
6406     // If op2 is a constant, we need to load the constant into a register
6407     if (op2->OperKind() & GTK_CONST)
6408     {
6409         genCodeForTree(op2, RBM_EDX);  // since EDX is going to be spilled anyway
6410         noway_assert(op2->gtFlags & GTF_REG_VAL);
6411         regSet.rsMarkRegUsed(op2);
6412         addrReg = genRegMask(op2->gtRegNum);
6413     }
6414     else
6415     {
6416         /* Make the second operand addressable */
6417         // Try to avoid EAX.
6418         addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
6419     }
6420
6421     /* Make sure the first operand is still in a register */
6422     // op1 *must* go into EAX.
6423     genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
6424     noway_assert(op1->gtFlags & GTF_REG_VAL);
6425
6426     reg = op1->gtRegNum;
6427
6428     // For 8 bit operations, we need to pick byte addressable registers
6429
6430     if (ovfl && varTypeIsByte(tree->TypeGet()) &&
6431         !(genRegMask(reg) & RBM_BYTE_REGS))
6432     {
6433         regNumber   byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6434
6435         inst_RV_RV(INS_mov, byteReg, reg);
6436         
6437         regTracker.rsTrackRegTrash(byteReg);
6438         regSet.rsMarkRegFree  (genRegMask(reg));
6439         
6440         reg = byteReg;
6441         op1->gtRegNum = reg;
6442         regSet.rsMarkRegUsed(op1);
6443     }
6444
6445     /* Make sure the operand is still addressable */
6446     addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
6447
6448     /* Free up the operand, if it's a regvar */
6449
6450     genUpdateLife(op2);
6451
6452     /* The register is about to be trashed */
6453
6454     regTracker.rsTrackRegTrash(reg);
6455
6456     // For overflow instructions, tree->TypeGet() is the accurate type,
6457     // and gives us the size for the operands.
6458
6459     emitAttr opSize = emitTypeSize(tree->TypeGet());
6460
6461     /* Compute the new value */
6462
6463     noway_assert(op1->gtRegNum == REG_EAX);
6464     
6465     // Make sure Edx is free (unless used by op2 itself)
6466     bool op2Released = false;
6467
6468     if ((addrReg & RBM_EDX) == 0)
6469     {
6470         // op2 does not use Edx, so make sure no one else does either
6471         regSet.rsGrabReg(RBM_EDX);
6472     }
6473     else if (regSet.rsMaskMult & RBM_EDX)
6474     {
6475         /* Edx is used by op2 and some other trees.
6476            Spill the other trees besides op2. */
6477
6478         regSet.rsGrabReg(RBM_EDX);
6479         op2Released = true;
6480         
6481         /* keepReg==RegSet::FREE_REG so that the other multi-used trees
6482            don't get marked as unspilled as well. */
6483         regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
6484     }
6485
6486     instruction     ins;
6487
6488     if (tree->gtFlags & GTF_UNSIGNED)
6489         ins = INS_mulEAX;
6490     else
6491         ins = INS_imulEAX;
6492     
6493     inst_TT(ins, op2, 0, 0, opSize);
6494     
6495     /* Both EAX and EDX are now trashed */
6496     
6497     regTracker.rsTrackRegTrash (REG_EAX);
6498     regTracker.rsTrackRegTrash (REG_EDX);
6499
6500     /* Free up anything that was tied up by the operand */
6501
6502     if (!op2Released)
6503         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
6504
6505     /* The result will be where the first operand is sitting */
6506
6507     /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
6508     genRecoverReg(op1, 0, RegSet::KEEP_REG);
6509
6510     reg = op1->gtRegNum;
6511     noway_assert(reg == REG_EAX);
6512
6513     genReleaseReg(op1);
6514
6515     /* Do we need an overflow check */
6516
6517     if (ovfl)
6518         genCheckOverflow(tree);
6519     
6520     genCodeForTree_DONE(tree, reg);
6521 }
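// A rough sketch of what the routine above typically emits for a checked unsigned multiply
// (registers and the addressing form of op2 are illustrative):
//
//     mov     eax, <op1>            ; op1 is forced into EAX
//     mul     dword ptr <op2>       ; EDX:EAX = EAX * op2  (imulEAX for the signed forms)
//     jo      <overflow helper>     ; genCheckOverflow: mul/imul set CF/OF when the
//                                   ; product does not fit in EAX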
6522 #endif // _TARGET_XARCH_
6523
6524 #ifdef _TARGET_ARM_
6525 void                CodeGen::genCodeForMult64(GenTreePtr tree,
6526                                               regMaskTP  destReg,
6527                                               regMaskTP  bestReg)
6528 {
6529     GenTreePtr  op1      = tree->gtOp.gtOp1;
6530     GenTreePtr  op2      = tree->gtGetOp2();
6531
6532     noway_assert(tree->OperGet() == GT_MUL);
6533
6534     /* Generate the first operand into some register */
6535
6536     genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6537     noway_assert(op1->gtFlags & GTF_REG_VAL);
6538
6539     /* Generate the second operand into some register */
6540
6541     genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6542     noway_assert(op2->gtFlags & GTF_REG_VAL);
6543
6544     /* Make sure the first operand is still in a register */
6545     genRecoverReg(op1, 0, RegSet::KEEP_REG);
6546     noway_assert(op1->gtFlags & GTF_REG_VAL);
6547
6548     /* Free up the operands */
6549     genUpdateLife(tree);
6550
6551     genReleaseReg(op1);
6552     genReleaseReg(op2);
6553
6554     regNumber regLo = regSet.rsPickReg(destReg, bestReg);
6555     regNumber regHi;
6556
6557     regSet.rsLockReg(genRegMask(regLo));
6558     regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
6559     regSet.rsUnlockReg(genRegMask(regLo));
6560
6561     instruction     ins;
6562     if (tree->gtFlags & GTF_UNSIGNED)
6563         ins = INS_umull;
6564     else
6565         ins = INS_smull;
6566
6567     getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
6568     regTracker.rsTrackRegTrash(regHi);
6569     regTracker.rsTrackRegTrash(regLo);
6570
6571     /* Do we need an overflow check */
6572
6573     if (tree->gtOverflow())
6574     {
6575         // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
6576         //
6577         regSet.rsLockReg(genRegMask(regLo));
6578         
6579         if (tree->gtFlags & GTF_MUL_64RSLT)
6580             regSet.rsLockReg(genRegMask(regHi));
6581
6582         regNumber regTmpHi = regHi;
6583         if ((tree->gtFlags & GTF_UNSIGNED) == 0)
6584         {
6585             getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
6586             regTmpHi = regSet.rsPickReg(RBM_ALLINT);
6587             getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
6588             regTracker.rsTrackRegTrash(regTmpHi);
6589         }
6590         getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
6591
6592         // Jump to the block which will throw the exception
6593         emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
6594         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
6595
6596         // Unlock regLo [and regHi] after generating code for the gtOverflow() case
6597         //
6598         regSet.rsUnlockReg(genRegMask(regLo));
6599
6600         if (tree->gtFlags & GTF_MUL_64RSLT)
6601             regSet.rsUnlockReg(genRegMask(regHi));
6602     }
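    // Why the sequence above detects signed overflow (a worked sketch, not extra code):
    // the 64-bit product fits in a signed 32-bit int exactly when regHi equals the sign
    // extension of regLo.  "cmp regLo, 0x80000000" sets the ARM carry flag precisely when
    // regLo's sign bit is set, so "adc regTmpHi, regHi, #0" computes regHi (+1 if regLo is
    // negative), which is zero in exactly the two legal cases:
    //     regLo >= 0  ->  need regHi == 0   ->  regHi + 0 == 0
    //     regLo <  0  ->  need regHi == -1  ->  regHi + 1 == 0
    // so the final compare against zero branches to the overflow helper iff the result
    // does not fit.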
6603
6604     genUpdateLife(tree); 
6605
6606     if (tree->gtFlags & GTF_MUL_64RSLT)
6607         genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
6608     else
6609         genMarkTreeInReg(tree, regLo);
6610 }
6611 #endif // _TARGET_ARM_
6612
6613
6614 /*****************************************************************************
6615  *
6616  *  Generate code for a simple binary arithmetic or logical operator.
6617  *  Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
6618  */
6619
6620 void                CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree,
6621                                                             regMaskTP  destReg,
6622                                                             regMaskTP  bestReg)
6623 {
6624     instruction     ins;
6625     genTreeOps      oper     = tree->OperGet();
6626     const var_types treeType = tree->TypeGet();
6627     GenTreePtr      op1      = tree->gtOp.gtOp1;
6628     GenTreePtr      op2      = tree->gtGetOp2();
6629     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
6630     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
6631     regMaskTP       needReg  = destReg;
6632
6633     /* Figure out what instruction to generate */
6634
6635     bool isArith;
6636     switch (oper)
6637     {
6638     case GT_AND: ins = INS_AND; isArith = false; break;
6639     case GT_OR : ins = INS_OR ; isArith = false; break;
6640     case GT_XOR: ins = INS_XOR; isArith = false; break;
6641     case GT_ADD: ins = INS_add; isArith = true;  break;
6642     case GT_SUB: ins = INS_sub; isArith = true;  break;
6643     case GT_MUL: ins = INS_MUL; isArith = true;  break;
6644     default:
6645         unreached();
6646     }
6647
6648 #ifdef _TARGET_XARCH_
6649     /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
6650
6651     if  ((oper == GT_MUL)                      &&
6652          op2->IsIntCnsFitsInI32()              &&  // op2 is a constant that fits in a sign-extended 32-bit immediate
6653          !op1->IsCnsIntOrI()                   &&  // op1 is not a constant
6654          (tree->gtFlags & GTF_MUL_64RSLT) == 0 &&  // tree not marked with MUL_64RSLT
6655          !varTypeIsByte(treeType)              &&  // No encoding for say "imul al,al,imm"
6656          !tree->gtOverflow()                     ) // 3 operand imul doesn't set flags
6657     {
6658         /* Make the first operand addressable */
6659
6660         regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
6661
6662         /* Grab a register for the target */
6663
6664         reg   = regSet.rsPickReg(needReg, bestReg);
6665
6666         /* Compute the value into the target: reg=op1*op2_icon */
6667
6668 #if LEA_AVAILABLE
6669         if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
6670         {
6671             regNumber regSrc;
6672             if (op1->gtFlags & GTF_REG_VAL)
6673             {
6674                 regSrc = op1->gtRegNum;
6675             }
6676             else
6677             {
6678                 inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
6679                 regSrc = reg;
6680             }
6681             getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc, (op2->gtIntCon.gtIconVal & -2), 0);
6682         }
6683         else
6684 #endif // LEA_AVAILABLE
6685         {
6686             inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
6687         }
6688
6689         /* The register has been trashed now */
6690
6691         regTracker.rsTrackRegTrash(reg);
6692
6693         /* The address is no longer live */
6694
6695         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
6696
6697         genCodeForTree_DONE(tree, reg);
6698         return;
6699     }
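    // Illustrative examples of the two forms above (register names are made up):
    //     reg = op1 * 11   ->   imul  edi, dword ptr [op1], 11      ; 3 operand imul
    //     reg = op1 * 5    ->   mov   edi, dword ptr [op1]          ; (load only if op1 is
    //                           lea   edi, [edi + edi*4]            ;  not already in a register)
    // For the constants 3, 5 and 9 the lea encodes the multiply as base + index*scale,
    // which is why (gtIconVal & -2) above yields the scale of 2, 4 or 8.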
6700 #endif // _TARGET_XARCH_
6701
6702     bool ovfl = false;
6703
6704     if (isArith)
6705     {
6706         // We only reach here for GT_ADD, GT_SUB and GT_MUL.
6707         assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
6708
6709         ovfl = tree->gtOverflow();
6710
6711         /* We record the accurate (small) types in trees only when we need to
6712          * check for overflow. Otherwise we record genActualType().
6713          */
6714     
6715         noway_assert(ovfl || (treeType == genActualType(treeType)));
6716     
6717 #if LEA_AVAILABLE
6718     
6719         /* Can we use an 'lea' to compute the result?
6720            Can't use 'lea' for overflow as it doesn't set flags
6721            Can't use 'lea' unless we have at least two free registers */
6722         {
6723             bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) +  // Live intreg variables
6724                                genCountBits(regSet.rsMaskLock) +              // Locked registers
6725                                2                                       // We will need two registers
6726                    <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
6727     
6728             regMaskTP regs = RBM_NONE; // OUT argument
6729             if  (!ovfl        && 
6730                  bEnoughRegs  &&
6731                  genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
6732             {
6733                 emitAttr size;
6734                 
6735                 /* Is the value now computed in some register? */
6736     
6737                 if  (tree->gtFlags & GTF_REG_VAL)
6738                 {
6739                     genCodeForTree_REG_VAR1(tree);
6740                     return;
6741                 }
6742     
6743                 /* If we can reuse op1/2's register directly, and 'tree' is
6744                    a simple expression (i.e. not in scaled index form),
6745                    might as well just use "add" instead of "lea" */
6746     
6747                 // However, if we're in a context where we want to evaluate "tree" into a specific
6748                 // register different from the reg we'd use in this optimization, then it doesn't
6749                 // make sense to do the "add", since we'd also have to do a "mov."
6750                 if  (op1->gtFlags & GTF_REG_VAL)
6751                 {
6752                     reg = op1->gtRegNum;
6753     
6754                     if  ((genRegMask(reg) & regSet.rsRegMaskFree()) &&
6755                          (genRegMask(reg) & needReg))
6756                     {
6757                         if (op2->gtFlags & GTF_REG_VAL)
6758                         {
6759                             /* Simply add op2 to the register */
6760     
6761                             inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
6762     
6763                             if  (tree->gtSetFlags())
6764                                 genFlagsEqualToReg(tree, reg);
6765     
6766                             goto DONE_LEA_ADD;
6767                         }
6768                         else if (op2->OperGet() == GT_CNS_INT)
6769                         {
6770                             /* Simply add op2 to the register */
6771     
6772                             genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
6773     
6774                             goto DONE_LEA_ADD;
6775                         }
6776                     }
6777                 }
6778     
6779                 if  (op2->gtFlags & GTF_REG_VAL)
6780                 {
6781                     reg = op2->gtRegNum;
6782     
6783                     if  ((genRegMask(reg) & regSet.rsRegMaskFree()) &&
6784                          (genRegMask(reg) & needReg))
6785                     {
6786                         if (op1->gtFlags & GTF_REG_VAL)
6787                         {
6788                             /* Simply add op1 to the register */
6789     
6790                             inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
6791     
6792                             if  (tree->gtSetFlags())
6793                                 genFlagsEqualToReg(tree, reg);
6794     
6795                             goto DONE_LEA_ADD;
6796                         }
6797                     }
6798                 }
6799     
6800                 // The expression either requires a scaled-index form, or the
6801                 // op1 or op2 register can't be targeted; the latter can happen
6802                 // when op1 or op2 are enregistered variables.
6803     
6804                 reg = regSet.rsPickReg(needReg, bestReg);
6805                 size = emitActualTypeSize(treeType);
6806     
6807                 /* Generate "lea reg, [addr-mode]" */
6808     
6809                 inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
6810     
6811 #ifndef _TARGET_XARCH_
6812                 // Don't call genFlagsEqualToReg on x86/x64
6813                 //  as it does not set the flags
6814                 if  (tree->gtSetFlags())
6815                     genFlagsEqualToReg(tree, reg);
6816 #endif
6817     
6818 DONE_LEA_ADD:
6819                 /* The register has been trashed now */
6820                 regTracker.rsTrackRegTrash(reg);
6821     
6822                 genDoneAddressable(tree, regs, RegSet::FREE_REG);
6823     
6824                 /* The following could be an 'inner' pointer!!! */
6825     
6826                 noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
6827     
6828                 if (treeType == TYP_BYREF)
6829                 {
6830                     genUpdateLife(tree);
6831                     
6832                     gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
6833                     gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
6834                 }
6835     
6836                 genCodeForTree_DONE(tree, reg);
6837                 return;
6838             }
6839         }
6840     
6841 #endif // LEA_AVAILABLE
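        // Illustrative example of the lea path above (names are made up): for "a + b" where
        // a and b already sit in, say, ESI and EDI but neither register can simply be
        // reused, genMakeIndAddrMode folds the whole tree into an address mode and we emit
        //     lea  ecx, [esi + edi]
        // instead of a mov followed by an add; since lea does not set the flags, this path
        // is skipped for overflow-checked operations.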
6842     
6843         noway_assert((varTypeIsGC(treeType) == false) ||
6844                      (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
6845     }
6846
6847     /* The following makes an assumption about gtSetEvalOrder(this) */
6848
6849     noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
6850
6851     /* Compute a useful register mask */
6852     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6853     needReg = regSet.rsNarrowHint (needReg, regSet.rsRegMaskFree());
6854
6855     // Determine what registers go live between op1 and op2
6856     // Don't bother checking if op1 is already in a register.
6857     // This is not just for efficiency; if it's already in a
6858     // register then it may already be considered "evaluated"
6859     // for the purposes of liveness, in which case genNewLiveRegMask
6860     // will assert.
6861     if (!op1->InReg())
6862     {
6863         regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
6864         if (newLiveMask)
6865         {
6866             needReg = regSet.rsNarrowHint (needReg, ~newLiveMask);
6867         }
6868     }
6869
6870 #if CPU_HAS_BYTE_REGS
6871     /* 8-bit operations can only be done in the byte-regs */
6872     if (varTypeIsByte(treeType))
6873         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6874 #endif // CPU_HAS_BYTE_REGS
6875
6876     // Try selecting one of the 'bestRegs'
6877     needReg = regSet.rsNarrowHint(needReg, bestReg);
6878
6879     /* Special case: small_val & small_mask */
6880
6881     if  ( varTypeIsSmall(op1->TypeGet()) &&
6882           op2->IsCnsIntOrI() &&
6883           oper == GT_AND)
6884     {
6885         size_t          and_val = op2->gtIntCon.gtIconVal;
6886         size_t          andMask;
6887         var_types       typ = op1->TypeGet();
6888
6889         switch (typ)
6890         {
6891         case TYP_BOOL:
6892         case TYP_BYTE:
6893         case TYP_UBYTE:
6894             andMask = 0x000000FF;
6895             break;
6896         case TYP_SHORT: 
6897         case TYP_CHAR:
6898             andMask = 0x0000FFFF;
6899             break;
6900         default: noway_assert(!"unexpected type"); return;
6901         }
6902
6903         // Is the 'and_val' completely contained within the bits found in 'andMask'
6904         if  ((and_val & ~andMask) == 0)
6905         {
6906             // We must use unsigned instructions when loading op1
6907             if (varTypeIsByte(typ))
6908             {
6909                 op1->gtType = TYP_UBYTE;
6910             }
6911             else // varTypeIsShort(typ)
6912             {
6913                 assert(varTypeIsShort(typ));
6914                 op1->gtType = TYP_CHAR;   
6915             }
6916
6917             /* Generate the first operand into a scratch register */
6918
6919             op1 = genCodeForCommaTree(op1);
6920             genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6921
6922             noway_assert(op1->gtFlags & GTF_REG_VAL);
6923
6924             regNumber op1Reg = op1->gtRegNum;
6925
6926             // Did we end up in an acceptable register?  
6927             // and do we have an acceptable free register available to grab?
6928             //
6929             if ( ((genRegMask(op1Reg) & needReg) == 0) &&          
6930                  ((regSet.rsRegMaskFree()    & needReg) != 0)    )
6931             {
6932                 // See if we can pick a register from bestReg
6933                 bestReg &= needReg;
6934
6935                 // Grab an acceptable register 
6936                 regNumber newReg;
6937                 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6938                     newReg = regSet.rsGrabReg(bestReg);
6939                 else
6940                     newReg = regSet.rsGrabReg(needReg);
6941
6942                 noway_assert(op1Reg != newReg);
6943
6944                 /* Update the value in the target register */
6945
6946                 regTracker.rsTrackRegCopy(newReg, op1Reg);
6947
6948                 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6949
6950                 /* The value has been transferred to 'reg' */
6951
6952                 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6953                     gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6954
6955                 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6956
6957                 /* The value is now in an appropriate register */
6958
6959                 op1->gtRegNum = newReg;
6960             }
6961             noway_assert(op1->gtFlags & GTF_REG_VAL);
6962             genUpdateLife(op1);
6963
6964             /* Mark the register as 'used' */
6965             regSet.rsMarkRegUsed(op1);
6966             reg = op1->gtRegNum;
6967
6968             if  (and_val != andMask)      // Does the "and" mask only cover some of the bits? 
6969             {
6970                 /* "and" the value */
6971
6972                 inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
6973             }
6974
6975             /* Update the live set of register variables */
6976 #ifdef DEBUG
6977             if (compiler->opts.varNames) genUpdateLife(tree);
6978 #endif
6979
6980             /* Now we can update the register pointer information */
6981
6982             genReleaseReg(op1);
6983             gcInfo.gcMarkRegPtrVal(reg, treeType);
6984
6985             genCodeForTree_DONE_LIFE(tree, reg);
6986             return;
6987         }
6988     }
6989
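    // Illustrative example of the special case above (hypothetical local): for
    //     unsigned char b; ... (b & 0x3F) ...
    // op1 is loaded with a zero-extending move, so only the low "andMask" bits can be set;
    // the explicit AND is then emitted only because 0x3F covers just some of those bits,
    // while something like (b & 0xFF) needs no AND at all after the load.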
6990 #ifdef _TARGET_XARCH_
6991
6992     // Do we have to use the special "imul" instruction 
6993     // which has eax as the implicit operand ?
6994     //
6995     bool multEAX = false;
6996
6997     if (oper == GT_MUL)
6998     {
6999         if (tree->gtFlags & GTF_MUL_64RSLT)
7000         {
7001             /* Only multiplying with EAX will leave the 64-bit
7002              * result in EDX:EAX */
7003
7004             multEAX = true;
7005         }
7006         else if (ovfl)
7007         {
7008             if (tree->gtFlags & GTF_UNSIGNED)
7009             {
7010                 /* "mul reg/mem" always has EAX as default operand */
7011
7012                 multEAX = true;
7013             }
7014             else if (varTypeIsSmall(treeType))
7015             {
7016                 /* Only the "imul with EAX" encoding has the 'w' bit
7017                  * to specify the size of the operands */
7018
7019                 multEAX = true;
7020             }
7021         }
7022     }
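    // For instance (illustrative): a checked unsigned 32x32 multiply such as
    //     checked { uint r = a * b; }
    // must go through genCodeForMultEAX above, because only the one-operand mul/imul
    // forms leave the full 64-bit product in EDX:EAX and set CF/OF so that the overflow
    // can actually be detected.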
7023
7024     if  (multEAX)
7025     {
7026         noway_assert(oper == GT_MUL);
7027
7028         return genCodeForMultEAX(tree);
7029     }
7030 #endif // _TARGET_XARCH_
7031
7032 #ifdef _TARGET_ARM_
7033
7034     // Do we have to use the special 32x32 => 64 bit multiply
7035     //
7036     bool mult64 = false;
7037
7038     if (oper == GT_MUL)
7039     {
7040         if (tree->gtFlags & GTF_MUL_64RSLT)
7041         {
7042             mult64 = true;
7043         }
7044         else if (ovfl)
7045         {
7046             // We must always use the 32x32 => 64 bit multiply
7047             // to detect overflow
7048             mult64 = true;
7049         }
7050     }
7051
7052     if  (mult64)
7053     {
7054         noway_assert(oper == GT_MUL);
7055
7056         return genCodeForMult64(tree, destReg, bestReg);
7057     }
7058 #endif // _TARGET_ARM_
7059
7060     /* Generate the first operand into a scratch register */
7061
7062     op1 = genCodeForCommaTree(op1);
7063     genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
7064
7065     noway_assert(op1->gtFlags & GTF_REG_VAL);
7066
7067     regNumber op1Reg = op1->gtRegNum;
7068
7069     // Setup needReg with the set of register that we require for op1 to be in
7070     //
7071     needReg = RBM_ALLINT;
7072
7073     /* Compute a useful register mask */
7074     needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
7075     needReg = regSet.rsNarrowHint (needReg, regSet.rsRegMaskFree());
7076
7077 #if CPU_HAS_BYTE_REGS
7078     /* 8-bit operations can only be done in the byte-regs */
7079     if (varTypeIsByte(treeType))
7080         needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
7081 #endif // CPU_HAS_BYTE_REGS
7082
7083     // Did we end up in an acceptable register?  
7084     // and do we have an acceptable free register available to grab?
7085     //
7086     if ( ((genRegMask(op1Reg) & needReg) == 0) &&          
7087          ((regSet.rsRegMaskFree()    & needReg) != 0)    )
7088     {
7089         // See if we can pick a register from bestReg
7090         bestReg &= needReg;
7091
7092         // Grab an acceptable register 
7093         regNumber newReg;
7094         if ((bestReg & regSet.rsRegMaskFree()) != 0)
7095             newReg = regSet.rsGrabReg(bestReg);
7096         else
7097             newReg = regSet.rsGrabReg(needReg);
7098
7099         noway_assert(op1Reg != newReg);
7100
7101         /* Update the value in the target register */
7102
7103         regTracker.rsTrackRegCopy(newReg, op1Reg);
7104
7105         inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
7106
7107         /* The value has been transferred to 'reg' */
7108
7109         if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
7110             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
7111
7112         gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
7113
7114         /* The value is now in an appropriate register */
7115
7116         op1->gtRegNum = newReg;
7117     }
7118     noway_assert(op1->gtFlags & GTF_REG_VAL);
7119     op1Reg = op1->gtRegNum;
7120
7121     genUpdateLife(op1);
7122
7123     /* Mark the register as 'used' */
7124     regSet.rsMarkRegUsed(op1);
7125
7126     bool isSmallConst = false;
7127
7128 #ifdef _TARGET_ARM_
7129     if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))           
7130     {
7131         isSmallConst = true;
7132     }
7133 #endif
7134     /* Make the second operand addressable */
7135
7136     regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
7137
7138 #if CPU_LOAD_STORE_ARCH
7139     genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
7140 #else // !CPU_LOAD_STORE_ARCH
7141     /* Is op1 spilled and op2 in a register? */
7142
7143     if  ((op1->gtFlags & GTF_SPILLED) &&
7144          (op2->gtFlags & GTF_REG_VAL) &&
7145          (ins != INS_sub) )
7146     {
7147         noway_assert(ins == INS_add  ||
7148                      ins == INS_MUL  ||
7149                      ins == INS_AND  ||
7150                      ins == INS_OR   ||
7151                      ins == INS_XOR);
7152
7153         // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
7154         noway_assert(op2->gtOper != GT_LCL_VAR ||
7155                      varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
7156
7157         reg = op2->gtRegNum;
7158         regMaskTP regMask = genRegMask(reg);
7159
7160         /* Is the register holding op2 available? */
7161
7162         if  (regMask & regSet.rsMaskVars)
7163         {
7164         }
7165         else
7166         {
7167             /* Get the temp we spilled into. */
7168
7169             TempDsc * temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
7170
7171             /* For 8bit operations, we need to make sure that op2 is
7172                in a byte-addressable register */
7173
7174             if (varTypeIsByte(treeType) &&
7175                 !(regMask & RBM_BYTE_REGS))
7176             {
7177                 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7178
7179                 inst_RV_RV(INS_mov, byteReg, reg);
7180                 regTracker.rsTrackRegTrash(byteReg);
7181
7182                 /* op2 couldn't have spilled as it was not sitting in
7183                    RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
7184                 noway_assert(op2->gtFlags & GTF_REG_VAL);
7185
7186                 regSet.rsUnlockReg  (regMask);
7187                 regSet.rsMarkRegFree(regMask);
7188
7189                 reg             = byteReg;
7190                 regMask         = genRegMask(reg);
7191                 op2->gtRegNum   = reg;
7192                 regSet.rsMarkRegUsed(op2);
7193             }
7194
7195             inst_RV_ST(ins, reg, temp, 0, treeType);
7196
7197             regTracker.rsTrackRegTrash(reg);
7198
7199             /* Free the temp */
7200
7201             compiler->tmpRlsTemp(temp);
7202
7203             /* 'add'/'sub' set all CC flags, others only ZF */
7204
7205             /* If we need to check overflow, for small types, the
7206              * flags can't be used as we perform the arithmetic
7207              * operation (on small registers) and then sign extend it
7208              *
7209              * NOTE : If we ever don't need to sign-extend the result,
7210              * we can use the flags
7211              */
7212
7213             if  (tree->gtSetFlags())
7214             {
7215                 genFlagsEqualToReg(tree, reg);
7216             }
7217
7218             /* The result is where the second operand is sitting. Mark result reg as free */
7219             regSet.rsMarkRegFree(genRegMask(reg)) ;
7220
7221             gcInfo.gcMarkRegPtrVal(reg, treeType);
7222
7223             goto CHK_OVF;
7224         }
7225     }
7226 #endif // !CPU_LOAD_STORE_ARCH
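    // Illustrative example of the spilled-op1 path above (names are made up): for
    // "t1 + t2" where t1 was computed into EAX but spilled to a temp while t2 was being
    // evaluated into ECX, we exploit commutativity and emit
    //     add  ecx, dword ptr [spillTemp]     ; result ends up in op2's register
    // instead of unspilling t1 first, which is why INS_sub is excluded from that path.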
7227
7228     /* Make sure the first operand is still in a register */
7229     regSet.rsLockUsedReg(addrReg);
7230     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7231     noway_assert(op1->gtFlags & GTF_REG_VAL);
7232     regSet.rsUnlockUsedReg(addrReg);
7233
7234     reg = op1->gtRegNum;
7235
7236     // For 8 bit operations, we need to pick byte addressable registers
7237
7238     if (varTypeIsByte(treeType) &&
7239         !(genRegMask(reg) & RBM_BYTE_REGS))
7240     {
7241         regNumber   byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7242
7243         inst_RV_RV(INS_mov, byteReg, reg);
7244
7245         regTracker.rsTrackRegTrash(byteReg);
7246         regSet.rsMarkRegFree  (genRegMask(reg));
7247
7248         reg = byteReg;
7249         op1->gtRegNum = reg;
7250         regSet.rsMarkRegUsed(op1);
7251     }
7252
7253     /* Make sure the operand is still addressable */
7254     addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
7255
7256     /* Free up the operand, if it's a regvar */
7257
7258     genUpdateLife(op2);
7259
7260     /* The register is about to be trashed */
7261
7262     regTracker.rsTrackRegTrash(reg);
7263
7264     bool op2Released = false;
7265
7266     // For overflow instructions, tree->gtType is the accurate type,
7267     // and gives us the size for the operands.
7268
7269     emitAttr opSize = emitTypeSize(treeType);
7270
7271     /* Compute the new value */
7272
7273     if (isArith &&
7274         !op2->InReg() &&
7275         (op2->OperKind() & GTK_CONST)
7276 #if !CPU_HAS_FP_SUPPORT
7277          && (treeType == TYP_INT || treeType == TYP_I_IMPL)
7278 #endif
7279          )       
7280     {
7281         ssize_t     ival = op2->gtIntCon.gtIconVal;
7282
7283         if      (oper == GT_ADD)
7284         {
7285             genIncRegBy(reg, ival, tree, treeType, ovfl);
7286         }
7287         else if (oper == GT_SUB)
7288         {
7289             if (ovfl &&
7290                 ((tree->gtFlags & GTF_UNSIGNED) ||
7291                  (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. Therefore we can't use -ival.
7292                )
7293             {
7294                 /* For unsigned overflow, we have to use INS_sub to set
7295                    the flags correctly */
7296
7297                 genDecRegBy(reg, ival, tree);
7298             }
7299             else
7300             {
7301                 /* Else, we simply add the negative of the value */
7302
7303                 genIncRegBy(reg, -ival, tree, treeType, ovfl);
7304             }
7305         }
7306         else if (oper == GT_MUL)
7307         {
7308             genMulRegBy(reg, ival, tree, treeType, ovfl);
7309         }
7310     }
7311     else
7312     {
7313         // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
7314         op2 = op2->gtEffectiveVal();
7315         if (varTypeIsByte(treeType) && op2->InReg())
7316         {
7317             noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
7318
7319             regNumber   op2reg      = op2->gtRegNum;
7320             regMaskTP   op2regMask  = genRegMask(op2reg);
7321
7322             if (!(op2regMask & RBM_BYTE_REGS))
7323             {
7324                 regNumber   byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7325
7326                 inst_RV_RV(INS_mov, byteReg, op2reg);
7327                 regTracker.rsTrackRegTrash(byteReg);
7328
7329                 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7330                 op2Released = true;
7331
7332                 op2->gtRegNum = byteReg;
7333             }
7334         }
7335
7336         inst_RV_TT(ins, reg, op2, 0, opSize, flags);
7337     }
7338
7339     /* Free up anything that was tied up by the operand */
7340
7341     if (!op2Released)
7342         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7343
7344     /* The result will be where the first operand is sitting */
7345
7346     /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
7347     genRecoverReg(op1, 0, RegSet::KEEP_REG);
7348
7349     reg = op1->gtRegNum;
7350
7351     /* 'add'/'sub' set all CC flags, others only ZF+SF */
7352
7353     if  (tree->gtSetFlags())
7354         genFlagsEqualToReg(tree, reg);
7355
7356     genReleaseReg(op1);
7357
7358 #if !CPU_LOAD_STORE_ARCH
7359 CHK_OVF:
7360 #endif // !CPU_LOAD_STORE_ARCH
7361
7362     /* Do we need an overflow check */
7363
7364     if (ovfl)
7365         genCheckOverflow(tree);
7366
7367     genCodeForTree_DONE(tree, reg);
7368 }
7369
7370
7371 /*****************************************************************************
7372  *
7373  *  Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7374  *  Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
7375  */
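// A minimal sketch of the shapes handled below (illustrative only):
//
//     x |= y    with x enregistered in, say, ESI       ->   or   esi, dword ptr [y]
//     x += 1    with x in memory, no overflow check    ->   inc  dword ptr [x]
//     x += c    small type with an overflow check      ->   mov  reg, [x]
//                                                           add  reg, c   (+ overflow check)
//                                                           mov  [x], reg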
7376
7377 void                CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree,
7378                                                                regMaskTP  destReg,
7379                                                                regMaskTP  bestReg)
7380 {
7381     instruction     ins;
7382     const genTreeOps oper    = tree->OperGet();
7383     const var_types treeType = tree->TypeGet();
7384     GenTreePtr      op1      = tree->gtOp.gtOp1;
7385     GenTreePtr      op2      = tree->gtGetOp2();
7386     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
7387     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
7388     regMaskTP       needReg  = destReg;
7389     regMaskTP       addrReg;
7390
7391     /* Figure out what instruction to generate */
7392
7393     bool isArith;
7394     switch (oper)
7395     {
7396     case GT_ASG_AND: ins = INS_AND; isArith = false; break;
7397     case GT_ASG_OR : ins = INS_OR ; isArith = false; break;
7398     case GT_ASG_XOR: ins = INS_XOR; isArith = false; break;
7399     case GT_ASG_ADD: ins = INS_add; isArith = true;  break;
7400     case GT_ASG_SUB: ins = INS_sub; isArith = true;  break;
7401     default:
7402         unreached();
7403     }
7404
7405     bool ovfl = false;
7406
7407     if (isArith)
7408     {
7409         // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
7410
7411         ovfl = tree->gtOverflow();
7412
7413         // With an overflow check we can't update the target in place if its original value
7414         // must still be visible after an overflow exception raised by the "+" below
7415         noway_assert(!ovfl ||
7416                      ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) &&
7417                       !compiler->compCurBB->hasTryIndex()));
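        // For example (illustrative): in
        //     try { x += y; /* overflow-checked */ } catch { use(x); }
        // the handler must still see the original x if the add overflows, so the in-place
        // update is only legal for a simple local in a block with no enclosing try region.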
7418
7419         /* Do not allow overflow instructions with refs/byrefs */
7420
7421         noway_assert(!ovfl || !varTypeIsGC(treeType));
7422
7423         // We disallow overflow and byte-ops here as it is too much trouble
7424         noway_assert(!ovfl || !varTypeIsByte(treeType));
7425
7426         /* Is the second operand a constant? */
7427
7428         if  (op2->IsIntCnsFitsInI32())
7429         {
7430             int        ival = (int)op2->gtIntCon.gtIconVal;
7431
7432             /* What is the target of the assignment? */
7433
7434             switch (op1->gtOper)
7435             {
7436             case GT_REG_VAR:
7437
7438 REG_VAR4:
7439
7440                 reg = op1->gtRegVar.gtRegNum;
7441
7442                 /* No registers are needed for addressing */
7443
7444                 addrReg = RBM_NONE;
7445 #if !CPU_LOAD_STORE_ARCH
7446 INCDEC_REG:
7447 #endif
7448                 /* We're adding a constant to a register */
7449
7450                 if  (oper == GT_ASG_ADD)
7451                     genIncRegBy(reg,  ival, tree, treeType, ovfl);
7452                 else if (ovfl &&
7453                          ((tree->gtFlags & GTF_UNSIGNED) || ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))  // -0x80000000 == 0x80000000. Therefore we can't use -ival.
7454                         )
7455                     /* For unsigned overflow, we have to use INS_sub to set
7456                         the flags correctly */
7457                     genDecRegBy(reg,  ival, tree);
7458                 else
7459                     genIncRegBy(reg, -ival, tree, treeType, ovfl);
7460
7461                 break;
7462
7463             case GT_LCL_VAR:
7464
7465                 /* Does the variable live in a register? */
7466
7467                 if  (genMarkLclVar(op1))
7468                     goto REG_VAR4;
7469
7470                 __fallthrough;
7471
7472             default:
7473
7474                 /* Make the target addressable for load/store */
7475                 addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
7476
7477     #if CPU_LOAD_STORE_ARCH
7478                 // We always load from memory then store to memory 
7479     #else
7480                 /* For small types with overflow check, we need to
7481                     sign/zero extend the result, so we need it in a reg */
7482
7483                 if (ovfl && genTypeSize(treeType) < sizeof(int))
7484     #endif // CPU_LOAD_STORE_ARCH
7485                 {
7486                     // Load op1 into a reg
7487
7488                     reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
7489
7490                     inst_RV_TT(INS_mov, reg, op1);
7491
7492                     // Issue the add/sub and the overflow check
7493
7494                     inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
7495                     regTracker.rsTrackRegTrash(reg);
7496
7497                     if (ovfl)
7498                     {
7499                         genCheckOverflow(tree);
7500                     }
7501
7502                     /* Store the (sign/zero extended) result back to
7503                         the stack location of the variable */
7504
7505                     inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7506
7507                     break;
7508                 }
7509 #if !CPU_LOAD_STORE_ARCH
7510                 else
7511                 {
7512                     /* Add/subtract the new value into/from the target */
7513
7514                     if  (op1->gtFlags & GTF_REG_VAL)
7515                     {
7516                         reg = op1->gtRegNum;
7517                         goto INCDEC_REG;
7518                     }
7519
7520                     /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
7521                     if (!ovfl && (ival == 1 || ival == -1) && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
7522                     {
7523                         noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
7524                         if (oper == GT_ASG_SUB)
7525                             ival = -ival;
7526
7527                         ins = (ival > 0) ? INS_inc : INS_dec;
7528                         inst_TT(ins, op1);
7529                     }
7530                     else
7531                     {
7532                         inst_TT_IV(ins, op1, ival);
7533                     }
7534
7535                     if  ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
7536                     {
7537                         if (tree->gtSetFlags())
7538                             genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
7539                     }
7540
7541                     break;
7542                 }
7543 #endif // !CPU_LOAD_STORE_ARCH
7544             } // end switch (op1->gtOper)
7545
7546             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7547
7548             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7549             return;
7550         } // end if (op2->IsIntCnsFitsInI32())
7551     } // end if (isArith)
7552
7553     noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
7554
7555     /* Is the target a register or local variable? */
7556
7557     switch (op1->gtOper)
7558     {
7559     case GT_LCL_VAR:
7560
7561         /* Does the target variable live in a register? */
7562
7563         if  (!genMarkLclVar(op1))
7564             break;
7565
7566         __fallthrough;
7567
7568     case GT_REG_VAR:
7569
7570         /* Get hold of the target register */
7571
7572         reg = op1->gtRegVar.gtRegNum;
7573
7574         /* Make sure the target of the store is available */
7575
7576         if  (regSet.rsMaskUsed & genRegMask(reg))
7577         {
7578             regSet.rsSpillReg(reg);
7579         }
7580
7581         /* Make the RHS addressable */
7582
7583         addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
7584
7585         /* Compute the new value into the target register */
7586
7587 #if CPU_HAS_BYTE_REGS
7588         // Fix 383833 X86 ILGEN
7589         regNumber  reg2; 
7590         if ((op2->gtFlags & GTF_REG_VAL) != 0) 
7591         {
7592             reg2 = op2->gtRegNum;
7593         }
7594         else
7595         {
7596             reg2 = REG_STK;
7597         }
7598
7599         // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers.
7600         // When op2 is in memory, reg2 == REG_STK and we will need to force op2 into a register.
7601         //
7602         if (varTypeIsByte(treeType) && 
7603             (((genRegMask(reg)  & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
7604         {
7605             // We will force op2 into a register (via sign/zero extending load)
7606             // for the cases where op2 is in memory and thus could have 
7607             // an unmapped page just beyond its location
7608             //
7609             if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
7610             {
7611                 genCodeForTree(op2, 0);
7612                 assert((op2->gtFlags & GTF_REG_VAL) != 0);
7613             }
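            // Illustrative example (hypothetical operand): if op2 is "byte ptr [obj+0xFFF]"
            // at the very end of a mapped page, the EA_4BYTE access below would read three
            // bytes past it and could fault, so we first emit
            //     movzx ecx, byte ptr [obj+0xFFF]      ; (movsx for signed types)
            // and let the 4-byte ALU operation use the register instead.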
7614
7615             inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
7616
7617             bool canOmit = false;
7618
7619             if (varTypeIsUnsigned(treeType))
7620             {
7621                 // When op2 is a byte sized constant we can omit the zero extend instruction
7622                 if ((op2->gtOper == GT_CNS_INT) &&
7623                     ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
7624                 {
7625                     canOmit = true;
7626                 }
7627             }
7628             else // treeType is signed
7629             {
7630                 // When op2 is a positive 7-bit or smaller constant 
7631                 // we can omit the sign extension sequence.
7632                 if  ((op2->gtOper == GT_CNS_INT) &&
7633                         ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
7634                 {
7635                     canOmit = true;
7636                 }
7637             }
7638             
7639             if (!canOmit)
7640             {
7641                 // If reg is a byte reg then we can use a movzx/movsx instruction
7642                 //
7643                 if ((genRegMask(reg) & RBM_BYTE_REGS) != 0) 
7644                 {
7645                     instruction extendIns = ins_Move_Extend(treeType, true);
7646                     inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
7647                 }
7648                 else // we can't encode a movzx/movsx instruction
7649                 {
7650                     if (varTypeIsUnsigned(treeType))
7651                     {
7652                         // otherwise, we must zero the upper 24 bits of 'reg'
7653                         inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
7654                     }
7655                     else // treeType is signed
7656                     {
7657                         // otherwise, we must sign extend the result in the non-byteable register 'reg'
7658                         // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
7659                         // then we do an arithmetic shift back 24 bits, which propagates the sign bit correctly.
7660                         //                   
7661                         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
7662                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
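                                  // For example, if the low byte of the result is 0x80 (-128) but the upper
                                  // bits of 'reg' are stale, "shl reg, 24" leaves 0x80000000 and "sar reg, 24"
                                  // then produces 0xFFFFFF80, i.e. -128 correctly sign-extended to 32 bits.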
7663                     }
7664                 }
7665             }
7666         }
7667         else
7668 #endif // CPU_HAS_BYTE_REGS
7669         {
7670             inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
7671         }
7672
7673         /* The zero flag is now equal to the register value */
7674
7675         if  (tree->gtSetFlags())
7676             genFlagsEqualToReg(tree, reg);
7677
7678         /* Remember that we trashed the target */
7679
7680         regTracker.rsTrackRegTrash(reg);
7681
7682         /* Free up anything that was tied up by the RHS */
7683
7684         genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7685
7686         genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7687         return;
7688
7689     default:
7690         break;
7691     } // end switch (op1->gtOper)
7692
7693 #if !CPU_LOAD_STORE_ARCH
7694     /* Special case: "x ^= -1" is actually "not(x)" */
7695
7696     if  (oper == GT_ASG_XOR)
7697     {
7698         if  (op2->gtOper == GT_CNS_INT &&
7699              op2->gtIntCon.gtIconVal == -1)
7700         {
7701             addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
7702             inst_TT(INS_NOT, op1);
7703             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7704
7705             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7706             return;
7707         }
7708     }
7709 #endif // !CPU_LOAD_STORE_ARCH
7710
7711     /* Setup target mask for op2 (byte-regs for small operands) */
7712
7713     unsigned needMask;
7714     needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS
7715                                          : RBM_ALLINT;
7716
7717     /* Is the second operand a constant? */
7718
7719     if  (op2->IsIntCnsFitsInI32())
7720     {
7721         int        ival = (int)op2->gtIntCon.gtIconVal;
7722
7723         /* Make the target addressable */
7724         addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
7725
7726         inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
7727
7728         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
7729
7730         genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7731         return;
7732     }
7733
7734     /* Is the value or the address to be computed first? */
7735
7736     if  (tree->gtFlags & GTF_REVERSE_OPS)
7737     {
7738         /* Compute the new value into a register */
7739
7740         genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7741
7742         /* Make the target addressable for load/store */
7743         addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
7744         regSet.rsLockUsedReg(addrReg);
7745
7746 #if CPU_LOAD_STORE_ARCH
7747         // We always load from memory then store to memory 
7748 #else
7749         /* For small types with overflow check, we need to
7750             sign/zero extend the result, so we need it in a reg */
7751
7752         if (ovfl && genTypeSize(treeType) < sizeof(int))
7753 #endif // CPU_LOAD_STORE_ARCH
7754         {
7755             reg = regSet.rsPickReg();
7756             regSet.rsLockReg(genRegMask(reg));
7757
7758             noway_assert(genIsValidReg(reg));
7759
7760             /* Generate "ldr reg, [var]" */
7761
7762             inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
7763
7764             if (op1->gtOper == GT_LCL_VAR)
7765                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7766             else
7767                 regTracker.rsTrackRegTrash(reg);
7768
7769             /* Make sure the new value is in a register */
7770
7771             genRecoverReg(op2, 0, RegSet::KEEP_REG);
7772
7773             /* Compute the new value */
7774
7775             inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7776
7777             if (ovfl)
7778                 genCheckOverflow(tree);
7779
7780             /* Move the new value back to the variable */
7781             /* Generate "str reg, [var]" */
7782
7783             inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7784             regSet.rsUnlockReg(genRegMask(reg));
7785
7786             if (op1->gtOper == GT_LCL_VAR)
7787                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
7788         }
7789 #if !CPU_LOAD_STORE_ARCH
7790         else
7791         {
7792             /* Make sure the new value is in a register */
7793
7794             genRecoverReg(op2, 0, RegSet::KEEP_REG);
7795
7796             /* Add the new value into the target */
7797
7798             inst_TT_RV(ins, op1, op2->gtRegNum);
7799         }
7800 #endif // !CPU_LOAD_STORE_ARCH
7801         /* Free up anything that was tied up either side */
7802         regSet.rsUnlockUsedReg(addrReg);
7803         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7804         genReleaseReg (op2);
7805     }
7806     else
7807     {
7808         /* Make the target addressable */
7809
7810         addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
7811
7812         /* Compute the new value into a register */
7813
7814         genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7815         regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
7816
7817         /* Make sure the target is still addressable */
7818
7819         addrReg = genKeepAddressable(op1, addrReg);
7820         regSet.rsLockUsedReg(addrReg);
7821
7822 #if CPU_LOAD_STORE_ARCH
7823         // We always load from memory then store to memory 
7824 #else
7825         /* For small types with overflow check, we need to
7826             sign/zero extend the result, so we need it in a reg */
7827
7828         if (ovfl && genTypeSize(treeType) < sizeof(int))
7829 #endif // CPU_LOAD_STORE_ARCH
7830         {
7831             reg = regSet.rsPickReg();
7832
7833             inst_RV_TT(INS_mov, reg, op1);
7834
7835             inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7836             regTracker.rsTrackRegTrash(reg);
7837
7838             if (ovfl)
7839                 genCheckOverflow(tree);
7840
7841             inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7842
7843             if (op1->gtOper == GT_LCL_VAR)
7844                 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7845         }
7846 #if !CPU_LOAD_STORE_ARCH
7847         else
7848         {
7849             /* Add the new value into the target */
7850
7851             inst_TT_RV(ins, op1, op2->gtRegNum);
7852         }
7853 #endif
7854
7855         /* Free up anything that was tied up either side */
7856         regSet.rsUnlockUsedReg(addrReg);
7857         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7858
7859         regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
7860         genReleaseReg (op2);
7861     }
7862
7863     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7864 }
7865
7866
7867 /*****************************************************************************
7868  *
7869  *  Generate code for GT_UMOD.
7870  */
7871
7872 void                CodeGen::genCodeForUnsignedMod(GenTreePtr tree,
7873                                                    regMaskTP  destReg,
7874                                                    regMaskTP  bestReg)
7875 {
7876     assert(tree->OperGet() == GT_UMOD);
7877
7878     GenTreePtr      op1      = tree->gtOp.gtOp1;
7879     GenTreePtr      op2      = tree->gtOp.gtOp2;
7880     const var_types treeType = tree->TypeGet();
7881     regMaskTP       needReg  = destReg;
7882     regNumber       reg;
7883
7884     /* Is this a division by an integer constant? */
7885
7886     noway_assert(op2);
7887     if  (compiler->fgIsUnsignedModOptimizable(op2))
7888     {
7889         /* Generate the operand into some register */
7890
7891         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7892         noway_assert(op1->gtFlags & GTF_REG_VAL);
7893
7894         reg   = op1->gtRegNum;
7895
7896         /* Generate the appropriate sequence */
7897         size_t ival = op2->gtIntCon.gtIconVal - 1;
7898         inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
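          // op2 is a power-of-two constant here (that is what makes the AND form legal),
          // so the remainder is just the low bits, e.g. for unsigned x, "x % 8" becomes "x & 7".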
7899
7900         /* The register is now trashed */
7901
7902         regTracker.rsTrackRegTrash(reg);
7903
7904         genCodeForTree_DONE(tree, reg);
7905         return;
7906     }
7907
7908     genCodeForGeneralDivide(tree, destReg, bestReg);
7909 }
7910
7911
7912 /*****************************************************************************
7913  *
7914  *  Generate code for GT_MOD.
7915  */
7916
7917 void                CodeGen::genCodeForSignedMod(GenTreePtr tree,
7918                                                  regMaskTP  destReg,
7919                                                  regMaskTP  bestReg)
7920 {
7921     assert(tree->OperGet() == GT_MOD);
7922
7923     GenTreePtr      op1      = tree->gtOp.gtOp1;
7924     GenTreePtr      op2      = tree->gtOp.gtOp2;
7925     const var_types treeType = tree->TypeGet();
7926     regMaskTP       needReg  = destReg;
7927     regNumber       reg;
7928
7929     /* Is this a division by an integer constant? */
7930
7931     noway_assert(op2);
7932     if  (compiler->fgIsSignedModOptimizable(op2))
7933     {
7934         ssize_t ival = op2->gtIntCon.gtIconVal;
7935         BasicBlock *    skip = genCreateTempLabel();
7936
7937         /* Generate the operand into some register */
7938
7939         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7940         noway_assert(op1->gtFlags & GTF_REG_VAL);
7941
7942         reg   = op1->gtRegNum;
7943
7944         /* Generate the appropriate sequence */
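          // op2 is expected to be a positive power-of-two constant here.  The AND below keeps the
          // low bits of the magnitude together with the sign bit; for a negative dividend the
          // dec/or/inc fixup that follows maps the result back into the correct negative range.
          // For example, with ival == 8: 13 % 8 yields 5 directly, while for -13 the AND leaves
          // 0x80000003 and the fixup computes ((0x80000003 - 1) | -8) + 1 == -5, matching -13 % 8.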
7945
7946         inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
7947
7948         /* The register is now trashed */
7949
7950         regTracker.rsTrackRegTrash(reg);
7951
7952         /* Check and branch for a positive value */
7953         emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7954         inst_JMP(jmpGEL, skip);
7955
7956         /* Generate the rest of the sequence and we're done */
7957
7958         genIncRegBy(reg, -1, NULL, treeType);
7959         ival = -ival;
7960         if ((treeType == TYP_LONG) && ((int)ival != ival))
7961         {
7962             regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
7963             instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
7964             inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
7965         }
7966         else
7967         {
7968             inst_RV_IV (INS_OR,  reg,  (int)ival, emitActualTypeSize(treeType));
7969         }
7970         genIncRegBy(reg,  1, NULL, treeType);
7971
7972         /* Define the 'skip' label and we're done */
7973
7974         genDefineTempLabel(skip);
7975
7976         genCodeForTree_DONE(tree, reg);
7977         return;
7978     }
7979
7980     genCodeForGeneralDivide(tree, destReg, bestReg);
7981 }
7982
7983
7984 /*****************************************************************************
7985  *
7986  *  Generate code for GT_UDIV.
7987  */
7988
7989 void                CodeGen::genCodeForUnsignedDiv(GenTreePtr tree,
7990                                                    regMaskTP  destReg,
7991                                                    regMaskTP  bestReg)
7992 {
7993     assert(tree->OperGet() == GT_UDIV);
7994
7995     GenTreePtr      op1      = tree->gtOp.gtOp1;
7996     GenTreePtr      op2      = tree->gtOp.gtOp2;
7997     const var_types treeType = tree->TypeGet();
7998     regMaskTP       needReg  = destReg;
7999     regNumber       reg;
8000
8001     /* Is this a division by an integer constant? */
8002
8003     noway_assert(op2);
8004     if  (compiler->fgIsUnsignedDivOptimizable(op2))
8005     {
8006         size_t    ival = op2->gtIntCon.gtIconVal;
8007
8008         /* Division by 1 must be handled elsewhere */
8009
8010         noway_assert(ival != 1 || compiler->opts.MinOpts());
8011
8012         /* Generate the operand into some register */
8013
8014         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
8015         noway_assert(op1->gtFlags & GTF_REG_VAL);
8016
8017         reg   = op1->gtRegNum;
8018
8019         /* Generate "shr reg, log2(value)" */
8020
8021         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
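          // e.g. for unsigned x and a divisor of 8, "x / 8" becomes "shr x, 3".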
8022
8023         /* The register is now trashed */
8024
8025         regTracker.rsTrackRegTrash(reg);
8026
8027         genCodeForTree_DONE(tree, reg);
8028         return;
8029     }
8030
8031     genCodeForGeneralDivide(tree, destReg, bestReg);
8032 }
8033
8034
8035 /*****************************************************************************
8036  *
8037  *  Generate code for GT_DIV.
8038  */
8039
8040 void                CodeGen::genCodeForSignedDiv(GenTreePtr tree,
8041                                                  regMaskTP  destReg,
8042                                                  regMaskTP  bestReg)
8043 {
8044     assert(tree->OperGet() == GT_DIV);
8045
8046     GenTreePtr      op1      = tree->gtOp.gtOp1;
8047     GenTreePtr      op2      = tree->gtOp.gtOp2;
8048     const var_types treeType = tree->TypeGet();
8049     regMaskTP       needReg  = destReg;
8050     regNumber       reg;
8051
8052     /* Is this a division by an integer constant? */
8053
8054     noway_assert(op2);
8055     if  (compiler->fgIsSignedDivOptimizable(op2))
8056     {
8057         ssize_t ival_s = op2->gtIntConCommon.IconValue();
8058         assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
8059         size_t ival = static_cast<size_t>(ival_s);
8060
8061         /* Division by 1 must be handled elsewhere */
8062
8063         noway_assert(ival != 1);
8064
8065         BasicBlock *    onNegDivisee = genCreateTempLabel();
8066
8067         /* Generate the operand into some register */
8068
8069         genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
8070         noway_assert(op1->gtFlags & GTF_REG_VAL);
8071
8072         reg   = op1->gtRegNum;
8073
8074         if (ival == 2)
8075         {
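              // Signed division by 2: "sar reg, 1" rounds toward negative infinity and shifts the
              // discarded bit into the carry flag.  For a negative dividend we add the carry back
              // in (adc reg, 0) so the result rounds toward zero instead; e.g. for -7, sar gives
              // -4 with the carry set and the adc corrects it to -3 (= -7 / 2).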
8076             /* Generate "sar reg, log2(value)" */
8077
8078             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
8079
8080             // Check and branch for a positive value, skipping the INS_ADDC instruction
8081             emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
8082             inst_JMP(jmpGEL, onNegDivisee);
8083
8084             // Add the carry flag to 'reg'
8085             inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
8086
8087             /* Define the 'onNegDivisee' label and we're done */
8088
8089             genDefineTempLabel(onNegDivisee);
8090
8091             /* The register is now trashed */
8092
8093             regTracker.rsTrackRegTrash(reg);
8094
8095             /* The result is the same as the operand */
8096
8097             reg  = op1->gtRegNum;
8098         }
8099         else
8100         {
8101             /* Generate the following sequence */
8102             /*
8103             test    reg, reg
8104             jns     onNegDivisee
8105             add     reg, ival-1
8106             onNegDivisee:
8107             sar     reg, log2(ival)
8108             */
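              // Adding (ival - 1) before the arithmetic shift converts its round-toward-negative-
              // infinity behavior into round-toward-zero for negative dividends; e.g. with
              // ival == 8: -13 + 7 == -6, and -6 >> 3 == -1, which matches -13 / 8.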
8109
8110             instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
8111
8112             // Check and branch for a positive value, skipping the INS_add instruction
8113             emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
8114             inst_JMP(jmpGEL, onNegDivisee);
8115
8116             inst_RV_IV(INS_add, reg, (int)ival-1, emitActualTypeSize(treeType));
8117
8118             /* Define the 'onNegDivisee' label and we're done */
8119
8120             genDefineTempLabel(onNegDivisee);
8121
8122             /* Generate "sar reg, log2(value)" */
8123
8124             inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
8125
8126             /* The register is now trashed */
8127
8128             regTracker.rsTrackRegTrash(reg);
8129
8130             /* The result is the same as the operand */
8131
8132             reg  = op1->gtRegNum;
8133         }
8134
8135         genCodeForTree_DONE(tree, reg);
8136         return;
8137     }
8138
8139     genCodeForGeneralDivide(tree, destReg, bestReg);
8140 }
8141
8142
8143 /*****************************************************************************
8144  *
8145  *  Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
8146  *  (if op2 is not a power of 2 constant).
8147  */
8148
8149 void                CodeGen::genCodeForGeneralDivide(GenTreePtr tree,
8150                                                      regMaskTP  destReg,
8151                                                      regMaskTP  bestReg)
8152 {
8153     assert(tree->OperGet() == GT_UMOD ||
8154            tree->OperGet() == GT_MOD  ||
8155            tree->OperGet() == GT_UDIV ||
8156            tree->OperGet() == GT_DIV);
8157
8158     GenTreePtr      op1      = tree->gtOp.gtOp1;
8159     GenTreePtr      op2      = tree->gtOp.gtOp2;
8160     const var_types treeType = tree->TypeGet();
8161     regMaskTP       needReg  = destReg;
8162     regNumber       reg;
8163     instruction     ins;
8164     bool            gotOp1;
8165     regMaskTP       addrReg;
8166
8167 #if USE_HELPERS_FOR_INT_DIV
8168     noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
8169 #endif
8170
8171 #if defined(_TARGET_XARCH_)
8172
8173     /* Which operand are we supposed to evaluate first? */
8174
8175     if  (tree->gtFlags & GTF_REVERSE_OPS)
8176     {
8177         /* We'll evaluate 'op2' first */
8178
8179         gotOp1   = false;
8180         destReg &= ~op1->gtRsvdRegs;
8181
8182         /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8183         if (op1->gtOper == GT_LCL_VAR)
8184         {
8185             unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8186             noway_assert(varNum < compiler->lvaCount);
8187             LclVarDsc* varDsc = compiler->lvaTable + varNum;
8188             if  (varDsc->lvRegister)
8189             {
8190                 destReg &= ~genRegMask(varDsc->lvRegNum);
8191             }
8192         }
8193     }
8194     else
8195     {
8196         /* We'll evaluate 'op1' first */
8197
8198         gotOp1 = true;
8199
8200         regMaskTP op1Mask;
8201         if (RBM_EAX & op2->gtRsvdRegs)
8202             op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8203         else
8204             op1Mask = RBM_EAX;  // EAX would be ideal
8205
8206         /* Generate the dividend into EAX and hold on to it. freeOnly=true */
8207
8208         genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8209     }
8210
8211     /* We want to avoid using EAX or EDX for the second operand */
8212
8213     destReg = regSet.rsMustExclude(destReg, RBM_EAX|RBM_EDX);
8214
8215     /* Make the second operand addressable */
8216     op2 = genCodeForCommaTree(op2);
8217
8218     /* Special case: if op2 is a local var we are done */
8219
8220     if  (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
8221     {
8222         if ((op2->gtFlags & GTF_REG_VAL) == 0)
8223             addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
8224         else
8225             addrReg = 0;
8226     }
8227     else
8228     {
8229         genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8230
8231         noway_assert(op2->gtFlags & GTF_REG_VAL);
8232         addrReg = genRegMask(op2->gtRegNum);
8233     }
8234
8235     /* Make sure we have the dividend in EAX */
8236
8237     if  (gotOp1)
8238     {
8239         /* We've previously computed op1 into EAX */
8240
8241         genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
8242     }
8243     else
8244     {
8245         /* Compute op1 into EAX and hold on to it */
8246
8247         genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8248     }
8249
8250     noway_assert(op1->gtFlags & GTF_REG_VAL);
8251     noway_assert(op1->gtRegNum == REG_EAX);
8252
8253     /* We can now safely (we think) grab EDX */
8254
8255     regSet.rsGrabReg(RBM_EDX);
8256     regSet.rsLockReg(RBM_EDX);
8257
8258     /* Convert the integer in EAX into an un/signed long in EDX:EAX */
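      // On x86, div/idiv take a double-width dividend in EDX:EAX, so the upper half must be set up
      // first: cdq sign-extends EAX into EDX for signed division, while for unsigned division we
      // simply zero EDX.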
8259
8260     const genTreeOps oper = tree->OperGet();
8261
8262     if (oper == GT_UMOD || oper == GT_UDIV)
8263         instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
8264     else
8265         instGen(INS_cdq);
8266
8267     /* Make sure the divisor is still addressable */
8268
8269     addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
8270
8271     /* Perform the division */
8272
8273     if (oper == GT_UMOD || oper == GT_UDIV)
8274         inst_TT(INS_UNSIGNED_DIVIDE,  op2);
8275     else
8276         inst_TT(INS_SIGNED_DIVIDE, op2);
8277
8278     /* Free up anything tied up by the divisor's address */
8279
8280     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
8281
8282     /* Unlock and free EDX */
8283
8284     regSet.rsUnlockReg(RBM_EDX);
8285
8286     /* Free up op1 (which is in EAX) as well */
8287
8288     genReleaseReg(op1);
8289
8290     /* Both EAX and EDX are now trashed */
8291
8292     regTracker.rsTrackRegTrash (REG_EAX);
8293     regTracker.rsTrackRegTrash (REG_EDX);
8294
8295     /* Figure out which register the result is in */
8296
8297     reg = (oper == GT_DIV || oper == GT_UDIV)   ? REG_EAX
8298                                                 : REG_EDX;
8299
8300     /* Don't forget to mark the first operand as using EAX and EDX */
8301
8302     op1->gtRegNum    = reg;
8303
8304     genCodeForTree_DONE(tree, reg);
8305
8306 #elif defined(_TARGET_ARM_)
8307
8308     /* Which operand are we supposed to evaluate first? */
8309
8310     if  (tree->gtFlags & GTF_REVERSE_OPS)
8311     {
8312         /* We'll evaluate 'op2' first */
8313
8314         gotOp1   = false;
8315         destReg &= ~op1->gtRsvdRegs;
8316
8317         /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8318         if (op1->gtOper == GT_LCL_VAR)
8319         {
8320             unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8321             noway_assert(varNum < compiler->lvaCount);
8322             LclVarDsc* varDsc = compiler->lvaTable + varNum;
8323             if  (varDsc->lvRegister)
8324             {
8325                 destReg &= ~genRegMask(varDsc->lvRegNum);
8326             }
8327         }
8328     }
8329     else
8330     {
8331         /* We'll evaluate 'op1' first */
8332
8333         gotOp1   = true;
8334         regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8335
8336         /* Generate the dividend into a register and hold on to it. */
8337
8338         genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8339     }
8340
8341     /* Evaluate the second operand into a register and hold onto it. */
8342
8343     genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8344
8345     noway_assert(op2->gtFlags & GTF_REG_VAL);
8346     addrReg = genRegMask(op2->gtRegNum);
8347
8348     if  (gotOp1)
8349     {
8350         // Recover op1 if spilled
8351         genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
8352     }
8353     else
8354     {
8355         /* Compute op1 into any register and hold on to it */
8356         genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8357     }
8358     noway_assert(op1->gtFlags & GTF_REG_VAL);
8359
8360     reg = regSet.rsPickReg(needReg, bestReg);
8361
8362     // Perform the division
8363
8364     const genTreeOps oper = tree->OperGet();
8365
8366     if (oper == GT_UMOD || oper == GT_UDIV)
8367         ins = INS_udiv;
8368     else
8369         ins = INS_sdiv;
8370
8371     getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
8372             
8373     if (oper == GT_UMOD || oper == GT_MOD)
8374     {
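          // For GT_MOD/GT_UMOD, derive the remainder from the quotient already in 'reg':
          //     reg = op1 - (op1 / op2) * op2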
8375         getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
8376         getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
8377     }
8378     /* Free up op1 and op2 */
8379     genReleaseReg(op1);
8380     genReleaseReg(op2);
8381
8382     genCodeForTree_DONE(tree, reg);
8383
8384 #else
8385 #error "Unknown _TARGET_"
8386 #endif
8387 }
8388
8389
8390 /*****************************************************************************
8391  *
8392  *  Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
8393  */
8394
8395 void                CodeGen::genCodeForAsgShift(GenTreePtr tree,
8396                                                 regMaskTP  destReg,
8397                                                 regMaskTP  bestReg)
8398 {
8399     assert(tree->OperGet() == GT_ASG_LSH ||
8400            tree->OperGet() == GT_ASG_RSH ||
8401            tree->OperGet() == GT_ASG_RSZ);
8402
8403     const genTreeOps oper    = tree->OperGet();
8404     GenTreePtr      op1      = tree->gtOp.gtOp1;
8405     GenTreePtr      op2      = tree->gtOp.gtOp2;
8406     const var_types treeType = tree->TypeGet();
8407     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8408     regMaskTP       needReg  = destReg;
8409     regNumber       reg;
8410     instruction     ins;
8411     regMaskTP       addrReg;
8412
8413     switch (oper)
8414     {
8415     case GT_ASG_LSH: ins = INS_SHIFT_LEFT_LOGICAL;  break;
8416     case GT_ASG_RSH: ins = INS_SHIFT_RIGHT_ARITHM;  break;
8417     case GT_ASG_RSZ: ins = INS_SHIFT_RIGHT_LOGICAL; break;
8418     default:
8419         unreached();
8420     }
8421
8422     noway_assert(!varTypeIsGC(treeType));
8423     noway_assert(op2);
8424
8425     /* Shifts by a constant amount are easier */
8426
8427     if  (op2->IsCnsIntOrI())
8428     {
8429         /* Make the target addressable */
8430
8431         addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
8432
8433         /* Are we shifting a register left by 1 bit? */
8434
8435         if  ((oper == GT_ASG_LSH) &&
8436              (op2->gtIntCon.gtIconVal == 1) &&
8437              (op1->gtFlags & GTF_REG_VAL))
8438         {
8439             /* The target lives in a register */
8440
8441             reg  = op1->gtRegNum;
8442
8443             /* "add reg, reg" is cheaper than "shl reg, 1" */
8444
8445             inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
8446         }
8447         else
8448         {
8449 #if CPU_LOAD_STORE_ARCH
8450             if ((op1->gtFlags & GTF_REG_VAL) == 0)
8451             {
8452                 regSet.rsLockUsedReg(addrReg);
8453
8454                 // Load op1 into a reg
8455
8456                 reg = regSet.rsPickReg(RBM_ALLINT);
8457
8458                 inst_RV_TT(INS_mov, reg, op1);
8459
8460                 // Issue the shift
8461
8462                 inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
8463                 regTracker.rsTrackRegTrash(reg);
8464
8465                 /* Store the (sign/zero extended) result back to the stack location of the variable */
8466
8467                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
8468
8469                 regSet.rsUnlockUsedReg(addrReg);
8470             }
8471             else
8472 #endif // CPU_LOAD_STORE_ARCH
8473             {
8474                 /* Shift by the constant value */
8475
8476                 inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
8477             }
8478         }
8479
8480         /* If the target is a register, it has a new value */
8481
8482         if  (op1->gtFlags & GTF_REG_VAL)
8483             regTracker.rsTrackRegTrash(op1->gtRegNum);
8484
8485         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8486
8487         /* The zero flag is now equal to the target value */
8488         /* X86: But only if the shift count is != 0 */
8489
8490         if (op2->gtIntCon.gtIconVal != 0)
8491         {
8492             if (tree->gtSetFlags())
8493             {
8494                 if (op1->gtOper == GT_LCL_VAR)
8495                 {
8496                     genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
8497                 }
8498                 else if  (op1->gtOper == GT_REG_VAR)
8499                 {
8500                     genFlagsEqualToReg(tree, op1->gtRegNum);
8501                 }
8502             }
8503         }
8504         else
8505         {
8506             // It is possible for the shift count to equal 0 with valid
8507             // IL, and not be optimized away, in the case where the node
8508             // is of a small type.  The sequence of instructions looks like
8509             // ldsfld, shr, stsfld and executed on a char field.  This will
8510             // ldsfld, shr, stsfld, executed on a char field.  This will
8511             // compilers will insert a conv.u2 before the stsfld (which will
8512             // lead us down a different codepath in the JIT and optimize away
8513             // the shift by zero).  This case is not worth optimizing and we
8514             // will just make sure to generate correct code for it.
8515
8516             genFlagsEqualToNone();
8517         }
8518     }
8519     else
8520     {
8521         regMaskTP op2Regs = RBM_NONE;
8522         if (REG_SHIFT != REG_NA)
8523             op2Regs = RBM_SHIFT;
8524
8525         regMaskTP tempRegs;
8526
8527         if (tree->gtFlags & GTF_REVERSE_OPS)
8528         {
8529             tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
8530             genCodeForTree(op2, tempRegs);
8531             regSet.rsMarkRegUsed(op2);
8532
8533             tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
8534             addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8535
8536             genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
8537         }
8538         else
8539         {
8540             /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
8541             regMaskTP excludeMask = op2->gtRsvdRegs;
8542             if (REG_SHIFT != REG_NA)
8543                 excludeMask |= RBM_SHIFT;
8544
8545             tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
8546             addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8547
8548             /* Load the shift count into the necessary register */
8549             genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
8550         }
8551
8552         /* Make sure the address registers are still here */
8553         addrReg = genKeepAddressable(op1, addrReg, op2Regs);
8554
8555         /* Perform the shift */
8556 #ifdef _TARGET_XARCH_
8557         inst_TT_CL(ins, op1);
8558 #else
8559         noway_assert(op2->gtFlags & GTF_REG_VAL);
8560         op2Regs = genRegMask(op2->gtRegNum);
8561
8562         regSet.rsLockUsedReg(addrReg | op2Regs);
8563         inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
8564         regSet.rsUnlockUsedReg(addrReg | op2Regs);
8565 #endif
8566         /* Free the address registers */
8567         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8568
8569         /* If the value is in a register, it's now trash */
8570
8571         if  (op1->gtFlags & GTF_REG_VAL)
8572             regTracker.rsTrackRegTrash(op1->gtRegNum);
8573
8574         /* Release the op2 [RBM_SHIFT] operand */
8575
8576         genReleaseReg(op2);
8577     }
8578
8579     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
8580 }
8581
8582
8583 /*****************************************************************************
8584  *
8585  *  Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
8586  */
8587
8588 void                CodeGen::genCodeForShift(GenTreePtr tree,
8589                                              regMaskTP  destReg,
8590                                              regMaskTP  bestReg)
8591 {
8592     assert(tree->OperIsShift());
8593
8594     const genTreeOps oper    = tree->OperGet();
8595     GenTreePtr      op1      = tree->gtOp.gtOp1;
8596     GenTreePtr      op2      = tree->gtOp.gtOp2;
8597     const var_types treeType = tree->TypeGet();
8598     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8599     regMaskTP       needReg  = destReg;
8600     regNumber       reg;
8601     instruction     ins;
8602
8603     switch (oper)
8604     {
8605     case GT_LSH: ins = INS_SHIFT_LEFT_LOGICAL;  break;
8606     case GT_RSH: ins = INS_SHIFT_RIGHT_ARITHM;  break;
8607     case GT_RSZ: ins = INS_SHIFT_RIGHT_LOGICAL; break;
8608     default:
8609         unreached();
8610     }
8611
8612     /* Is the shift count constant? */
8613     noway_assert(op2);
8614     if  (op2->IsIntCnsFitsInI32())
8615     {
8616         // TODO: Check to see if we could generate a LEA instead!
8617
8618         /* Compute the left operand into any free register */
8619
8620         genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
8621
8622         noway_assert(op1->gtFlags & GTF_REG_VAL);
8623         reg = op1->gtRegNum;
8624
8625         /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
8626
8627         // On ARM, until proven otherwise by performance numbers, just do the shift.
8628         // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
8629         // It's smaller than two "add reg, reg".
8630 #ifndef _TARGET_ARM_
8631         if  (oper == GT_LSH)
8632         {
8633             emitAttr size = emitActualTypeSize(treeType);
8634             if (op2->gtIntConCommon.IconValue() == 1)
8635             {
8636                 /* "add reg, reg" is smaller and faster than "shl reg, 1" */
8637                 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8638             }
8639             else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
8640             {
8641                 /* two "add reg, reg" instructions are faster than "shl reg, 2" */
8642                 inst_RV_RV(INS_add, reg, reg, treeType);
8643                 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8644             }
8645             else
8646                 goto DO_SHIFT_BY_CNS;
8647         }
8648         else
8649 #endif // _TARGET_ARM_
8650         {
8651 #ifndef _TARGET_ARM_
8652 DO_SHIFT_BY_CNS:
8653 #endif // _TARGET_ARM_
8654             // If we are shifting 'reg' by zero bits and do not need the flags to be set 
8655             // then we can just skip emitting the instruction as 'reg' is already correct.
8656             //
8657             if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
8658             {
8659                 /* Generate the appropriate shift instruction */
8660                 inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
8661             }
8662         }
8663     }
8664     else
8665     {
8666         /* Calculate a useful register mask for computing op1 */
8667         needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
8668         regMaskTP op2RegMask;
8669 #ifdef _TARGET_XARCH_
8670         op2RegMask = RBM_ECX;
8671 #else
8672         op2RegMask = RBM_NONE;
8673 #endif
8674         needReg = regSet.rsMustExclude(needReg, op2RegMask);
8675
8676         regMaskTP tempRegs;
8677
8678         /* Which operand are we supposed to evaluate first? */
8679         if (tree->gtFlags & GTF_REVERSE_OPS)
8680         {
8681             /* Load the shift count [into ECX on XARCH] */
8682             tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
8683             genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8684
8685             /* We must not target the register that is holding op2 */
8686             needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
8687
8688             /* Now evaluate 'op1' into a free register */
8689             genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8690
8691             /* Recover op2 into ECX */
8692             genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
8693         }
8694         else
8695         {
8696             /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
8697             tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
8698             genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8699
8700             /* Load the shift count [into ECX on XARCH] */
8701             genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8702         }
8703
8704         noway_assert(op2->gtFlags & GTF_REG_VAL);
8705 #ifdef _TARGET_XARCH_
8706         noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
8707 #endif 
8708         // Check for the case of op1 being spilled during the evaluation of op2
8709         if  (op1->gtFlags & GTF_SPILLED)
8710         {
8711             // The register has been spilled -- reload it to any register except ECX
8712             regSet.rsLockUsedReg(op2RegMask);
8713             regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
8714             regSet.rsUnlockUsedReg(op2RegMask);
8715         }
8716
8717         noway_assert(op1->gtFlags & GTF_REG_VAL);
8718         reg = op1->gtRegNum;
8719
8720         /* Perform the shift */
8721 #ifdef _TARGET_ARM_
8722         getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
8723 #else
8724         inst_RV_CL(ins, reg);
8725 #endif
8726         genReleaseReg(op2);
8727     }
8728
8729     noway_assert(op1->gtFlags & GTF_REG_VAL);
8730     noway_assert(reg == op1->gtRegNum);
8731
8732     /* The register is now trashed */
8733     genReleaseReg(op1);
8734     regTracker.rsTrackRegTrash(reg);
8735
8736     genCodeForTree_DONE(tree, reg);
8737 }
8738
8739
8740 /*****************************************************************************
8741  *
8742  *  Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree). Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
8743  */
8744
8745 void                CodeGen::genCodeForRelop(GenTreePtr tree,
8746                                              regMaskTP  destReg,
8747                                              regMaskTP  bestReg)
8748 {
8749     assert(tree->OperGet() == GT_EQ ||
8750            tree->OperGet() == GT_NE ||
8751            tree->OperGet() == GT_LT ||
8752            tree->OperGet() == GT_LE ||
8753            tree->OperGet() == GT_GE ||
8754            tree->OperGet() == GT_GT);
8755
8756     const genTreeOps oper    = tree->OperGet();
8757     GenTreePtr      op1      = tree->gtOp.gtOp1;
8758     const var_types treeType = tree->TypeGet();
8759     regMaskTP       needReg  = destReg;
8760     regNumber       reg;
8761
8762     // Longs and float comparisons are converted to "?:"
8763     // Long and floating-point comparisons are converted to "?:"
8764
8765     // Check if we can use the currently set flags. Else set them
8766
8767     emitJumpKind jumpKind = genCondSetFlags(tree);
8768
8769     // Grab a register to materialize the bool value into
8770
8771     bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
8772
8773     // Check that the predictor did the right job
8774     noway_assert(bestReg);
8775
8776     // If needReg is in bestReg then use it
8777     if (needReg & bestReg)
8778         reg = regSet.rsGrabReg(needReg & bestReg);
8779     else
8780         reg = regSet.rsGrabReg(bestReg);
8781
8782 #if defined(_TARGET_ARM_)
8783
8784     // Generate:
8785     //      jump-if-true L_true
8786     //      mov reg, 0
8787     //      jmp L_end
8788     //    L_true:
8789     //      mov reg, 1
8790     //    L_end:
8791
8792     BasicBlock * L_true;
8793     BasicBlock * L_end;
8794
8795     L_true = genCreateTempLabel();
8796     L_end  = genCreateTempLabel();
8797
8798     inst_JMP(jumpKind, L_true);
8799     getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0);   // Executes when the cond is false
8800     inst_JMP(EJ_jmp, L_end);
8801     genDefineTempLabel(L_true);
8802     getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1);   // Executes when the cond is true
8803     genDefineTempLabel(L_end);
8804
8805     regTracker.rsTrackRegTrash(reg);
8806
8807 #elif defined(_TARGET_XARCH_)
8808     regMaskTP regs = genRegMask(reg);
8809     noway_assert(regs & RBM_BYTE_REGS);
8810
8811     // Set (lower byte of) reg according to the flags
8812
8813     /* Look for the special case where we just want to transfer the carry bit */
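      // "sbb reg, reg" leaves 0 in reg when the carry is clear and -1 when it is set; negating
      // that materializes the carry flag itself (the EJ_jb case), while adding 1 materializes its
      // complement (the EJ_jae case), avoiding the setcc + zero-extend sequence used below.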
8814
8815     if (jumpKind == EJ_jb)
8816     {
8817         inst_RV_RV(INS_SUBC, reg, reg);
8818         inst_RV   (INS_NEG, reg, TYP_INT);
8819         regTracker.rsTrackRegTrash(reg);
8820     }
8821     else if (jumpKind == EJ_jae)
8822     {
8823         inst_RV_RV(INS_SUBC, reg, reg);
8824         genIncRegBy(reg, 1, tree, TYP_INT);
8825         regTracker.rsTrackRegTrash(reg);
8826     }
8827     else
8828     {
8829         inst_SET(jumpKind, reg);
8830
8831         regTracker.rsTrackRegTrash(reg);
8832
8833         if (treeType == TYP_INT)
8834         {
8835             // Set the higher bytes to 0
8836             inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
8837         }
8838         else
8839         {
8840             noway_assert(treeType == TYP_BYTE);
8841         }
8842     }
8843 #else
8844     NYI("TARGET");
8845 #endif  // _TARGET_XXX
8846
8847     genCodeForTree_DONE(tree, reg);
8848 }
8849
8850 void                CodeGen::genCodeForBlkOp(GenTreePtr tree,
8851                                              regMaskTP  destReg)
8852 {
8853     genTreeOps      oper     = tree->OperGet();
8854     GenTreePtr      op1      = tree->gtOp.gtOp1;
8855     GenTreePtr      op2      = tree->gtGetOp2();
8856     regMaskTP       needReg  = destReg;
8857     regMaskTP       regs     = regSet.rsMaskUsed;
8858     GenTreePtr      opsPtr[3];
8859     regMaskTP       regsPtr[3];
8860
8861     noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK);
8862     noway_assert(op1->IsList());
8863
8864 #ifdef _TARGET_ARM_
8865     if (tree->AsBlkOp()->IsVolatile())
8866     {
8867         // Emit a memory barrier instruction before the InitBlk/CopyBlk
8868         instGen_MemoryBarrier();
8869     }
8870 #endif
8871     {
8872         GenTreePtr destPtr, srcPtrOrVal;
8873         destPtr = op1->gtOp.gtOp1;
8874         srcPtrOrVal = op1->gtOp.gtOp2;
8875         noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
8876         noway_assert((oper == GT_COPYBLK &&
8877             (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet())))
8878             ||
8879             (oper == GT_INITBLK &&
8880             varTypeIsIntegral(srcPtrOrVal->TypeGet())));
8881
8882         noway_assert(op1 && op1->IsList());
8883         noway_assert(destPtr && srcPtrOrVal);
8884
8885 #if CPU_USES_BLOCK_MOVE 
8886         regs = (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI;   // What is the needReg for Val/Src
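          // "rep stos" takes its store value in EAX and "rep movs" reads its source through ESI,
          // so the Val/Src operand must end up in that register.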
8887
8888         /* Some special code for block moves/inits for constant sizes */
8889
8890         //
8891         // Is this a fixed size COPYBLK?
8892         //      or a fixed size INITBLK with a constant init value?
8893         //
8894         if ((op2->IsCnsIntOrI()) &&
8895             ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI())))
8896         {
8897             size_t length = (size_t)op2->gtIntCon.gtIconVal;
8898             size_t initVal = 0;
8899             instruction ins_P, ins_PR, ins_B;
8900
8901             if (oper == GT_INITBLK)
8902             {
8903                 ins_P = INS_stosp;
8904                 ins_PR = INS_r_stosp;
8905                 ins_B = INS_stosb;
8906
8907                 /* Properly extend the init constant from a U1 to a U4 */
8908                 initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal);
8909
8910                 /* If it is a non-zero value we have to replicate      */
8911                 /* the byte value four times to form the DWORD         */
8912                 /* Then we store this new value back into the tree node   */
8913
8914                 if (initVal)
8915                 {
8916                     initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
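                              // e.g. an init value of 0xAB becomes 0xABABABAB (and, on 64-bit targets
                              // initializing more than 4 bytes, 0xABABABABABABABAB) so every byte of
                              // each store is the init value.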
8917 #ifdef _TARGET_64BIT_
8918                     if (length > 4)
8919                     {
8920                         initVal = initVal | (initVal << 32);
8921                         op1->gtOp.gtOp2->gtType = TYP_LONG;
8922                     }
8923                     else
8924                     {
8925                         op1->gtOp.gtOp2->gtType = TYP_INT;
8926                     }
8927 #endif // _TARGET_64BIT_
8928                 }
8929                 op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
8930             }
8931             else
8932             {
8933                 ins_P = INS_movsp;
8934                 ins_PR = INS_r_movsp;
8935                 ins_B = INS_movsb;
8936             }
8937
8938             // Determine if we will be using SSE2
8939             unsigned movqLenMin = 8;
8940             unsigned movqLenMax = 24;
8941
8942             bool bWillUseSSE2 = false;
8943             bool bWillUseOnlySSE2 = false;
8944             bool bNeedEvaluateCnst = true;   // If we only use SSE2, we will just load the constant there. 
8945
8946 #ifdef _TARGET_64BIT_
8947
8948             // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8,
8949             // there is no point in wasting space on the bigger instructions.
8950
8951 #else // !_TARGET_64BIT_
8952
8953             if (compiler->opts.compCanUseSSE2)
8954             {
8955                 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
8956
8957                 /* Adjust for BB weight */
8958                 if (curBBweight == BB_ZERO_WEIGHT)
8959                 {
8960                     // Don't bother with this optimization in
8961                     // rarely run blocks
8962                     movqLenMax = movqLenMin = 0;
8963                 }
8964                 else if (curBBweight < BB_UNITY_WEIGHT)
8965                 {
8966                     // Be less aggressive when we are inside a conditional
8967                     movqLenMax = 16;
8968                 }
8969                 else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2)
8970                 {
8971                     // Be more aggressive when we are inside a loop
8972                     movqLenMax = 48;
8973                 }
8974
8975                 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK))
8976                 {
8977                     // Be more aggressive when optimizing for speed
8978                     // InitBlk uses fewer instructions
8979                     movqLenMax += 16;
8980                 }
8981
8982                 if (compiler->compCodeOpt() != Compiler::SMALL_CODE &&
8983                     length >= movqLenMin &&
8984                     length <= movqLenMax)
8985                 {
8986                     bWillUseSSE2 = true;
8987
8988                     if ((length % 8) == 0)
8989                     {
8990                         bWillUseOnlySSE2 = true;
8991                         if (oper == GT_INITBLK && (initVal == 0))
8992                         {
8993                             bNeedEvaluateCnst = false;
8994                             noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT));
8995                         }
8996                     }
8997                 }
8998             }
8999
9000 #endif // !_TARGET_64BIT_
9001
9002             const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2);
9003             /* Evaluate dest and src/val */
9004
9005             if (op1->gtFlags & GTF_REVERSE_OPS)
9006             {
9007                 if (bNeedEvaluateCnst)
9008                 {
9009                     genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9010                 }
9011                 genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9012                 if (bNeedEvaluateCnst)
9013                 {
9014                     genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG);
9015                 }
9016             }
9017             else
9018             {
9019                 genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9020                 if (bNeedEvaluateCnst)
9021                 {
9022                     genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9023                 }
9024                 genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG);
9025             }
9026
9027             bool bTrashedESI = false;
9028             bool bTrashedEDI = false;
9029
9030             if (bWillUseSSE2)
9031             {
9032                 int      blkDisp = 0;
9033                 regNumber xmmReg = REG_XMM0;
9034
9035                 if (oper == GT_INITBLK)
9036                 {
9037                     if (initVal)
9038                     {
9039                         getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9040                         getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9041                     }
9042                     else
9043                     {
9044                         getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9045                     }
9046                 }
9047
9048                 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9049                     length, (oper == GT_INITBLK) ? "initblk" : "copyblk", compiler->info.compFullName));
9050
9051                 while (length > 7)
9052                 {
9053                     if (oper == GT_INITBLK)
9054                     {
9055                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9056                     }
9057                     else
9058                     {
9059                         getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9060                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9061                     }
9062                     blkDisp += 8;
9063                     length -= 8;
9064                 }
9065
9066                 if (length > 0)
9067                 {
9068                     noway_assert(bNeedEvaluateCnst);
9069                     noway_assert(!bWillUseOnlySSE2);
9070
9071                     if (oper == GT_COPYBLK)
9072                     {
9073                         inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9074                         bTrashedESI = true;
9075                     }
9076
9077                     inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9078                     bTrashedEDI = true;
9079
9080                     if (length >= REGSIZE_BYTES)
9081                     {
9082                         instGen(ins_P);
9083                         length -= REGSIZE_BYTES;
9084                     }
9085                 }
9086             }
9087             else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9088             {
9089                 /* For small code, we can only use ins_DR to generate fast
9090                     and small code. We also can't use "rep movsb" because
9091                     we may not be reading and writing the DWORD atomically */
9092
9093                 noway_assert(bNeedEvaluateCnst);
9094
9095                 goto USE_DR;
9096             }
9097             else if (length <= 4 * REGSIZE_BYTES)
9098             {
9099                 noway_assert(bNeedEvaluateCnst);
9100
9101                 while (length >= REGSIZE_BYTES)
9102                 {
9103                     instGen(ins_P);
9104                     length -= REGSIZE_BYTES;
9105                 }
9106
9107                 bTrashedEDI = true;
9108                 if (oper == GT_COPYBLK)
9109                     bTrashedESI = true;
9110             }
9111             else
9112             {
9113             USE_DR:
9114                 noway_assert(bNeedEvaluateCnst);
9115
9116                 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9117                 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9118
9119                 length &= (REGSIZE_BYTES - 1);
9120
9121                 instGen(ins_PR);
9122
9123                 regTracker.rsTrackRegTrash(REG_ECX);
9124
9125                 bTrashedEDI = true;
9126                 if (oper == GT_COPYBLK)
9127                     bTrashedESI = true;
9128             }
9129
9130             /* Now take care of the remainder */
9131
9132 #ifdef _TARGET_64BIT_
9133             if (length > 4)
9134             {
9135                 noway_assert(bNeedEvaluateCnst);
9136                 noway_assert(length < 8);
9137
9138                 instGen((oper == GT_INITBLK) ? INS_stosd : INS_movsd);
9139                 length -= 4;
9140
9141                 bTrashedEDI = true;
9142                 if (oper == GT_COPYBLK)
9143                     bTrashedESI = true;
9144             }
9145
9146 #endif // _TARGET_64BIT_
9147
9148             if (length)
9149             {
9150                 noway_assert(bNeedEvaluateCnst);
9151
9152                 while (length--)
9153                 {
9154                     instGen(ins_B);
9155                 }
9156
9157                 bTrashedEDI = true;
9158                 if (oper == GT_COPYBLK)
9159                     bTrashedESI = true;
9160             }
9161
9162             noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9163             if (bTrashedEDI)
9164                 regTracker.rsTrackRegTrash(REG_EDI);
9165             if (bTrashedESI)
9166                 regTracker.rsTrackRegTrash(REG_ESI);
9167             // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9168
9169             genReleaseReg(op1->gtOp.gtOp1);
9170             if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2);
9171
9172         }
9173         else
9174         {
9175             //
9176             // This is a variable-sized COPYBLK/INITBLK,
9177             //   or a fixed-size INITBLK with a variable init value,
9178             //
9179
9180             // In what order should the Dest, Val/Src, and Size be calculated?
9181
9182             compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX,
9183                 opsPtr, regsPtr); // OUT arguments
9184
9185             noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI)));
9186             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9187             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9188             genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9189
9190             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9191             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9192
9193             noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) &&  // Dest
9194                 (op1->gtOp.gtOp1->gtRegNum == REG_EDI));
9195
9196             noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) &&  // Val/Src
9197                 (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs));
9198
9199             noway_assert((op2->gtFlags & GTF_REG_VAL) &&              // Size
9200                 (op2->gtRegNum == REG_ECX));
9201
9202             if (oper == GT_INITBLK)
9203                 instGen(INS_r_stosb);
9204             else
9205                 instGen(INS_r_movsb);
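            // Illustrative expansion (the register assignments are fixed by the asserts above):
            //      rep stosb       ; INITBLK: EDI = dest, EAX = init value, ECX = byte count
            //      rep movsb       ; COPYBLK: EDI = dest, ESI = src,        ECX = byte count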
9206
9207             regTracker.rsTrackRegTrash(REG_EDI);
9208             regTracker.rsTrackRegTrash(REG_ECX);
9209
9210             if (oper == GT_COPYBLK)
9211                 regTracker.rsTrackRegTrash(REG_ESI);
9212             // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9213
9214             genReleaseReg(opsPtr[0]);
9215             genReleaseReg(opsPtr[1]);
9216             genReleaseReg(opsPtr[2]);
9217         }
9218
9219 #else // !CPU_USES_BLOCK_MOVE 
9220
9221 #ifndef _TARGET_ARM_
9222         // Currently only the ARM implementation is provided
9223 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9224 #endif
9225         //
9226         // Is this a fixed size COPYBLK?
9227         //      or a fixed size INITBLK with a constant init value?
9228         //
9229         if ((op2->OperGet() == GT_CNS_INT) &&
9230             ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9231         {
9232             GenTreePtr  dstOp = op1->gtOp.gtOp1;
9233             GenTreePtr  srcOp = op1->gtOp.gtOp2;
9234             unsigned    length = (unsigned)op2->gtIntCon.gtIconVal;
9235             unsigned    fullStoreCount = length / TARGET_POINTER_SIZE;
9236             unsigned    initVal = 0;
9237             bool        useLoop = false;
9238
9239             if (oper == GT_INITBLK)
9240             {
9241                 /* Properly extend the init constant from a U1 to a U4 */
9242                 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9243
9244                 /* If it is a non-zero value we have to replicate      */
9245                 /* the byte value four times to form the DWORD         */
9246                 /* Then we store this new value into the tree-node      */
9247
9248                 if (initVal != 0)
9249                 {
9250                     initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9251                     op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
9252                 }
9253             }
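            // For example (illustrative): an init value of 0x5A is replicated as
            //      0x5A | (0x5A << 8) | (0x5A << 16) | (0x5A << 24) == 0x5A5A5A5A
            // so each full-word store below writes four copies of the byte at once.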
9254
9255             // Will we be using a loop to implement this INITBLK/COPYBLK?
9256             if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) ||
9257                 ((oper == GT_INITBLK) && (fullStoreCount >= 16)))
9258             {
9259                 useLoop = true;
9260             }
9261
9262             regMaskTP    usedRegs;
9263             regNumber    regDst;
9264             regNumber    regSrc;
9265             regNumber    regTemp;
9266
9267             /* Evaluate dest and src/val */
9268
9269             if (op1->gtFlags & GTF_REVERSE_OPS)
9270             {
9271                 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9272                 assert(srcOp->gtFlags & GTF_REG_VAL);
9273
9274                 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9275                 assert(dstOp->gtFlags & GTF_REG_VAL);
9276                 regDst = dstOp->gtRegNum;
9277
9278                 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9279                 regSrc = srcOp->gtRegNum;
9280             }
9281             else
9282             {
9283                 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9284                 assert(dstOp->gtFlags & GTF_REG_VAL);
9285
9286                 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9287                 assert(srcOp->gtFlags & GTF_REG_VAL);
9288                 regSrc = srcOp->gtRegNum;
9289
9290                 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9291                 regDst = dstOp->gtRegNum;
9292             }
9293             assert(dstOp->gtFlags & GTF_REG_VAL);
9294             assert(srcOp->gtFlags & GTF_REG_VAL);
9295
9296             regDst = dstOp->gtRegNum;
9297             regSrc = srcOp->gtRegNum;
9298             usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
9299             bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9300             emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9301             emitAttr srcType;
9302
9303             if (oper == GT_COPYBLK)
9304             {
9305                 // Prefer a low register, but avoid one of the ones we've already grabbed
9306                 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9307                 usedRegs |= genRegMask(regTemp);
9308                 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9309                 srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9310             }
9311             else
9312             {
9313                 regTemp = REG_STK;
9314                 srcType = EA_PTRSIZE;
9315             }
9316
9317             instruction  loadIns = ins_Load(TYP_I_IMPL);   // INS_ldr
9318             instruction  storeIns = ins_Store(TYP_I_IMPL);  // INS_str
9319
9320             int       finalOffset;
9321
9322             // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
9323             if (!useLoop)
9324             {
9325                 for (unsigned i = 0; i < fullStoreCount; i++)
9326                 {
9327                     if (oper == GT_COPYBLK)
9328                     {
9329                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9330                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9331                         gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9332                         regTracker.rsTrackRegTrash(regTemp);
9333                     }
9334                     else
9335                     {
9336                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9337                     }
9338                 }
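                // Illustrative sketch: for a 12-byte COPYBLK (fullStoreCount == 3) the loop above
                // emits roughly the following (register names are placeholders):
                //      ldr rTmp, [rSrc, #0]
                //      str rTmp, [rDst, #0]
                //      ldr rTmp, [rSrc, #4]
                //      str rTmp, [rDst, #4]
                //      ldr rTmp, [rSrc, #8]
                //      str rTmp, [rDst, #8]
                // For INITBLK only the str of the (replicated) init value held in rSrc is emitted.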
9339
9340                 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9341                 length -= finalOffset;
9342             }
9343             else  // We will use a loop to implement this INITBLK/COPYBLK
9344             {
9345                 unsigned   pairStoreLoopCount = fullStoreCount / 2;
9346
9347                 // We need a second temp register for CopyBlk
9348                 regNumber  regTemp2 = REG_STK;
9349                 if (oper == GT_COPYBLK)
9350                 {
9351                     // Prefer a low register, but avoid one of the ones we've already grabbed
9352                     regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9353                     usedRegs |= genRegMask(regTemp2);
9354                 }
9355
9356                 // Pick and initialize the loop counter register
9357                 regNumber regLoopIndex;
9358                 regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9359                 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9360
9361                 // Create and define the Basic Block for the loop top
9362                 BasicBlock * loopTopBlock = genCreateTempLabel();
9363                 genDefineTempLabel(loopTopBlock);
9364
9365                 // The loop body
9366                 if (oper == GT_COPYBLK)
9367                 {
9368                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9369                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9370                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9371                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9372                     getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9373                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9374                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9375                     regTracker.rsTrackRegTrash(regSrc);
9376                     regTracker.rsTrackRegTrash(regTemp);
9377                     regTracker.rsTrackRegTrash(regTemp2);
9378                 }
9379                 else // GT_INITBLK
9380                 {
9381                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9382                     getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
9383                 }
9384
9385                 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9386                 regTracker.rsTrackRegTrash(regDst);
9387                 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9388                 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9389                 inst_JMP(jmpGTS, loopTopBlock);
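                // Illustrative shape of the emitted loop for COPYBLK (placeholder register names):
                //  loopTop:
                //      ldr  rTmp,  [rSrc, #0]
                //      ldr  rTmp2, [rSrc, #4]
                //      str  rTmp,  [rDst, #0]
                //      str  rTmp2, [rDst, #4]
                //      add  rSrc, rSrc, #8
                //      add  rDst, rDst, #8
                //      subs rIdx, rIdx, #1
                //      bgt  loopTop
                // For INITBLK the loads and the rSrc increment are omitted; rSrc (holding the
                // replicated init value) is simply stored twice per iteration.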
9390
9391                 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9392
9393                 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
9394
9395                 if (length & TARGET_POINTER_SIZE)
9396                 {
9397                     if (oper == GT_COPYBLK)
9398                     {
9399                         getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9400                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9401                     }
9402                     else
9403                     {
9404                         getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9405                     }
9406                     finalOffset = TARGET_POINTER_SIZE;
9407                     length -= TARGET_POINTER_SIZE;
9408                 }
9409                 else
9410                 {
9411                     finalOffset = 0;
9412                 }
9413             }
9414
9415             if (length & sizeof(short))
9416             {
9417                 loadIns = ins_Load(TYP_USHORT);   // INS_ldrh
9418                 storeIns = ins_Store(TYP_USHORT);  // INS_strh
9419
9420                 if (oper == GT_COPYBLK)
9421                 {
9422                     getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9423                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9424                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9425                     regTracker.rsTrackRegTrash(regTemp);
9426                 }
9427                 else
9428                 {
9429                     getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9430                 }
9431                 length -= sizeof(short);
9432                 finalOffset += sizeof(short);
9433             }
9434
9435             if (length & sizeof(char))
9436             {
9437                 loadIns = ins_Load(TYP_UBYTE);   // INS_ldrb
9438                 storeIns = ins_Store(TYP_UBYTE);  // INS_strb
9439
9440                 if (oper == GT_COPYBLK)
9441                 {
9442                     getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9443                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9444                     gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9445                     regTracker.rsTrackRegTrash(regTemp);
9446                 }
9447                 else
9448                 {
9449                     getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9450                 }
9451                 length -= sizeof(char);
9452             }
9453             assert(length == 0);
9454
9455             genReleaseReg(dstOp);
9456             genReleaseReg(srcOp);
9457         }
9458         else
9459         {
9460             //
9461             // This is a variable-sized COPYBLK/INITBLK,
9462             //   or a fixed-size INITBLK with a variable init value,
9463             //
9464
9465             // In what order should the Dest, Val/Src, and Size be calculated?
9466
9467             compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2,
9468                 opsPtr, regsPtr); // OUT arguments
9469
9470             genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9471             genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9472             genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
9473
9474             genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9475             genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9476
9477             noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
9478                 (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0));
9479
9480             noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
9481                 (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1));
9482
9483             noway_assert((op2->gtFlags & GTF_REG_VAL) &&             // Size
9484                 (op2->gtRegNum == REG_ARG_2));
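            // Conceptually this path becomes a single helper call (illustrative):
            //      R0 = dest, R1 = src or init value, R2 = byte count
            //      bl   CORINFO_HELP_MEMCPY      ; or CORINFO_HELP_MEMSET for INITBLK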
9485
9486             regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9487
9488             genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY
9489                 /* GT_INITBLK */ : CORINFO_HELP_MEMSET,
9490                 0, EA_UNKNOWN);
9491
9492             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9493
9494             regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9495             genReleaseReg(opsPtr[0]);
9496             genReleaseReg(opsPtr[1]);
9497             genReleaseReg(opsPtr[2]);
9498         }
9499
9500         if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile())
9501         {
9502             // Emit a memory barrier instruction after the CopyBlk 
9503             instGen_MemoryBarrier();
9504         }
9505 #endif // !CPU_USES_BLOCK_MOVE 
9506     }
9507 }
9508 BasicBlock dummyBB;
9509
9510 #ifdef _PREFAST_
9511 #pragma warning(push)
9512 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
9513 #endif
9514 void                CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
9515                                                  regMaskTP  destReg,
9516                                                  regMaskTP  bestReg)
9517 {
9518     const genTreeOps oper    = tree->OperGet();
9519     const var_types treeType = tree->TypeGet();
9520     GenTreePtr      op1      = tree->gtOp.gtOp1;
9521     GenTreePtr      op2      = tree->gtGetOp2();
9522     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
9523     regMaskTP       regs     = regSet.rsMaskUsed;
9524     regMaskTP       needReg  = destReg;
9525     insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9526     emitAttr        size;
9527     instruction     ins;
9528     regMaskTP       addrReg;
9529     GenTreePtr      opsPtr[3];
9530     regMaskTP       regsPtr[3];
9531
9532 #ifdef DEBUG
9533     addrReg = 0xDEADCAFE;
9534 #endif
9535
9536     noway_assert(tree->OperKind() & GTK_SMPOP);
9537
9538     switch (oper)
9539     {
9540         case GT_ASG:
9541             genCodeForTreeSmpOpAsg(tree);
9542             return;
9543
9544         case GT_ASG_LSH:
9545         case GT_ASG_RSH:
9546         case GT_ASG_RSZ:
9547             genCodeForAsgShift(tree, destReg, bestReg);
9548             return;
9549
9550         case GT_ASG_AND:
9551         case GT_ASG_OR :
9552         case GT_ASG_XOR:
9553         case GT_ASG_ADD:
9554         case GT_ASG_SUB:
9555             genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9556             return;
9557
9558         case GT_CHS:
9559             addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9560 #ifdef _TARGET_XARCH_
9561             // Note that the specialCase here occurs when the treeType specifies a byte-sized operation
9562             // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9563             //
9564             bool specialCase; specialCase = false;
9565             if (op1->gtOper == GT_REG_VAR)
9566             {
9567                 /* Get hold of the target register */
9568
9569                 reg = op1->gtRegVar.gtRegNum;
9570                 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9571                 {
9572                     regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9573
9574                     inst_RV_RV(INS_mov, byteReg, reg);
9575                     regTracker.rsTrackRegTrash(byteReg);
9576
9577                     inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9578                     var_types op1Type = op1->TypeGet();
9579                     instruction wideningIns = ins_Move_Extend(op1Type, true);
9580                     inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type)); 
9581                     regTracker.rsTrackRegTrash(reg);
9582                     specialCase = true;
9583                 }
9584             }
9585
9586             if (!specialCase)
9587             {
9588                 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9589             }
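            // Illustrative sequence for the special case, assuming op1 lives in ESI and the
            // operation is a byte-sized negate (the register choices are just an example):
            //      mov   ecx, esi        ; copy into a byte-addressable register
            //      neg   cl
            //      movsx esi, cl         ; ins_Move_Extend widens the result back (movsx/movzx)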
9590 #else // not  _TARGET_XARCH_
9591             if (op1->gtFlags & GTF_REG_VAL)
9592             {
9593                 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9594             }
9595             else
9596             {
9597                 // Fix 388382 ARM JitStress WP7
9598                 var_types op1Type = op1->TypeGet();
9599                 regNumber reg = regSet.rsPickFreeReg();
9600                 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9601                 regTracker.rsTrackRegTrash(reg);
9602                 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9603                 inst_TT_RV(ins_Store(op1Type), op1,  reg, 0, emitTypeSize(op1Type));
9604             }
9605 #endif
9606             if (op1->gtFlags & GTF_REG_VAL)
9607                 regTracker.rsTrackRegTrash(op1->gtRegNum);
9608             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9609
9610             genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9611             return;
9612
9613         case GT_AND:
9614         case GT_OR :
9615         case GT_XOR:
9616         case GT_ADD:
9617         case GT_SUB:
9618         case GT_MUL:
9619             genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9620             return;
9621
9622         case GT_UMOD:
9623             genCodeForUnsignedMod(tree, destReg, bestReg);
9624             return;
9625
9626         case GT_MOD:
9627             genCodeForSignedMod(tree, destReg, bestReg);
9628             return;
9629
9630         case GT_UDIV:
9631             genCodeForUnsignedDiv(tree, destReg, bestReg);
9632             return;
9633
9634         case GT_DIV:
9635             genCodeForSignedDiv(tree, destReg, bestReg);
9636             return;
9637
9638         case GT_LSH:
9639         case GT_RSH:
9640         case GT_RSZ:
9641             genCodeForShift(tree, destReg, bestReg);
9642             return;
9643
9644         case GT_NEG:
9645         case GT_NOT:
9646
9647             /* Generate the operand into some register */
9648
9649             genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9650             noway_assert(op1->gtFlags & GTF_REG_VAL);
9651
9652             reg   = op1->gtRegNum;
9653
9654             /* Negate/reverse the value in the register */
9655
9656             inst_RV((oper == GT_NEG) ? INS_NEG
9657                                      : INS_NOT, reg, treeType);
9658
9659             /* The register is now trashed */
9660
9661             regTracker.rsTrackRegTrash(reg);
9662
9663             genCodeForTree_DONE(tree, reg);
9664             return;
9665
9666         case GT_IND:
9667         case GT_NULLCHECK:  // At this point, explicit null checks are just like inds...
9668
9669             /* Make sure the operand is addressable */
9670
9671             addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9672
9673             genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9674
9675             /* Figure out the size of the value being loaded */
9676
9677             size = EA_ATTR(genTypeSize(tree->gtType));
9678
9679             /* Pick a register for the value */
9680
9681             if  (needReg == RBM_ALLINT && bestReg == 0)
9682             {
9683                 /* Absent a better suggestion, pick a useless register */
9684
9685                 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9686             }
9687
9688             reg = regSet.rsPickReg(needReg, bestReg);
9689
9690             if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9691             {
9692                 noway_assert(size == EA_PTRSIZE);
9693                 getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
9694                                          EA_PTRSIZE,
9695                                          reg,
9696                                          FLD_GLOBAL_FS,
9697                                          (int)op1->gtIntCon.gtIconVal);
9698             }
9699             else
9700             {
9701                 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9702
9703                 inst_mov_RV_ST(reg, tree);
9704             }
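            // E.g. (x86, illustrative): a TLS handle indirection loads through the FS segment,
            //      mov  reg, fs:[iconVal]
            // while an ordinary GT_IND is a plain (possibly widening) load,
            //      mov/movsx/movzx  reg, [addr]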
9705
9706 #ifdef _TARGET_ARM_
9707             if (tree->gtFlags & GTF_IND_VOLATILE)
9708             {
9709                 // Emit a memory barrier instruction after the load
9710                 instGen_MemoryBarrier();
9711             }
9712 #endif
9713
9714             /* Note the new contents of the register we used */
9715
9716             regTracker.rsTrackRegTrash(reg);
9717
9718             /* Update the live set of register variables */
9719
9720 #ifdef DEBUG
9721             if (compiler->opts.varNames) genUpdateLife(tree);
9722 #endif
9723
9724             /* Now we can update the register pointer information */
9725
9726 //          genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9727             gcInfo.gcMarkRegPtrVal(reg, treeType);
9728
9729             genCodeForTree_DONE_LIFE(tree, reg);
9730             return;
9731
9732         case GT_CAST:
9733
9734             genCodeForNumericCast(tree, destReg, bestReg);
9735             return;
9736
9737
9738         case GT_JTRUE:
9739
9740             /* Is this a test of a relational operator? */
9741
9742             if  (op1->OperIsCompare())
9743             {
9744                 /* Generate the conditional jump */
9745
9746                 genCondJump(op1);
9747
9748                 genUpdateLife(tree);
9749                 return;
9750             }
9751
9752 #ifdef  DEBUG
9753             compiler->gtDispTree(tree);
9754 #endif
9755             NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9756             break;
9757
9758         case GT_SWITCH:
9759             genCodeForSwitch(tree);
9760             return;
9761
9762         case GT_RETFILT:
9763             noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9764             if (op1 == 0)   // endfinally
9765             {
9766                 reg  = REG_NA;
9767
9768 #ifdef _TARGET_XARCH_
9769                 /* Return using a pop-jmp sequence. As the "try" block calls
9770                    the finally with a jmp, this leaves the x86 call-ret stack
9771                    balanced in the normal flow path. */
9772
9773                 noway_assert(isFramePointerRequired());
9774                 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9775                 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
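                // Illustrative x86 sequence for endfinally (the pop-jmp return):
                //      pop  eax          ; pop the return address pushed by the call to the finally
                //      jmp  eax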
9776 #elif defined(_TARGET_ARM_)
9777                 // Nothing needed for ARM
9778 #else
9779                 NYI("TARGET");
9780 #endif
9781             }
9782             else            // endfilter
9783             {
9784                 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9785                 noway_assert(op1->gtFlags & GTF_REG_VAL);
9786                 noway_assert(op1->gtRegNum == REG_INTRET);
9787                 /* The return value has now been computed */
9788                 reg   = op1->gtRegNum;
9789
9790                 /* Return */
9791                 instGen_Return(0);
9792             }
9793
9794             genCodeForTree_DONE(tree, reg);
9795             return;
9796
9797         case GT_RETURN:
9798
9799             // TODO: this should be done AFTER we have called exit mon so that
9800             //       we are sure that we don't have to keep 'this' alive
9801
9802             if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9803             {
9804                 /* either it's an "empty" statement or the return statement
9805                    of a synchronized method
9806                  */
9807
9808                 genPInvokeMethodEpilog();
9809             }
9810
9811             /* Is there a return value and/or an exit statement? */
9812
9813             if  (op1)
9814             {
9815                 if  (op1->gtType == TYP_VOID)
9816                 {
9817                     //We're returning nothing; just generate the block (shared epilog calls).
9818                     genCodeForTree(op1, 0);
9819                 }
9820 #ifdef _TARGET_ARM_
9821                 else if (op1->gtType == TYP_STRUCT)
9822                 {
9823                     if (op1->gtOper == GT_CALL)
9824                     {
9825                         // We have a return call() because we failed to tail call.
9826                         // In any case, just generate the call and be done.
9827                         assert(compiler->IsHfa(op1));
9828                         genCodeForCall(op1, true);
9829                         genMarkTreeInReg(op1, REG_FLOATRET);
9830                     }
9831                     else
9832                     {
9833                         assert(op1->gtOper == GT_LCL_VAR);
9834                         assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
9835                         genLoadIntoFltRetRegs(op1);
9836                     }
9837                 }
9838                 else if (op1->TypeGet() == TYP_FLOAT)
9839                 {
9840                     // This can only occur when we are returning a non-HFA struct
9841                     // that is composed of a single float field and we performed
9842                     // struct promotion and enregistered the float field.
9843                     // 
9844                     genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
9845                     getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
9846                 }
9847 #endif // _TARGET_ARM_
9848                 else
9849                 {
9850                     //we can now go through this code for compiler->genReturnBB.  I've regularized all the code.
9851
9852                     //noway_assert(compiler->compCurBB != compiler->genReturnBB);
9853                     
9854                     noway_assert(op1->gtType != TYP_VOID);
9855
9856                     /* Generate the return value into the return register */
9857
9858                     genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9859
9860                     /* The result must now be in the return register */
9861
9862                     noway_assert(op1->gtFlags & GTF_REG_VAL);
9863                     noway_assert(op1->gtRegNum == REG_INTRET);
9864                 }
9865
9866                 /* The return value has now been computed */
9867
9868                 reg   = op1->gtRegNum;
9869
9870                 genCodeForTree_DONE(tree, reg);
9871
9872             }
9873
9874             //The profiling hook does not trash registers, so it's safe to call after we emit the code for
9875             //the GT_RETURN tree.
9876 #ifdef PROFILING_SUPPORTED
9877             if (compiler->compCurBB == compiler->genReturnBB)
9878             {
9879                 genProfilingLeaveCallback();
9880             }
9881 #endif
9882 #ifdef DEBUG
9883             if (compiler->opts.compStackCheckOnRet)
9884             {
9885                 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
9886                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
9887                              compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
9888                 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
9889
9890                 BasicBlock  *   esp_check = genCreateTempLabel();
9891                 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
9892                 inst_JMP(jmpEqual, esp_check);
9893                 getEmitter()->emitIns(INS_BREAKPOINT);
9894                 genDefineTempLabel(esp_check);
9895             }
9896 #endif
9897             return;
9898
9899         case GT_COMMA:
9900
9901             if (tree->gtFlags & GTF_REVERSE_OPS)
9902             {
9903                 if  (tree->gtType == TYP_VOID)
9904                 {
9905                     genEvalSideEffects(op2);
9906                     genUpdateLife (op2);
9907                     genEvalSideEffects(op1);
9908                     genUpdateLife(tree);
9909                     return;
9910                 }
9911
9912                 // Generate op2
9913                 genCodeForTree(op2, needReg);
9914                 genUpdateLife(op2);
9915
9916                 noway_assert(op2->gtFlags & GTF_REG_VAL);
9917
9918                 regSet.rsMarkRegUsed(op2);
9919
9920                 // Do side effects of op1
9921                 genEvalSideEffects(op1);
9922
9923                 // Recover op2 if spilled
9924                 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
9925
9926                 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
9927
9928                 // Set the GC info if we need to
9929                 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
9930
9931                 genUpdateLife(tree);
9932                 genCodeForTree_DONE(tree, op2->gtRegNum);
9933
9934                 return;
9935             }
9936             else
9937             {
9938                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
9939
9940                 /* Generate side effects of the first operand */
9941
9942                 genEvalSideEffects(op1);
9943                 genUpdateLife (op1);
9944
9945                 /* Is the value of the second operand used? */
9946
9947                 if  (tree->gtType == TYP_VOID)
9948                 {
9949                     /* The right operand produces no result. The morpher is
9950                        responsible for resetting the type of GT_COMMA nodes
9951                        to TYP_VOID if op2 isn't meant to yield a result. */
9952
9953                     genEvalSideEffects(op2);
9954                     genUpdateLife(tree);
9955                     return;
9956                 }
9957
9958                 /* Generate the second operand, i.e. the 'real' value */
9959
9960                 genCodeForTree(op2, needReg);
9961                 noway_assert(op2->gtFlags & GTF_REG_VAL);
9962
9963                 /* The result of 'op2' is also the final result */
9964
9965                 reg  = op2->gtRegNum;
9966
9967                 /* Remember whether we set the flags */
9968
9969                 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
9970
9971                 genCodeForTree_DONE(tree, reg);
9972                 return;
9973             }
9974
9975         case GT_BOX:
9976             genCodeForTree(op1, needReg);
9977             noway_assert(op1->gtFlags & GTF_REG_VAL);
9978
9979             /* The result of 'op1' is also the final result */
9980
9981             reg  = op1->gtRegNum;
9982
9983             /* Remember whether we set the flags */
9984
9985             tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
9986
9987             genCodeForTree_DONE(tree, reg);
9988             return;
9989
9990         case GT_QMARK:
9991
9992             genCodeForQmark(tree, destReg, bestReg);
9993             return;
9994
9995         case GT_NOP:
9996
9997 #if OPT_BOOL_OPS
9998             if  (op1 == NULL)
9999                 return;
10000 #endif
10001
10002             /* Generate the operand into some register */
10003
10004             genCodeForTree(op1, needReg);
10005
10006             /* The result is the same as the operand */
10007
10008             reg  = op1->gtRegNum;
10009
10010             genCodeForTree_DONE(tree, reg);
10011             return;
10012
10013         case GT_INTRINSIC:
10014
10015             switch (tree->gtIntrinsic.gtIntrinsicId)
10016             {
10017             case CORINFO_INTRINSIC_Round:
10018                 {
10019                     noway_assert(tree->gtType == TYP_INT);
10020
10021 #if FEATURE_STACK_FP_X87
10022                     genCodeForTreeFlt(op1);
10023
10024                     /* Store the FP value into the temp */
10025                     TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10026
10027                     FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10028                     FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10029                     inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10030
10031                     reg = regSet.rsPickReg(needReg, bestReg);
10032                     regTracker.rsTrackRegTrash(reg);
10033
10034                     inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10035
10036                     compiler->tmpRlsTemp(temp);
10037 #else
10038                     genCodeForTreeFloat(tree, needReg, bestReg);
10039                     return;
10040 #endif
10041                 } 
10042                 break;
10043
10044             default:
10045                 noway_assert(!"unexpected math intrinsic");
10046
10047             }
10048
10049             genCodeForTree_DONE(tree, reg);
10050             return;
10051
10052         case GT_LCLHEAP:
10053
10054             reg = genLclHeap(op1);
10055             genCodeForTree_DONE(tree, reg);
10056             return;
10057
10058         case GT_COPYOBJ:
10059             noway_assert(op1->IsList());
10060
10061             /* If the value class doesn't have any fields that are GC refs or
10062             the target isn't on the GC-heap, we can merge it with CPBLK.
10063             GC fields cannot be copied directly; instead we will
10064             need to use a jit-helper for that. */
10065             assert(tree->AsCpObj()->gtGcPtrCount > 0);
10066
10067             {
10068                 GenTreeCpObj* cpObjOp = tree->AsCpObj();
10069
10070 #ifdef _TARGET_ARM_
10071                 if (cpObjOp->IsVolatile())
10072                 {
10073                     // Emit a memory barrier instruction before the CopyBlk 
10074                     instGen_MemoryBarrier();
10075                 }
10076 #endif
10077                 GenTreePtr  srcObj = cpObjOp->Source();
10078                 GenTreePtr  dstObj = cpObjOp->Dest();
10079
10080                 noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
10081
10082 #ifdef DEBUG
10083                 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
10084                 size_t  debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
10085
10086                 // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
10087                 // The EE currently does not allow this.  Let's assert it just to be safe.
10088                 noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
10089 #endif
10090
10091                 size_t    blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
10092                 unsigned  slots = cpObjOp->gtSlots;
10093                 BYTE *    gcPtrs = cpObjOp->gtGcPtrs;
10094                 unsigned  gcPtrCount = cpObjOp->gtGcPtrCount;
10095
10096                 // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set
10097                 if (gcPtrCount > 0)
10098                     assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0);
10099
10100                 GenTreePtr  treeFirst, treeSecond;
10101                 regNumber    regFirst, regSecond;
10102
10103                 // Check in what order the object-ptrs have to be evaluated
10104
10105                 if (op1->gtFlags & GTF_REVERSE_OPS)
10106                 {
10107                     treeFirst = srcObj;
10108                     treeSecond = dstObj;
10109 #if CPU_USES_BLOCK_MOVE
10110                     regFirst = REG_ESI;
10111                     regSecond = REG_EDI;
10112 #else
10113                     regFirst = REG_ARG_1;
10114                     regSecond = REG_ARG_0;
10115 #endif
10116                 }
10117                 else
10118                 {
10119                     treeFirst = dstObj;
10120                     treeSecond = srcObj;
10121 #if CPU_USES_BLOCK_MOVE
10122                     regFirst = REG_EDI;
10123                     regSecond = REG_ESI;
10124 #else
10125                     regFirst = REG_ARG_0;
10126                     regSecond = REG_ARG_1;
10127 #endif
10128                 }
10129
10130                 bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
10131                 bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
10132                 emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
10133                 emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
10134
10135                 // Materialize the trees in the order desired
10136
10137 #if CPU_USES_BLOCK_MOVE
10138                 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10139                 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10140                 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
10141
10142                 // Grab ECX because it will be trashed by the helper
10143                 //
10144                 regSet.rsGrabReg(RBM_ECX);
10145
10146                 while (blkSize >= TARGET_POINTER_SIZE)
10147                 {
10148                     if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
10149                     {
10150                         // Note that we can use movsd even if it is a GC pointer being transferred
10151                         // because the value is not cached anywhere.  If we did this in two moves,
10152                         // we would have to make certain we passed the appropriate GC info on to
10153                         // the emitter.
10154                         instGen(INS_movsp);
10155                     }
10156                     else
10157                     {
10158                         // This helper will act like a MOVSD                        
10159                         //    -- inputs EDI and ESI are byrefs
10160                         //    -- it also increments ESI and EDI by 4
10161                         //    -- helper will trash ECX
10162                         //
10163                         regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
10164                         regSet.rsLockUsedReg(argRegs);
10165                         genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
10166                             0,             // argSize
10167                             EA_PTRSIZE);   // retSize
10168                         regSet.rsUnlockUsedReg(argRegs);
10169                     }
10170
10171                     blkSize -= TARGET_POINTER_SIZE;
10172                 }
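                // Illustrative per-slot expansion for a layout of { non-GC, GC, non-GC } slots:
                //      movsd                              ; plain slot, copied directly
                //      call CORINFO_HELP_ASSIGN_BYREF     ; GC slot: helper copies *ESI to *EDI with the
                //                                         ;   write barrier and advances ESI/EDI by 4
                //      movsd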
10173
10174                 // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
10175
10176                 regTracker.rsTrackRegTrash(REG_EDI);
10177                 regTracker.rsTrackRegTrash(REG_ESI);
10178                 regTracker.rsTrackRegTrash(REG_ECX);
10179
10180                 gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
10181
10182                 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
10183                    it is an emitNoGChelper. However, we have to let the emitter know that
10184                    the GC liveness has changed. We do this by creating a new label. 
10185                  */
10186
10187                 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
10188
10189                 genDefineTempLabel(&dummyBB);
10190
10191 #else //  !CPU_USES_BLOCK_MOVE
10192
10193 #ifndef _TARGET_ARM_
10194                 // Currently only the ARM implementation is provided
10195 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
10196 #endif
10197
10198                 bool         helperUsed;
10199                 regNumber    regDst;
10200                 regNumber    regSrc;
10201                 regNumber    regTemp;
10202
10203                 if ((gcPtrCount > 0) && !dstIsOnStack)
10204                 {
10205                     genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10206                     genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10207                     genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
10208
10209                     /* The helper is an asm routine that will trash R2, R3, and LR */
10210                     {
10211                         /* Spill any callee-saved registers which are being used */
10212                         regMaskTP  spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
10213
10214                         if (spillRegs)
10215                         {
10216                             regSet.rsSpillRegs(spillRegs);
10217                         }
10218                     }
10219
10220                     // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
10221                     // We will also use it as the temp register for our load/store sequences
10222                     //
10223                     assert(REG_R2 == REG_TMP_1);
10224                     regTemp = regSet.rsGrabReg(RBM_R2);
10225                     helperUsed = true;
10226                 }
10227                 else
10228                 {
10229                     genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
10230                     genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
10231                     genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
10232
10233                     // Grab any temp register to use for our load/store sequences
10234                     //
10235                     regTemp = regSet.rsGrabReg(RBM_ALLINT);
10236                     helperUsed = false;
10237                 }
10238                 assert(dstObj->gtFlags & GTF_REG_VAL);
10239                 assert(srcObj->gtFlags & GTF_REG_VAL);
10240
10241                 regDst = dstObj->gtRegNum;
10242                 regSrc = srcObj->gtRegNum;
10243
10244                 assert(regDst != regTemp);
10245                 assert(regSrc != regTemp);
10246
10247                 instruction  loadIns = ins_Load(TYP_I_IMPL);   // INS_ldr
10248                 instruction  storeIns = ins_Store(TYP_I_IMPL);  // INS_str
10249
10250                 size_t  offset = 0;
10251                 while (blkSize >= TARGET_POINTER_SIZE)
10252                 {
10253                     CorInfoGCType gcType;
10254                     CorInfoGCType gcTypeNext = TYPE_GC_NONE;
10255                     var_types     type = TYP_I_IMPL;
10256
10257 #if  FEATURE_WRITE_BARRIER
10258                     gcType = (CorInfoGCType)(*gcPtrs++);
10259                     if (blkSize > TARGET_POINTER_SIZE)
10260                         gcTypeNext = (CorInfoGCType)(*gcPtrs);
10261
10262                     if (gcType == TYPE_GC_REF)
10263                         type = TYP_REF;
10264                     else if (gcType == TYPE_GC_BYREF)
10265                         type = TYP_BYREF;
10266
10267                     if (helperUsed)
10268                     {
10269                         assert(regDst == REG_ARG_0);
10270                         assert(regSrc == REG_ARG_1);
10271                         assert(regTemp == REG_R2);
10272                     }
10273 #else
10274                     gcType = TYPE_GC_NONE;
10275 #endif  // FEATURE_WRITE_BARRIER
10276
10277                     blkSize -= TARGET_POINTER_SIZE;
10278
10279                     emitAttr opSize = emitTypeSize(type);
10280
10281                     if (!helperUsed || (gcType == TYPE_GC_NONE))
10282                     {
10283                         getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
10284                         getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
10285                         offset += TARGET_POINTER_SIZE;
10286
10287                         if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) ||
10288                             ((offset >= 128) && (blkSize > 0)))
10289                         {
10290                             getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
10291                             getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
10292                             offset = 0;
10293                         }
10294                     }
10295                     else
10296                     {
10297                         assert(offset == 0);
10298
10299                         // The helper will act like this:                 
10300                         //    -- inputs R0 and R1 are byrefs
10301                         //    -- helper will perform copy from *R1 into *R0
10302                         //    -- helper will perform post increment of R0 and R1 by 4
10303                         //    -- helper will trash R2
10304                         //    -- helper will trash R3
10305                         //    -- calling the helper implicitly trashes LR
10306                         //
10307                         assert(helperUsed);
10308                         regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
10309                         regSet.rsLockUsedReg(argRegs);
10310                         genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
10311                             0,             // argSize
10312                             EA_PTRSIZE);   // retSize
10313
10314                         regSet.rsUnlockUsedReg(argRegs);
10315                         regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
10316                     }
10317                 }
10318
10319                 regTracker.rsTrackRegTrash(regDst);
10320                 regTracker.rsTrackRegTrash(regSrc);
10321                 regTracker.rsTrackRegTrash(regTemp);
10322
10323                 gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
10324
10325                 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
10326                    it is an emitNoGChelper. However, we have to let the emitter know that
10327                    the GC liveness has changed. We do this by creating a new label. 
10328                  */
10329
10330                 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
10331
10332                 genDefineTempLabel(&dummyBB);
10333
10334 #endif   //  !CPU_USES_BLOCK_MOVE
10335
10336                 assert(blkSize == 0);
10337
10338                 genReleaseReg(dstObj);
10339                 genReleaseReg(srcObj);
10340
10341                 reg = REG_NA;
10342
10343                 genCodeForTree_DONE(tree, reg);
10344
10345 #ifdef _TARGET_ARM_
10346                 if (tree->AsBlkOp()->IsVolatile())
10347                 {
10348                     // Emit a memory barrier instruction after the CopyBlk 
10349                     instGen_MemoryBarrier();
10350                 }
10351 #endif
10352             }
10353             return;
10354              
10355         case GT_COPYBLK:
10356         case GT_INITBLK:
10357
10358             genCodeForBlkOp(tree, destReg);
10359             genCodeForTree_DONE(tree, REG_NA);
10360             return;
10361
10362         case GT_EQ:
10363         case GT_NE:
10364         case GT_LT:
10365         case GT_LE:
10366         case GT_GE:
10367         case GT_GT:
10368             genCodeForRelop(tree, destReg, bestReg);
10369             return;
10370
10371         case GT_ADDR:
10372
10373             genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10374             return;
10375
10376 #ifdef _TARGET_XARCH_
10377         case GT_LOCKADD:
10378
10379             // This is for a locked add operation.  We know that the resulting value doesn't "go" anywhere.
10380             // For reference, op1 is the location.  op2 is the addend or the value.
10381             if (op2->OperIsConst())
10382             {
10383                 noway_assert(op2->TypeGet() == TYP_INT);
10384                 ssize_t cns = op2->gtIntCon.gtIconVal;
10385
10386                 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10387                 switch (cns)
10388                 {
10389                 case 1:
10390                     instGen(INS_lock);
10391                     instEmit_RM(INS_inc, op1, op1, 0); break;
10392                 case -1:
10393                     instGen(INS_lock);
10394                     instEmit_RM(INS_dec, op1, op1, 0); break;
10395                 default:
10396                     assert((int)cns == cns); // By test above for AMD64.
10397                     instGen(INS_lock);
10398                     inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0); break;
10399                 }
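                // Illustrative expansions for the constant-addend cases above:
                //      lock inc dword ptr [loc]           ; cns == 1
                //      lock dec dword ptr [loc]           ; cns == -1
                //      lock add dword ptr [loc], imm32    ; any other constant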
10400                 genReleaseReg(op1);
10401             }
10402             else
10403             {
10404                 //A non-constant addend means it needs to go into a register.
10405                 ins = INS_add;
10406                 goto LockBinOpCommon;
10407             }
10408
10409             genFlagsEqualToNone();    // We didn't compute a result into a register.
10410             genUpdateLife(tree);      // We didn't compute an operand into anything.
10411             return;
10412
10413         case GT_XADD:
10414             ins = INS_xadd; goto LockBinOpCommon;
10415         case GT_XCHG:
10416             ins = INS_xchg; goto LockBinOpCommon;
10417 LockBinOpCommon:
10418             {
10419                 //Compute the second operand into a register.  xadd and xchg are r/m32, r32.  So even if op2
10420                 //is a constant, it needs to be in a register.  This should be the output register if
10421                 //possible.
10422                 //
10423                 //For reference, gtOp1 is the location.  gtOp2 is the addend or the value.
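                //Illustrative shapes of what this common path emits (registers are placeholders):
                //      mov  reg, value
                //      lock xadd [location], reg     ; GT_XADD: reg receives the original value
                //      xchg [location], reg          ; GT_XCHG: the lock prefix is implicit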
10424
10425                 GenTreePtr location = op1;
10426                 GenTreePtr value = op2;
10427
10428                 //Again, a friendly reminder.  IL calling convention is left to right.
10429                 if (tree->gtFlags & GTF_REVERSE_OPS)
10430                 {
10431                     // The atomic operations destroy this argument, so force it into a scratch register
10432                     reg = regSet.rsPickFreeReg();
10433                     genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10434
10435                     // Must evaluate location into a register
10436                     genCodeForTree(location, needReg, RBM_NONE);
10437                     assert(location->gtFlags & GTF_REG_VAL);
10438                     regSet.rsMarkRegUsed(location);
10439                     regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10440                     genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10441                     regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10442
10443                     if (ins != INS_xchg)
10444                     {
10445                         //xchg implies the lock prefix, but xadd and add require it.
10446                         instGen(INS_lock);
10447                     }
10448                     instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10449                     genReleaseReg(value);
10450                     regTracker.rsTrackRegTrash(reg);
10451                     genReleaseReg(location);
10452                 }
10453                 else
10454                 {
10455                     regMaskTP addrReg;
10456                     if (genMakeIndAddrMode(location,
10457                                            tree,
10458                                            false, /* not for LEA */
10459                                            needReg,
10460                                            RegSet::KEEP_REG,
10461                                            &addrReg))
10462                     {
10463                         genUpdateLife(location);
10464
10465                         reg = regSet.rsPickFreeReg();
10466                         genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10467                         addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10468
10469                         if (ins != INS_xchg)
10470                         {
10471                             //xchg implies the lock prefix, but xadd and add require it.
10472                             instGen(INS_lock);
10473                         }
10474
10475                         // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10476                         // inst_TT_RV(ins, location, reg);
10477                         sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10478
10479                         genReleaseReg(value);
10480                         regTracker.rsTrackRegTrash(reg);
10481                         genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10482                     }
10483                     else
10484                     {
10485                         // Must evaluate location into a register.
10486                         genCodeForTree(location, needReg, RBM_NONE);
10487                         assert(location->gtFlags & GTF_REG_VAL);
10488                         regSet.rsMarkRegUsed(location);
10489
10490                         // xadd destroys this argument, so force it into a scratch register
10491                         reg = regSet.rsPickFreeReg();
10492                         genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10493                         regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10494                         genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10495                         regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10496
10497                         if (ins != INS_xchg)
10498                         {
10499                             //xchg implies the lock prefix, but xadd and add require it.
10500                             instGen(INS_lock);
10501                         }
10502
10503                         instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10504
10505                         genReleaseReg(value);
10506                         regTracker.rsTrackRegTrash(reg);
10507                         genReleaseReg(location);
10508                     }
10509                 }
10510
10511                 //The flags are equal to the target of the tree (i.e. the result of the add), not to the
10512                 //result in the register.  If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10513                 //that information to set the flags.  Doesn't seem like there is a good reason for that.
10514                 //Therefore, trash the flags.
10515                 genFlagsEqualToNone();
10516
10517                 if (ins == INS_add)
10518                 {
10519                     genUpdateLife(tree); //If the operator was add, then we were called from the GT_LOCKADD
10520                                          //case.  In that case we don't use the result, so we don't need to
10521                                          //update anything.
10522                 }
10523                 else
10524                 {
10525                     genCodeForTree_DONE(tree, reg);
10526                 }
10527             }
10528             return;
10529
10530 #else // !_TARGET_XARCH_
10531
10532         case GT_LOCKADD:
10533         case GT_XADD:
10534         case GT_XCHG:
10535
10536             NYI_ARM("LOCK instructions");
10537 #endif
10538
10539         case GT_ARR_LENGTH:
10540         {
10541             // Make the corresponding ind(a + c) node, and do codegen for that.
10542             GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, 
10543                                             tree->gtArrLen.ArrRef(),
10544                                             compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10545             tree->SetOper(GT_IND);
10546             tree->gtFlags |= GTF_IND_ARR_LEN;   // Record that this node represents an array length expression.
10547             assert(tree->TypeGet() == TYP_INT);
10548             tree->gtOp.gtOp1 = addr;
10549             genCodeForTree(tree, destReg, bestReg);
10550             return;
10551         }
10552
10553         case GT_OBJ:
10554             // All GT_OBJ nodes must have been morphed prior to this.
10555             noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10556
10557         default:
10558 #ifdef DEBUG
10559             compiler->gtDispTree(tree);
10560 #endif
10561             noway_assert(!"unexpected unary/binary operator");
10562     } // end switch (oper)
10563
10564     unreached();
10565 }
10566 #ifdef _PREFAST_
10567 #pragma warning(pop) // End suppress PREFast warning about overly large function
10568 #endif
10569
10570
10571 regNumber CodeGen::genIntegerCast(GenTree *tree,
10572                                   regMaskTP needReg, 
10573                                   regMaskTP bestReg)
10574 {
10575     instruction ins;
10576     emitAttr    size;
10577     bool        unsv;
10578     bool        andv = false;
10579     regNumber   reg;
10580     GenTreePtr  op1 = tree->gtOp.gtOp1->gtEffectiveVal();
10581     var_types   dstType  = tree->CastToType();
10582     var_types   srcType = op1->TypeGet();
10583
10584     if  (genTypeSize(srcType) < genTypeSize(dstType))
10585     {
10586         // Widening cast
10587         
10588         /* we need the source size */
10589
10590         size = EA_ATTR(genTypeSize(srcType));
10591
10592         noway_assert(size < EA_PTRSIZE);
10593
10594         unsv = varTypeIsUnsigned(srcType);
10595         ins = ins_Move_Extend(srcType, op1->InReg());
10596
10597         /*
10598             Special case: for a cast of byte to char we first
10599             have to expand the byte (w/ sign extension), then
10600             mask off the high bits.
10601             Use 'movsx' followed by 'and'
10602         */
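        // For illustration (a sketch; the actual operands depend on the addressing mode and
        // the register picked below), the emitted sequence is roughly:
        //      movsx   reg, byte ptr [src]     ; sign-extend the byte
        //      and     reg, 0xFFFF             ; then mask down to the 16-bit char range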
10603         if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
10604         {
10605             noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
10606             andv = true;
10607         }
10608     }
10609     else
10610     {
10611         // Narrowing cast, or sign-changing cast
10612
10613         noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
10614
10615         size = EA_ATTR(genTypeSize(dstType));
10616
10617         unsv = varTypeIsUnsigned(dstType);
10618         ins  = ins_Move_Extend(dstType, op1->InReg());
10619     }
10620
10621     noway_assert(size < EA_PTRSIZE);
10622
10623     // Set bestReg to the same register as op1 if op1 is a regVar and is available
10624     if (op1->InReg())
10625     {
10626         regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
10627         if ( (((op1RegMask & bestReg)         != 0) || (bestReg == 0)) &&
10628               ((op1RegMask & regSet.rsRegMaskFree()) != 0)               )
10629         {
10630             bestReg = op1RegMask;
10631         }
10632     }
10633
10634     /* Is the value sitting in a non-byte-addressable register? */
10635
10636     if  (op1->InReg() &&
10637         (size == EA_1BYTE) &&
10638         !isByteReg(op1->gtRegNum))
10639     {
10640         if (unsv)
10641         {
10642             // for unsigned values we can AND, so it need not be a byte register
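            // (For example -- an illustrative sketch of the path below: if the byte value sits in
            //  ESI, which has no byte form on x86, we can emit "mov reg, esi" followed by
            //  "and reg, 0xFF" instead of requiring a byte-addressable register for movzx.)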
10643
10644             reg = regSet.rsPickReg(needReg, bestReg);
10645
10646             ins = INS_AND;
10647         }
10648         else
10649         {
10650             /* Move the value into a byte register */
10651
10652             reg   = regSet.rsGrabReg(RBM_BYTE_REGS);
10653         }
10654
10655         if (reg != op1->gtRegNum)
10656         {
10657             /* Move the value into that register */
10658
10659             regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
10660             inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
10661
10662             /* The value has a new home now */
10663
10664             op1->gtRegNum = reg;
10665         }
10666     }
10667     else
10668     {
10669         /* Pick a register for the value (general case) */
10670
10671         reg = regSet.rsPickReg(needReg, bestReg);
10672
10673         // if we (might) need to set the flags and the value is in the same register
10674         // and we have an unsigned value then use AND instead of MOVZX
10675         if  (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
10676         {
10677 #ifdef _TARGET_X86_
10678             noway_assert(ins == INS_movzx);
10679 #endif
10680             ins = INS_AND;
10681         }
10682     }
10683
10684     if (ins == INS_AND)
10685     {
10686         noway_assert(andv == false && unsv);
10687
10688         /* Generate "and reg, MASK" */
10689
10690         insFlags  flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10691         inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
10692
10693         if  (tree->gtSetFlags())
10694             genFlagsEqualToReg(tree, reg);
10695     }
10696     else
10697     {
10698 #ifdef _TARGET_XARCH_
10699         noway_assert(ins == INS_movsx || ins == INS_movzx);
10700 #endif
10701
10702         /* Generate "movsx/movzx reg, [addr]" */
10703
10704         inst_RV_ST(ins, size, reg, op1);
10705
10706         /* Mask off high bits for cast from byte to char */
10707
10708         if  (andv)
10709         {
10710 #ifdef _TARGET_XARCH_
10711             noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
10712 #endif
10713             insFlags  flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10714             inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
10715
10716             if  (tree->gtSetFlags())
10717                 genFlagsEqualToReg(tree, reg);
10718         }
10719     }
10720
10721     regTracker.rsTrackRegTrash(reg);
10722     return reg;
10723 }
10724
10725 void                CodeGen::genCodeForNumericCast(GenTreePtr tree,
10726                                                    regMaskTP  destReg,
10727                                                    regMaskTP  bestReg)
10728 {
10729     GenTreePtr      op1      = tree->gtOp.gtOp1;
10730     var_types       dstType  = tree->CastToType();
10731     var_types       baseType = TYP_INT;
10732     regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
10733     regMaskTP       needReg  = destReg;
10734     regMaskTP       addrReg;
10735     emitAttr        size;
10736     BOOL            unsv;
10737
10738     /*
10739       * Constant casts should have been folded earlier.
10740       * If the constant is not finite, don't bother.
10741       * We don't do this folding for debug code / when optimizations are disabled.
10742       */
10743
10744     noway_assert((op1->gtOper != GT_CNS_INT &&
10745                   op1->gtOper != GT_CNS_LNG &&
10746                   op1->gtOper != GT_CNS_DBL) ||
10747                  tree->gtOverflow() ||
10748                  (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
10749                  !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
10750
10751     noway_assert(dstType != TYP_VOID);
10752
10753     /* What type are we casting from? */
10754
10755     switch (op1->TypeGet())
10756     {
10757     case TYP_LONG:
10758
10759         /* Special case: the long is generated via the mod of a long
10760            with an int.  This is really an int and need not be
10761            converted to a reg pair. NOTE: the flag only indicates
10762            that this is a cast to TYP_INT; it hasn't actually
10763            verified the second operand of the MOD! */
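        // (For example -- illustrative only -- a shape like (int)(longVal % 10) is the kind of
        //  tree this flag describes: the result already fits in a single int register, so no
        //  reg pair is needed.)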
10764
10765         if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) &&
10766             (op1->gtFlags & GTF_MOD_INT_RESULT))
10767         {
10768
10769             /* Verify that the op2 of the mod node is
10770                1) An integer tree, or
10771                2) A long constant that is small enough to fit in an integer
10772             */
10773
10774             GenTreePtr modop2 = op1->gtOp.gtOp2;
10775             if ((genActualType(modop2->gtType) == TYP_INT) ||
10776                    ((modop2->gtOper == GT_CNS_LNG) &&
10777                        (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
10778             {
10779                 genCodeForTree(op1, destReg, bestReg);
10780
10781 #ifdef _TARGET_64BIT_
10782                 reg = op1->gtRegNum;
10783 #else // _TARGET_64BIT_
10784                 reg = genRegPairLo(op1->gtRegPair);
10785 #endif //_TARGET_64BIT_
10786
10787                 genCodeForTree_DONE(tree, reg);
10788                 return;
10789             }
10790         }
10791
10792         /* Make the operand addressable.  When gtOverflow() is true, 
10793            hold on to the addrReg as we will need it to access the higher dword */
10794
10795         op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1 twice!)
10796                                         // See, e.g., the TYP_INT case below...
10797
10798         addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
10799
10800         /* Load the lower half of the value into some register */
10801
10802         if  (op1->gtFlags & GTF_REG_VAL)
10803         {
10804             /* Can we simply use the low part of the value? */
10805             reg = genRegPairLo(op1->gtRegPair);
10806
10807             if (tree->gtOverflow())
10808                 goto REG_OK;
10809
10810             regMaskTP loMask;
10811             loMask = genRegMask(reg);
10812             if  (loMask & regSet.rsRegMaskFree())
10813                 bestReg = loMask;
10814         }
10815
10816         // for cast overflow we need to preserve addrReg for testing the hiDword
10817         // so we lock it to prevent regSet.rsPickReg from picking it.
10818         if (tree->gtOverflow())
10819             regSet.rsLockUsedReg(addrReg);
10820
10821         reg   = regSet.rsPickReg(needReg, bestReg);
10822
10823         if (tree->gtOverflow())
10824             regSet.rsUnlockUsedReg(addrReg);
10825
10826         noway_assert(genStillAddressable(op1));
10827
10828 REG_OK:
10829         if  (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
10830         {
10831             /* Generate "mov reg, [addr-mode]" */
10832             inst_RV_TT(ins_Load(TYP_INT), reg, op1);
10833         }
10834
10835         /* conv.ovf.i8.i4 or conv.ovf.u8.u4 */
10836
10837         if (tree->gtOverflow())
10838         {
10839             regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair)
10840                                                            : REG_NA;
10841
10842             emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10843             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
10844
10845             switch (dstType)
10846             {
10847             case TYP_INT:   // conv.ovf.i8.i4
10848                 /*  Generate the following sequence
10849
10850                         test loDWord, loDWord   // set flags
10851                         jl neg
10852                    pos: test hiDWord, hiDWord   // set flags
10853                         jne ovf
10854                         jmp done
10855                    neg: cmp hiDWord, 0xFFFFFFFF
10856                         jne ovf
10857                   done:
10858
10859                 */
10860
10861                 instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
10862                 if (tree->gtFlags & GTF_UNSIGNED)       // conv.ovf.u8.i4       (i4 > 0 and upper bits 0)
10863                 {
10864                     genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
10865                     goto UPPER_BITS_ZERO;
10866                 }
10867
10868 #if CPU_LOAD_STORE_ARCH
10869                 // This is tricky.
10870                 // We will generate code like:
10871                 //     if (...)
10872                 //     {
10873                 //         ...
10874                 //     }
10875                 //     else
10876                 //     {
10877                 //         ...
10878                 //     }
10879                 // We load the tree op1 into registers when we generate code for the 'if' clause.
10880                 // When we generate the 'else' clause, the tree is already marked as loaded into a register,
10881                 // so we would use it directly.  However, at run time we may reach the 'else' clause without
10882                 // having executed the 'if' clause, so force op1 to be evaluated again here.
10883                 genCodeForTree(op1, 0);
10884 #endif
10885
10886                 BasicBlock * neg;
10887                 BasicBlock * done;
10888
10889                 neg  = genCreateTempLabel();
10890                 done = genCreateTempLabel();
10891
10892                 // Is the loDWord positive or negative
10893                 inst_JMP(jmpLTS, neg);
10894
10895                 // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
10896
10897                 if (hiReg < REG_STK)
10898                 {
10899                     instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
10900                 }
10901                 else
10902                 {
10903                     inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
10904                 }
10905
10906                 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10907                 inst_JMP(EJ_jmp, done);
10908
10909                 // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
10910
10911                 genDefineTempLabel(neg);
10912
10913                 if (hiReg < REG_STK)
10914                 {
10915                     inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
10916                 }
10917                 else
10918                 {
10919                     inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
10920                 }
10921                 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10922
10923                 // Done
10924
10925                 genDefineTempLabel(done);
10926
10927                 break;
10928
10929             case TYP_UINT:  // conv.ovf.u8.u4
10930 UPPER_BITS_ZERO:
10931                 // Just check that the upper DWord is 0
10932
10933                 if (hiReg < REG_STK)
10934                 {
10935                     instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
10936                 }
10937                 else
10938                 {
10939                     inst_TT_IV(INS_cmp, op1, 0, 4);
10940                 }
10941                
10942                 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10943                 break;
10944
10945             default:
10946                 noway_assert(!"Unexpected dstType");
10947                 break;
10948             }
10949
10950             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
10951         }
10952
10953         regTracker.rsTrackRegTrash(reg);
10954         genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
10955
10956         genCodeForTree_DONE(tree, reg);
10957         return;
10958
10959     case TYP_BOOL:
10960     case TYP_BYTE:
10961     case TYP_SHORT:
10962     case TYP_CHAR:
10963     case TYP_UBYTE:
10964         break;
10965
10966     case TYP_UINT:
10967     case TYP_INT:
10968         break;
10969
10970 #if FEATURE_STACK_FP_X87
10971     case TYP_FLOAT:
10972         NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
10973         break;
10974
10975     case TYP_DOUBLE:
10976         if (compiler->opts.compCanUseSSE2)
10977         {
10978             // do the SSE2 based cast inline
10979             // getting the fp operand
10980
10981             regMaskTP       addrRegInt = 0;
10982             regMaskTP       addrRegFlt = 0;
10983
10984             // make the operand addressable
10985             // We don't want to collapse constant doubles into floats, as the SSE2 instruction
10986             // operates on doubles. Note that these (casts from constant doubles) usually get
10987             // folded, but we don't do it for some cases (infinities, etc.). So essentially this
10988             // shouldn't affect performance or size at all. We're fixing this for #336067.
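            //
            // Illustrative emitted sequence for the simple (addressable) case below -- a sketch;
            // the temp-based path further down differs:
            //      movsd     xmm0, qword ptr [addr]
            //      cvttsd2si reg, xmm0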
10989             op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
10990             if (!addrRegFlt &&  !op1->IsRegVar())
10991             {
10992                 // we have the address
10993
10994                 inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
10995                 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10996                 genUpdateLife(op1);
10997
10998                 reg = regSet.rsPickReg(needReg);
10999                 getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
11000
11001                 regTracker.rsTrackRegTrash(reg);
11002                 genCodeForTree_DONE(tree, reg);
11003             }
11004             else
11005             {
11006                 // we will need to use a temp to get it into the xmm reg
11007                 var_types   typeTemp = op1->TypeGet();
11008                 TempDsc  *  temp     = compiler->tmpGetTemp(typeTemp);
11009
11010                 size = EA_ATTR(genTypeSize(typeTemp));
11011
11012                 if (addrRegFlt)
11013                 {
11014                     // On the fp stack; Take reg to top of stack
11015
11016                     FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
11017                 }
11018                 else
11019                 {
11020                     // op1->IsRegVar()
11021                     // pick a register
11022                     reg = regSet.PickRegFloat();
11023                     if (!op1->IsRegVarDeath())
11024                     {
11025                         // Load it on the fp stack
11026                         genLoadStackFP(op1, reg);
11027                     }
11028                     else
11029                     {
11030                         // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
11031                         genLoadStackFP(op1, reg);
11032                         FlatFPX87_MoveToTOS(&compCurFPState, reg);
11033                     }
11034                 }
11035
11036                 // pop it off the fp stack
11037                 compCurFPState.Pop();
11038
11039                 getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
11040                 // pick a reg
11041                 reg = regSet.rsPickReg(needReg);
11042
11043                 inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
11044                 getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
11045
11046                 // done..release the temp
11047                 compiler->tmpRlsTemp(temp);
11048
11049                 // the reg is now trashed
11050                 regTracker.rsTrackRegTrash(reg);
11051                 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
11052                 genUpdateLife(op1);
11053                 genCodeForTree_DONE(tree, reg);
11054             }
11055         }
11056 #else
11057     case TYP_FLOAT:
11058     case TYP_DOUBLE:
11059         genCodeForTreeFloat(tree, needReg, bestReg);
11060 #endif // FEATURE_STACK_FP_X87
11061         return;
11062
11063     default:
11064         noway_assert(!"unexpected cast type");
11065     }
11066
11067     if (tree->gtOverflow())
11068     {
11069         /* Compute op1 into a register, and free the register */
11070
11071         genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
11072         reg = op1->gtRegNum;
11073
11074         /* Do we need to compare the value, or just check masks */
11075
11076         ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
11077         ssize_t typeMask;
11078
11079         switch (dstType)
11080         {
11081         case TYP_BYTE:
11082             typeMask = ssize_t((int)0xFFFFFF80);
11083             typeMin = SCHAR_MIN; typeMax = SCHAR_MAX;
11084             unsv = (tree->gtFlags & GTF_UNSIGNED);
11085             break;
11086         case TYP_SHORT:
11087             typeMask = ssize_t((int)0xFFFF8000);
11088             typeMin = SHRT_MIN;  typeMax = SHRT_MAX;
11089             unsv = (tree->gtFlags & GTF_UNSIGNED);
11090             break;
11091         case TYP_INT:
11092             typeMask = ssize_t((int)0x80000000L);
11093 #ifdef _TARGET_64BIT_
11094             unsv = (tree->gtFlags & GTF_UNSIGNED);
11095             typeMin = INT_MIN;  typeMax = INT_MAX;
11096 #else // _TARGET_64BIT_
11097             noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
11098             unsv = true;
11099 #endif // _TARGET_64BIT_
11100             break;
11101         case TYP_UBYTE:
11102             unsv = true;
11103             typeMask = ssize_t((int)0xFFFFFF00L);
11104             break;
11105         case TYP_CHAR:
11106             unsv = true;
11107             typeMask = ssize_t((int)0xFFFF0000L);
11108             break;
11109         case TYP_UINT:
11110             unsv = true;
11111 #ifdef _TARGET_64BIT_
11112             typeMask = 0xFFFFFFFF00000000LL;
11113 #else // _TARGET_64BIT_
11114             typeMask = 0x80000000L;
11115             noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
11116 #endif // _TARGET_64BIT_
11117             break;
11118         default:
11119             NO_WAY("Unknown type");
11120             return;
11121         }
11122
11123         // If we just have to check a mask, this must be
11124         // conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
11125         // or conv.i4u4.
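        // Illustrative sequence (a sketch; typeMask depends on dstType as set above):
        //      test reg, typeMask
        //      jne  throwOverflowHelper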
11126
11127         if (unsv)
11128         {
11129             inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
11130             emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
11131             genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
11132         }
11133         else
11134         {
11135             // Check that the value is in range.
11136             // This must be conv.ovf.i4i1, etc.
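            // Illustrative sequence for conv.ovf.i4i1 (a sketch; typeMin/typeMax come from the
            // switch above):
            //      cmp reg, 127        ; typeMax for TYP_BYTE
            //      jg  throwOverflowHelper
            //      cmp reg, -128       ; typeMin for TYP_BYTE
            //      jl  throwOverflowHelper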
11137
11138             // Compare with the MAX
11139
11140             noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
11141
11142             inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
11143             emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
11144             genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
11145
11146             // Compare with the MIN
11147
11148             inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
11149             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
11150             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
11151         }
11152
11153         genCodeForTree_DONE(tree, reg);
11154         return;
11155     }
11156
11157     /* Make the operand addressable */
11158
11159     addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
11160
11161     reg = genIntegerCast(tree, needReg, bestReg);
11162
11163     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11164
11165     genCodeForTree_DONE(tree, reg);
11166 }
11167
11168 /*****************************************************************************
11169  *
11170  *  Generate code for a leaf node of type GT_ADDR
11171  */
11172
11173 void                CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree,
11174                                                          regMaskTP  destReg,
11175                                                          regMaskTP  bestReg)
11176 {
11177     genTreeOps      oper     = tree->OperGet();
11178     const var_types treeType = tree->TypeGet();
11179     GenTreePtr      op1;
11180     regNumber       reg;
11181     regMaskTP       needReg  = destReg;
11182     regMaskTP       addrReg;
11183
11184 #ifdef DEBUG
11185     reg     =  (regNumber)0xFEEFFAAF;          // to detect uninitialized use
11186     addrReg = 0xDEADCAFE;
11187 #endif
11188
11189     // We should get here for ldloca, ldarga, ldsflda, ldelema,
11190     // or ldflda.
11191     if (oper == GT_ARR_ELEM)
11192     {
11193         op1 = tree;
11194     }
11195     else
11196     {
11197         op1 = tree->gtOp.gtOp1;
11198     }
11199
11200     // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11201     if (oper == GT_ARR_ELEM) {
11202         // To get the address of the array element,
11203         // we first call genMakeAddrArrElem to make the element addressable.
11204         //     (That is, for example, we first emit code to calculate EBX, and EAX.)
11205         // And then use lea to obtain the address.
11206         //     (That is, for example, we then emit
11207         //         lea EBX, bword ptr [EBX+4*EAX+36]
11208         //      to obtain the address of the array element.)
11209         addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11210     }
11211     else
11212     {
11213         addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11214     }
11215
11216     noway_assert( treeType == TYP_BYREF || treeType == TYP_I_IMPL );
11217
11218     // We want to reuse one of the scratch registers that were used
11219     // in forming the address mode as the target register for the lea.
11220     // If bestReg is unset or if it is set to one of the registers used to
11221     // form the address (i.e. addrReg), we calculate the scratch register
11222     // to use as the target register for the LEA
11223
11224     bestReg = regSet.rsUseIfZero (bestReg, addrReg);
11225     bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11226
11227     /* Even if addrReg is in regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11228        it since keepReg==false.
11229        If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11230        So this is guaranteed not to spill addrReg */
11231
11232     reg = regSet.rsPickReg(needReg, bestReg);
11233
11234     // Slight workaround: force the inst routine to think that the
11235     // value being loaded is an int (since that is what
11236     // LEA will return); otherwise it would try to allocate
11237     // two registers for a long, etc.
11238     noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
11239     op1->gtType = treeType;
11240
11241     inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11242
11243     // The Lea instruction above better not have tried to put the
11244     // 'value' pointed to by 'op1' in a register, LEA will not work.
11245     noway_assert(!(op1->gtFlags & GTF_REG_VAL));
11246
11247     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11248 //    gcInfo.gcMarkRegSetNpt(genRegMask(reg));
11249     noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11250
11251     regTracker.rsTrackRegTrash(reg);       // reg does have foldable value in it
11252     gcInfo.gcMarkRegPtrVal(reg, treeType);
11253
11254     genCodeForTree_DONE(tree, reg);
11255 }
11256
11257
11258 #ifdef _TARGET_ARM_
11259
11260 /*****************************************************************************
11261  *
11262  * Move (load/store) between float ret regs and struct promoted variable.
11263  *
11264  * varDsc - The struct variable to be loaded from or stored into.
11265  * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
11266  *
11267  */
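//
// For example (an illustrative sketch): for an HFA such as struct { float x; float y; }
// whose promoted fields live in registers or on the stack, a load walks the fields in
// order, moving each one into s0, s1, ... (advancing by two registers for a TYP_DOUBLE field).
//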
11268 void                CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
11269 {
11270     regNumber curReg = REG_FLOATRET;
11271
11272     unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11273     for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11274     {
11275         LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11276
11277         // Is the struct field promoted and sitting in a register?
11278         if (varDscFld->lvRegister)
11279         {
11280             // Move from the struct field into curReg if load
11281             // else move into struct field from curReg if store
11282             regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11283             regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
11284             if (srcReg != dstReg)
11285             {
11286                 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11287                 regTracker.rsTrackRegCopy(dstReg, srcReg);
11288             }
11289         }
11290         else
11291         {
11292             // This field is in memory, do a move between the field and float registers.
11293             emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11294             if (isLoadIntoFlt)
11295             {
11296                 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11297                 regTracker.rsTrackRegTrash(curReg);
11298             }
11299             else
11300             {
11301                 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11302             }
11303         }
11304
11305         // Advance the current reg.
11306         curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
11307     }
11308 }
11309
11310 void                CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
11311 {
11312     assert(tree->TypeGet() == TYP_STRUCT);
11313     assert(tree->gtOper == GT_LCL_VAR);
11314     LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11315     int slots = varDsc->lvSize() / REGSIZE_BYTES;
11316     if (varDsc->lvPromoted)
11317     {
11318         genLdStFltRetRegsPromotedVar(varDsc, true);
11319     }
11320     else
11321     {
11322         if (slots <= 2)
11323         {
11324             // Use the load float/double instruction.
11325             inst_RV_TT(
11326                 ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE),
11327                 REG_FLOATRET,
11328                 tree,
11329                 0,
11330                 (slots == 1) ? EA_4BYTE : EA_8BYTE);
11331         }
11332         else
11333         {
11334             // Use the load store multiple instruction.
11335             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11336             inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11337             regTracker.rsTrackRegTrash(reg);
11338             getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11339         }
11340     }
11341     genMarkTreeInReg(tree, REG_FLOATRET);
11342 }
11343
11344 void                CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
11345 {
11346     assert(tree->TypeGet() == TYP_STRUCT);
11347     assert(tree->OperGet() == GT_ASG);
11348
11349     // LHS should be lcl var or fld.
11350     GenTreePtr op1 = tree->gtOp.gtOp1;
11351
11352     // TODO: We had a bug where op1 was a GT_IND (the result of morphing a GT_BOX), caused by not properly
11353     // handling multiple levels of inlined functions that return an HFA on the right-hand side.
11354     // So, make the op1 check a noway_assert (which exists in non-debug builds) so we'll fall
11355     // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
11356     // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
11357     // as a regular assert().
11358     noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
11359     unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11360     assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
11361
11362     // The RHS should be a call.
11363     GenTreePtr op2 = tree->gtOp.gtOp2;
11364     assert(op2->gtOper == GT_CALL);
11365
11366     // Generate code for call and copy the return registers into the local.
11367     regMaskTP retMask = genCodeForCall(op2, true);
11368
11369     // Ret mask should be contiguously set, starting from s0 up to s3, or from d0 up to d3.
11370 #ifdef DEBUG
11371     regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
11372     assert((mask & (mask - 1)) == 0);
11373     assert(mask <= (1 << MAX_HFA_RET_SLOTS));
11374     assert((retMask & (((regMaskTP) RBM_FLOATRET) - 1)) == 0);
11375 #endif
11376
11377     int slots = genCountBits(retMask & RBM_ALLFLOAT);
11378
11379     LclVarDsc* varDsc = &compiler->lvaTable[varNum];
11380
11381     if (varDsc->lvPromoted)
11382     {
11383         genLdStFltRetRegsPromotedVar(varDsc, false);
11384     }
11385     else
11386     {
11387         if (slots <= 2)
11388         {
11389             inst_TT_RV(
11390                 ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE),
11391                 op1,
11392                 REG_FLOATRET,
11393                 0,
11394                 (slots == 1) ? EA_4BYTE : EA_8BYTE);
11395         }
11396         else
11397         {
11398             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11399             inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
11400             regTracker.rsTrackRegTrash(reg);
11401             getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11402         }
11403     }
11404 }
11405
11406 #endif // _TARGET_ARM_
11407
11408 /*****************************************************************************
11409  *
11410  *  Generate code for a GT_ASG tree
11411  */
11412
11413 #ifdef _PREFAST_
11414 #pragma warning(push)
11415 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
11416 #endif
11417 void                CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
11418 {
11419     noway_assert(tree->gtOper == GT_ASG);
11420
11421     GenTreePtr      op1      = tree->gtOp.gtOp1;
11422     GenTreePtr      op2      = tree->gtOp.gtOp2;
11423     regMaskTP       needReg  = RBM_ALLINT;
11424     regMaskTP       bestReg  = RBM_CORRUPT;
11425     regMaskTP       addrReg  = DUMMY_INIT(RBM_CORRUPT);
11426     bool            ovfl     = false;        // Do we need an overflow check
11427     bool            volat    = false;        // Is this a volatile store
11428     regMaskTP       regGC;
11429     instruction     ins;
11430 #ifdef DEBUGGING_SUPPORT
11431     unsigned        lclVarNum = compiler->lvaCount;
11432     unsigned        lclILoffs = DUMMY_INIT(0);
11433 #endif
11434
11435 #ifdef _TARGET_ARM_
11436     if (tree->gtType == TYP_STRUCT)
11437     {
11438         // We use copy block to assign structs; however, to receive HFAs in registers
11439         // from a CALL, we use an assignment: var = (hfa) call();
11440         assert(compiler->IsHfa(tree));
11441         genStoreFromFltRetRegs(tree);
11442         return;
11443     }
11444 #endif
11445
11446 #ifdef DEBUG 
11447     if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11448     {
11449         if (varTypeIsFloating(op1))
11450             assert(!"Bad IL: Illegal assignment of integer into float!");
11451         else
11452             assert(!"Bad IL: Illegal assignment of float into integer!");
11453     }
11454 #endif
11455
11456     if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11457     {
11458         op1 = genCodeForCommaTree(op1);  // Strip away any comma expressions.
11459     }
11460
11461     /* Is the target a register or local variable? */
11462     switch (op1->gtOper)
11463     {
11464         unsigned        varNum;
11465         LclVarDsc   *   varDsc;
11466
11467     case GT_LCL_VAR:
11468         varNum = op1->gtLclVarCommon.gtLclNum;
11469         noway_assert(varNum < compiler->lvaCount);
11470         varDsc = compiler->lvaTable + varNum;
11471
11472  #ifdef DEBUGGING_SUPPORT
11473         /* For non-debuggable code, every definition of a lcl-var has
11474          * to be checked to see if we need to open a new scope for it.
11475          * Remember the local var info to call siCheckVarScope
11476          * AFTER code generation of the assignment.
11477          */
11478         if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11479         {
11480             lclVarNum = varNum;
11481             lclILoffs = op1->gtLclVar.gtLclILoffs;
11482         }
11483  #endif
11484
11485         /* Check against dead store ? (with min opts we may have dead stores) */
11486
11487         noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11488
11489         /* Does this variable live in a register? */
11490
11491         if  (genMarkLclVar(op1))
11492             goto REG_VAR2;
11493
11494         break;
11495
11496 REG_VAR2:
11497
11498         /* Get hold of the target register */
11499
11500         regNumber op1Reg;
11501
11502         op1Reg = op1->gtRegVar.gtRegNum;
11503
11504         /* Compute the RHS (hopefully) into the variable's register.
11505            For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11506            as variables are kept alive everywhere. So we have to be
11507            careful if we want to compute the value directly into
11508            the variable's register. */
11509
11510 #ifdef DEBUG 
11511         bool   needToUpdateRegSetCheckLevel;
11512         needToUpdateRegSetCheckLevel = false;
11513 #endif   
11514
11515         // We should only be accessing lvVarIndex if varDsc is tracked.
11516         assert(varDsc->lvTracked);
11517
11518         if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11519         {
11520             noway_assert(compiler->opts.compDbgCode);
11521
11522             /* The predictor might expect us to generate op2 directly
11523                into the var's register. However, since the variable is
11524                already alive, first kill it and its register. */
11525
11526             if (rpCanAsgOperWithoutReg(op2, true))
11527             {
11528                 genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11529                 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11530 #ifdef DEBUG                
11531                 needToUpdateRegSetCheckLevel = true;
11532 #endif   
11533             }
11534         }
11535         else
11536         {
11537             needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11538         }
11539
11540 #ifdef DEBUG
11541
11542         /* Special cases: op2 is a GT_CNS_INT */
11543
11544         if  (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11545         {
11546             /* Save the old life status */
11547
11548             VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11549             VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11550
11551             /* Set a flag to avoid printing the message
11552                and remember that life was changed. */
11553
11554             genTempLiveChg = false;
11555         }
11556 #endif
11557
11558 #ifdef DEBUG        
11559         if (needToUpdateRegSetCheckLevel)
11560             compiler->compRegSetCheckLevel++;
11561 #endif        
11562         genCodeForTree(op2, needReg, genRegMask(op1Reg));
11563 #ifdef DEBUG        
11564         if (needToUpdateRegSetCheckLevel)
11565             compiler->compRegSetCheckLevel--;
11566         noway_assert(compiler->compRegSetCheckLevel>=0);
11567 #endif        
11568         noway_assert(op2->gtFlags & GTF_REG_VAL);
11569
11570         /* Make sure the value ends up in the right place ... */
11571
11572         if  (op2->gtRegNum != op1Reg)
11573         {
11574             /* Make sure the target of the store is available */
11575
11576             if  (regSet.rsMaskUsed & genRegMask(op1Reg))
11577                 regSet.rsSpillReg(op1Reg);
11578
11579 #ifdef _TARGET_ARM_
11580             if (op1->TypeGet() == TYP_FLOAT)
11581             {
11582                 // This can only occur when we are returning a non-HFA struct
11583                 // that is composed of a single float field.
11584                 // 
11585                 inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11586             }
11587             else
11588 #endif // _TARGET_ARM_
11589             {
11590                 inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11591             }
11592
11593             /* The value has been transferred to 'op1Reg' */
11594
11595             regTracker.rsTrackRegCopy (op1Reg, op2->gtRegNum);
11596
11597             if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11598                 gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11599
11600             gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11601         }
11602         else
11603         {
11604             // First we need to remove it from the original reg set mask (or else trigger an
11605             // assert when we add it to the other reg set mask).
11606             gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11607             gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11608
11609             // The emitter has logic that tracks the GCness of registers and asserts if you
11610             // try to do bad things to a GC pointer (like lose its GCness).
11611
11612             // An explicit cast of a GC pointer to an int (which is legal if the
11613             // pointer is pinned) is encoded as an assignment of a GC source
11614             // to an integer variable.  Unfortunately, if the source was the last
11615             // use, and the source register gets reused by the destination, no
11616             // code gets emitted (that is where we are right now).  The emitter
11617             // thinks the register is a GC pointer (it did not see the cast).
11618             // This causes asserts, as well as bad GC info since we will continue
11619             // to report the register as a GC pointer even if we do arithmetic
11620             // with it. So force the emitter to see the change in the type
11621             // of variable by placing a label.
11622             // We only have to do this check at this point because in the
11623             // CAST morphing, we create a temp and assignment whenever we
11624             // have a cast that loses its GCness.
11625
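            //
            // (Illustrative example, not from the original: a pinned object reference explicitly
            //  cast and assigned to an int local can reach here with op2 still typed as a GC ref
            //  and op1 typed as an integer; if both end up in the same register and no instruction
            //  is emitted, the label added below is the only thing that tells the emitter the
            //  register no longer holds a GC pointer.)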
11626             if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11627             {
11628                 void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
11629             }
11630
11631         }
11632
11633         addrReg = 0;
11634
11635         genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11636         goto LExit;
11637
11638     case GT_LCL_FLD:
11639
11640         // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11641         // to worry about it being enregistered.
11642         noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11643         break;
11644
11645     case GT_CLS_VAR:
11646
11647         __fallthrough;
11648
11649     case GT_IND:
11650     case GT_NULLCHECK:
11651
11652         assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11653
11654         if (op1->gtFlags & GTF_IND_VOLATILE)
11655         {
11656             volat = true;
11657         }
11658
11659         break;
11660
11661     default:
11662         break;
11663     }
11664
11665     /* Is the value being assigned a simple one? */
11666
11667     noway_assert(op2);
11668     switch (op2->gtOper)
11669     {
11670     case GT_LCL_VAR:
11671
11672         if  (!genMarkLclVar(op2))
11673             goto SMALL_ASG;
11674
11675         __fallthrough;
11676
11677     case GT_REG_VAR:
11678
11679         /* Is the target a byte/short/char value? */
11680
11681         if  (varTypeIsSmall(op1->TypeGet()))
11682             goto SMALL_ASG;
11683
11684         if  (tree->gtFlags & GTF_REVERSE_OPS)
11685             goto SMALL_ASG;
11686
11687         /* Make the target addressable */
11688
11689         op1 = genCodeForCommaTree(op1);  // Strip away comma expressions.
11690
11691         addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11692
11693         /* Does the write barrier helper do the assignment? */
11694
11695         regGC = WriteBarrier(op1, op2, addrReg);
11696
11697         if  (regGC == RBM_NONE)
11698         {
11699             // No, assignment was not done by the WriteBarrier
11700
11701 #ifdef _TARGET_ARM_
11702             if (volat)
11703             {
11704                 // Emit a memory barrier instruction before the store
11705                 instGen_MemoryBarrier();
11706             }
11707 #endif
11708
11709             /* Move the value into the target */
11710
11711             inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11712
11713             // This is done in WriteBarrier when (regGC != RBM_NONE)
11714
11715             /* Free up anything that was tied up by the LHS */
11716             genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11717         }
11718
11719         /* Free up the RHS */
11720         genUpdateLife(op2);
11721
11722         /* Remember that we've also touched the op2 register */
11723
11724         addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11725         break;
11726
11727
11728     case GT_CNS_INT:
11729
11730         ssize_t   ival;  ival  = op2->gtIntCon.gtIconVal;
11731         emitAttr  size;  size  = emitTypeSize(tree->TypeGet());
11732
11733         ins = ins_Store(op1->TypeGet());
11734
11735         // If we are storing a constant into a local variable
11736         // we extend the size of the store here 
11737         // this normally takes place in CodeGen::inst_TT_IV on x86.
11738         // 
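        // (For example -- a sketch of what the code below does: storing the constant -1 into a
        //  TYP_SHORT local sign-extends the immediate to 0xFFFFFFFF if needed and, when the local
        //  is not a promoted-struct field, widens the store to a full 4-byte "mov dword ptr [lcl], -1".)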
11739         if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11740         {
11741             unsigned        varNum = op1->gtLclVarCommon.gtLclNum;
11742             LclVarDsc   *   varDsc = compiler->lvaTable + varNum;
11743             
11744             // Fix the immediate by sign extending if needed
11745             if (!varTypeIsUnsigned(varDsc->TypeGet()))
11746             {
11747                 if (size == EA_1BYTE)
11748                 {
11749                     if ((ival & 0x7f) != ival)
11750                         ival = ival | 0xffffff00;
11751                 }
11752                 else
11753                 {
11754                     assert(size == EA_2BYTE);
11755                     if ((ival & 0x7fff) != ival)
11756                         ival = ival | 0xffff0000;
11757                 }
11758             }
11759
11760             // A local stack slot is at least 4 bytes in size, regardless of
11761             // what the local var is typed as, so auto-promote it here
11762             // unless it is a field of a promoted struct
11763             if (!varDsc->lvIsStructField)
11764             {
11765                 size = EA_SET_SIZE(size, EA_4BYTE);
11766                 ins  = ins_Store(TYP_INT);
11767             }
11768         }
11769
11770         /* Make the target addressable */
11771
11772         addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11773
11774 #ifdef _TARGET_ARM_
11775         if (volat)
11776         {
11777             // Emit a memory barrier instruction before the store 
11778             instGen_MemoryBarrier();
11779         }
11780 #endif
11781
11782         /* Move the value into the target */
11783
11784         noway_assert(op1->gtOper != GT_REG_VAR);
11785         if (compiler->opts.compReloc && op2->IsIconHandle())
11786         {
11787             /* The constant is actually a handle that may need relocation
11788                applied to it.  genComputeReg will do the right thing (see
11789                code in genCodeForTreeConst), so we'll just call it to load
11790                the constant into a register. */
11791
11792             genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11793             addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11794             noway_assert(op2->gtFlags & GTF_REG_VAL);
11795             inst_TT_RV(ins, op1, op2->gtRegNum);
11796             genReleaseReg(op2);
11797         }
11798         else
11799         {
11800             regSet.rsLockUsedReg(addrReg);
11801
11802
11803 #if REDUNDANT_LOAD
11804             bool copyIconFromReg = true;
11805             regNumber iconReg = REG_NA;
11806
11807 #ifdef _TARGET_ARM_
11808             // Only if the constant can't be encoded in a small instruction,
11809             // look for another register to copy the value from. (Assumes
11810             // target is a small register.)
11811             if ((op1->gtFlags & GTF_REG_VAL) &&
11812                 !isRegPairType(tree->gtType) &&
11813                 arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11814             {
11815                 copyIconFromReg = false;
11816             }
11817 #endif // _TARGET_ARM_
11818
11819             if (copyIconFromReg)
11820             {
11821                 iconReg = regTracker.rsIconIsInReg(ival);
11822                 if (iconReg == REG_NA)
11823                     copyIconFromReg = false;
11824             }
11825
11826             if  (copyIconFromReg &&
11827                  (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) || (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11828             {
11829                 /* Move the value into the target */
11830
11831                 inst_TT_RV(ins, op1, iconReg, 0, size);
11832             }
11833             else
11834 #endif // REDUNDANT_LOAD
11835             {
11836                 inst_TT_IV(ins, op1, ival, 0, size);
11837             }
11838
11839             regSet.rsUnlockUsedReg(addrReg);
11840         }
11841
11842         /* Free up anything that was tied up by the LHS */
11843
11844         genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11845         break;
11846
11847     default:
11848
11849 SMALL_ASG:
11850
11851         bool             isWriteBarrier = false;
11852         regMaskTP        needRegOp1     = RBM_ALLINT;
11853         RegSet::ExactReg mustReg        = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11854
11855         /*  Is the LHS more complex than the RHS? */
11856
11857         if  (tree->gtFlags & GTF_REVERSE_OPS)
11858         {
11859             /* Is the target a byte/short/char value? */
11860
11861             if (varTypeIsSmall(op1->TypeGet()))
11862             {
11863                 noway_assert(op1->gtOper != GT_LCL_VAR ||
11864                        (op1->gtFlags & GTF_VAR_CAST) ||
11865                        // TODO: Why does this have to be true?
11866                        compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11867                        compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11868
11869                 if  (op2->gtOper == GT_CAST && !op2->gtOverflow())
11870                 {
11871                     /* Special case: cast to small type */
11872
11873                     if  (op2->CastToType() >= op1->gtType)
11874                     {
11875                         /* Make sure the cast operand is not > int */
11876
11877                         if  (op2->CastFromType() <= TYP_INT)
11878                         {
11879                             /* Cast via a non-smaller type */
11880
11881                             op2 = op2->gtCast.CastOp();
11882                         }
11883                     }
11884                 }
11885
11886                 if (op2->gtOper             == GT_AND &&
11887                     op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11888                 {
11889                     unsigned mask;
11890                     switch (op1->gtType)
11891                     {
11892                     case TYP_BYTE : mask = 0x000000FF; break;
11893                     case TYP_SHORT: mask = 0x0000FFFF; break;
11894                     case TYP_CHAR : mask = 0x0000FFFF; break;
11895                     default: goto SIMPLE_SMALL;
11896                     }
11897
11898                     if  (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11899                     {
11900                         /* Redundant AND */
11901
11902                         op2 = op2->gtOp.gtOp1;
11903                     }
11904                 }
11905
11906                 /* Must get the new value into a byte register */
11907
11908 SIMPLE_SMALL:
11909                     if (varTypeIsByte(op1->TypeGet()))
11910                         genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11911                     else
11912                         goto NOT_SMALL;
11913             }
11914             else
11915             {
11916 NOT_SMALL:
11917                 /* Generate the RHS into a register */
11918
11919                 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11920                 if (isWriteBarrier)
11921                 {
11922 #if NOGC_WRITE_BARRIERS
11923                     // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11924                     needReg = Target::exclude_WriteBarrierReg(needReg);
11925                     mustReg = RegSet::EXACT_REG;
11926 #else // !NOGC_WRITE_BARRIERS
11927                     // This code should be generic across architectures.
11928
11929                     // For the standard JIT Helper calls
11930                     // op1 goes into REG_ARG_0 and
11931                     // op2 goes into REG_ARG_1
11932                     //
11933                     needRegOp1 = RBM_ARG_0;
11934                     needReg    = RBM_ARG_1;
11935 #endif // !NOGC_WRITE_BARRIERS
11936                 }
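                // Summary of the two write-barrier flavors configured above:
                //  - NOGC_WRITE_BARRIERS: op2 must avoid REG_WRITE_BARRIER and is forced into
                //    an exact register (mustReg == RegSet::EXACT_REG).
                //  - otherwise: the standard JIT helper convention is used, with op1's address
                //    in REG_ARG_0 and op2's value in REG_ARG_1.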
11937                 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11938             }
11939
11940             noway_assert(op2->gtFlags & GTF_REG_VAL);
11941
11942             /* Make the target addressable */
11943
11944             op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
11945             addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11946                 
11947             /*  Make sure the RHS register hasn't been spilled;
11948                 keep the register marked as "used", otherwise
11949                 we might get the pointer lifetimes wrong.
11950             */
11951
11952             if (varTypeIsByte(op1->TypeGet()))
11953                 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11954
11955             genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11956             noway_assert(op2->gtFlags & GTF_REG_VAL);
11957
11958             /* Lock the RHS register temporarily (only registers already marked as used may be locked) */
11959
11960             regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11961
11962             /* Make sure the LHS is still addressable */
11963
11964             addrReg = genKeepAddressable(op1, addrReg);
11965
11966             /* We can now unlock the (already-used) RHS register */
11967
11968             regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11969
11970             /* Does the write barrier helper do the assignment? */
11971
11972             regGC = WriteBarrier(op1, op2, addrReg);
11973
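            // A nonzero regGC means WriteBarrier() emitted a barrier helper that performed the
            // store itself; in that case it also freed up whatever the LHS had tied up, so both
            // the plain store and the genDoneAddressable call below are skipped.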
11974             if  (regGC != 0)
11975             {
11976                 // Yes, assignment done by the WriteBarrier
11977                 noway_assert(isWriteBarrier);
11978             }
11979             else
11980             {
11981 #ifdef _TARGET_ARM_
11982                 if (volat)
11983                 {
11984                     // Emit a memory barrier instruction before the store 
11985                     instGen_MemoryBarrier();
11986                 }
11987 #endif
11988
11989                 /* Move the value into the target */
11990
11991                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11992             }
11993
11994             /* Update the current liveness info */
11995
11996 #ifdef DEBUG
11997             if (compiler->opts.varNames) genUpdateLife(tree);
11998 #endif
11999
12000             // If op2 register is still in use, free it.  (Might not be in use, if
12001             // a full-call write barrier was done, and the register was a caller-saved
12002             // register.)
12003             regMaskTP op2RM = genRegMask(op2->gtRegNum);
12004             if (op2RM & regSet.rsMaskUsed) regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
12005
12006             // This is done in WriteBarrier when (regGC != 0)
12007             if  (regGC == 0)
12008             {
12009                 /* Free up anything that was tied up by the LHS */
12010                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
12011             }
12012         }
12013         else
12014         {
12015             /* Make the target addressable */
12016
12017             isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
12018
12019             if  (isWriteBarrier)
12020             {
12021 #if NOGC_WRITE_BARRIERS
12022                 /* Try to avoid RBM_TMP_0 */
12023                 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
12024                 mustReg    = RegSet::EXACT_REG;   // For op2
12025 #else // !NOGC_WRITE_BARRIERS
12026                 // This code should be generic across architectures.
12027
12028                 // For the standard JIT Helper calls
12029                 // op1 goes into REG_ARG_0 and
12030                 // op2 goes into REG_ARG_1
12031                 //
12032                 needRegOp1 = RBM_ARG_0;
12033                 needReg    = RBM_ARG_1;
12034                 mustReg    = RegSet::EXACT_REG;   // For op2
12035 #endif // !NOGC_WRITE_BARRIERS
12036             }
12037
12038             needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
12039
12040             op1 = genCodeForCommaTree(op1);  // Strip away any comma expression.
12041
12042             addrReg = genMakeAddressable(op1,
12043                                          needRegOp1,
12044                                          RegSet::KEEP_REG, true);
12045             
12046 #if CPU_HAS_BYTE_REGS
12047             /* Is the target a byte value? */
12048             if (varTypeIsByte(op1->TypeGet()))
12049             {
12050                 /* Must get the new value into a byte register */
12051                 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
12052                 mustReg = RegSet::EXACT_REG;
12053
12054                 if  (op2->gtType >= op1->gtType)
12055                     op2->gtFlags |= GTF_SMALL_OK;
12056             }
12057 #endif
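            // Note: on x86 only EAX/EBX/ECX/EDX have byte-addressable forms (AL/BL/CL/DL),
            // which is why a byte-sized target restricts the RHS to RBM_BYTE_REGS above.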
12058
12059 #if NOGC_WRITE_BARRIERS
12060             /* For WriteBarrier we can't use REG_WRITE_BARRIER */
12061             if  (isWriteBarrier)
12062                 needReg = Target::exclude_WriteBarrierReg(needReg);
12063
12064             /* Also avoid using the previously computed addrReg(s) */
12065             bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
12066
12067             /* If we have a reg available to grab then use bestReg */
12068             if (bestReg & regSet.rsRegMaskCanGrab())
12069                 needReg = bestReg;
12070
12071             mustReg = RegSet::EXACT_REG;
12072 #endif
12073
12074             /* Generate the RHS into a register */
12075             genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
12076             noway_assert(op2->gtFlags & GTF_REG_VAL);
12077
12078             /* Make sure the target is still addressable */
12079             addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
12080             noway_assert(op2->gtFlags & GTF_REG_VAL);
12081
12082             /* Does the write barrier helper do the assignment? */
12083
12084             regGC = WriteBarrier(op1, op2, addrReg);
12085
12086             if  (regGC != 0)
12087             {
12088                 // Yes, assignment done by the WriteBarrier
12089                 noway_assert(isWriteBarrier);
12090             }
12091             else
12092             {
12093                 assert(!isWriteBarrier);
12094
12095 #ifdef _TARGET_ARM_
12096                 if (volat)
12097                 {
12098                     // Emit a memory barrier instruction before the store 
12099                     instGen_MemoryBarrier();
12100                 }
12101 #endif
12102
12103                 /* Move the value into the target */
12104
12105                 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
12106             }
12107
12108             /* The new value is no longer needed */
12109
12110             genReleaseReg(op2);
12111
12112             /* Update the current liveness info */
12113
12114 #ifdef DEBUG
12115             if (compiler->opts.varNames) genUpdateLife(tree);
12116 #endif
12117
12118             // This is done in WriteBarrier when (regGC != 0)
12119             if  (regGC == 0)
12120             {
12121                 /* Free up anything that was tied up by the LHS */
12122                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
12123             }
12124         }
12125
12126         addrReg = RBM_NONE;
12127         break;
12128     }
12129
12130     noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
12131     genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
12132
12133 LExit:
12134 #ifdef DEBUGGING_SUPPORT
12135     /* For non-debuggable code, every definition of a lcl-var has
12136      * to be checked to see if we need to open a new scope for it.
12137      */
12138     if (lclVarNum < compiler->lvaCount)
12139         siCheckVarScope(lclVarNum, lclILoffs);
12140 #endif
12141 }
12142 #ifdef _PREFAST_
12143 #pragma warning(pop)
12144 #endif
12145
12146 /*****************************************************************************
12147  *
12148  *  Generate code to complete the assignment operation
12149  */
12150
12151 void                CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree,
12152                                                               regMaskTP  addrReg,
12153                                                               regNumber  reg,
12154                                                               bool       ovfl)
12155 {
12156     const var_types treeType = tree->TypeGet();
12157     GenTreePtr      op1      = tree->gtOp.gtOp1;
12158     GenTreePtr      op2      = tree->gtOp.gtOp2;
12159     noway_assert(op2);
12160
12161     if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR) genUpdateLife(op1);
12162     genUpdateLife(tree);
12163
12164 #if REDUNDANT_LOAD
12165
12166     if (op1->gtOper == GT_LCL_VAR)
12167         regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12168
12169     /* Have we just assigned a value that is in a register? */
12170
12171     if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
12172     {
12173         regTracker.rsTrackRegAssign(op1, op2);
12174     }
12175
12176 #endif
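    // For example, after generating "lclX = <expr>" with <expr> left in ESI, the tracker now
    // records that ESI holds lclX, so a later read of lclX can reuse ESI instead of reloading
    // it from the stack - this is the redundant-load elimination enabled above.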
12177
12178     noway_assert(addrReg != 0xDEADCAFE);
12179
12180     gcInfo.gcMarkRegSetNpt(addrReg);
12181
12182     if (ovfl)
12183     {
12184         noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12185
12186         /* If GTF_REG_VAL is not set, and it is a small type, then
12187            we must have loaded it up from memory, done the increment,
12188            checked for overflow, and then stored it back to memory */
12189
12190         bool ovfCheckDone =  (genTypeSize(op1->TypeGet()) < sizeof(int)) &&
12191                             !(op1->gtFlags & GTF_REG_VAL);
12192
12193         if (!ovfCheckDone)
12194         {
12195             // For small sizes, reg should be set as we sign/zero extend it.
12196
12197             noway_assert(genIsValidReg(reg) ||
12198                          genTypeSize(treeType) == sizeof(int));
12199
12200             /* Currently we don't morph x=x+y into x+=y in try blocks
12201              * if we need an overflow check, as x+y may throw an exception.
12202              * We can do it if x is not live on entry to the catch block.
12203              */
12204             noway_assert(!compiler->compCurBB->hasTryIndex());
12205
12206             genCheckOverflow(tree);
12207         }
12208     }
12209 }
12210
12211
12212 /*****************************************************************************
12213  *
12214  *  Generate code for a special op tree
12215  */
12216
12217 void                CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
12218                                                      regMaskTP  destReg,
12219                                                      regMaskTP  bestReg)
12220 {
12221     genTreeOps oper         = tree->OperGet();
12222     regNumber       reg     = DUMMY_INIT(REG_CORRUPT);
12223     regMaskTP       regs    = regSet.rsMaskUsed;
12224
12225     noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
12226
12227     switch  (oper)
12228     {
12229     case GT_CALL:
12230         regs = genCodeForCall(tree, true);
12231
12232         /* If the result is in a register, make sure it ends up in the right place */
12233
12234         if (regs != RBM_NONE)
12235         {
12236             genMarkTreeInReg(tree, genRegNumFromMask(regs));
12237         }
12238
12239         genUpdateLife(tree);
12240         return;
12241
12242     case GT_FIELD:
12243         NO_WAY("should not see this operator in this phase");
12244         break;
12245
12246     case GT_ARR_BOUNDS_CHECK:
12247         {
12248 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
12249             // MUST NEVER CHECK-IN WITH THIS ENABLED.
12250             // This is just for convenience in doing performance investigations and requires x86ret builds
12251             if (!JitConfig.JitNoRngChk())
12252 #endif
12253                 genRangeCheck(tree);
12254         }
12255         return;
12256
12257     case GT_ARR_ELEM:
12258         genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
12259         return;
12260
12261     case GT_CMPXCHG:
12262         {
12263 #if defined(_TARGET_XARCH_)
12264             // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
12265             //
12266             // Since this is a "call", evaluate the operands from right to left.  Don't worry about spilling
12267             // right now, just get the trees evaluated.
12268
12269             // As a friendly reminder.  IL args are evaluated left to right.
12270             //
12271             GenTreePtr location  = tree->gtCmpXchg.gtOpLocation;     // arg1
12272             GenTreePtr value     = tree->gtCmpXchg.gtOpValue;        // arg2
12273             GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand;    // arg3
12274             regMaskTP addrReg;
12275
12276
12277             // This little piggy (on the left) went to market.
12278             bool isAddr = genMakeIndAddrMode(location,
12279                                              tree,
12280                                              false, /* not for LEA */
12281                                              RBM_ALLINT,
12282                                              RegSet::KEEP_REG,
12283                                              &addrReg);
12284
12285             if (!isAddr)
12286             {
12287                 genCodeForTree(location, RBM_NONE, RBM_NONE);
12288                 assert(location->gtFlags & GTF_REG_VAL);
12289                 addrReg = genRegMask(location->gtRegNum);
12290                 regSet.rsMarkRegUsed(location);
12291             }
12292
12293             // This little piggy (in the middle) went home.
12294             // We must have a reg for the Value, but it doesn't really matter which register. 
12295             
12296             // Try to avoid EAX and the address register if possible.
12297             genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
12298
12299             // This little piggy (on the right) had roast beef
12300             // cmpxchg uses EAX as an implicit operand to hold the comparand
12301             // We're going to destroy EAX in this operation, so we better not be keeping 
12302             // anything important in it.
12303
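            // Roughly, "lock cmpxchg [location], reg" behaves as:
            //     if (EAX == [location]) { [location] = reg; ZF = 1; }
            //     else                   { EAX = [location];  ZF = 0; }
            // so EAX always ends up with the original memory value, which is why EAX and the
            // flags are treated as trashed once the instruction is emitted below.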
12304 #ifdef DEBUG
12305             if (RBM_EAX & regSet.rsMaskVars)
12306             {
12307                 //We have a variable enregistered in EAX.  Make sure it goes dead in this tree.
12308                 for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
12309                 {
12310                     const LclVarDsc & varDesc = compiler->lvaTable[varNum];
12311                     if (!varDesc.lvIsRegCandidate())
12312                         continue;
12313                     if (!varDesc.lvRegister)
12314                         continue;
12315                     if (isFloatRegType(varDesc.lvType))
12316                         continue;
12317                     if (varDesc.lvRegNum != REG_EAX)
12318                         continue;
12319                     //I suppose I should technically check lvOtherReg.
12320
12321                     //OK, finally.  Let's see if this local goes dead.
12322                     //If the variable isn't going dead during this tree, we've just trashed a local with
12323                     //cmpxchg.
12324                     noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
12325
12326                     break;
12327                 }
12328             }
12329 #endif
12330             genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
12331
12332             //Oh, no more piggies.
12333             //* Author's note.  I believe in bounty and chose to omit the piggy who got none.
12334
12335
12336             //By this point we've evaluated everything.  However the odds are that we've spilled something by
12337             //now.  Let's recover all the registers and force them to stay.
12338
12339             //Well, we just computed comparand, so it's still in EAX.
12340             noway_assert(comparand->gtRegNum == REG_EAX);
12341             regSet.rsLockUsedReg(RBM_EAX);
12342
12343             //Stick it anywhere other than EAX.
12344             genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
12345             reg = value->gtRegNum;
12346             noway_assert(reg != REG_EAX);
12347             regSet.rsLockUsedReg(genRegMask(reg));
12348
12349             if (isAddr)
12350             {
12351                 addrReg = genKeepAddressable(/*location*/tree, addrReg, 0/*avoidMask*/);
12352             }
12353             else
12354             {
12355                 genRecoverReg(location, ~(RBM_EAX|genRegMask(reg)), RegSet::KEEP_REG);
12356             }
12357
12358             regSet.rsUnlockUsedReg(genRegMask(reg));
12359             regSet.rsUnlockUsedReg(RBM_EAX);
12360
12361             instGen(INS_lock);
12362             if (isAddr)
12363             {
12364                 sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
12365                 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
12366             }
12367             else
12368             {
12369                 instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
12370                 genReleaseReg(location);
12371             }
12372
12373             genReleaseReg(value);
12374             genReleaseReg(comparand);
12375
12376             //EAX and the value register are both trashed at this point.
12377             regTracker.rsTrackRegTrash(REG_EAX);
12378             regTracker.rsTrackRegTrash(reg);
12379
12380             reg = REG_EAX;
12381
12382             //Until I try to optimize a cmp after a cmpxchg, just trash the flags for safety's sake.
12383             genFlagsEqualToNone();
12384             break;
12385 #else // not defined(_TARGET_XARCH_)
12386             NYI("GT_CMPXCHG codegen");
12387             break;
12388 #endif
12389         }
12390
12391     default:
12392 #ifdef  DEBUG
12393         compiler->gtDispTree(tree);
12394 #endif
12395         noway_assert(!"unexpected operator");
12396         NO_WAY("unexpected operator");
12397     }
12398
12399     noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
12400     genCodeForTree_DONE(tree, reg);
12401 }
12402
12403
12404 /*****************************************************************************
12405  *
12406  *  Generate code for the given tree. tree->gtRegNum will be set to the
12407  *  register where the tree lives.
12408  *
12409  *  If 'destReg' is non-zero, we'll do our best to compute the value into a 
12410  *  register that is in that register set.
12411  *  Use genComputeReg() if you need the tree in a specific register.
12412  *  Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12413  *  the register may only be read from, not written to.
12414  *  Use genMakeAddressable() if you only need the tree to be accessible
12415  *  using a complex addressing mode, and do not necessarily need the tree
12416  *  materialized in a register.
12417  *
12418  *  The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12419  *
12420  *  The register will not be marked as used. Use regSet.rsMarkRegUsed() if the 
12421  *  register will not be consumed right away and could possibly be spilled.
12422  */
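//  A rough sketch of how callers choose among these (patterns as they appear in this file):
//
//      genCodeForTree(op, RBM_NONE, RBM_NONE);                               // value in some register, read-only
//      genComputeReg(op, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);      // value specifically in EAX, kept live
//      addrReg = genMakeAddressable(op, RBM_ALLINT, RegSet::KEEP_REG, true); // op only needs to be addressable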
12423
12424 void                CodeGen::genCodeForTree(GenTreePtr tree,
12425                                             regMaskTP  destReg,
12426                                             regMaskTP  bestReg)
12427 {
12428 #if 0
12429     if  (compiler->verbose)
12430     {
12431         printf("Generating code for tree ");
12432         Compiler::printTreeID(tree);
12433         printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12434     }
12435     genStressRegs(tree);
12436 #endif
12437
12438     noway_assert(tree);
12439     noway_assert(tree->gtOper != GT_STMT);
12440     assert(tree->IsNodeProperlySized());
12441
12442         // When assigning to an enregistered local variable we receive
12443         // a hint that we should target the register that is used to
12444         // hold the enregistered local variable.
12445         // When receiving this hint, both the destReg and bestReg masks are set
12446         // to the register that is used by the enregistered local variable.
12447         //
12448         // However, it is possible for a different local variable that targets
12449         // the same register to become alive (and later die)
12450         // as we descend the expression tree.
12451         //
12452         // To handle such cases we remove any registers that are currently alive
12453         // from both the destReg and bestReg masks.
12454         //
12455     regMaskTP liveMask = genLiveMask(tree);
12456
12457     // This removes any registers used to hold enregistered locals
12458     // from the destReg and bestReg masks.
12459     // After this either mask could become 0
12460     // 
12461     destReg &= ~liveMask; 
12462     bestReg &= ~liveMask;
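    // For example, if the caller hinted destReg = bestReg = RBM_ESI (targeting an enregistered
    // local that lives in ESI) but another local occupying ESI is still alive at this node,
    // ESI is stripped from both masks here; if that empties them, the fallbacks below widen
    // them again.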
12463
12464     /* 'destReg' of 0 really means 'any' */
12465
12466     destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
12467
12468     if (destReg != RBM_ALL(tree->TypeGet()))
12469         bestReg = regSet.rsUseIfZero(bestReg, destReg);
12470
12471     // Long, float, and double have their own codegen functions
12472     switch (tree->TypeGet())
12473     {
12474
12475     case TYP_LONG:
12476 #if !   CPU_HAS_FP_SUPPORT
12477     case TYP_DOUBLE:
12478 #endif
12479         genCodeForTreeLng(tree, destReg, /*avoidReg*/RBM_NONE);
12480         return;
12481
12482
12483 #if CPU_HAS_FP_SUPPORT
12484     case TYP_FLOAT:
12485     case TYP_DOUBLE:
12486     
12487         // For comma nodes, we'll get back here for the last node in the comma list.
12488         if (tree->gtOper != GT_COMMA)
12489         {
12490             genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12491             return;
12492         }
12493         break;
12494 #endif
12495
12496 #ifdef DEBUG
12497     case TYP_UINT:
12498     case TYP_ULONG:
12499         noway_assert(!"These types are only used as markers in GT_CAST nodes");
12500         break;
12501 #endif
12502
12503     default:
12504         break;
12505     }
12506
12507     /* Is the value already in a register? */
12508
12509     if  (tree->gtFlags & GTF_REG_VAL)
12510     {
12511         genCodeForTree_REG_VAR1(tree);
12512         return;
12513     }
12514
12515     /* We better not have a spilled value here */
12516
12517     noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12518
12519     /* Figure out what kind of a node we have */
12520
12521     unsigned kind = tree->OperKind();
12522
12523     if  (kind & GTK_CONST)
12524     {
12525         /* Handle constant nodes */
12526
12527         genCodeForTreeConst(tree, destReg, bestReg);
12528     }
12529     else if (kind & GTK_LEAF)
12530     {
12531         /* Handle leaf nodes */
12532
12533         genCodeForTreeLeaf(tree, destReg, bestReg);
12534     }
12535     else if (kind & GTK_SMPOP)
12536     {
12537         /* Handle 'simple' unary/binary operators */
12538
12539         genCodeForTreeSmpOp(tree, destReg, bestReg);
12540     }
12541     else
12542     {
12543         /* Handle special operators */
12544
12545         genCodeForTreeSpecialOp(tree, destReg, bestReg);
12546     }
12547 }
12548
12549
12550 /*****************************************************************************
12551  *
12552  *  Generate code for all the basic blocks in the function.
12553  */
12554
12555 #ifdef _PREFAST_
12556 #pragma warning(push)
12557 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
12558 #endif
12559 void                CodeGen::genCodeForBBlist()
12560 {
12561     unsigned        varNum;
12562     LclVarDsc   *   varDsc;
12563
12564     unsigned        savedStkLvl;
12565
12566 #ifdef  DEBUG
12567     genInterruptibleUsed        = true;
12568     unsigned        stmtNum     = 0;
12569     unsigned        totalCostEx = 0;
12570     unsigned        totalCostSz = 0;
12571
12572     // You have to be careful if you create basic blocks from now on
12573     compiler->fgSafeBasicBlockCreation = false;
12574
12575     // This stress mode is not compatible with fully interruptible GC
12576     if (genInterruptible && compiler->opts.compStackCheckOnCall)
12577     {
12578         compiler->opts.compStackCheckOnCall = false;
12579     }
12580
12581     // This stress mode is not compatible with fully interruptible GC
12582     if (genInterruptible && compiler->opts.compStackCheckOnRet)
12583     {
12584         compiler->opts.compStackCheckOnRet = false;
12585     }
12586 #endif
12587
12588     // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
12589     genPrepForEHCodegen();
12590
12591     assert(!compiler->fgFirstBBScratch || compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12592
12593     /* Initialize the spill tracking logic */
12594
12595     regSet.rsSpillBeg();
12596
12597     /* Initialize the line# tracking logic */
12598
12599 #ifdef DEBUGGING_SUPPORT
12600     if (compiler->opts.compScopeInfo)
12601     {
12602         siInit();
12603     }
12604 #endif
12605
12606
12607 #ifdef _TARGET_X86_
12608     if (compiler->compTailCallUsed)
12609     {
12610         noway_assert(isFramePointerUsed());
12611         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12612     }
12613 #endif
12614
12615     if (compiler->opts.compDbgEnC)
12616     {
12617         noway_assert(isFramePointerUsed());
12618         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12619     }
12620
12621     /* If we have any pinvoke calls, we might potentially trash everything */
12622
12623     if (compiler->info.compCallUnmanaged)
12624     {
12625         noway_assert(isFramePointerUsed());  // Setup of Pinvoke frame currently requires an EBP style frame
12626         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12627     }
12628
12629     /* Initialize the pointer tracking code */
12630
12631     gcInfo.gcRegPtrSetInit();
12632     gcInfo.gcVarPtrSetInit();
12633
12634     /* If any arguments live in registers, mark those regs as such */
12635
12636     for (varNum = 0, varDsc = compiler->lvaTable;
12637          varNum < compiler->lvaCount;
12638          varNum++  , varDsc++)
12639     {
12640         /* Is this variable a parameter assigned to a register? */
12641
12642         if  (!varDsc->lvIsParam || !varDsc->lvRegister)
12643             continue;
12644
12645         /* Is the argument live on entry to the method? */
12646
12647         if  (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12648             continue;
12649
12650 #if CPU_HAS_FP_SUPPORT
12651         /* Is this a floating-point argument? */
12652
12653         if (varDsc->IsFloatRegType())
12654             continue;
12655
12656         noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12657 #endif
12658
12659         /* Mark the register as holding the variable */
12660
12661         if  (isRegPairType(varDsc->lvType))
12662         {
12663             regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12664
12665             if  (varDsc->lvOtherReg != REG_STK)
12666                 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12667         }
12668         else
12669         {
12670             regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12671         }
12672     }
12673
12674     unsigned finallyNesting = 0;
12675
12676     // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12677     // allocation at the start of each basic block.
12678     VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12679  
12680     /*-------------------------------------------------------------------------
12681      *
12682      *  Walk the basic blocks and generate code for each one
12683      *
12684      */
12685
12686     BasicBlock *    block;
12687     BasicBlock *    lblk;  /* previous block */
12688
12689     for (lblk =  NULL, block  = compiler->fgFirstBB;
12690                        block != NULL;
12691          lblk = block, block  = block->bbNext)
12692     {
12693 #ifdef DEBUG
12694         if (compiler->verbose)
12695         {
12696             printf("\n=============== Generating ");
12697             block->dspBlockHeader(compiler, true, true);
12698             compiler->fgDispBBLiveness(block);
12699         }
12700 #endif // DEBUG
12701
12702         VARSET_TP       VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
12703
12704         regMaskTP       gcrefRegs = 0;
12705         regMaskTP       byrefRegs = 0;
12706
12707         /* Does any other block jump to this point ? */
12708
12709         if  (block->bbFlags & BBF_JMP_TARGET)
12710         {
12711             /* Someone may jump here, so trash all regs */
12712
12713             regTracker.rsTrackRegClr();
12714
12715             genFlagsEqualToNone();
12716         }
12717         else
12718         {
12719             /* No jump, but pointers always need to get trashed for proper GC tracking */
12720
12721             regTracker.rsTrackRegClrPtr();
12722         }
12723
12724         /* No registers are used or locked on entry to a basic block */
12725
12726         regSet.rsMaskUsed  = RBM_NONE;
12727         regSet.rsMaskMult  = RBM_NONE;
12728         regSet.rsMaskLock  = RBM_NONE;
12729
12730         // If we need to reserve registers such that they are not used
12731         // by CodeGen in this BasicBlock, we do so here.
12732         // On ARM, when we have large frame offsets for locals, RBM_R10
12733         // will be in the regSet.rsMaskResvd set; additionally, if a
12734         // localloc/alloca is used, RBM_R9 is in the regSet.rsMaskResvd
12735         // set. We lock these registers here.
12736         //
12737         if (regSet.rsMaskResvd != RBM_NONE)
12738         {
12739             regSet.rsLockReg(regSet.rsMaskResvd); 
12740             regSet.rsSetRegsModified(regSet.rsMaskResvd);
12741         }
12742
12743         /* Figure out which registers hold variables on entry to this block */
12744         
12745         regMaskTP specialUseMask = regSet.rsMaskResvd;
12746
12747         specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE|RBM_FPBASE
12748                                                      : RBM_SPBASE;
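        // e.g. on x86 with an EBP frame this is typically rsMaskResvd | RBM_ESP | RBM_EBP;
        // the assert further below checks that no variable is enregistered in any of these.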
12749         regSet.ClearMaskVars();
12750         VarSetOps::ClearD(compiler, compiler->compCurLife);
12751         VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12752
12753 #if FEATURE_STACK_FP_X87
12754         VarSetOps::AssignNoCopy(compiler,
12755                                 genFPregVars,
12756                                 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12757         genFPregCnt     = VarSetOps::Count(compiler, genFPregVars);
12758         genFPdeadRegCnt = 0;
12759 #endif
12760         gcInfo.gcResetForBB();
12761         
12762         genUpdateLife(liveSet);  // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12763 #if FEATURE_STACK_FP_X87
12764         VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12765 #endif
12766
12767         // We should never enregister variables in any of the specialUseMask registers
12768         noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12769
12770         VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
12771         while (iter.NextElem(compiler, &varIndex))
12772         {
12773             varNum = compiler->lvaTrackedToVarNum[varIndex];
12774             varDsc = compiler->lvaTable + varNum;
12775             assert(varDsc->lvTracked);
12776             /* Ignore the variable if it's not in a reg */
12777
12778             if  (!varDsc->lvRegister)
12779                 continue;
12780             if (isFloatRegType(varDsc->lvType))
12781                 continue;
12782
12783             /* Get hold of the index and the bitmask for the variable */
12784             regNumber  regNum  = varDsc->lvRegNum;
12785             regMaskTP  regMask = genRegMask(regNum);
12786
12787             regSet.AddMaskVars(regMask);
12788
12789             if       (varDsc->lvType == TYP_REF)
12790                 gcrefRegs |= regMask;
12791             else if  (varDsc->lvType == TYP_BYREF)
12792                 byrefRegs |= regMask;
12793
12794             /* Mark the register holding the variable as such */
12795
12796             if (varTypeIsMultiReg(varDsc))
12797             {
12798                 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12799                 if  (varDsc->lvOtherReg != REG_STK)
12800                 {
12801                     regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12802                     regMask |= genRegMask(varDsc->lvOtherReg);
12803                 }
12804             }
12805             else
12806             {
12807                 regTracker.rsTrackRegLclVar(regNum, varNum);
12808             }
12809         }
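        // At this point gcrefRegs/byrefRegs describe the live enregistered GC pointers; e.g. a
        // TYP_REF local living in ESI contributes RBM_ESI to gcrefRegs, so the emitter starts
        // the block knowing ESI holds a GC reference (see gcInfo.gcRegGCrefSetCur below).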
12810
12811         gcInfo.gcPtrArgCnt  = 0;
12812
12813 #if FEATURE_STACK_FP_X87
12814
12815         regSet.rsMaskUsedFloat =
12816         regSet.rsMaskRegVarFloat =
12817         regSet.rsMaskLockedFloat = RBM_NONE;
12818
12819         memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12820         memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12821
12822         // Setup fp state on block entry
12823         genSetupStateStackFP(block);
12824
12825 #ifdef DEBUG
12826         if (compiler->verbose)
12827         {
12828             JitDumpFPState();
12829         }
12830 #endif // DEBUG
12831 #endif // FEATURE_STACK_FP_X87
12832
12833         /* Make sure we keep track of what pointers are live */
12834
12835         noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12836         gcInfo.gcRegGCrefSetCur = gcrefRegs;
12837         gcInfo.gcRegByrefSetCur = byrefRegs;
12838
12839         /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12840            represent the exception object (TYP_REF). 
12841            We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12842            to the block, since it will be the first thing evaluated
12843            (thanks to GTF_ORDER_SIDEEFF).
12844          */
12845
12846         if (handlerGetsXcptnObj(block->bbCatchTyp))
12847         {
12848             GenTreePtr firstStmt = block->FirstNonPhiDef(); 
12849             if (firstStmt != NULL)
12850             {
12851                 GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
12852                 if (compiler->gtHasCatchArg(firstTree))
12853                 {
12854                     gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12855                 }
12856             }
12857         }
12858
12859         /* Start a new code output block */
12860
12861 #if FEATURE_EH_FUNCLETS
12862 #if defined(_TARGET_ARM_)
12863         // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
12864         // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
12865         // calls the funclet during non-exceptional control flow.
12866         if (block->bbFlags & BBF_FINALLY_TARGET)
12867         {
12868             assert(block->bbFlags & BBF_JMP_TARGET);
12869
12870             // Create a label that we'll use for computing the start of an EH region, if this block is
12871             // at the beginning of such a region. If we used the existing bbEmitCookie as is for
12872             // determining the EH regions, then this NOP would end up outside of the region, if this
12873             // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
12874             // would be executed, which we would prefer not to do.
12875
12876 #ifdef  DEBUG
12877             if (compiler->verbose)
12878             {
12879                 printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
12880             }
12881 #endif
12882
12883             block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(
12884                                      gcInfo.gcVarPtrSetCur,
12885                                      gcInfo.gcRegGCrefSetCur,
12886                                      gcInfo.gcRegByrefSetCur);
12887
12888             instGen(INS_nop);
12889         }
12890 #endif // defined(_TARGET_ARM_)
12891
12892         genUpdateCurrentFunclet(block);
12893 #endif // FEATURE_EH_FUNCLETS
12894
12895 #ifdef _TARGET_XARCH_
12896         if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12897         {
12898             getEmitter()->emitLoopAlign();
12899         }
12900 #endif
12901
12902 #ifdef  DEBUG
12903         if  (compiler->opts.dspCode)
12904             printf("\n      L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12905 #endif
12906
12907         block->bbEmitCookie = NULL;
12908
12909         if  (block->bbFlags & (BBF_JMP_TARGET|BBF_HAS_LABEL))
12910         {
12911             /* Mark a label and update the current set of live GC refs */
12912
12913             block->bbEmitCookie = getEmitter()->emitAddLabel(
12914                                      gcInfo.gcVarPtrSetCur,
12915                                      gcInfo.gcRegGCrefSetCur,
12916                                      gcInfo.gcRegByrefSetCur,
12917 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12918                                      /*isFinally*/block->bbFlags & BBF_FINALLY_TARGET
12919 #else
12920                                      FALSE
12921 #endif
12922                                      );
12923         }
12924
12925         if (block == compiler->fgFirstColdBlock)
12926         {
12927 #ifdef DEBUG
12928             if (compiler->verbose)
12929             {
12930                 printf("\nThis is the start of the cold region of the method\n");
12931             }
12932 #endif
12933             // We should never have a block that falls through into the Cold section
12934             noway_assert(!lblk->bbFallsThrough());
12935
12936             // We require the block that starts the Cold section to have a label 
12937             noway_assert(block->bbEmitCookie);
12938             getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12939         }
12940
12941         /* Both stacks are always empty on entry to a basic block */
12942
12943         genStackLevel = 0;
12944 #if FEATURE_STACK_FP_X87
12945         genResetFPstkLevel();
12946 #endif // FEATURE_STACK_FP_X87
12947
12948 #if !FEATURE_FIXED_OUT_ARGS
12949         /* Check for inserted throw blocks and adjust genStackLevel */
12950
12951         if  (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
12952         {
12953             noway_assert(block->bbFlags & BBF_JMP_TARGET);
12954
12955             genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
12956
12957             if  (genStackLevel)
12958             {
12959 #ifdef _TARGET_X86_
12960                 getEmitter()->emitMarkStackLvl(genStackLevel);
12961                 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
12962                 genStackLevel = 0;
12963 #else // _TARGET_X86_
12964                 NYI("Need emitMarkStackLvl()");
12965 #endif // _TARGET_X86_
12966             }
12967         }
12968 #endif // !FEATURE_FIXED_OUT_ARGS
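        // For example, if a throw helper block is reached with two 4-byte values still pushed,
        // genStackLevel is 8 and the code above emits "add esp, 8" so the stack pointer is
        // rebalanced before the helper block's own code runs (x86 only; a sketch, not exact output).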
12969
12970         savedStkLvl = genStackLevel;
12971
12972         /* Tell everyone which basic block we're working on */
12973
12974         compiler->compCurBB = block;
12975
12976 #ifdef DEBUGGING_SUPPORT
12977         siBeginBlock(block);
12978
12979         // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12980         if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12981             genIPmappingAdd((IL_OFFSETX) ICorDebugInfo::NO_MAPPING, true);
12982
12983         bool    firstMapping = true;
12984 #endif // DEBUGGING_SUPPORT
12985
12986         /*---------------------------------------------------------------------
12987          *
12988          *  Generate code for each statement-tree in the block
12989          *
12990          */
12991
12992 #if FEATURE_EH_FUNCLETS
12993         if (block->bbFlags & BBF_FUNCLET_BEG)
12994         {
12995             genReserveFuncletProlog(block);
12996         }
12997 #endif // FEATURE_EH_FUNCLETS
12998
12999         for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
13000         {
13001             noway_assert(stmt->gtOper == GT_STMT);
13002
13003 #if defined(DEBUGGING_SUPPORT)
13004
13005             /* Do we have a new IL-offset ? */
13006
13007             if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
13008             {
13009                 /* Create and append a new IP-mapping entry */
13010                 genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
13011                 firstMapping = false;
13012             }
13013
13014 #endif // DEBUGGING_SUPPORT
13015
13016 #ifdef DEBUG
13017             if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
13018             {
13019                 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
13020                 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
13021                 {
13022                     while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
13023                     {
13024                         genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, ">    ");
13025                     }
13026                 }
13027             }
13028 #endif // DEBUG
13029
13030             /* Get hold of the statement tree */
13031             GenTreePtr  tree = stmt->gtStmt.gtStmtExpr;
13032
13033 #ifdef  DEBUG
13034             stmtNum++;
13035             if (compiler->verbose)
13036             {
13037                 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
13038                 printf("Holding variables: ");
13039                 dspRegMask(regSet.rsMaskVars); printf("\n\n");
13040                 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
13041                 printf("\n");
13042 #if FEATURE_STACK_FP_X87
13043                 JitDumpFPState();
13044 #endif
13045
13046                 printf("Execution Order:\n");
13047                 for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList;
13048                      treeNode != NULL;
13049                      treeNode = treeNode->gtNext)
13050                 {
13051                     compiler->gtDispTree(treeNode, 0, NULL, true);
13052                 }
13053                 printf("\n");
13054             }
13055             totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
13056             totalCostSz +=  stmt->gtCostSz;
13057 #endif // DEBUG
13058
13059             compiler->compCurStmt = stmt;
13060
13061             compiler->compCurLifeTree = NULL;
13062             switch (tree->gtOper)
13063             {
13064             case GT_CALL:
13065                 // Managed Retval under a managed debugger - we need to make sure that the returned ref-type is
13066                 // reported as alive, even though it is not used within the caller, for the managed debugger's sake.  So
13067                 // consider the return value of the method as used if generating debuggable code.
13068                 genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
13069                 genUpdateLife  (tree);
13070                 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
13071                 break;
13072
13073             case GT_IND:
13074             case GT_NULLCHECK:
13075
13076                 // Just do the side effects
13077                 genEvalSideEffects(tree);
13078                 break;
13079
13080             default:
13081                 /* Generate code for the tree */
13082
13083                 genCodeForTree(tree, 0);
13084                 break;
13085             }
13086
13087             regSet.rsSpillChk();
13088
13089             /* The value of the tree isn't used, unless it's a return stmt */
13090
13091             if  (tree->gtOper != GT_RETURN)
13092                 gcInfo.gcMarkRegPtrVal(tree);
13093
13094 #if FEATURE_STACK_FP_X87
13095             genEndOfStatement();
13096 #endif
13097
13098 #ifdef DEBUG
13099             /* Make sure we didn't bungle pointer register tracking */
13100
13101             regMaskTP ptrRegs       = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur);
13102             regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
13103
13104             // If return is a GC-type, clear it.  Note that if a common
13105             // epilog is generated (compiler->genReturnBB) it has a void return
13106             // even though we might return a ref.  We can't use the compRetType
13107             // as the determiner because something we are tracking as a byref
13108             // might be used as a return value of an int function (which is legal)
13109             if  (tree->gtOper == GT_RETURN &&
13110                 (varTypeIsGC(compiler->info.compRetType) ||
13111                     (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
13112             {
13113                 nonVarPtrRegs &= ~RBM_INTRET;
13114             }
13115
13116             // When profiling, the first statement in a catch block will be the
13117             // harmless "inc" instruction (does not interfere with the exception
13118             // object).
13119
13120             if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) &&
13121                 (stmt == block->bbTreeList) &&
13122                 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
13123             {
13124                 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
13125             }
13126
13127             if  (nonVarPtrRegs)
13128             {
13129                 printf("Regset after tree=");
13130                 Compiler::printTreeID(tree);
13131                 printf(" BB%02u gcr=", block->bbNum);
13132                 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
13133                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
13134                 printf(", byr=");
13135                 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
13136                 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
13137                 printf(", regVars=");
13138                 printRegMaskInt(regSet.rsMaskVars);
13139                 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
13140                 printf("\n");
13141             }
13142
13143             noway_assert(nonVarPtrRegs == 0);
13144 #endif // DEBUG
13145
13146             noway_assert(stmt->gtOper == GT_STMT);
13147
13148 #ifdef DEBUGGING_SUPPORT
13149             genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
13150 #endif
13151
13152         } //-------- END-FOR each statement-tree of the current block ---------
13153
13154 #ifdef  DEBUGGING_SUPPORT
13155
13156         if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
13157         {
13158             siEndBlock(block);
13159
13160             /* Is this the last block, and are there any open scopes left ? */
13161
13162             bool isLastBlockProcessed = (block->bbNext == NULL);
13163             if (block->isBBCallAlwaysPair())
13164             {
13165                 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
13166             }
13167
13168             if (isLastBlockProcessed && siOpenScopeList.scNext)
13169             {
13170                 /* This assert no longer holds, because we may insert a throw
13171                    block to demarcate the end of a try or finally region when they
13172                    are at the end of the method.  It would be nice if we could fix
13173                    our code so that this throw block will no longer be necessary. */
13174
13175                 //noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
13176
13177                 siCloseAllOpenScopes();
13178             }
13179         }
13180
13181 #endif // DEBUGGING_SUPPORT
13182
13183         genStackLevel -= savedStkLvl;
13184
13185         gcInfo.gcMarkRegSetNpt(gcrefRegs|byrefRegs);
13186
13187         if  (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
13188             compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
13189
13190         /* Both stacks should always be empty on exit from a basic block */
13191
13192         noway_assert(genStackLevel == 0);
13193 #if FEATURE_STACK_FP_X87
13194         noway_assert(genGetFPstkLevel() == 0);
13195
13196         // Do the FPState matching that may have to be done
13197         genCodeForEndBlockTransitionStackFP(block);
13198 #endif
13199
13200         noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
13201
13202         /* Do we need to generate a jump or return? */
13203
13204         switch (block->bbJumpKind)
13205         {
13206         case BBJ_ALWAYS:
13207             inst_JMP(EJ_jmp, block->bbJumpDest);
13208             break;
13209
13210         case BBJ_RETURN:
13211             genExitCode(block);
13212             break;
13213
13214         case BBJ_THROW:
13215             // If we have a throw at the end of a function or funclet, we need to emit another instruction
13216             // afterwards to help the OS unwinder determine the correct context during unwind.
13217             // We insert an unexecuted breakpoint instruction in several situations
13218             // following a throw instruction:
13219             // 1. If the throw is the last instruction of the function or funclet. This helps
13220             //    the OS unwinder determine the correct context during an unwind from the
13221             //    thrown exception.
13222             // 2. If this is the last block of the hot section.
13223             // 3. If the subsequent block is a special throw block.
13224             if ((block->bbNext == NULL)
13225 #if FEATURE_EH_FUNCLETS
13226                 || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
13227 #endif // FEATURE_EH_FUNCLETS
13228                 || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext))
13229                 || block->bbNext == compiler->fgFirstColdBlock
13230                 )
13231             {
13232                 instGen(INS_BREAKPOINT); // This should never get executed
13233             }
13234
13235             break;
13236
13237         case BBJ_CALLFINALLY:
13238
13239 #if defined(_TARGET_X86_)
13240
13241             /* If we are about to invoke a finally locally from a try block,
13242                we have to set the hidden slot corresponding to the finally's
13243                nesting level. When invoked in response to an exception, the
13244                EE usually does it.
13245
13246                We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
13247
13248                This code depends on this order not being messed up.
13249                We will emit :
13250                     mov [ebp-(n+1)],0
13251                     mov [ebp-  n  ],0xFC
13252                     push &step
13253                     jmp  finallyBlock
13254
13255               step: mov [ebp-  n  ],0
13256                     jmp leaveTarget
13257               leaveTarget:
13258              */
13259
13260             noway_assert(isFramePointerUsed());
13261
13262             // Get the nesting level which contains the finally
13263             compiler->fgGetNestingLevel(block, &finallyNesting);
13264
13265             // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13266             unsigned filterEndOffsetSlotOffs;
13267             filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
13268             
13269             unsigned curNestingSlotOffs;
13270             curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
13271             
13272             // Zero out the slot for the next nesting level
13273             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
13274                                        compiler->lvaShadowSPslotsVar, curNestingSlotOffs - sizeof(void*));
13275
13276             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK,
13277                                        compiler->lvaShadowSPslotsVar, curNestingSlotOffs); 
13278
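            // For instance, with finallyNesting == 0 this reproduces the sequence sketched in
            // the comment above: the slot for the next (deeper) nesting level is zeroed and the
            // slot for the current level receives LCL_FINALLY_MARK (the 0xFC marker) before we
            // push the return address and jump to the finally.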
13279             // Now push the address of where the finally funclet should
13280             // return to directly.
13281             if ( !(block->bbFlags & BBF_RETLESS_CALL) )
13282             {
13283                 assert(block->isBBCallAlwaysPair());
13284                 getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13285             }
13286             else
13287             {
13288                 // EE expects a DWORD, so we give him 0
13289                 inst_IV(INS_push_hide, 0);
13290             }
13291
13292             // Jump to the finally BB
13293             inst_JMP(EJ_jmp, block->bbJumpDest);
13294
13295 #elif defined(_TARGET_ARM_)
13296
13297             // Now set REG_LR to the address of where the finally funclet should
13298             // return to directly.
13299
13300             BasicBlock * bbFinallyRet; bbFinallyRet = NULL;
13301
13302             // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13303             // we would have otherwise created retless calls.
13304             assert(block->isBBCallAlwaysPair());
13305
13306             assert(block->bbNext                     != NULL);
13307             assert(block->bbNext->bbJumpKind         == BBJ_ALWAYS);
13308             assert(block->bbNext->bbJumpDest         != NULL);
13309             assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13310
13311             bbFinallyRet = block->bbNext->bbJumpDest;
13312             bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13313
13314 #if 0
13315             // We don't know the address of the finally funclet yet.  But adr requires that the offset
13316             // to the finally funclet from the current IP be within 4095 bytes. So this code is disabled
13317             // for now.
13318             getEmitter()->emitIns_J_R (INS_adr,
13319                                      EA_4BYTE,
13320                                      bbFinallyRet,
13321                                      REG_LR);
13322 #else // 0
13323             // Load the address where the finally funclet should return into LR.
13324             // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13325             // the return.
13326             getEmitter()->emitIns_R_L (INS_movw,
13327                                      EA_4BYTE_DSP_RELOC,
13328                                      bbFinallyRet,
13329                                      REG_LR);
13330             getEmitter()->emitIns_R_L (INS_movt,
13331                                      EA_4BYTE_DSP_RELOC,
13332                                      bbFinallyRet,
13333                                      REG_LR);
13334             regTracker.rsTrackRegTrash(REG_LR);
13335 #endif // 0
13336
13337             // Jump to the finally BB
13338             inst_JMP(EJ_jmp, block->bbJumpDest);
13339 #else
13340             NYI("TARGET");
13341 #endif
13342
13343             // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13344             // jump target using bbJumpDest - that is already used to point
13345             // to the finally block. So just skip past the BBJ_ALWAYS unless the
13346             // block is RETLESS.
13347             if ( !(block->bbFlags & BBF_RETLESS_CALL) )
13348             {
13349                 assert(block->isBBCallAlwaysPair());
13350
13351                 lblk = block; 
13352                 block = block->bbNext;
13353             }
13354             break;
13355
13356 #ifdef _TARGET_ARM_
13357
13358         case BBJ_EHCATCHRET:
13359             // set r0 to the address the VM should return to after the catch
13360             getEmitter()->emitIns_R_L (INS_movw,
13361                                      EA_4BYTE_DSP_RELOC,
13362                                      block->bbJumpDest,
13363                                      REG_R0);
13364             getEmitter()->emitIns_R_L (INS_movt,
13365                                      EA_4BYTE_DSP_RELOC,
13366                                      block->bbJumpDest,
13367                                      REG_R0);
13368             regTracker.rsTrackRegTrash(REG_R0);
13369
13370             __fallthrough;
13371
13372         case BBJ_EHFINALLYRET:
13373         case BBJ_EHFILTERRET:
13374             genReserveFuncletEpilog(block);
13375             break;
13376
13377 #else // _TARGET_ARM_
13378
13379         case BBJ_EHFINALLYRET:
13380         case BBJ_EHFILTERRET:
13381         case BBJ_EHCATCHRET:
13382             break;
13383
13384 #endif // _TARGET_ARM_
13385
13386         case BBJ_NONE:
13387         case BBJ_COND:
13388         case BBJ_SWITCH:
13389             break;
13390
13391         default:
13392             noway_assert(!"Unexpected bbJumpKind");
13393             break;
13394         }
13395
13396 #ifdef  DEBUG
13397         compiler->compCurBB = 0;
13398 #endif
13399
13400     } //------------------ END-FOR each block of the method -------------------
13401
13402     /* Nothing is live at this point */
13403     genUpdateLife(VarSetOps::MakeEmpty(compiler));
13404
13405     /* Finalize the spill  tracking logic */
13406
13407     regSet.rsSpillEnd();
13408
13409     /* Finalize the temp   tracking logic */
13410
13411     compiler->tmpEnd();
13412
13413 #ifdef  DEBUG
13414     if (compiler->verbose)
13415     {
13416         printf("\n# ");
13417         printf("totalCostEx = %6d, totalCostSz = %5d ",
13418                totalCostEx, totalCostSz);
13419         printf("%s\n", compiler->info.compFullName);
13420     }
13421 #endif
13422 }
13423 #ifdef _PREFAST_
13424 #pragma warning(pop)
13425 #endif
13426
13427 /*****************************************************************************
13428  *
13429  *  Generate code for a long operation.
13430  *  needReg is a recommendation of which registers to use for the tree.
13431  *  For partially enregistered longs, the tree will be marked as GTF_REG_VAL
13432  *    without loading the stack part into a register. Note that only leaf
13433  *    nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13434  *    enregistered so that we can know the memory location of the other half.
13435  */
13436
13437 #ifdef _PREFAST_
13438 #pragma warning(push)
13439 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
13440 #endif
13441 void                CodeGen::genCodeForTreeLng(GenTreePtr tree,
13442                                                regMaskTP  needReg,
13443                                                regMaskTP  avoidReg)
13444 {
13445     genTreeOps      oper;
13446     unsigned        kind;
13447
13448     regPairNo       regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13449     regMaskTP       addrReg;
13450     regNumber       regLo;
13451     regNumber       regHi;
13452
13453     noway_assert(tree);
13454     noway_assert(tree->gtOper != GT_STMT);
13455     noway_assert(genActualType(tree->gtType) == TYP_LONG);
13456
13457     /* Figure out what kind of a node we have */
13458
13459     oper = tree->OperGet();
13460     kind = tree->OperKind();
13461
13462     if  (tree->gtFlags & GTF_REG_VAL)
13463     {
13464 REG_VAR_LONG:
13465         regPair   = tree->gtRegPair;
13466
13467         gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13468
13469         goto DONE;
13470     }
13471
13472     /* Is this a constant node? */
13473
13474     if  (kind & GTK_CONST)
13475     {
13476         __int64         lval;
13477
13478         /* Pick a register pair for the value */
13479
13480         regPair  = regSet.rsPickRegPair(needReg);
13481
13482         /* Load the value into the registers */
13483
13484 #if !   CPU_HAS_FP_SUPPORT
13485         if  (oper == GT_CNS_DBL)
13486         {
13487             noway_assert(sizeof(__int64) == sizeof(double));
13488
13489             noway_assert(sizeof(tree->gtLngCon.gtLconVal) ==
13490                    sizeof(tree->gtDblCon.gtDconVal));
13491
13492             lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13493         }
13494         else
13495 #endif
13496         {
13497             noway_assert(oper == GT_CNS_LNG);
13498
13499             lval = tree->gtLngCon.gtLconVal;
13500         }
13501
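        // Materialize the 64-bit constant one 32-bit half at a time; e.g. for
        // lval = 0x1122334455667788 the low register gets 0x55667788 and the
        // high register gets 0x11223344.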
13502         genSetRegToIcon(genRegPairLo(regPair), int(lval      ));
13503         genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13504         goto DONE;
13505     }
13506
13507     /* Is this a leaf node? */
13508
13509     if  (kind & GTK_LEAF)
13510     {
13511         switch (oper)
13512         {
13513         case GT_LCL_VAR:
13514
13515 #if REDUNDANT_LOAD
13516
13517             /*  This case must handle an int64 LCL_VAR that is both enregistered
13518              *  and also has a cached copy of itself in a different set of
13519              *  registers.
13520              *  We want to return the registers that have the most in common
13521              *  with the needReg mask
13522              */
13523
13524             /*  Does the var have a copy of itself in the cached registers?
13525              *  And are these cached registers both free?
13526              *  If so use these registers if they match any needReg.
13527              */
13528
13529             regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13530
13531             if ( (                      regPair       != REG_PAIR_NONE)  &&
13532                  ( (regSet.rsRegMaskFree() & needReg) == needReg      )  &&
13533                  ((genRegPairMask(regPair) & needReg) != RBM_NONE     ))
13534             {
13535                 goto DONE;
13536             }
13537
13538             /*  Does the variable live in a register?
13539              *  If so use these registers.
13540              */
13541             if  (genMarkLclVar(tree))
13542                 goto REG_VAR_LONG;
13543
13544             /*  If the tree is not an enregistered variable then
13545              *  be sure to use any cached registers that contain
13546              *  a copy of this local variable
13547              */
13548             if (regPair != REG_PAIR_NONE)
13549             {
13550                 goto DONE;
13551             }
13552 #endif
13553             goto MEM_LEAF;
13554
13555         case GT_LCL_FLD:
13556
13557             // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13558             // to worry about it being enregistered.
13559             noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13560             goto MEM_LEAF;
13561
13562         case GT_CLS_VAR:
13563         MEM_LEAF:
13564
13565             /* Pick a register pair for the value */
13566
13567             regPair = regSet.rsPickRegPair(needReg);
13568
13569             /* Load the value into the registers */
13570
13571             instruction  loadIns;
13572
13573             loadIns = ins_Load(TYP_INT);   // INS_ldr
13574             regLo   = genRegPairLo(regPair);
13575             regHi   = genRegPairHi(regPair); 
13576             // assert(regLo != regHi);  // regpair property
13577
13578 #if CPU_LOAD_STORE_ARCH
13579             {
13580                 regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13581                 inst_RV_TT(INS_lea, regAddr, tree, 0);
13582                 regTracker.rsTrackRegTrash(regAddr);
13583
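                // If regAddr happens to be the same register as regLo, load the high half
                // first so the address isn't clobbered before the second load; the regpair
                // property (regLo != regHi) then guarantees regHi != regAddr.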
13584                 if (regLo != regAddr)
13585                 {
13586                     // assert(regLo != regAddr);  // forced by if statement
13587                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13588                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13589                 }
13590                 else
13591                 {
13592                     // assert(regHi != regAddr);  // implied by regpair property and the if statement
13593                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13594                     getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13595                 }
13596             }
13597 #else
13598             inst_RV_TT(loadIns, regLo, tree, 0);
13599             inst_RV_TT(loadIns, regHi, tree, 4);
13600 #endif
13601
13602 #ifdef _TARGET_ARM_
13603             if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13604             {
13605                 // Emit a memory barrier instruction after the load  
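                // (A volatile load needs acquire semantics; this legacy ARM backend
                // conservatively approximates that with a full barrier after the load.)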
13606                 instGen_MemoryBarrier();
13607             }
13608 #endif
13609
13610             regTracker.rsTrackRegTrash(regLo);
13611             regTracker.rsTrackRegTrash(regHi);
13612
13613             goto DONE;
13614
13615         default:
13616 #ifdef  DEBUG
13617             compiler->gtDispTree(tree);
13618 #endif
13619             noway_assert(!"unexpected leaf");
13620         }
13621     }
13622
13623     /* Is it a 'simple' unary/binary operator? */
13624
13625     if  (kind & GTK_SMPOP)
13626     {
13627         instruction     insLo;
13628         instruction     insHi;
13629         bool            doLo;
13630         bool            doHi;
13631         bool            setCarry = false;
13632         int             helper;
13633
13634         GenTreePtr      op1  = tree->gtOp.gtOp1;
13635         GenTreePtr      op2  = tree->gtGetOp2();
13636
13637         switch (oper)
13638         {
13639         case GT_ASG:
13640             {
13641 #ifdef DEBUGGING_SUPPORT
13642                 unsigned lclVarNum = compiler->lvaCount;
13643                 unsigned lclVarILoffs = DUMMY_INIT(0);
13644 #endif
13645
13646                 /* Is the target a local ? */
13647
13648                 if  (op1->gtOper == GT_LCL_VAR)
13649                 {
13650                     unsigned    varNum = op1->gtLclVarCommon.gtLclNum;
13651                     LclVarDsc * varDsc;
13652
13653                     noway_assert(varNum < compiler->lvaCount);
13654                     varDsc = compiler->lvaTable + varNum;
13655
13656                     // No dead stores (though with MinOpts we may have dead stores)
13657                     noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13658
13659 #ifdef DEBUGGING_SUPPORT
13660                     /* For non-debuggable code, every definition of a lcl-var has
13661                      * to be checked to see if we need to open a new scope for it.
13662                      * Remember the local var info to call siCheckVarScope
13663                      * AFTER codegen of the assignment.
13664                      */
13665                     if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
13666                     {
13667                         lclVarNum = varNum;
13668                         lclVarILoffs   = op1->gtLclVar.gtLclILoffs;
13669                     }
13670 #endif
13671
13672                     /* Has the variable been assigned to a register (pair) ? */
13673
13674                     if  (genMarkLclVar(op1))
13675                     {
13676                         noway_assert(op1->gtFlags & GTF_REG_VAL);
13677                         regPair = op1->gtRegPair;
13678                         regLo   = genRegPairLo(regPair);
13679                         regHi   = genRegPairHi(regPair);
13680                         noway_assert(regLo != regHi);
13681
13682                         /* Is the value being assigned a constant? */
13683
13684                         if  (op2->gtOper == GT_CNS_LNG)
13685                         {
13686                             /* Move the value into the target */
13687
13688                             genMakeRegPairAvailable(regPair);
13689
13690                             instruction ins;
13691                             if (regLo == REG_STK)
13692                             {
13693                                 ins = ins_Store(TYP_INT);
13694                             }
13695                             else
13696                             {
13697                                 // Always do the stack store first; that way, if it grabs a
13698                                 // register, it cannot clobber regLo.
13699                                 if (regHi == REG_STK)
13700                                 {
13701                                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13702                                 }
13703                                 ins = INS_mov;
13704                             }
13705                             inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal      ), 0);
13706
13707                             // The REG_STK case has already been handled
13708                             if (regHi != REG_STK)
13709                             {
13710                                 ins = INS_mov;
13711                                 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13712                             }
13713
13714                             goto DONE_ASSG_REGS;
13715                         }
13716
13717                         /* Compute the RHS into desired register pair */
13718
13719                         if  (regHi != REG_STK)
13720                         {
13721                             genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13722                             noway_assert(op2->gtFlags & GTF_REG_VAL);
13723                             noway_assert(op2->gtRegPair == regPair);
13724                         }
13725                         else
13726                         {
13727                             regPairNo curPair;
13728                             regNumber curLo;
13729                             regNumber curHi;
13730
13731                             genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13732
13733                             noway_assert(op2->gtFlags & GTF_REG_VAL);
13734
13735                             curPair = op2->gtRegPair;
13736                             curLo   = genRegPairLo(curPair);
13737                             curHi   = genRegPairHi(curPair);
13738
13739                             /* move high first, target is on stack */
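                            // regHi == REG_STK here, so the variable's high half lives in
                            // its stack home: store it directly from curHi, then copy the
                            // low half into regLo (spilling whatever else occupies regLo).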
13740                             inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13741
13742                             if  (regLo != curLo)
13743                             {
13744                                 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13745                                     regSet.rsSpillReg(regLo);
13746                                 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13747                                 regTracker.rsTrackRegCopy(regLo, curLo);
13748                             }
13749                         }
13750
13751                         genReleaseRegPair(op2);
13752                         goto DONE_ASSG_REGS;
13753                     }
13754                 }
13755
13756
13757                 /* Is the value being assigned a constant? */
13758
13759                 if  (op2->gtOper == GT_CNS_LNG)
13760                 {
13761                     /* Make the target addressable */
13762
13763                     addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13764
13765                     /* Move the value into the target */
13766
13767                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal      ), 0);
13768                     inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13769
13770                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13771
13772                     goto LAsgExit;
13773                 }
13774
13775 #if 0
13776                 /* Catch a case where we can avoid generating "op reg, mem". Better pairing
13777                  * from
13778                  *     mov regHi, mem
13779                  *     op  regHi, reg
13780                  *
13781                  * To avoid problems with order of evaluation, only do this if op2 is
13782                  * a non-enregistered local variable
13783                  */
13784
13785                 if (GenTree::OperIsCommutative(oper) &&
13786                     op1->gtOper == GT_LCL_VAR &&
13787                     op2->gtOper == GT_LCL_VAR)
13788                 {
13789                     regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13790
13791                     /* Is op2 a non-enregistered local variable? */
13792                     if (regPair == REG_PAIR_NONE)
13793                     {
13794                         regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13795
13796                         /* Is op1 an enregistered local variable? */
13797                         if (regPair != REG_PAIR_NONE)
13798                         {
13799                             /* Swap the operands */
13800                             GenTreePtr op = op1;
13801                             op1 = op2;
13802                             op2 = op;
13803                         }
13804                     }
13805                 }
13806 #endif
13807
13808                 /* Eliminate worthless assignment "lcl = lcl" */
13809
13810                 if  (op2->gtOper == GT_LCL_VAR &&
13811                      op1->gtOper == GT_LCL_VAR && op2->gtLclVarCommon.gtLclNum ==
13812                                                   op1->gtLclVarCommon.gtLclNum)
13813                 {
13814                     genUpdateLife(op2);
13815                     goto LAsgExit;
13816                 }
13817
13818
13819                 if (op2->gtOper  == GT_CAST &&
13820                     TYP_ULONG == op2->CastToType() &&
13821                     op2->CastFromType() <= TYP_INT &&
13822                     // op1,op2 need to be materialized in the correct order.
13823                     (tree->gtFlags & GTF_REVERSE_OPS))
13824                 {
13825                     /* Generate the small RHS into a register pair */
13826
13827                     GenTreePtr smallOpr = op2->gtOp.gtOp1;
13828
13829                     genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13830
13831                     /* Make the target addressable */
13832
13833                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13834
13835                     /* Make sure everything is still addressable */
13836
13837                     genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13838                     noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
13839                     regHi   = smallOpr->gtRegNum;
13840                     addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13841
13842                     // conv.ovf.u8 could overflow if the original number was negative
13843                     if (op2->gtOverflow())
13844                     {
13845                         noway_assert((op2->gtFlags & GTF_UNSIGNED) == 0); // conv.ovf.u8.un should be bashed to conv.u8.un
13846                         instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi);     // set flags
13847                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13848                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13849                     }
13850
13851                     /* Move the value into the target */
13852
13853                     inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13854                     inst_TT_IV(ins_Store(TYP_INT), op1, 0,     4); // Store 0 in hi-word
13855
13856                     /* Free up anything that was tied up by either side */
13857
13858                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13859                     genReleaseReg     (smallOpr);
13860
13861 #if REDUNDANT_LOAD
13862                     if (op1->gtOper == GT_LCL_VAR)
13863                     {
13864                         /* clear this local from reg table */
13865                         regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13866
13867                         /* mark RHS registers as containing the local var */
13868                         regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13869                     }                    
13870 #endif
13871                     goto LAsgExit;
13872                 }
13873
13874
13875                 /* Is the LHS more complex than the RHS? */
13876
13877                 if  (tree->gtFlags & GTF_REVERSE_OPS)
13878                 {
13879                     /* Generate the RHS into a register pair */
13880
13881                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13882                     noway_assert(op2->gtFlags & GTF_REG_VAL);
13883
13884                     /* Make the target addressable */
13885                     op1 = genCodeForCommaTree(op1);
13886                     addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13887
13888                     /* Make sure the RHS register hasn't been spilled */
13889
13890                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13891                 }
13892                 else
13893                 {
13894                     /* Make the target addressable */
13895
13896                     op1 = genCodeForCommaTree(op1);
13897                     addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13898
13899                     /* Generate the RHS into a register pair */
13900
13901                     genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13902                 }
13903
13904                 /* Lock 'op2' and make sure 'op1' is still addressable */
13905
13906                 noway_assert(op2->gtFlags & GTF_REG_VAL);
13907                 regPair = op2->gtRegPair;
13908
13909                 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13910
13911                 /* Move the value into the target */
13912
13913                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13914                 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13915
13916                 /* Free up anything that was tied up by either side */
13917
13918                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13919                 genReleaseRegPair(op2);
13920
13921             DONE_ASSG_REGS:
13922
13923 #if REDUNDANT_LOAD
13924
13925                 if (op1->gtOper == GT_LCL_VAR)
13926                 {
13927                     /* Clear this local from reg table */
13928
13929                     regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13930
13931                     if ((op2->gtFlags & GTF_REG_VAL) &&
13932                         /* constant has precedence over local */
13933     //                    rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13934                         tree->gtOper == GT_ASG)
13935                     {
13936                         regNumber regNo;
13937
13938                         /* mark RHS registers as containing the local var */
13939
13940                         regNo = genRegPairLo(op2->gtRegPair);
13941                         if  (regNo != REG_STK)
13942                             regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13943
13944                         regNo = genRegPairHi(op2->gtRegPair);
13945                         if  (regNo != REG_STK)
13946                         {
13947                             /* For partially enregistered longs, we might have
13948                                stomped on op2's hiReg */
13949                             if (!(op1->gtFlags & GTF_REG_VAL) ||
13950                                 regNo != genRegPairLo(op1->gtRegPair))
13951                             {
13952                                 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13953                             }
13954                         }
13955                     }
13956                 }
13957 #endif
13958
13959
13960 LAsgExit:
13961
13962                 genUpdateLife(op1);
13963                 genUpdateLife(tree);
13964
13965 #ifdef DEBUGGING_SUPPORT
13966                 /* For non-debuggable code, every definition of a lcl-var has
13967                  * to be checked to see if we need to open a new scope for it.
13968                  */
13969                 if (lclVarNum < compiler->lvaCount)
13970                     siCheckVarScope(lclVarNum, lclVarILoffs);
13971 #endif
13972               }
13973               return;
13974
13975
13976         case GT_SUB: insLo = INS_sub; insHi = INS_SUBC; setCarry = true; goto BINOP_OVF;
13977         case GT_ADD: insLo = INS_add; insHi = INS_ADDC; setCarry = true; goto BINOP_OVF;
13978
13979             bool ovfl;
13980
13981         BINOP_OVF:
13982             ovfl = tree->gtOverflow();
13983             goto _BINOP;
13984
13985         case GT_AND: insLo = insHi = INS_AND; goto BINOP;
13986         case GT_OR : insLo = insHi = INS_OR ; goto BINOP;
13987         case GT_XOR: insLo = insHi = INS_XOR; goto BINOP;
13988
13989         BINOP: ovfl = false; goto _BINOP;
13990
13991        _BINOP:
13992
13993             /* The following makes an assumption about gtSetEvalOrder(this) */
13994
13995             noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13996
13997             /* Special case: check for "(long(intval) << 32) | longval" */
13998
13999             if  (oper == GT_OR && op1->gtOper == GT_LSH)
14000             {
14001                 GenTreePtr      lshLHS = op1->gtOp.gtOp1;
14002                 GenTreePtr      lshRHS = op1->gtOp.gtOp2;
14003
14004                 if  (lshLHS->gtOper             == GT_CAST    &&
14005                      lshRHS->gtOper             == GT_CNS_INT &&
14006                      lshRHS->gtIntCon.gtIconVal == 32         &&
14007                      genTypeSize(TYP_INT)       == genTypeSize(lshLHS->CastFromType()))
14008                 {
14009
14010                     /* Throw away the cast of the shift operand. */
14011
14012                     op1 = lshLHS->gtCast.CastOp();
14013
14014                     /* Special case: check op2 for "ulong(intval)" */
14015                     if ((op2->gtOper            == GT_CAST) &&
14016                         (op2->CastToType()      == TYP_ULONG) &&
14017                         genTypeSize(TYP_INT)    == genTypeSize(op2->CastFromType()))
14018                     {
14019                         /* Throw away the cast of the second operand. */
14020
14021                         op2 = op2->gtCast.CastOp();
14022                         goto SIMPLE_OR_LONG;
14023                     }
14024                     /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
14025                     else if  (op2->gtOper == GT_AND)
14026                     {
14027                         GenTreePtr      andLHS; andLHS = op2->gtOp.gtOp1;
14028                         GenTreePtr      andRHS; andRHS = op2->gtOp.gtOp2;
14029
14030                         if  (andLHS->gtOper             == GT_CAST            &&
14031                              andRHS->gtOper             == GT_CNS_LNG         &&
14032                              andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
14033                              genTypeSize(TYP_INT)       == genTypeSize(andLHS->CastFromType()))
14034                         {
14035                             /* Throw away the cast of the second operand. */
14036
14037                             op2 = andLHS->gtCast.CastOp();
14038
14039 SIMPLE_OR_LONG:
14040                             // Load the high DWORD, ie. op1
14041
14042                             genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
14043
14044                             noway_assert(op1->gtFlags & GTF_REG_VAL);
14045                             regHi = op1->gtRegNum;
14046                             regSet.rsMarkRegUsed(op1);
14047
14048                             // Load the low DWORD, ie. op2
14049
14050                             genCodeForTree(op2, needReg & ~genRegMask(regHi));
14051
14052                             noway_assert(op2->gtFlags & GTF_REG_VAL);
14053                             regLo = op2->gtRegNum;
14054
14055                             /* Make sure regHi is still around. Also, force
14056                                regLo to be excluded in case regLo==regHi */
14057
14058                             genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
14059                             regHi = op1->gtRegNum;
14060
14061                             regPair = gen2regs2pair(regLo, regHi);
14062                             goto DONE;
14063                         }
14064                     }
14065
14066                     /*  Generate the following sequence:
14067                            Prepare op1 (discarding shift)
14068                            Compute op2 into some regpair
14069                            OR regpairhi, op1
14070                      */
14071
14072                     /* First, make op1 addressable */
14073
14074                     /* tempReg must avoid needReg, op2->gtRsvdRegs and regSet.rsMaskResvd.
14075
14076                        Excluding needReg appears incorrect, since we are not ensuring that the reg pair into
14077                        which the long value is computed comes from needReg.  But at this point the safest fix is
14078                        to exclude regSet.rsMaskResvd.
14079
14080                        Note that needReg could be the set of free registers (excluding reserved ones).  If we don't
14081                        exclude regSet.rsMaskResvd, the expression below would end up trying to choose a reg from the
14082                        reserved set, which is bound to fail.  To prevent that we avoid regSet.rsMaskResvd.
14083                      */
14084                     regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
14085
14086                     addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
14087
14088                     genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
14089
14090                     noway_assert(op2->gtFlags & GTF_REG_VAL);
14091                     regPair  = op2->gtRegPair;
14092                     regHi    = genRegPairHi(regPair);
14093
14094                     /* The operand might have interfered with the address */
14095
14096                     addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
14097
14098                     /* Now compute the result */
14099
14100                     inst_RV_TT(insHi, regHi, op1, 0);
14101
14102                     regTracker.rsTrackRegTrash(regHi);
14103
14104                     /* Free up anything that was tied up by the LHS */
14105
14106                     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
14107
14108                     /* The result is where the second operand is sitting */
14109
14110                     genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
14111
14112                     regPair = op2->gtRegPair;
14113                     goto DONE;
14114                 }
14115             }
14116
14117             /* Special case: check for "longval | (long(intval) << 32)" */
14118
14119             if  (oper == GT_OR && op2->gtOper == GT_LSH)
14120             {
14121                 GenTreePtr      lshLHS = op2->gtOp.gtOp1;
14122                 GenTreePtr      lshRHS = op2->gtOp.gtOp2;
14123
14124                 if  (lshLHS->gtOper             == GT_CAST    &&
14125                      lshRHS->gtOper             == GT_CNS_INT &&
14126                      lshRHS->gtIntCon.gtIconVal == 32         &&
14127                      genTypeSize(TYP_INT)       == genTypeSize(lshLHS->CastFromType()))
14128
14129                 {
14130                     /* We throw away the cast of the shift operand. */
14131
14132                     op2 = lshLHS->gtCast.CastOp();
14133
14134                    /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
14135
14136                     if  (op1->gtOper == GT_AND)
14137                     {
14138                         GenTreePtr      andLHS = op1->gtOp.gtOp1;
14139                         GenTreePtr      andRHS = op1->gtOp.gtOp2;
14140
14141                         if  (andLHS->gtOper             == GT_CAST            &&
14142                              andRHS->gtOper             == GT_CNS_LNG         &&
14143                              andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
14144                              genTypeSize(TYP_INT)       == genTypeSize(andLHS->CastFromType()))
14145                         {
14146                             /* Throw away the cast of the first operand. */
14147
14148                             op1 = andLHS->gtCast.CastOp();
14149
14150                             // Load the low DWORD, ie. op1
14151
14152                             genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
14153
14154                             noway_assert(op1->gtFlags & GTF_REG_VAL);
14155                             regLo = op1->gtRegNum;
14156                             regSet.rsMarkRegUsed(op1);
14157
14158                             // Load the high DWORD, ie. op2
14159
14160                             genCodeForTree(op2, needReg & ~genRegMask(regLo));
14161
14162                             noway_assert(op2->gtFlags & GTF_REG_VAL);
14163                             regHi = op2->gtRegNum;
14164
14165                             /* Make sure regLo is still around. Also, force
14166                                regHi to be excluded in case regLo==regHi */
14167
14168                             genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
14169                             regLo = op1->gtRegNum;
14170
14171                             regPair = gen2regs2pair(regLo, regHi);
14172                             goto DONE;
14173                         }
14174                     }
14175
14176                     /*  Generate the following sequence:
14177                           Compute op1 into some regpair
14178                           Make op2 (ignoring shift) addressable
14179                           OR regPairHi, op2
14180                      */
14181
14182                     // First, generate the first operand into some register
14183
14184                     genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14185                     noway_assert(op1->gtFlags & GTF_REG_VAL);
14186
14187                     /* Make the second operand addressable */
14188
14189                     addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
14190
14191                     /* Make sure the result is in a free register pair */
14192
14193                     genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
14194                     regPair  = op1->gtRegPair;
14195                     regHi    = genRegPairHi(regPair);
14196
14197                     /* The operand might have interfered with the address */
14198
14199                     addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
14200
14201                     /* Compute the new value */
14202
14203                     inst_RV_TT(insHi, regHi, op2, 0);
14204
14205                     /* The value in the high register has been trashed */
14206
14207                     regTracker.rsTrackRegTrash(regHi);
14208
14209                     goto DONE_OR;
14210                 }
14211             }
14212
14213             /* Generate the first operand into registers */
14214
14215             if ( (genCountBits(needReg)       == 2)        &&     
14216                  ((regSet.rsRegMaskFree() & needReg) == needReg ) &&
14217                  ((op2->gtRsvdRegs & needReg) == RBM_NONE) &&
14218                  (!(tree->gtFlags & GTF_ASG))                 )
14219             {
14220                 regPair = regSet.rsPickRegPair(needReg);
14221                 genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14222             }
14223             else
14224             {
14225                 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14226             }
14227             noway_assert(op1->gtFlags & GTF_REG_VAL);
14228             regMaskTP  op1Mask;
14229             regPair = op1->gtRegPair;
14230             op1Mask = genRegPairMask(regPair);
14231
14232             /* Make the second operand addressable */
14233             regMaskTP  needReg2; 
14234             needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
14235             addrReg  = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
14236
14237             // TODO: If 'op1' got spilled and 'op2' happens to be
14238             // TODO: in a register, and we have add/mul/and/or/xor,
14239             // TODO: reverse the operands since we can perform the
14240             // TODO: operation directly with the spill temp, e.g.
14241             // TODO: 'add regHi, [temp]'.
14242
14243             /* Make sure the result is in a free register pair */
14244
14245             genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
14246             regPair = op1->gtRegPair;
14247             op1Mask = genRegPairMask(regPair);
14248
14249             regLo = genRegPairLo(regPair);
14250             regHi = genRegPairHi(regPair);
14251             
14252             /* Make sure that we don't spill regLo/regHi below */
14253             regSet.rsLockUsedReg(op1Mask);
14254
14255             /* The operand might have interfered with the address */
14256
14257             addrReg = genKeepAddressable(op2, addrReg);
14258
14259             /* The value in the register pair is about to be trashed */
14260
14261             regTracker.rsTrackRegTrash(regLo);
14262             regTracker.rsTrackRegTrash(regHi);
14263
14264             /* Compute the new value */
14265
14266             doLo = true;
14267             doHi = true;
14268
14269             if  (op2->gtOper == GT_CNS_LNG)
14270             {
14271                 __int64     icon = op2->gtLngCon.gtLconVal;
14272
14273                 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14274
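                /* Examples: for "x & 0xFFFFFFFF00000000" regLo is simply zeroed and the
                   high-half AND is skipped (AND with all ones is a no-op); for
                   "x | 0x12345678" only the low-half OR is emitted, since OR-ing the
                   high half with 0 changes nothing. */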
14275                 switch (oper)
14276                 {
14277                 case GT_AND:
14278                     if  ((int)(icon      ) == -1)
14279                         doLo = false;
14280                     if  ((int)(icon >> 32) == -1)
14281                         doHi = false;
14282
14283                     if  (!(icon & I64(0x00000000FFFFFFFF)))
14284                     {
14285                         genSetRegToIcon(regLo, 0);
14286                         doLo = false;
14287                     }
14288
14289                     if  (!(icon & I64(0xFFFFFFFF00000000)))
14290                     {
14291                         /* Just so that we always set the low half first */
14292
14293                         if  (doLo)
14294                         {
14295                             inst_RV_TT(insLo, regLo, op2, 0);
14296                             doLo = false;
14297                         }
14298                         genSetRegToIcon(regHi, 0);
14299                         doHi = false;
14300                     }
14301
14302                     break;
14303
14304                 case GT_OR:
14305                 case GT_XOR:
14306                     if  (!(icon & I64(0x00000000FFFFFFFF)))
14307                         doLo = false;
14308                     if  (!(icon & I64(0xFFFFFFFF00000000)))
14309                         doHi = false;
14310                     break;
14311                 default:
14312                     break;
14313                 }
14314             }
14315
14316             // Fix 383813 X86/ARM ILGEN
14317             // Fix 383793 ARM ILGEN
14318             // Fix 383911 ARM ILGEN
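            // Presumably these fixes lock the registers that form op2's address (beyond
            // op1's already-locked pair) so they cannot be grabbed or spilled while the
            // two 32-bit halves are computed below.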
14319             regMaskTP  newMask;     newMask = addrReg & ~op1Mask;
14320             regSet.rsLockUsedReg(newMask);
14321
14322             if (doLo) 
14323             {
14324                 insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14325                 inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14326             }
14327             if (doHi)
14328             {
14329                 insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14330                 inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14331             }
14332
14333             regSet.rsUnlockUsedReg(newMask);
14334             regSet.rsUnlockUsedReg(op1Mask);
14335
14336         DONE_OR:
14337
14338             /* Free up anything that was tied up by the second operand */
14339
14340             genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14341
14342             /* The result is where the first operand is sitting */
14343
14344             genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14345
14346             regPair = op1->gtRegPair;
14347
14348             if (ovfl)
14349                 genCheckOverflow(tree);
14350
14351             goto DONE;
14352
14353         case GT_UMOD:
14354
14355             regPair = genCodeForLongModInt(tree, needReg);
14356             goto DONE;
14357
14358         case GT_MUL:
14359
14360             /* Special case: both operands promoted from int */
14361
14362             assert(tree->gtIsValid64RsltMul());
14363
14364             /* Change to an integer multiply temporarily */
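            // (The node is retyped to TYP_INT and its operands replaced by the 32-bit
            // cast sources so that a 32x32->64 multiply is generated; the original type
            // and operands are restored immediately afterwards.)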
14365
14366             tree->gtType = TYP_INT;
14367
14368             noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14369             tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14370             tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14371
14372             assert(tree->gtFlags & GTF_MUL_64RSLT);
14373
14374 #if defined(_TARGET_X86_)
14375             // imul on x86 requires EDX:EAX
14376             genComputeReg(tree, (RBM_EAX|RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14377             noway_assert(tree->gtFlags & GTF_REG_VAL);
14378             noway_assert(tree->gtRegNum == REG_EAX);  // Also REG_EDX is setup with hi 32-bits
14379 #elif defined(_TARGET_ARM_)
14380             genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14381             noway_assert(tree->gtFlags & GTF_REG_VAL);
14382 #else
14383             assert(!"Unsupported target for 64-bit multiply codegen");
14384 #endif
14385
14386             /* Restore gtType, op1 and op2 from the change above */
14387
14388             tree->gtType     = TYP_LONG;
14389             tree->gtOp.gtOp1 = op1;
14390             tree->gtOp.gtOp2 = op2;
14391
14392 #if defined(_TARGET_X86_)
14393             /* The result is now in EDX:EAX */
14394             regPair = REG_PAIR_EAXEDX;
14395 #elif defined(_TARGET_ARM_)
14396             regPair = tree->gtRegPair;
14397 #endif
14398             goto DONE;
14399
14400         case GT_LSH: helper = CORINFO_HELP_LLSH; goto SHIFT;
14401         case GT_RSH: helper = CORINFO_HELP_LRSH; goto SHIFT;
14402         case GT_RSZ: helper = CORINFO_HELP_LRSZ; goto SHIFT;
14403
14404         SHIFT:
14405
14406             noway_assert(op1->gtType == TYP_LONG);
14407             noway_assert(genActualType(op2->gtType) == TYP_INT);
14408
14409             /* Is the second operand a constant? */
14410
14411             if  (op2->gtOper == GT_CNS_INT)
14412             {
14413                 unsigned int count = op2->gtIntCon.gtIconVal;
14414
14415                 /* Compute the left operand into a free register pair */
14416
14417                 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14418                 noway_assert(op1->gtFlags & GTF_REG_VAL);
14419
14420                 regPair = op1->gtRegPair;
14421                 regLo   = genRegPairLo(regPair);
14422                 regHi   = genRegPairHi(regPair);
14423
14424                 /* Assume the value in the register pair is trashed. In some cases, though,
14425                    a register might be set to zero, and we can use that information to improve
14426                    some code generation.
14427                 */
14428
14429                 regTracker.rsTrackRegTrash(regLo);
14430                 regTracker.rsTrackRegTrash(regHi);
14431
14432                 /* Generate the appropriate shift instructions */
14433
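                /* For example, "x << 10" becomes "shld regHi, regLo, 10" followed by
                   "shl regLo, 10" on x86; on ARM the shld is simulated by shifting regHi
                   left by 10 and OR-ing in regLo shifted right by 22.  Shifts by 32 or
                   more move/zero whole halves plus a residual shift by (count - 32). */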
14434                 switch (oper)
14435                 {
14436                 case GT_LSH:
14437                     if (count == 0)
14438                     {
14439                         // regHi, regLo are correct
14440                     }
14441                     else if (count < 32)
14442                     {
14443 #if defined(_TARGET_XARCH_)
14444                         inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14445 #elif defined(_TARGET_ARM_)
14446                         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14447                         getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14448 #else // _TARGET_*
14449                         NYI("INS_shld");
14450 #endif // _TARGET_*
14451                         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14452                     }
14453                     else // count >= 32
14454                     {
14455                         assert(count >= 32);
14456                         if (count < 64)
14457                         {
14458 #if defined(_TARGET_ARM_)
14459                             if (count == 32)
14460                             {
14461                                 // mov low dword into high dword (i.e. shift left by 32-bits)
14462                                 inst_RV_RV(INS_mov, regHi, regLo);
14463                             }
14464                             else
14465                             {
14466                                 assert(count > 32 && count < 64);
14467                                 getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo, count - 32);
14468                             }
14469 #else // _TARGET_*
14470                             // mov low dword into high dword (i.e. shift left by 32-bits)
14471                             inst_RV_RV(INS_mov, regHi, regLo);
14472                             if (count > 32)
14473                             {
14474                                 // Shift high dword left by count - 32
14475                                 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14476                             }
14477 #endif // _TARGET_*
14478                         }
14479                         else // count >= 64
14480                         {
14481                             assert(count >= 64);
14482                             genSetRegToIcon(regHi, 0);
14483                         }
14484                         genSetRegToIcon(regLo, 0);
14485                     }
14486                     break;
14487
14488                 case GT_RSH:
14489                     if (count == 0)
14490                     {
14491                         // regHi, regLo are correct
14492                     }
14493                     else if (count < 32)
14494                     {
14495 #if defined(_TARGET_XARCH_)
14496                         inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14497 #elif defined(_TARGET_ARM_)
14498                         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14499                         getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14500 #else // _TARGET_*
14501                         NYI("INS_shrd");
14502 #endif // _TARGET_*
14503                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14504                     }
14505                     else // count >= 32
14506                     {
14507                         assert(count >= 32);
14508                         if (count < 64)
14509                         {
14510 #if defined(_TARGET_ARM_)
14511                             if (count == 32)
14512                             {
14513                                 // mov high dword into low dword (i.e. shift right by 32-bits)
14514                                 inst_RV_RV(INS_mov, regLo, regHi);
14515                             }
14516                             else
14517                             {
14518                                 assert(count > 32 && count < 64);
14519                                 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi, count - 32);
14520                             }
14521 #else // _TARGET_*
14522                             // mov high dword into low dword (i.e. shift right by 32-bits)
14523                             inst_RV_RV(INS_mov, regLo, regHi);
14524                             if (count > 32)
14525                             {
14526                                 // Shift low dword right by count - 32
14527                                 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14528                             }
14529 #endif // _TARGET_*
14530                         }
14531
14532                         // Propagate sign bit in high dword
14533                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14534
14535                         if (count >= 64)
14536                         {
14537                             // Propagate the sign from the high dword
14538                             inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14539                         }
14540                     }
14541                     break;
14542
14543                 case GT_RSZ:
14544                     if (count == 0)
14545                     {
14546                         // regHi, regLo are correct
14547                     }
14548                     else if (count < 32)
14549                     {
14550 #if defined(_TARGET_XARCH_)
14551                         inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14552 #elif defined(_TARGET_ARM_)
14553                         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14554                         getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14555 #else // _TARGET_*
14556                         NYI("INS_shrd");
14557 #endif // _TARGET_*
14558                         inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14559                     }
14560                     else // count >= 32
14561                     {
14562                         assert(count >= 32);
14563                         if (count < 64)
14564                         {
14565 #if defined(_TARGET_ARM_)
14566                             if (count == 32)
14567                             {
14568                                 // mov high dword into low dword (i.e. shift right by 32-bits)
14569                                 inst_RV_RV(INS_mov, regLo, regHi);
14570                             }
14571                             else
14572                             {
14573                                 assert(count > 32 && count < 64);
14574                                 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi, count - 32);
14575                             }
14576 #else // _TARGET_*
14577                             // mov high dword into low dword (i.e. shift right by 32-bits)
14578                             inst_RV_RV(INS_mov, regLo, regHi);
14579                             if (count > 32)
14580                             {
14581                                 // Shift low dword right by count - 32
14582                                 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14583                             }
14584 #endif // _TARGET_*
14585                         }
14586                         else // count >= 64
14587                         {
14588                             assert(count >= 64);
14589                             genSetRegToIcon(regLo, 0);
14590                         }
14591                         genSetRegToIcon(regHi, 0);
14592                     }
14593                     break;
14594
14595                 default:
14596                     noway_assert(!"Illegal oper for long shift");
14597                     break;
14598                 }
14599
14600                 goto DONE_SHF;
14601             }
14602           
14603             /* Which operand are we supposed to compute first? */
14604
14605             assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
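            /* The long-shift helpers take the value in the REG_LNGARG_0 pair and the
               count in REG_SHIFT_LNG, and are expected to leave the result in that same
               pair; on non-x86 targets they are plain C routines, which is why the
               callee-trash registers get spilled before the call below. */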
14606
14607             if  (tree->gtFlags & GTF_REVERSE_OPS)
14608             {
14609                 /* The second operand can't be a constant */
14610
14611                 noway_assert(op2->gtOper != GT_CNS_INT);
14612
14613                 /* Load the shift count, hopefully into RBM_SHIFT_LNG */
14614                 RegSet::ExactReg  exactReg;
14615                 if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14616                     exactReg = RegSet::EXACT_REG;
14617                 else
14618                     exactReg = RegSet::ANY_REG;
14619                 genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14620
14621                 /* Compute the left operand into REG_LNGARG_0 */
14622
14623                 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14624                 noway_assert(op1->gtFlags & GTF_REG_VAL);
14625
14626                 /* Lock op1 so that it doesn't get trashed */
14627
14628                 regSet.rsLockUsedReg(RBM_LNGARG_0);
14629
14630                 /* Make sure the shift count wasn't displaced */
14631
14632                 genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14633
14634                 /* Lock op2 */
14635
14636                 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14637             }
14638             else
14639             {
14640                 /* Compute the left operand into REG_LNGARG_0 */
14641
14642                 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14643                 noway_assert(op1->gtFlags & GTF_REG_VAL);
14644
14645                 /* Compute the shift count into RBM_SHIFT_LNG */
14646
14647                 genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14648
14649                 /* Lock op2 */
14650
14651                 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14652
14653                 /* Make sure the value hasn't been displaced */
14654
14655                 genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14656
14657                 /* Lock op1 so that it doesn't get trashed */
14658
14659                 regSet.rsLockUsedReg(RBM_LNGARG_0);
14660             }
14661
14662 #ifndef _TARGET_X86_
14663             /* The generic helper is a C-routine and so it follows the full ABI */
14664             {
14665                 /* Spill any callee-saved registers which are being used */
14666                 regMaskTP  spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14667
14668                 /* But do not spill our argument registers. */
14669                 spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14670
14671                 if (spillRegs)
14672                 {
14673                     regSet.rsSpillRegs(spillRegs);
14674                 }
14675             }
14676 #endif // !_TARGET_X86_
14677
14678             /* Perform the shift by calling a helper function */
14679
14680             noway_assert(op1->gtRegPair == REG_LNGARG_0);
14681             noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14682             noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14683
14684             genEmitHelperCall(helper,
14685                               0,             // argSize
14686                               EA_8BYTE);     // retSize
14687
14688 #ifdef _TARGET_X86_
14689             /* The value in the register pair is trashed */
14690
14691             regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14692             regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14693 #else // _TARGET_X86_
14694             /* The generic helper is a C-routine and so it follows the full ABI */
14695             regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14696 #endif // _TARGET_X86_
14697
14698             /* Release both operands */
14699
14700             regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14701             genReleaseRegPair(op1);
14702             genReleaseReg    (op2);
14703
14704         DONE_SHF:
14705
14706             noway_assert(op1->gtFlags & GTF_REG_VAL);
14707             regPair  = op1->gtRegPair;
14708             goto DONE;
14709
14710         case GT_NEG:
14711         case GT_NOT:
14712
14713             /* Generate the operand into some register pair */
14714
14715             genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14716             noway_assert(op1->gtFlags & GTF_REG_VAL);
14717
14718             regPair  = op1->gtRegPair;
14719
14720             /* Figure out which registers the value is in */
14721
14722             regLo = genRegPairLo(regPair);
14723             regHi = genRegPairHi(regPair);
14724
14725             /* The value in the register pair is about to be trashed */
14726
14727             regTracker.rsTrackRegTrash(regLo);
14728             regTracker.rsTrackRegTrash(regHi);
14729
14730             if  (oper == GT_NEG)
14731             {
14732                 /* Unary "neg": negate the value  in the register pair */
14733
14734 #ifdef _TARGET_ARM_
14735
14736                 // ARM doesn't have an opcode that sets the carry bit like
14737                 // x86, so we can't use neg/addc/neg.  Instead we use subtract
14738                 // with carry.  Too bad this uses an extra register.
14739
14740                 // Lock regLo and regHi so we don't pick them, and then pick
14741                 // a third register to be our 0.
14742                 regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14743                 regSet.rsLockReg(regPairMask);
14744                 regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14745                 regNumber regZero = genGetRegSetToIcon(0, regBest);
14746                 regSet.rsUnlockReg(regPairMask);
14747
14748                 inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14749                 getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
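                // Net effect (illustrative): "rsbs regLo, regLo, #0" computes regLo = 0 - regLo and
                // clears the carry iff a borrow occurred (i.e. regLo was non-zero); "sbc regHi, regZero, regHi"
                // then computes regHi = 0 - regHi - borrow.  E.g. negating 0x00000001_00000002 gives
                // regLo = 0xFFFFFFFE with a borrow and regHi = 0xFFFFFFFE, i.e. -0x1_00000002.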
14750
14751 #elif defined(_TARGET_XARCH_)
14752
14753                 inst_RV   (INS_NEG,  regLo,    TYP_LONG);
14754                 inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14755                 inst_RV   (INS_NEG,  regHi,    TYP_LONG);
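                // Illustrative note: "neg regLo" sets the carry iff regLo was non-zero,
                // "adc regHi, 0" folds that borrow into the upper half, and the final "neg regHi"
                // yields -(regHi + carry), which is exactly the two's-complement negation of the
                // 64-bit value in regHi:regLo.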
14756 #else
14757                 NYI("GT_NEG on TYP_LONG");
14758 #endif
14759             }
14760             else
14761             {
14762                 /* Unary "not": flip all the bits in the register pair */
14763
14764                 inst_RV   (INS_NOT, regLo, TYP_LONG);
14765                 inst_RV   (INS_NOT, regHi, TYP_LONG);
14766             }
14767
14768             goto DONE;
14769
14770 #if LONG_ASG_OPS
14771
14772         case GT_ASG_OR : insLo =          insHi = INS_OR ;  goto ASG_OPR;
14773         case GT_ASG_XOR: insLo =          insHi = INS_XOR;  goto ASG_OPR;
14774         case GT_ASG_AND: insLo =          insHi = INS_AND;  goto ASG_OPR;
14775         case GT_ASG_SUB: insLo = INS_sub; insHi = INS_SUBC; goto ASG_OPR;
14776         case GT_ASG_ADD: insLo = INS_add; insHi = INS_ADDC; goto ASG_OPR;
14777
14778         ASG_OPR:
14779
14780             if  (op2->gtOper == GT_CNS_LNG)
14781             {
14782                 __int64     lval = op2->gtLngCon.gtLconVal;
14783
14784                 /* Make the target addressable */
14785
14786                 addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
14787
14788                 /* Optimize some special cases */
14789
14790                 doLo =
14791                 doHi = true;
14792
14793                 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14794
14795                 switch (oper)
14796                 {
14797                 case GT_ASG_AND:
14798                     if  ((int)(lval      ) == -1) doLo = false;
14799                     if  ((int)(lval >> 32) == -1) doHi = false;
14800                     break;
14801
14802                 case GT_ASG_OR:
14803                 case GT_ASG_XOR:
14804                     if  (!(lval & 0x00000000FFFFFFFF)) doLo = false;
14805                     if  (!(lval & 0xFFFFFFFF00000000)) doHi = false;
14806                     break;
14807                 }
14808
14809                 if (doLo) inst_TT_IV(insLo, op1, (int)(lval      ), 0);
14810                 if (doHi) inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
14811
14812                 bool    isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
14813                 if (doLo || doHi)
14814                     tree->gtFlags |= GTF_ZSF_SET;
14815
14816                 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
14817                 goto DONE_ASSG_REGS;
14818             }
14819
14820             /* TODO: allow non-const long assignment operators */
14821
14822             noway_assert(!"non-const long asgop NYI");
14823
14824 #endif // LONG_ASG_OPS
14825
14826         case GT_IND:
14827         case GT_NULLCHECK:
14828             {
14829                 regMaskTP   tmpMask;
14830                 int         hiFirst;
14831                 
14832                 regMaskTP   availMask = RBM_ALLINT & ~needReg;
14833
14834                 /* Make sure the operand is addressable */
14835
14836                 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14837
14838                 GenTreePtr addr = oper == GT_IND ? op1 : tree;
14839
14840                 /* Pick a register for the value */
14841
14842                 regPair = regSet.rsPickRegPair(needReg);
14843                 tmpMask = genRegPairMask(regPair);
14844
14845                 /* Is there any overlap between the register pair and the address? */
14846
14847                 hiFirst = FALSE;
14848
14849                 if  (tmpMask & addrReg)
14850                 {
14851                     /* Does one or both of the target registers overlap? */
14852
14853                     if  ((tmpMask & addrReg) != tmpMask)
14854                     {
14855                         /* Only one register overlaps */
14856
14857                         noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14858
14859                         /* If the low register overlaps, load the upper half first */
14860
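                        // For example (illustrative): if the address lives in ECX and the pair
                        // chosen is hi=EDX, lo=ECX, loading the low word first would overwrite the
                        // address before the high word is read; loading the high word first leaves
                        // the address intact, and the final low-word load may then clobber it.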
14861                         if  (addrReg & genRegMask(genRegPairLo(regPair)))
14862                             hiFirst = TRUE;
14863                     }
14864                     else
14865                     {
14866                         regMaskTP  regFree;
14867
14868                         /* The register completely overlaps with the address */
14869
14870                         noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14871
14872                         /* Can we pick another pair easily? */
14873
14874                         regFree = regSet.rsRegMaskFree() & ~addrReg;
14875                         if  (needReg)
14876                             regFree &= needReg;
14877
14878                         /* More than one free register available? */
14879
14880                         if  (regFree && !genMaxOneBit(regFree))
14881                         {
14882                             regPair = regSet.rsPickRegPair(regFree);
14883                             tmpMask = genRegPairMask(regPair);
14884                         }
14885                         else
14886                         {
14887 //                          printf("Overlap: needReg = %08X\n", needReg);
14888
14889                             // Reg-prediction won't allow this
14890                             noway_assert((regSet.rsMaskVars & addrReg) == 0);
14891
14892                             // Grab one fresh reg, and use any one of addrReg
14893
14894                             if (regFree)    // Try to follow 'needReg'
14895                                 regLo = regSet.rsGrabReg(regFree);
14896                             else            // Pick any reg besides addrReg
14897                                 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14898
14899                             unsigned regBit = 0x1;
14900                             regNumber regNo;
14901
14902                             for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14903                             {
14904                                 // Found one of addrReg. Use it.
14905                                 if (regBit & addrReg)
14906                                     break;
14907                             }
14908                             noway_assert(genIsValidReg(regNo)); // Should have found regNo
14909
14910                             regPair = gen2regs2pair(regLo, regNo);
14911                             tmpMask = genRegPairMask(regPair);
14912                         }
14913                     }
14914                 }
14915
14916                 /* Make sure the value is still addressable */
14917
14918                 noway_assert(genStillAddressable(tree));
14919
14920                 /* Figure out which registers the value is in */
14921
14922                 regLo = genRegPairLo(regPair);
14923                 regHi = genRegPairHi(regPair);
14924
14925                 /* The value in the register pair is about to be trashed */
14926
14927                 regTracker.rsTrackRegTrash(regLo);
14928                 regTracker.rsTrackRegTrash(regHi);
14929
14930                 /* Load the target registers from where the value is */
14931
14932                 if  (hiFirst)
14933                 {
14934                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14935                     regSet.rsLockReg(genRegMask(regHi));
14936                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14937                     regSet.rsUnlockReg(genRegMask(regHi));
14938                 }
14939                 else
14940                 {
14941                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14942                     regSet.rsLockReg(genRegMask(regLo));
14943                     inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14944                     regSet.rsUnlockReg(genRegMask(regLo));
14945                 }
14946
14947 #ifdef _TARGET_ARM_
14948                 if (tree->gtFlags & GTF_IND_VOLATILE)
14949                 {
14950                     // Emit a memory barrier instruction after the load 
14951                     instGen_MemoryBarrier();
14952                 }
14953 #endif
14954
14955                 genUpdateLife(tree);
14956                 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14957
14958             }
14959             goto DONE;
14960
14961         case GT_CAST:
14962
14963             /* What are we casting from? */
14964
14965             switch (op1->gtType)
14966             {
14967             case TYP_BOOL:
14968             case TYP_BYTE:
14969             case TYP_CHAR:
14970             case TYP_SHORT:
14971             case TYP_INT:
14972             case TYP_UBYTE:
14973             case TYP_BYREF:
14974                 {
14975                     regMaskTP hiRegMask;
14976                     regMaskTP loRegMask;
14977
14978                     // For an unsigned cast we don't need to sign-extend the 32 bit value
14979                     if (tree->gtFlags & GTF_UNSIGNED)
14980                     {
14981                         // Does needReg have exactly two bits set, and thus
14982                         // specify the exact register pair that we want to use?
14983                         if (!genMaxOneBit(needReg))
14984                         {
14985                             regPair   = regSet.rsFindRegPairNo(needReg);
14986                             if (needReg != genRegPairMask(regPair))
14987                                 goto ANY_FREE_REG_UNSIGNED;
14988                             loRegMask = genRegMask(genRegPairLo(regPair));
14989                             if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14990                                 goto ANY_FREE_REG_UNSIGNED;
14991                             hiRegMask = genRegMask(genRegPairHi(regPair));
14992                         }
14993                         else
14994                         {
14995 ANY_FREE_REG_UNSIGNED:
14996                             loRegMask = needReg;
14997                             hiRegMask = needReg;
14998                         }
14999
15000                         genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
15001                         noway_assert(op1->gtFlags & GTF_REG_VAL);
15002
15003                         regLo     = op1->gtRegNum;
15004                         loRegMask = genRegMask(regLo);
15005                         regSet.rsLockUsedReg(loRegMask);
15006                         regHi     = regSet.rsPickReg(hiRegMask);
15007                         regSet.rsUnlockUsedReg(loRegMask);
15008
15009                         regPair = gen2regs2pair(regLo, regHi);
15010
15011                         // Move 0 to the higher word of the ULong
15012                         genSetRegToIcon(regHi, 0, TYP_INT);
15013
15014                         /* We can now free up the operand */
15015                         genReleaseReg(op1);
15016
15017                         goto DONE;
15018                     }
15019 #ifdef _TARGET_XARCH_
15020                     /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
15021                        and we need the result to be in those registers.
15022                        cdq is smaller so we use it for SMALL_CODE
15023                     */
15024
15025                     if  ((needReg & (RBM_EAX|RBM_EDX)) == (RBM_EAX|RBM_EDX)  &&
15026                          (regSet.rsRegMaskFree() & RBM_EDX)                            )
15027                     {
15028                         genCodeForTree(op1, RBM_EAX);
15029                         regSet.rsMarkRegUsed(op1);
15030
15031                         /* If we have to spill EDX, might as well use the faster
15032                            sar as the spill will increase code size anyway */
15033
15034                         if (op1->gtRegNum != REG_EAX ||
15035                             !(regSet.rsRegMaskFree() & RBM_EDX))
15036                         {
15037                             hiRegMask = regSet.rsRegMaskFree();
15038                             goto USE_SAR_FOR_CAST;
15039                         }
15040
15041                         regSet.rsGrabReg      (RBM_EDX);
15042                         regTracker.rsTrackRegTrash(REG_EDX);
15043
15044                         /* Convert the int in EAX into a long in EDX:EAX */
15045
15046                         instGen(INS_cdq);
15047
15048                         /* The result is in EDX:EAX */
15049
15050                         regPair  = REG_PAIR_EAXEDX;
15051                     }
15052                     else
15053 #endif
15054                     {
15055                         /* use the sar instruction to sign-extend a 32-bit integer */
15056
15057                         // Does needReg have exactly two bits set, and thus
15058                         // specify the exact register pair that we want to use?
15059                         if (!genMaxOneBit(needReg))
15060                         {
15061                             regPair = regSet.rsFindRegPairNo(needReg);
15062                             if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
15063                                 goto ANY_FREE_REG_SIGNED;
15064                             loRegMask = genRegMask(genRegPairLo(regPair));
15065                             if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
15066                                 goto ANY_FREE_REG_SIGNED;
15067                             hiRegMask = genRegMask(genRegPairHi(regPair));
15068                         }
15069                         else
15070                         {
15071 ANY_FREE_REG_SIGNED:
15072                             loRegMask = needReg;
15073                             hiRegMask = RBM_NONE;
15074                         }
15075
15076                         genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
15077 #ifdef _TARGET_XARCH_
15078 USE_SAR_FOR_CAST:
15079 #endif
15080                         noway_assert(op1->gtFlags & GTF_REG_VAL);
15081
15082                         regLo     = op1->gtRegNum;
15083                         loRegMask = genRegMask(regLo);
15084                         regSet.rsLockUsedReg(loRegMask);
15085                         regHi     = regSet.rsPickReg(hiRegMask);
15086                         regSet.rsUnlockUsedReg(loRegMask);
15087
15088                         regPair = gen2regs2pair(regLo, regHi);
15089
15090                         /* Copy the lo32 bits from regLo to regHi and sign-extend it */
15091
15092 #ifdef _TARGET_ARM_
15093                         // Use one instruction instead of two
15094                         getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
15095 #else
15096                         inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
15097                         inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
15098 #endif
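                        // Illustrative note: the arithmetic shift right by 31 replicates regLo's
                        // sign bit into every bit of regHi, e.g. regLo = 0xFFFFFFFE (-2) yields
                        // regHi = 0xFFFFFFFF (the 64-bit value -2), while regLo = 0x00000005 yields
                        // regHi = 0x00000000 (the 64-bit value 5).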
15099
15100                         /* The value in the upper register is trashed */
15101
15102                         regTracker.rsTrackRegTrash(regHi);
15103                     }
15104
15105                     /* We can now free up the operand */
15106                     genReleaseReg(op1);
15107
15108                     // conv.ovf.u8 could overflow if the original number was negative
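                    // Illustrative note: on this (signed) path the upper register holds the
                    // sign-extension of the source, so a negative source leaves 0xFFFFFFFF there;
                    // testing that upper half for "signed less than zero" is therefore sufficient,
                    // e.g. casting -1 with conv.ovf.u8 must raise the overflow exception.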
15109                     if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
15110                     {
15111                         regNumber hiReg = genRegPairHi(regPair);
15112                         instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
15113                         emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
15114                         genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
15115                     }
15116                 }
15117                 goto DONE;
15118
15119             case TYP_FLOAT:
15120             case TYP_DOUBLE:
15121
15122 #if 0
15123                 /* Load the FP value onto the coprocessor stack */
15124
15125                 genCodeForTreeFlt(op1);
15126
15127                 /* Allocate a temp for the long value */
15128
15129                 temp = compiler->tmpGetTemp(TYP_LONG);
15130
15131                 /* Store the FP value into the temp */
15132
15133                 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
15134                 genFPstkLevel--;
15135
15136                 /* Pick a register pair for the value */
15137
15138                 regPair  = regSet.rsPickRegPair(needReg);
15139
15140                 /* Figure out which registers the value is in */
15141
15142                 regLo = genRegPairLo(regPair);
15143                 regHi = genRegPairHi(regPair);
15144
15145                 /* The value in the register pair is about to be trashed */
15146
15147                 regTracker.rsTrackRegTrash(regLo);
15148                 regTracker.rsTrackRegTrash(regHi);
15149
15150                 /* Load the converted value into the registers */
15151
15152                 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
15153                 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
15154
15155                 /* We no longer need the temp */
15156
15157                 compiler->tmpRlsTemp(temp);
15158                 goto DONE;
15159 #else
15160                 NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
15161                 break;
15162 #endif
15163             case TYP_LONG:
15164             case TYP_ULONG:
15165                {
15166                     noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
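                    // Illustrative note: for both directions (conv.ovf.u8 from a signed long and
                    // conv.ovf.i8 from an unsigned long) overflow occurs exactly when bit 63 is set,
                    // so checking the sign of the upper 32-bit half below covers both cases.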
15167
15168                     genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
15169                     regPair = op1->gtRegPair;
15170
15171                     // Do we need to set the sign flag, or can we check whether it is
15172                     // already set and skip this "test" if so?
15173
15174                     if (op1->gtFlags & GTF_REG_VAL)
15175                     {
15176                         regNumber hiReg = genRegPairHi(op1->gtRegPair);
15177                         noway_assert(hiReg != REG_STK);
15178                         instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
15179                     }
15180                     else
15181                     {
15182                         inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
15183                     }
15184
15185                     emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
15186                     genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
15187                 }
15188                 goto DONE;
15189
15190             default:
15191 #ifdef  DEBUG
15192                 compiler->gtDispTree(tree);
15193 #endif
15194                 NO_WAY("unexpected cast to long");
15195             }
15196             break;
15197
15198
15199         case GT_RETURN:
15200
15201             /* TODO: 
15202              * This code is cloned from the regular processing of GT_RETURN values.  We have to remember to
15203              * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement.  We should really
15204              * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
15205              */
15206
15207             // TODO: this should be done AFTER we called exit mon so that
15208             //       we are sure that we don't have to keep 'this' alive
15209
15210             if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15211             {
15212                 /* either it's an "empty" statement or the return statement
15213                    of a synchronized method
15214                  */
15215
15216                 genPInvokeMethodEpilog();
15217             }
15218
15219 #if CPU_LONG_USES_REGPAIR
15220             /* There must be a long return value */
15221
15222             noway_assert(op1);
15223
15224             /* Evaluate the return value into EDX:EAX */
15225
15226             genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
15227
15228             noway_assert(op1->gtFlags & GTF_REG_VAL);
15229             noway_assert(op1->gtRegPair == REG_LNGRET);
15230
15231 #else
15232             NYI("64-bit return");
15233 #endif
15234
15235             //The profiling hook does not trash registers, so it's safe to call after we emit the code for
15236             //the GT_RETURN tree.
15237 #ifdef PROFILING_SUPPORTED
15238             /* XXX Thu 7/5/2007
15239              * Oh look.  More cloned code from the regular processing of GT_RETURN.
15240              */
15241             if (compiler->compCurBB == compiler->genReturnBB)
15242             {
15243                 genProfilingLeaveCallback();
15244             }
15245 #endif
15246             return;
15247
15248         case GT_QMARK:
15249             noway_assert(!"inliner-generated ?: for longs NYI");
15250             NO_WAY("inliner-generated ?: for longs NYI");
15251             break;
15252
15253         case GT_COMMA:
15254
15255             if (tree->gtFlags & GTF_REVERSE_OPS)
15256             {
15257                 // Generate op2
15258                 genCodeForTreeLng(op2, needReg, avoidReg);
15259                 genUpdateLife (op2);
15260
15261                 noway_assert(op2->gtFlags & GTF_REG_VAL);
15262
15263                 regSet.rsMarkRegPairUsed(op2);
15264
15265                 // Do side effects of op1
15266                 genEvalSideEffects(op1);
15267
15268                 // Recover op2 if spilled
15269                 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
15270
15271                 genReleaseRegPair(op2);
15272
15273                 genUpdateLife (tree);
15274
15275                 regPair = op2->gtRegPair;
15276             }
15277             else
15278             {
15279                 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
15280
15281                 /* Generate side effects of the first operand */
15282
15283                 genEvalSideEffects(op1);
15284                 genUpdateLife (op1);
15285
15286                 /* Is the value of the second operand used? */
15287
15288                 if  (tree->gtType == TYP_VOID)
15289                 {
15290                     /* The right operand produces no result */
15291
15292                     genEvalSideEffects(op2);
15293                     genUpdateLife(tree);
15294                     return;
15295                 }
15296
15297                 /* Generate the second operand, i.e. the 'real' value */
15298
15299                 genCodeForTreeLng(op2, needReg, avoidReg);
15300
15301                 /* The result of 'op2' is also the final result */
15302
15303                 regPair = op2->gtRegPair;
15304             }
15305
15306             goto DONE;
15307
15308         case GT_BOX:
15309             {
15310                 /* Generate the  operand, i.e. the 'real' value */
15311
15312                 genCodeForTreeLng(op1, needReg, avoidReg);
15313
15314                 /* The result of 'op1' is also the final result */
15315
15316                 regPair = op1->gtRegPair;
15317             }
15318
15319             goto DONE;
15320
15321         case GT_NOP:
15322             if (op1 == NULL)
15323                 return;
15324
15325             genCodeForTreeLng(op1, needReg, avoidReg);
15326             regPair = op1->gtRegPair;
15327             goto DONE;
15328
15329         default:
15330             break;
15331         }
15332
15333 #ifdef  DEBUG
15334         compiler->gtDispTree(tree);
15335 #endif
15336         noway_assert(!"unexpected 64-bit operator");
15337     }
15338
15339     /* See what kind of a special operator we have here */
15340
15341     switch  (oper)
15342     {
15343         regMaskTP retMask;
15344     case GT_CALL:
15345         retMask = genCodeForCall(tree, true);
15346         if (retMask == RBM_NONE)
15347             regPair = REG_PAIR_NONE;
15348         else
15349             regPair = regSet.rsFindRegPairNo(retMask);
15350         break;
15351
15352     default:
15353 #ifdef  DEBUG
15354         compiler->gtDispTree(tree);
15355 #endif
15356         NO_WAY("unexpected long operator");
15357     }
15358
15359 DONE:
15360
15361     genUpdateLife(tree);
15362
15363     /* Here we've computed the value of 'tree' into 'regPair' */
15364
15365     noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15366
15367     genMarkTreeInRegPair(tree, regPair);
15368 }
15369 #ifdef _PREFAST_
15370 #pragma warning(pop)
15371 #endif
15372
15373
15374 /*****************************************************************************
15375  *
15376  *  Generate code for a mod of a long by an int.
15377  */
15378
15379 regPairNo           CodeGen::genCodeForLongModInt(GenTreePtr tree,
15380                                                   regMaskTP needReg)
15381 {
15382 #ifdef _TARGET_X86_
15383
15384     regPairNo       regPair;
15385     regMaskTP       addrReg;
15386
15387     genTreeOps      oper = tree->OperGet();
15388     GenTreePtr      op1  = tree->gtOp.gtOp1;
15389     GenTreePtr      op2  = tree->gtOp.gtOp2;
15390
15391     /* Codegen only for Unsigned MOD */
15392     noway_assert(oper == GT_UMOD);
15393
15394     /* op2 must be a long constant in the range 2 to 0x3fffffff */
15395
15396     noway_assert((op2->gtOper == GT_CNS_LNG) &&
15397            (op2->gtLngCon.gtLconVal >= 2) &&
15398            (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15399     int val = (int) op2->gtLngCon.gtLconVal;
15400
15401     op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15402
15403     op2->gtType             = TYP_INT;
15404     op2->gtIntCon.gtIconVal = val;
15405
15406     /* Which operand are we supposed to compute first? */
15407
15408     if  (tree->gtFlags & GTF_REVERSE_OPS)
15409     {
15410         /* Compute the second operand into a scratch register, other
15411            than EAX or EDX */
15412
15413         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15414
15415         /* Special case: if op2 is a local var we are done */
15416
15417         if  (op2->gtOper == GT_LCL_VAR ||
15418              op2->gtOper == GT_LCL_FLD ||
15419              op2->gtOper == GT_CLS_VAR)
15420         {
15421             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15422         }
15423         else
15424         {
15425             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15426
15427             noway_assert(op2->gtFlags & GTF_REG_VAL);
15428             addrReg = genRegMask(op2->gtRegNum);
15429         }
15430
15431         /* Compute the first operand into EAX:EDX */
15432
15433         genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15434         noway_assert(op1->gtFlags & GTF_REG_VAL);
15435         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15436
15437         /* And recover the second argument while locking the first one */
15438
15439         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15440     }
15441     else
15442     {
15443         /* Compute the first operand into EAX:EDX */
15444
15445         genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15446         noway_assert(op1->gtFlags & GTF_REG_VAL);
15447         noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15448
15449         /* Compute the second operand into a scratch register, other
15450            than EAX or EDX */
15451
15452         needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15453
15454         /* Special case: if op2 is a local var we are done */
15455
15456         if  (op2->gtOper == GT_LCL_VAR ||
15457              op2->gtOper == GT_LCL_FLD ||
15458              op2->gtOper == GT_CLS_VAR)
15459         {
15460             addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15461         }
15462         else
15463         {
15464             genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15465
15466             noway_assert(op2->gtFlags & GTF_REG_VAL);
15467             addrReg = genRegMask(op2->gtRegNum);
15468         }
15469
15470         /* Recover the first argument */
15471
15472         genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15473
15474         /* And recover the second argument while locking the first one */
15475
15476         addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15477     }
15478
15479     /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
15480        contains the 32bit divisor.  We want to generate the following code:
15481
15482        ==========================
15483        Unsigned (GT_UMOD)
15484
15485        cmp edx, op2->gtRegNum
15486        jb  lab_no_overflow
15487
15488        mov temp, eax
15489        mov eax, edx
15490        xor edx, edx
15491        div op2->gtRegNum
15492        mov eax, temp
15493
15494        lab_no_overflow:
15495        div op2->gtRegNum
15496        ==========================
15497        This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
15498     */
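    /* Worked example (illustrative): with c = 7 we have 2^32 mod 7 == 4, so
       (10 * 2^32 + 3) mod 7 == (10*4 + 3) mod 7 == 1 and ((10 mod 7) * 2^32 + 3) mod 7
       == (3*4 + 3) mod 7 == 1.  Reducing the high word first guarantees EDX < divisor,
       so the final "div" cannot raise a divide-overflow fault. */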
15499
15500     BasicBlock * lab_no_overflow = genCreateTempLabel();
15501
15502     // grab a temporary register other than eax, edx, and op2->gtRegNum
15503
15504     regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15505
15506     // EAX and tempReg will be trashed by the mov instructions.  Doing
15507     // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15508
15509     regTracker.rsTrackRegTrash (REG_PAIR_TMP_LO);
15510     regTracker.rsTrackRegTrash (tempReg);
15511
15512     inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15513     inst_JMP(EJ_jb ,lab_no_overflow);
15514
15515     inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15516     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15517     genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15518     inst_TT(INS_UNSIGNED_DIVIDE,  op2);
15519     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15520
15521     // Jump point for no overflow divide
15522
15523     genDefineTempLabel(lab_no_overflow);
15524
15525     // Issue the divide instruction
15526
15527     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15528
15529     /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15530
15531     regTracker.rsTrackRegTrash (REG_PAIR_TMP_LO);
15532     regTracker.rsTrackRegTrash (REG_PAIR_TMP_HI);
15533     regTracker.rsTrackRegTrash (tempReg);
15534     regTracker.rsTrackRegTrash (op2->gtRegNum);
15535
15536
15537     if (tree->gtFlags & GTF_MOD_INT_RESULT)
15538     {
15539         /* We don't need to normalize the result, because the caller wants
15540            an int (in edx) */
15541
15542         regPair = REG_PAIR_TMP_REVERSE;
15543     }
15544     else
15545     {
15546         /* The result is now in EDX; we have to normalize it, i.e. we have
15547            to issue:
15548            mov eax, edx; xor edx, edx (for UMOD)
15549         */
15550
15551         inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15552
15553         genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15554
15555         regPair = REG_PAIR_TMP;
15556     }
15557
15558     genReleaseRegPair(op1);
15559     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15560
15561     return regPair;
15562
15563 #else // !_TARGET_X86_
15564
15565     NYI("codegen for LongModInt");
15566
15567     return REG_PAIR_NONE;
15568
15569 #endif // !_TARGET_X86_
15570 }
15571
15572 // Given a tree, return the number of registers that are currently
15573 // used to hold integer enregistered local variables.
15574 // Note that, an enregistered TYP_LONG can take 1 or 2 registers.
15575 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
15576 {
15577     unsigned regCount = 0;
15578     
15579     VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
15580     while (iter.NextElem(compiler, &varNum))
15581     {
15582         unsigned    lclNum = compiler->lvaTrackedToVarNum[varNum];
15583         LclVarDsc * varDsc = &compiler->lvaTable[lclNum];
15584
15585         if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15586         {
15587             ++regCount;
15588
15589             if (varTypeIsLong(varDsc->TypeGet()))
15590             {
15591                 // For enregistered LONG/ULONG, the lower half should always be in a register.                
15592                 noway_assert(varDsc->lvRegNum != REG_STK);
15593
15594                 // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as well.
15595                 if (varDsc->lvOtherReg != REG_STK)
15596                 {
15597                     ++regCount;
15598                 }
15599             }
15600         }
15601     } 
15602     
15603     return regCount;
15604     
15605 }
15606
15607 /*****************************************************************************/
15608 /*****************************************************************************/
15609 #if     CPU_HAS_FP_SUPPORT
15610 /*****************************************************************************
15611  *
15612  *  Generate code for a floating-point operation.
15613  */
15614
15615 void                CodeGen::genCodeForTreeFlt(GenTreePtr tree,
15616                                                regMaskTP  needReg,     /* = RBM_ALLFLOAT */
15617                                                regMaskTP  bestReg)     /* = RBM_NONE */
15618 {
15619     genCodeForTreeFloat(tree, needReg, bestReg);
15620
15621     if (tree->OperGet() == GT_RETURN)
15622     {
15623         //Make sure to get ALL THE EPILOG CODE
15624
15625         // TODO: this should be done AFTER we called exit mon so that
15626         //       we are sure that we don't have to keep 'this' alive
15627
15628         if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15629         {
15630             /* either it's an "empty" statement or the return statement
15631                of a synchronized method
15632              */
15633
15634             genPInvokeMethodEpilog();
15635         }
15636
15637         //The profiling hook does not trash registers, so it's safe to call after we emit the code for
15638         //the GT_RETURN tree.
15639 #ifdef PROFILING_SUPPORTED
15640         /* XXX Thu 7/5/2007
15641          * Oh look.  More cloned code from the regular processing of GT_RETURN.
15642          */
15643         if (compiler->compCurBB == compiler->genReturnBB)
15644         {
15645             genProfilingLeaveCallback();
15646         }
15647 #endif
15648     }
15649 }
15650
15651 /*****************************************************************************/
15652 #endif//CPU_HAS_FP_SUPPORT
15653
15654 /*****************************************************************************
15655  *
15656  *  Generate a table switch - the switch value (0-based) is in register 'reg'.
15657  */
15658
15659 void            CodeGen::genTableSwitch(regNumber      reg,
15660                                         unsigned       jumpCnt,
15661                                         BasicBlock **  jumpTab)
15662 {
15663     unsigned    jmpTabBase;
15664
15665     if (jumpCnt == 1)
15666     {
15667         //In debug code, we don't optimize away the trivial switch statements.  So we can get here with a
15668         //BBJ_SWITCH with only a default case.  Therefore, don't generate the switch table.
15669         noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15670         inst_JMP(EJ_jmp, jumpTab[0]);
15671         return;
15672     }
15673
15674     noway_assert(jumpCnt >= 2);
15675
15676     /* Is the number of cases right for a test and jump switch? */
15677
15678     const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15679     const bool fDefaultFollows   = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15680     const bool fHaveScratchReg   = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
15681
15682
15683     unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15684
15685     // This means really just a single cmp/jcc (aka a simple if/else)
15686     if (fFirstCaseFollows || fDefaultFollows)
15687         minSwitchTabJumpCnt++;
15688
15689 #ifdef _TARGET_ARM_
15690     // On the ARM for small switch tables we will 
15691     // generate a sequence of compare and branch instructions
15692     // because the code to load the base of the switch
15693     // table is huge and hideous due to the relocation... :(
15694     // 
15695     minSwitchTabJumpCnt++;
15696     if (fHaveScratchReg)
15697         minSwitchTabJumpCnt++;
15698     
15699 #endif // _TARGET_ARM_
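    // Illustrative summary: the threshold starts at 2, grows by one when the first case or the
    // default falls through, and on ARM by one more (plus another one when a scratch register is
    // free); e.g. on ARM with a fall-through case and a free scratch register a jump table is only
    // emitted once jumpCnt reaches 5.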
15700
15701     if (jumpCnt < minSwitchTabJumpCnt)
15702     {
15703         /* Does the first case label follow? */
15704         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15705
15706         if  (fFirstCaseFollows)
15707         {
15708             /* Check for the default case */
15709             inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15710             emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15711             inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15712
15713             /* No need to jump to the first case */
15714
15715             jumpCnt -= 2;
15716             jumpTab += 1;
15717
15718             /* Generate a series of "sub reg, 1; je label" */
15719
15720             // Make sure that we can trash the register so
15721             // that we can generate a series of compares and jumps
15722             //
15723             if ((jumpCnt > 0) && !fHaveScratchReg)
15724             {
15725                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15726                 inst_RV_RV(INS_mov, tmpReg, reg);
15727                 regTracker.rsTrackRegTrash(tmpReg);
15728                 reg = tmpReg;
15729             }         
15730
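            // Illustrative note: for three remaining cases this loop emits roughly
            //     sub reg, 1 / je case1
            //     sub reg, 1 / je case2
            //     sub reg, 1 / je case3
            // relying on the range check above to have routed out-of-range values to the default.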
15731             while (jumpCnt > 0)
15732             {
15733                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15734                 inst_JMP(jmpEqual, *jumpTab++);
15735                 jumpCnt--;
15736             }
15737         }
15738         else
15739         {
15740             /* Check for case0 first */
15741             instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15742             inst_JMP(jmpEqual, *jumpTab);
15743
15744             /* No need to jump to the first case or the default */
15745
15746             jumpCnt -= 2;
15747             jumpTab += 1;
15748
15749             /* Generate a series of "sub reg, 1; je label" */
15750
15751             // Make sure that we can trash the register so
15752             // that we can generate a series of compares and jumps
15753             //
15754             if ((jumpCnt > 0) && !fHaveScratchReg)
15755             {
15756                 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15757                 inst_RV_RV(INS_mov, tmpReg, reg);
15758                 regTracker.rsTrackRegTrash(tmpReg);
15759                 reg = tmpReg;
15760             }         
15761
15762             while (jumpCnt > 0)
15763             {
15764                 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15765                 inst_JMP(jmpEqual, *jumpTab++);
15766                 jumpCnt--;
15767             }
15768
15769             if (!fDefaultFollows)
15770             {
15771                 inst_JMP(EJ_jmp, *jumpTab);
15772             }
15773         }
15774
15775         if ((fFirstCaseFollows || fDefaultFollows) && compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15776         {
15777             inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
15778         }
15779
15780         return;
15781     }
15782
15783     /* First take care of the default case */
15784
15785     inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15786     emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15787     inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15788
15789     /* Generate the jump table contents */
15790
15791     jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15792
15793 #ifdef  DEBUG
15794     if  (compiler->opts.dspCode)
15795         printf("\n      J_M%03u_DS%02u LABEL   DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15796 #endif
15797
15798     for (unsigned index = 0; index < jumpCnt - 1; index++)
15799     {
15800         BasicBlock* target = jumpTab[index];
15801
15802         noway_assert(target->bbFlags & BBF_JMP_TARGET);
15803
15804 #ifdef  DEBUG
15805         if  (compiler->opts.dspCode)
15806             printf("            DD      L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15807 #endif
15808
15809         getEmitter()->emitDataGenData(index, target);
15810     }
15811
15812     getEmitter()->emitDataGenEnd();
15813
15814 #ifdef _TARGET_ARM_
15815     // We need to load the address of the table into a register.
15816     // The data section might get placed a long distance away, so we
15817     // can't safely do a PC-relative ADR. :(
15818     // Pick any register except the index register.
15819     //
15820     regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
15821     getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
15822     getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
15823     regTracker.rsTrackRegTrash(regTabBase);
15824
15825     // LDR PC, [regTabBase + reg * 4] (encoded as LDR PC, [regTabBase, reg, LSL #2])
15826     getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15827
15828 #else // !_TARGET_ARM_
15829
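    // (Roughly) this emits an indirect jump through the table just generated:
    //     jmp dword ptr [jmpTabBase + reg*4]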
15830     getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15831
15832 #endif
15833 }
15834
15835 /*****************************************************************************
15836  *
15837  *  Generate code for a switch statement.
15838  */
15839
15840 void                CodeGen::genCodeForSwitch(GenTreePtr tree)
15841 {
15842     unsigned        jumpCnt;
15843     BasicBlock * *  jumpTab;
15844
15845     GenTreePtr      oper;
15846     regNumber       reg;
15847
15848     noway_assert(tree->gtOper == GT_SWITCH);
15849     oper = tree->gtOp.gtOp1;
15850     noway_assert(genActualTypeIsIntOrI(oper->gtType));
15851
15852     /* Get hold of the jump table */
15853
15854     noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15855
15856     jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15857     jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15858
15859     /* Compute the switch value into some register */
15860
15861     genCodeForTree(oper, 0);
15862
15863     /* Get hold of the register the value is in */
15864
15865     noway_assert(oper->gtFlags & GTF_REG_VAL);
15866     reg = oper->gtRegNum;
15867
15868 #if FEATURE_STACK_FP_X87
15869     if (!compCurFPState.IsEmpty())
15870     {
15871         return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15872     }
15873     else
15874 #endif // FEATURE_STACK_FP_X87
15875     {
15876         return genTableSwitch(reg, jumpCnt, jumpTab);
15877     }
15878 }
15879
15880 /*****************************************************************************/
15881 /*****************************************************************************
15882  *  Emit a call to a helper function.
15883  */
15884
15885 // inline
15886 void        CodeGen::genEmitHelperCall(unsigned    helper,
15887                                        int         argSize,
15888                                        emitAttr    retSize)
15889 {
15890     // Can we call the helper function directly
15891
15892     void * addr = NULL, **pAddr = NULL;
15893
15894     // Don't ask VM if it hasn't requested ELT hooks 
15895 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
15896     if (!compiler->compProfilerHookNeeded && 
15897         compiler->opts.compJitELTHookEnabled &&
15898         (helper == CORINFO_HELP_PROF_FCN_ENTER ||
15899          helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15900          helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15901     {
15902         addr = compiler->compProfilerMethHnd;
15903     }
15904     else
15905 #endif
15906     {
15907         addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15908     }
15909
15910
15911 #ifdef _TARGET_ARM_
15912     if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15913     {
15914         // Load the address into a register and call  through a register
15915         regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
15916         if (addr)
15917         {
15918             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
15919         }
15920         else
15921         {
15922             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15923             regTracker.rsTrackRegTrash(indCallReg);
15924         }
15925
15926         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
15927                                  compiler->eeFindHelper(helper),
15928                                  INDEBUG_LDISASM_COMMA(nullptr)
15929                                  NULL,                          // addr
15930                                  argSize,
15931                                  retSize,
15932                                  gcInfo.gcVarPtrSetCur,
15933                                  gcInfo.gcRegGCrefSetCur,
15934                                  gcInfo.gcRegByrefSetCur,
15935                                  BAD_IL_OFFSET,                 // ilOffset
15936                                  indCallReg,                    // ireg
15937                                  REG_NA, 0, 0,                  // xreg, xmul, disp
15938                                  false,                         // isJump
15939                                  emitter::emitNoGChelper(helper),
15940                                  (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15941     }
15942     else
15943     {
15944         getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN,
15945                                  compiler->eeFindHelper(helper),
15946                                  INDEBUG_LDISASM_COMMA(nullptr)
15947                                  addr,
15948                                  argSize,
15949                                  retSize,
15950                                  gcInfo.gcVarPtrSetCur,
15951                                  gcInfo.gcRegGCrefSetCur,
15952                                  gcInfo.gcRegByrefSetCur,
15953                                  BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0,    /* ilOffset, ireg, xreg, xmul, disp */
15954                                  false,                 /* isJump */
15955                                  emitter::emitNoGChelper(helper),
15956                                  (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
15957     }
15958 #else
15959
15960     {
15961         emitter::EmitCallType  callType = emitter::EC_FUNC_TOKEN;
15962
15963         if (!addr)
15964         {
15965             callType = emitter::EC_FUNC_TOKEN_INDIR;
15966             addr = pAddr;
15967         }
15968
15969         getEmitter()->emitIns_Call(callType,
15970                                  compiler->eeFindHelper(helper),
15971                                  INDEBUG_LDISASM_COMMA(nullptr)
15972                                  addr,
15973                                  argSize,
15974                                  retSize,
15975                                  gcInfo.gcVarPtrSetCur,
15976                                  gcInfo.gcRegGCrefSetCur,
15977                                  gcInfo.gcRegByrefSetCur,
15978                                  BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0,    /* ilOffset, ireg, xreg, xmul, disp */
15979                                  false,                 /* isJump */
15980                                  emitter::emitNoGChelper(helper));
15981     }
15982 #endif
15983
15984     regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15985     regTracker.rsTrashRegsForGCInterruptability();    
15986 }
15987
15988 /*****************************************************************************
15989  *
15990  *  Push the given registers.
15991  *  This function does not check if the register is marked as used, etc.
15992  */
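 *  Each pushed register is classified as a GC ref, a byref, or a plain value so that the
 *  pushed stack slot is tracked correctly for GC purposes; the returned mask together with
 *  *byrefRegs and *noRefRegs is what the matching genPopRegs() call expects.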
15993
15994 regMaskTP           CodeGen::genPushRegs(regMaskTP regs, regMaskTP * byrefRegs, regMaskTP * noRefRegs)
15995 {
15996     *byrefRegs = RBM_NONE;
15997     *noRefRegs = RBM_NONE;
15998
15999 //  noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
16000
16001     if (regs == RBM_NONE)
16002         return RBM_NONE;
16003
16004 #if FEATURE_FIXED_OUT_ARGS 
16005
16006     NYI("Don't call genPushRegs with real regs!");
16007     return RBM_NONE;
16008
16009 #else // FEATURE_FIXED_OUT_ARGS
16010
16011     noway_assert(genTypeStSz(TYP_REF)   == genTypeStSz(TYP_I_IMPL));
16012     noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
16013
16014     regMaskTP pushedRegs = regs;
16015
16016     for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
16017     {
16018         regMaskTP regBit = regMaskTP(1) << reg;
16019
16020         if ((regBit & regs) == RBM_NONE)
16021             continue;
16022
16023         var_types type;
16024         if (regBit & gcInfo.gcRegGCrefSetCur)
16025         {
16026             type = TYP_REF;
16027         }
16028         else
16029         if (regBit & gcInfo.gcRegByrefSetCur)
16030         {
16031             *byrefRegs |= regBit;
16032             type = TYP_BYREF;
16033         }
16034         else
16035         if (noRefRegs != NULL)
16036         {
16037             *noRefRegs |= regBit;
16038             type = TYP_I_IMPL;
16039         }
16040         else
16041         {
16042             continue;
16043         }
16044
16045         inst_RV(INS_push, reg, type);
16046
16047         genSinglePush();
16048         gcInfo.gcMarkRegSetNpt(regBit);
16049
16050         regs &= ~regBit;
16051     }
16052
16053     return pushedRegs;
16054
16055 #endif // FEATURE_FIXED_OUT_ARGS
16056
16057 }
16058
16059 /*****************************************************************************
16060  *
16061  * Pop the registers pushed by genPushRegs()
16062  */
16063
16064 void                CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
16065 {
16066     if (regs == RBM_NONE)
16067         return;
16068
16069 #if FEATURE_FIXED_OUT_ARGS 
16070
16071     NYI("Don't call genPopRegs with real regs!");
16072
16073 #else // FEATURE_FIXED_OUT_ARGS
16074
16075     noway_assert((regs & byrefRegs) == byrefRegs);
16076     noway_assert((regs & noRefRegs) == noRefRegs);
16077 //  noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
16078     noway_assert((regs & (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur)) == RBM_NONE);
16079
16080     noway_assert(genTypeStSz(TYP_REF)   == genTypeStSz(TYP_INT));
16081     noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
16082
16083     // Walk the registers in the reverse order from genPushRegs(), so the pops mirror the pushes
16084     for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
16085     {
16086         regMaskTP regBit = regMaskTP(1) << reg;
16087
16088         if ((regBit & regs) == RBM_NONE)
16089             continue;
16090
16091         var_types type;
16092         if (regBit & byrefRegs)
16093         {
16094             type = TYP_BYREF;
16095         }
16096         else
16097         if (regBit & noRefRegs)
16098         {
16099             type = TYP_INT;
16100         }
16101         else
16102         {
16103             type = TYP_REF;
16104         }
16105
16106         inst_RV(INS_pop, reg, type);
16107         genSinglePop();
16108
16109         if (type != TYP_INT)
16110             gcInfo.gcMarkRegPtrVal(reg, type);
16111
16112         regs &= ~regBit;
16113     }
16114
16115 #endif // FEATURE_FIXED_OUT_ARGS
16116
16117 }
16118
16119 /*****************************************************************************
16120  *
16121  *  Push the given argument list, right to left; returns the total amount of
16122  *  stuff pushed.
16123  */
16124
16125 #if !FEATURE_FIXED_OUT_ARGS 
16126 #ifdef _PREFAST_
16127 #pragma warning(push)
16128 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
16129 #endif
16130 size_t              CodeGen::genPushArgList(GenTreePtr  call)
16131 {
16132     GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
16133     size_t          size    = 0;
16134     regMaskTP       addrReg;
16135
16136     GenTreeArgList* args;
16137     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16138     // so we can iterate over this argument list more uniformly.
16139     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16140     GenTreeArgList  firstForObjp(/*temp dummy arg*/call, call->gtCall.gtCallArgs);
16141     if (call->gtCall.gtCallObjp == NULL)
16142     {
16143         args = call->gtCall.gtCallArgs;
16144     }
16145     else
16146     {
16147         firstForObjp.Current() = call->gtCall.gtCallObjp;
16148         args = &firstForObjp;
16149     }
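    // For instance (illustrative): for an instance call obj.M(...), the loop below
    // visits obj (gtCallObjp) first and then the entries of gtCallArgs, just as if
    // obj were the head of that argument list.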
16150
16151     GenTreePtr      curr;
16152     var_types       type;
16153     size_t          opsz;
16154
16155     for (; args; args = args->Rest())
16156     {
16157         addrReg = DUMMY_INIT(RBM_CORRUPT);   // to detect uninitialized use
16158
16159         /* Get hold of the next argument value */
16160         curr = args->Current();
16161
16162         if (curr->IsArgPlaceHolderNode())
16163         {
16164             assert(curr->gtFlags & GTF_LATE_ARG);
16165
16166             addrReg = 0;
16167             continue;
16168         }
16169
16170         // If we have a comma expression, eval the non-last, then deal with the last.
16171         if (!(curr->gtFlags & GTF_LATE_ARG))
16172             curr = genCodeForCommaTree(curr);
16173
16174         /* See what type of a value we're passing */
16175         type = curr->TypeGet();
16176
16177         opsz = genTypeSize(genActualType(type));
16178
16179         switch (type)
16180         {
16181         case TYP_BOOL:
16182         case TYP_BYTE:
16183         case TYP_SHORT:
16184         case TYP_CHAR:
16185         case TYP_UBYTE:
16186
16187             /* Don't want to push a small value, make it a full word */
16188
16189             genCodeForTree(curr, 0);
16190
16191             __fallthrough; // now the value should be in a register ...
16192
16193         case TYP_INT:
16194         case TYP_REF:
16195         case TYP_BYREF:
16196 #if !CPU_HAS_FP_SUPPORT
16197         case TYP_FLOAT:
16198 #endif
16199
16200             if (curr->gtFlags & GTF_LATE_ARG)
16201             {
16202                 assert(curr->gtOper == GT_ASG);
16203                 /* one more argument will be passed in a register */
16204                 noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
16205
16206                 /* arg is passed in the register, nothing on the stack */
16207
16208                 opsz = 0;
16209
16210             }
16211
16212             /* Is this value a handle? */
16213
16214             if  (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
16215             {
16216                 /* Emit a fixup for the push instruction */
16217
16218                 inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
16219                 genSinglePush();
16220
16221                 addrReg = 0;
16222                 break;
16223             }
16224
16225
16226             /* Is the value a constant? */
16227
16228             if  (curr->gtOper == GT_CNS_INT)
16229             {
16230
16231 #if     REDUNDANT_LOAD
16232                 regNumber       reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16233
16234                 if  (reg != REG_NA)
16235                 {
16236                     inst_RV(INS_push, reg, TYP_INT);
16237                 }
16238                 else
16239 #endif
16240                 {
16241                     inst_IV(INS_push, curr->gtIntCon.gtIconVal);
16242                 }
16243
16244                 /* If the type is TYP_REF, then this must be a "null". So we can
16245                    treat it as a TYP_INT as we don't need to report it as a GC ptr */
16246
16247                 noway_assert(curr->TypeGet() == TYP_INT ||
16248                              (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
16249
16250                 genSinglePush();
16251
16252                 addrReg = 0;
16253                 break;
16254             }
16255
16256
16257             if (curr->gtFlags & GTF_LATE_ARG)
16258             {
16259                 /* This must be a register arg temp assignment */
16260
16261                 noway_assert(curr->gtOper == GT_ASG);
16262
16263                 /* Evaluate it to the temp */
16264
16265                 genCodeForTree(curr, 0);
16266
16267                 /* Increment the current argument register counter */
16268
16269                 intRegState.rsCurRegArgNum++;
16270
16271                 addrReg = 0;
16272             }
16273             else
16274             {
16275                 /* This is a 32-bit integer non-register argument */
16276
16277                 addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
16278                 inst_TT(INS_push, curr);
16279                 genSinglePush();
16280                 genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
16281
16282             }
16283             break;
16284
16285         case TYP_LONG:
16286 #if !CPU_HAS_FP_SUPPORT
16287         case TYP_DOUBLE:
16288 #endif
16289
16290             /* Is the value a constant? */
16291
16292             if  (curr->gtOper == GT_CNS_LNG)
16293             {
16294                 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
16295                 genSinglePush();
16296                 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal      ));
16297                 genSinglePush();
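                // Illustrative example (the value is made up): for the constant
                // 0x0000000100000002 this pushes 0x00000001 first and 0x00000002
                // second, so the low half lands at the lower stack address, matching
                // the little-endian in-memory layout of a 64-bit value.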
16298
16299                 addrReg = 0;
16300             }
16301             else
16302             {
16303                 addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
16304
16305                 inst_TT(INS_push, curr, sizeof(int));
16306                 genSinglePush();
16307                 inst_TT(INS_push, curr);
16308                 genSinglePush();
16309             }
16310             break;
16311
16312 #if  CPU_HAS_FP_SUPPORT
16313         case TYP_FLOAT:
16314         case TYP_DOUBLE:
16315 #endif
16316 #if FEATURE_STACK_FP_X87
16317             addrReg = genPushArgumentStackFP(curr);
16318 #else
16319             NYI("FP codegen");
16320             addrReg = 0;
16321 #endif
16322             break;
16323
16324         case TYP_VOID:
16325
16326             /* Is this a nothing node, deferred register argument? */
16327
16328             if (curr->gtFlags & GTF_LATE_ARG)
16329             {
16330                 GenTree* arg = curr;
16331                 if (arg->gtOper == GT_COMMA)
16332                 {
16333                     while (arg->gtOper == GT_COMMA)
16334                     {
16335                         GenTreePtr op1 = arg->gtOp.gtOp1;
16336                         genEvalSideEffects(op1);
16337                         genUpdateLife(op1);
16338                         arg = arg->gtOp.gtOp2;
16339                     }
16340                     if (!arg->IsNothingNode())
16341                     {
16342                         genEvalSideEffects(arg);
16343                         genUpdateLife(arg);
16344                     }
16345                 }
16346
16347                 /* increment the register count and continue with the next argument */
16348
16349                 intRegState.rsCurRegArgNum++;
16350
16351                 noway_assert(opsz == 0);
16352
16353                 addrReg = 0;
16354                 break;
16355             }
16356
16357             __fallthrough;
16358
16359         case TYP_STRUCT:
16360         {
16361             GenTree* arg = curr;
16362             while (arg->gtOper == GT_COMMA)
16363             {
16364                 GenTreePtr op1 = arg->gtOp.gtOp1;
16365                 genEvalSideEffects(op1);
16366                 genUpdateLife(op1);
16367                 arg = arg->gtOp.gtOp2;
16368             }
16369
16370             noway_assert(arg->gtOper == GT_OBJ 
16371                          || arg->gtOper == GT_MKREFANY
16372                          || arg->gtOper == GT_IND);
16373             noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
16374             noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
16375
16376             if (arg->gtOper == GT_MKREFANY)
16377             {
16378                 GenTreePtr op1 = arg->gtOp.gtOp1;
16379                 GenTreePtr op2 = arg->gtOp.gtOp2;
16380
16381                 addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
16382
16383                 /* Is this value a handle? */
16384                 if  (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
16385                 {
16386                     /* Emit a fixup for the push instruction */
16387
16388                     inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
16389                     genSinglePush();
16390                 }
16391                 else
16392                 {
16393                     regMaskTP  addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
16394                     inst_TT(INS_push, op2);
16395                     genSinglePush();
16396                     genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
16397
16398                 }
16399                 addrReg = genKeepAddressable(op1, addrReg);
16400                 inst_TT(INS_push, op1);
16401                 genSinglePush();
16402                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
16403
16404                 opsz = 2*TARGET_POINTER_SIZE;
16405             }
16406             else
16407             {
16408                 noway_assert(arg->gtOper == GT_OBJ);
16409
16410                 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
16411                     arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16412                 {              
16413                     GenTreePtr   structLocalTree  = arg->gtObj.gtOp1->gtOp.gtOp1;
16414                     unsigned     structLclNum     = structLocalTree->gtLclVarCommon.gtLclNum;
16415                     LclVarDsc *  varDsc           = &compiler->lvaTable[structLclNum];
16416
16417                     // As much as we would like this to be a noway_assert, we can't because
16418                     // there are some weird casts out there, and backwards compatibility
16419                     // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16420                     // lvPromoted in general currently do not require the local to be
16421                     // TYP_STRUCT, so this assert is really more about how we wish the world
16422                     // were than about some JIT invariant.
16423                     assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16424
16425                     Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);         
16426    
16427                     if (varDsc->lvPromoted && 
16428                         promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT)  // Otherwise it is guaranteed to live on stack.
16429                     {
16430                         assert(!varDsc->lvAddrExposed);  // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16431
16432                         addrReg = 0;
16433
16434                         // Get the number of BYTES to copy to the stack                     
16435                         opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));                  
16436                         size_t bytesToBeCopied = opsz;
16437                 
16438                         // postponedFields is true if we have any postponed fields
16439                         //   Any field that does not start on a 4-byte boundary is a postponed field
16440                         //   Such a field is required to be a short or a byte
16441                         //
16442                         // postponedRegKind records the kind of scratch register we will 
16443                         //   need to process the postponed fields
16444                         //   RBM_NONE means that we don't need a register
16445                         //
16446                         // expectedAlignedOffset records the aligned offset that
16447                         //   has to exist for a push to cover the postponed fields.
16448                         //   Since all promoted structs have the tightly packed property 
16449                         //   we are guaranteed that we will have such a push
16450                         //
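                        // Illustrative example (assumed field layout, not from the original):
                        // for a packed promoted struct
                        //     struct S { int i;    /* offset 0, stack aligned          */
                        //                short s;  /* offset 4, aligned, only 2 bytes  */
                        //                char  b;  /* offset 6, unaligned => postponed */ };
                        // the reverse loop below sees 'b' first, sets postponedFields and
                        // expectedAlignedOffset = 4; the 4-byte push of 's' then covers
                        // stack offsets 4..7, and 'i' is pushed last.  The forward loop
                        // further down stores 'b' with a byte-sized mov into [ESP+6].
                        //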
16451                         bool       postponedFields        = false;
16452                         regMaskTP  postponedRegKind       = RBM_NONE;
16453                         size_t     expectedAlignedOffset  = UINT_MAX;
16454                     
16455                         VARSET_TP* deadVarBits = NULL;
16456                         compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
16457
16458                         // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
16459                         //
16460                         for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
16461                              varNum >= (int) varDsc->lvFieldLclStart;
16462                              varNum--)
16463                         {           
16464                             LclVarDsc * fieldVarDsc = compiler->lvaTable + varNum;   
16465 #ifdef DEBUG
16466                             if (fieldVarDsc->lvExactSize == 2*sizeof(unsigned))                            
16467                             {
16468                                 noway_assert(fieldVarDsc->lvFldOffset % (2*sizeof(unsigned)) == 0);
16469                                 noway_assert(fieldVarDsc->lvFldOffset + (2*sizeof(unsigned)) == bytesToBeCopied);
16470                             }
16471 #endif                        
16472                             // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
16473                             // For packed structs we will go back and store the unaligned bytes and shorts 
16474                             // in the next loop
16475                             //
16476                             if (fieldVarDsc->lvStackAligned())
16477                             {
16478                                 if (fieldVarDsc->lvExactSize != 2*sizeof(unsigned)  &&
16479                                     fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
16480                                 {                                
16481                                     // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
16482                                     // Just push some junk (i.e., EAX) on the stack.
16483                                     inst_RV(INS_push, REG_EAX, TYP_INT);
16484                                     genSinglePush();
16485
16486                                     bytesToBeCopied -= sizeof(void*);
16487                                 }
16488
16489                                 // If we have an expectedAlignedOffset make sure that this push instruction
16490                                 // is what we expect to cover the postponedFields
16491                                 //
16492                                 if (expectedAlignedOffset != UINT_MAX)
16493                                 {
16494                                     // This push must be for a small field
16495                                     noway_assert(fieldVarDsc->lvExactSize < 4);
16496                                     // The fldOffset for this push should be equal to the expectedAlignedOffset
16497                                     noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
16498                                     expectedAlignedOffset = UINT_MAX;
16499                                 }
16500                                                         
16501                                 // Push the "upper half" of LONG var first
16502                             
16503                                 if (isRegPairType(fieldVarDsc->lvType))
16504                                 {
16505                                     if  (fieldVarDsc->lvOtherReg != REG_STK)
16506                                     {                                   
16507                                         inst_RV(INS_push, 
16508                                                 fieldVarDsc->lvOtherReg,
16509                                                 TYP_INT); 
16510                                         genSinglePush();
16511  
16512                                         // Prepare the set of vars to be cleared from gcref/gcbyref set
16513                                         // in case they become dead after genUpdateLife.                            
16514                                         // genDoneAddressable() will remove dead gc vars by calling gcInfo.gcMarkRegSetNpt. 
16515                                         // Although it is not addrReg, we just borrow the name here.
16516                                         addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16517                                     }
16518                                     else
16519                                     {
16520                                         getEmitter()->emitIns_S(INS_push,
16521                                                               EA_4BYTE,
16522                                                               varNum,
16523                                                               sizeof(void*));
16524                                         genSinglePush();                                
16525                                     }    
16526
16527                                     bytesToBeCopied -= sizeof(void*);
16528                                 }
16529
16530                                 // Push the "upper half" of DOUBLE var if it is not enregistered.
16531                             
16532                                 if (fieldVarDsc->lvType == TYP_DOUBLE)
16533                                 {
16534                                     if  (!fieldVarDsc->lvRegister)
16535                                     {                                   
16536                                         getEmitter()->emitIns_S(INS_push,
16537                                                               EA_4BYTE,
16538                                                               varNum,
16539                                                               sizeof(void*));
16540                                         genSinglePush();                                
16541                                     }
16542
16543                                     bytesToBeCopied -= sizeof(void*);
16544                                 }
16545                                                         
16546                                 //
16547                                 // Push the field local.
16548                                 //
16549                             
16550                                 if (fieldVarDsc->lvRegister)
16551                                 {                                                                         
16552                                     if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16553                                     {
16554                                         inst_RV(INS_push, 
16555                                                 fieldVarDsc->lvRegNum,
16556                                                 genActualType(fieldVarDsc->TypeGet()));
16557                                         genSinglePush();
16558
16559                                         // Prepare the set of vars to be cleared from gcref/gcbyref set
16560                                         // in case they become dead after genUpdateLife.                            
16561                                         // genDoneAddressable() will remove dead gc vars by calling gcInfo.gcMarkRegSetNpt. 
16562                                         // Although it is not addrReg, we just borrow the name here.
16563                                         addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16564                                     }
16565                                     else
16566                                     {      
16567                                         // Must be TYP_FLOAT or TYP_DOUBLE
16568                                         noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16569                                     
16570                                         noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) || 
16571                                                      fieldVarDsc->lvExactSize == 2*sizeof(unsigned)); 
16572
16573                                         inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16574                         
16575                                         genSinglePush();
16576                                         if  (fieldVarDsc->lvExactSize == 2*sizeof(unsigned))
16577                                         {                               
16578                                             genSinglePush();
16579                                         }
16580                                         
16581 #if FEATURE_STACK_FP_X87
16582                                         GenTree* fieldTree = new (compiler, GT_REG_VAR) GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16583                                         fieldTree->gtOper = GT_REG_VAR;
16584                                         fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
16585                                         fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16586                                         if ((arg->gtFlags & GTF_VAR_DEATH) != 0) 
16587                                         {
16588                                             if (fieldVarDsc->lvTracked && 
16589                                                 (deadVarBits == NULL || VarSetOps::IsMember(compiler, *deadVarBits, fieldVarDsc->lvVarIndex)))
16590                                             {
16591                                                 fieldTree->gtFlags |= GTF_VAR_DEATH;
16592                                             }
16593                                         }
16594                                         genCodeForTreeStackFP_Leaf(fieldTree);
16595                         
16596                                         // Take reg to top of stack
16597                                     
16598                                         FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16599                       
16600                                         // Pop it off to stack
16601                                         compCurFPState.Pop();
16602                                     
16603                                         getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize), REG_NA, REG_SPBASE, 0);
16604 #else
16605                                         NYI_FLAT_FP_X87("FP codegen");
16606 #endif
16607                                     }
16608                                 }                                          
16609                                 else
16610                                 {                                                     
16611                                     getEmitter()->emitIns_S(INS_push,
16612                                                           (fieldVarDsc->TypeGet() == TYP_REF)?EA_GCREF:EA_4BYTE,
16613                                                           varNum,
16614                                                           0);
16615                                     genSinglePush();
16616                                 }
16617
16618                                 bytesToBeCopied -= sizeof(void*);
16619                             }
16620                             else  // not stack aligned
16621                             {
16622                                 noway_assert(fieldVarDsc->lvExactSize < 4);
16623
16624                                 // We will need to use a store byte or store word
16625                                 // to set this unaligned location
16626                                 postponedFields = true;
16627
16628                                 if (expectedAlignedOffset != UINT_MAX)
16629                                 {
16630                                     // This should never change until it is set back to UINT_MAX by an aligned offset
16631                                     noway_assert(expectedAlignedOffset == roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
16632                                 }
16633
16634                                 expectedAlignedOffset = roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
16635
16636                                 noway_assert(expectedAlignedOffset < bytesToBeCopied);
16637
16638                                 if (fieldVarDsc->lvRegister)
16639                                 {
16640                                     // Do we need to use a byte-able register?
16641                                     if (fieldVarDsc->lvExactSize == 1)
16642                                     {
16643                                         // Did we enregister fieldVarDsc in a non byte-able register?
16644                                         if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16645                                         {
16646                                             // then we will need to grab a byte-able register
16647                                             postponedRegKind = RBM_BYTE_REGS; 
16648                                         }
16649                                     }
16650                                 }
16651                                 else // not enregistered
16652                                 {      
16653                                     if (fieldVarDsc->lvExactSize == 1)
16654                                     {
16655                                         // We will need to grab a byte-able register
16656                                         postponedRegKind = RBM_BYTE_REGS;
16657                                     }
16658                                     else
16659                                     {
16660                                         // We will need to grab any scratch register
16661                                         if (postponedRegKind != RBM_BYTE_REGS)
16662                                             postponedRegKind = RBM_ALLINT;
16663                                     }
16664                                 }
16665                             }
16666                         }
16667
16668                         // Now we've pushed all of the aligned fields.
16669                         //
16670                         // We should have pushed bytes equal to the entire struct
16671                         noway_assert(bytesToBeCopied == 0);
16672                                 
16673                         // We should have seen a push that covers every postponed field
16674                         noway_assert(expectedAlignedOffset == UINT_MAX);
16675                     
16676                         // Did we have any postponed fields?
16677                         if (postponedFields)
16678                         {
16679                             regNumber  regNum = REG_STK;  // means no register
16680                             
16681                             // If we needed a scratch register then grab it here
16682                             
16683                             if (postponedRegKind != RBM_NONE)
16684                                 regNum = regSet.rsGrabReg(postponedRegKind); 
16685                                      
16686                             // Forward loop, starts from the lowest field offset
16687                             //
16688                             for (unsigned varNum = varDsc->lvFieldLclStart;
16689                                  varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
16690                                  varNum++)
16691                             {           
16692                                 LclVarDsc * fieldVarDsc = compiler->lvaTable + varNum;   
16693                             
16694                                 // All stack aligned fields have already been pushed
16695                                 if (fieldVarDsc->lvStackAligned())
16696                                     continue;
16697
16698                                 // We have a postponed field
16699
16700                                 // It must be a byte or a short
16701                                 noway_assert(fieldVarDsc->lvExactSize < 4);
16702
16703                                 // Is the field enregistered?
16704                                 if (fieldVarDsc->lvRegister)
16705                                 {
16706                                     // Frequently we can just use that register
16707                                     regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16708                                 
16709                                     // Do we need to use a byte-able register?
16710                                     if (fieldVarDsc->lvExactSize == 1)
16711                                     {
16712                                         // Did we enregister the field in a non byte-able register?
16713                                         if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16714                                         {
16715                                             // then we will need to use the byte-able register 'regNum'
16716                                             noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16717                                             
16718                                             // Copy the register that contains fieldVarDsc into 'regNum'
16719                                             getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum, fieldVarDsc->lvRegNum);
16720                                             regTracker.rsTrackRegLclVar(regNum, varNum);
16721                                         
16722                                             // tmpRegNum is the register that we will extract the byte value from
16723                                             tmpRegNum = regNum; 
16724                                         }
16725                                         noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
16726                                     }
16727                                 
16728                                     getEmitter()->emitIns_AR_R (ins_Store(fieldVarDsc->TypeGet()),
16729                                                               (emitAttr)fieldVarDsc->lvExactSize,                                                          
16730                                                               tmpRegNum,
16731                                                               REG_SPBASE,
16732                                                               fieldVarDsc->lvFldOffset);
16733                                 }
16734                                 else // not enregistered
16735                                 {
16736                                     // We will copy the non-enregister fieldVar into our scratch register 'regNum'
16737                                     
16738                                     noway_assert(regNum != REG_STK);
16739                                     getEmitter()->emitIns_R_S (ins_Load(fieldVarDsc->TypeGet()),
16740                                                              (emitAttr)fieldVarDsc->lvExactSize,
16741                                                              regNum,
16742                                                              varNum,
16743                                                              0);
16744                                 
16745                                     regTracker.rsTrackRegLclVar(regNum, varNum);
16746                                 
16747                                     // Store the value (byte or short) into the stack
16748                                 
16749                                     getEmitter()->emitIns_AR_R (ins_Store(fieldVarDsc->TypeGet()),
16750                                                               (emitAttr)fieldVarDsc->lvExactSize,                                                          
16751                                                               regNum,
16752                                                               REG_SPBASE,
16753                                                               fieldVarDsc->lvFldOffset);
16754                                 }                                
16755                             }                        
16756                         }
16757                         genUpdateLife(structLocalTree);
16758
16759                         break; 
16760                     }
16761
16762                 }
16763
16764                 genCodeForTree(arg->gtObj.gtOp1, 0);
16765                 noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
16766                 regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16767                 // Get the number of bytes to copy to the stack (a whole number of pointer-sized slots)
16768                 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
16769                 unsigned slots = (unsigned)(opsz / sizeof(void*));
16770
16771                 BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16772
16773                 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16774
16775                 BOOL bNoneGC = TRUE;
16776                 for (int i = slots - 1; i >= 0; --i)
16777                 {
16778                     if (gcLayout[i] != TYPE_GC_NONE)
16779                     {
16780                         bNoneGC = FALSE;
16781                         break;
16782                     }
16783                 }
16784
16785                 /* passing large structures using movq instead of pushes does not increase codesize very much */
16786                 unsigned movqLenMin =   8;
16787                 unsigned movqLenMax =  64;
16788                 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16789
16790                 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16791                 {
16792                     // Don't bother with this optimization in
16793                     // rarely run blocks or when optimizing for size
16794                     movqLenMax = movqLenMin = 0;
16795                 }
16796                 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16797                 {
16798                     // Be more aggressive when optimizing for speed
16799                     movqLenMax *= 2;
16800                 }
16801
16802                 /* Adjust for BB weight */
16803                 if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT)/2)
16804                 {
16805                     // Be more aggressive when we are inside a loop
16806                     movqLenMax *= 2;
16807                 }
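                // Worked example (illustrative): with FAST_CODE inside a hot loop the
                // window becomes 8..256 bytes (64 * 2 * 2), so a 40-byte struct with no
                // GC references is copied with five movq load/store pairs below instead
                // of ten 4-byte pushes.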
16808
16809                 if (compiler->opts.compCanUseSSE2 && bNoneGC &&
16810                     (opsz >= movqLenMin) && (opsz <= movqLenMax))
16811                 {
16812                     JITLOG_THIS(compiler, (LL_INFO10000, "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16813                             opsz,  compiler->info.compFullName));
16814
16815                     int       stkDisp = (int)(unsigned)opsz;
16816                     int       curDisp = 0;
16817                     regNumber xmmReg  = REG_XMM0;
16818
16819                     if (opsz & 0x4)
16820                     {
16821                         stkDisp -= sizeof(void*);
16822                         getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
16823                         genSinglePush();
16824                     }
16825
16826                     inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16827                     genStackLevel += stkDisp;
16828                     
16829                     while (curDisp < stkDisp)
16830                     {
16831                         getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16832                         getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16833                         curDisp += 2 * sizeof(void*);
16834                     }
16835                     noway_assert(curDisp == stkDisp);
16836                 }
16837                 else
16838                 {
16839                     for (int i = slots-1; i >= 0; --i)
16840                     {
16841                         emitAttr fieldSize;
16842                         if      (gcLayout[i] == TYPE_GC_NONE)
16843                             fieldSize = EA_4BYTE;
16844                         else if (gcLayout[i] == TYPE_GC_REF)
16845                             fieldSize = EA_GCREF;
16846                         else
16847                         {
16848                             noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16849                             fieldSize = EA_BYREF;
16850                         }
16851                         getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i*sizeof(void*));
16852                         genSinglePush();
16853                     }
16854                 }
16855                 gcInfo.gcMarkRegSetNpt(genRegMask(reg));    // Kill the pointer in op1
16856             }
16857                
16858             addrReg = 0;
16859             break;
16860         }
16861
16862         default:
16863             noway_assert(!"unhandled/unexpected arg type");
16864             NO_WAY("unhandled/unexpected arg type");
16865         }
16866
16867         /* Update the current set of live variables */
16868
16869         genUpdateLife(curr);
16870
16871         /* Update the current set of register pointers */
16872
16873         noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT)); 
16874         genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16875
16876         /* Remember how much stuff we've pushed on the stack */
16877
16878         size += opsz;
16879
16880         /* Update the current argument stack offset */
16881
16882
16883         /* Continue with the next argument, if any more are present */
16884
16885     } // while args
16886
16887     /* Move the deferred arguments to registers */
16888
16889     for (args = regArgs; args; args = args->Rest())
16890     {
16891         curr = args->Current();
16892         
16893         assert(!curr->IsArgPlaceHolderNode());  // No place holders nodes are in the late args
16894
16895         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16896         assert(curArgTabEntry);
16897         regNumber regNum = curArgTabEntry->regNum;
16898
16899         noway_assert(isRegParamType(curr->TypeGet()));
16900         noway_assert(curr->gtType != TYP_VOID);
16901
16902         /* Evaluate the argument to a register [pair] */
16903
16904         if  (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16905         {
16906             /* Check if this is the guess area for the resolve interface call
16907              * Pass a size of EA_OFFSET */
16908             if  (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16909             {
16910                 getEmitter()->emitIns_R_C(ins_Load(TYP_INT),
16911                                         EA_OFFSET,
16912                                         regNum,
16913                                         curr->gtClsVar.gtClsVarHnd,
16914                                         0);
16915                 regTracker.rsTrackRegTrash(regNum);
16916                 
16917                 /* The value is now in the appropriate register */
16918                 
16919                 genMarkTreeInReg(curr, regNum);
16920             }
16921             else
16922             {
16923                 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16924             }
16925
16926             noway_assert(curr->gtRegNum == regNum);
16927
16928             /* If the register is already marked as used, it will become
16929                multi-used. However, since it is a callee-trashed register,
16930                we will have to spill it before the call anyway. So do it now */
16931             
16932             if (regSet.rsMaskUsed & genRegMask(regNum))
16933             {
16934                 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16935                 regSet.rsSpillReg(regNum);
16936             }
16937             
16938             /* Mark the register as 'used' */
16939             
16940             regSet.rsMarkRegUsed(curr);
16941         }
16942         else
16943         {
16944             noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16945         }
16946     }
16947
16948     /* If any of the previously loaded arguments were spilled - reload them */
16949
16950     for (args = regArgs; args; args = args->Rest())
16951     {
16952         curr  = args->Current();
16953         assert(curr);
16954
16955         if (curr->gtFlags & GTF_SPILLED)
16956         {
16957             if  (isRegPairType(curr->gtType))
16958             {
16959                 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16960             }
16961             else
16962             {
16963                 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16964             }
16965         }
16966     }
16967
16968     /* Return the total size pushed */
16969
16970     return size;
16971 }
16972 #ifdef _PREFAST_
16973 #pragma warning(pop)
16974 #endif
16975
16976 #else // FEATURE_FIXED_OUT_ARGS 
16977
16978 //
16979 // ARM and AMD64 use this method to pass the stack-based args
16980 //
16981 // returns size pushed (always zero)
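//
// Sketch of the difference from the x86 path above (illustrative, not from the
// original comments): instead of push instructions, each argument is stored into the
// preallocated outgoing argument area (lvaOutgoingArgSpaceVar) at the fixed offset
// curArgTabEntry->slotNum * TARGET_POINTER_SIZE, which is why nothing is pushed and
// the returned size is always zero.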
16982 size_t              CodeGen::genPushArgList(GenTreePtr     call)
16983 {
16984     
16985     GenTreeArgList*  lateArgs = call->gtCall.gtCallLateArgs;
16986     GenTreePtr  curr;
16987     var_types   type;
16988     int         argSize;
16989
16990     GenTreeArgList* args;
16991     // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16992     // so we can iterate over this argument list more uniformly.
16993     // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16994     GenTreeArgList objpArgList(/*temp dummy arg*/call, call->gtCall.gtCallArgs);
16995     if (call->gtCall.gtCallObjp == NULL)
16996     {
16997         args = call->gtCall.gtCallArgs;
16998     }
16999     else
17000     {
17001         objpArgList.Current() = call->gtCall.gtCallObjp;
17002         args = &objpArgList;
17003     }
17004
17005     for (; args; args = args->Rest())
17006     {
17007         /* Get hold of the next argument value */
17008         curr = args->Current();
17009
17010         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
17011         assert(curArgTabEntry);
17012         regNumber regNum    = curArgTabEntry->regNum;
17013         int       argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
17014
17015         /* See what type of a value we're passing */
17016         type = curr->TypeGet();
17017
17018         // This holds the set of registers corresponding to enregistered promoted struct field variables
17019         // that go dead after this use of the variable in the argument list.
17020         regMaskTP deadFieldVarRegs = RBM_NONE;
17021
17022         argSize = TARGET_POINTER_SIZE;  // The default size for an arg is one pointer-sized item
17023
17024         if (curr->IsArgPlaceHolderNode())
17025         {
17026             assert(curr->gtFlags & GTF_LATE_ARG);
17027             goto DEFERRED;
17028         }
17029
17030         if (varTypeIsSmall(type))
17031         {
17032             // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
17033             type = TYP_I_IMPL;
17034         }
17035
17036         switch (type)
17037         {
17038
17039         case TYP_DOUBLE:
17040         case TYP_LONG:
17041
17042 #if defined(_TARGET_ARM_)
17043
17044             argSize = (TARGET_POINTER_SIZE * 2);
17045
17046             /* Is the value a constant? */
17047
17048             if  (curr->gtOper == GT_CNS_LNG)
17049             {
17050                 assert((curr->gtFlags & GTF_LATE_ARG) == 0);
17051
17052                 int hiVal = (int) (curr->gtLngCon.gtLconVal >> 32);
17053                 int loVal = (int) (curr->gtLngCon.gtLconVal & 0xffffffff);  
17054
17055                 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal,
17056                                            compiler->lvaOutgoingArgSpaceVar, argOffset);
17057
17058                 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal,
17059                                            compiler->lvaOutgoingArgSpaceVar, argOffset + 4);
17060
17061                 break;
17062             }
17063             else
17064             {
17065                 genCodeForTree(curr, 0);
17066
17067                 if (curr->gtFlags & GTF_LATE_ARG)
17068                 {
17069                     // The arg was assigned into a temp and 
17070                     // will be moved to the correct register or slot later
17071                     
17072                     argSize = 0;  // nothing is passed on the stack
17073                 }
17074                 else
17075                 {
17076                     // The arg is passed in the outgoing argument area of the stack frame
17077                     //
17078                     assert(curr->gtOper != GT_ASG);       // GTF_LATE_ARG should be set if this is the case
17079                     assert(curr->gtFlags & GTF_REG_VAL);  // should be enregistered after genCodeForTree(curr, 0)
17080
17081                     if (type == TYP_LONG)
17082                     {
17083                         regNumber regLo = genRegPairLo(curr->gtRegPair);
17084                         regNumber regHi = genRegPairHi(curr->gtRegPair);
17085
17086                         assert(regLo != REG_STK);
17087                         inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
17088                         if (regHi == REG_STK)
17089                         {
17090                             regHi = regSet.rsPickFreeReg();
17091                             inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
17092                             regTracker.rsTrackRegTrash(regHi);
17093                         }
17094                         inst_SA_RV(ins_Store(TYP_INT), argOffset+4, regHi, TYP_INT);
17095                     }
17096                     else // (type == TYP_DOUBLE)
17097                     {
17098                         inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
17099                     }
17100                 }
17101             }
17102             break;
17103
17104 #elif defined(_TARGET_64BIT_)
17105             __fallthrough;
17106 #else
17107 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS" 
17108 #endif
17109
17110         case TYP_REF:
17111         case TYP_BYREF:
17112
17113         case TYP_FLOAT:
17114         case TYP_INT:
17115             /* Is the value a constant? */
17116
17117             if  (curr->gtOper == GT_CNS_INT)
17118             {
17119                 assert(!(curr->gtFlags & GTF_LATE_ARG));
17120
17121 #if     REDUNDANT_LOAD
17122                 regNumber       reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
17123
17124                 if  (reg != REG_NA)
17125                 {
17126                     inst_SA_RV(ins_Store(type), argOffset, reg, type);
17127                 }
17128                 else
17129 #endif
17130                 {
17131                     bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
17132                     emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
17133                     instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
17134                                                compiler->lvaOutgoingArgSpaceVar, argOffset);
17135                 }
17136                 break;
17137             }
17138
17139             /* This is passed as a pointer-sized integer argument */
17140
17141             genCodeForTree(curr, 0);
17142             if (curr->gtFlags & GTF_LATE_ARG)
17143             {
17144                 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
17145
17146 #ifdef _TARGET_ARM_
17147                 argSize = 0;  // nothing is passed on the stack
17148 #endif
17149             }
17150             else
17151             {
17152                 // The arg is passed in the outgoing argument area of the stack frame
17153                 //
17154                 assert(curr->gtOper != GT_ASG);  // GTF_LATE_ARG should be set if this is the case
17155                 assert(curr->gtFlags & GTF_REG_VAL);  // should be enregistered after genCodeForTree(curr, 0)
17156                 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
17157             
17158                 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
17159                     gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
17160             }
17161             break;
17162
17163         case TYP_VOID:
17164             /* Is this a nothing node, deferred register argument? */
17165
17166             if (curr->gtFlags & GTF_LATE_ARG)
17167             {
17168                 /* Handle side-effects */
17169 DEFERRED:
17170                 if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
17171                 {
17172 #ifdef _TARGET_ARM_
17173                     {
17174                         GenTreePtr curArgNode = curArgTabEntry->node;
17175                         var_types curRegArgType = curArgNode->gtType;
17176                         assert(curRegArgType != TYP_UNDEF);
17177
17178                         if (curRegArgType == TYP_STRUCT)
17179                         {
17180                             // If the RHS of the COPYBLK is a promoted struct local, then the use of that
17181                             // is an implicit use of all its field vars.  If these are last uses, remember that,
17182                             // so we can later update the GC compiler->info.
17183                             // so we can later update the GC info.
17184                                 deadFieldVarRegs |= genFindDeadFieldRegs(curr);
17185                         }
17186                     }
17187 #endif // _TARGET_ARM_
17188
17189                     genCodeForTree(curr, 0);
17190                 }
17191                 else  
17192                 {
17193                     assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
17194                 }
17195
17196 #if defined(_TARGET_ARM_)
17197                 argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
17198 #endif
17199             }
17200             else
17201             {
17202                 for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
17203                 {
17204                     GenTreePtr op1 = arg->gtOp.gtOp1;
17205
17206                     genEvalSideEffects(op1);
17207                     genUpdateLife(op1);
17208                 }
17209             }
17210             break;
17211
17212 #ifdef _TARGET_ARM_
17213
17214         case TYP_STRUCT:
17215         {
17216             GenTree* arg = curr;
17217             while (arg->gtOper == GT_COMMA)
17218             {
17219                 GenTreePtr op1 = arg->gtOp.gtOp1;
17220                 genEvalSideEffects(op1);
17221                 genUpdateLife(op1);
17222                 arg = arg->gtOp.gtOp2;
17223             }
17224             noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
17225
17226             CORINFO_CLASS_HANDLE clsHnd;
17227             unsigned             argAlign;
17228             unsigned             slots;
17229             BYTE*                gcLayout = NULL;
17230
17231             // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
17232             // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
17233             // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
17234             // table entry for the promoted struct local.  As we fill slots with the contents of a
17235             // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
17236             // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
17237             // variable number of the next field variable to be copied.
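            // Illustrative sketch (assumed layout, not from the original comments): for a
            // promoted struct S { int a; int b; } passed on the ARM stack, the slot loop
            // below fills two pointer-sized outgoing slots, one from each promoted field
            // local, advancing nextPromotedStructFieldVar as each field is consumed.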
17238             LclVarDsc*           promotedStructLocalVarDesc = NULL;
17239             GenTreePtr           structLocalTree  = NULL;
17240             unsigned             bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;  // Size of slot.
17241             unsigned             nextPromotedStructFieldVar = BAD_VAR_NUM;
17242             unsigned             promotedStructOffsetOfFirstStackSlot = 0;
17243             unsigned             argOffsetOfFirstStackSlot = UINT32_MAX;  // Indicates uninitialized.
17244
17245             if (arg->OperGet() == GT_OBJ)
17246             {
17247                 clsHnd = arg->gtObj.gtClass;
17248                 unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
17249                 argAlign = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
17250                 argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
17251
17252                 slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
17253             
17254                 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17255             
17256                 compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
17257
17258                 // Are we loading a promoted struct local var?
17259                 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
17260                     arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17261                 {
17262                     structLocalTree               = arg->gtObj.gtOp1->gtOp.gtOp1;
17263                     unsigned     structLclNum     = structLocalTree->gtLclVarCommon.gtLclNum;
17264                     LclVarDsc *  varDsc           = &compiler->lvaTable[structLclNum];
17265
17266                     // As much as we would like this to be a noway_assert, we can't because
17267                     // there are some weird casts out there, and backwards compatibility
17268                     // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
17269                     // lvPromoted in general currently do not require the local to be
17270                     // TYP_STRUCT, so this assert is really more about how we wish the world
17271                     // were than about some JIT invariant.
17272                     assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
17273
17274                     Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17275
17276                     if (varDsc->lvPromoted && 
17277                         promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT)  // Otherwise it is guaranteed to live on stack.
17278                     {
17279                         assert(!varDsc->lvAddrExposed);  // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.  
17280                         promotedStructLocalVarDesc = varDsc;
17281                         nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17282                     }
17283                 }
17284             }
17285             else  
17286             {
17287                 noway_assert(arg->OperGet() == GT_MKREFANY);
17288
17289                 clsHnd   = NULL;
17290                 argAlign = TARGET_POINTER_SIZE;
17291                 argSize  = 2*TARGET_POINTER_SIZE;
17292                 slots    = 2;
17293             }
17294             
17295             // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
17296             noway_assert(regNum == REG_STK);
17297
17298             // This code passes a TYP_STRUCT by value using the outgoing arg space var
17299             //
17300             if (arg->OperGet() == GT_OBJ)
17301             {
17302                 regNumber regSrc = REG_STK;
17303                 regNumber regTmp = REG_STK;  // This will get set below if the obj is not of a promoted struct local.
17304                 int cStackSlots = 0;
17305
17306                 if (promotedStructLocalVarDesc == NULL)
17307                 {
17308                     genComputeReg(arg->gtObj.gtOp1, 0,  RegSet::ANY_REG, RegSet::KEEP_REG);
17309                     noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
17310                     regSrc = arg->gtObj.gtOp1->gtRegNum;
17311                 }
17312                          
17313                 // The number of bytes to add "argOffset" to get the arg offset of the current slot.
17314                 int extraArgOffset = 0;
17315
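                // Copy the struct one pointer-sized slot at a time, using the GC layout to choose
                // the emit attribute (EA_GCREF, EA_BYREF, or EA_PTRSIZE) for each slot.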
17316                 for (unsigned i = 0; i < slots; i++)
17317                 {
17318                     emitAttr fieldSize;
17319                     if      (gcLayout[i] == TYPE_GC_NONE)
17320                         fieldSize = EA_PTRSIZE;
17321                     else if (gcLayout[i] == TYPE_GC_REF)
17322                         fieldSize = EA_GCREF;
17323                     else
17324                     {
17325                         noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17326                         fieldSize = EA_BYREF;
17327                     }
17328                 
17329                     // Pass the argument using the lvaOutgoingArgSpaceVar
17330
17331                     if (promotedStructLocalVarDesc != NULL)
17332                     {
17333                         if (argOffsetOfFirstStackSlot == UINT32_MAX) argOffsetOfFirstStackSlot = argOffset;
17334
17335                         regNumber maxRegArg = regNumber(MAX_REG_ARG);
17336                         bool filledExtraSlot =
17337                             genFillSlotFromPromotedStruct(arg,
17338                                                           curArgTabEntry,
17339                                                           promotedStructLocalVarDesc,
17340                                                           fieldSize, 
17341                                                           &nextPromotedStructFieldVar,
17342                                                           &bytesOfNextSlotOfCurPromotedStruct,
17343                                                           /*pCurRegNum*/ &maxRegArg,
17344                                                           /*argOffset*/ argOffset + extraArgOffset, 
17345                                                           /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
17346                                                            argOffsetOfFirstStackSlot,
17347                                                           &deadFieldVarRegs,
17348                                                           &regTmp);
17349                         extraArgOffset += TARGET_POINTER_SIZE;
17350                         // If we filled an extra slot with an 8-byte value, skip a slot.
17351                         if (filledExtraSlot) 
17352                         {
17353                             i++; 
17354                             cStackSlots++;
17355                             extraArgOffset += TARGET_POINTER_SIZE;
17356                         }
17357                     }
17358                     else
17359                     {
17360                         if (regTmp == REG_STK)
17361                         {
17362                             regTmp = regSet.rsPickFreeReg();
17363                         }
17364
17365                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
17366                                                  fieldSize,
17367                                                  regTmp,
17368                                                  regSrc,
17369                                                  i*TARGET_POINTER_SIZE);
17370                 
17371                         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17372                                                 fieldSize,
17373                                                 regTmp,
17374                                                 compiler->lvaOutgoingArgSpaceVar,
17375                                                 argOffset+cStackSlots*TARGET_POINTER_SIZE);
17376                         regTracker.rsTrackRegTrash(regTmp);
17377                     }                   
17378                     cStackSlots++;
17379                 }
17380
17381                 if (promotedStructLocalVarDesc == NULL)
17382                 {
17383                     regSet.rsMarkRegFree(genRegMask(regSrc));
17384                 }
17385                 if (structLocalTree != NULL) genUpdateLife(structLocalTree);
17386             }
17387             else
17388             {
17389                 assert(arg->OperGet() == GT_MKREFANY);
17390                 PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
17391                 argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
17392             }
17393         }
17394         break;
17395 #endif // _TARGET_ARM_
17396
17397         default:
17398             assert(!"unhandled/unexpected arg type");
17399             NO_WAY("unhandled/unexpected arg type");
17400         }
17401
17402         /* Update the current set of live variables */
17403
17404         genUpdateLife(curr);
17405
17406         // Now, if some copied field locals were enregistered, and they're now dead, update the set of 
17407         // registers holding GC pointers.
17408         if (deadFieldVarRegs != 0)
17409             gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
17410
17411         /* Update the current argument stack offset */
17412
17413         argOffset += argSize;
17414
17415         /* Continue with the next argument, if any more are present */
17416     } // while (args)
17417
17418
17419     if (lateArgs)
17420     {
17421         SetupLateArgs(call);
17422     }
17423     
17424     /* Return the total size pushed */
17425     
17426     return 0;
17427 }
17428
17429 #ifdef _TARGET_ARM_
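// genFillSlotFromPromotedStruct: fill one pointer-sized slot of an outgoing struct argument from the
// field variables of an independently promoted struct local. Depending on '*pCurRegNum', the slot is
// written either into the next argument register or into the outgoing argument area at 'argOffset'.
// The cursors '*pNextPromotedStructFieldVar', '*pBytesOfNextSlotOfCurPromotedStruct', '*pCurRegNum',
// and '*pRegTmp' are updated for the caller. Returns true if an extra slot was consumed (e.g., by an
// 8-byte field), in which case the calling loop skips a slot.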
17430 bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr        arg,
17431                                             fgArgTabEntryPtr  curArgTabEntry,
17432                                             LclVarDsc*        promotedStructLocalVarDesc,
17433                                             emitAttr          fieldSize,
17434                                             unsigned*         pNextPromotedStructFieldVar,
17435                                             unsigned*         pBytesOfNextSlotOfCurPromotedStruct,
17436                                             regNumber*        pCurRegNum,
17437                                             int               argOffset,
17438                                             int               fieldOffsetOfFirstStackSlot,
17439                                             int               argOffsetOfFirstStackSlot,
17440                                             regMaskTP*        deadFieldVarRegs,
17441                                             regNumber*        pRegTmp)
17442 {
17443     unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
17444     unsigned limitPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
17445     unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
17446
17447     regNumber curRegNum = *pCurRegNum;
17448     regNumber regTmp = *pRegTmp;
17449     bool filledExtraSlot = false;
17450
17451     if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17452     {
17453         // We've already finished; just return.
17454         // We can reach this because the calling loop computes a # of slots based on the size of the struct.
17455         // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
17456         // the fourth slot, even though we've copied all the fields.
17457         return false;
17458     }
17459
17460     LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17461
17462     // Does this field fill an entire slot, and does it go at the start of the slot?
17463     // If so, things are easier...
17464
17465     bool oneFieldFillsSlotFromStart =
17466         (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)  // The field should start in the current slot...
17467         && ((fieldVarDsc->lvFldOffset % 4) == 0)                         // at the start of the slot, and...
17468         && (nextPromotedStructFieldVar+1 == limitPromotedStructFieldVar  // next field, if there is one, goes in the next slot.
17469             || compiler->lvaTable[nextPromotedStructFieldVar+1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
17470
17471     // Compute the proper size.
17472     if (fieldSize == EA_4BYTE)  // Not a GC ref or byref.
17473     {
17474         switch (fieldVarDsc->lvExactSize)
17475         {
17476         case 1: fieldSize = EA_1BYTE; break;
17477         case 2: fieldSize = EA_2BYTE; break;
17478         case 8:
17479             // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
17480             // in which case we should not have promoted the struct variable.
17481             noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
17482            
17483             // If the current reg number is not aligned, align it, and return to the calling loop, which will
17484             // consider that a filled slot and move on to the next argument register.
17485             if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0)) 
17486             {
17487                 // We must update the slot target, however!
17488                 bytesOfNextSlotOfCurPromotedStruct += 4;
17489                 *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17490                 return false;
17491             }
17492             // Dest is an aligned pair of arg regs, if the struct type demands it.
17493             noway_assert((curRegNum % 2) == 0);
17494             // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
17495             break;
17496         default: assert(fieldVarDsc->lvExactSize == 4); break;
17497         }
17498     }
17499     else
17500     {
17501         // If the gc layout said it's a GC ref or byref, then the field size must be 4.
17502         noway_assert(fieldVarDsc->lvExactSize == 4);
17503     }
17504
17505     // We may need the type of the field to influence instruction selection.
17506     // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
17507     // If the fieldVarDsc is enregistered float we must use the field's exact type
17508     // however if it is in memory we can use an integer type TYP_I_IMPL
17509     //
17510     var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
17511     if ((fieldVarDsc->lvType == TYP_LONG) ||
17512         (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
17513     {
17514         fieldTypeForInstr = TYP_I_IMPL;
17515     }
17516
17517     // If we have an HFA, then it is a much simpler deal -- HFAs are completely enregistered.
17518     if (curArgTabEntry->isHfaRegArg)
17519     {
17520         assert(oneFieldFillsSlotFromStart);
17521
17522         // Is the field variable enregistered?
17523         if (fieldVarDsc->lvRegister)
17524         {
17525             // Move the field var living in register to dst, if they are different registers.
17526             regNumber srcReg = fieldVarDsc->lvRegNum;
17527             regNumber dstReg = curRegNum;
17528             if (srcReg != dstReg)
17529             {
17530                 inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
17531                 assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
17532             }
17533         }
17534         else
17535         {
17536             // Move the field var living in stack to dst.
17537             getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
17538                 fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE,
17539                 curRegNum,
17540                 nextPromotedStructFieldVar,
17541                 0);
17542             assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
17543         }
17544
17545         // Mark the arg as used and using reg val.
17546         genMarkTreeInReg(arg, curRegNum);
17547         regSet.SetUsedRegFloat(arg, true);
17548
17549         // Advance for double.
17550         if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
17551         {
17552             bytesOfNextSlotOfCurPromotedStruct += 4;
17553             curRegNum = REG_NEXT(curRegNum);
17554             arg->gtRegNum = curRegNum;
17555             regSet.SetUsedRegFloat(arg, true);
17556             filledExtraSlot = true;
17557         }
17558         arg->gtRegNum = curArgTabEntry->regNum;
17559
17560         // Advance.
17561         bytesOfNextSlotOfCurPromotedStruct += 4;
17562         nextPromotedStructFieldVar++;
17563     }
17564     else
17565     {
17566         if (oneFieldFillsSlotFromStart)
17567         {
17568             // If we write to the stack, offset in outgoing args at which we'll write.
17569             int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17570             assert(fieldArgOffset >= 0);
17571
17572             // Is the source a register or memory?
17573             if (fieldVarDsc->lvRegister)
17574             {
17575                 if (fieldTypeForInstr == TYP_DOUBLE)
17576                 {
17577                     fieldSize = EA_8BYTE;
17578                 }
17579
17580                 // Are we writing to a register or to the stack?
17581                 if (curRegNum != MAX_REG_ARG)
17582                 {
17583                     // Source is register and Dest is register.
17584
17585                     instruction  insCopy = INS_mov;
17586
17587                     if (varTypeIsFloating(fieldTypeForInstr))
17588                     {
17589                         if (fieldTypeForInstr == TYP_FLOAT)
17590                         {
17591                             insCopy   = INS_vmov_f2i;
17592                         }
17593                         else
17594                         {
17595                             assert(fieldTypeForInstr == TYP_DOUBLE);
17596                             insCopy   = INS_vmov_d2i;
17597                         }
17598                     }
17599
17600                     // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers.  Record the second
17601                     // register (which may become a tmp register, if it's held in the argument register that the first
17602                     // register to be copied will overwrite).
17603                     regNumber otherRegNum = REG_STK;
17604                     if (fieldVarDsc->lvType == TYP_LONG)
17605                     {
17606                         otherRegNum = fieldVarDsc->lvOtherReg;
17607                         // Are we about to overwrite?
17608                         if (otherRegNum == curRegNum)
17609                         {
17610                             if (regTmp == REG_STK)
17611                             {
17612                                 regTmp = regSet.rsPickFreeReg();
17613                             }
17614                             // Copy the second register to the temp reg.
17615                             getEmitter()->emitIns_R_R(INS_mov,
17616                                                     fieldSize,
17617                                                     regTmp,
17618                                                     otherRegNum);
17619                             regTracker.rsTrackRegCopy(regTmp, otherRegNum);
17620                             otherRegNum = regTmp;
17621                         }
17622                     }
17623
17624                     if (fieldVarDsc->lvType == TYP_DOUBLE)
17625                     {
17626                         assert(curRegNum <= REG_R2);
17627                         getEmitter()->emitIns_R_R_R(insCopy,
17628                                                   fieldSize,
17629                                                   curRegNum,
17630                                                   genRegArgNext(curRegNum),
17631                                                   fieldVarDsc->lvRegNum);
17632                         regTracker.rsTrackRegTrash(curRegNum);
17633                         regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
17634                     }
17635                     else
17636                     {
17637                         // Now do the first register.
17638                         // It might be the case that it's already in the desired register; if so do nothing.
17639                         if (curRegNum != fieldVarDsc->lvRegNum)
17640                         {
17641                             getEmitter()->emitIns_R_R(insCopy,
17642                                                     fieldSize,
17643                                                     curRegNum,
17644                                                     fieldVarDsc->lvRegNum);
17645                             regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
17646                         }
17647                     }
17648
17649                     // In either case, mark the arg register as used.
17650                     regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17651
17652                     // Is there a second half of the value?
17653                     if (fieldVarDsc->lvExactSize == 8) 
17654                     {
17655                         curRegNum = genRegArgNext(curRegNum);
17656                         // The second dest reg must also be an argument register.
17657                         noway_assert(curRegNum < MAX_REG_ARG);
17658
17659                         // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17660                         if (fieldVarDsc->lvType == TYP_LONG)
17661                         {
17662                             // Copy the second register into the next argument register
17663
17664                             // If it's a register variable for a TYP_LONG value, then otherReg now should
17665                             //  hold the second register or it might say that it's in the stack.
17666                             if (otherRegNum == REG_STK)
17667                             {
17668                                 // Apparently when we partially enregister, we allocate stack space for the full
17669                                 // 8 bytes, and enregister the low half.  Thus the final TARGET_POINTER_SIZE offset
17670                                 // parameter is used to get the high half.
17671                                 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17672                                                         fieldSize,
17673                                                         curRegNum,
17674                                                         nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17675                                 regTracker.rsTrackRegTrash(curRegNum);
17676                             }
17677                             else
17678                             {
17679                                 // The other half is in a register.
17680                                 // Again, it might be the case that it's already in the desired register; if so do nothing.
17681                                 if (curRegNum != otherRegNum)
17682                                 {
17683                                     getEmitter()->emitIns_R_R(INS_mov,
17684                                                             fieldSize,
17685                                                             curRegNum,
17686                                                             otherRegNum);
17687                                     regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
17688                                 }
17689                             }
17690                         }
17691
17692                         // Also mark the 2nd arg register as used.
17693                         regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
17694                         // Record the fact that we filled in an extra register slot
17695                         filledExtraSlot = true;
17696                     }
17697                 }
17698                 else
17699                 {
17700                     // Source is register and Dest is memory (OutgoingArgSpace).
17701
17702                     // Now write the srcReg into the right location in the outgoing argument list.
17703                     getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17704                                             fieldSize,
17705                                             fieldVarDsc->lvRegNum,
17706                                             compiler->lvaOutgoingArgSpaceVar,
17707                                             fieldArgOffset);
17708
17709                     if (fieldVarDsc->lvExactSize == 8) 
17710                     {
17711                         // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17712                         if (fieldVarDsc->lvType == TYP_LONG)
17713                         {
17714                             if (fieldVarDsc->lvOtherReg == REG_STK)
17715                             {
17716                                 // Source is stack.
17717                                 if (regTmp == REG_STK)
17718                                 {
17719                                     regTmp = regSet.rsPickFreeReg();
17720                                 }
17721                                 // Apparently if we partially enregister, we allocate stack space for the full
17722                                 // 8 bytes, and enregister the low half.  Thus the final TARGET_POINTER_SIZE offset
17723                                 // parameter is used to get the high half.
17724                                 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17725                                                         fieldSize,
17726                                                         regTmp,
17727                                                         nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17728                                 regTracker.rsTrackRegTrash(regTmp);
17729                                 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17730                                                         fieldSize,
17731                                                         regTmp,
17732                                                         compiler->lvaOutgoingArgSpaceVar,
17733                                                         fieldArgOffset + TARGET_POINTER_SIZE);
17734                             }
17735                             else
17736                             {
17737                                 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17738                                                         fieldSize,
17739                                                         fieldVarDsc->lvOtherReg,
17740                                                         compiler->lvaOutgoingArgSpaceVar,
17741                                                         fieldArgOffset + TARGET_POINTER_SIZE);
17742                             }
17743                         }
17744                         // Record the fact that we filled in an extra register slot
17745                         filledExtraSlot = true;
17746                     }
17747                 }
17748                 assert(fieldVarDsc->lvTracked);  // Must be tracked, since it's enregistered...
17749                 // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
17750                 if (arg->gtFlags & GTF_VAR_DEATH)
17751                 {
17752                     *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
17753                     // We don't bother with the second reg of a register pair, since if it has one,
17754                     // it obviously doesn't hold a pointer.
17755                 }
17756             }
17757             else
17758             {
17759                 // Source is in memory.
17760
17761                 if (curRegNum != MAX_REG_ARG)
17762                 {
17763                     // Dest is reg.
17764                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17765                                             fieldSize,
17766                                             curRegNum,
17767                                             nextPromotedStructFieldVar, 0);
17768                     regTracker.rsTrackRegTrash(curRegNum);
17769
17770                     regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17771
17772                     if (fieldVarDsc->lvExactSize == 8) 
17773                     {
17774                         noway_assert(fieldSize == EA_4BYTE);
17775                         curRegNum = genRegArgNext(curRegNum);
17776                         noway_assert(curRegNum < MAX_REG_ARG);  // Because of 8-byte alignment.
17777                         getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL),
17778                                                 fieldSize,
17779                                                 curRegNum,
17780                                                 nextPromotedStructFieldVar,
17781                                                 TARGET_POINTER_SIZE);
17782                         regTracker.rsTrackRegTrash(curRegNum);
17783                         regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17784                         // Record the fact that we filled in an extra stack slot
17785                         filledExtraSlot = true;
17786                     }
17787                 }
17788                 else
17789                 {
17790                     // Dest is stack.
17791                     if (regTmp == REG_STK)
17792                     {
17793                         regTmp = regSet.rsPickFreeReg();
17794                     }
17795                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17796                                             fieldSize,
17797                                             regTmp,
17798                                             nextPromotedStructFieldVar, 0);
17799
17800                     // Now write regTmp into the right location in the outgoing argument list.
17801                     getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17802                                             fieldSize,
17803                                             regTmp,
17804                                             compiler->lvaOutgoingArgSpaceVar,
17805                                             fieldArgOffset);
17806                     // We overwrote "regTmp", so erase any previous value we recorded that it contained.
17807                     regTracker.rsTrackRegTrash(regTmp);
17808
17809                     if (fieldVarDsc->lvExactSize == 8) 
17810                     {
17811                         getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17812                                                 fieldSize,
17813                                                 regTmp,
17814                                                 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17815
17816                         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17817                                                 fieldSize,
17818                                                 regTmp,
17819                                                 compiler->lvaOutgoingArgSpaceVar,
17820                                                 fieldArgOffset + TARGET_POINTER_SIZE);
17821                         // Record the fact that we filled in an extra stack slot
17822                         filledExtraSlot = true;
17823                     }
17824                 }
17825             }
17826
17827             // Bump up the following if we filled in an extra slot
17828             if (filledExtraSlot) 
17829                 bytesOfNextSlotOfCurPromotedStruct += 4;
17830
17831             // Go to the next field.
17832             nextPromotedStructFieldVar++;
17833             if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17834             {
17835                 fieldVarDsc = NULL;
17836             }
17837             else
17838             {
17839                 // The next field should have the same parent variable, and we should have put the field vars in order sorted by offset.
17840                 assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField
17841                        && fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl
17842                        && fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17843                 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17844             }
17845             bytesOfNextSlotOfCurPromotedStruct += 4;
17846         }
17847         else  // oneFieldFillsSlotFromStart == false
17848         {
17849             // The current slot should contain more than one field.
17850             // We'll construct a word in memory for the slot, then load it into a register.
17851             // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current slot,
17852             // in which case we'll just skip this loop altogether.)
17853             while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
17854             {
17855                 // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
17856                 // whose fields have their natural alignment, and alignment == size on ARM).
17857                 noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
17858
17859                 // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
17860                 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17861                 noway_assert(argOffset == INT32_MAX || (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
17862  
17863                 if (fieldVarDsc->lvRegister)
17864                 {
17865                     if (curRegNum != MAX_REG_ARG)
17866                     {
17867                         noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17868
17869                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17870                                                 fieldSize,
17871                                                 fieldVarDsc->lvRegNum,
17872                                                 compiler->lvaPromotedStructAssemblyScratchVar,
17873                                                 fieldVarDsc->lvFldOffset % 4);
17874                     }
17875                     else
17876                     {
17877                         // Dest is stack; write directly.
17878                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17879                                                 fieldSize,
17880                                                 fieldVarDsc->lvRegNum,
17881                                                 compiler->lvaOutgoingArgSpaceVar,
17882                                                 fieldArgOffset);
17883                     }
17884                 }
17885                 else
17886                 {
17887                     // Source is in memory.
17888
17889                     // Make sure we have a temporary register to use...
17890                     if (regTmp == REG_STK)
17891                     {
17892                         regTmp = regSet.rsPickFreeReg();
17893                     }
17894                     getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17895                                             fieldSize,
17896                                             regTmp,
17897                                             nextPromotedStructFieldVar, 0);
17898                     regTracker.rsTrackRegTrash(regTmp);
17899
17900                     if (curRegNum != MAX_REG_ARG)
17901                     {                    
17902                         noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17903
17904                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17905                                                 fieldSize,
17906                                                 regTmp,
17907                                                 compiler->lvaPromotedStructAssemblyScratchVar,
17908                                                 fieldVarDsc->lvFldOffset % 4);
17909                     }
17910                     else
17911                     {
17912                         getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17913                                                 fieldSize,
17914                                                 regTmp,
17915                                                 compiler->lvaOutgoingArgSpaceVar,
17916                                                 fieldArgOffset);
17917                     }
17918                 }
17919                 // Go to the next field.
17920                 nextPromotedStructFieldVar++;
17921                 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17922                 {
17923                     fieldVarDsc = NULL;
17924                 }
17925                 else
17926                 {
17927                     // The next field should have the same parent variable, and we should have put the field vars in order sorted by offset.
17928                     noway_assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField
17929                         && fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl
17930                         && fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17931                     fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17932                 }
17933             }
17934             // Now, if we were assembling the slot in the promoted-struct assembly scratch variable in order to
17935             // write it to an argument register, load it into that register.
17936             if (curRegNum != MAX_REG_ARG)
17937             {
17938                 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17939
17940                 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL),
17941                                         EA_4BYTE,
17942                                         curRegNum,
17943                                         compiler->lvaPromotedStructAssemblyScratchVar, 0);
17944                 regTracker.rsTrackRegTrash(curRegNum);
17945                 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17946             }
17947             // We've finished a slot; set the goal of the next slot.
17948             bytesOfNextSlotOfCurPromotedStruct += 4;
17949         }
17950     }
17951
17952     // Write back the updates.
17953     *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
17954     *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17955     *pCurRegNum = curRegNum;
17956     *pRegTmp = regTmp;
17957
17958     return filledExtraSlot;
17959 }
17960 #endif // _TARGET_ARM_
17961
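// genFindDeadFieldRegs: for a copy-block node whose source is the address of a promoted struct local,
// return the mask of registers that hold enregistered field variables of that local when the local
// dies at this use; otherwise return an empty mask.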
17962 regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
17963 {
17964     noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
17965     GenTreePtr lst = cpBlk->gtOp.gtOp1;
17966     noway_assert(lst->OperGet() == GT_LIST);  // Well-formedness.
17967     GenTreePtr rhs = lst->gtOp.gtOp2;
17968     regMaskTP res = 0;
17969     if (rhs->OperGet() == GT_ADDR)
17970     {
17971         rhs = rhs->gtOp.gtOp1;
17972         if (rhs->OperGet() == GT_LCL_VAR)
17973         {
17974             LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
17975             if (rhsDsc->lvPromoted)
17976             {
17977                 // It is promoted; iterate over its field vars.
17978                 unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
17979                 for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
17980                 {
17981                     LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
17982                     // Did the variable go dead, and is it enregistered?
17983                     if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
17984                     {
17985                         // Add the register number to the set of registers holding field vars that are going dead.
17986                         res |= genRegMask(fieldVarDsc->lvRegNum);
17987                     }
17988                 }
17989             }
17990         }
17991     }
17992     return res;
17993 }
17994
17995
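// SetupLateArgs: generate code to evaluate the 'late' arguments of 'call' into their assigned
// argument registers, spilling any argument registers that are still in use, and copying any
// stack-resident portions of struct arguments into the outgoing argument area.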
17996 void CodeGen::SetupLateArgs(GenTreePtr call)
17997 {
17998     GenTreeArgList* lateArgs;
17999     GenTreePtr  curr;
18000
18001     /* Generate the code to move the late arguments into registers */
18002
18003     for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
18004     {
18005         curr     = lateArgs->Current();
18006         assert(curr);
18007
18008         fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
18009         assert(curArgTabEntry);
18010         regNumber  regNum    = curArgTabEntry->regNum;
18011         unsigned   argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18012
18013         assert(isRegParamType(curr->TypeGet()));
18014         assert(curr->gtType != TYP_VOID);
18015
18016         /* If the register is already marked as used, it will become
18017            multi-used. However, since it is a callee-trashed register,
18018            we will have to spill it before the call anyway. So do it now */
18019
18020         {
18021             // Remember which registers hold pointers. We will spill
18022             // them, but the code that follows will fetch reg vars from
18023             // the registers, so we need that GC info.
18024             // Also regSet.rsSpillReg doesn't like to spill enregistered
18025             // variables, but if this is their last use that is *exactly*
18026             // what we need to do, so we have to temporarily pretend
18027             // they are no longer live.
18028             // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
18029             // when their last use is about to occur?
18030             // It is because this is the second operand to be evaluated
18031             // of some parent binary op, and the first operand is
18032             // live across this tree, and thought it could re-use the
18033             // variable's register (like a GT_REG_VAR). This probably
18034             // is caused by RegAlloc assuming the first operand would
18035             // evaluate into another register.
18036             regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
18037             regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
18038             regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
18039             regSet.RemoveMaskVars(rsTemp);
18040
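            // Walk the registers this argument will occupy and spill any that are currently in use.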
18041             regNumber regNum2 = regNum;
18042             for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
18043             {
18044                 if (regSet.rsMaskUsed & genRegMask(regNum2))
18045                 {
18046                     assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
18047                     regSet.rsSpillReg(regNum2);
18048                 }
18049                 if (isValidIntArgReg(regNum2))
18050                 {
18051                     regNum2 = genRegArgNext(regNum2);
18052                 }
18053                 else
18054                 {
18055                     regNum2 = genRegArgNextFloat(regNum2);
18056                 }
18057                 assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
18058             }
18059
18060             // Restore gc tracking masks.
18061             gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
18062             gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
18063
18064             // Set maskvars back to normal
18065             regSet.AddMaskVars(rsTemp);
18066         }
18067
18068         /* Evaluate the argument to a register */
18069
18070         /* Check if this is the guess area for the resolve interface call
18071          * Pass a size of EA_OFFSET */
18072         if  (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
18073         {
18074             getEmitter()->emitIns_R_C(ins_Load(TYP_INT),
18075                                     EA_OFFSET,
18076                                     regNum,
18077                                     curr->gtClsVar.gtClsVarHnd,
18078                                     0);
18079             regTracker.rsTrackRegTrash(regNum);
18080
18081             /* The value is now in the appropriate register */
18082
18083             genMarkTreeInReg(curr, regNum);
18084
18085             regSet.rsMarkRegUsed(curr);
18086         }
18087 #ifdef _TARGET_ARM_
18088         else if (curr->gtType == TYP_STRUCT)
18089         {
18090             GenTree* arg = curr;
18091             while (arg->gtOper == GT_COMMA)
18092             {
18093                 GenTreePtr op1 = arg->gtOp.gtOp1;
18094                 genEvalSideEffects(op1);
18095                 genUpdateLife(op1);
18096                 arg = arg->gtOp.gtOp2;
18097             }
18098             noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_MKREFANY));
18099
18100             // This code passes a TYP_STRUCT by value using
18101             // the argument registers first and 
18102             // then the lvaOutgoingArgSpaceVar area.
18103             //
18104
18105             // We prefer to choose low registers here to reduce code bloat
18106             regMaskTP regNeedMask    = RBM_LOW_REGS;
18107             unsigned  firstStackSlot = 0;
18108             unsigned  argAlign       = TARGET_POINTER_SIZE;
18109             size_t    originalSize   = InferStructOpSizeAlign(arg, &argAlign);
18110                     
18111             unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
18112             assert(slots > 0);
18113
18114             if (regNum == REG_STK)
18115             {
18116                 firstStackSlot = 0;
18117             }
18118             else
18119             {
18120                 if (argAlign == (TARGET_POINTER_SIZE * 2))
18121                 {
18122                     assert((regNum & 1) == 0);
18123                 }
18124
18125                 // firstStackSlot is an index of the first slot of the struct
18126                 // that is on the stack, in the range [0,slots]. If it is 'slots',
18127                 // then the entire struct is in registers. It is also equal to
18128                 // the number of slots of the struct that are passed in registers.
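                // For example, on ARM (where r0-r3 are the integer argument registers) a 16-byte
                // struct (4 slots) assigned to start at r2 has firstStackSlot == 2: slots 0 and 1
                // go in r2 and r3, and slots 2 and 3 go to the outgoing argument area.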
18129
18130                 if (curArgTabEntry->isHfaRegArg)
18131                 {
18132                     // HFA arguments that have been assigned to registers must fit entirely in the FP argument registers.
18133                     assert(regNum             >= FIRST_FP_ARGREG && "HFA must go in FP register");
18134                     assert(regNum + slots - 1 <= LAST_FP_ARGREG  && "HFA argument doesn't fit entirely in FP argument registers");
18135                     firstStackSlot = slots;
18136                 }
18137                 else if (regNum + slots > MAX_REG_ARG)
18138                 {
18139                     firstStackSlot = MAX_REG_ARG - regNum;
18140                     assert(firstStackSlot > 0);
18141                 }
18142                 else
18143                 {
18144                     firstStackSlot = slots;
18145                 }
18146
18147                 if (curArgTabEntry->isHfaRegArg)
18148                 {
18149                     // Mask out the registers used by an HFA arg from the ones used to compute tree into.
18150                     for (unsigned i = regNum; i < regNum + slots; i ++)
18151                     {
18152                         regNeedMask &= ~genRegMask(regNumber(i));
18153                     }
18154                 }
18155             }
18156
18157             // This holds the set of registers corresponding to enregistered promoted struct field variables
18158             // that go dead after this use of the variable in the argument list.
18159             regMaskTP deadFieldVarRegs = RBM_NONE;
18160
18161             // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
18162             // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
18163             // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
18164             // table entry for the promoted struct local.  As we fill slots with the contents of a
18165             // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
18166             // that indicate another filled slot (if we have a 12-byte struct, it has 3 four-byte slots; when we're working
18167             // on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're done),
18168             // and "nextPromotedStructFieldVar" will be the local
18169             // variable number of the next field variable to be copied.
18170             LclVarDsc* promotedStructLocalVarDesc = NULL;
18171             unsigned   bytesOfNextSlotOfCurPromotedStruct = 0;  // Set before use, below.
18172             unsigned   nextPromotedStructFieldVar = BAD_VAR_NUM;
18173             GenTreePtr structLocalTree  = NULL;
18174             
18175             BYTE *    gcLayout = NULL;
18176             regNumber regSrc = REG_NA;
18177             if (arg->gtOper == GT_OBJ)
18178             {
18179                 // Are we loading a promoted struct local var?
18180                 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
18181                     arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18182                 {
18183                     structLocalTree               = arg->gtObj.gtOp1->gtOp.gtOp1;
18184                     unsigned     structLclNum     = structLocalTree->gtLclVarCommon.gtLclNum;
18185                     LclVarDsc *  varDsc           = &compiler->lvaTable[structLclNum];
18186
18187                     Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
18188
18189                     if (varDsc->lvPromoted && 
18190                         promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT)  // Otherwise it is guaranteed to live on stack.
18191                     {
18192                         // Fix 388395 ARM JitStress WP7
18193                         noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
18194
18195                         assert(!varDsc->lvAddrExposed);  // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
18196                         promotedStructLocalVarDesc = varDsc;
18197                         nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18198                     }
18199                 }
18200
18201                 if (promotedStructLocalVarDesc == NULL)
18202                 {
18203                     // If it's not a promoted struct variable, set "regSrc" to the address
18204                     // of the struct local.
18205                     genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
18206                     noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
18207                     regSrc = arg->gtObj.gtOp1->gtRegNum;
18208                     // Remove this register from the set of registers that we pick from, unless slots equals 1
18209                     if (slots > 1)
18210                         regNeedMask &= ~genRegMask(regSrc);
18211                 }
18212
18213                 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
18214                 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
18215             }
18216             else if (arg->gtOper == GT_LCL_VAR)
18217             {
18218                 // Move the address of the LCL_VAR in arg into reg
18219
18220                 unsigned varNum = arg->gtLclVarCommon.gtLclNum;
18221
18222                 // Are we loading a promoted struct local var?
18223                 structLocalTree               = arg;
18224                 unsigned     structLclNum     = structLocalTree->gtLclVarCommon.gtLclNum;
18225                 LclVarDsc *  varDsc           = &compiler->lvaTable[structLclNum];
18226
18227                 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
18228
18229                 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
18230
18231                 if (varDsc->lvPromoted && 
18232                     promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT)  // Otherwise it is guaranteed to live on stack.
18233                 {
18234                     assert(!varDsc->lvAddrExposed);    // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
18235                     promotedStructLocalVarDesc = varDsc;
18236                     nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18237                 }
18238
18239                 if (promotedStructLocalVarDesc == NULL)
18240                 {
18241                     regSrc = regSet.rsPickFreeReg(regNeedMask);
18242                     // Remove this register from the set of registers that we pick from, unless slots equals 1
18243                     if (slots > 1)
18244                         regNeedMask &= ~genRegMask(regSrc);
18245
18246                     getEmitter()->emitIns_R_S(INS_lea,
18247                                             EA_PTRSIZE,
18248                                             regSrc,
18249                                             varNum, 0);
18250                     regTracker.rsTrackRegTrash(regSrc);
18251                     gcLayout = compiler->lvaGetGcLayout(varNum);
18252                 }
18253             }
18254             else if (arg->gtOper == GT_MKREFANY)
18255             {
18256                 assert(slots == 2);
18257                 assert((firstStackSlot == 1) || (firstStackSlot == 2));
18258                 assert(argOffset == 0); // ???
18259                 PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
18260
18261                 // Adjust argOffset if part of this guy was pushed onto the stack
18262                 if (firstStackSlot < slots)
18263                 {
18264                     argOffset += TARGET_POINTER_SIZE;
18265                 }
18266                 
18267                 // Skip the copy loop below because we have already placed the argument in the right place
18268                 slots = 0;
18269                 gcLayout = NULL;
18270             }
18271             else
18272             {
18273                 assert(!"Unsupported TYP_STRUCT arg kind");
18274                 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
18275             }
18276
18277             if (promotedStructLocalVarDesc != NULL)
18278             {
18279                 // We must do the stack parts first, since those might need values
18280                 // from argument registers that will be overwritten in the portion of the
18281                 // loop that writes into the argument registers.
18282                 bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot+1) * TARGET_POINTER_SIZE;
18283                 // Now find the var number of the first that starts in the first stack slot.
18284                 unsigned fieldVarLim = promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
18285                 while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset < (firstStackSlot*TARGET_POINTER_SIZE)
18286                        && nextPromotedStructFieldVar < fieldVarLim)
18287                 {
18288                     nextPromotedStructFieldVar++;
18289                 }
18290                 // We should reach the limit (meaning there is no field that goes even partly onto the stack) only if
18291                 // the first stack slot is after the last slot.
18292                 assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
18293             }
18294                         
18295             if (slots > 0)   // the mkref case may have set "slots" to zero. 
18296             {
18297                 // First pass the stack portion of the struct (if any)
18298                 //
18299                 for (unsigned i = firstStackSlot; i < slots; i++)
18300                 {
18301                     emitAttr fieldSize;
18302                     if      (gcLayout[i] == TYPE_GC_NONE)
18303                         fieldSize = EA_PTRSIZE;
18304                     else if (gcLayout[i] == TYPE_GC_REF)
18305                         fieldSize = EA_GCREF;
18306                     else
18307                     {
18308                         noway_assert(gcLayout[i] == TYPE_GC_BYREF);
18309                         fieldSize = EA_BYREF;
18310                     }
18311
18312                     regNumber maxRegArg = regNumber(MAX_REG_ARG);
18313                     if (promotedStructLocalVarDesc != NULL)
18314                     {
18315                         regNumber regTmp = REG_STK;
18316
18317                         bool filledExtraSlot =
18318                             genFillSlotFromPromotedStruct(arg,
18319                                                           curArgTabEntry,
18320                                                           promotedStructLocalVarDesc, 
18321                                                           fieldSize, 
18322                                                           &nextPromotedStructFieldVar, 
18323                                                           &bytesOfNextSlotOfCurPromotedStruct,
18324                                                           /*pCurRegNum*/&maxRegArg,
18325                                                           argOffset, 
18326                                                           /*fieldOffsetOfFirstStackSlot*/ firstStackSlot * TARGET_POINTER_SIZE,
18327                                                           /*argOffsetOfFirstStackSlot*/ 0, // is always zero in this "spanning" case.
18328                                                           &deadFieldVarRegs,
18329                                                           &regTmp);
18330                         if (filledExtraSlot) 
18331                         {
18332                             i++;
18333                             argOffset += TARGET_POINTER_SIZE;
18334                         }
18335                     }
18336                     else // (promotedStructLocalVarDesc == NULL)
18337                     {
18338                         // When slots > 1 we perform multiple loads/stores, so regTmp cannot be equal to regSrc.
18339                         // Although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
18340                         // to regSet.rsPickFreeReg, so we need to be a little more forceful and lock regSrc.
18341                         // Otherwise (a single slot), just re-use the same register.
18342                         // 
18343                         regNumber regTmp = regSrc;
18344                         if (slots != 1)
18345                         {
18346                             regMaskTP regSrcUsed;
18347                             regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
18348
18349                             regTmp = regSet.rsPickFreeReg(regNeedMask);
18350
18351                             noway_assert(regTmp != regSrc);
18352
18353                             regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
18354                         }
18355                         
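                              // Copy this slot from the source struct into the outgoing argument area, going through regTmp.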
18356                         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
18357                                                  fieldSize,
18358                                                  regTmp,
18359                                                  regSrc,
18360                                                  i * TARGET_POINTER_SIZE);
18361                         
18362                         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
18363                                                 fieldSize,
18364                                                 regTmp,
18365                                                 compiler->lvaOutgoingArgSpaceVar,
18366                                                 argOffset);
18367                         regTracker.rsTrackRegTrash(regTmp);
18368                     }
18369                     argOffset += TARGET_POINTER_SIZE;
18370                 }
18371
18372                 // Now pass the register portion of the struct
18373                 //
18374
18375                 bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
18376                 if (promotedStructLocalVarDesc != NULL)
18377                     nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18378
18379                 // Create a nested loop here so that the first time through the loop
18380                 // we set up all of the regArg registers except, possibly,
18381                 // the one that would overwrite regSrc.  Then, in the final pass
18382                 // (if necessary), we set up that remaining regArg from regSrc.
18383                 //
18384                 bool overwriteRegSrc=false;
18385                 bool needOverwriteRegSrc=false;
18386                 do {
18387                     if (needOverwriteRegSrc)
18388                         overwriteRegSrc = true;
18389
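                      // First pass (overwriteRegSrc == false): set up every argument register except the one equal to regSrc.
                      // Second pass (if needed): set up only that remaining register from regSrc.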
18390                     for (unsigned i = 0; i < firstStackSlot; i++)
18391                     {
18392                         regNumber regArg = (regNumber) (regNum+i);
18393
18394                         if (overwriteRegSrc == false)
18395                         {
18396                             if (regArg == regSrc)
18397                             {
18398                                 needOverwriteRegSrc=true;
18399                                 continue;
18400                             }
18401                         }
18402                         else
18403                         {
18404                             if (regArg != regSrc)
18405                                 continue;
18406                         }
18407
18408                         emitAttr fieldSize;
18409                         if      (gcLayout[i] == TYPE_GC_NONE)
18410                             fieldSize = EA_PTRSIZE;
18411                         else if (gcLayout[i] == TYPE_GC_REF)
18412                             fieldSize = EA_GCREF;
18413                         else
18414                         {
18415                             noway_assert(gcLayout[i] == TYPE_GC_BYREF);
18416                             fieldSize = EA_BYREF;
18417                         }
18418
18419                         regNumber regTmp = REG_STK;
18420                         if (promotedStructLocalVarDesc != NULL)
18421                         {
18422                             bool filledExtraSlot =
18423                                 genFillSlotFromPromotedStruct(arg,
18424                                                               curArgTabEntry,
18425                                                               promotedStructLocalVarDesc, 
18426                                                               fieldSize, 
18427                                                               &nextPromotedStructFieldVar, 
18428                                                               &bytesOfNextSlotOfCurPromotedStruct, 
18429                                                               /*pCurRegNum*/&regArg,
18430                                                               /*argOffset*/ INT32_MAX, 
18431                                                               /*fieldOffsetOfFirstStackSlot*/ INT32_MAX, 
18432                                                               /*argOffsetOfFirstStackSlot*/ INT32_MAX,
18433                                                               &deadFieldVarRegs,
18434                                                               &regTmp);
18435                             if (filledExtraSlot) 
18436                                 i++;
18437                         }
18438                         else
18439                         {
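                              // Not a promoted struct: load this slot directly from [regSrc + offset] into the argument register.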
18440                             getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
18441                                                      fieldSize,
18442                                                      regArg,
18443                                                      regSrc,
18444                                                      i*TARGET_POINTER_SIZE);
18445                         }
18446                         regTracker.rsTrackRegTrash(regArg);
18447                     }                  
18448                 } while (needOverwriteRegSrc != overwriteRegSrc);
18449             }
18450
18451             if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
18452             {
18453                 regSet.rsMarkRegFree(genRegMask(regSrc));
18454             }
18455             
18456             if (regNum != REG_STK && promotedStructLocalVarDesc == NULL)  // If promoted, we already declared the regs used.
18457             {
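                  // Mark the arg as register-resident, and mark every register holding a piece of the struct
                  // as used so it stays live until the call.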
18458                 arg->gtFlags |= GTF_REG_VAL;
18459                 for (unsigned i = 1; i < firstStackSlot; i++)
18460                 {
18461                     arg->gtRegNum = (regNumber)(regNum + i);
18462                     curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true)
18463                                                 : regSet.rsMarkRegUsed(arg);
18464                 }
18465                 arg->gtRegNum = regNum;
18466                 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true)
18467                                             : regSet.rsMarkRegUsed(arg);
18468             }
18469
18470             // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
18471             // so update liveness.
18472             genUpdateLife(arg);
18473
18474             // Now, if some copied field locals were enregistered, and they're now dead, update the set of 
18475             // register holding gc pointers.
18476             if (deadFieldVarRegs != RBM_NONE)
18477                 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
18478         }
18479         else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
18480         {
18481             if (curArgTabEntry->regNum == REG_STK)
18482             {
18483                 // The arg is passed in the outgoing argument area of the stack frame
18484                 genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
18485                 assert(curr->gtFlags & GTF_REG_VAL);  // should be enregistered after genCompIntoFreeRegPair(curr, RBM_NONE)
18486
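                  // Store the low and high halves of the long into two consecutive outgoing stack slots.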
18487                 inst_SA_RV(ins_Store(TYP_INT), argOffset+0, genRegPairLo(curr->gtRegPair), TYP_INT);
18488                 inst_SA_RV(ins_Store(TYP_INT), argOffset+4, genRegPairHi(curr->gtRegPair), TYP_INT);
18489             }
18490             else
18491             {
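                  // The long is passed in a pair of consecutive argument registers.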
18492                 assert(regNum < REG_ARG_LAST);
18493                 regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
18494                 genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
18495                 assert(curr->gtRegPair == regPair);
18496                 regSet.rsMarkRegPairUsed(curr);
18497             }
18498         }
18499 #endif // _TARGET_ARM_
18500         else if (curArgTabEntry->regNum == REG_STK)
18501         {
18502             // The arg is passed in the outgoing argument area of the stack frame
18503             //
18504             genCodeForTree(curr, 0);
18505             assert(curr->gtFlags & GTF_REG_VAL);  // should be enregistered after genCodeForTree(curr, 0)
18506
18507             inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
18508
18509             if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
18510                 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
18511         }
18512         else
18513         {           
18514             if (!varTypeIsFloating(curr->gtType))
18515             {
18516                 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
18517                 assert(curr->gtRegNum == regNum);
18518                 regSet.rsMarkRegUsed(curr);
18519             }
18520             else  // varTypeIsFloating(curr->gtType)
18521             {
18522                 if (genIsValidFloatReg(regNum))
18523                 {
18524                     genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG, false);
18525                     assert(curr->gtRegNum == regNum);
18526                     regSet.rsMarkRegUsed(curr);
18527                 }
18528                 else
18529                 {
18530                     genCodeForTree(curr, 0);
18531                     // If we are loading a floating point type into integer registers,
18532                     // then it must be for varargs.
18533                     // genCodeForTree will have loaded it into a floating point register;
18534                     // now copy it into the correct integer register(s).
18535                     if (curr->TypeGet() == TYP_FLOAT)
18536                     {
18537                         assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
18538                         regSet.rsSpillRegIfUsed(regNum);
18539 #ifdef _TARGET_ARM_
18540                         getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
18541 #else
18542 #error "Unsupported target"
18543 #endif
18544                         regTracker.rsTrackRegTrash(regNum);
18545
18546                         curr->gtType = TYP_INT;                // Change this to TYP_INT in case we need to spill this register
18547                         curr->gtRegNum = regNum;
18548                         regSet.rsMarkRegUsed(curr);
18549                     }
18550                     else
18551                     {
18552                         assert(curr->TypeGet() == TYP_DOUBLE);
18553                         regNumber intRegNumLo = regNum;
18554                         curr->gtType = TYP_LONG;               // Change this to TYP_LONG in case we spill this 
18555 #ifdef _TARGET_ARM_
18556                         regNumber intRegNumHi = regNumber(intRegNumLo + 1);
18557                         assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
18558                         assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
18559                         regSet.rsSpillRegIfUsed(intRegNumHi);
18560                         regSet.rsSpillRegIfUsed(intRegNumLo);
18561
18562                         getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
18563                         regTracker.rsTrackRegTrash(intRegNumLo);
18564                         regTracker.rsTrackRegTrash(intRegNumHi);
18565                         curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
18566                         regSet.rsMarkRegPairUsed(curr);
18567 #else
18568 #error "Unsupported target"
18569 #endif
18570                     }
18571                 }
18572             }
18573         }
18574     }
18575
18576     /* If any of the previously loaded arguments were spilled - reload them */
18577
18578     for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
18579     {
18580         curr     = lateArgs->Current();
18581         assert(curr);
18582
18583         if (curr->gtFlags & GTF_SPILLED)
18584         {
18585             if  (isRegPairType(curr->gtType))
18586             {
18587                 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
18588             }
18589             else
18590             {
18591                 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
18592             }
18593         }
18594     }
18595 }
18596
18597
18598 #ifdef _TARGET_ARM_
18599
18600 // 'Push' a single GT_MKREFANY argument onto a call's argument list.
18601 // The argument is passed as described by the fgArgTabEntry.
18602 // If any part of the struct is to be passed in a register, the
18603 // regNum value will be equal to the register used to pass the
18604 // first part of the struct.
18605 // If any part is to go onto the stack, we first generate the
18606 // value into a register specified by 'regNeedMask' and
18607 // then store it to the outgoing argument area.
18608 // When this method returns, both parts of the TypedReference have
18609 // been pushed onto the stack, but *no* registers have been marked
18610 // as 'in-use'; that is the responsibility of the caller.
18611 //
18612 void CodeGen::PushMkRefAnyArg ( GenTreePtr          mkRefAnyTree, 
18613                                 fgArgTabEntryPtr    curArgTabEntry,
18614                                 regMaskTP           regNeedMask)
18615 {
18616     regNumber  regNum = curArgTabEntry->regNum;
18617     regNumber  regNum2;
18618     assert(mkRefAnyTree->gtOper == GT_MKREFANY);
18619     regMaskTP arg1RegMask = 0;
18620     int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18621
18622     // Construct the TypedReference directly into the argument list of the call by            
18623     // 'pushing' the first field of the typed reference: the pointer.
18624     // Do this by directly generating it into the argument register or outgoing arg area of the stack.
18625     // Mark it as used so we don't trash it while generating the second field.
18626     //
18627     if (regNum == REG_STK)
18628     {
18629         genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18630         noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
18631         regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
18632         inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
18633         regTracker.rsTrackRegTrash(tmpReg1);
18634         argOffset += TARGET_POINTER_SIZE;
18635         regNum2 = REG_STK;
18636     }
18637     else
18638     {
18639         assert(regNum <= REG_ARG_LAST);
18640         arg1RegMask = genRegMask(regNum);
18641         genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
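        // The second field goes in the next argument register, or on the stack if regNum was the last argument register.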
18642         regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
18643     }
18644
18645     // Now 'push' the second field of the typed reference: the method table.
18646     if (regNum2 == REG_STK)
18647     {
18648         genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18649         noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
18650         regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
18651         inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
18652         regTracker.rsTrackRegTrash(tmpReg2);
18653     }
18654     else
18655     {
18656         assert(regNum2 <= REG_ARG_LAST);
18657         // We don't have to mark this register as being in use here because it will
18658         // be done by the caller, and we don't want to double-count it.
18659         genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
18660     }
18661
18662     // Now that we are done generating the second part of the TypedReference, we can mark
18663     // the first register as free.
18664     // The caller, in the shared path, will re-mark all registers used by this argument
18665     // as being used, so we don't want to double-count this one.
18666     if (arg1RegMask != 0)
18667     {
18668         GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
18669         if  (op1->gtFlags & GTF_SPILLED)
18670         {
18671             /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register  */
18672
18673             regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
18674         }
18675         else
18676         {
18677             regSet.rsMarkRegFree(arg1RegMask);
18678         }
18679     }
18680 }
18681 #endif // _TARGET_ARM_
18682
18683 #endif // FEATURE_FIXED_OUT_ARGS 
18684
18685
18686 regMaskTP           CodeGen::genLoadIndirectCallTarget(GenTreePtr  call)
18687 {
18688     assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
18689
18690     regMaskTP fptrRegs;
18691
18692     /* Loading the indirect call target might cause one or more of the previously
18693        loaded argument registers to be spilled. So, we save information about all
18694        the argument registers, and unspill any of them that get spilled, after
18695        the call target is loaded.
18696     */
18697     struct
18698     {
18699         GenTreePtr  node;
18700         union
18701         {
18702             regNumber   regNum;
18703             regPairNo   regPair;
18704         };
18705     }
18706     regArgTab[MAX_REG_ARG];
18707
18708     /* Record the previously loaded arguments, if any */
18709
18710     unsigned regIndex;
18711     regMaskTP prefRegs = regSet.rsRegMaskFree();
18712     regMaskTP argRegs = RBM_NONE;
18713     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18714     {
18715         regMaskTP mask;
18716         regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18717         GenTreePtr argTree = regSet.rsUsedTree[regNum];
18718         regArgTab[regIndex].node = argTree;
18719         if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT))     // We won't spill the struct
18720         {
18721             assert(argTree->gtFlags & GTF_REG_VAL);
18722             if  (isRegPairType(argTree->gtType))
18723             {
18724                 regPairNo regPair = argTree->gtRegPair;
18725                 assert(regNum == genRegPairHi(regPair) ||
18726                        regNum == genRegPairLo(regPair));
18727                 regArgTab[regIndex].regPair = regPair;
18728                 mask = genRegPairMask(regPair);
18729             }
18730             else
18731             {
18732                 assert(regNum == argTree->gtRegNum);
18733                 regArgTab[regIndex].regNum = regNum;
18734                 mask = genRegMask(regNum);
18735             }
18736             assert(!(prefRegs & mask));
18737             argRegs |= mask;
18738         }
18739     }
18740     
18741     /* Record the register(s) used for the indirect call func ptr */
18742     fptrRegs  = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18743
18744     /* If any of the previously loaded arguments were spilled, reload them */
18745
18746     for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18747     {
18748         GenTreePtr argTree = regArgTab[regIndex].node;
18749         if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18750         {
18751             assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18752             if  (isRegPairType(argTree->gtType))
18753             {
18754                 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18755             }
18756             else
18757             {
18758                 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18759             }
18760         }
18761     }
18762
18763     /* Make sure the target is still addressable while avoiding the argument registers */
18764
18765     fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
18766
18767     return fptrRegs;
18768 }
18769
18770 /*****************************************************************************
18771  *
18772  *  Generate code for a call. If the call returns a value in register(s), the
18773  *  register mask that describes where the result will be found is returned;
18774  *  otherwise, RBM_NONE is returned.
18775  */
18776
18777 #ifdef _PREFAST_
18778 #pragma warning(push)
18779 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
18780 #endif
18781 regMaskTP           CodeGen::genCodeForCall(GenTreePtr  call,
18782                                             bool        valUsed)
18783 {
18784     emitAttr        retSize;
18785     size_t          argSize;
18786     size_t          args;
18787     regMaskTP       retVal;
18788     emitter::EmitCallType emitCallType;
18789
18790     unsigned        saveStackLvl;
18791
18792     BasicBlock  *   returnLabel = DUMMY_INIT(NULL);
18793     LclVarDsc   *   frameListRoot = NULL;
18794
18795     unsigned        savCurIntArgReg;
18796     unsigned        savCurFloatArgReg;
18797
18798     unsigned        areg;
18799
18800     regMaskTP       fptrRegs = RBM_NONE;
18801     regMaskTP       vptrMask = RBM_NONE;
18802
18803 #ifdef  DEBUG
18804     unsigned        stackLvl = getEmitter()->emitCurStackLvl;
18805
18806     if (compiler->verbose)
18807     {
18808         printf("\t\t\t\t\t\t\tBeg call ");
18809         Compiler::printTreeID(call);
18810         printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18811     }
18812 #endif
18813
18814     gtCallTypes     callType  = (gtCallTypes)call->gtCall.gtCallType;
18815     IL_OFFSETX      ilOffset  = BAD_IL_OFFSET;
18816
18817     CORINFO_SIG_INFO* sigInfo = nullptr;
18818
18819 #ifdef DEBUGGING_SUPPORT
18820     if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18821     {
18822         (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18823     }
18824 #endif
18825
18826     /* Make some sanity checks on the call node */
18827
18828     // This is a call
18829     noway_assert(call->IsCall());
18830     // "this" only makes sense for user functions
18831     noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18832     // tail calls are not done for helper calls or caller-pop args; also check that
18833     // the global flag is set
18834     noway_assert(!call->gtCall.IsTailCall() ||
18835                  (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18836
18837 #ifdef DEBUG
18838     // Pass the call signature information down into the emitter so the emitter can associate
18839     // native call sites with the signatures they were generated from.
18840     if (callType != CT_HELPER)
18841     {
18842         sigInfo = call->gtCall.callSig;
18843     }
18844 #endif // DEBUG
18845
18846     unsigned pseudoStackLvl = 0;
18847
18848     if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18849     {
18850         noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18851
18852         pseudoStackLvl = genStackLevel;
18853
18854         noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18855                       "so fgAddCodeRef() should have set isFramePointerRequired()");
18856     }
18857
18858     /* Mark the current stack level and list of pointer arguments */
18859
18860     saveStackLvl = genStackLevel;
18861
18862
18863     /*-------------------------------------------------------------------------
18864      *  Set up the registers and arguments
18865      */
18866
18867     /* We'll keep track of how much we've pushed on the stack */
18868
18869     argSize = 0;
18870
18871     /* We need to get a label for the return address with the proper stack depth. */
18872     /* For the callee pops case (the default) that is before the args are pushed. */
18873
18874     if ((call->gtFlags & GTF_CALL_UNMANAGED) &&
18875         !(call->gtFlags & GTF_CALL_POP_ARGS))
18876     {
18877        returnLabel = genCreateTempLabel();
18878     }
18879
18880     /*
18881         Make sure to save the current argument register status
18882         in case we have nested calls.
18883      */
18884
18885     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18886     savCurIntArgReg = intRegState.rsCurRegArgNum;
18887     savCurFloatArgReg = floatRegState.rsCurRegArgNum;
18888     intRegState.rsCurRegArgNum = 0;
18889     floatRegState.rsCurRegArgNum = 0;
18890
18891     /* Pass the arguments */
18892
18893     if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
18894     {
18895         argSize += genPushArgList(call);
18896     }
18897
18898     /* We need to get a label for the return address with the proper stack depth. */
18899     /* For the caller pops case (cdecl) that is after the args are pushed. */
18900
18901     if (call->gtFlags & GTF_CALL_UNMANAGED)
18902     {
18903         if (call->gtFlags & GTF_CALL_POP_ARGS)
18904             returnLabel = genCreateTempLabel();
18905
18906         /* Make sure that we now have a label */
18907         noway_assert(returnLabel != DUMMY_INIT(NULL));
18908     }
18909
18910     if (callType == CT_INDIRECT)
18911     {
18912         fptrRegs = genLoadIndirectCallTarget(call);
18913     }
18914
18915     /* Make sure any callee-trashed registers are saved */
18916
18917     regMaskTP   calleeTrashedRegs = RBM_NONE;
18918
18919 #if GTF_CALL_REG_SAVE
18920     if  (call->gtFlags & GTF_CALL_REG_SAVE)
18921     {
18922         /* The return value reg(s) will definitely be trashed */
18923
18924         switch (call->gtType)
18925         {
18926         case TYP_INT:
18927         case TYP_REF:
18928         case TYP_BYREF:
18929 #if !CPU_HAS_FP_SUPPORT
18930         case TYP_FLOAT:
18931 #endif
18932             calleeTrashedRegs = RBM_INTRET;
18933             break;
18934
18935         case TYP_LONG:
18936 #if !CPU_HAS_FP_SUPPORT
18937         case TYP_DOUBLE:
18938 #endif
18939             calleeTrashedRegs = RBM_LNGRET;
18940             break;
18941
18942         case TYP_VOID:
18943 #if CPU_HAS_FP_SUPPORT
18944         case TYP_FLOAT:
18945         case TYP_DOUBLE:
18946 #endif
18947             calleeTrashedRegs = 0;
18948             break;
18949
18950         default:
18951             noway_assert(!"unhandled/unexpected type");
18952         }
18953     }
18954     else
18955 #endif
18956     {
18957         calleeTrashedRegs = RBM_CALLEE_TRASH;
18958     }
18959
18960     /* Spill any callee-trashed registers which are being used */
18961
18962     regMaskTP       spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18963
18964     /* For unmanaged calls, we would otherwise need to save all GC registers to the
18965        InlinedCallFrame. Instead, just spill them to temps. */
18966
18967     if (call->gtFlags & GTF_CALL_UNMANAGED)
18968         spillRegs |= (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18969
18970     // Ignore fptrRegs as it is needed only to perform the indirect call
18971
18972     spillRegs &= ~fptrRegs;
18973
18974     /* Do not spill the argument registers.
18975        Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18976
18977     noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
18978     spillRegs &= ~call->gtCall.gtCallRegUsedMask;
18979
18980     if (spillRegs)
18981     {
18982         regSet.rsSpillRegs(spillRegs);
18983     }
18984
18985 #if FEATURE_STACK_FP_X87
18986     // Spill fp stack
18987     SpillForCallStackFP();
18988
18989     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18990     {
18991         // Pick up a reg
18992         regNumber regReturn = regSet.PickRegFloat();
18993
18994         // Assign reg to tree
18995         genMarkTreeInReg(call, regReturn);
18996
18997         // Mark as used
18998         regSet.SetUsedRegFloat(call, true);
18999
19000         // Update fp state
19001         compCurFPState.Push(regReturn);
19002     }
19003 #else
19004     SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
19005 #endif
19006
19007     /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
19008
19009     retSize = EA_PTRSIZE;
19010
19011
19012     if  (valUsed)
19013     {
19014         if      (call->gtType == TYP_REF ||
19015                  call->gtType == TYP_ARRAY)
19016         {
19017             retSize = EA_GCREF;
19018         }
19019         else if (call->gtType == TYP_BYREF)
19020         {
19021             retSize = EA_BYREF;
19022         }
19023     }
19024
19025
19026     /*-------------------------------------------------------------------------
19027      * For caller-pop calls, the GC info will report the arguments as pending
19028        arguments, since the caller explicitly pops them. They should also be
19029        reported as non-GC arguments, since they effectively go dead at the
19030        call site (the callee owns them).
19031      */
19032
19033     args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize)
19034                                                :  argSize;
19035
19036     /*-------------------------------------------------------------------------
19037      *  Generate the profiling hooks for the call
19038      */
19039
19040     /* Treat special cases first */
19041
19042 #ifdef PROFILING_SUPPORTED
19043
19044     /* fire the event at the call site */
19045     /* alas, right now I can only handle calls via a method handle */
19046     if (compiler->compIsProfilerHookNeeded() &&
19047         (callType == CT_USER_FUNC) &&
19048         call->gtCall.IsTailCall())
19049     {
19050         unsigned  saveStackLvl2 = genStackLevel;
19051
19052         //
19053         // Push the profilerHandle
19054         //
19055 #ifdef _TARGET_X86_
19056         regMaskTP byrefPushedRegs;
19057         regMaskTP norefPushedRegs;
19058         regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
19059
19060         if (compiler->compProfilerMethHndIndirected)
19061         {
19062             getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, 
19063                 (ssize_t)compiler->compProfilerMethHnd);
19064         }
19065         else
19066         {
19067             inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
19068         }
19069         genSinglePush();
19070
19071         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
19072                           sizeof(int) * 1,  // argSize
19073                           EA_UNKNOWN);      // retSize
19074
19075         //
19076         // Adjust the number of stack slots used by this managed method if necessary.
19077         //
19078         if (compiler->fgPtrArgCntMax < 1)
19079         {
19080             compiler->fgPtrArgCntMax = 1;
19081         }
19082
19083         genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
19084 #elif _TARGET_ARM_
19085         // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
19086         // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail-prefixed calls.
19087         // Here we grab a register to temporarily store r0, and restore it after we have emitted the callback.
19088         //
19089         // By the time we reach this point the argument registers have been set up (by genPushArgList()),
19090         // so we don't want to disturb them; hence the argument registers are locked here.
19091         regMaskTP usedMask = RBM_NONE;
19092         regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
19093         
19094         regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
19095         regSet.rsLockReg(genRegMask(scratchReg));
19096
19097         emitAttr attr = EA_UNKNOWN;
19098         if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
19099         {
19100             attr = EA_GCREF;
19101             gcInfo.gcMarkRegSetGCref(scratchReg);
19102         } 
19103         else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
19104         {
19105             attr = EA_BYREF;
19106             gcInfo.gcMarkRegSetByref(scratchReg);
19107         }
19108         else
19109         {
19110             attr = EA_4BYTE;
19111         }
19112
19113         getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
19114         regTracker.rsTrackRegTrash(scratchReg);
19115
19116         if (compiler->compProfilerMethHndIndirected)
19117         {
19118             getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
19119             regTracker.rsTrackRegTrash(REG_R0);
19120         }
19121         else
19122         {            
19123             instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
19124         }
19125
19126         genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
19127                           0,               // argSize
19128                           EA_UNKNOWN);     // retSize
19129
19130         // Restore back to the state that existed before profiler callback
19131         gcInfo.gcMarkRegSetNpt(scratchReg);
19132         getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
19133         regTracker.rsTrackRegTrash(REG_R0);
19134         regSet.rsUnlockReg(genRegMask(scratchReg));
19135         regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
19136 #else 
19137         NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
19138 #endif  //_TARGET_X86_
19139
19140
19141         /* Restore the stack level */
19142         genStackLevel = saveStackLvl2;
19143     }
19144
19145 #endif // PROFILING_SUPPORTED
19146
19147
19148
19149 #ifdef DEBUG
19150     /*-------------------------------------------------------------------------
19151      *  Generate an ESP check for the call
19152      */
19153
19154     if (compiler->opts.compStackCheckOnCall
19155 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
19156         //check the stacks as frequently as possible
19157         && !call->IsHelperCall()
19158 #else
19159         && call->gtCall.gtCallType == CT_USER_FUNC
19160 #endif
19161         )
19162     {
19163         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
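          // Save the current stack pointer into the lvaCallEspCheck local so it can be checked after the call returns.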
19164         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
19165     }
19166 #endif
19167
19168     /*-------------------------------------------------------------------------
19169      *  Generate the call
19170      */
19171
19172     bool fPossibleSyncHelperCall = false;
19173     CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
19174
19175     bool fTailCallTargetIsVSD = false;
19176
19177     bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
19178
19179     /* Check for Delegate.Invoke. If so, we inline it. We get the
19180        target-object and target-function from the delegate-object, and do
19181        an indirect call.
19182      */
19183
19184     if  ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
19185     {
19186         noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
19187
19188         assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) & (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL));
19189
19190         /* Find the offsets of the 'this' pointer and new target */
19191
19192         CORINFO_EE_INFO *  pInfo;
19193         unsigned           instOffs;     // offset of new 'this' pointer
19194         unsigned           firstTgtOffs; // offset of first target to invoke
19195         const regNumber    regThis = genGetThisArgReg(call);
19196
19197         pInfo = compiler->eeGetEEInfo();
19198         instOffs = pInfo->offsetOfDelegateInstance;
19199         firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
19200
19201         // Grab an available register to use for the CALL indirection
19202         regNumber  indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19203
19204         //  Save the invoke-target-function in indCallReg 
19205         //  'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
19206         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
19207         regTracker.rsTrackRegTrash(indCallReg);
19208
19209         /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
19210
19211         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
19212         regTracker.rsTrackRegTrash(regThis);
19213         noway_assert(instOffs < 127);
19214
19215         /* Call through indCallReg */
19216
19217         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19218                                  NULL,    // methHnd
19219                                  INDEBUG_LDISASM_COMMA(sigInfo)
19220                                  NULL,                  // addr
19221                                  args,
19222                                  retSize,
19223                                  gcInfo.gcVarPtrSetCur,
19224                                  gcInfo.gcRegGCrefSetCur,
19225                                  gcInfo.gcRegByrefSetCur,
19226                                  ilOffset,
19227                                  indCallReg);
19228     }
19229     else
19230
19231     /*-------------------------------------------------------------------------
19232      *  Virtual and interface calls
19233      */
19234
19235     switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
19236     {
19237     case GTF_CALL_VIRT_STUB:
19238         {
19239             regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
19240
19241             // An x86 JIT which uses full stub dispatch must generate only
19242             // the following stub dispatch calls:
19243             //
19244             // (1) isCallRelativeIndirect:
19245             //        call dword ptr [rel32]  ;  FF 15 ---rel32----
19246             // (2) isCallRelative:
19247             //        call abc                ;     E8 ---rel32----
19248             // (3) isCallRegisterIndirect:
19249             //     3-byte nop                 ;
19250             //     call dword ptr [eax]       ;     FF 10
19251             //
19252             // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
19253             // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
19254
19255             //
19256             // Please do not insert any Random NOPs while constructing this VSD call
19257             //
19258             getEmitter()->emitDisableRandomNops();
19259
19260             if (!fTailCall)
19261             {
19262                 // This is code to set up an indirect call to a stub address computed
19263                 // via dictionary lookup.  However the dispatch stub receivers aren't set up
19264                 // to accept such calls at the moment.
19265                 if (callType == CT_INDIRECT)
19266                 {
19267                     regNumber indReg;
19268
19269                     // -------------------------------------------------------------------------
19270                     // The importer decided we needed a stub call via a computed
19271                     // stub dispatch address, i.e. an address which came from a dictionary lookup.
19272                     //   - The dictionary lookup produces an indirected address, suitable for call
19273                     //     via "call [REG_VIRTUAL_STUB_PARAM]"
19274                     //
19275                     // This combination will only be generated for shared generic code and when
19276                     // stub dispatch is active.
19277
19278                     // No need to null check the this pointer - the dispatch code will deal with this.
19279
19280                     noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19281
19282                     // Now put the address in REG_VIRTUAL_STUB_PARAM. 
19283                     // This is typically a nop when the register used for 
19284                     // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
19285                     //
19286                     inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
19287                     regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
19288
19289 #if defined(_TARGET_XARCH_)
19290   #if defined(_TARGET_X86_)
19291                     // Emit enough bytes of nops so that this sequence can be distinguished 
19292                     // from other virtual stub dispatch calls. 
19293                     //
19294                     // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
19295                     //        vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
19296                     //
19297                     getEmitter()->emitIns_Nop(3);
19298
19299   #endif // _TARGET_X86_
19300
19301                     // Make the virtual stub call:
19302                     //     call   [REG_VIRTUAL_STUB_PARAM]
19303                     //
19304                     emitCallType = emitter::EC_INDIR_ARD;
19305
19306                     indReg = REG_VIRTUAL_STUB_PARAM;
19307                     genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19308
19309 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
19310
19311                     genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19312
19313                     // Make the virtual stub call: 
19314                     //     ldr   indReg, [REG_VIRTUAL_STUB_PARAM]
19315                     //     call  indReg
19316                     //
19317                     emitCallType = emitter::EC_INDIR_R;
19318
19319                     // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
19320                     //
19321                     indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
19322                     assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
19323                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
19324                     regTracker.rsTrackRegTrash(indReg);
19325
19326 #else
19327 #error "Unknown target for VSD call" 
19328 #endif
19329
19330                     getEmitter()->emitIns_Call(emitCallType,
19331                                              NULL, // methHnd
19332                                              INDEBUG_LDISASM_COMMA(sigInfo)
19333                                              NULL,               // addr
19334                                              args,
19335                                              retSize,
19336                                              gcInfo.gcVarPtrSetCur,
19337                                              gcInfo.gcRegGCrefSetCur,
19338                                              gcInfo.gcRegByrefSetCur,
19339                                              ilOffset,
19340                                              indReg);
19341                 }
19342                 else
19343                 {
19344                     // -------------------------------------------------------------------------
19345                     // Check for a direct stub call.
19346                     //
19347
19348                     // Get stub addr. This will return NULL if virtual call stubs are not active
19349                     void *stubAddr = NULL;
19350
19351                     stubAddr = (void *) call->gtCall.gtStubCallStubAddr;
19352
19353                     noway_assert(stubAddr != NULL);
19354
19355                     // -------------------------------------------------------------------------
19356                     // Direct stub calls, though the stubAddr itself may still need to be
19357                     // accessed via an indirection.
19358                     //
19359
19360                     // No need to null check - the dispatch code will deal with null this.
19361
19362                     emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
19363                     void* addr = stubAddr;
19364                     int disp = 0;
19365                     regNumber callReg = REG_NA;
19366
19367                     if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
19368                     {
19369 #if CPU_LOAD_STORE_ARCH
19370                         callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
19371                         noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
19372
19373                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC,REG_VIRTUAL_STUB_PARAM,(ssize_t)stubAddr);
19374                         // The stub will write-back to this register, so don't track it
19375                         regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
19376                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE,REG_JUMP_THUNK_PARAM,REG_VIRTUAL_STUB_PARAM, 0);
19377                         regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
19378                         callTypeStubAddr = emitter::EC_INDIR_R;
19379                         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19380                                                  NULL,    // methHnd
19381                                                  INDEBUG_LDISASM_COMMA(sigInfo)
19382                                                  NULL,                  // addr
19383                                                  args,
19384                                                  retSize,
19385                                                  gcInfo.gcVarPtrSetCur,
19386                                                  gcInfo.gcRegGCrefSetCur,
19387                                                  gcInfo.gcRegByrefSetCur,
19388                                                  ilOffset,
19389                                                  REG_JUMP_THUNK_PARAM);
19390
19391 #else
19392                         // emit an indirect call
19393                         callTypeStubAddr = emitter::EC_INDIR_C;
19394                         addr = 0;
19395                         disp = (ssize_t) stubAddr;
19396 #endif                        
19397
19398                     }
19399 #if CPU_LOAD_STORE_ARCH
19400                     if (callTypeStubAddr != emitter::EC_INDIR_R)
19401 #endif
19402                     {
19403                         getEmitter()->emitIns_Call(callTypeStubAddr,
19404                             call->gtCall.gtCallMethHnd,
19405                             INDEBUG_LDISASM_COMMA(sigInfo)
19406                             addr,
19407                             args,
19408                             retSize,
19409                             gcInfo.gcVarPtrSetCur,
19410                             gcInfo.gcRegGCrefSetCur,
19411                             gcInfo.gcRegByrefSetCur,
19412                             ilOffset,
19413                             callReg,
19414                             REG_NA,
19415                             0,
19416                             disp);
19417                     }
19418                 }
19419             }
19420             else // tailCall is true
19421             {
19422
19423 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
19424 // moves the this pointer out of its usual place and into the argument list.
19425 #ifdef _TARGET_X86_
19426
19427                 // Generate "cmp ECX, [ECX]" to trap null pointers
19428                 const regNumber regThis = genGetThisArgReg(call);
19429                 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19430
19431 #endif // _TARGET_X86_
19432
19433                 if (callType == CT_INDIRECT)
19434                 {
19435                     noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19436
19437                     // Now put the address in REG_TAILCALL_ADDR.
19438                     inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
19439                     regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19440
19441                     genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19442                 }
19443                 else
19444                 {
19445                     // importer/EE should guarantee the indirection
19446                     noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
19447
19448                     instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, ssize_t(call->gtCall.gtStubCallStubAddr));
19449                 }
19450
19451                 fTailCallTargetIsVSD = true;
19452             }
19453
19454             //
19455             // OK to start inserting random NOPs again
19456             //
19457             getEmitter()->emitEnableRandomNops();
19458         }
19459         break;
19460
19461     case GTF_CALL_VIRT_VTABLE:
19462         // stub dispatching is off or this is not a virtual call (could be a tailcall)
19463         {
19464             regNumber       vptrReg;
19465             unsigned        vtabOffsOfIndirection;
19466             unsigned        vtabOffsAfterIndirection;
19467
19468             noway_assert(callType == CT_USER_FUNC);
19469
19470             vptrReg   = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19471             vptrMask  = genRegMask(vptrReg);
19472
19473             /* The register no longer holds a live pointer value */
19474             gcInfo.gcMarkRegSetNpt(vptrMask);
19475
19476             // MOV vptrReg, [REG_CALL_THIS + offs]
19477             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE,
19478                 vptrReg, genGetThisArgReg(call), VPTR_OFFS);
19479             regTracker.rsTrackRegTrash(vptrReg);
19480
19481             noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
19482
19483             /* Get hold of the vtable offset (note: this might be expensive) */
19484
19485             compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
19486
19487             /* Get the appropriate vtable chunk */
19488
19489             /* The register no longer holds a live pointer value */
19490             gcInfo.gcMarkRegSetNpt(vptrMask);
19491
19492             // MOV vptrReg, [vptrReg + vtabOffsOfIndirection]
19493             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE,
19494                 vptrReg, vptrReg, vtabOffsOfIndirection);
19495
19496             /* Call through the appropriate vtable slot */
19497
19498             if (fTailCall)
19499             {
19500                 /* Load the function address: "[vptrReg + vtabOffsAfterIndirection] -> REG_TAILCALL_ADDR" */
19501
19502                 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19503                     vptrReg, vtabOffsAfterIndirection);
19504             }
19505             else
19506             {
19507 #if CPU_LOAD_STORE_ARCH
19508                 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, vtabOffsAfterIndirection);
19509
19510                 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19511                     call->gtCall.gtCallMethHnd,
19512                     INDEBUG_LDISASM_COMMA(sigInfo)
19513                     NULL,                          // addr
19514                     args,
19515                     retSize,
19516                     gcInfo.gcVarPtrSetCur,
19517                     gcInfo.gcRegGCrefSetCur,
19518                     gcInfo.gcRegByrefSetCur,
19519                     ilOffset,
19520                     vptrReg);                      // ireg
19521 #else
19522                 getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL,
19523                     call->gtCall.gtCallMethHnd,
19524                     INDEBUG_LDISASM_COMMA(sigInfo)
19525                     NULL,                          // addr
19526                     args,
19527                     retSize,
19528                     gcInfo.gcVarPtrSetCur,
19529                     gcInfo.gcRegGCrefSetCur,
19530                     gcInfo.gcRegByrefSetCur,
19531                     ilOffset,
19532                     vptrReg,                       // ireg
19533                     REG_NA,                        // xreg
19534                     0,                             // xmul
19535                     vtabOffsAfterIndirection);     // disp
19536 #endif // CPU_LOAD_STORE_ARCH
19537             }
19538         }
19539         break;
19540
19541     case GTF_CALL_NONVIRT:
19542         {
19543             //------------------------ Non-virtual/Indirect calls -------------------------
19544             // Lots of cases follow
19545             //    - Direct P/Invoke calls
19546             //    - Indirect calls to P/Invoke functions via the P/Invoke stub
19547             //    - Direct Helper calls
19548             //    - Indirect Helper calls
19549             //    - Direct calls to known addresses
19550             //    - Direct calls where address is accessed by one or two indirections
19551             //    - Indirect calls to computed addresses
19552             //    - Tailcall versions of all of the above
19553
19554             CORINFO_METHOD_HANDLE   methHnd  = call->gtCall.gtCallMethHnd;
19555
19556
19557             //------------------------------------------------------
19558             // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
19559             //
19560             // For (final and private) functions which were called with
19561             //  invokevirtual, but which we call directly, we need to
19562             //  dereference the object pointer to make sure it's not NULL.
19563             //
19564
19565             if (call->gtFlags & GTF_CALL_NULLCHECK)
19566             {
19567                 /* Generate "cmp ECX, [ECX]" to trap null pointers */
19568                 const regNumber regThis = genGetThisArgReg(call);
19569 #if CPU_LOAD_STORE_ARCH
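                  // Load/store architectures cannot compare against a memory operand, so load from [regThis]
                  // into a scratch register to trigger the null check.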
19570                 regNumber indReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the indirection
19571                 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
19572                 regTracker.rsTrackRegTrash(indReg);
19573 #else
19574                 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19575 #endif
19576             }
19577
19578             if (call->gtFlags & GTF_CALL_UNMANAGED)
19579             {
19580                 //------------------------------------------------------
19581                 // Non-virtual/Indirect calls: PInvoke calls.
19582
19583                 noway_assert(compiler->info.compCallUnmanaged != 0);
19584
19585                 /* args shouldn't be greater than 64K */
19586
19587                 noway_assert((argSize&0xffff0000) == 0);
19588
19589                 /* Remember the varDsc for the callsite-epilog */
19590
19591                 frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
19592
19593                 // exact codegen is required
19594                 getEmitter()->emitDisableRandomNops();
19595
19596                 int nArgSize = 0;
19597
19598                 regNumber  indCallReg = REG_NA;
19599
19600                 if (callType == CT_INDIRECT)
19601                 {
19602                     noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19603
19604                     if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
19605                         indCallReg  = call->gtCall.gtCallAddr->gtRegNum;
19606
19607                     nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
19608                     methHnd  = 0;
19609                 }
19610                 else
19611                 {
19612                     noway_assert(callType == CT_USER_FUNC);
19613                 }
19614
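                // genPInvokeCallProlog emits the inlined P/Invoke frame setup rooted at frameListRoot;
                // the returned tcbReg is only meaningful if frameListRoot stays enregistered across the
                // call (see the note below).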
19615                 regNumber tcbReg;
19616                 tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19617
19618                 void* addr = NULL;
19619
19620                 if (callType == CT_INDIRECT)
19621                 {
19622                     /* Double check that the callee didn't use/trash the
19623                        registers holding the call target.
19624                     */
19625                     noway_assert(tcbReg != indCallReg);
19626
19627                     if (indCallReg == REG_NA)
19628                     {
19629                         indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19630
19631                         /* Please note that this even works with tcbReg == REG_EAX.
19632                         tcbReg contains an interesting value only if frameListRoot is
19633                         an enregistered local that stays alive across the call
19634                         (certainly not EAX). If frameListRoot has been moved into
19635                         EAX, we can trash it since it won't survive across the call
19636                         anyway.
19637                         */
19638
19639                         inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
19640                         regTracker.rsTrackRegTrash(indCallReg);
19641                     }
19642
19643                     emitCallType = emitter::EC_INDIR_R;
19644                 }
19645                 else
19646                 {
19647                     noway_assert(callType == CT_USER_FUNC);
19648                     
19649                     void* pAddr;
19650                     addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
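                    // If 'addr' is non-null it is the address of the P/Invoke fixup cell, reached with a
                    // single indirection; otherwise 'pAddr' must be dereferenced twice to reach the target
                    // (the double-indirection path below).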
19651                     if (addr != NULL)
19652                     {
19653 #if CPU_LOAD_STORE_ARCH
19654                         // Load the address into a register, indirect it, and call through a register
19655                         indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19656                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19657                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19658                         regTracker.rsTrackRegTrash(indCallReg);
19659                         // Now make the call "call indCallReg"
19660
19661                         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,     
19662                             methHnd, // methHnd
19663                             INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19664                             NULL,                  // addr
19665                             args,
19666                             retSize,
19667                             gcInfo.gcVarPtrSetCur,
19668                             gcInfo.gcRegGCrefSetCur,
19669                             gcInfo.gcRegByrefSetCur,
19670                             ilOffset,
19671                             indCallReg);
19672
19673                         emitCallType = emitter::EC_INDIR_R;
19674                         break;
19675 #else
19676                         emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19677                         indCallReg = REG_NA;
19678 #endif
19679                     }
19680                     else
19681                     {
19682                         // Double-indirection. Load the address into a register
19683                         // and call indirectly through a register
19684                         indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19685
19686 #if CPU_LOAD_STORE_ARCH
19687                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
19688                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19689                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19690                         regTracker.rsTrackRegTrash(indCallReg);
19691
19692                         emitCallType = emitter::EC_INDIR_R;
19693
19694 #else
19695                         getEmitter()->emitIns_R_AI(INS_mov,
19696                                                  EA_PTR_DSP_RELOC,
19697                                                  indCallReg,
19698                                                  (ssize_t)pAddr);
19699                         regTracker.rsTrackRegTrash(indCallReg);
19700                         emitCallType = emitter::EC_INDIR_ARD;
19701
19702 #endif // CPU_LOAD_STORE_ARCH
19703                     }
19704                 }
19705
19706                 getEmitter()->emitIns_Call(emitCallType,                    
19707                     compiler->eeMarkNativeTarget(methHnd),
19708                     INDEBUG_LDISASM_COMMA(sigInfo)
19709                     addr,
19710                     args,
19711                     retSize,
19712                     gcInfo.gcVarPtrSetCur,
19713                     gcInfo.gcRegGCrefSetCur,
19714                     gcInfo.gcRegByrefSetCur,
19715                     ilOffset,
19716                     indCallReg);
19717
19718                 if (callType == CT_INDIRECT)
19719                     genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19720
19721                 getEmitter()->emitEnableRandomNops();
19722
19723                 // Done with PInvoke calls
19724                 break;
19725             }
19726
19727             if  (callType == CT_INDIRECT)
19728             {
19729                 noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19730
19731                 if (call->gtCall.gtCallCookie)
19732                 {
19733                     //------------------------------------------------------
19734                     // Non-virtual indirect calls via the P/Invoke stub
19735
19736                     GenTreePtr cookie = call->gtCall.gtCallCookie;
19737                     GenTreePtr target = call->gtCall.gtCallAddr;
19738
19739                     noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19740
19741                     noway_assert(cookie->gtOper == GT_CNS_INT ||
19742                         (cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT));
19743
19744                     noway_assert(args == argSize);
19745
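                    // The CORINFO_HELP_PINVOKE_CALLI helper receives the real call target and the
                    // VASigCookie. On x86 the target goes in EAX and the cookie is pushed as an extra
                    // stack argument; on ARM they go in REG_PINVOKE_TARGET_PARAM (r12) and
                    // REG_PINVOKE_COOKIE_PARAM (r4).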
19746 #if defined(_TARGET_X86_)
19747                     /* load eax with the real target */
19748
19749                     inst_RV_TT(INS_mov, REG_EAX, target);
19750                     regTracker.rsTrackRegTrash(REG_EAX);
19751
19752                     if (cookie->gtOper == GT_CNS_INT)
19753                         inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19754                     else
19755                         inst_TT(INS_push, cookie);
19756
19757                     /* Keep track of ESP for EBP-less frames */
19758                     genSinglePush();
19759
19760                     argSize += sizeof(void *);
19761
19762 #elif defined(_TARGET_ARM_)
19763
19764                     // Ensure that we spill these registers (if caller saved) in the prolog
19765                     regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19766
19767                     // ARM: load r12 with the real target
19768                     // X64: load r10 with the real target
19769                     inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19770                     regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19771
19772                     // ARM: load r4  with the pinvoke VASigCookie
19773                     // X64: load r11 with the pinvoke VASigCookie
19774                     if (cookie->gtOper == GT_CNS_INT)
19775                         inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal, EA_HANDLE_CNS_RELOC);
19776                     else
19777                         inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19778                     regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19779
19780                     noway_assert(args == argSize);
19781
19782                     // Ensure that we don't trash any of these registers if we have to load
19783                     // the helper call target into a register to invoke it.
19784                     regMaskTP regsUsed;
19785                     regSet.rsLockReg(call->gtCall.gtCallRegUsedMask|RBM_PINVOKE_TARGET_PARAM|RBM_PINVOKE_COOKIE_PARAM, &regsUsed);
19786 #else
19787                     NYI("Non-virtual indirect calls via the P/Invoke stub");
19788 #endif
19789
19790                     args = argSize;
19791                     noway_assert((size_t)(int)args == args);
19792
19793                     genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19794
19795 #if defined(_TARGET_ARM_)
19796                     regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask|RBM_PINVOKE_TARGET_PARAM|RBM_PINVOKE_COOKIE_PARAM, regsUsed);
19797 #endif
19798
19799 #ifdef _TARGET_ARM_
19800                     // genEmitHelperCall doesn't record all registers a helper call would trash.
19801                     regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19802 #endif
19803
19804                 }
19805                 else
19806                 {
19807                     //------------------------------------------------------
19808                     // Non-virtual indirect calls
19809
19810                     if (fTailCall)
19811                     {
19812                         inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
19813                         regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19814                     }
19815                     else
19816                         instEmit_indCall(call, args, retSize);
19817                 }
19818
19819                 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19820
19821                 // Done with indirect calls
19822                 break;
19823             }
19824
19825             //------------------------------------------------------
19826             // Non-virtual direct/indirect calls: Work out if the address of the
19827             // call is known at JIT time (if not, it is either an indirect call
19828             // or the address must be accessed via a single or double indirection)
19829
19830             noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19831
19832             void * addr;
19833             InfoAccessType accessType;
19834
19835             helperNum = compiler->eeGetHelperNum(methHnd);
19836
19837             if (callType == CT_HELPER)
19838             {
19839                 noway_assert(helperNum != CORINFO_HELP_UNDEF);
19840
19841                 void * pAddr;
19842                 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19843
19844                 accessType = IAT_VALUE;
19845
19846                 if (!addr)
19847                 {
19848                     accessType = IAT_PVALUE;
19849                     addr = pAddr;
19850                 }
19851             }
19852             else 
19853             {
19854                 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19855
19856                 CORINFO_ACCESS_FLAGS  aflags   = CORINFO_ACCESS_ANY;
19857
19858                 if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19859                     aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19860
19861                 if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19862                     aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19863
19864                 CORINFO_CONST_LOOKUP addrInfo;
19865                 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19866
19867                 accessType = addrInfo.accessType;
19868                 addr = addrInfo.addr;
19869             }
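            // accessType tells us how to reach the target: IAT_VALUE means the address is known
            // directly, IAT_PVALUE means it must be read through one indirection cell, and
            // IAT_PPVALUE through two.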
19870
19871             if (fTailCall)
19872             {
19873                 noway_assert(callType == CT_USER_FUNC);
19874
19875                 switch (accessType)
19876                 {
19877                 case IAT_VALUE:   
19878                     //------------------------------------------------------
19879                     // Non-virtual direct calls to known addresses
19880                     //
19881                     instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19882                     break;
19883
19884                 case IAT_PVALUE: 
19885                     //------------------------------------------------------
19886                     // Non-virtual direct calls to addresses accessed by
19887                     // a single indirection.
19888                     //
19889                     // For tailcalls we place the target address in REG_TAILCALL_ADDR
19890 #if CPU_LOAD_STORE_ARCH
19891                     {
19892                         regNumber indReg = REG_TAILCALL_ADDR;    
19893                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19894                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19895                         regTracker.rsTrackRegTrash(indReg);
19896                     }
19897 #else
19898                     getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR,
19899                         (ssize_t)addr);
19900                     regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19901 #endif
19902                     break;
19903
19904                 case IAT_PPVALUE: 
19905                     //------------------------------------------------------
19906                     // Non-virtual direct calls to addresses accessed by
19907                     // a double indirection.
19908                     //
19909                     // For tailcalls we place the target address in REG_TAILCALL_ADDR
19910 #if CPU_LOAD_STORE_ARCH
19911                     {
19912                         regNumber indReg = REG_TAILCALL_ADDR;    
19913                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19914                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19915                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19916                         regTracker.rsTrackRegTrash(indReg);
19917                     }
19918 #else
19919                     getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR,
19920                         (ssize_t)addr);
19921                     getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19922                         REG_TAILCALL_ADDR, 0);
19923                     regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19924 #endif
19925                     break;
19926
19927                 default: 
19928                     noway_assert(!"Bad accessType");
19929                     break;
19930                 }
19931             }
19932             else
19933             { 
19934                 switch (accessType)
19935                 {
19936                     regNumber  indCallReg;
19937
19938                 case IAT_VALUE:   
19939                     //------------------------------------------------------
19940                     // Non-virtual direct calls to known addresses
19941                     //
19942                     // The vast majority of calls end up here....  Wouldn't
19943                     // it be nice if they all did!
19944 #ifdef _TARGET_ARM_
19945                     if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19946                     {
19947                         // Load the address into a register and call through a register
19948                         indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19949                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19950
19951                         getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19952                             methHnd,
19953                             INDEBUG_LDISASM_COMMA(sigInfo)
19954                             NULL,                          // addr
19955                             args,
19956                             retSize,
19957                             gcInfo.gcVarPtrSetCur,
19958                             gcInfo.gcRegGCrefSetCur,
19959                             gcInfo.gcRegByrefSetCur,
19960                             ilOffset,
19961                             indCallReg,                    // ireg
19962                             REG_NA, 0, 0,                  // xreg, xmul, disp
19963                             false,                         // isJump
19964                             emitter::emitNoGChelper(helperNum));
19965                     }
19966                     else
19967 #endif
19968                     {
19969                         getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN,
19970                             methHnd,
19971                             INDEBUG_LDISASM_COMMA(sigInfo)
19972                             addr,
19973                             args,
19974                             retSize,
19975                             gcInfo.gcVarPtrSetCur,
19976                             gcInfo.gcRegGCrefSetCur,
19977                             gcInfo.gcRegByrefSetCur,
19978                             ilOffset,
19979                             REG_NA, REG_NA, 0, 0,    /* ireg, xreg, xmul, disp */
19980                             false,                 /* isJump */
19981                             emitter::emitNoGChelper(helperNum));
19982                     }
19983                     break;
19984
19985                 case IAT_PVALUE: 
19986                     //------------------------------------------------------
19987                     // Non-virtual direct calls to addresses accessed by
19988                     // a single indirection.
19989                     //
19990 #if CPU_LOAD_STORE_ARCH
19991                     // Load the address into a register, load indirectly, and call through a register
19992                     indCallReg = regSet.rsGrabReg(RBM_ALLINT);     // Grab an available register to use for the CALL indirection
19993
19994                     instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19995                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19996                     regTracker.rsTrackRegTrash(indCallReg);
19997
19998                     emitCallType = emitter::EC_INDIR_R;
19999                     addr = NULL;
20000
20001 #else
20002                     emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
20003                     indCallReg = REG_NA;
20004
20005 #endif // CPU_LOAD_STORE_ARCH
20006
20007                     getEmitter()->emitIns_Call( emitCallType,
20008                         methHnd,
20009                         INDEBUG_LDISASM_COMMA(sigInfo)
20010                         addr,
20011                         args,
20012                         retSize,
20013                         gcInfo.gcVarPtrSetCur,
20014                         gcInfo.gcRegGCrefSetCur,
20015                         gcInfo.gcRegByrefSetCur,
20016                         ilOffset,
20017                         indCallReg,          // ireg
20018                         REG_NA, 0, 0,        // xreg, xmul, disp
20019                         false,                 /* isJump */
20020                         emitter::emitNoGChelper(helperNum));
20021                     break;
20022
20023                 case IAT_PPVALUE: 
20024                     {
20025                         //------------------------------------------------------
20026                         // Non-virtual direct calls to addresses accessed by
20027                         // a double indirection.
20028                         //
20029                         // Double-indirection. Load the address into a register
20030                         // and call indirectly through the register
20031
20032                         noway_assert(helperNum == CORINFO_HELP_UNDEF);
20033
20034                         // Grab an available register to use for the CALL indirection
20035                         indCallReg = regSet.rsGrabReg(RBM_ALLINT);
20036
20037 #if CPU_LOAD_STORE_ARCH
20038                         instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
20039                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
20040                         getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
20041                         regTracker.rsTrackRegTrash(indCallReg);
20042
20043                         emitCallType = emitter::EC_INDIR_R;
20044
20045 #else
20046
20047                         getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC,
20048                                                  indCallReg,
20049                                                  (ssize_t)addr);
20050                         regTracker.rsTrackRegTrash(indCallReg);
20051
20052                         emitCallType = emitter::EC_INDIR_ARD;
20053
20054 #endif // CPU_LOAD_STORE_ARCH
20055
20056                         getEmitter()->emitIns_Call(emitCallType,
20057                             methHnd,
20058                             INDEBUG_LDISASM_COMMA(sigInfo)
20059                             NULL,                          // addr
20060                             args,
20061                             retSize,
20062                             gcInfo.gcVarPtrSetCur,
20063                             gcInfo.gcRegGCrefSetCur,
20064                             gcInfo.gcRegByrefSetCur,
20065                             ilOffset,
20066                             indCallReg,          // ireg
20067                             REG_NA, 0, 0,        // xreg, xmul, disp
20068                             false,               // isJump
20069                             emitter::emitNoGChelper(helperNum));
20070                     }
20071                     break;
20072
20073                 default: 
20074                     noway_assert(!"Bad accessType");
20075                     break;
20076                 }
20077
20078                 // tracking of region protected by the monitor in synchronized methods
20079                 if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
20080                 {
20081                     fPossibleSyncHelperCall = true;
20082                 }
20083             }
20084         }
20085         break;
20086
20087     default:
20088         noway_assert(!"strange call type");
20089         break;
20090
20091     }
20092
20093     /*-------------------------------------------------------------------------
20094      *  For tailcalls, REG_INTRET contains the address of the target function,
20095      *  enregistered args are in the correct registers, and the stack arguments
20096      *  have been pushed on the stack. Now call the stub-sliding helper
20097      */
20098
20099     if (fTailCall)
20100     {
20101
20102         if (compiler->info.compCallUnmanaged)
20103             genPInvokeMethodEpilog();
20104
20105 #ifdef _TARGET_X86_
20106         noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
20107
20108
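        // On x86 the tail call is dispatched through the CORINFO_HELP_TAILCALL helper; we push four
        // extra values after the outgoing args: the incoming stack arg count, the outgoing stack arg
        // count, the callee-saved register info, and the target address.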
20109         // Push the count of the incoming stack arguments
20110
20111         unsigned nOldStkArgs = (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void *)))/sizeof(void*));
20112         getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
20113         genSinglePush(); // Keep track of ESP for EBP-less frames
20114         args += sizeof(void*);
20115
20116         // Push the count of the outgoing stack arguments
20117
20118         getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize/sizeof(void*));
20119         genSinglePush(); // Keep track of ESP for EBP-less frames
20120         args += sizeof(void*);
20121
20122         // Push info about the callee-saved registers to be restored
20123         // For now, we always spill all registers if compiler->compTailCallUsed
20124
20125         DWORD calleeSavedRegInfo =
20126             1 | // always restore EDI,ESI,EBX
20127             (fTailCallTargetIsVSD ? 0x2 : 0x0);  // Stub dispatch flag
20128         getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
20129         genSinglePush(); // Keep track of ESP for EBP-less frames
20130         args += sizeof(void*);
20131
20132         // Push the address of the target function
20133
20134         getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
20135         genSinglePush(); // Keep track of ESP for EBP-less frames
20136         args += sizeof(void*);
20137
20138 #else // _TARGET_X86_
20139
20140         args = 0;
20141         retSize = EA_UNKNOWN;
20142  
20143 #endif // _TARGET_X86_
20144
20145         if (compiler->getNeedsGSSecurityCookie())
20146         {
20147             genEmitGSCookieCheck(true);
20148         }
20149
20150         // The TailCall helper does not poll for GC. An explicit GC poll
20151         // should have been placed when we morphed this into a tail call.
20152         noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
20153
20154         // Now call the helper
20155
20156         genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
20157
20158     }
20159
20160     /*-------------------------------------------------------------------------
20161      *  Done with call.
20162      *  Trash registers, pop arguments if needed, etc
20163      */
20164
20165     /* Mark the argument registers as free */
20166
20167     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
20168
20169     for (areg = 0; areg < MAX_REG_ARG; areg++)
20170     {
20171         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
20172
20173         // Is this one of the used argument registers?
20174         if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
20175             continue;
20176
20177 #ifdef _TARGET_ARM_
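        // On ARM, an odd-numbered argument register may be marked used yet have no rsUsedTree entry;
        // the assert below verifies this only happens next to a struct or 8-byte argument occupying
        // the following register(s).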
20178         if (regSet.rsUsedTree[areg] == NULL)
20179         {
20180             noway_assert(areg % 2 == 1 && (((areg+1) >= MAX_REG_ARG) ||
20181                                            (regSet.rsUsedTree[areg+1]->TypeGet() == TYP_STRUCT) || 
20182                                            (genTypeStSz(regSet.rsUsedTree[areg+1]->TypeGet()) == 2)));
20183             continue;
20184         }
20185 #endif      
20186
20187         regSet.rsMarkRegFree(curArgMask);
20188
20189         // We keep regSet.rsMaskVars current during codegen, so we have to remove any
20190         // that have been copied into arg regs.
20191
20192         regSet.RemoveMaskVars(curArgMask);
20193         gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
20194         gcInfo.gcRegByrefSetCur &= ~(curArgMask);
20195     }
20196
20197     //-------------------------------------------------------------------------
20198     // free up the FP args
20199
20200 #if !FEATURE_STACK_FP_X87
20201     for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
20202     {
20203         regNumber argRegNum  = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
20204         regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
20205
20206         // Is this one of the used argument registers?
20207         if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
20208             continue;
20209
20210         regSet.rsMaskUsed &= ~curArgMask;
20211         regSet.rsUsedTree[argRegNum] = NULL;
20212     }
20213 #endif // !FEATURE_STACK_FP_X87
20214
20215     /* restore the old argument register status */
20216
20217     intRegState.rsCurRegArgNum = savCurIntArgReg; 
20218     floatRegState.rsCurRegArgNum = savCurFloatArgReg;
20219
20220     noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
20221
20222     /* Mark all trashed registers as such */
20223
20224     if  (calleeTrashedRegs)
20225         regTracker.rsTrashRegSet(calleeTrashedRegs);
20226
20227     regTracker.rsTrashRegsForGCInterruptability();
20228
20229 #ifdef  DEBUG
20230
20231     if  (!(call->gtFlags & GTF_CALL_POP_ARGS))
20232     {
20233         if (compiler->verbose)
20234         {
20235             printf("\t\t\t\t\t\t\tEnd call ");
20236             Compiler::printTreeID(call);
20237             printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
20238         }
20239         noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
20240     }
20241
20242 #endif
20243
20244 #if FEATURE_STACK_FP_X87
20245     /* All float temps must be spilled around function calls */
20246     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20247     {
20248         noway_assert(compCurFPState.m_uStackSize == 1);
20249     }
20250     else
20251     {
20252         noway_assert(compCurFPState.m_uStackSize == 0);
20253     }
20254 #else
20255     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20256     {
20257 #ifdef _TARGET_ARM_
20258         if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
20259         {
20260             // The return value for vararg (and soft-FP) methods comes back in r0, r1, but our
20261             // callers expect it in s0, s1 because of the floating-point return type. Do the move now.
20262             if (call->gtType == TYP_FLOAT)
20263             {
20264                 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
20265             }
20266             else
20267             {
20268                 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
20269             }
20270         }
20271 #endif
20272         genMarkTreeInReg(call, REG_FLOATRET);
20273     }
20274 #endif
20275
20276     /* The function will pop all arguments before returning */
20277
20278     genStackLevel = saveStackLvl;
20279
20280     /* No callee-trashed register may hold a GC pointer at this point */
20281
20282 #ifdef  DEBUG
20283     regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & ~regSet.rsMaskVars & ~vptrMask;
20284     if  (ptrRegs)
20285     {
20286         // A reg may be dead already.  The assertion is too strong.
20287         LclVarDsc *varDsc;
20288         unsigned varNum;
20289         
20290         // use compiler->compCurLife
20291         for (varNum = 0, varDsc = compiler->lvaTable;
20292              varNum < compiler->lvaCount && ptrRegs != 0;
20293              varNum++  , varDsc++)
20294         {
20295             /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
20296
20297             if  (!varDsc->lvTracked)
20298                 continue;
20299             if  (!varDsc->lvRegister)
20300                 continue;
20301             if  (varDsc->IsFloatRegType())
20302                 continue;
20303
20304             /* Get hold of the index and the bitmask for the variable */
20305
20306             unsigned   varIndex = varDsc->lvVarIndex;
20307
20308             /* Is this variable live currently? */
20309
20310             if  (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
20311             {
20312                 regNumber  regNum  = varDsc->lvRegNum;
20313                 regMaskTP  regMask = genRegMask(regNum);
20314
20315                 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
20316                     ptrRegs &= ~regMask;
20317             }
20318         }
20319         if (ptrRegs)
20320         {
20321             printf("Bad call handling for ");
20322             Compiler::printTreeID(call);
20323             printf("\n");
20324             noway_assert(!"A callee trashed reg is holding a GC pointer");
20325         }
20326     }
20327 #endif
20328
20329 #if defined(_TARGET_X86_)
20330     //-------------------------------------------------------------------------
20331     // Create a label for tracking of region protected by the monitor in synchronized methods.
20332     // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
20333     // so the GC state vars have been updated before creating the label.
20334
20335     if (fPossibleSyncHelperCall)
20336     {
20337         switch (helperNum) {
20338         case CORINFO_HELP_MON_ENTER:
20339         case CORINFO_HELP_MON_ENTER_STATIC:
20340             noway_assert(compiler->syncStartEmitCookie == NULL);
20341             compiler->syncStartEmitCookie = getEmitter()->emitAddLabel(
20342                                      gcInfo.gcVarPtrSetCur,
20343                                      gcInfo.gcRegGCrefSetCur,
20344                                      gcInfo.gcRegByrefSetCur);
20345             noway_assert(compiler->syncStartEmitCookie != NULL);
20346             break;
20347         case CORINFO_HELP_MON_EXIT:
20348         case CORINFO_HELP_MON_EXIT_STATIC:
20349             noway_assert(compiler->syncEndEmitCookie == NULL);
20350             compiler->syncEndEmitCookie = getEmitter()->emitAddLabel(
20351                                      gcInfo.gcVarPtrSetCur,
20352                                      gcInfo.gcRegGCrefSetCur,
20353                                      gcInfo.gcRegByrefSetCur);
20354             noway_assert(compiler->syncEndEmitCookie != NULL);
20355             break;
20356         default:
20357             break;
20358         }
20359     }
20360 #endif // _TARGET_X86_
20361
20362     if (call->gtFlags & GTF_CALL_UNMANAGED)
20363     {
20364         genDefineTempLabel(returnLabel);
20365
20366 #ifdef _TARGET_X86_
20367         if (getInlinePInvokeCheckEnabled())
20368         {
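            // Verify that ESP has the expected value after the unmanaged call: load the ESP recorded
            // in the inlined call frame, adjust by the pushed argument size when the callee pops its
            // arguments, compare against the current ESP, and emit a breakpoint on a mismatch.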
20369             noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
20370             BasicBlock  *   esp_check;
20371
20372             CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
20373             /* mov   ecx, dword ptr [frame.callSiteTracker] */
20374
20375             getEmitter()->emitIns_R_S (INS_mov,
20376                                       EA_4BYTE,
20377                                       REG_ARG_0,
20378                                       compiler->lvaInlinedPInvokeFrameVar,
20379                                       pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20380             regTracker.rsTrackRegTrash(REG_ARG_0);
20381
20382             /* Generate the conditional jump */
20383
20384             if (!(call->gtFlags & GTF_CALL_POP_ARGS))
20385             {
20386                 if (argSize)
20387                 {
20388                     getEmitter()->emitIns_R_I  (INS_add,
20389                                               EA_PTRSIZE,
20390                                               REG_ARG_0,
20391                                               argSize);
20392                 }
20393             }
20394             /* cmp   ecx, esp */
20395
20396             getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
20397
20398             esp_check = genCreateTempLabel();
20399
20400             emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20401             inst_JMP(jmpEqual, esp_check);
20402
20403             getEmitter()->emitIns(INS_BREAKPOINT);
20404
20405             /* genCondJump() closes the current emitter block */
20406
20407             genDefineTempLabel(esp_check);
20408         }
20409 #endif
20410     }
20411
20412     /* Are we supposed to pop the arguments? */
20413
20414 #if defined(_TARGET_X86_)
20415     if (call->gtFlags & GTF_CALL_UNMANAGED)
20416     {
20417         if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
20418             compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
20419         {
20420             // P/Invoke signature mismatch resilience - restore ESP to its pre-call value. We would ideally
20421             // take care of the cdecl argument popping here as well, but the stack depth tracking logic
20422             // makes this very hard, i.e., it needs to "see" the actual pop.
20423
20424             CORINFO_EE_INFO *pInfo = compiler->eeGetEEInfo();
20425
20426             if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
20427             {
20428                 /* mov   esp, dword ptr [frame.callSiteTracker] */
20429                 getEmitter()->emitIns_R_S  (ins_Load(TYP_I_IMPL), 
20430                                           EA_PTRSIZE,
20431                                           REG_SPBASE,
20432                                           compiler->lvaInlinedPInvokeFrameVar,
20433                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20434             }
20435             else
20436             {
20437                 /* mov   ecx, dword ptr [frame.callSiteTracker] */
20438                 getEmitter()->emitIns_R_S  (ins_Load(TYP_I_IMPL), 
20439                                           EA_PTRSIZE,
20440                                           REG_ARG_0,
20441                                           compiler->lvaInlinedPInvokeFrameVar,
20442                                           pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20443                 regTracker.rsTrackRegTrash(REG_ARG_0);
20444
20445                 /* lea   esp, [ecx + argSize] */
20446                 getEmitter()->emitIns_R_AR (INS_lea,
20447                                           EA_PTRSIZE,
20448                                           REG_SPBASE,
20449                                           REG_ARG_0,
20450                                           (int)argSize);
20451             }
20452         }
20453     }
20454 #endif // _TARGET_X86_
20455
20456     if  (call->gtFlags & GTF_CALL_POP_ARGS)
20457     {
20458         noway_assert(args == (size_t)-(int)argSize);
20459
20460         if (argSize)
20461         {
20462             genAdjustSP(argSize);
20463         }
20464     }
20465
20466     if  (pseudoStackLvl)
20467     {
20468         noway_assert(call->gtType == TYP_VOID);
20469
20470         /* Generate NOP */
20471
20472         instGen(INS_nop);
20473     }
20474
20475
20476
20477     /* What does the function return? */
20478
20479     retVal = RBM_NONE;
20480
20481     switch (call->gtType)
20482     {
20483     case TYP_REF:
20484     case TYP_ARRAY:
20485     case TYP_BYREF:
20486         gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
20487
20488         __fallthrough;
20489
20490     case TYP_INT:
20491 #if !CPU_HAS_FP_SUPPORT
20492     case TYP_FLOAT:
20493 #endif
20494         retVal = RBM_INTRET;
20495         break;
20496
20497 #ifdef _TARGET_ARM_
20498     case TYP_STRUCT:
20499         {
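            // An HFA is returned in consecutive floating-point registers starting at REG_FLOATRET;
            // build a mask containing one bit per returned slot.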
20500             assert(call->gtCall.gtRetClsHnd != NULL);
20501             assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
20502             int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
20503             assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
20504             assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
20505             retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
20506         }
20507         break;
20508 #endif
20509
20510     case TYP_LONG:
20511 #if !CPU_HAS_FP_SUPPORT
20512     case TYP_DOUBLE:
20513 #endif
20514         retVal = RBM_LNGRET;
20515         break;
20516
20517 #if CPU_HAS_FP_SUPPORT
20518     case TYP_FLOAT:
20519     case TYP_DOUBLE:
20520
20521         break;
20522 #endif
20523
20524     case TYP_VOID:
20525         break;
20526
20527     default:
20528         noway_assert(!"unexpected/unhandled fn return type");
20529     }
20530
20531     // We now have to generate the "call epilog" (if it was a call to unmanaged code).
20532     /* If it was a call to unmanaged code, frameListRoot must be set */
20533
20534     noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
20535
20536     if (frameListRoot)
20537         genPInvokeCallEpilog(frameListRoot, retVal);
20538
20539     if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
20540     {
20541         if (frameListRoot->lvRegister)
20542         {
20543             bool isBorn = false;
20544             bool isDying = true;
20545             genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
20546         }
20547     }
20548
20549 #ifdef DEBUG
20550     if (compiler->opts.compStackCheckOnCall
20551 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
20552         // Check the stack as frequently as possible
20553         && !call->IsHelperCall()
20554 #else
20555         && call->gtCall.gtCallType == CT_USER_FUNC
20556 #endif
20557         )
20558     {
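        // Check that ESP is back to the value recorded in lvaCallEspCheck (accounting for any
        // argument size adjustment) and break into the debugger if it is not.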
20559         noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
20560         if (argSize > 0)
20561         {
20562             getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
20563             getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
20564             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
20565             regTracker.rsTrackRegTrash(REG_ARG_0);
20566         }
20567         else
20568             getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
20569
20570         BasicBlock  *   esp_check = genCreateTempLabel();
20571         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20572         inst_JMP(jmpEqual, esp_check);
20573         getEmitter()->emitIns(INS_BREAKPOINT);
20574         genDefineTempLabel(esp_check);
20575     }
20576 #endif // DEBUG
20577
20578 #if FEATURE_STACK_FP_X87
20579     UnspillRegVarsStackFp();
20580 #endif // FEATURE_STACK_FP_X87
20581
20582     if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20583     {
20584         // Restore return node if necessary
20585         if  (call->gtFlags & GTF_SPILLED)
20586         {
20587             UnspillFloat(call);
20588         }
20589
20590         // Mark as free
20591 #if FEATURE_STACK_FP_X87
20592         regSet.SetUsedRegFloat(call, false);
20593 #endif
20594     }
20595
20596 #if FEATURE_STACK_FP_X87
20597 #ifdef DEBUG
20598     if (compiler->verbose)
20599     {
20600         JitDumpFPState();
20601     }
20602 #endif
20603 #endif
20604
20605     return retVal;
20606 }
20607 #ifdef _PREFAST_
20608 #pragma warning(pop)
20609 #endif
20610
20611
20612 /*****************************************************************************
20613  *
20614  *  Create and record GC Info for the function.
20615  */
20616 #ifdef JIT32_GCENCODER
20617 void*
20618 #else
20619 void
20620 #endif
20621 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
20622 {
20623 #ifdef JIT32_GCENCODER
20624     return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
20625 #else
20626     genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
20627 #endif
20628 }
20629
20630 #ifdef JIT32_GCENCODER
20631 void*  CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
20632 {
20633     BYTE            headerBuf[64];
20634     InfoHdr         header;
20635
20636     int s_cached;
20637 #ifdef  DEBUG
20638     size_t headerSize      =
20639 #endif
20640     compiler->compInfoBlkSize = gcInfo.gcInfoBlockHdrSave(headerBuf,
20641                                          0,
20642                                          codeSize,
20643                                          prologSize,
20644                                          epilogSize,
20645                                          &header,
20646                                          &s_cached);
20647
20648     size_t argTabOffset = 0;
20649     size_t ptrMapSize      = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
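    // The gcInfoBlockHdrSave call above only encodes the header into the scratch buffer to learn its
    // size; together with ptrMapSize this gives the total GC info block size to allocate below.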
20650
20651 #if DISPLAY_SIZES
20652
20653     if (genInterruptible)
20654     {
20655         gcHeaderISize += compiler->compInfoBlkSize;
20656         gcPtrMapISize += ptrMapSize;
20657     }
20658     else
20659     {
20660         gcHeaderNSize += compiler->compInfoBlkSize;
20661         gcPtrMapNSize += ptrMapSize;
20662     }
20663
20664 #endif // DISPLAY_SIZES
20665
20666     compiler->compInfoBlkSize += ptrMapSize;
20667
20668     /* Allocate the info block for the method */
20669
20670     compiler->compInfoBlkAddr = (BYTE *) compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
20671
20672 #if 0 // VERBOSE_SIZES
20673     // TODO-Review: 'dataSize', below, is not defined
20674
20675 //  if  (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
20676     {
20677         printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
20678                compiler->info.compILCodeSize,
20679                compiler->compInfoBlkSize,
20680                codeSize + dataSize,
20681                codeSize + dataSize - prologSize - epilogSize,
20682                100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
20683                100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
20684                compiler->info.compClassName,
20685                compiler->info.compMethodName);
20686     }
20687
20688 #endif
20689
20690     /* Fill in the info block and return it to the caller */
20691
20692     void* infoPtr = compiler->compInfoBlkAddr;
20693
20694     /* Create the method info block: header followed by GC tracking tables */
20695
20696     compiler->compInfoBlkAddr += gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1,
20697                                           codeSize,
20698                                           prologSize,
20699                                           epilogSize,
20700                                           &header,
20701                                           &s_cached);
20702
20703     assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
20704     compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
20705     assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
20706
20707 #ifdef  DEBUG
20708
20709     if  (0)
20710     {
20711         BYTE    *   temp = (BYTE *)infoPtr;
20712         unsigned    size = compiler->compInfoBlkAddr - temp;
20713         BYTE    *   ptab = temp + headerSize;
20714
20715         noway_assert(size == headerSize + ptrMapSize);
20716
20717         printf("Method info block - header [%u bytes]:", headerSize);
20718
20719         for (unsigned i = 0; i < size; i++)
20720         {
20721             if  (temp == ptab)
20722             {
20723                 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
20724                 printf("\n    %04X: %*c", i & ~0xF, 3*(i&0xF), ' ');
20725             }
20726             else
20727             {
20728                 if  (!(i % 16))
20729                     printf("\n    %04X: ", i);
20730             }
20731
20732             printf("%02X ", *temp++);
20733         }
20734
20735         printf("\n");
20736     }
20737
20738 #endif // DEBUG
20739
20740 #if DUMP_GC_TABLES
20741
20742     if  (compiler->opts.dspGCtbls)
20743     {
20744         const BYTE *base = (BYTE *)infoPtr;
20745         unsigned    size;
20746         unsigned    methodSize;
20747         InfoHdr     dumpHeader;
20748
20749         printf("GC Info for method %s\n", compiler->info.compFullName);
20750         printf("GC info size = %3u\n", compiler->compInfoBlkSize);
20751
20752         size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
20753         // printf("size of header encoding is %3u\n", size);
20754         printf("\n");
20755
20756         if  (compiler->opts.dspGCtbls)
20757         {
20758             base   += size;
20759             size    = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
20760             // printf("size of pointer table is %3u\n", size);
20761             printf("\n");
20762             noway_assert(compiler->compInfoBlkAddr == (base+size));
20763         }
20764
20765     }
20766
20767 #ifdef DEBUG
20768     if  (jitOpts.testMask & 128)
20769     {
20770         for (unsigned offs = 0; offs < codeSize; offs++)
20771         {
20772             gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
20773         }
20774     }
20775 #endif // DEBUG
20776 #endif // DUMP_GC_TABLES
20777
20778     /* Make sure we ended up generating the expected number of bytes */
20779
20780     noway_assert(compiler->compInfoBlkAddr == (BYTE *)infoPtr + compiler->compInfoBlkSize);
20781
20782     return infoPtr;
20783 }
20784
20785 #else // JIT32_GCENCODER
20786
20787 void                CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
20788 {
20789     IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
20790     GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20791     assert(gcInfoEncoder);
20792
20793     // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20794     gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20795
20796     // First we figure out the encoder ID's for the stack slots and registers.
20797     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
20798     // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20799     gcInfoEncoder->FinalizeSlotIds();
20800     // Now we can actually use those slot ID's to declare live ranges.
20801     gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
20802
20803     gcInfoEncoder->Build();
20804
20805     // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
20806     // Let's save the values anyway for debugging purposes.
20807     compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20808     compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
20809 }
20810 #endif
20811
20812
20813 /*****************************************************************************
20814  *  For CEE_LOCALLOC
20815  */
20816
20817 regNumber           CodeGen::genLclHeap(GenTreePtr size)
20818 {
20819     noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
20820
20821     // regCnt is a register used to hold both
20822     //              the amount to stack alloc (either in bytes or pointer sized words)
20823     //          and the final stack alloc address to return as the result
20824     // 
20825     regNumber   regCnt = DUMMY_INIT(REG_CORRUPT);
20826     var_types   type   = genActualType(size->gtType);
20827     emitAttr    easz   = emitTypeSize(type);
20828
20829     // Verify ESP
20830     #ifdef DEBUG
20831     if (compiler->opts.compStackCheckOnRet)
20832     {
20833         noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20834         getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20835
20836         BasicBlock  *   esp_check = genCreateTempLabel();
20837         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20838         inst_JMP(jmpEqual, esp_check);
20839         getEmitter()->emitIns(INS_BREAKPOINT);
20840         genDefineTempLabel(esp_check);
20841     }
20842     #endif
20843
20844     noway_assert(isFramePointerUsed());
20845     noway_assert(genStackLevel == 0); // Can't have anything on the stack
20846
20847     BasicBlock* endLabel      = NULL;    
20848 #if FEATURE_FIXED_OUT_ARGS
20849     bool        stackAdjusted = false;
20850 #endif
20851
20852     if (size->IsCnsIntOrI())
20853     {
20854 #if FEATURE_FIXED_OUT_ARGS
20855         // If we have an outgoing arg area then we must adjust the SP
20856         // essentially popping off the outgoing arg area, 
20857         // We will restore it right before we return from this method
20858         //
20859         if  (compiler->lvaOutgoingArgSpaceSize > 0)
20860         {
20861             assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
20862             inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20863             stackAdjusted = true;
20864         }
20865 #endif
20866         size_t amount = size->gtIntCon.gtIconVal;
20867             
20868         // Round amount up to a multiple of STACK_ALIGN, then convert it to a count of pointer-sized words
20869         amount +=  (STACK_ALIGN - 1);
20870         amount &= ~(STACK_ALIGN - 1);
20871         amount >>= STACK_ALIGN_SHIFT;       // amount is number of pointer-sized words to locAlloc
20872         size->gtIntCon.gtIconVal = amount;  // update the GT_CNS value in the node
20873         
20874         /* If amount is zero then return null in RegCnt */
20875         if (amount == 0)
20876         {
20877             regCnt  = regSet.rsGrabReg(RBM_ALLINT);
20878             instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20879             goto DONE;
20880         }
20881             
20882         /* For small allocations we will generate up to six inline 'push 0' instructions */
20883         if (amount <= 6)
20884         {
20885             regCnt  = regSet.rsGrabReg(RBM_ALLINT);
20886 #if CPU_LOAD_STORE_ARCH
20887             regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20888             // Set 'regZero' to zero
20889             instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
20890 #endif
20891
20892             while (amount != 0)
20893             {
20894 #if CPU_LOAD_STORE_ARCH
20895                 inst_IV(INS_push, (unsigned) genRegMask(regZero));
20896 #else
20897                 inst_IV(INS_push_hide, 0);  // push_hide means don't track the stack
20898 #endif
20899                 amount--;
20900             }
20901             
20902             regTracker.rsTrackRegTrash(regCnt);
20903             // --- move regCnt, ESP
20904             inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20905             goto DONE;
20906         }
20907         else 
20908         {
20909             if (!compiler->info.compInitMem)
20910             {
20911                 // Convert amount back to the number of bytes by which to adjust the SP
20912                 amount <<= STACK_ALIGN_SHIFT;
20913                 size->gtIntCon.gtIconVal = amount;  // update the GT_CNS value in the node
20914                 if (amount < compiler->eeGetPageSize())   // must be < not <=
20915                 {
20916                     // Since the size is a page or less, simply adjust ESP 
20917                     
20918                     // ESP might already be in the guard page, must touch it BEFORE
20919                     // the alloc, not after.
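                    // An x86-flavored sketch of the sequence emitted below
                    // (ARM probes with an ldr from [SP] instead of 'test'):
                    //      mov   regCnt, ESP
                    //      test  [ESP], ESP       ; touch the current page
                    //      sub   regCnt, amount
                    //      mov   ESP, regCnt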
20920                     regCnt  = regSet.rsGrabReg(RBM_ALLINT);
20921                     inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20922 #if CPU_LOAD_STORE_ARCH
20923                     regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20924                     getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
20925                     regTracker.rsTrackRegTrash(regTmp);
20926 #else
20927                     getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20928 #endif
20929                     inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
20930                     inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
20931                     regTracker.rsTrackRegTrash(regCnt);
20932                     goto DONE;
20933                 }
20934             }
20935         } 
20936     }
20937
20938     // Compute the size of the block to allocate
20939     genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
20940     noway_assert(size->gtFlags & GTF_REG_VAL);
20941     regCnt = size->gtRegNum;
20942
20943 #if FEATURE_FIXED_OUT_ARGS
20944     // If we have an outgoing arg area then we must adjust the SP,
20945     // essentially popping off the outgoing arg area.
20946     // We will restore it right before we return from this method.
20947     //
20948     if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
20949     {
20950         assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
20951         inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20952         stackAdjusted = true;
20953     }
20954 #endif
20955
20956     //  Perform alignment if we don't have a GT_CNS size
20957     //
20958     if (!size->IsCnsIntOrI())
20959     {
20960         endLabel = genCreateTempLabel();
20961
20962         // If 0 we bail out
20963         instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
20964         emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20965         inst_JMP(jmpEqual, endLabel);
20966
20967         // Align to STACK_ALIGN
20968         inst_RV_IV(INS_add, regCnt,  (STACK_ALIGN - 1), emitActualTypeSize(type));
20969
20970         if (compiler->info.compInitMem)
20971         {
20972             // regCnt will be the number of pointer-sized words to locAlloc
20973             // If the shift right won't do the 'and' do it here
20974 #if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
20975             inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20976 #endif
20977             // --- shr regCnt, 2 ---
20978             inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
20979         }
20980         else
20981         {
20982             // regCnt will be the total number of bytes to locAlloc
20983
20984             inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20985         }
20986     }
20987
20988     BasicBlock* loop; loop = genCreateTempLabel();
20989
20990     if (compiler->info.compInitMem)
20991     {
20992         // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
20993
20994         /* Since we have to zero out the allocated memory AND ensure that
20995            ESP is always valid by tickling the pages, we will just push 0's
20996            on the stack */
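        // An x86-flavored sketch of the loop defined below (on ARM we push
        // two zeroed registers per iteration and subtract 2 from regCnt):
        //  loop:
        //      push  0
        //      dec   regCnt
        //      jne   loop
        //      mov   regCnt, ESP    ; regCnt = address of the new block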
20997
20998 #if defined(_TARGET_ARM_)
20999         regNumber   regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
21000         regNumber   regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
21001         // Set 'regZero1' and 'regZero2' to zero
21002         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
21003         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
21004 #endif
21005
21006         // Loop:
21007         genDefineTempLabel(loop);
21008
21009 #if defined(_TARGET_X86_)
21010
21011         inst_IV(INS_push_hide, 0);   // --- push 0
21012         // Are we done?
21013         inst_RV(INS_dec, regCnt, type);
21014
21015 #elif defined(_TARGET_ARM_)
21016
21017         inst_IV(INS_push, (unsigned) (genRegMask(regZero1) | genRegMask(regZero2)));
21018         // Are we done?
21019         inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
21020
21021 #else
21022         assert(!"Codegen missing");
21023 #endif // TARGETS
21024
21025         emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
21026         inst_JMP(jmpNotEqual, loop);
21027
21028         // Move the final value of ESP into regCnt
21029         inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
21030         regTracker.rsTrackRegTrash(regCnt);
21031     }
21032     else
21033     {
21034         // At this point 'regCnt' is set to the total number of bytes to locAlloc
21035
21036         /* We don't need to zero out the allocated memory. However, we do have
21037            to tickle the pages to ensure that ESP is always valid and is
21038            in sync with the "stack guard page".  Note that in the worst
21039            case ESP is on the last byte of the guard page.  Thus you must
21040            touch ESP+0 first, not ESP+0x1000.
21041
21042            Another subtlety is that you don't want ESP to be exactly on the
21043            boundary of the guard page because PUSH is predecrement, thus
21044            call setup would not touch the guard page but just beyond it */
21045
21046         /* Note that we go through a few hoops so that ESP never points to
21047            illegal pages at any time during the ticking process
21048
21049                   neg   REG
21050                   add   REG, ESP         // reg now holds ultimate ESP
21051                   jb    loop             // result is smaller than original ESP (no wrap around)
21052                   xor   REG, REG         // Overflow, pick lowest possible number
21053              loop:
21054                   test  ESP, [ESP+0]     // X86 - tickle the page
21055                   ldr   REGH,[ESP+0]     // ARM - tickle the page
21056                   mov   REGH, ESP
21057                   sub   REGH, PAGE_SIZE
21058                   mov   ESP, REGH
21059                   cmp   ESP, REG
21060                   jae   loop
21061
21062                   mov   ESP, REG
21063              end:
21064           */
21065 #ifdef _TARGET_ARM_
21066         inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
21067         inst_JMP(EJ_hs, loop);
21068 #else
21069         inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
21070         inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
21071         inst_JMP(EJ_jb, loop);
21072 #endif
21073         regTracker.rsTrackRegTrash(regCnt);
21074
21075         instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
21076
21077         genDefineTempLabel(loop);
21078
21079         // This is a workaround to avoid the emitter trying to track the
21080         // decrement of the ESP - we do the subtraction in another reg
21081         // instead of adjusting ESP directly.
21082
21083         regNumber   regTemp = regSet.rsPickReg();
21084
21085         // Tickle the page, then move the decremented value back to ESP.
21086         // Note that the tickle has to be done BEFORE the update of ESP, since
21087         // ESP might already be on the guard page.  It is OK to leave
21088         // the final value of ESP on the guard page.
21089
21090 #if CPU_LOAD_STORE_ARCH
21091         getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
21092 #else
21093         getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
21094 #endif
21095
21096         inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
21097         regTracker.rsTrackRegTrash(regTemp);
21098
21099         inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
21100         inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
21101
21102         genRecoverReg(size, RBM_ALLINT, RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
21103         noway_assert(size->gtFlags & GTF_REG_VAL);
21104         regCnt = size->gtRegNum;
21105         inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
21106         emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
21107         inst_JMP(jmpGEU, loop);
21108
21109         // Move the final value to ESP
21110         inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
21111     }
21112     regSet.rsMarkRegFree(genRegMask(regCnt));
21113
21114 DONE:
21115     
21116     noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
21117
21118     if (endLabel != NULL)
21119         genDefineTempLabel(endLabel);
21120
21121 #if FEATURE_FIXED_OUT_ARGS
21122     // If we have an outgoing arg area then we must readjust the SP
21123     //
21124     if  (stackAdjusted)
21125     {
21126         assert(compiler->lvaOutgoingArgSpaceSize > 0);
21127         assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
21128         inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
21129     }
21130 #endif
21131
21132     /* Write the lvaShadowSPfirst stack frame slot */
21133     noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
21134     getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
21135                             compiler->lvaLocAllocSPvar, 0);
21136
21137 #if STACK_PROBES
21138     // We don't think it is worth the codegen complexity to embed this
21139     // in each of the customized allocas above, even where it would be possible.
21140     if (compiler->opts.compNeedStackProbes)
21141     {
21142         genGenerateStackProbe();
21143     }
21144 #endif
21145
21146 #ifdef DEBUG
21147     // Update new ESP
21148     if (compiler->opts.compStackCheckOnRet)
21149     {
21150         noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
21151         getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
21152     }
21153 #endif
21154
21155     return regCnt;
21156 }
21157
21158
21159 /*****************************************************************************/
21160 #ifdef DEBUGGING_SUPPORT
21161 /*****************************************************************************
21162  *                          genSetScopeInfo
21163  *
21164  * Called for every scope info piece to record by the main genSetScopeInfo()
21165  */
21166
21167 void        CodeGen::genSetScopeInfo  (unsigned                 which,
21168                                        UNATIVE_OFFSET           startOffs,
21169                                        UNATIVE_OFFSET           length,
21170                                        unsigned                 varNum,
21171                                        unsigned                 LVnum,
21172                                        bool                     avail,
21173                                        Compiler::siVarLoc&      varLoc)
21174 {
21175     /* We need to do some mapping while reporting back these variables */
21176
21177     unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
21178     noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
21179
21180     // Non-x86 platforms are allowed to access all arguments directly
21181     // so we don't need this code.
21182 #ifdef _TARGET_X86_
21183
21184     // Is this a varargs function?
21185
21186     if (compiler->info.compIsVarArgs &&
21187         varNum != compiler->lvaVarargsHandleArg &&
21188         varNum < compiler->info.compArgsCount &&
21189         !compiler->lvaTable[varNum].lvIsRegArg)
21190     {
21191         noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
21192
21193         // All stack arguments (except the varargs handle) have to be
21194         // accessed via the varargs cookie. Discard generated info,
21195         // and just find its position relative to the varargs handle
21196
21197         PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
21198         if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
21199         {
21200             noway_assert(!compiler->opts.compDbgCode);
21201             return;
21202         }
21203
21204         // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
21205         // arguments of vararg functions to avoid reporting them to GC.
21206         noway_assert(!compiler->lvaTable[varNum].lvRegister);
21207         unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
21208         unsigned varOffset    = compiler->lvaTable[varNum].lvStkOffs;
21209
21210         noway_assert(cookieOffset < varOffset);
21211         unsigned offset = varOffset - cookieOffset;
21212         unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void *);
21213         noway_assert(offset < stkArgSize);
21214         offset = stkArgSize - offset;
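        // Purely illustrative (hypothetical offsets): with cookieOffset == 8,
        // varOffset == 16 and stkArgSize == 24, we get offset = 16 - 8 = 8,
        // and the reported vlfvOffset becomes 24 - 8 = 16.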
21215
21216         varLoc.vlType = Compiler::VLT_FIXED_VA;
21217         varLoc.vlFixedVarArg.vlfvOffset = offset;
21218     }
21219
21220 #endif // _TARGET_X86_
21221
21222     VarName name = NULL;
21223
21224 #ifdef DEBUG
21225
21226     for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
21227     {
21228         if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
21229         {
21230             name = compiler->info.compVarScopes[scopeNum].vsdName;
21231         }
21232     }
21233
21234     // Hang on to this compiler->info.
21235
21236     TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which];
21237
21238     tlvi.tlviVarNum         = ilVarNum;
21239     tlvi.tlviLVnum          = LVnum;
21240     tlvi.tlviName           = name;
21241     tlvi.tlviStartPC        = startOffs;
21242     tlvi.tlviLength         = length;
21243     tlvi.tlviAvailable      = avail;
21244     tlvi.tlviVarLoc         = varLoc;
21245
21246 #endif // DEBUG
21247
21248     compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
21249 }
21250
21251 /*****************************************************************************/
21252 #endif  // DEBUGGING_SUPPORT
21253 /*****************************************************************************/
21254
21255 /*****************************************************************************
21256  *
21257  *  Return non-zero if the given register is free after the given tree is
21258  *  evaluated (i.e. the register is either not used at all, or it holds a
21259  *  register variable which is not live after the given node).
21260  *  This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
21261  *  constant operand, and one that's in a register.  Thus, the only thing we
21262  *  need to determine is whether the register holding op1 is dead.
21263  */
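// For example (illustrative only): given GT_ADD(x, CNS 8) where 'x' already
// lives in 'reg', genCreateAddrMode can reuse 'reg' for the address mode
// unless 'reg' is marked as in use or holds a register variable that stays
// live past this node.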
21264 bool                CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
21265 {
21266     regMaskTP       vars;
21267     regMaskTP       mask = genRegMask(reg);
21268
21269     if  (regSet.rsMaskUsed & mask)
21270         return  false;
21271
21272     assert(tree->gtOper == GT_ADD);
21273     GenTreePtr regValTree = tree->gtOp.gtOp1;
21274     if (!tree->gtOp.gtOp2->IsCnsIntOrI())
21275     {
21276         regValTree = tree->gtOp.gtOp2;
21277         assert(tree->gtOp.gtOp1->IsCnsIntOrI());
21278     }
21279     assert(regValTree->gtFlags & GTF_REG_VAL);
21280
21281     /* At this point, the only way that the register will remain live
21282      * is if it is itself a register variable that isn't dying.
21283      */
21284     assert(regValTree->gtRegNum == reg);
21285     if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
21286         return false;
21287     else
21288         return  true;
21289 }
21290
21291  /*****************************************************************************/
21292  //
21293  // This method calculates the USE and DEF values for a statement.
21294  // It also calls fgSetRngChkTarget for the statement.
21295  //
21296  // We refactored this code out of fgPerBlockLocalVarLiveness
21297  // and added QMARK logic to it.
21298  //
21299  // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
21300  //
21301  // The usage of this method is very limited.
21302  // We should only call it for the first node in the statement or
21303  // for the node after the GTF_RELOP_QMARK node.
21304  //
21305  // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
21306
21307
21308  /*
21309         Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
21310         when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
21311         from both trees.
21312
21313         Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
21314         linked by gtNext.
21315
21316         The algorithm we use is:
21317         (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
21318         (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
21319             We cache copies of current fgCurDefSet and fgCurUseSet.
21320             (The fact that it is recursively calling itself is for nested QMARK case,
21321              where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
21322         (3) We walk the thenTree.
21323         (4) When we see GT_COLON node, we know that we just finished the thenTree.
21324             We then make a copy of the current fgCurDefSet and fgCurUseSet,
21325             restore them to the ones before the thenTree, and then continue walking
21326             the elseTree.
21327         (5) When we see the GT_QMARK node, we know we just finished the elseTree.
21328             So we combine the results from the thenTree and elseTree and then return.
21329
21330
21331                                   +--------------------+
21332                                   |      GT_QMARK    11|
21333                                   +----------+---------+
21334                                              |
21335                                              *
21336                                             / \
21337                                           /     \
21338                                         /         \
21339                    +---------------------+       +--------------------+
21340                    |      GT_<cond>    3 |       |     GT_COLON     7 |
21341                    |  w/ GTF_RELOP_QMARK |       |  w/ GTF_COLON_COND |
21342                    +----------+----------+       +---------+----------+
21343                               |                            |
21344                               *                            *
21345                              / \                          / \
21346                            /     \                      /     \
21347                          /         \                  /         \
21348                         2           1          thenTree 6       elseTree 10
21349                                    x               |                |
21350                                   /                *                *
21351       +----------------+        /                 / \              / \
21352       |prevExpr->gtNext+------/                 /     \          /     \
21353       +----------------+                      /         \      /         \
21354                                              5           4    9           8
21355
21356
21357  */
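 /*
        As a purely illustrative example, a statement such as

            x = (a < b) ? (c + d) : (e + f);

        yields a GT_QMARK (node 11 above) whose relational condition carries
        GTF_RELOP_QMARK and whose GT_COLON child holds the thenTree and the
        elseTree.  The gtNext threading visits the condition first, then the
        thenTree, then the GT_COLON, then the elseTree, and finally the
        GT_QMARK itself - exactly the order that steps (1) through (5) above
        rely on.
 */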
21358
21359 GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,   // The node to start walking with.
21360                                                           GenTreePtr relopNode,   // The node before the startNode.
21361                                                                                   // (It should either be NULL or
21362                                                                                   // a GTF_RELOP_QMARK node.)
21363                                                           GenTreePtr asgdLclVar
21364                                                          )
21365 {
21366     GenTreePtr tree;
21367
21368     VARSET_TP  VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet);  // Store the current fgCurDefSet and fgCurUseSet so
21369     VARSET_TP  VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet);  // we can restore them before entering the elseTree.
21370
21371     bool heapUse_BeforeSplit   = fgCurHeapUse;
21372     bool heapDef_BeforeSplit   = fgCurHeapDef;
21373     bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
21374
21375     VARSET_TP  VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this));    // These two variables will store the USE and DEF sets after
21376     VARSET_TP  VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this));    // evaluating the thenTree.
21377
21378     bool heapUse_AfterThenTree   = fgCurHeapUse;
21379     bool heapDef_AfterThenTree   = fgCurHeapDef;
21380     bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
21381
21382     // relopNode is either NULL or a GTF_RELOP_QMARK node.
21383     assert(!relopNode ||
21384            (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK)
21385           );
21386
21387     // If relopNode is NULL, then the startNode must be the 1st node of the statement.
21388     // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
21389     assert( (!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
21390             (relopNode && startNode == relopNode->gtNext)
21391           );
21392
21393     for (tree = startNode; tree; tree = tree->gtNext)
21394     {
21395         switch (tree->gtOper)
21396         {
21397
21398         case GT_QMARK:
21399
21400             // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
21401             noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
21402
21403             // By the time we see a GT_QMARK, we must have finished processing the elseTree.
21404             // So it's time to combine the results
21405             // from the thenTree and the elseTree, and then return.
21406
21407             VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
21408             VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
21409
21410             fgCurHeapDef   = fgCurHeapDef   && heapDef_AfterThenTree;
21411             fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
21412             fgCurHeapUse   = fgCurHeapUse   || heapUse_AfterThenTree;
21413
21414             // Return the GT_QMARK node itself so the caller can continue from there.
21415             // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
21416             // in the "for" statement.
21417             goto _return;
21418
21419         case GT_COLON:
21420             // By the time we see GT_COLON, we must have just walked the thenTree.
21421             // So we need to do two things here.
21422             // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
21423             //     with the result from the elseTree.
21424             // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
21425             //     and then continue walking the elseTree.
21426             VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
21427             VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
21428
21429             heapDef_AfterThenTree   = fgCurHeapDef;
21430             heapHavoc_AfterThenTree = fgCurHeapHavoc;
21431             heapUse_AfterThenTree   = fgCurHeapUse;
21432
21433             VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
21434             VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
21435
21436             fgCurHeapDef   = heapDef_BeforeSplit;
21437             fgCurHeapHavoc = heapHavoc_BeforeSplit;
21438             fgCurHeapUse   = heapUse_BeforeSplit;
21439
21440             break;
21441
21442         case GT_LCL_VAR:
21443         case GT_LCL_FLD:
21444         case GT_LCL_VAR_ADDR:
21445         case GT_LCL_FLD_ADDR:
21446         case GT_STORE_LCL_VAR:
21447         case GT_STORE_LCL_FLD:
21448             fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
21449             break;
21450
21451         case GT_CLS_VAR:
21452             // For Volatile indirection, first mutate the global heap
21453             // see comments in ValueNum.cpp (under case GT_CLS_VAR)
21454             // This models Volatile reads as def-then-use of the heap
21455             // and allows for a CSE of a subsequent non-volatile read.
21456             if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
21457             {
21458                 // For any Volatile indirection, we must handle it as a 
21459                 // definition of the global heap
21460                 fgCurHeapDef = true;
21461
21462             }
21463             // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
21464             // Otherwise, we treat it as a use here.
21465             if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
21466             {
21467                 fgCurHeapUse = true;
21468             }
21469             break;
21470
21471         case GT_IND:
21472             // For Volatile indirection, first mutate the global heap
21473             // see comments in ValueNum.cpp (under case GT_CLS_VAR)
21474             // This models Volatile reads as def-then-use of the heap
21475             // and allows for a CSE of a subsequent non-volatile read.
21476             if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
21477             {
21478                 // For any Volatile indirection, we must handle it as a 
21479                 // definition of the global heap
21480                 fgCurHeapDef = true;
21481             }
21482
21483             // If the GT_IND is the lhs of an assignment, we'll handle it 
21484             // as a heap def, when we get to assignment.
21485             // Otherwise, we treat it as a use here.
21486             if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
21487             {
21488                 GenTreeLclVarCommon* dummyLclVarTree = NULL;
21489                 bool dummyIsEntire = false;
21490                 GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
21491                 if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/0, &dummyLclVarTree, &dummyIsEntire))
21492                 {
21493                     if (!fgCurHeapDef)
21494                     {
21495                         fgCurHeapUse = true;
21496                     }
21497                 }
21498                 else
21499                 {
21500                     // Defines a local addr
21501                     assert(dummyLclVarTree != nullptr);
21502                     fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
21503                 }
21504             }
21505             break;
21506
21507             // These should have been morphed away to become GT_INDs:
21508         case GT_FIELD:
21509         case GT_INDEX:
21510             unreached();
21511             break;
21512
21513             // We'll assume these are use-then-defs of the heap.
21514         case GT_LOCKADD:
21515         case GT_XADD:
21516         case GT_XCHG:
21517         case GT_CMPXCHG:
21518             if (!fgCurHeapDef)
21519             {
21520                 fgCurHeapUse = true;
21521             }
21522             fgCurHeapDef = true;
21523             fgCurHeapHavoc = true;
21524             break;
21525
21526         case GT_MEMORYBARRIER:
21527             // Similar to any Volatile indirection, we must handle this as a definition of the global heap
21528             fgCurHeapDef = true;
21529             break;
21530
21531             // For now, all calls read/write the heap, the latter in its entirety.  Might tighten this case later.
21532         case GT_CALL:
21533             {
21534                 GenTreeCall* call = tree->AsCall();
21535                 bool modHeap = true;
21536                 if (call->gtCallType == CT_HELPER)
21537                 {
21538                     CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
21539
21540                     if (   !s_helperCallProperties.MutatesHeap(helpFunc)
21541                         && !s_helperCallProperties.MayRunCctor(helpFunc))
21542                     {
21543                         modHeap = false;
21544                     }
21545                 }
21546                 if (modHeap)
21547                 {
21548                     if (!fgCurHeapDef)
21549                     {
21550                         fgCurHeapUse = true;
21551                     }
21552                     fgCurHeapDef = true;
21553                     fgCurHeapHavoc = true;
21554                 }
21555             }
21556
21557             // If this is a p/invoke unmanaged call or if this is a tail-call
21558             // and we have an unmanaged p/invoke call in the method,
21559             // then we're going to run the p/invoke epilog.
21560             // So we mark the FrameRoot as used by this instruction.
21561             // This ensures that the block->bbVarUse will contain
21562             // the FrameRoot local var if is it a tracked variable.
21563
21564             if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
21565             {
21566                 /* Get the TCB local and mark it as used */
21567
21568                 noway_assert(info.compLvFrameListRoot < lvaCount);
21569
21570                 LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
21571
21572                 if (varDsc->lvTracked)
21573                 {
21574                     if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
21575                     {
21576                         VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
21577                     }
21578                 }
21579             }
21580
21581             break;
21582
21583         default:
21584
21585             // Determine whether it defines a heap location.
21586             if (tree->OperIsAssignment() || tree->OperIsBlkOp())
21587             {
21588                 GenTreeLclVarCommon* dummyLclVarTree = NULL;
21589                 if (!tree->DefinesLocal(this, &dummyLclVarTree))
21590                 {
21591                     // If it doesn't define a local, then it might update the heap.
21592                     fgCurHeapDef = true;
21593                 }
21594             }
21595
21596             // Are we seeing a GT_<cond> for a GT_QMARK node?
21597             if ( (tree->OperKind() & GTK_RELOP) &&
21598                  (tree->gtFlags & GTF_RELOP_QMARK)
21599                ) {
21600                 // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
21601                 // Recursively call fgLegacyPerStatementLocalVarLiveness.
21602                 // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the current
21603                 // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
21604                 // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON node.
21605                 tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
21606
21607                 // We must have been returned here after seeing a GT_QMARK node.
21608                 noway_assert(tree->gtOper == GT_QMARK);
21609             }
21610
21611             break;
21612         }
21613     }
21614
21615 _return:
21616     return tree;
21617 }
21618
21619 /*****************************************************************************/
21620
21621 /*****************************************************************************
21622  * Initialize the TCB local and the NDirect stub; afterwards, "push"
21623  * the hoisted NDirect stub.
21624  *
21625  * 'initRegs' is the set of registers which will be zeroed out by the prolog
21626  *             typically initRegs is zero
21627  *
21628  * The layout of the NDirect Inlined Call Frame is as follows:
21629  * (see VM/frames.h and VM/JITInterface.cpp for more information)
21630  *
21631  *   offset     field name                        when set
21632  *  --------------------------------------------------------------
21633  *    +00h      vptr for class InlinedCallFrame   method prolog
21634  *    +04h      m_Next                            method prolog
21635  *    +08h      m_Datum                           call site
21636  *    +0ch      m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
21637  *    +10h      m_pCallerReturnAddress            call site
21638  *    +14h      m_pCalleeSavedRegisters           not set by JIT
21639  *    +18h      JIT retval spill area (int)       before call_gc
21640  *    +1ch      JIT retval spill area (long)      before call_gc
21641  *    +20h      Saved value of EBP                method prolog
21642  */
21643
21644 regMaskTP           CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
21645 {
21646     assert(compiler->compGeneratingProlog);
21647     noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21648     noway_assert(compiler->info.compCallUnmanaged);
21649
21650     CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
21651     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21652
21653     /* let's find out if compLvFrameListRoot is enregistered */
21654
21655     LclVarDsc *     varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21656
21657     noway_assert(!varDsc->lvIsParam);
21658     noway_assert(varDsc->lvType == TYP_I_IMPL);
21659
21660     DWORD threadTlsIndex, *pThreadTlsIndex;
21661
21662     threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**) &pThreadTlsIndex);
21663 #if defined(_TARGET_X86_)
21664     if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
21665 #else
21666     if (true)
21667 #endif
21668     {
21669         // Instead of calling GetThread(), and getting GS cookie and
21670         // InlinedCallFrame vptr through indirections, we'll call only one helper.
21671         // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
21672         // and uses REG_PINVOKE_SCRATCH as scratch register.
21673         getEmitter()->emitIns_R_S (INS_lea,
21674                                  EA_PTRSIZE,
21675                                  REG_PINVOKE_FRAME,
21676                                  compiler->lvaInlinedPInvokeFrameVar,
21677                                  pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21678         regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21679
21680         // We're about to trash REG_PINVOKE_TCB; it had better not be in use!
21681         assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
21682
21683         // Don't use the argument registers (including the special argument in
21684         // REG_PINVOKE_FRAME) for computing the target address.
21685         regSet.rsLockReg(RBM_ARG_REGS|RBM_PINVOKE_FRAME);
21686
21687         genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
21688
21689         regSet.rsUnlockReg(RBM_ARG_REGS|RBM_PINVOKE_FRAME);
21690
21691         if (varDsc->lvRegister)
21692         {
21693             regNumber regTgt = varDsc->lvRegNum;
21694
21695             // we are about to initialize it. So turn the bit off in initRegs to prevent
21696             // the prolog reinitializing it.
21697             initRegs &= ~genRegMask(regTgt);
21698
21699             if (regTgt != REG_PINVOKE_TCB)
21700             {
21701                 // move TCB to its register if necessary
21702                 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
21703                 regTracker.rsTrackRegTrash(regTgt);
21704             }
21705         }
21706         else
21707         {
21708             // move TCB to its stack location
21709             getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21710                                      EA_PTRSIZE,
21711                                      REG_PINVOKE_TCB,
21712                                      compiler->info.compLvFrameListRoot,
21713                                      0);
21714         }
21715
21716         // We are done, the rest of this function deals with the inlined case.
21717         return initRegs;
21718     }
21719
21720     regNumber      regTCB;
21721
21722     if (varDsc->lvRegister)
21723     {
21724         regTCB = varDsc->lvRegNum;
21725
21726         // we are about to initialize it. So turn the bit off in initRegs to prevent
21727         // the prolog reinitializing it.
21728         initRegs &= ~genRegMask(regTCB);
21729     }
21730     else // varDsc is allocated on the Stack
21731     {
21732         regTCB = REG_PINVOKE_TCB;
21733     }
21734
21735     /* get TCB,  mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
21736
21737     // TODO-ARM-CQ: should we inline TlsGetValue here?
21738 #if !defined(_TARGET_ARM_) && !defined(_TARGET_AMD64_)
21739 #define WIN_NT_TLS_OFFSET (0xE10)
21740 #define WIN_NT5_TLS_HIGHOFFSET (0xf94)
21741
21742     if (threadTlsIndex < 64)
21743     {
21744         //  mov  reg, FS:[0xE10+threadTlsIndex*4]
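        //  (e.g. for threadTlsIndex == 10 this is  mov reg, FS:[0xE38])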
21745         getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
21746                                  EA_PTRSIZE,
21747                                  regTCB,
21748                                  FLD_GLOBAL_FS,
21749                                  WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
21750         regTracker.rsTrackRegTrash(regTCB);
21751     }
21752     else
21753     {
21754         noway_assert(pInfo->osMajor >= 5);
21755
21756         DWORD basePtr   = WIN_NT5_TLS_HIGHOFFSET;
21757         threadTlsIndex -= 64;
21758
21759         // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
21760         // mov reg, [reg+threadTlsIndex*4]
21761
21762         getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
21763                                  EA_PTRSIZE,
21764                                  regTCB,
21765                                  FLD_GLOBAL_FS,
21766                                  basePtr);
21767         getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
21768                                  EA_PTRSIZE,
21769                                  regTCB,
21770                                  regTCB,
21771                                  threadTlsIndex*sizeof(int));
21772         regTracker.rsTrackRegTrash(regTCB);
21773     }
21774 #endif
21775
21776     /* save TCB in local var if not enregistered */
21777
21778     if (!varDsc->lvRegister)
21779     {
21780         getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21781                                  EA_PTRSIZE,
21782                                  regTCB,
21783                                  compiler->info.compLvFrameListRoot,
21784                                  0);
21785     }
21786
21787     /* set frame's vptr */
21788
21789     const void * inlinedCallFrameVptr, **pInlinedCallFrameVptr;
21790     inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**) &pInlinedCallFrameVptr);
21791     noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
21792
21793     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t) inlinedCallFrameVptr,
21794                                compiler->lvaInlinedPInvokeFrameVar, 
21795                                pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
21796                                REG_PINVOKE_SCRATCH);
21797
21798     // Set the GSCookie
21799     GSCookie gsCookie, * pGSCookie;
21800     compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
21801     noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
21802
21803     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t) gsCookie,
21804                                compiler->lvaInlinedPInvokeFrameVar,
21805                                pInfo->inlinedCallFrameInfo.offsetOfGSCookie,
21806                                REG_PINVOKE_SCRATCH);
21807
21808     /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
21809        set next field in frame */
21810
21811     getEmitter()->emitIns_R_AR (ins_Load(TYP_I_IMPL),
21812                               EA_PTRSIZE,
21813                               REG_PINVOKE_SCRATCH,
21814                               regTCB,
21815                               pInfo->offsetOfThreadFrame);
21816     regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21817
21818     getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21819                              EA_PTRSIZE,
21820                              REG_PINVOKE_SCRATCH,
21821                              compiler->lvaInlinedPInvokeFrameVar,
21822                              pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21823
21824     noway_assert(isFramePointerUsed());  // Setup of Pinvoke frame currently requires an EBP style frame
21825
21826     /* set EBP value in frame */
21827     getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21828                              EA_PTRSIZE,
21829                              genFramePointerReg(),
21830                              compiler->lvaInlinedPInvokeFrameVar,
21831                              pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
21832
21833     /* reset track field in frame */
21834     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
21835                                compiler->lvaInlinedPInvokeFrameVar,
21836                                pInfo->inlinedCallFrameInfo.offsetOfReturnAddress,
21837                                REG_PINVOKE_SCRATCH);
21838
21839     /* get address of our frame */
21840
21841     getEmitter()->emitIns_R_S (INS_lea,
21842                              EA_PTRSIZE,
21843                              REG_PINVOKE_SCRATCH,
21844                              compiler->lvaInlinedPInvokeFrameVar,
21845                              pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21846     regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21847
21848     /* now "push" our N/direct frame */
21849
21850     getEmitter()->emitIns_AR_R (ins_Store(TYP_I_IMPL),
21851                               EA_PTRSIZE,
21852                               REG_PINVOKE_SCRATCH,
21853                               regTCB,
21854                               pInfo->offsetOfThreadFrame);
21855
21856     return initRegs;
21857 }
21858
21859
21860 /*****************************************************************************
21861  *  Unchain the InlinedCallFrame.
21862  *  Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
21863  *  or tail call.
21864  */
21865 void                CodeGen::genPInvokeMethodEpilog()
21866 {
21867     noway_assert(compiler->info.compCallUnmanaged);
21868     noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21869     noway_assert(compiler->compCurBB == compiler->genReturnBB ||
21870                  (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
21871                  (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
21872
21873     CORINFO_EE_INFO *   pInfo = compiler->eeGetEEInfo();
21874     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21875
21876     getEmitter()->emitDisableRandomNops();
21877     // Debug check to make sure that we're not using ESI and/or EDI across this call, except for
21878     // compLvFrameListRoot.
21879     unsigned regTrashCheck = 0;
21880
21881     /* XXX Tue 5/29/2007
21882      * We explicitly add interference for these in CodeGen::rgPredictRegUse.  If you change the code
21883      * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
21884      */
21885     LclVarDsc   *       varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21886     regNumber           reg;
21887     regNumber           reg2 = REG_PINVOKE_FRAME;
21888
21889
21890     //
21891     // Two cases for epilog invocation:
21892     //
21893     // 1. Return
21894     //    We can trash the ESI/EDI registers.
21895     //
21896     // 2. Tail call
21897     //    When tail called, we'd like to preserve enregistered args
21898     //    in ESI/EDI so we can pass them to the callee.
21899     //
21900     // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
21901     // Instead use the reserved local variable slot.
21902     //
21903     if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21904     {
21905         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21906         {
21907 #if FEATURE_FIXED_OUT_ARGS
21908             // Save the register in the reserved local var slot.
21909             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->lvaPInvokeFrameRegSaveVar, 0);
21910 #else
21911             inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
21912 #endif
21913         }
21914         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21915         {
21916 #if FEATURE_FIXED_OUT_ARGS
21917             // Save the register in the reserved local var slot.
21918             getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21919 #else
21920             inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
21921 #endif
21922         }
21923     }
21924
21925     if (varDsc->lvRegister)
21926     {
21927         reg = varDsc->lvRegNum;
21928         if (reg == reg2)
21929             reg2 = REG_PINVOKE_TCB;
21930
21931         regTrashCheck |= genRegMask(reg2);
21932     }
21933     else
21934     {
21935         /* mov esi, [tcb address]    */
21936
21937         getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
21938                                  EA_PTRSIZE,
21939                                  REG_PINVOKE_TCB,
21940                                  compiler->info.compLvFrameListRoot,
21941                                  0);
21942         regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21943         reg = REG_PINVOKE_TCB;
21944
21945         regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
21946     }
21947
21948     /* mov edi, [ebp-frame.next] */
21949
21950     getEmitter()->emitIns_R_S  (ins_Load(TYP_I_IMPL),
21951                               EA_PTRSIZE,
21952                               reg2,
21953                               compiler->lvaInlinedPInvokeFrameVar,
21954                               pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21955     regTracker.rsTrackRegTrash(reg2);
21956
21957     /* mov [esi+offsetOfThreadFrame], edi */
21958
21959     getEmitter()->emitIns_AR_R (ins_Store(TYP_I_IMPL),
21960                               EA_PTRSIZE,
21961                               reg2,
21962                               reg,
21963                               pInfo->offsetOfThreadFrame);
21964
21965     noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
21966
21967     if (compiler->genReturnLocal != BAD_VAR_NUM &&
21968         compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
21969         compiler->lvaTable[compiler->genReturnLocal].lvRegister)
21970     {
21971         // Really make sure we're not clobbering compiler->genReturnLocal.
21972         noway_assert(!(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum)
21973                        & ( (varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0)
21974                            | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
21975     }
21976
21977     (void)regTrashCheck;
21978
21979     // Restore the registers ESI and EDI.
21980     if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21981     {
21982         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21983         {
21984 #if FEATURE_FIXED_OUT_ARGS
21985             // Restore the register from the reserved local var slot.
21986             getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21987 #else
21988             inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
21989 #endif
21990             regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21991         }
21992         if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21993         {
21994 #if FEATURE_FIXED_OUT_ARGS
21995             // Restore the register from the reserved local var slot.
21996             getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->lvaPInvokeFrameRegSaveVar, 0);
21997 #else
21998             inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
21999 #endif
22000             regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
22001         }
22002     }
22003     getEmitter()->emitEnableRandomNops();
22004 }
22005
22006
22007 /*****************************************************************************
22008     This function emits the call-site prolog for direct calls to unmanaged code.
22009     It does all the necessary setup of the InlinedCallFrame.
22010     frameListRoot specifies the local containing the thread control block.
22011     argSize or methodToken is the value to be copied into the m_datum
22012             field of the frame (methodToken may be indirected & have a reloc)
22013     The function returns the register now containing the thread control block
22014     (it could be either enregistered or loaded into one of the scratch registers).
22015 */
22016
22017 regNumber          CodeGen::genPInvokeCallProlog(LclVarDsc*            frameListRoot,
22018                                                   int                   argSize,
22019                                                   CORINFO_METHOD_HANDLE methodToken,
22020                                                   BasicBlock*           returnLabel)
22021 {
22022     // Some stack locals might be 'cached' in registers; we need to trash them
22023     // from the regTracker *and* also ensure the gc tracker does not consider
22024     // them live (see the next assert).  However, they might be live reg vars
22025     // that are non-pointers CSE'd from pointers.
22026     // That means the register will be live in rsMaskVars, so we can't just
22027     // call gcMarkSetNpt().
22028     {
22029         regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
22030         gcInfo.gcRegGCrefSetCur &= ~deadRegs;
22031         gcInfo.gcRegByrefSetCur &= ~deadRegs;
22032
22033 #ifdef DEBUG
22034         deadRegs &= regSet.rsMaskVars;
22035         if (deadRegs)
22036         {
22037             for (LclVarDsc * varDsc = compiler->lvaTable;
22038                  ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs);
22039                  varDsc++ )
22040             {
22041                 if (!varDsc->lvTracked || !varDsc->lvRegister)
22042                     continue;
22043
22044                 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
22045                     continue;
22046
22047                 regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
22048                 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
22049                     varRegMask |= genRegMask(varDsc->lvOtherReg);
22050
22051                 if (varRegMask & deadRegs)
22052                 {
22053                     // We found the enregistered var that should not be live if it
22054                     // was a GC pointer.
22055                     noway_assert(!varTypeIsGC(varDsc));
22056                     deadRegs &= ~varRegMask;
22057                 }
22058             }
22059         }
22060 #endif // DEBUG
22061     }
22062
22063     /* Since we are using the InlinedCallFrame, we should have spilled all
22064        GC pointers to it - even from callee-saved registers */
22065
22066     noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
22067
22068     /* must specify only one of these parameters */
22069     noway_assert((argSize == 0) || (methodToken == NULL));
22070
22071     /* We are about to call unmanaged code directly.
22072        Before we can do that we have to emit the following sequence:
22073
22074        mov  dword ptr [frame.callTarget], MethodToken
22075        mov  dword ptr [frame.callSiteTracker], esp
22076        mov  reg, dword ptr [tcb_address]
22077        mov  byte  ptr [tcb+offsetOfGcState], 0
22078
22079      */
22080
22081     CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
22082
22083     noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
22084     
22085     /* mov   dword ptr [frame.callSiteTarget], value */
22086
22087     if (methodToken == NULL)
22088     {
22089         /* mov   dword ptr [frame.callSiteTarget], argSize */
22090         instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize,
22091                                    compiler->lvaInlinedPInvokeFrameVar,
22092                                    pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22093     }
22094     else
22095     {
22096         void * embedMethHnd, * pEmbedMethHnd;
22097
22098         embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(
22099                                           methodToken,
22100                                           &pEmbedMethHnd);
22101
22102         noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
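        // i.e. exactly one of the direct handle and its indirection cell must be non-null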
22103
22104         if (embedMethHnd != NULL)
22105         {
22106             /* mov   dword ptr [frame.callSiteTarget], "MethodDesc" */
22107
22108             instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t) embedMethHnd,
22109                                       compiler->lvaInlinedPInvokeFrameVar,
22110                                       pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22111         }
22112         else
22113         {
22114             /* mov   reg, dword ptr [MethodDescIndir]
22115                mov   dword ptr [frame.callSiteTarget], reg */
22116
22117             regNumber reg = regSet.rsPickFreeReg();
22118
22119 #if CPU_LOAD_STORE_ARCH
22120             instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22121                                     reg,
22122                                     (ssize_t) pEmbedMethHnd);
22123             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
22124 #else // !CPU_LOAD_STORE_ARCH
22125 #ifdef _TARGET_AMD64_
22126             if (reg != REG_RAX)
22127             {
22128                 instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22129                                         reg,
22130                                         (ssize_t) pEmbedMethHnd);
22131                 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
22132             }
22133             else
22134 #endif // _TARGET_AMD64_
22135             {
22136                 getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC,
22137                                          reg, (ssize_t) pEmbedMethHnd);
22138             }
22139 #endif // !CPU_LOAD_STORE_ARCH
22140             regTracker.rsTrackRegTrash(reg);
22141             getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
22142                                      EA_PTRSIZE,
22143                                      reg,
22144                                      compiler->lvaInlinedPInvokeFrameVar,
22145                                      pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22146         }
22147     }
22148
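    /* Get the address of the thread control block (TCB): frameListRoot is a
       regular local that holds it, so either use its register directly or
       load it from its frame slot. */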
22149     regNumber tcbReg = REG_NA;
22150
22151     if (frameListRoot->lvRegister)
22152     {
22153         tcbReg = frameListRoot->lvRegNum;
22154     }
22155     else
22156     {
22157         tcbReg = regSet.rsGrabReg(RBM_ALLINT);
22158
22159         /* mov reg, dword ptr [tcb address]    */
22160
22161         getEmitter()->emitIns_R_S  (ins_Load(TYP_I_IMPL),
22162                                   EA_PTRSIZE,
22163                                   tcbReg,
22164                                   (unsigned)(frameListRoot - compiler->lvaTable),
22165                                   0);
22166         regTracker.rsTrackRegTrash(tcbReg);
22167     }
22168
22169 #ifdef _TARGET_X86_
22170     /* mov   dword ptr [frame.callSiteTracker], esp */
22171
22172     getEmitter()->emitIns_S_R  (ins_Store(TYP_I_IMPL),
22173                               EA_PTRSIZE,
22174                               REG_SPBASE,
22175                               compiler->lvaInlinedPInvokeFrameVar,
22176                               pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
22177 #endif // _TARGET_X86_
22178
22179     /* mov   dword ptr [frame.callSiteReturnAddress], label */
22180     
22181 #if CPU_LOAD_STORE_ARCH
22182     regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
22183     getEmitter()->emitIns_J_R (INS_adr,
22184                              EA_PTRSIZE,
22185                              returnLabel,
22186                              tmpReg);
22187     regTracker.rsTrackRegTrash(tmpReg);
22188     getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
22189                              EA_PTRSIZE,
22190                              tmpReg,
22191                              compiler->lvaInlinedPInvokeFrameVar,
22192                              pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
22193 #else // !CPU_LOAD_STORE_ARCH
22194     // TODO-AMD64-CQ: Consider changing to a rip relative sequence on x64.
22195     getEmitter()->emitIns_J_S (ins_Store(TYP_I_IMPL),
22196                              EA_PTRSIZE,
22197                              returnLabel,
22198                              compiler->lvaInlinedPInvokeFrameVar,
22199                              pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
22200 #endif // !CPU_LOAD_STORE_ARCH
22201
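    /* Clear the thread's gcState byte to indicate that we are about to run
       unmanaged code (genPInvokeCallEpilog sets it back to 1).  Load/store
       architectures need the zero in a register to emit the byte store. */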
22202 #if CPU_LOAD_STORE_ARCH
22203     instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
22204
22205     noway_assert(tmpReg != tcbReg);
22206
22207     getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE),
22208                              EA_1BYTE,
22209                              tmpReg,
22210                              tcbReg,
22211                              pInfo->offsetOfGCState);
22212 #else // !CPU_LOAD_STORE_ARCH
22213     /* mov   byte  ptr [tcbReg+offsetOfGcState], 0 */
22214
22215     getEmitter()->emitIns_I_AR (ins_Store(TYP_BYTE),
22216                               EA_1BYTE,
22217                               0,
22218                               tcbReg,
22219                               pInfo->offsetOfGCState);
22220 #endif // !CPU_LOAD_STORE_ARCH
22221
22222     return tcbReg;
22223 }
22224
22225 /*****************************************************************************
22226  *
22227    First we have to mark in the hoisted NDirect stub that we are back
22228    in managed code. Then we check a global flag to see whether a GC is
22229    pending; if so, we call into a jit-helper.
22230    Right now this check is always generated inline, i.e. in the common case
22231    we skip around the jit-helper call.
22232    Note:
22233    The tcb address is a regular local (initialized in the prolog), so it is either
22234    enregistered or in the frame:
22235
22236         tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
22237         mov  byte ptr[tcb_reg+offsetOfGcState], 1
22238         cmp  'global GC pending flag', 0
22239         je   @f
22240         [mov  ECX, tcb_reg]  OR [ecx was setup above]     ; we pass the tcb value to callGC
22241         [mov  [EBP+spill_area+0], eax]                    ; spill the int  return value if any
22242         [mov  [EBP+spill_area+4], edx]                    ; spill the long return value if any
22243         call @callGC
22244         [mov  eax, [EBP+spill_area+0] ]                   ; reload the int  return value if any
22245         [mov  edx, [EBP+spill_area+4] ]                   ; reload the long return value if any
22246     @f:
22247  */
22248
22249 void                CodeGen::genPInvokeCallEpilog(LclVarDsc *  frameListRoot,
22250                                                   regMaskTP    retVal)
22251 {
22252     BasicBlock  *       clab_nostop;
22253     CORINFO_EE_INFO *   pInfo = compiler->eeGetEEInfo();
22254     regNumber           reg2;
22255     regNumber           reg3;
22256 #ifdef _TARGET_ARM_
22257         reg3 = REG_R3;
22258 #else
22259         reg3 = REG_EDX;
22260 #endif
22261 #ifdef _TARGET_AMD64_
22262     TempDsc * retTmp = NULL;
22263 #endif
22264
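    // Suppress random NOP padding while this epilog sequence is emitted;
    // it is re-enabled at the end of this function.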
22265     getEmitter()->emitDisableRandomNops();
22266
22267     if (frameListRoot->lvRegister)
22268     {
22269         /* make sure that the register survives the call (it must be callee-saved) */
22270
22271         reg2 = frameListRoot->lvRegNum;
22272         noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
22273     }
22274     else
22275     {
22276         /* mov   reg2, dword ptr [tcb address]    */
22277 #ifdef _TARGET_ARM_
22278         reg2 = REG_R2;
22279 #else
22280         reg2 = REG_ECX;
22281 #endif
22282
22283         getEmitter()->emitIns_R_S  (ins_Load(TYP_I_IMPL),
22284                                   EA_PTRSIZE,
22285                                   reg2,
22286                                   (unsigned)(frameListRoot - compiler->lvaTable),
22287                                   0);
22288         regTracker.rsTrackRegTrash(reg2);
22289     }
22290
22291
22292 #ifdef _TARGET_ARM_
22293     /* mov   r3, 1 */
22294     /* strb  [r2+offsetOfGcState], r3 */
22295     instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
22296     getEmitter()->emitIns_AR_R (ins_Store(TYP_BYTE),
22297                               EA_1BYTE,
22298                               reg3,
22299                               reg2,
22300                               pInfo->offsetOfGCState);
22301 #else
22302     /* mov   byte ptr [tcb+offsetOfGcState], 1 */
22303     getEmitter()->emitIns_I_AR (ins_Store(TYP_BYTE),
22304                               EA_1BYTE,
22305                               1,
22306                               reg2,
22307                               pInfo->offsetOfGCState);
22308 #endif
22309
22310     /* test global flag (we return to managed code) */
22311
22312     LONG * addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
22313
22314     addrOfCaptureThreadGlobal = compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**) &pAddrOfCaptureThreadGlobal);
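    // Exactly one of the direct address and the indirection cell is returned.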
22315     noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
22316
22317     // Can we directly use addrOfCaptureThreadGlobal?
22318
22319     if (addrOfCaptureThreadGlobal)
22320     {
22321 #ifdef _TARGET_ARM_
22322         instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22323                                 reg3,
22324                                 (ssize_t)addrOfCaptureThreadGlobal);
22325         getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22326                                    EA_4BYTE,
22327                                    reg3,
22328                                    reg3,
22329                                    0);
22330         regTracker.rsTrackRegTrash(reg3);
22331         getEmitter()->emitIns_R_I (INS_cmp,
22332                                  EA_4BYTE,
22333                                  reg3,
22334                                  0);
22335 #elif defined(_TARGET_AMD64_)
22336
22337         if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(addrOfCaptureThreadGlobal))
22338         {
22339             instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
22340
22341             getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, reg3, 0);
22342         }
22343         else
22344         {
22345             getEmitter()->emitIns_I_AI(INS_cmp, EA_4BYTE_DSP_RELOC, 0, (ssize_t)addrOfCaptureThreadGlobal);
22346         }
22347
22348 #else
22349         getEmitter()->emitIns_C_I  (INS_cmp,
22350                                   EA_PTR_DSP_RELOC,
22351                                   FLD_GLOBAL_DS,
22352                                   (ssize_t) addrOfCaptureThreadGlobal,
22353                                   0);
22354 #endif
22355     }
22356     else
22357     {
22358 #ifdef _TARGET_ARM_
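        // Double indirection: load the address of the flag through the
        // indirection cell, then load the flag value itself.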
22359         instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22360                                 reg3,
22361                                 (ssize_t)pAddrOfCaptureThreadGlobal);
22362         getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22363                                    EA_4BYTE,
22364                                    reg3,
22365                                    reg3,
22366                                    0);
22367         regTracker.rsTrackRegTrash(reg3);
22368         getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22369                                    EA_4BYTE,
22370                                    reg3,
22371                                    reg3,
22372                                    0);
22373         getEmitter()->emitIns_R_I (INS_cmp,
22374                                  EA_4BYTE,
22375                                  reg3,
22376                                  0);
22377 #else // !_TARGET_ARM_
22378
22379 #ifdef _TARGET_AMD64_
22380         if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(pAddrOfCaptureThreadGlobal))
22381         {
22382             instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_ECX, (ssize_t)pAddrOfCaptureThreadGlobal);
22383             getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ECX, REG_ECX, 0);
22384             regTracker.rsTrackRegTrash(REG_ECX);
22385         }
22386         else
22387 #endif // _TARGET_AMD64_
22388         {
22389             getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
22390                                      (ssize_t)pAddrOfCaptureThreadGlobal);
22391             regTracker.rsTrackRegTrash(REG_ECX);
22392         }
22393
22394         getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
22395 #endif // !_TARGET_ARM_
22396     }
22397
22398     /* create a label we can jump to if no GC is pending */
22399     clab_nostop = genCreateTempLabel();
22400
22401     /* Generate the conditional jump */
22402     emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
22403     inst_JMP(jmpEqual, clab_nostop);
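    // If the flag is zero no GC is pending, so we jump around the stop-for-GC
    // helper call; otherwise we fall through and call the helper.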
22404
22405 #ifdef _TARGET_ARM_
22406     // The helper preserves the return value on ARM
22407 #else
22408     /* save return value (if necessary) */
22409     if  (retVal != RBM_NONE)
22410     {
22411         if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
22412         {
22413 #ifdef _TARGET_AMD64_
22414             retTmp = compiler->tmpGetTemp(TYP_LONG);
22415             inst_ST_RV(INS_mov, retTmp, 0, REG_INTRET, TYP_LONG);
22416 #elif defined(_TARGET_X86_)
22417             /* push eax */
22418
22419             inst_RV(INS_push, REG_INTRET, TYP_INT);
22420
22421             if (retVal == RBM_LNGRET)
22422             {
22423                 /* push edx */
22424
22425                 inst_RV(INS_push, REG_EDX, TYP_INT);
22426             }
22427 #endif // _TARGET_AMD64_
22428         }
22429     }
22430 #endif
22431
22432     /* emit the call to the EE-helper that stops for GC (or other reasons) */
22433
22434     genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC,
22435                       0,             /* argSize */
22436                       EA_UNKNOWN);   /* retSize */
22437
22438 #ifdef _TARGET_ARM_
22439     // The helper preserves the return value on ARM
22440 #else
22441     /* restore return value (if necessary) */
22442
22443     if  (retVal != RBM_NONE)
22444     {
22445         if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
22446         {
22447 #ifdef _TARGET_AMD64_
22448
22449             assert(retTmp != NULL);
22450             inst_RV_ST(INS_mov, REG_INTRET, retTmp, 0, TYP_LONG);
22451             regTracker.rsTrackRegTrash(REG_INTRET);
22452             compiler->tmpRlsTemp(retTmp);
22453
22454 #elif defined(_TARGET_X86_)
22455             if (retVal == RBM_LNGRET)
22456             {
22457                 /* pop edx */
22458
22459                 inst_RV(INS_pop, REG_EDX, TYP_INT);
22460                 regTracker.rsTrackRegTrash(REG_EDX);
22461             }
22462
22463             /* pop eax */
22464
22465             inst_RV(INS_pop, REG_INTRET, TYP_INT);
22466             regTracker.rsTrackRegTrash(REG_INTRET);
22467 #endif // _TARGET_AMD64_
22468         }
22469     }
22470 #endif
22471
22472     /* the conditional jump above closes the current emitter block */
22473
22474     genDefineTempLabel(clab_nostop);
22475
22476     // This marks the InlinedCallFrame as "inactive".  In fully interruptible code, this is not atomic with
22477     // the above code.  So the process is:
22478     // 1) Return to cooperative mode
22479     // 2) Check to see if we need to stop for GC
22480     // 3) Return from the p/invoke (as far as the stack walker is concerned).
22481
22482     /* mov  dword ptr [frame.callSiteReturnAddress], 0 */
22483
22484     instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
22485                               compiler->lvaInlinedPInvokeFrameVar,
22486                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
22487
22488     getEmitter()->emitEnableRandomNops();
22489 }
22490
22491 /*****************************************************************************/
22492
22493 /*****************************************************************************
22494 *           TRACKING OF FLAGS
22495 *****************************************************************************/
22496
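/*****************************************************************************
 *
 *  Forget any tracked flags state: the condition flags no longer reflect the
 *  contents of any register or variable.
 */
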
22497 void                CodeGen::genFlagsEqualToNone()
22498 {
22499     genFlagsEqReg = REG_NA;
22500     genFlagsEqVar = (unsigned)-1;
22501     genFlagsEqLoc.Init();
22502 }
22503
22504 /*****************************************************************************
22505  *
22506  *  Record the fact that the flags register has a value that reflects the
22507  *  contents of the given register.
22508  */
22509
22510 void                CodeGen::genFlagsEqualToReg(GenTreePtr tree,
22511                                                 regNumber  reg)
22512 {
22513     genFlagsEqLoc.CaptureLocation(getEmitter());
22514     genFlagsEqReg = reg;
22515
22516     /* previous setting of flags by a var becomes invalid */
22517
22518     genFlagsEqVar = 0xFFFFFFFF;
22519
22520     /* Set appropriate flags on the tree */
22521
22522     if (tree)
22523     {
22524         tree->gtFlags |= GTF_ZSF_SET;
22525         assert(tree->gtSetFlags());
22526     }
22527 }
22528
22529 /*****************************************************************************
22530  *
22531  *  Record the fact that the flags register has a value that reflects the
22532  *  contents of the given local variable.
22533  */
22534
22535 void                CodeGen::genFlagsEqualToVar(GenTreePtr tree,
22536                                                 unsigned   var)
22537 {
22538     genFlagsEqLoc.CaptureLocation(getEmitter());
22539     genFlagsEqVar = var;
22540
22541     /* previous setting of flags by a register becomes invalid */
22542
22543     genFlagsEqReg = REG_NA;
22544
22545     /* Set appropriate flags on the tree */
22546
22547     if (tree)
22548     {
22549         tree->gtFlags |= GTF_ZSF_SET;
22550         assert(tree->gtSetFlags());
22551     }
22552 }
22553
22554 /*****************************************************************************
22555  *
22556  *  Return an indication of whether the flags register is set to the current
22557  *  value of the given register/variable. The return value is as follows:
22558  *
22559  *      false  ..  nothing
22560  *      true   ..  the zero flag (ZF) and sign flag (SF) are set
22561  */
22562
22563 bool                 CodeGen::genFlagsAreReg(regNumber reg)
22564 {
22565     if  ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
22566     {
22567         return true;
22568     }
22569
22570     return false;
22571 }
22572
22573 bool                 CodeGen::genFlagsAreVar(unsigned  var)
22574 {
22575     if  ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
22576     {
22577         return true;
22578     }
22579
22580     return false;
22581 }
22582
22583 /*****************************************************************************
22584  * This utility function returns true iff the execution path from "from"
22585  * (inclusive) to "to" (exclusive) contains a death of the given var
22586  */
22587 bool
22588 CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
22589 {
22590     GenTreePtr tree;
22591     for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
22592     {
22593         if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
22594         {
22595             unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
22596             if (dyingVarNum == varNum) return true;
22597             LclVarDsc * varDsc = &(compiler->lvaTable[varNum]);
22598             if (varDsc->lvPromoted)
22599             {
22600                 assert(varDsc->lvType == TYP_STRUCT);
22601                 unsigned firstFieldNum = varDsc->lvFieldLclStart;
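                // A death of one of the promoted struct's field locals also
                // counts as a (partial) death of "varNum".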
22602                 if (dyingVarNum >= firstFieldNum && dyingVarNum < firstFieldNum + varDsc->lvFieldCnt)
22603                 {
22604                     return true;
22605                 }
22606             }
22607         }
22608     }
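    // If we fell out of the loop we must have reached "to"; running off the
    // end of the list would mean the range did not actually contain "to".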
22609     assert(tree != NULL);
22610     return false;
22611 }
22612
22613 #endif // LEGACY_BACKEND