1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
22 #error AMD64 must be !LEGACY_BACKEND
26 #error ARM64 must be !LEGACY_BACKEND
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
37 /*****************************************************************************
39 * Determine what variables die between beforeSet and afterSet, and
40 * update the liveness globals accordingly:
41 * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
44 void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet,
45 VARSET_VALARG_TP afterSet)
50 VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
52 if (VarSetOps::IsEmpty(compiler, deadSet))
55 /* iterate through the dead variables */
57 VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
58 while (iter.NextElem(compiler, &varIndex))
60 varNum = compiler->lvaTrackedToVarNum[varIndex];
61 varDsc = compiler->lvaTable + varNum;
63 /* Remove this variable from the 'deadSet' bit set */
65 noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
67 VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
69 noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
70 VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
72 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
74 /* We are done if the variable is not enregistered */
76 if (!varDsc->lvRegister)
79 if (compiler->verbose)
81 printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
87 #if !FEATURE_FP_REGALLOC
88 // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
89 if (!varDsc->IsFloatRegType())
92 /* Get hold of the appropriate register bit(s) */
94 if (varTypeIsFloating(varDsc->TypeGet()))
96 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
100 regBit = genRegMask(varDsc->lvRegNum);
101 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
102 regBit |= genRegMask(varDsc->lvOtherReg);
106 if (compiler->verbose)
108 printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex, compiler->compRegVarName(varDsc->lvRegNum));
111 noway_assert((regSet.rsMaskVars & regBit) != 0);
113 regSet.RemoveMaskVars(regBit);
115 // Remove GC tracking if any for this register
117 if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
118 gcInfo.gcMarkRegSetNpt(regBit);
123 /*****************************************************************************
125 * Change the given enregistered local variable node to a register variable node
128 void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum,
131 noway_assert(tree->gtOper == GT_LCL_VAR);
132 noway_assert(varDsc->lvRegister);
134 if (isRegPairType(varDsc->lvType))
136 /* Check for the case of a variable that was narrowed to an int */
138 if (isRegPairType(tree->gtType))
140 genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
144 noway_assert(tree->gtFlags & GTF_VAR_CAST);
145 noway_assert(tree->gtType == TYP_INT);
149 noway_assert(!isRegPairType(tree->gtType));
152 /* It's a register variable -- modify the node */
154 unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
156 ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
157 tree->SetOper(GT_REG_VAR);
158 tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
160 tree->gtFlags |= livenessFlags;
161 tree->gtFlags |= GTF_REG_VAL;
162 tree->gtRegNum = varDsc->lvRegNum;
163 tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
164 tree->gtRegVar.SetLclNum(varNum);
169 void CodeGen::saveLiveness(genLivenessSet * ls)
171 VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
172 VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
173 ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
174 ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
175 ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
179 void CodeGen::restoreLiveness(genLivenessSet * ls)
181 VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
182 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
183 regSet.rsMaskVars = ls->maskVars;
184 gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
185 gcInfo.gcRegByrefSetCur = ls->byRefRegs;
189 void CodeGen::checkLiveness(genLivenessSet * ls)
191 assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
192 assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
193 assert(regSet.rsMaskVars == ls->maskVars);
194 assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
195 assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
// If the GT_LCL_VAR node's variable is enregistered, rewrite the node into a
// GT_REG_VAR via genBashLclVar. Presumably returns whether the variable was
// enregistered — the return statements are not visible in this view; confirm.
199 bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
204     assert(tree->gtOper == GT_LCL_VAR);
206     /* Does the variable live in a register? */
208     varNum = tree->gtLclVarCommon.gtLclNum;
209     assert(varNum < compiler->lvaCount);
210     varDsc = compiler->lvaTable + varNum;
212     if (varDsc->lvRegister)
214         genBashLclVar(tree, varNum, varDsc);
// Ask genCreateAddrMode to decompose 'tree' as an addressing-mode expression
// without generating code (final argument 'true' = "don't generate code").
// NOTE(review): several arguments of the call are elided in this view; the
// exact out-parameters and the value returned are not visible here.
224 GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
232     if  (genCreateAddrMode(tree, // address
235                            RBM_NONE, // reg mask
239 #if SCALED_ADDR_MODES
242                            &cns, // displacement
243                            true)) // don't generate code
250 void CodeGen::genSinglePush()
252 genStackLevel += sizeof(void*);
256 void CodeGen::genSinglePop()
258 genStackLevel -= sizeof(void*);
262 #if FEATURE_STACK_FP_X87
264 void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
266 genFPstkLevel = newValue;
270 unsigned CodeGenInterface::genGetFPstkLevel()
272 return genFPstkLevel;
276 void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
278 noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
279 genFPstkLevel += inc;
283 void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
285 noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
286 genFPstkLevel -= dec;
289 #endif // FEATURE_STACK_FP_X87
291 /*****************************************************************************
293 * Generate code that will set the given register to the integer constant.
296 void CodeGen::genSetRegToIcon(regNumber reg,
301 noway_assert(type != TYP_REF || val== NULL);
303 /* Does the reg already hold this constant? */
305 if (!regTracker.rsIconIsInReg(val, reg))
309 instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
312 // If we can set a register to a constant with a small encoding, then do that.
313 else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
315 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
320 /* See if a register holds the value or a close value? */
321 bool constantLoaded = false;
323 regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
325 if (srcReg != REG_NA)
329 inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
330 constantLoaded = true;
334 #if defined(_TARGET_XARCH_)
335 /* delta should fit inside a byte */
336 if (delta == (signed char)delta)
338 /* use an lea instruction to set reg */
339 getEmitter()->emitIns_R_AR (INS_lea,
344 constantLoaded = true;
346 #elif defined(_TARGET_ARM_)
347 /* We found a register 'regS' that has the value we need, modulo a small delta.
348 That is, the value we need is 'regS + delta'.
349 We one to generate one of the following instructions, listed in order of preference:
351 adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and 0<=delta<=255
352 subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and -255<=delta<=0
353 adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
354 subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
355 mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
356 movw regD, icon ; 4 bytes. 0<=icon<=65535
357 add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
358 sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
359 addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
360 subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
362 If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
363 than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
364 can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
365 0x80000000. In this case, delta will be 1.
369 regMaskTP regMask = genRegMask(reg);
370 regMaskTP srcRegMask = genRegMask(srcReg);
372 if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) && (unsigned_abs(delta) <= 255))
376 else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) && (unsigned_abs(delta) <= 7))
380 else if (arm_Valid_Imm_For_Mov(val))
382 // fall through to general "!constantLoaded" case below
384 else if (arm_Valid_Imm_For_Add(delta, flags))
391 getEmitter()->emitIns_R_R_I (INS_add,
397 constantLoaded = true;
400 assert(!"Codegen missing");
405 if (!constantLoaded) // Have we loaded it yet?
407 #ifdef _TARGET_XARCH_
410 /* or reg,-1 takes 3 bytes */
411 inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
415 /* For SMALL_CODE it is smaller to push a small immediate and
416 then pop it into the dest register */
417 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) &&
418 val == (signed char)val)
420 /* "mov" has no s(sign)-bit and so always takes 6 bytes,
421 whereas push+pop takes 2+1 bytes */
423 inst_IV(INS_push, val);
426 inst_RV(INS_pop, reg, type);
429 #endif // _TARGET_X86_
431 #endif // _TARGET_XARCH_
433 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
438 regTracker.rsTrackRegIntCns(reg, val);
439 gcInfo.gcMarkRegPtrVal(reg, type);
442 /*****************************************************************************
444 * Find an existing register set to the given integer constant, or
445 * pick a register and generate code that will set it to the integer constant.
447 * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
448 * to pick some register to set. NOTE that this means the returned regNumber
449 * might *not* be in regBest. It also implies that you should lock any registers
450 * you don't want spilled (not just mark as used).
454 regNumber CodeGen::genGetRegSetToIcon(ssize_t val,
455 regMaskTP regBest /* = 0 */,
456 var_types type /* = TYP_INT */)
461 // Is there already a register with zero that we can use?
462 regCns = regTracker.rsIconIsInReg(val);
464 if (regCns == REG_NA)
467 // If not, grab a register to hold the constant, preferring
468 // any register besides RBM_TMP_0 so it can hopefully be re-used
469 regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
471 // Now set the constant
472 genSetRegToIcon(regCns, val, type);
475 // NOTE: there is guarantee that regCns is in regBest's mask
481 /*****************************************************************************/
482 /*****************************************************************************
484 * Add the given constant to the specified register.
485 * 'tree' is the resulting tree
488 void CodeGen::genIncRegBy(regNumber reg,
494 bool setFlags = (tree!=NULL) && tree->gtSetFlags();
496 #ifdef _TARGET_XARCH_
497 /* First check to see if we can generate inc or dec instruction(s) */
498 /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
499 if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
501 emitAttr size = emitTypeSize(dstType);
506 inst_RV(INS_inc, reg, dstType, size);
509 inst_RV(INS_inc, reg, dstType, size);
511 goto UPDATE_LIVENESS;
514 inst_RV(INS_dec, reg, dstType, size);
517 inst_RV(INS_dec, reg, dstType, size);
519 goto UPDATE_LIVENESS;
524 insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
525 inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
527 #ifdef _TARGET_XARCH_
532 genFlagsEqualToReg(tree, reg);
534 regTracker.rsTrackRegTrash(reg);
536 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
540 if (!tree->OperIsAssignment())
542 genMarkTreeInReg(tree, reg);
543 if (varTypeIsGC(tree->TypeGet()))
544 gcInfo.gcMarkRegSetByref(genRegMask(reg));
550 /*****************************************************************************
552 * Subtract the given constant from the specified register.
553 * Should only be used for unsigned sub with overflow. Else
554 * genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy()
555 * for these cases as the flags are set differently, and the following
556 * check for overflow won't work correctly.
557 * 'tree' is the resulting tree.
560 void CodeGen::genDecRegBy(regNumber reg,
564 noway_assert((tree->gtFlags & GTF_OVERFLOW) && ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
565 noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
567 regTracker.rsTrackRegTrash(reg);
569 noway_assert(!varTypeIsGC(tree->TypeGet()));
570 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
572 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
573 inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
575 if (tree->gtSetFlags())
576 genFlagsEqualToReg(tree, reg);
580 genMarkTreeInReg(tree, reg);
584 /*****************************************************************************
586 * Multiply the specified register by the given value.
587 * 'tree' is the resulting tree
590 void CodeGen::genMulRegBy(regNumber reg,
596 noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
598 regTracker.rsTrackRegTrash(reg);
602 genMarkTreeInReg(tree, reg);
605 bool use_shift = false;
606 unsigned shift_by = 0;
608 if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival-1)) == 0))
611 BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
618 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
619 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
620 if (tree->gtSetFlags())
621 genFlagsEqualToReg(tree, reg);
627 #ifdef _TARGET_XARCH_
628 ins = getEmitter()->inst3opImulForReg(reg);
633 inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
637 /*****************************************************************************/
638 /*****************************************************************************/
639 /*****************************************************************************
641 * Compute the value 'tree' into a register that's in 'needReg'
642 * (or any free register if 'needReg' is RBM_NONE).
644 * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
645 * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
647 * If you require that the register returned is trashable, pass true for 'freeOnly'.
650 void CodeGen::genComputeReg(GenTreePtr tree,
652 RegSet::ExactReg mustReg,
653 RegSet::KeepReg keepReg,
656 noway_assert(tree->gtType != TYP_VOID);
661 #if FEATURE_STACK_FP_X87
662 noway_assert(genActualType(tree->gtType) == TYP_INT ||
663 genActualType(tree->gtType) == TYP_I_IMPL ||
664 genActualType(tree->gtType) == TYP_REF ||
665 tree->gtType == TYP_BYREF);
666 #elif defined(_TARGET_ARM_)
667 noway_assert(genActualType(tree->gtType) == TYP_INT ||
668 genActualType(tree->gtType) == TYP_I_IMPL ||
669 genActualType(tree->gtType) == TYP_REF ||
670 tree->gtType == TYP_BYREF ||
671 genActualType(tree->gtType) == TYP_FLOAT ||
672 genActualType(tree->gtType) == TYP_DOUBLE ||
673 genActualType(tree->gtType) == TYP_STRUCT);
675 noway_assert(genActualType(tree->gtType) == TYP_INT ||
676 genActualType(tree->gtType) == TYP_I_IMPL ||
677 genActualType(tree->gtType) == TYP_REF ||
678 tree->gtType == TYP_BYREF ||
679 genActualType(tree->gtType) == TYP_FLOAT ||
680 genActualType(tree->gtType) == TYP_DOUBLE);
683 /* Generate the value, hopefully into the right register */
685 genCodeForTree(tree, needReg);
686 noway_assert(tree->gtFlags & GTF_REG_VAL);
688 // There is a workaround in genCodeForTreeLng() that changes the type of the
689 // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
690 // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
691 // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
692 // running the rest of this code, because anything looking at gtRegNum on ARM or
693 // attempting to move from EAX/EDX will be wrong.
694 if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
697 reg = tree->gtRegNum;
699 /* Did the value end up in an acceptable register? */
701 if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
703 /* Not good enough to satisfy the caller's orders */
705 if (varTypeIsFloating(tree))
707 RegSet::RegisterPreference pref(needReg, RBM_NONE);
708 rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
712 rg2 = regSet.rsGrabReg(needReg);
717 /* Do we have to end up with a free register? */
722 /* Did we luck out and the value got computed into an unused reg? */
724 if (genRegMask(reg) & regSet.rsRegMaskFree())
727 /* Register already in use, so spill previous value */
729 if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
731 rg2 = regSet.rsGrabReg(needReg);
734 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
735 tree->gtRegNum = reg;
741 /* OK, let's find a trashable home for the value */
743 regMaskTP rv1RegUsed;
745 regSet.rsLockReg (genRegMask(reg), &rv1RegUsed);
746 rg2 = regSet.rsPickReg(needReg);
747 regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
751 noway_assert(reg != rg2);
753 /* Update the value in the target register */
755 regTracker.rsTrackRegCopy(rg2, reg);
757 inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
759 /* The value has been transferred to 'reg' */
761 if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
762 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
764 gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
766 /* The value is now in an appropriate register */
768 tree->gtRegNum = rg2;
772 /* Does the caller want us to mark the register as used? */
774 if (keepReg == RegSet::KEEP_REG)
776 /* In case we're computing a value into a register variable */
780 /* Mark the register as 'used' */
782 regSet.rsMarkRegUsed(tree);
786 /*****************************************************************************
788 * Same as genComputeReg(), the only difference being that the result is
789 * guaranteed to end up in a trashable register.
793 void CodeGen::genCompIntoFreeReg(GenTreePtr tree,
795 RegSet::KeepReg keepReg)
797 genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
800 /*****************************************************************************
802 * The value 'tree' was earlier computed into a register; free up that
803 * register (but also make sure the value is presently in a register).
806 void CodeGen::genReleaseReg(GenTreePtr tree)
808 if (tree->gtFlags & GTF_SPILLED)
810 /* The register has been spilled -- reload it */
812 regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
816 regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
819 /*****************************************************************************
821 * The value 'tree' was earlier computed into a register. Check whether that
822 * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
823 * free the register. The caller shouldn't need to be setting GCness of the register
824 * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
827 void CodeGen::genRecoverReg(GenTreePtr tree,
829 RegSet::KeepReg keepReg)
831 if (tree->gtFlags & GTF_SPILLED)
833 /* The register has been spilled -- reload it */
835 regSet.rsUnspillReg(tree, needReg, keepReg);
838 else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
840 /* We need the tree in another register. So move it there */
842 noway_assert(tree->gtFlags & GTF_REG_VAL);
843 regNumber oldReg = tree->gtRegNum;
845 /* Pick an acceptable register */
847 regNumber reg = regSet.rsGrabReg(needReg);
851 inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
852 tree->gtRegNum = reg;
854 gcInfo.gcMarkRegPtrVal(tree);
855 regSet.rsMarkRegUsed(tree);
856 regSet.rsMarkRegFree(oldReg, tree);
858 regTracker.rsTrackRegCopy(reg, oldReg);
861 /* Free the register if the caller desired so */
863 if (keepReg == RegSet::FREE_REG)
865 regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
866 // Can't use RegSet::FREE_REG on a GC type
867 noway_assert(!varTypeIsGC(tree->gtType));
871 noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
876 /*****************************************************************************
878 * Move one half of a register pair to its new regPair(half).
882 void CodeGen::genMoveRegPairHalf(GenTreePtr tree,
889 // handle long to unsigned long overflow casts
890 while (tree->gtOper == GT_CAST)
892 noway_assert(tree->gtType == TYP_LONG);
893 tree = tree->gtCast.CastOp();
895 noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
896 noway_assert(tree->gtType == TYP_LONG);
897 inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
898 regTracker.rsTrackRegTrash(dst);
902 regTracker.rsTrackRegCopy(dst, src);
903 inst_RV_RV(INS_mov, dst, src, TYP_INT);
907 /*****************************************************************************
909 * The given long value is in a register pair, but it's not an acceptable
910 * one. We have to move the value into a register pair in 'needReg' (if
911 * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
913 * Important note: if 'needReg' is non-zero, we assume the current pair
914 * has not been marked as free. If, OTOH, 'newPair' is specified, we
915 * assume that the current register pair is marked as used and free it.
918 void CodeGen::genMoveRegPair(GenTreePtr tree,
929 /* Either a target set or a specific pair may be requested */
931 noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
933 /* Get hold of the current pair */
935 oldPair = tree->gtRegPair; noway_assert(oldPair != newPair);
937 /* Are we supposed to move to a specific pair? */
939 if (newPair != REG_PAIR_NONE)
941 regMaskTP oldMask = genRegPairMask(oldPair);
942 regMaskTP loMask = genRegMask(genRegPairLo(newPair));
943 regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
944 regMaskTP overlap = oldMask & (loMask|hiMask);
946 /* First lock any registers that are in both pairs */
948 noway_assert((regSet.rsMaskUsed & overlap) == overlap);
949 noway_assert((regSet.rsMaskLock & overlap) == 0);
950 regSet.rsMaskLock |= overlap;
952 /* Make sure any additional registers we need are free */
954 if ((loMask & regSet.rsMaskUsed) != 0 &&
955 (loMask & oldMask ) == 0)
957 regSet.rsGrabReg(loMask);
960 if ((hiMask & regSet.rsMaskUsed) != 0 &&
961 (hiMask & oldMask ) == 0)
963 regSet.rsGrabReg(hiMask);
966 /* Unlock those registers we have temporarily locked */
968 noway_assert((regSet.rsMaskUsed & overlap) == overlap);
969 noway_assert((regSet.rsMaskLock & overlap) == overlap);
970 regSet.rsMaskLock -= overlap;
972 /* We can now free the old pair */
974 regSet.rsMarkRegFree(oldMask);
978 /* Pick the new pair based on the caller's stated preference */
980 newPair = regSet.rsGrabRegPair(needReg);
983 // If grabbed pair is the same as old one we're done
984 if (newPair==oldPair)
987 (oldLo = genRegPairLo(oldPair),
988 oldHi = genRegPairHi(oldPair),
989 newLo = genRegPairLo(newPair),
990 newHi = genRegPairHi(newPair),
991 newLo != REG_STK && newHi != REG_STK));
996 /* Move the values from the old pair into the new one */
998 oldLo = genRegPairLo(oldPair);
999 oldHi = genRegPairHi(oldPair);
1000 newLo = genRegPairLo(newPair);
1001 newHi = genRegPairHi(newPair);
1003 noway_assert(newLo != REG_STK && newHi != REG_STK);
1005 /* Careful - the register pairs might overlap */
1009 /* The low registers are identical, just move the upper half */
1011 noway_assert(newHi != oldHi);
1012 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1016 /* The low registers are different, are the upper ones the same? */
1020 /* Just move the lower half, then */
1021 genMoveRegPairHalf(tree, newLo, oldLo, 0);
1025 /* Both sets are different - is there an overlap? */
1029 /* Are high and low simply swapped ? */
1034 regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegPairMask(oldPair) & ~genRegPairMask(newPair));
1035 inst_RV_RV(INS_mov, regTmp, oldLo);
1036 inst_RV_RV(INS_mov, oldLo, oldHi);
1037 inst_RV_RV(INS_mov, oldHi, regTmp);
1038 regTracker.rsTrackRegTrash(regTmp);
1040 inst_RV_RV(INS_xchg, oldHi, oldLo);
1042 regTracker.rsTrackRegSwap(oldHi, oldLo);
1046 /* New lower == old higher, so move higher half first */
1048 noway_assert(newHi != oldLo);
1049 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1050 genMoveRegPairHalf(tree, newLo, oldLo, 0);
1055 /* Move lower half first */
1056 genMoveRegPairHalf(tree, newLo, oldLo, 0);
1057 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1062 /* Record the fact that we're switching to another pair */
1064 tree->gtRegPair = newPair;
1067 /*****************************************************************************
1069 * Compute the value 'tree' into the register pair specified by 'needRegPair'
1070 * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
1071 * those in avoidReg.
1072 * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1073 * value ends up in as being used.
1076 void CodeGen::genComputeRegPair(GenTreePtr tree,
1077 regPairNo needRegPair,
1079 RegSet::KeepReg keepReg,
1085 regMaskTP tmpUsedMask;
1089 noway_assert(isRegPairType(tree->gtType));
1091 if (needRegPair == REG_PAIR_NONE)
1095 regMask = regSet.rsRegMaskFree() & ~avoidReg;
1096 if (genMaxOneBit(regMask))
1097 regMask = regSet.rsRegMaskFree();
1101 regMask = RBM_ALLINT & ~avoidReg;
1104 if (genMaxOneBit(regMask))
1105 regMask = regSet.rsRegMaskCanGrab();
1109 regMask = genRegPairMask(needRegPair);
1112 /* Generate the value, hopefully into the right register pair */
1114 genCodeForTreeLng(tree, regMask, avoidReg);
1116 noway_assert(tree->gtFlags & GTF_REG_VAL);
1118 regPair = tree->gtRegPair;
1119 tmpMask = genRegPairMask(regPair);
1121 rLo = genRegPairLo(regPair);
1122 rHi = genRegPairHi(regPair);
1124 /* At least one half is in a real register */
1126 noway_assert(rLo != REG_STK || rHi != REG_STK);
1128 /* Did the value end up in an acceptable register pair? */
1130 if (needRegPair != REG_PAIR_NONE)
1132 if (needRegPair != regPair)
1134 /* This is a workaround. If we specify a regPair for genMoveRegPair */
1135 /* it expects the source pair being marked as used */
1136 regSet.rsMarkRegPairUsed(tree);
1137 genMoveRegPair(tree, 0, needRegPair);
1142 /* Do we have to end up with a free register pair?
1143 Something might have gotten freed up above */
1144 bool mustMoveReg=false;
1146 regMask = regSet.rsRegMaskFree() & ~avoidReg;
1148 if (genMaxOneBit(regMask))
1149 regMask = regSet.rsRegMaskFree();
1151 if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
1153 /* Note that we must call genMoveRegPair if one of our registers
1154 comes from the used mask, so that it will be properly spilled. */
1159 if (genMaxOneBit(regMask))
1160 regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
1162 if (genMaxOneBit(regMask))
1163 regMask |= regSet.rsRegMaskCanGrab();
1165 /* Did the value end up in a free register pair? */
1169 /* We'll have to move the value to a free (trashable) pair */
1170 genMoveRegPair(tree, regMask, REG_PAIR_NONE);
1175 noway_assert(needRegPair == REG_PAIR_NONE);
1176 noway_assert(!freeOnly);
1178 /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
1179 tmpUsedMask = tmpMask & regSet.rsMaskUsed;
1180 tmpMask &= ~regSet.rsMaskUsed;
1182 /* Make sure that the value is in "real" registers*/
1185 /* Get one of the desired registers, but exclude rHi */
1187 regSet.rsLockReg(tmpMask);
1188 regSet.rsLockUsedReg(tmpUsedMask);
1190 regNumber reg = regSet.rsPickReg(regMask);
1192 regSet.rsUnlockUsedReg(tmpUsedMask);
1193 regSet.rsUnlockReg(tmpMask);
1195 inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
1197 tree->gtRegPair = gen2regs2pair(reg, rHi);
1199 regTracker.rsTrackRegTrash(reg);
1200 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1202 else if (rHi == REG_STK)
1204 /* Get one of the desired registers, but exclude rLo */
1206 regSet.rsLockReg(tmpMask);
1207 regSet.rsLockUsedReg(tmpUsedMask);
1209 regNumber reg = regSet.rsPickReg(regMask);
1211 regSet.rsUnlockUsedReg(tmpUsedMask);
1212 regSet.rsUnlockReg(tmpMask);
1214 inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
1216 tree->gtRegPair = gen2regs2pair(rLo, reg);
1218 regTracker.rsTrackRegTrash(reg);
1219 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1223 /* Does the caller want us to mark the register as used? */
1225 if (keepReg == RegSet::KEEP_REG)
1227 /* In case we're computing a value into a register variable */
1229 genUpdateLife(tree);
1231 /* Mark the register as 'used' */
1233 regSet.rsMarkRegPairUsed(tree);
1237 /*****************************************************************************
1239 * Same as genComputeRegPair(), the only difference being that the result
1240 * is guaranteed to end up in a trashable register pair.
1244 void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree,
1246 RegSet::KeepReg keepReg)
1248 genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
1251 /*****************************************************************************
1253 * The value 'tree' was earlier computed into a register pair; free up that
1254 * register pair (but also make sure the value is presently in a register
1258 void CodeGen::genReleaseRegPair(GenTreePtr tree)
1260 if (tree->gtFlags & GTF_SPILLED)
1262 /* The register has been spilled -- reload it */
1264 regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
1268 regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1271 /*****************************************************************************
1273 * The value 'tree' was earlier computed into a register pair. Check whether
1274 * either register of that pair has been spilled (and reload it if so), and
1275 * if 'keepReg' is 0, free the register pair.
1278 void CodeGen::genRecoverRegPair(GenTreePtr tree,
1280 RegSet::KeepReg keepReg)
1282 if (tree->gtFlags & GTF_SPILLED)
1286 if (regPair == REG_PAIR_NONE)
1289 regMask = genRegPairMask(regPair);
1291 /* The register pair has been spilled -- reload it */
1293 regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1296 /* Does the caller insist on the value being in a specific place? */
1298 if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1300 /* No good -- we'll have to move the value to a new place */
1302 genMoveRegPair(tree, 0, regPair);
1304 /* Mark the pair as used if appropriate */
1306 if (keepReg == RegSet::KEEP_REG)
1307 regSet.rsMarkRegPairUsed(tree);
1312 /* Free the register pair if the caller desired so */
1314 if (keepReg == RegSet::FREE_REG)
1315 regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1318 /*****************************************************************************
1320 * Compute the given long value into the specified register pair; don't mark
1321 * the register pair as used.
1325 void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
1327 genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
1328 genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
1331 /*****************************************************************************
1332 * This helper makes sure that the regpair target of an assignment is
1333 * available for use. This needs to be called in genCodeForTreeLng just before
1334 * a long assignment, but must not be called until everything has been
1335 * evaluated, or else we might try to spill enregistered variables.
// Makes both halves of 'regPair' available as the target of a long store:
// any half that is currently in use (and is a real register, not REG_STK)
// is spilled. Called just before a long assignment, after all operands
// have been evaluated, so enregistered variables are not spilled by mistake.
1340 void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1342 /* Make sure the target of the store is available */
1344 regNumber regLo = genRegPairLo(regPair);
1345 regNumber regHi = genRegPairHi(regPair);
// Either half of a pair may live on the stack (REG_STK); those need no spill.
1347 if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1348 regSet.rsSpillReg(regHi);
1350 if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1351 regSet.rsSpillReg(regLo);
1354 /*****************************************************************************/
1355 /*****************************************************************************
1357 * Return true if the given tree 'addr' can be computed via an addressing mode,
1358 * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
1359 * try to make it so (but we don't try 'too hard' to accomplish this).
1361 * If we end up needing a register (or two registers) to hold some part(s) of the
1362 * address, we return the use register mask via '*useMaskPtr'.
1364 * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
1365 * in use. The caller would then be responsible for calling
1366 * regSet.rsMarkRegFree(*useMaskPtr).
1368 * If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by
1369 * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
// Tries to make 'addr' computable via a machine addressing mode
// ("[base + index*scale + disp]"), evaluating base (rv1) and/or index (rv2)
// into registers as needed, and returns the registers the address depends on
// via '*useMaskPtr'. Returns false when no address mode could be formed.
// With keepReg==KEEP_REG the registers are marked in use for the caller.
// NOTE(review): this is an excerpted view — many lines (declarations of
// rv1/rv2/rev/forLea/regMask/cns, braces, else arms, returns) are elided.
// Comments only were added; every code line is untouched.
1372 bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
1376 RegSet::KeepReg keepReg,
1377 regMaskTP * useMaskPtr,
// GT_ARR_ELEM has its own addressability protocol (two registers).
1380 if (addr->gtOper == GT_ARR_ELEM)
1382 regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1390 bool operIsArrIndex; // is oper an array index
1391 GenTreePtr scaledIndex; // If scaled addressing mode can't be used
1393 regMaskTP anyMask = RBM_ALLINT;
1399 int ixv = INT_MAX; // unset value
1401 GenTreePtr scaledIndexVal;
1403 regMaskTP newLiveMask;
1407 /* Deferred address mode forming NYI for x86 */
1410 noway_assert(deferOK == false);
// 'oper' (if supplied) must be the indir/atomic node consuming this address.
1412 noway_assert(oper == NULL
1413 || ((oper->OperIsIndir() || oper->OperIsAtomicOp())
1415 ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr)
1416 || oper->gtOp.gtOp1 == addr)));
1417 operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
// A GT_LEA already carries a decomposed address mode; pick up its pieces.
1419 if (addr->gtOper == GT_LEA)
1421 rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1422 GenTreeAddrMode * lea = addr->AsAddrMode();
1426 cns = lea->gtOffset;
1431 (rv1->gtFlags & GTF_REG_VAL) != 0)
1439 // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1442 /* Is the complete address already sitting in a register? */
1444 if ((addr->gtFlags & GTF_REG_VAL) ||
1445 (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1447 genUpdateLife(addr);
1450 rv2 = scaledIndex = 0;
1456 /* Is it an absolute address */
1458 if (addr->IsCnsIntOrI())
1460 rv1 = rv2 = scaledIndex = 0;
1461 // along this code path cns is never used, so place a BOGUS value in it as proof
1462 // cns = addr->gtIntCon.gtIconVal;
1468 /* Is there a chance of forming an address mode? */
1470 if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1472 /* This better not be an array index */
1473 noway_assert(!operIsArrIndex);
1477 // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1480 /* For scaled array access, RV2 may not be pointing to the index of the
1481 array if the CPU does not support the needed scaling factor. We will
1482 make it point to the actual index, and scaledIndex will point to
1486 scaledIndexVal = NULL;
1488 if (operIsArrIndex && rv2 != NULL
1489 && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH)
1490 && rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1493 compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1495 noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1498 /* Has the address already been computed? */
1500 if (addr->gtFlags & GTF_REG_VAL)
1508 genUpdateLife(addr);
1513 Here we have the following operands:
1515 rv1 ..... base address
1516 rv2 ..... offset value (or NULL)
1517 mul ..... multiplier for rv2 (or 0)
1518 cns ..... additional constant (or 0)
1520 The first operand must be present (and be an address) unless we're
1521 computing an expression via 'LEA'. The scaled operand is optional,
1522 but must not be a pointer if present.
1525 noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1527 /*-------------------------------------------------------------------------
1529 * Make sure both rv1 and rv2 (if present) are in registers
1533 // Trivial case : Is either rv1 or rv2 a NULL ?
1537 /* A single operand, make sure it's in a register */
1541 // In the case where "rv1" is already in a register, there's no reason to get into a
1542 // register in "regMask" yet, if there's a non-zero constant that we're going to add;
1543 // if there is, we can do an LEA.
1544 genCodeForTree(rv1, RBM_NONE);
1548 genCodeForTree(rv1, regMask);
1554 /* A single (scaled) operand, make sure it's in a register */
1556 genCodeForTree(rv2, 0);
1560 /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1561 they are in registers */
1563 noway_assert(rv1 && rv2);
1566 /* If we have to check a constant array index, compare it against
1567 the array dimension (see below) but then fold the index with a
1568 scaling factor (if any) and additional offset (if any).
1571 if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1573 if (scaledIndex != NULL)
1575 assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1576 rv2 = scaledIndexVal;
1578 /* We must have a range-checked index operation */
1580 noway_assert(operIsArrIndex);
1582 /* Get hold of the index value and see if it's a constant */
1584 if (rv2->IsIntCnsFitsInI32())
1586 ixv = (int)rv2->gtIntCon.gtIconVal;
1587 // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1588 if (scaledIndex != NULL)
1590 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK -- multiple of 2^6.
// Constant index folded into 'cns'; rv2/scaledIndex no longer needed.
1600 rv2 = scaledIndex = NULL;
1602 /* Add the scaled index into the added value */
1609 /* Make sure 'rv1' is in a register */
1611 genCodeForTree(rv1, regMask);
1617 if (rv1->gtFlags & GTF_REG_VAL)
1619 /* op1 already in register - how about op2? */
1621 if (rv2->gtFlags & GTF_REG_VAL)
1623 /* Great - both operands are in registers already. Just update
1624 the liveness and we are done. */
1640 /* rv1 is in a register, but rv2 isn't */
1644 /* rv1 is already materialized in a register. Just update liveness
1645 to rv1 and generate code for rv2 */
1648 regSet.rsMarkRegUsed(rv1, oper);
1653 else if (rv2->gtFlags & GTF_REG_VAL)
1655 /* rv2 is in a register, but rv1 isn't */
1657 noway_assert(rv2->gtOper == GT_REG_VAR);
1661 /* rv2 is already materialized in a register. Update liveness
1662 to after rv2 and then hang on to rv2 */
1665 regSet.rsMarkRegUsed(rv2, oper);
1668 /* Generate the for the first operand */
1670 genCodeForTree(rv1, regMask);
1674 // Free up rv2 in the right fashion (it might be re-marked if keepReg)
// Lock rv1 while freeing so it cannot be recycled in between.
1675 regSet.rsMarkRegUsed(rv1, oper);
1676 regSet.rsLockUsedReg (genRegMask(rv1->gtRegNum));
1678 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1683 /* We have evaluated rv1, and now we just need to update liveness
1684 to rv2 which was already in a register */
1695 /* Make sure we preserve the correct operand order */
1699 /* Generate the second operand first */
1701 // Determine what registers go live between rv2 and rv1
1702 newLiveMask = genNewLiveRegMask(rv2, rv1);
// Steer rv2 away from registers that become live and rv1's reserved regs.
1704 rv2Mask = regMask & ~newLiveMask;
1705 rv2Mask &= ~rv1->gtRsvdRegs;
1707 if (rv2Mask == RBM_NONE)
1709 // The regMask hint cannot be honored
1710 // We probably have a call that trashes the register(s) in regMask
1711 // so ignore the regMask hint, but try to avoid using
1712 // the registers in newLiveMask and the rv1->gtRsvdRegs
1714 rv2Mask = RBM_ALLINT & ~newLiveMask;
1715 rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1718 genCodeForTree(rv2, rv2Mask);
1719 regMask &= ~genRegMask(rv2->gtRegNum);
1721 regSet.rsMarkRegUsed(rv2, oper);
1723 /* Generate the first operand second */
1725 genCodeForTree(rv1, regMask);
1726 regSet.rsMarkRegUsed(rv1, oper);
1728 /* Free up both operands in the right order (they might be
1729 re-marked as used below)
1731 regSet.rsLockUsedReg (genRegMask(rv1->gtRegNum));
1733 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1738 /* Get the first operand into a register */
1740 // Determine what registers go live between rv1 and rv2
1741 newLiveMask = genNewLiveRegMask(rv1, rv2);
1743 rv1Mask = regMask & ~newLiveMask;
1744 rv1Mask &= ~rv2->gtRsvdRegs;
1746 if (rv1Mask == RBM_NONE)
1748 // The regMask hint cannot be honored
1749 // We probably have a call that trashes the register(s) in regMask
1750 // so ignore the regMask hint, but try to avoid using
1751 // the registers in liveMask and the rv2->gtRsvdRegs
1753 rv1Mask = RBM_ALLINT & ~newLiveMask;
1754 rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1757 genCodeForTree(rv1, rv1Mask);
1758 regSet.rsMarkRegUsed(rv1, oper);
1762 /* Here, we need to get rv2 in a register. We have either already
1763 materialized rv1 into a register, or it was already in a one */
1765 noway_assert(rv1->gtFlags & GTF_REG_VAL);
1766 noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1768 /* Generate the second operand as well */
// Exclude rv1's register so rv2 cannot evict it.
1770 regMask &= ~genRegMask(rv1->gtRegNum);
1771 genCodeForTree(rv2, regMask);
1775 /* rev==true means the evaluation order is rv2,rv1. We just
1776 evaluated rv2, and rv1 was already in a register. Just
1777 update liveness to rv1 and we are done. */
1783 /* We have evaluated rv1 and rv2. Free up both operands in
1784 the right order (they might be re-marked as used below) */
1786 /* Even though we have not explicitly marked rv2 as used,
1787 rv2->gtRegNum may be used if rv2 is a multi-use or
1788 an enregistered variable. */
1790 regSet.rsLockReg (genRegMask(rv2->gtRegNum), &rv2Used);
1792 /* Check for special case both rv1 and rv2 are the same register */
1793 if (rv2Used != genRegMask(rv1->gtRegNum))
1796 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1800 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1806 /*-------------------------------------------------------------------------
1808 * At this point, both rv1 and rv2 (if present) are in registers
1814 /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1816 if (rv1 && !(rv1->gtFlags & GTF_REG_VAL)) return false;
1817 if (rv2 && !(rv2->gtFlags & GTF_REG_VAL)) return false;
1821 // *(intVar1+intVar1) causes problems as we
1822 // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1823 // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1824 // that currently as we return a single mask in useMaskPtr.
1826 if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 &&
1827 (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
1829 if (rv1->gtRegNum == rv2->gtRegNum)
1831 noway_assert(!operIsArrIndex);
1836 /* Check either register operand to see if it needs to be saved */
1840 noway_assert(rv1->gtFlags & GTF_REG_VAL);
1842 if (keepReg == RegSet::KEEP_REG)
1844 regSet.rsMarkRegUsed(rv1, oper);
1848 /* If the register holds an address, mark it */
// Keep GC info accurate: rv1's register may hold a byref/gcref.
1850 gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1856 noway_assert(rv2->gtFlags & GTF_REG_VAL);
1858 if (keepReg == RegSet::KEEP_REG)
1859 regSet.rsMarkRegPtrVal, see above -- mark rv2 in use for 'oper' as well.
1859 regSet.rsMarkRegUsed(rv2, oper);
1864 noway_assert(!scaledIndex);
1868 /* Compute the set of registers the address depends on */
1870 regMaskTP useMask = RBM_NONE;
// rv1/rv2 may have been spilled by later evaluation; unspill before reporting.
1874 if (rv1->gtFlags & GTF_SPILLED)
1875 regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1877 noway_assert(rv1->gtFlags & GTF_REG_VAL);
1878 useMask |= genRegMask(rv1->gtRegNum);
1883 if (rv2->gtFlags & GTF_SPILLED)
// Lock rv1's register so unspilling rv2 cannot steal it.
1887 regMaskTP lregMask = genRegMask(rv1->gtRegNum);
1890 regSet.rsLockReg(lregMask, &used);
1891 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1892 regSet.rsUnlockReg(lregMask, used);
1895 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1897 noway_assert(rv2->gtFlags & GTF_REG_VAL);
1898 useMask |= genRegMask(rv2->gtRegNum);
1901 /* Tell the caller which registers we need to hang on to */
1903 *useMaskPtr = useMask;
1908 /*****************************************************************************
1910 * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
// Generates an array bounds check for a GT_ARR_BOUNDS_CHECK node: compares
// the index against the array length (register, memory, or constant form)
// and emits an unsigned branch to the range-check-fail throw block.
// NOTE(review): excerpted view — braces/else arms and some declarations
// (e.g. 'lenOffset') are elided; comments only added, code untouched.
1913 void CodeGen::genRangeCheck(GenTreePtr oper)
1915 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1916 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1918 GenTreePtr arrLen = bndsChk->gtArrLen;
1919 GenTreePtr arrRef = NULL;
1922 // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
1923 // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
1926 if (arrLen->OperGet() == GT_ARR_LENGTH)
1928 GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
1929 lenOffset = arrLenExact->ArrLenOffset();
1931 // We always load the length into a register on ARM and x64.
1932 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1933 // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
1934 // lengths, but the index expression *can* be native int (64-bits)
// x86 path: keep the array reference in a register and compare against
// the length in memory ([arrRef + lenOffset]) later.
1935 arrRef = arrLenExact->ArrRef();
1936 genCodeForTree(arrRef, RBM_ALLINT);
1937 noway_assert(arrRef->gtFlags & GTF_REG_VAL);
1938 regSet.rsMarkRegUsed(arrRef);
1939 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1942 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1943 // This is another form in which we have an array reference and a constant length. Don't use
1944 // on LOAD_STORE or 64BIT.
1945 else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
1947 genCodeForTree(arrRef, RBM_ALLINT);
1948 noway_assert(arrRef->gtFlags & GTF_REG_VAL);
1949 regSet.rsMarkRegUsed(arrRef);
1950 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1954 // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
1957 // (Unless it's a constant.)
1958 if (!arrLen->IsCnsIntOrI())
1960 genCodeForTree(arrLen, RBM_ALLINT);
1961 regSet.rsMarkRegUsed(arrLen);
1963 noway_assert(arrLen->gtFlags & GTF_REG_VAL);
1964 noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
1968 /* Is the array index a constant value? */
1969 GenTreePtr index = bndsChk->gtIndex;
1970 if (!index->IsCnsIntOrI())
1972 // No, it's not a constant.
1973 genCodeForTree(index, RBM_ALLINT);
1974 regSet.rsMarkRegUsed(index);
1976 // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
1977 // from its register, get it back in a register.
1979 genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
1980 else if (!arrLen->IsCnsIntOrI())
1981 genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
1983 /* Make sure we have the values we expect */
1984 noway_assert(index->gtFlags & GTF_REG_VAL);
1985 noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
1987 noway_assert(index->TypeGet() == TYP_I_IMPL || (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
1988 var_types indxType = index->TypeGet();
1989 if (indxType != TYP_I_IMPL) indxType = TYP_INT;
1992 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1994 /* Generate "cmp index, [arrRef+LenOffs]" */
1995 inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
1997 else if (arrLen->IsCnsIntOrI())
1999 ssize_t len = arrLen->AsIntConCommon()->IconValue();
2000 inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
2004 inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
2007 /* Generate "jae <fail_label>" */
// Unsigned >= catches both index >= length and negative indices.
2009 noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
2010 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2011 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2015 /* Generate "cmp [rv1+LenOffs], cns" */
// Constant-index path: operands of the compare are reversed, so the
// branch condition below is "jbe" instead of "jae".
2017 bool indIsInt = true;
2018 #ifdef _TARGET_64BIT_
2020 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
2021 if (ixvFull > INT32_MAX)
2030 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
2031 int ixv = (int)ixvFull;
2033 if (arrRef != NULL && indIsInt)
2034 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
2035 /* Generate "cmp [arrRef+LenOffs], ixv" */
2036 inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
2037 // Generate "jbe <fail_label>"
2038 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
2039 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2041 else if (arrLen->IsCnsIntOrI())
2043 ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
2044 // Both are constants; decide at compile time.
2045 if (!(0 <= ixvFull && ixvFull < lenv))
2047 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2052 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2056 /* Generate "cmp arrLen, ixv" */
2057 inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
2058 // Generate "jbe <fail_label>"
2059 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
2060 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2064 // Free the registers that were used.
2067 regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
2069 else if (!arrLen->IsCnsIntOrI())
2071 regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
2074 if (!index->IsCnsIntOrI())
2076 regSet.rsMarkRegFree(index->gtRegNum, index);
2080 /*****************************************************************************
2082 * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2083 * Otherwise, check if rvalue is in register. If so, mark it. Then
2084 * call genMakeAddressable(). Needed because genMakeAddressable is used
2085 * for both lvalue and rvalue, and we only can do this for rvalue.
// Rvalue-only variant of genMakeAddressable: for a GT_LCL_VAR, first check
// (REDUNDANT_LOAD optimization) whether the stack local's value is already
// sitting in an acceptable register; if so, mark the tree as living there.
// Otherwise falls through to genMakeAddressable2.
// Safe only for rvalues — an lvalue must go through genMakeAddressable.
2089 regMaskTP CodeGen::genMakeRvalueAddressable(GenTreePtr tree,
2091 RegSet::KeepReg keepReg,
2099 if (tree->gtOper == GT_LCL_VAR)
2101 reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
// Accept the cached register only if the caller has no preference (needReg==0)
// or the register satisfies the preference mask.
2103 if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2105 noway_assert(!isRegPairType(tree->gtType));
2107 genMarkTreeInReg(tree, reg);
2113 return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2116 /*****************************************************************************/
// Returns true if this GT_LCL_VAR reference is the last use of the (tracked)
// local, as recorded by liveness in the GTF_VAR_DEATH flag.
2119 bool CodeGen::genIsLocalLastUse (GenTreePtr tree)
2121 const LclVarDsc * varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2123 noway_assert(tree->OperGet() == GT_LCL_VAR);
2124 noway_assert(varDsc->lvTracked);
2126 return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2130 /*****************************************************************************
2132 * This is genMakeAddressable(GT_ARR_ELEM).
2133 * Makes the array-element addressible and returns the addressibility registers.
2134 * It also marks them as used if keepReg==RegSet::KEEP_REG.
2135 * tree is the dependant tree.
2137 * Note that an array-element needs 2 registers to be addressibile, the
2138 * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2139 * with the 2 registers so that other functions (like instGetAddrMode()) know
2140 * where to look for the offset to use.
// genMakeAddressable for a GT_ARR_ELEM (multi-dimensional array element):
// evaluates the array object and every dimension index, range-checks each
// index against the per-dimension bounds, accumulates the flattened element
// offset in 'accReg', and returns the two-register addressability mask
// (array-object register + accumulated-offset register). With KEEP_REG the
// registers are re-marked in use on arrObj and gtArrInds[0] so that
// instGetAddrMode() can find them.
// NOTE(review): excerpted view — emitIns argument lists, braces and some
// declarations (e.g. 'dim') are elided; comments only added, code untouched.
2143 regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem,
2146 RegSet::KeepReg keepReg)
2148 noway_assert(arrElem->gtOper == GT_ARR_ELEM);
2149 noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
2151 /* Evaluate all the operands. We don't evaluate them into registers yet
2152 as GT_ARR_ELEM does not reorder the evaluation of the operands, and
2153 hence may use a sub-optimal ordering. We try to improve this
2154 situation somewhat by accessing the operands in stages
2155 (genMakeAddressable2 + genComputeAddressable and
2156 genCompIntoFreeReg + genRecoverReg).
2158 Note: we compute operands into free regs to avoid multiple uses of
2159 the same register. Multi-use would cause problems when we free
2160 registers in FIFO order instead of the assumed LIFO order that
2161 applies to all type of tree nodes except for GT_ARR_ELEM.
2164 GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
2165 unsigned rank = arrElem->gtArrElem.gtArrRank;
2166 var_types elemType = arrElem->gtArrElem.gtArrElemType;
2167 regMaskTP addrReg = RBM_NONE;
2168 regMaskTP regNeed = RBM_ALLINT;
2170 #if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
2171 // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
2172 // since the arrObj participates in the lea/add instruction
2173 // that computes ARG_0 we should avoid putting it in ARG_1
2175 if (varTypeIsGC(elemType))
2177 regNeed &= ~RBM_ARG_1;
2181 // Strip off any comma expression.
2182 arrObj = genCodeForCommaTree(arrObj);
2184 // Having generated the code for the comma, we don't care about it anymore.
2185 arrElem->gtArrElem.gtArrObj = arrObj;
2187 // If the array ref is a stack var that's dying here we have to move it
2188 // into a register (regalloc already counts of this), as if it's a GC pointer
2189 // it can be collected from here on. This is not an issue for locals that are
2190 // in a register, as they get marked as used an will be tracked.
2191 // The bug that caused this is #100776. (untracked vars?)
2192 if (arrObj->OperGet() == GT_LCL_VAR &&
2193 compiler->optIsTrackedLocal(arrObj) &&
2194 genIsLocalLastUse(arrObj) &&
2195 !genMarkLclVar(arrObj))
2197 genCodeForTree(arrObj, regNeed);
2198 regSet.rsMarkRegUsed(arrObj, 0);
2199 addrReg = genRegMask(arrObj->gtRegNum);
2203 addrReg = genMakeAddressable2(arrObj,
2206 true, // forLoadStore
2209 true); // evalSideEffs
// Evaluate every index into its own free register (KEEP_REG protects them).
2213 for (dim = 0; dim < rank; dim++)
2214 genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
2216 /* Ensure that the array-object is in a register */
2218 addrReg = genKeepAddressable(arrObj, addrReg);
2219 genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
2221 regNumber arrReg = arrObj->gtRegNum;
2222 regMaskTP arrRegMask = genRegMask(arrReg);
2223 regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
// Lock the array register so per-dimension work below cannot evict it.
2224 regSet.rsLockUsedReg(arrRegMask);
2226 /* Now process all the indices, do the range check, and compute
2227 the offset of the element */
2229 regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
2231 for (dim = 0; dim < rank; dim++)
2233 GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
2235 /* Get the index into a free register (other than the register holding the array) */
2237 genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
2239 /* Subtract the lower bound, and do the range check */
2241 #if CPU_LOAD_STORE_ARCH
// Load/store architectures must load the bound into a scratch register first.
2242 regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
2243 getEmitter()->emitIns_R_AR(
2247 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2248 regTracker.rsTrackRegTrash(valueReg);
2249 getEmitter()->emitIns_R_R(
2253 regTracker.rsTrackRegTrash(index->gtRegNum);
2255 getEmitter()->emitIns_R_AR(
2259 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2260 getEmitter()->emitIns_R_R(
2265 getEmitter()->emitIns_R_AR(
2269 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2270 regTracker.rsTrackRegTrash(index->gtRegNum);
2272 getEmitter()->emitIns_R_AR(
2276 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
// Unsigned >= branch to the range-check-fail throw helper block.
2278 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2279 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
2283 /* Hang on to the register of the first index */
2285 noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
2286 accReg = index->gtRegNum;
2287 noway_assert(accReg != arrReg);
2288 regSet.rsLockUsedReg(genRegMask(accReg));
2292 /* Evaluate accReg = accReg*dim_size + index */
2294 noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
2295 #if CPU_LOAD_STORE_ARCH
2296 getEmitter()->emitIns_R_AR(
2300 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2301 regTracker.rsTrackRegTrash(valueReg);
2302 getEmitter()->emitIns_R_R(
2307 getEmitter()->emitIns_R_AR(
2311 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2314 inst_RV_RV(INS_add, accReg, index->gtRegNum);
2315 regSet.rsMarkRegFree(index->gtRegNum, index);
2316 regTracker.rsTrackRegTrash(accReg);
// Scale by element size only when it cannot be folded into the
// addressing mode's scale factor.
2320 if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
2322 regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
2324 getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
2325 regTracker.rsTrackRegTrash(accReg);
// Unlock, then free; KEEP_REG callers get them re-marked just below.
2328 regSet.rsUnlockUsedReg(genRegMask(arrReg));
2329 regSet.rsUnlockUsedReg(genRegMask(accReg));
2331 regSet.rsMarkRegFree(genRegMask(arrReg));
2332 regSet.rsMarkRegFree(genRegMask(accReg));
2334 if (keepReg == RegSet::KEEP_REG)
2336 /* We mark the addressability registers on arrObj and gtArrInds[0].
2337 instGetAddrMode() knows to work with this. */
2339 regSet.rsMarkRegUsed(arrObj, tree);
2340 regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
2343 return genRegMask(arrReg) | genRegMask(accReg);
2346 /*****************************************************************************
2348 * Make sure the given tree is addressable. 'needReg' is a mask that indicates
2349 * the set of registers we would prefer the destination tree to be computed
2350 * into (RBM_NONE means no preference).
2352 * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
2354 * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2355 * on as used, and return the mask for that register set (if no registers
2356 * are marked as used, RBM_NONE is returned).
2358 * If 'smallOK' is not true and the datatype being address is a byte or short,
2359 * then the tree is forced into a register. This is useful when the machine
2360 * instruction being emitted does not have a byte or short version.
2362 * The "deferOK" parameter indicates the mode of operation - when it's false,
2363 * upon returning an actual address mode must have been formed (i.e. it must
2364 * be possible to immediately call one of the inst_TT methods to operate on
2365 * the value). When "deferOK" is true, we do whatever it takes to be ready
2366 * to form the address mode later - for example, if an index address mode on
2367 * a particular CPU requires the use of a specific register, we usually don't
2368 * want to immediately grab that register for an address mode that will only
2369 * be needed later. The convention is to call genMakeAddressable() with
2370 * "deferOK" equal to true, do whatever work is needed to prepare the other
2371 * operand, call genMakeAddressable() with "deferOK" equal to false, and
2372 * finally call one of the inst_TT methods right after that.
2374 * If we do any other codegen after genMakeAddressable(tree) which can
2375 * potentially spill the addressability registers, genKeepAddressable()
2376 * needs to be called before accessing the tree again.
2378 * genDoneAddressable() needs to be called when we are done with the tree
2379 * to free the addressability registers.
// Makes 'tree' addressable so it can be used with the inst_XX_TT() family:
// leaves it where it is if already directly addressable (register, local,
// suitable constant, formable indirection), otherwise computes it into a
// register. Returns the mask of registers the addressability depends on
// (marked in use when keepReg==KEEP_REG). See the large comment above this
// function in the original file for the deferOK protocol.
// NOTE(review): excerpted view — switch/case labels, braces and returns are
// elided; comments only added, code untouched.
2382 regMaskTP CodeGen::genMakeAddressable(GenTreePtr tree,
2384 RegSet::KeepReg keepReg,
2388 GenTreePtr addr = NULL;
2391 /* Is the value simply sitting in a register? */
2393 if (tree->gtFlags & GTF_REG_VAL)
2395 genUpdateLife(tree);
2400 // TODO: If the value is for example a cast of float -> int, compute
2401 // TODO: the converted value into a stack temp, and leave it there,
2402 // TODO: since stack temps are always addressable. This would require
2403 // TODO: recording the fact that a particular tree is in a stack temp.
2406 /* byte/char/short operand -- is this acceptable to the caller? */
// If not smallOK, fall through and force the small value into a register.
2408 if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
2411 // Evaluate non-last elements of comma expressions, to get to the last.
2412 tree = genCodeForCommaTree(tree);
2414 switch (tree->gtOper)
2418 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
2419 // to worry about it being enregistered.
2420 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
2422 genUpdateLife(tree);
2428 if (!genMarkLclVar(tree))
2430 genUpdateLife(tree);
2434 __fallthrough; // it turns out the variable lives in a register
2438 genUpdateLife(tree);
2447 #ifdef _TARGET_64BIT_
2448 // Non-relocs will be sign extended, so we don't have to enregister
2449 // constants that are equivalent to a sign-extended int.
2450 // Relocs can be left alone if they are RIP-relative.
2451 if ((genTypeSize(tree->TypeGet()) > 4) && (!tree->IsIntCnsFitsInI32() ||
2452 (tree->IsIconHandle() &&
2453 (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
2457 #endif // _TARGET_64BIT_
2462 // For MinOpts, we don't do constant folding, so we have
2463 // constants showing up in places we don't like.
2464 // force them into a register now to prevent that.
2465 if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
2473 /* Try to make the address directly addressable */
2475 if (genMakeIndAddrMode(tree->gtOp.gtOp1,
2477 false, /* not for LEA */
2483 genUpdateLife(tree);
2487 /* No good, we'll have to load the address into a register */
2490 tree = tree->gtOp.gtOp1;
2499 /* Here we need to compute the value 'tree' into a register */
2501 genCodeForTree(tree, needReg);
2505 noway_assert(tree->gtFlags & GTF_REG_VAL);
2507 if (isRegPairType(tree->gtType))
2509 /* Are we supposed to hang on to the register? */
2511 if (keepReg == RegSet::KEEP_REG)
2512 regSet.rsMarkRegPairUsed(tree);
2514 regMask = genRegPairMask(tree->gtRegPair);
2518 /* Are we supposed to hang on to the register? */
2520 if (keepReg == RegSet::KEEP_REG)
2521 regSet.rsMarkRegUsed(tree, addr);
2523 regMask = genRegMask(tree->gtRegNum);
2529 /*****************************************************************************
2530 * Compute a tree (which was previously made addressable using
2531 * genMakeAddressable()) into a register.
2532 * needReg - mask of preferred registers.
2533 * keepReg - should the computed register be marked as used by the tree
2534 * freeOnly - target register needs to be a scratch register
// Computes a tree (previously made addressable via genMakeAddressable) into
// a register: releases the addressability registers (addrReg/keptReg), then
// either keeps the value where it is, materializes a constant, or moves the
// value into a register picked from 'needReg'. 'freeOnly' forces a scratch
// (free) target register. With keepReg==KEEP_REG the result is marked used.
// NOTE(review): excerpted view — braces/else arms are elided; comments only
// added, code untouched.
2537 void CodeGen::genComputeAddressable(GenTreePtr tree,
2539 RegSet::KeepReg keptReg,
2541 RegSet::KeepReg keepReg,
2544 noway_assert(genStillAddressable(tree));
2545 noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
// Release the registers that were keeping the tree addressable.
2547 genDoneAddressable(tree, addrReg, keptReg);
2551 if (tree->gtFlags & GTF_REG_VAL)
2553 reg = tree->gtRegNum;
// freeOnly: the current register is unacceptable if it is not free.
2555 if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
2560 if (tree->OperIsConst())
2562 /* Need to handle consts separately as we don't want to emit
2563 "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
2564 handles consts better for SMALL_CODE */
2566 noway_assert(tree->IsCnsIntOrI());
2567 reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
2572 reg = regSet.rsPickReg(needReg);
2574 inst_RV_TT(INS_mov, reg, tree);
2575 regTracker.rsTrackRegTrash(reg);
2579 genMarkTreeInReg(tree, reg);
2581 if (keepReg == RegSet::KEEP_REG)
2582 regSet.rsMarkRegUsed(tree);
// Keep GC tracking in sync with the new register contents.
2584 gcInfo.gcMarkRegPtrVal(tree);
2587 /*****************************************************************************
2588 * Should be similar to genMakeAddressable() but gives more control.
// genMakeAddressable with more control: when 'evalSideEffs' is set and the
// tree is a faulting GT_IND (GTF_EXCEPT), the indirection is evaluated into
// a register immediately so its exception side effect happens here; otherwise
// delegates to genMakeAddressable. Returns the addressability register mask.
// NOTE(review): excerpted view — braces/else arms are elided; comments only
// added, code untouched.
2591 regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
2593 RegSet::KeepReg keepReg,
2600 bool evalToReg = false;
2602 if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2605 #if CPU_LOAD_STORE_ARCH
2612 genCodeForTree(tree, needReg);
2614 noway_assert(tree->gtFlags & GTF_REG_VAL);
2616 if (isRegPairType(tree->gtType))
2618 /* Are we supposed to hang on to the register? */
2620 if (keepReg == RegSet::KEEP_REG)
2621 regSet.rsMarkRegPairUsed(tree);
2623 return genRegPairMask(tree->gtRegPair);
2627 /* Are we supposed to hang on to the register? */
2629 if (keepReg == RegSet::KEEP_REG)
2630 regSet.rsMarkRegUsed(tree);
2632 return genRegMask(tree->gtRegNum);
// Default path: the plain genMakeAddressable protocol.
2637 return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2641 /*****************************************************************************
2643 * The given tree was previously passed to genMakeAddressable(); return
2644 * 'true' if the operand is still addressable.
2648 bool CodeGen::genStillAddressable(GenTreePtr tree)
2650 /* Has the value (or one or more of its sub-operands) been spilled? */
// GTF_SPILLED: the tree's own register was spilled; GTF_SPILLED_OPER: a
// sub-operand of its address mode was spilled. Either makes it non-addressable.
2652 if (tree->gtFlags & (GTF_SPILLED|GTF_SPILLED_OPER))
2658 /*****************************************************************************
2660 * Recursive helper to restore complex address modes. The 'lockPhase'
2661 * argument indicates whether we're in the 'lock' or 'reload' phase.
// Returns the mask of registers that are part of the address mode; called
// twice by genRestAddressable (first lockPhase=true, then lockPhase=false).
2664 regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr,
2668 regMaskTP regMask = RBM_NONE;
2670 /* Have we found a spilled value? */
2672 if (tree->gtFlags & GTF_SPILLED)
2674 /* Do nothing if we're locking, otherwise reload and lock */
2678 /* Unspill the register */
2680 regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
2682 /* The value should now be sitting in a register */
2684 noway_assert(tree->gtFlags & GTF_REG_VAL);
2685 regMask = genRegMask(tree->gtRegNum);
2687 /* Mark the register as used for the address */
2689 regSet.rsMarkRegUsed(tree, addr);
2691 /* Lock the register until we're done with the entire address */
// Locking immediately prevents this freshly-reloaded register from being
// chosen as a spill victim while the remaining operands are reloaded.
2693 regSet.rsMaskLock |= regMask;
2699 /* Is this sub-tree sitting in a register? */
2701 if (tree->gtFlags & GTF_REG_VAL)
2703 regMask = genRegMask(tree->gtRegNum);
2705 /* Lock the register if we're in the locking phase */
2708 regSet.rsMaskLock |= regMask;
2712 /* Process any sub-operands of this node */
2714 unsigned kind = tree->OperKind();
2716 if (kind & GTK_SMPOP)
2718 /* Unary/binary operator */
// Recurse into both operands, accumulating their register masks.
2720 if (tree->gtOp.gtOp1)
2721 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
2722 if (tree->gtGetOp2())
2723 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
2725 else if (tree->gtOper == GT_ARR_ELEM)
2727 /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
2728 which holds the offset-calculation */
2730 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
2731 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
2733 else if (tree->gtOper == GT_CMPXCHG)
2735 regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
2739 /* Must be a leaf/constant node */
2741 noway_assert(kind & (GTK_LEAF|GTK_CONST));
2748 /*****************************************************************************
2750 * The given tree was previously passed to genMakeAddressable, but since then
2751 * some of its registers are known to have been spilled; do whatever it takes
2752 * to make the operand addressable again (typically by reloading any spilled
// registers). 'lockMask' must already be locked on entry; it is unlocked
// before returning. Returns the updated address-register mask.
2756 regMaskTP CodeGen::genRestAddressable(GenTreePtr tree,
2760 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2762 /* Is this a 'simple' register spill? */
2764 if (tree->gtFlags & GTF_SPILLED)
2766 /* The mask must match the original register/regpair */
2768 if (isRegPairType(tree->gtType))
2770 noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
2772 regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
// The pair may come back in different registers; recompute the mask.
2774 addrReg = genRegPairMask(tree->gtRegPair);
2778 noway_assert(addrReg == genRegMask(tree->gtRegNum));
2780 regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2782 addrReg = genRegMask(tree->gtRegNum);
2785 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2786 regSet.rsMaskLock -= lockMask;
2791 /* We have a complex address mode with some of its sub-operands spilled */
2793 noway_assert((tree->gtFlags & GTF_REG_VAL ) == 0);
2794 noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
2797 We'll proceed in several phases:
2799 1. Lock any registers that are part of the address mode and
2800 have not been spilled. This prevents these registers from
2801 getting spilled in step 2.
2803 2. Reload any registers that have been spilled; lock each
2804 one right after it is reloaded.
2806 3. Unlock all the registers.
// Phase 1 (lock) then phase 2 (reload) via the recursive helper.
2809 addrReg = genRestoreAddrMode(tree, tree, true);
2810 addrReg |= genRestoreAddrMode(tree, tree, false);
2812 /* Unlock all registers that the address mode uses */
// Phase 3: also release the caller's lockMask in the same step.
2814 lockMask |= addrReg;
2816 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2817 regSet.rsMaskLock -= lockMask;
2822 /*****************************************************************************
2824 * The given tree was previously passed to genMakeAddressable, but since then
2825 * some of its registers might have been spilled ('addrReg' is the set of
2826 * registers used by the address). This function makes sure the operand is
2827 * still addressable (while avoiding any of the registers in 'avoidMask'),
2828 * and returns the (possibly modified) set of registers that are used by
2829 * the address (these will be marked as used on exit).
2832 regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree,
2834 regMaskTP avoidMask)
2836 /* Is the operand still addressable? */
2838 tree = tree->gtEffectiveVal(/*commaOnly*/true); // Strip off commas for this purpose.
2840 if (!genStillAddressable(tree))
2844 // Temporarily lock 'avoidMask' while we restore addressability
2845 // genRestAddressable will unlock the 'avoidMask' for us
2846 // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2847 // In regSet.rsRegMaskFree() we require that all locked register be marked as used
2849 regSet.rsLockUsedReg(avoidMask);
2852 addrReg = genRestAddressable(tree, addrReg, avoidMask);
// genRestAddressable must have unlocked avoidMask on the way out.
2854 noway_assert((regSet.rsMaskLock & avoidMask) == 0);
2860 /*****************************************************************************
2862 * After we're finished with the given operand (which was previously marked
2863 * by calling genMakeAddressable), this function must be called to free any
2864 * registers that may have been used by the address.
2865 * keptReg indicates if the addressability registers were marked as used
2866 * by genMakeAddressable().
2869 void CodeGen::genDoneAddressable(GenTreePtr tree,
2871 RegSet::KeepReg keptReg)
2873 if (keptReg == RegSet::FREE_REG)
2875 // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2876 // ie. There may be a pending use in a higher-up tree.
2878 addrReg &= ~regSet.rsMaskUsed;
2880 /* addrReg was not marked as used. So just reset its GC info */
2883 gcInfo.gcMarkRegSetNpt(addrReg);
2888 /* addrReg was marked as used. So we need to free it up (which
2889 will also reset its GC info) */
2891 regSet.rsMarkRegFree(addrReg);
2895 /*****************************************************************************/
2896 /*****************************************************************************
2898 * Make sure the given floating point value is addressable, and return a tree
2899 * that will yield the value as an addressing mode (this tree may differ from
2900 * the one passed in, BTW). If the only way to make the value addressable is
2901 * to evaluate into the FP stack, we do this and return zero.
// regMaskPtr receives the registers used by the resulting address mode.
2904 GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree,
2905 regMaskTP * regMaskPtr,
2910 switch (tree->gtOper)
2918 if (tree->gtType == TYP_FLOAT)
// Narrow the stored double constant to float before materializing it in
// the constant pool, so the in-memory representation matches TYP_FLOAT.
2920 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
2921 return genMakeConst(&f, TYP_FLOAT, tree, false);
2923 return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
2928 /* Try to make the address directly addressable */
2930 if (genMakeIndAddrMode(tree->gtOp.gtOp1,
2932 false, /* not for LEA */
2938 genUpdateLife(tree);
2947 #if FEATURE_STACK_FP_X87
2948 /* We have no choice but to compute the value 'tree' onto the FP stack */
2950 genCodeForTreeFlt(tree);
2956 /*****************************************************************************/
2957 /*****************************************************************************
2959 * Display a string literal value (debug only).
2965 /*****************************************************************************
2967 * Generate code to check that the GS cookie wasn't thrashed by a buffer
2968 * overrun. If pushReg is true, preserve all registers around code sequence.
2969 * Otherwise, ECX maybe modified.
2971 * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
2973 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2975 // Make sure that EAX didn't die in the return expression
// If we're not pushing registers, a GC return value in RBM_INTRET must stay
// reported live across the check sequence.
2976 if (!pushReg && (compiler->info.compRetType == TYP_REF))
2977 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2979 // Add cookie check code for unsafe buffers
2980 BasicBlock *gsCheckBlk;
2981 regMaskTP byrefPushedRegs = RBM_NONE;
2982 regMaskTP norefPushedRegs = RBM_NONE;
2983 regMaskTP pushedRegs = RBM_NONE;
2985 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
// Case 1: the cookie is an immediate value (no indirection needed).
2987 if (compiler->gsGlobalSecurityCookieAddr == NULL)
2990 #if CPU_LOAD_STORE_ARCH
2991 regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
2992 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE,
2994 compiler->lvaGSSecurityCookie, 0);
2995 regTracker.rsTrackRegTrash(reg);
// Prefer a cmp with an ALU-encodable immediate (or its complement);
// otherwise the value must be materialized into a second register.
2997 if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
2998 arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
3000 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE,
3002 compiler->gsGlobalSecurityCookieVal);
3006 // Load CookieVal into a register
3007 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
3008 instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
3009 getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE,
// Non load/store archs can compare stack slot against the immediate directly.
3013 getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE,
3014 compiler->lvaGSSecurityCookie, 0,
3015 (int)compiler->gsGlobalSecurityCookieVal);
// Case 2: the cookie lives at a global address and must be loaded.
3020 regNumber regGSCheck;
3021 regMaskTP regMaskGSCheck;
3022 #if CPU_LOAD_STORE_ARCH
3023 regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
3024 regMaskGSCheck = genRegMask(regGSCheck);
3026 // Don't pick the 'this' register
// On x86, ECX may be holding a live 'this' that must be reported; use EDX.
3027 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
3028 (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
3030 regGSCheck = REG_EDX;
3031 regMaskGSCheck = RBM_EDX;
3035 regGSCheck = REG_ECX;
3036 regMaskGSCheck = RBM_ECX;
3040 if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock)))
3042 pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
3046 noway_assert((regMaskGSCheck & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock)) == 0);
3049 #if defined(_TARGET_ARM_)
3050 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
3051 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck,0);
3053 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
3054 #endif // !_TARGET_ARM_
3055 regTracker.rsTrashRegSet(regMaskGSCheck);
// Compare the frame's cookie slot against the loaded global cookie.
3057 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
3058 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
3059 regTracker.rsTrackRegTrash(regTmp);
3060 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
3062 getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
// Equal => cookie intact, jump over the fail-fast call.
3066 gsCheckBlk = genCreateTempLabel();
3067 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3068 inst_JMP(jmpEqual, gsCheckBlk);
3069 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
3070 genDefineTempLabel(gsCheckBlk);
3072 genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
3076 /*****************************************************************************
3078 * Generate any side effects within the given expression tree.
// Evaluates only the side-effecting portions of 'tree' (throws, volatile
// loads, calls, assignments reached through sub-operands), discarding any
// computed value, and keeps liveness/GC tracking up to date.
3081 void CodeGen::genEvalSideEffects(GenTreePtr tree)
3088 /* Does this sub-tree contain any side-effects? */
3089 if (tree->gtFlags & GTF_SIDE_EFFECT)
3091 #if FEATURE_STACK_FP_X87
3092 /* Remember the current FP stack level */
3093 int iTemps = genNumberTemps();
3095 if (tree->OperIsIndir())
3097 regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
3099 if (tree->gtFlags & GTF_REG_VAL)
3101 gcInfo.gcMarkRegPtrVal(tree);
3102 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3104 // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
3105 // do not need an additional null-check
3106 /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
// BUGFIX: was "((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE)", which ORs
// in the nonzero constant GTF_IND_VOLATILE and is therefore always true,
// contradicting the comment above. Test the flags properly instead.
3107 else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 &&
3108 (tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)))
3110 /* Compare against any register to do null-check */
3111 #if defined(_TARGET_XARCH_)
3112 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
3113 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3114 #elif CPU_LOAD_STORE_ARCH
// Load/store architectures must actually perform the load to fault.
3115 if (varTypeIsFloating(tree->TypeGet()))
3117 genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT, RegSet::FREE_REG);
3121 genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
3124 if (tree->gtFlags & GTF_IND_VOLATILE)
3126 // Emit a memory barrier instruction after the load
3127 instGen_MemoryBarrier();
3136 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3141 /* Generate the expression and throw it away */
3142 genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3143 if (tree->gtFlags & GTF_REG_VAL)
3145 gcInfo.gcMarkRegPtrVal(tree);
3148 #if FEATURE_STACK_FP_X87
3149 /* If the tree computed a value on the FP stack, pop the stack */
3150 if (genNumberTemps() > iTemps)
3152 noway_assert(genNumberTemps() == iTemps+1);
3153 genDiscardStackFP(tree);
// No side effects in this sub-tree: just walk it to update liveness.
3159 noway_assert(tree->gtOper != GT_ASG);
3161 /* Walk the tree, just to mark any dead values appropriately */
3163 oper = tree->OperGet();
3164 kind = tree->OperKind();
3166 /* Is this a constant or leaf node? */
3168 if (kind & (GTK_CONST|GTK_LEAF))
3170 #if FEATURE_STACK_FP_X87
// A dying FP-stack register variable must be unloaded from the x87 stack.
3171 if (tree->IsRegVar() && isFloatRegType(tree->gtType) &&
3172 tree->IsRegVarDeath())
3174 genRegVarDeathStackFP(tree);
3175 FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3178 genUpdateLife(tree);
3179 gcInfo.gcMarkRegPtrVal (tree);
3183 /* Must be a 'simple' unary/binary operator */
3185 noway_assert(kind & GTK_SMPOP);
// Recurse on op1, then iterate (tail-call style) on op2 / op1.
3187 if (tree->gtGetOp2())
3189 genEvalSideEffects(tree->gtOp.gtOp1);
3191 tree = tree->gtOp.gtOp2;
3196 tree = tree->gtOp.gtOp1;
3202 /*****************************************************************************
3204 * A persistent pointer value is being overwritten, record it for the GC.
3206 * tgt : the destination being written to
3207 * assignVal : the value being assigned (the source). It must currently be in a register.
3208 * tgtAddrReg : the set of registers being used by "tgt"
3210 * Returns : the mask of the scratch register that was used.
3211 * RBM_NONE if a write-barrier is not needed.
3214 regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt,
3215 GenTreePtr assignVal,
3216 regMaskTP tgtAddrReg)
3218 noway_assert(assignVal->gtFlags & GTF_REG_VAL);
3220 GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3221 if (wbf == GCInfo::WBF_NoBarrier)
3224 regMaskTP resultRegMask = RBM_NONE;
3226 #if FEATURE_WRITE_BARRIER
3228 regNumber reg = assignVal->gtRegNum;
3230 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
// x86 fast path: per-source-register assembly helpers that take the target
// address in EDX (REG_WRITE_BARRIER) and the value in 'reg'.
3232 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
// Helper table indexed by [checked?][source register number]; entries for
// ESP (and EDX, the barrier register itself) are invalid.
3235 const static int regToHelper[2][8] =
3237 // If the target is known to be in managed memory
3239 CORINFO_HELP_ASSIGN_REF_EAX,
3240 CORINFO_HELP_ASSIGN_REF_ECX,
3242 CORINFO_HELP_ASSIGN_REF_EBX,
3244 CORINFO_HELP_ASSIGN_REF_EBP,
3245 CORINFO_HELP_ASSIGN_REF_ESI,
3246 CORINFO_HELP_ASSIGN_REF_EDI,
3249 // Don't know if the target is in managed memory
3251 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX,
3252 CORINFO_HELP_CHECKED_ASSIGN_REF_ECX,
3254 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX,
3256 CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
3257 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI,
3258 CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
// Sanity-check the table layout against the register numbering.
3262 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
3263 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
3264 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
3265 noway_assert(regToHelper[0][REG_ESP] == -1 );
3266 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
3267 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
3268 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
3270 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
3271 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
3272 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
3273 noway_assert(regToHelper[1][REG_ESP] == -1 );
3274 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
3275 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
3276 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
3278 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
3281 Generate the following code:
3284 call write_barrier_helper_reg
3286 First grab the RBM_WRITE_BARRIER register for the target address.
3292 if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
3294 rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
// Mark and lock EDX so nothing below can steal/spill it before the call.
3296 regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
3297 regSet.rsMaskLock |= RBM_WRITE_BARRIER;
3303 rg1 = REG_WRITE_BARRIER;
3308 noway_assert(rg1 == REG_WRITE_BARRIER);
3310 /* Generate "lea EDX, [addr-mode]" */
// Temporarily retype the target as BYREF so the lea is GC-reported correctly.
3312 noway_assert(tgt->gtType == TYP_REF);
3313 tgt->gtType = TYP_BYREF;
3314 inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
3316 /* Free up anything that was tied up by the LHS */
3317 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3319 // In case "tgt" was a comma:
3320 tgt = tgt->gtEffectiveVal();
3322 regTracker.rsTrackRegTrash(rg1);
3323 gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
3324 gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
3327 /* Call the proper vm helper */
3329 // enforced by gcIsWriteBarrierCandidate
3330 noway_assert(tgt->gtOper == GT_IND ||
3331 tgt->gtOper == GT_CLS_VAR);
// Use the "checked" helper when the target may not be in the GC heap.
3333 unsigned tgtAnywhere = 0;
3334 if ((tgt->gtOper == GT_IND) &&
3335 ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
3340 int helper = regToHelper[tgtAnywhere][reg];
3341 resultRegMask = genRegMask(reg);
3343 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
3345 genEmitHelperCall(helper,
3347 EA_PTRSIZE); // retSize
3351 regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
3352 regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
3355 return resultRegMask;
3361 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3363 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
// Generic path: standard calling-convention helper taking the target
// address in ARG_0 and the value in ARG_1.
3366 Generate the following code (or its equivalent on the given target):
3370 call write_barrier_helper
3372 First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
3375 if (reg != REG_ARG_1)
3377 // We may need to spill whatever is in the ARG_1 register
3379 if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
3381 regSet.rsSpillReg(REG_ARG_1);
3384 inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
3386 resultRegMask = RBM_ARG_1;
3388 regTracker.rsTrackRegTrash(REG_ARG_1);
3389 gcInfo.gcMarkRegSetNpt(REG_ARG_1);
3390 gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
3392 bool free_arg1 = false;
3393 if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
3395 regSet.rsMaskUsed |= RBM_ARG_1;
3399 // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
3401 /* Generate "lea R0, [addr-mode]" */
3403 noway_assert(tgt->gtType == TYP_REF);
3404 tgt->gtType = TYP_BYREF;
// The spill of ARG_1 above may have invalidated tgt's address registers.
3406 tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
3408 // We may need to spill whatever is in the ARG_0 register
3410 if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
3411 ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
3412 (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
3414 regSet.rsSpillReg(REG_ARG_0);
3417 inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
3419 /* Free up anything that was tied up by the LHS */
3420 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3422 regTracker.rsTrackRegTrash(REG_ARG_0);
3423 gcInfo.gcMarkRegSetNpt(REG_ARG_0);
3424 gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
3427 // Finally, we may be required to spill whatever is in the further argument registers
3428 // trashed by the call. The write barrier trashes some further registers --
3429 // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
3430 #if NOGC_WRITE_BARRIERS
3431 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
3433 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
3435 // Spill any other registers trashed by the write barrier call and currently in use.
3436 regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0|RBM_ARG_1));
3437 if (mustSpill) regSet.rsSpillRegs(mustSpill);
3438 #endif // _TARGET_ARM_
3440 bool free_arg0 = false;
3441 if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
3443 regSet.rsMaskUsed |= RBM_ARG_0;
3447 // genEmitHelperCall might need to grab a register
3448 // so don't let it spill one of the arguments
3450 regMaskTP reallyUsedRegs = RBM_NONE;
3451 regSet.rsLockReg(RBM_ARG_0|RBM_ARG_1, &reallyUsedRegs);
3453 genGCWriteBarrier(tgt, wbf);
3455 regSet.rsUnlockReg(RBM_ARG_0|RBM_ARG_1, reallyUsedRegs);
3456 gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
// Only un-mark ARG_0/ARG_1 if we marked them used ourselves above.
3460 regSet.rsMaskUsed &= ~RBM_ARG_0;
3464 regSet.rsMaskUsed &= ~RBM_ARG_1;
3467 return resultRegMask;
3469 #endif // _TARGET_ARM_
3471 #else // !FEATURE_WRITE_BARRIER
3473 NYI("FEATURE_WRITE_BARRIER unimplemented");
3474 return resultRegMask;
3476 #endif // !FEATURE_WRITE_BARRIER
3481 /*****************************************************************************
3483 * Generate the appropriate conditional jump(s) right after the low 32 bits
3484 * of two long values have been compared.
3487 void CodeGen::genJccLongHi(genTreeOps cmp,
3488 BasicBlock * jumpTrue,
3489 BasicBlock * jumpFalse,
// jumpFalse becomes an explicit jump target here (unlike genJccLongLo,
// which only ever jumps to jumpTrue), so mark it as a label.
3494 jumpFalse->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
3500 inst_JMP(EJ_jne, jumpFalse);
3504 inst_JMP(EJ_jne, jumpTrue);
// Ordered comparisons: after comparing the high words, a strict
// inequality decides the result; equality falls through to the low words.
// Unsigned variants use ja/jb, signed use jg/jl.
3511 inst_JMP(EJ_ja , jumpFalse);
3512 inst_JMP(EJ_jb , jumpTrue);
3516 inst_JMP(EJ_jg , jumpFalse);
3517 inst_JMP(EJ_jl , jumpTrue);
3525 inst_JMP(EJ_jb , jumpFalse);
3526 inst_JMP(EJ_ja , jumpTrue);
3530 inst_JMP(EJ_jl , jumpFalse);
3531 inst_JMP(EJ_jg , jumpTrue);
3536 noway_assert(!"expected a comparison operator");
3540 /*****************************************************************************
3542 * Generate the appropriate conditional jump(s) right after the high 32 bits
3543 * of two long values have been compared.
3546 void CodeGen::genJccLongLo(genTreeOps cmp,
3547 BasicBlock* jumpTrue,
3548 BasicBlock* jumpFalse)
// The low words are always compared as unsigned quantities, so every
// ordered jump below uses an unsigned condition (jb/jbe/jae/ja).
3553 inst_JMP(EJ_je , jumpTrue);
3557 inst_JMP(EJ_jne, jumpTrue);
3561 inst_JMP(EJ_jb , jumpTrue);
3565 inst_JMP(EJ_jbe, jumpTrue);
3569 inst_JMP(EJ_jae, jumpTrue);
3573 inst_JMP(EJ_ja , jumpTrue);
3577 noway_assert(!"expected comparison");
3580 #elif defined(_TARGET_ARM_)
3581 /*****************************************************************************
3583 * Generate the appropriate conditional jump(s) right after the low 32 bits
3584 * of two long values have been compared.
3587 void CodeGen::genJccLongHi(genTreeOps cmp,
3588 BasicBlock * jumpTrue,
3589 BasicBlock * jumpFalse,
// jumpFalse becomes an explicit jump target here, so mark it as a label.
3594 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3600 inst_JMP(EJ_ne, jumpFalse);
3604 inst_JMP(EJ_ne, jumpTrue);
// ARM condition codes: hi/lo for unsigned, gt/lt for signed ordered
// comparisons of the high words; equality falls through to the low words.
3611 inst_JMP(EJ_hi, jumpFalse);
3612 inst_JMP(EJ_lo, jumpTrue);
3616 inst_JMP(EJ_gt, jumpFalse);
3617 inst_JMP(EJ_lt, jumpTrue);
3625 inst_JMP(EJ_lo, jumpFalse);
3626 inst_JMP(EJ_hi, jumpTrue);
3630 inst_JMP(EJ_lt, jumpFalse);
3631 inst_JMP(EJ_gt, jumpTrue);
3636 noway_assert(!"expected a comparison operator");
3640 /*****************************************************************************
3642 * Generate the appropriate conditional jump(s) right after the high 32 bits
3643 * of two long values have been compared.
3646 void CodeGen::genJccLongLo(genTreeOps cmp,
3647 BasicBlock* jumpTrue,
3648 BasicBlock* jumpFalse)
// Low words compare as unsigned: ARM lo/ls/hs/hi condition codes.
3653 inst_JMP(EJ_eq, jumpTrue);
3657 inst_JMP(EJ_ne, jumpTrue);
3661 inst_JMP(EJ_lo, jumpTrue);
3665 inst_JMP(EJ_ls, jumpTrue);
3669 inst_JMP(EJ_hs, jumpTrue);
3673 inst_JMP(EJ_hi, jumpTrue);
3677 noway_assert(!"expected comparison");
3681 /*****************************************************************************
3683 * Called by genCondJump() for TYP_LONG.
// Generates a 64-bit compare-and-branch by comparing the high and low
// 32-bit halves separately (or, for EQ/NE against 0 and -1, by combining
// the halves with AND/OR so a single flag-setting op suffices).
3686 void CodeGen::genCondJumpLng(GenTreePtr cond,
3687 BasicBlock* jumpTrue,
3688 BasicBlock* jumpFalse,
3691 noway_assert(jumpTrue && jumpFalse);
3692 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
3693 noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
3695 GenTreePtr op1 = cond->gtOp.gtOp1;
3696 GenTreePtr op2 = cond->gtOp.gtOp2;
3697 genTreeOps cmp = cond->OperGet();
3701 /* Are we comparing against a constant? */
3703 if (op2->gtOper == GT_CNS_LNG)
3705 __int64 lval = op2->gtLngCon.gtLconVal;
3708 // We're "done" evaluating op2; let's strip any commas off op1 before we
3710 op1 = genCodeForCommaTree(op1);
3712 /* We can generate better code for some special cases */
3713 instruction ins = INS_invalid;
3714 bool useIncToSetFlags = false;
3715 bool specialCaseCmp = false;
// EQ/NE vs 0: OR the halves; EQ/NE vs -1: AND the halves (then inc).
3723 useIncToSetFlags = false;
3724 specialCaseCmp = true;
3726 else if (lval == -1)
3730 useIncToSetFlags = true;
3731 specialCaseCmp = true;
3734 else if (cmp == GT_NE)
3740 useIncToSetFlags = false;
3741 specialCaseCmp = true;
3743 else if (lval == -1)
3747 useIncToSetFlags = true;
3748 specialCaseCmp = true;
3754 /* Make the comparand addressable */
3756 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
3758 regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
3759 insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
3761 if (op1->gtFlags & GTF_REG_VAL)
3763 regPairNo regPair = op1->gtRegPair;
3764 regNumber rLo = genRegPairLo(regPair);
3765 regNumber rHi = genRegPairHi(regPair);
// Prefer combining into a grabbable half of the pair; otherwise copy the
// low half into a fresh temp register first.
3766 if (tmpMask & genRegMask(rLo))
3770 else if (tmpMask & genRegMask(rHi))
3777 rTmp = regSet.rsGrabReg(tmpMask);
3778 inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
3781 /* The register is now trashed */
3782 regTracker.rsTrackRegTrash(rTmp);
3786 /* Set the flags using INS_AND | INS_OR */
3787 inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
3791 /* Set the flags using INS_AND | INS_OR */
// Offset 4 addresses the high dword of the in-memory long.
3792 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3796 else // op1 is not GTF_REG_VAL
3798 rTmp = regSet.rsGrabReg(tmpMask);
3800 /* Load the low 32-bits of op1 */
3801 inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
3803 /* The register is now trashed */
3804 regTracker.rsTrackRegTrash(rTmp);
3806 /* Set the flags using INS_AND | INS_OR */
3807 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3810 /* Free up the addrReg(s) if any */
3811 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
3813 /* compares against -1, also requires an an inc instruction */
3814 if (useIncToSetFlags)
3816 /* Make sure the inc will set the flags */
3817 assert(cond->gtSetFlags());
// (lo AND hi) == -1 iff (lo AND hi) + 1 == 0, so inc sets ZF correctly.
3818 genIncRegBy(rTmp, 1, cond, TYP_INT);
3821 #if FEATURE_STACK_FP_X87
3822 // We may need a transition block
3825 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3828 emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
3829 inst_JMP(jmpKind, jumpTrue);
3831 else // specialCaseCmp == false
3833 /* Make the comparand addressable */
3834 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
3836 /* Compare the high part first */
3838 int ival = (int)(lval >> 32);
3840 /* Comparing a register against 0 is easier */
3842 if (!ival && (op1->gtFlags & GTF_REG_VAL)
3843 && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK )
3845 /* Generate 'test rTmp, rTmp' */
3846 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3850 if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
3852 /* Special case: comparison of two constants */
3853 // Needed as gtFoldExpr() doesn't fold longs
3855 noway_assert(addrReg == 0);
3856 int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
3858 /* Get the constant operand into a register */
3859 rTmp = genGetRegSetToIcon(op1_hiword);
3861 /* Generate 'cmp rTmp, ival' */
3863 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3867 /* Generate 'cmp op1, ival' */
3869 inst_TT_IV(INS_cmp, op1, ival, 4);
3873 #if FEATURE_STACK_FP_X87
3874 // We may need a transition block
3877 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3880 /* Generate the appropriate jumps */
3882 if (cond->gtFlags & GTF_UNSIGNED)
3883 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3885 genJccLongHi(cmp, jumpTrue, jumpFalse);
3887 /* Compare the low part second */
3891 /* Comparing a register against 0 is easier */
3893 if (!ival && (op1->gtFlags & GTF_REG_VAL)
3894 && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
3896 /* Generate 'test rTmp, rTmp' */
3897 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3901 if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
3903 /* Special case: comparison of two constants */
3904 // Needed as gtFoldExpr() doesn't fold longs
3906 noway_assert(addrReg == 0);
3907 int op1_loword = (int) op1->gtLngCon.gtLconVal;
3909 /* get the constant operand into a register */
3910 rTmp = genGetRegSetToIcon(op1_loword);
3912 /* Generate 'cmp rTmp, ival' */
3914 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3918 /* Generate 'cmp op1, ival' */
3920 inst_TT_IV(INS_cmp, op1, ival, 0);
3924 /* Generate the appropriate jumps */
3925 genJccLongLo(cmp, jumpTrue, jumpFalse);
3927 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
3930 else // (op2->gtOper != GT_CNS_LNG)
3933 /* The operands would be reversed by physically swapping them */
3935 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
3937 /* Generate the first operand into a register pair */
3939 genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
3940 noway_assert(op1->gtFlags & GTF_REG_VAL);
3942 #if CPU_LOAD_STORE_ARCH
3943 /* Generate the second operand into a register pair */
3944 // Fix 388442 ARM JitStress WP7
3945 genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3946 noway_assert(op2->gtFlags & GTF_REG_VAL);
3947 regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
3949 /* Make the second operand addressable */
3951 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3953 /* Make sure the first operand hasn't been spilled */
3955 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
3956 noway_assert(op1->gtFlags & GTF_REG_VAL);
3958 regPairNo regPair = op1->gtRegPair;
3960 #if !CPU_LOAD_STORE_ARCH
3961 /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
3963 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
3966 #if FEATURE_STACK_FP_X87
3967 // We may need a transition block
3970 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3974 /* Perform the comparison - high parts */
3976 inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
3978 if (cond->gtFlags & GTF_UNSIGNED)
3979 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3981 genJccLongHi(cmp, jumpTrue, jumpFalse);
3983 /* Compare the low parts */
3985 inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
3986 genJccLongLo(cmp, jumpTrue, jumpFalse);
3988 /* Free up anything that was tied up by either operand */
3990 #if CPU_LOAD_STORE_ARCH
3991 // Fix 388442 ARM JitStress WP7
3992 regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair))
3993 genReleaseRegPair(op2);
3995 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
3997 genReleaseRegPair(op1);
4002 /*****************************************************************************
4003 * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
4004 * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
4005 * to the architecture we're running on.
4008 * gen_fcomp_FN: fcomp ST(0), stk
4009 * gen_fcomp_FS_TT: fcomp ST(0), addr
4010 * gen_fcompp_FS: fcompp
4011 * These are followed by fnstsw, sahf to get the flags in EFLAGS.
4014 * gen_fcomp_FN: fcomip ST(0), stk
4015 * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
4016 * (and reverse the branch condition since addr comes first)
4017 * gen_fcompp_FS: fcomip, fstp
4018 * These instructions will correctly set the EFLAGS register.
4020 * Return value: These functions return true if the instruction has
4021 * already placed its result in the EFLAGS register.
4024 bool CodeGen::genUse_fcomip()
4026 return compiler->opts.compUseFCOMI;
4029 /*****************************************************************************
4031 * Sets the flag for the TYP_INT/TYP_REF comparison.
4032 * We try to use the flags if they have already been set by a prior
4034 * eg. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
4035 * need to compare again with zero. Just use a "INS_js"
4037 * Returns the flags the following jump/set instruction should use.
/*****************************************************************************
 *
 *  Sets the processor flags for a TYP_INT/TYP_REF comparison node 'cond'.
 *  Tries to reuse flags already set by a prior instruction (e.g. "i++;"
 *  before "if (i < 0)" leaves the sign flag set) so an explicit cmp/test
 *  can sometimes be elided.
 *
 *  Returns the emitJumpKind that the following jump/set instruction
 *  should use (EJ_COUNT is used internally as the "not yet set" value).
 *
 *  NOTE(review): this listing has interleaved lines elided by extraction
 *  (braces, case/goto labels, some #if lines and local declarations such
 *  as 'regNeed', 'op1Reg', 'shortCmp', 'byteCmp' are missing). The code
 *  below is preserved verbatim; only comments were added.
 */
emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
    noway_assert(cond->OperIsCompare());
    noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));

    GenTreePtr op1 = cond->gtOp.gtOp1;
    GenTreePtr op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    // Canonicalize: physically swap the operands and flip the relop so the
    // rest of this function never has to consider GTF_REVERSE_OPS.
    if (cond->gtFlags & GTF_REVERSE_OPS)
        /* Don't forget to modify the condition as well */
        cond->gtOp.gtOp1 = op2;
        cond->gtOp.gtOp2 = op1;
        cond->SetOper (GenTree::SwapRelop(cmp));
        cond->gtFlags &= ~GTF_REVERSE_OPS;

        /* Get hold of the new values */
        cmp = cond->OperGet();
        op1 = cond->gtOp.gtOp1;
        op2 = cond->gtOp.gtOp2;

    // Note that op1's type may get bashed. So save it early
    var_types op1Type = op1->TypeGet();
    bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
    emitAttr size = EA_UNKNOWN;

    regMaskTP addrReg1 = RBM_NONE;
    regMaskTP addrReg2 = RBM_NONE;
    emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value

    regMaskTP newLiveMask;

    /* Are we comparing against a constant? */
    if (op2->IsCnsIntOrI())
        ssize_t ival = op2->gtIntConCommon.IconValue();

        /* unsigned less than comparisons with 1 ('< 1' )
           should be transformed into '== 0' to potentially
           suppress a tst instruction.
        */
        if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
            op2->gtIntCon.gtIconVal = ival = 0;
            cond->gtOper = cmp = GT_EQ;

        /* Comparisons against 0 can be easier */

        // if we can safely change the comparison to unsigned we do so
        // (NOTE(review): the start of this condition is elided in this
        //  listing; the visible clauses require a small unsigned op1 type)
            varTypeIsSmall(op1->TypeGet()) &&
            varTypeIsUnsigned(op1->TypeGet()))

        /* unsigned comparisons with 0 should be transformed into
           '==0' or '!= 0' to potentially suppress a tst instruction. */
            cond->gtOper = cmp = GT_NE;
        else if (cmp == GT_LE)
            cond->gtOper = cmp = GT_EQ;

        /* Is this a simple zero/non-zero test? */
        if (cmp == GT_EQ || cmp == GT_NE)
            /* Is the operand an "AND" operation? */
            if (op1->gtOper == GT_AND)
                GenTreePtr an1 = op1->gtOp.gtOp1;
                GenTreePtr an2 = op1->gtOp.gtOp2;

                /* Check for the case "expr & icon" */
                if (an2->IsIntCnsFitsInI32())
                    int iVal = (int)an2->gtIntCon.gtIconVal;

                    /* make sure that constant is not out of an1's range */
                    switch (an1->gtType)
                        // (NOTE(review): case labels elided — this branch
                        //  rejects masks wider than a byte-sized an1)
                        if (iVal & 0xffffff00)
                            goto NO_TEST_FOR_AND;
                        // (elided case labels — masks wider than 16 bits)
                        if (iVal & 0xffff0000)
                            goto NO_TEST_FOR_AND;

                    if (an1->IsCnsIntOrI())
                        // Special case - Both operands of AND are consts
                        genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
                        addrReg1 = genRegMask(an1->gtRegNum);
                        addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
#if CPU_LOAD_STORE_ARCH
                    if ((an1->gtFlags & GTF_REG_VAL) == 0)
                        genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
                        if (arm_Valid_Imm_For_Alu(iVal))
                            inst_RV_IV(INS_TEST,an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
                            // Immediate doesn't fit an ALU encoding:
                            // materialize it in a scratch register first.
                            regNumber regTmp = regSet.rsPickFreeReg();
                            instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
                            inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
                        addrReg1 = RBM_NONE;
#ifdef _TARGET_XARCH_
                    // Check to see if we can use a smaller immediate.
                    if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
                        var_types testType = (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
#if CPU_HAS_BYTE_REGS
                        // if we don't have byte-able register, switch to the 2-byte form
                        if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
                            testType = TYP_USHORT;
#endif // CPU_HAS_BYTE_REGS

                        inst_TT_IV(INS_TEST, an1, iVal, testType);
#endif // _TARGET_XARCH_
                    inst_TT_IV(INS_TEST, an1, iVal);

            // TODO: Check for other cases that can generate 'test',
            // TODO: also check for a 64-bit integer zero test which
            // TODO: could generate 'or lo, hi' followed by jz/jnz.

        // See what Jcc instruction we would use if we can take advantage of
        // the knowledge of EFLAGs.

        // (NOTE(review): the enclosing if/switch headers for the two jump-kind
        //  tables below, and the "#if defined(_TARGET_ARM_)" lines, are elided)
        /*
            Unsigned comparison to 0. Using this table:

            ----------------------------------------------------
            | Comparison | Flags Checked    | Instruction Used |
            ----------------------------------------------------
            |    == 0    | ZF = 1           |       je         |
            ----------------------------------------------------
            |    != 0    | ZF = 0           |       jne        |
            ----------------------------------------------------
            |     < 0    | always FALSE     |       N/A        |
            ----------------------------------------------------
            |    <= 0    | ZF = 1           |       je         |
            ----------------------------------------------------
            |    >= 0    | always TRUE      |       N/A        |
            ----------------------------------------------------
            |     > 0    | ZF = 0           |       jne        |
            ----------------------------------------------------
        */
            case GT_EQ: jumpKind = EJ_eq; break;
            case GT_NE: jumpKind = EJ_ne; break;
            case GT_LT: jumpKind = EJ_NONE; break; // always false: no jump needed
            case GT_LE: jumpKind = EJ_eq; break;
            case GT_GE: jumpKind = EJ_NONE; break; // always true: no jump needed
            case GT_GT: jumpKind = EJ_ne; break;
#elif defined(_TARGET_X86_)
            case GT_EQ: jumpKind = EJ_je; break;
            case GT_NE: jumpKind = EJ_jne; break;
            case GT_LT: jumpKind = EJ_NONE; break;
            case GT_LE: jumpKind = EJ_je; break;
            case GT_GE: jumpKind = EJ_NONE; break;
            case GT_GT: jumpKind = EJ_jne; break;
            noway_assert(!"Unexpected comparison OpCode");

        /*
            Signed comparison to 0. Using this table:

            -----------------------------------------------------
            | Comparison  | Flags Checked    | Instruction Used |
            -----------------------------------------------------
            |    == 0     | ZF = 1           |       je         |
            -----------------------------------------------------
            |    != 0     | ZF = 0           |       jne        |
            -----------------------------------------------------
            |     < 0     | SF = 1           |       js         |
            -----------------------------------------------------
            |    <= 0     | N/A              |       N/A        |
            -----------------------------------------------------
            |    >= 0     | SF = 0           |       jns        |
            -----------------------------------------------------
            -----------------------------------------------------
        */
            case GT_EQ: jumpKind = EJ_eq; break;
            case GT_NE: jumpKind = EJ_ne; break;
            case GT_LT: jumpKind = EJ_mi; break;
            case GT_LE: jumpKind = EJ_NONE; break; // <= 0 not expressible from Z/S alone
            case GT_GE: jumpKind = EJ_pl; break;
            case GT_GT: jumpKind = EJ_NONE; break;
#elif defined(_TARGET_X86_)
            case GT_EQ: jumpKind = EJ_je; break;
            case GT_NE: jumpKind = EJ_jne; break;
            case GT_LT: jumpKind = EJ_js; break;
            case GT_LE: jumpKind = EJ_NONE; break;
            case GT_GE: jumpKind = EJ_jns; break;
            case GT_GT: jumpKind = EJ_NONE; break;
            noway_assert(!"Unexpected comparison OpCode");

        assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));

        assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above

        /* Is the value a simple local variable? */
        if (op1->gtOper == GT_LCL_VAR)
            /* Is the flags register set to the value? */
            if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
                if (jumpKind != EJ_NONE)
                    addrReg1 = RBM_NONE;
                    // (NOTE(review): the jump to DONE_FLAGS for this
                    //  flags-reuse fast path is elided in this listing)

        /* Make the comparand addressable */
        addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);

        /* Are the condition flags set based on the value? */
        unsigned flags = (op1->gtFlags & GTF_ZSF_SET);

        if (op1->gtFlags & GTF_REG_VAL)
            if (genFlagsAreReg(op1->gtRegNum))
                flags |= GTF_ZSF_SET;

            if (jumpKind != EJ_NONE)
                // (elided: reuse the already-set flags and skip the compare)

        /* Is the value in a register? */
        if (op1->gtFlags & GTF_REG_VAL)
            regNumber reg = op1->gtRegNum;

            /* With a 'test' we can do any signed test or any test for equality */
            if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
                emitAttr compareSize = emitTypeSize(op1->TypeGet());

                // If we have an GT_REG_VAR then the register will be properly sign/zero extended
                // But only up to 4 bytes
                if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
                    compareSize = EA_4BYTE;

#if CPU_HAS_BYTE_REGS
                // Make sure if we require a byte compare that we have a byte-able register
                if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
#endif // CPU_HAS_BYTE_REGS
                    /* Generate 'test reg, reg' */
                    instGen_Compare_Reg_To_Zero(compareSize, reg);
        else // if (ival != 0)
            bool smallOk = true;

            /* make sure that constant is not out of op1's range
               if it is, we need to perform an int with int comparison
               and therefore, we set smallOk to false, so op1 gets loaded
            */

            /* If op1 is TYP_SHORT, and is followed by an unsigned
             * comparison, we can use smallOk. But we don't know which
             * flags will be needed. This probably doesn't happen often.
            */
            var_types gtType=op1->TypeGet();

                case TYP_BYTE: if (ival != (signed char )ival) smallOk = false; break;
                case TYP_UBYTE: if (ival != (unsigned char )ival) smallOk = false; break;
                case TYP_SHORT: if (ival != (signed short)ival) smallOk = false; break;
                case TYP_CHAR: if (ival != (unsigned short)ival) smallOk = false; break;
#ifdef _TARGET_64BIT_
                case TYP_INT: if (!FitsIn<INT32>(ival)) smallOk = false; break;
                case TYP_UINT: if (!FitsIn<UINT32>(ival)) smallOk = false; break;
#endif // _TARGET_64BIT_

            if (smallOk && // constant is in op1's range
                !unsignedCmp && // signed comparison
                varTypeIsSmall(gtType) && // smalltype var
                varTypeIsUnsigned(gtType)) // unsigned type
                // (elided: body — presumably widens to an int comparison)

            /* Make the comparand addressable */
            addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);

        // #if defined(DEBUGGING_SUPPORT)

        /* Special case: comparison of two constants */

        // Needed if Importer doesn't call gtFoldExpr()

        if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
            // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);

            /* Workaround: get the constant operand into a register */
            genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);

            noway_assert(addrReg1 == RBM_NONE);
            noway_assert(op1->gtFlags & GTF_REG_VAL);

            addrReg1 = genRegMask(op1->gtRegNum);

        /* Compare the operand against the constant */

        if (op2->IsIconHandle())
            // Handle constants may need a relocation applied to the immediate.
            inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
            inst_TT_IV(INS_cmp, op1, ival);

    //---------------------------------------------------------------------
    //
    // We reach here if op2 was not a GT_CNS_INT
    //

    // Small-size compares are only attempted when both operand types match.
    if (op1Type == op2->gtType)
        shortCmp = varTypeIsShort(op1Type);
        byteCmp = varTypeIsByte(op1Type);

    noway_assert(op1->gtOper != GT_CNS_INT);

    if (op2->gtOper == GT_LCL_VAR)
        // (elided: body — local-variable bookkeeping for op2)

    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));

    /* Are we comparing against a register? */

    if (op2->gtFlags & GTF_REG_VAL)
        /* Make the comparands addressable and mark as used */

        assert(addrReg1 == RBM_NONE);
        addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);

        /* Is the size of the comparison byte/char/short ? */

        if (varTypeIsSmall(op1->TypeGet()))
            /* Is op2 sitting in an appropriate register? */
            // (NOTE(review): the goto targets of the following checks are
            //  elided — they bail out of the register-compare fast path)
            if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))

            /* Is op2 of the right type for a small comparison */

            if (op2->gtOper == GT_REG_VAR)
                if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
                if (op1->gtType != op2->gtType)

            if (varTypeIsUnsigned(op1->TypeGet()))

        assert(addrReg2 == RBM_NONE);

        genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
        addrReg2 = genRegMask(op2->gtRegNum);
        addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
        assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
        assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));

        /* Compare against the register */

        inst_TT_RV(INS_cmp, op1, op2->gtRegNum);

        // op1 has been made addressable and is marked as in use
        // op2 is un-generated
        assert(addrReg2 == 0);

        if ((op1->gtFlags & GTF_REG_VAL) == 0)
            regNumber reg1 = regSet.rsPickReg();

            noway_assert(varTypeIsSmall(op1->TypeGet()));
            instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL)!=0);

            // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
            // so we need to make sure it is still valid. Note that at this point, reg1 is
            // *not* marked as in use, and it is possible for it to be used in the address
            // mode expression, but that is OK, because we are done with expression after
            // this. We only need reg1.
            addrReg1 = genKeepAddressable(op1, addrReg1);
            inst_RV_TT(ins, reg1, op1);
            regTracker.rsTrackRegTrash(reg1);

            genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);

            genMarkTreeInReg(op1, reg1);

            regSet.rsMarkRegUsed(op1);
            addrReg1 = genRegMask(op1->gtRegNum);

        assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
        assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));

        // We come here if op2 is not enregistered or not in a "good" register.

        assert(addrReg1 == 0);

        // Determine what registers go live between op1 and op2
        newLiveMask = genNewLiveRegMask(op1, op2);

        // Setup regNeed with the set of register that we suggest for op1 to be in
        regNeed = RBM_ALLINT;

        // avoid selecting registers that get newly born in op2
        regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);

        // avoid selecting op2 reserved regs
        regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);

#if CPU_HAS_BYTE_REGS
        // if necessary setup regNeed to select just the byte-able registers
        regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
#endif // CPU_HAS_BYTE_REGS

        // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
        genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
        noway_assert(op1->gtFlags & GTF_REG_VAL);

        op1Reg = op1->gtRegNum;

        // Setup regNeed with the set of register that we require for op1 to be in
        regNeed = RBM_ALLINT;

#if CPU_HAS_BYTE_REGS
        // if necessary setup regNeed to select just the byte-able registers
        regNeed &= RBM_BYTE_REGS;
#endif // CPU_HAS_BYTE_REGS

        // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
        regNeed = regSet.rsMustExclude(regNeed, newLiveMask);

        // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
        regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);

        // Did we end up in an acceptable register?
        // and do we have an acceptable free register available to grab?
        //
        if ( ((genRegMask(op1Reg) & regNeed) == 0) &&
             ((regSet.rsRegMaskFree() & regNeed) != 0) )
            // Grab an acceptable register
            regNumber newReg = regSet.rsGrabReg(regNeed);

            noway_assert(op1Reg != newReg);

            /* Update the value in the target register */

            regTracker.rsTrackRegCopy(newReg, op1Reg);

            inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());

            /* The value has been transferred to 'reg' */

            if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
                gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));

            gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());

            /* The value is now in an appropriate register */

            op1->gtRegNum = newReg;

        noway_assert(op1->gtFlags & GTF_REG_VAL);
        op1Reg = op1->gtRegNum;

        /* Mark the register as 'used' */
        regSet.rsMarkRegUsed(op1);

        addrReg1 = genRegMask(op1Reg);

        assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
        assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));

    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
    noway_assert(op1->gtFlags & GTF_REG_VAL);

    // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
    // when byteCmp is true we will perform a byte sized cmp instruction
    // and that instruction requires that any registers used are byte-able ones.
    regNeed = RBM_ALLINT;

#if CPU_HAS_BYTE_REGS
    // if necessary setup regNeed to select just the byte-able registers
    regNeed &= RBM_BYTE_REGS;
#endif // CPU_HAS_BYTE_REGS

    /* Make the comparand addressable */
    assert(addrReg2 == 0);
    addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));

    /*  Make sure the first operand is still in a register; if
        it's been spilled, we have to make sure it's reloaded
        into a byte-addressable register if needed.
        Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong.
     */

    assert(addrReg1 != 0);
    genRecoverReg(op1, regNeed, RegSet::KEEP_REG);

    noway_assert(op1->gtFlags & GTF_REG_VAL);
    noway_assert(!byteCmp || isByteReg(op1->gtRegNum));

    addrReg1 = genRegMask(op1->gtRegNum);
    regSet.rsLockUsedReg(addrReg1);

    /* Make sure that op2 is addressable. If we are going to do a
       byte-comparison, we need it to be in a byte register. */

    if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
        genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
        addrReg2 = genRegMask(op2->gtRegNum);
        addrReg2 = genKeepAddressable(op2, addrReg2);

    regSet.rsUnlockUsedReg(addrReg1);

    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));

    if (byteCmp || shortCmp)
        size = emitTypeSize(op2->TypeGet());
        if (varTypeIsUnsigned(op1Type))
        size = emitActualTypeSize(op2->TypeGet());

    /* Perform the comparison */
    inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);

    jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);

DONE_FLAGS: // We have determined what jumpKind to use

    genUpdateLife(cond);

    /* The condition value is dead at the jump that follows */

    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
    genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
    genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);

    noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
4752 /*****************************************************************************/
4753 /*****************************************************************************/
4754 /*****************************************************************************
4756 * Generate code to jump to the jump target of the current basic block if
4757 * the given relational operator yields 'true'.
/*****************************************************************************
 *
 *  Generate code to jump to the jump target of the current basic block if
 *  the given relational operator yields 'true'. Dispatches on the actual
 *  type of op1: int-sized compares set flags via genCondSetFlags() and emit
 *  one conditional jump; long and floating compares are handed off to the
 *  dedicated helpers.
 *
 *  NOTE(review): interleaved lines (braces, switch case labels, a trailing
 *  'bStackFPFixup' parameter visible only in its use below) were elided
 *  from this listing; code kept verbatim, comments only added.
 */
void CodeGen::genCondJump(GenTreePtr cond,
                          BasicBlock *destTrue,
                          BasicBlock *destFalse,
    BasicBlock * jumpTrue;
    BasicBlock * jumpFalse;

    GenTreePtr op1 = cond->gtOp.gtOp1;
    GenTreePtr op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    // Caller-supplied targets take precedence ...
        jumpTrue = destTrue;
        jumpFalse = destFalse;
    // ... otherwise use the current BBJ_COND block's taken/fall-through targets.
        noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);

        jumpTrue = compiler->compCurBB->bbJumpDest;
        jumpFalse = compiler->compCurBB->bbNext;

    noway_assert(cond->OperIsCompare());

    /* Make sure the more expensive operand is 'op1' */
    noway_assert( (cond->gtFlags & GTF_REVERSE_OPS) == 0 );

    if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
        /* Don't forget to modify the condition as well */

        cond->gtOp.gtOp1 = op2;
        cond->gtOp.gtOp2 = op1;
        cond->SetOper (GenTree::SwapRelop(cmp));
        cond->gtFlags &= ~GTF_REVERSE_OPS;

        /* Get hold of the new values */

        cmp = cond->OperGet();
        op1 = cond->gtOp.gtOp1;
        op2 = cond->gtOp.gtOp2;

    /* What is the type of the operand? */

    switch (genActualType(op1->gtType))
        // (elided case label — int-sized / GC-ref comparands)
        emitJumpKind jumpKind;

        // Check if we can use the currently set flags. Else set them

        jumpKind = genCondSetFlags(cond);

#if FEATURE_STACK_FP_X87
        // (elided: FP-stack transition fixup of the true target)
        genCondJmpInsStackFP(jumpKind,

        /* Generate the conditional jump */
        inst_JMP(jumpKind, jumpTrue);

        // (elided case label — TYP_LONG comparands)
#if FEATURE_STACK_FP_X87
        genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
        genCondJumpLng(cond, jumpTrue, jumpFalse);

        // (elided case label — floating-point comparands)
#if FEATURE_STACK_FP_X87
        genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
        genCondJumpFloat(cond, jumpTrue, jumpFalse);

        // (elided default label — debug-dump the tree, then fail)
        compiler->gtDispTree(cond);
        unreached(); // unexpected/unsupported 'jtrue' operands type
4866 /*****************************************************************************
4867 * Spill registers to check callers can handle it.
/*****************************************************************************
 *
 *  Register-stress mode: spill as many in-use registers as possible and
 *  zero out the modified free ones, so that callers of codegen for 'tree'
 *  are exercised against unexpected spills. 'tree' (and its direct operands,
 *  for simple TYP_INT nodes) is protected from being trashed.
 *
 *  NOTE(review): interleaved lines (braces, early 'return's, declarations
 *  of regNum/regBit) were elided from this listing; code kept verbatim,
 *  comments only added.
 */
void CodeGen::genStressRegs(GenTreePtr tree)
    if (regSet.rsStressRegs() < 2)
        // (elided: return — stress level too low to do anything)

    /* Spill as many registers as possible. Callers should be prepared
       to handle this case.
       But don't spill trees with no size (TYP_STRUCT comes to mind) */

    regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;

    for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
        // Only spill registers that hold a tree of nonzero size.
        if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) && (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
            regSet.rsSpillReg(regNum);

            spillRegs &= regSet.rsMaskUsed;

    // Everything still free after spilling is a candidate for trashing.
    regMaskTP trashRegs = regSet.rsRegMaskFree();

    if (trashRegs == RBM_NONE)
        // (elided: return — nothing left to trash)

    /* It is sometimes reasonable to expect that calling genCodeForTree()
       on certain trees won't spill anything */

    if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) &&
        (compiler->compCurBB->bbCatchTyp) && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
        // First statement of a handler: the incoming exception object register
        // must be left intact.
        trashRegs &= ~(RBM_EXCEPTION_OBJECT);

    // If genCodeForTree() effectively gets called a second time on the same tree

    if (tree->gtFlags & GTF_REG_VAL)
        noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
        trashRegs &= ~genRegMask(tree->gtRegNum);

    if (tree->gtType == TYP_INT && tree->OperIsSimple())
        GenTreePtr op1 = tree->gtOp.gtOp1;
        GenTreePtr op2 = tree->gtOp.gtOp2;
        if (op1 && (op1->gtFlags & GTF_REG_VAL))
            trashRegs &= ~genRegMask(op1->gtRegNum);
        if (op2 && (op2->gtFlags & GTF_REG_VAL))
            trashRegs &= ~genRegMask(op2->gtRegNum);

    if (compiler->compCurBB == compiler->genReturnBB)
        if (compiler->info.compCallUnmanaged)
            // Keep the enregistered frame-list-root (pinvoke TCB) alive.
            LclVarDsc * varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
            if (varDsc->lvRegister)
                trashRegs &= ~genRegMask(varDsc->lvRegNum);

    /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
       to save/restore the register. We try to be as unintrusive
     */

    noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
    // This is obviously false for ARM, but this function is never called.
    for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
        regMaskTP regMask = genRegMask(reg);

        if (regSet.rsRegsModified(regMask & trashRegs))
            genSetRegToIcon(reg, 0);
4960 /*****************************************************************************
4962 * Generate code for a GTK_CONST tree
/*****************************************************************************
 *
 *  Generate code for a GTK_CONST tree: materialize an integer/handle
 *  constant into a register, reusing a register that already holds the
 *  value when possible (REDUNDANT_LOAD), and applying a relocation for
 *  handle constants.
 *
 *  NOTE(review): interleaved lines (braces, destReg/bestReg parameter
 *  lines, the declaration of 'reg', and several #if/#ifdef lines) were
 *  elided from this listing; code kept verbatim, comments only added.
 */
void CodeGen::genCodeForTreeConst(GenTreePtr tree,
    noway_assert(tree->IsCnsIntOrI());

    ssize_t ival = tree->gtIntConCommon.IconValue();
    regMaskTP needReg = destReg;
    bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();

    /* If we are targeting destReg and ival is zero */
    /* we would rather xor needReg than copy another register */

    bool reuseConstantInReg = false;

    if (destReg == RBM_NONE)
        reuseConstantInReg = true;

    // (elided: ARM-only branch)
    // If we can set a register to a constant with a small encoding, then do that.
    // Assume we'll get a low register if needReg has low registers as options.
    if (!reuseConstantInReg &&
        !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
        reuseConstantInReg = true;
    if (!reuseConstantInReg && ival != 0)
        reuseConstantInReg = true;

    if (reuseConstantInReg)
        /* Is the constant already in register? If so, use this register */

        reg = regTracker.rsIconIsInReg(ival);
#endif // REDUNDANT_LOAD

    reg = regSet.rsPickReg(needReg, bestReg);

    /* If the constant is a handle, we need a reloc to be applied to it */

        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
        regTracker.rsTrackRegTrash(reg);
        genSetRegToIcon(reg, ival, tree->TypeGet());

    /* Special case: GT_CNS_INT - Restore the current live set if it was changed */

    if (!genTempLiveChg)
        VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
        genTempLiveChg = true;

    gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
    genCodeForTree_DONE(tree, reg);
5044 /*****************************************************************************
5046 * Generate code for a GTK_LEAF tree
/*****************************************************************************
 *
 *  Generate code for a GTK_LEAF tree: local variables/fields, class vars,
 *  NO_OPs, end-of-finally shadow-SP bookkeeping, catch arguments, GT_JMP
 *  and memory barriers. Leaves the result register in 'reg' and finishes
 *  via genCodeForTree_DONE().
 *
 *  NOTE(review): interleaved lines (braces, the destReg/bestReg parameter
 *  lines, the switch statement and its case labels, the declaration of
 *  'size') were elided from this listing; code kept verbatim, comments
 *  only added. Case attributions below are inferred from the statements
 *  that survived — confirm against the full file.
 */
void CodeGen::genCodeForTreeLeaf(GenTreePtr tree,
    genTreeOps oper = tree->OperGet();
    regNumber reg = DUMMY_INIT(REG_CORRUPT);
    regMaskTP regs = regSet.rsMaskUsed;
    regMaskTP needReg = destReg;
    noway_assert(tree->OperKind() & GTK_LEAF);

        // (elided case label — register variables should never reach here)
        NO_WAY("GT_REG_VAR should have been caught above");

        // (elided case label — presumably GT_LCL_VAR)
        /* Does the variable live in a register? */

        if (genMarkLclVar(tree))
            genCodeForTree_REG_VAR1(tree);

        /* Is the local variable already in register? */

        reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);

            /* Use the register the variable happens to be in */
            regMaskTP regMask = genRegMask(reg);

            // If the register that it was in isn't one of the needRegs
            // then try to move it into a needReg register

            if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
                regNumber rg2 = reg;
                reg = regSet.rsPickReg(needReg, bestReg);

                regMask = genRegMask(reg);
                inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());

            gcInfo.gcMarkRegPtrVal (reg, tree->TypeGet());
            regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);

        // (elided case label — presumably GT_LCL_FLD)
        // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
        // to worry about it being enregistered.
        noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);

        // (elided case label — load-from-memory path shared with GT_CLS_VAR)
        /* Pick a register for the value */

        reg = regSet.rsPickReg(needReg, bestReg);

        /* Load the variable into the register */

        size = genTypeSize(tree->gtType);

        if (size < EA_4BYTE)
            // Small types load with a sign/zero-extending move.
            instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL)!=0);
            inst_RV_TT(ins, reg, tree, 0);

            /* We've now "promoted" the tree-node to TYP_INT */

            tree->gtType = TYP_INT;
            inst_RV_TT(INS_mov, reg, tree, 0);

        regTracker.rsTrackRegTrash(reg);

        gcInfo.gcMarkRegPtrVal (reg, tree->TypeGet());

            regTracker.rsTrackRegClsVar(reg, tree);
            regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);

        default: noway_assert(!"Unexpected oper");

        if (tree->gtFlags & GTF_IND_VOLATILE)
            // Emit a memory barrier instruction after the load
            instGen_MemoryBarrier();

        // (elided case label — presumably GT_NO_OP)
        if (tree->gtFlags & GTF_NO_OP_NO)
            // The VM does certain things with actual NOP instructions
            // so generate something small that has no effect, but isn't
#ifdef _TARGET_XARCH_
            // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
#elif defined (_TARGET_ARM_)
            // The VM isn't checking yet, when it does, hopefully it will
            // get fooled by the wider variant.
            NYI("Non-nop NO_OP");

#if !FEATURE_EH_FUNCLETS
        // (elided case label — presumably GT_END_LFIN)
        /* Have to clear the shadowSP of the nesting level which
           encloses the finally */

        unsigned finallyNesting;
        finallyNesting = (unsigned)tree->gtVal.gtVal1;
        noway_assert(tree->gtVal.gtVal1 < compiler->compHndBBtabCount); //assert we didn't truncate with the cast above.
        noway_assert(finallyNesting < compiler->compHndBBtabCount);

        // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
        unsigned filterEndOffsetSlotOffs;
        PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > sizeof(void*)); //below doesn't underflow.
        filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));

        unsigned curNestingSlotOffs;
        curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
        instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
                                   compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
#endif // !FEATURE_EH_FUNCLETS

        // (elided case label — presumably GT_CATCH_ARG)
        noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

        /* Catch arguments get passed in a register. genCodeForBBlist()
           would have marked it as holding a GC object, but not used. */

        noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
        reg = REG_EXCEPTION_OBJECT;

        // (elided case label — presumably GT_JMP)
        genCodeForTreeLeaf_GT_JMP(tree);

    case GT_MEMORYBARRIER:
        // Emit the memory barrier instruction
        instGen_MemoryBarrier();

        // (elided default/debug path — dump the unexpected tree and fail)
        compiler->gtDispTree(tree);
        noway_assert(!"unexpected leaf");

    noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
    genCodeForTree_DONE(tree, reg);
5252 GenTreePtr CodeGen::genCodeForCommaTree (GenTreePtr tree)
5254 while (tree->OperGet() == GT_COMMA)
5256 GenTreePtr op1 = tree->gtOp.gtOp1;
5257 genCodeForTree(op1, RBM_NONE);
5258 gcInfo.gcMarkRegPtrVal(op1);
5260 tree = tree->gtOp.gtOp2;
5265 /*****************************************************************************
5267 * Generate code for a leaf node of type GT_JMP
// Generates code for a GT_JMP leaf (a tail-"jmp" to another method that
// reuses this method's arguments):
//  1. If a profiler hook is needed, emits the CORINFO_HELP_PROF_FCN_TAILCALL
//     callback (per-target code paths below; exact #ifdef structure per
//     target architecture).
//  2. Runs the PInvoke method epilog if this method calls unmanaged code.
//  3. Restores every argument to its original home location: enregistered
//     stack args are stored back to the stack, and register args are
//     reloaded from the stack into their incoming registers, so the target
//     method sees the arguments exactly as on entry to this one.
//  4. For varargs methods, reloads any remaining (non-fixed) incoming arg
//     registers from the vararg area.
5270 void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
5272 noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
5274 #ifdef PROFILING_SUPPORTED
5275 if (compiler->compIsProfilerHookNeeded())
5277 /* fire the event at the call site */
5278 unsigned saveStackLvl2 = genStackLevel;
5280 compiler->info.compProfilerCallback = true;
5284 // Push the profilerHandle
// Save any live caller-trashed arg registers around the helper call.
5286 regMaskTP byrefPushedRegs;
5287 regMaskTP norefPushedRegs;
5288 regMaskTP pushedArgRegs = genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed|regSet.rsMaskVars|regSet.rsMaskLock), &byrefPushedRegs, &norefPushedRegs);
// The profiler method handle may be given to us directly or via an
// indirection cell; push whichever form we have.
5290 if (compiler->compProfilerMethHndIndirected)
5292 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd)
5296 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
5300 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5301 sizeof(int) * 1, // argSize
5302 EA_UNKNOWN); // retSize
5305 // Adjust the number of stack slots used by this managed method if necessary.
5307 if (compiler->fgPtrArgCntMax < 1)
5309 compiler->fgPtrArgCntMax = 1;
5312 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
5314 // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node.
5315 // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target.
5316 regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
5317 noway_assert(argReg == REG_PROFILER_JMP_ARG);
5318 regSet.rsLockReg(RBM_PROFILER_JMP_USED);
5320 if (compiler->compProfilerMethHndIndirected)
5322 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
5323 regTracker.rsTrackRegTrash(argReg);
5327 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
5330 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5332 EA_UNKNOWN); // retSize
5334 regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
// Targets other than the ones handled above have no profiler-tailcall
// implementation yet.
5336 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
5337 #endif //_TARGET_X86_
5339 /* Restore the stack level */
5340 genStackLevel = saveStackLvl2;
5342 #endif // PROFILING_SUPPORTED
5344 /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
5345 * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
5346 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
5349 if (compiler->info.compCallUnmanaged)
5351 genPInvokeMethodEpilog();
5354 // Make sure register arguments are in their initial registers
5355 // and stack arguments are put back as well.
5357 // This does not deal with circular dependencies of register
5358 // arguments, which is safe because RegAlloc prevents that by
5359 // not enregistering any RegArgs when a JMP opcode is used.
// Nothing to restore for parameterless methods.
5361 if (compiler->info.compArgsCount == 0)
5369 // First move any enregistered stack arguments back to the stack
5370 for (varNum = 0, varDsc = compiler->lvaTable;
5371 varNum < compiler->info.compArgsCount;
5372 varNum++ , varDsc++)
5374 noway_assert(varDsc->lvIsParam);
// Skip register args here (handled in the next loop) and args that
// were never enregistered.
5375 if (varDsc->lvIsRegArg || !varDsc->lvRegister)
5378 /* Argument was passed on the stack, but ended up in a register
5379 * Store it back to the stack */
5381 #ifndef _TARGET_64BIT_
5382 if (varDsc->TypeGet() == TYP_LONG)
5384 /* long - at least the low half must be enregistered */
5386 getEmitter()->emitIns_S_R(ins_Store(TYP_INT),
5392 /* Is the upper half also enregistered? */
5394 if (varDsc->lvOtherReg != REG_STK)
5396 getEmitter()->emitIns_S_R(ins_Store(TYP_INT),
5404 #endif // _TARGET_64BIT_
5406 getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()),
5407 emitTypeSize(varDsc->TypeGet()),
// Tracks which incoming arg registers have been repopulated, so the
// varargs fixup below only reloads the remaining ones.
5415 regMaskTP fixedArgsMask = RBM_NONE;
5418 // Next move any un-enregistered register arguments back to their register
5419 for (varNum = 0, varDsc = compiler->lvaTable;
5420 varNum < compiler->info.compArgsCount;
5421 varNum++ , varDsc++)
5423 /* Is this variable a register arg? */
5425 if (!varDsc->lvIsRegArg)
5428 /* Register argument */
5430 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5431 noway_assert(!varDsc->lvRegister);
5433 /* Reload it from the stack */
5435 #ifndef _TARGET_64BIT_
5436 if (varDsc->TypeGet() == TYP_LONG)
5438 /* long - at least the low half must be enregistered */
5440 getEmitter()->emitIns_R_S(ins_Load(TYP_INT),
5445 regTracker.rsTrackRegTrash(varDsc->lvArgReg);
5447 /* Also assume the upper half also enregistered */
5449 getEmitter()->emitIns_R_S(ins_Load(TYP_INT),
5451 genRegArgNext(varDsc->lvArgReg),
5454 regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
5457 fixedArgsMask |= genRegMask(varDsc->lvArgReg);
5458 fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
5462 #endif // _TARGET_64BIT_
// ARM-specific: HFA (homogeneous float aggregate) args span several
// consecutive FP registers; reload each element.
5464 if (varDsc->lvIsHfaRegArg())
5466 const var_types elemType = varDsc->GetHfaType();
5467 const instruction loadOp = ins_Load(elemType);
5468 const emitAttr size = emitTypeSize(elemType);
5469 regNumber argReg = varDsc->lvArgReg;
5470 const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
5472 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5474 getEmitter()->emitIns_R_S(loadOp,
5479 assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
5480 argReg = regNextOfType(argReg, elemType);
// Structs passed in integer registers: reload slot by slot.
5483 else if (varDsc->TypeGet() == TYP_STRUCT)
5485 const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
5486 const instruction loadOp = ins_Load(elemType);
5487 const emitAttr size = emitTypeSize(elemType);
5488 regNumber argReg = varDsc->lvArgReg;
5489 const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
5491 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5493 getEmitter()->emitIns_R_S(loadOp,
5498 regTracker.rsTrackRegTrash(argReg);
5500 fixedArgsMask |= genRegMask(argReg);
5502 argReg = genRegArgNext(argReg);
5506 #endif //_TARGET_ARM_
5508 var_types loadType = varDsc->TypeGet();
5509 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5510 bool twoParts = false;
// Varargs pass floats in integer registers; adjust the load type
// (and on 32-bit, split a double across two integer registers).
5512 if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
5514 #ifndef _TARGET_64BIT_
5515 if (loadType == TYP_DOUBLE)
5518 loadType = TYP_I_IMPL;
5519 assert(isValidIntArgReg(argReg));
5522 getEmitter()->emitIns_R_S(ins_Load(loadType),
5523 emitTypeSize(loadType),
5527 regTracker.rsTrackRegTrash(argReg);
5530 fixedArgsMask |= genRegMask(argReg);
5534 argReg = genRegArgNext(argReg);
5535 assert(isValidIntArgReg(argReg));
5537 getEmitter()->emitIns_R_S(ins_Load(loadType),
5538 emitTypeSize(loadType),
5542 regTracker.rsTrackRegTrash(argReg);
5545 fixedArgsMask |= genRegMask(argReg);
5552 // Check if we have any non-fixed args possibly in the arg registers.
5553 if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
5555 noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
// Walk forward past the implicit leading args to find the first
// register that can hold a declared vararg.
5557 regNumber regDeclArgs = REG_ARG_FIRST;
5559 // Skip the 'this' pointer.
5560 if (!compiler->info.compIsStatic)
5562 regDeclArgs = REG_NEXT(regDeclArgs);
5565 // Skip the 'generic context.'
5566 if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5568 regDeclArgs = REG_NEXT(regDeclArgs);
5571 // Skip any 'return buffer arg.'
5572 if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
5574 regDeclArgs = REG_NEXT(regDeclArgs);
5577 // Skip the 'vararg cookie.'
5578 regDeclArgs = REG_NEXT(regDeclArgs);
5580 // Also add offset for the vararg cookie.
5581 int offset = REGSIZE_BYTES;
5583 // Load all the variable arguments in registers back to their registers.
5584 for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
5586 if (!(fixedArgsMask & genRegMask(reg)))
5588 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
5589 regTracker.rsTrackRegTrash(reg);
5591 offset += REGSIZE_BYTES;
5594 #endif // _TARGET_ARM_
5597 /*****************************************************************************
5599 * Check if a variable is assigned to in a tree. The variable number is
5600 * passed in pCallBackData. If the variable is assigned to, return
5601 * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
// Tree-walk callback: detects whether the local variable whose lclNum is
// passed through data->pCallbackData is the target of an assignment.
// Returns WALK_ABORT as soon as an assignment to that variable is seen
// (so the caller can interpret WALK_ABORT as "yes, it is assigned to"),
// otherwise WALK_CONTINUE.
5603 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr *pTree, Compiler::fgWalkData *data)
5605 GenTreePtr tree = *pTree;
// Only a direct store to the exact local counts: an assignment node whose
// destination is a GT_LCL_VAR with the matching variable number.
5606 if ((tree->OperIsAssignment()) &&
5607 (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
5608 (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned) (size_t)data->pCallbackData))
5610 return Compiler::WALK_ABORT;
5613 return Compiler::WALK_CONTINUE;
// If 'tree' is a GT_LCL_VAR that is currently enregistered in a non-float
// register, returns that register; for any other tree the function falls
// through to its default return (not visible in this excerpt; presumably
// REG_NA — NOTE(review): confirm against the full source).
5617 regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
5622 if (tree->gtOper == GT_LCL_VAR)
5624 /* Does the variable live in a register? */
5626 varNum = tree->gtLclVarCommon.gtLclNum;
5627 noway_assert(varNum < compiler->lvaCount);
5628 varDsc = compiler->lvaTable + varNum;
// Only integer (non-float-register) enregistered locals qualify.
5630 if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5632 return varDsc->lvRegNum;
// Brings the register state back in line with a previously saved liveness
// snapshot 'ls': any register that was live in the snapshot (as a reg var,
// GC ref, or byref) but is currently spilled gets unspilled. Used so both
// arms of a qmark/colon end with identical register liveness.
5640 void CodeGen::unspillLiveness(genLivenessSet * ls)
5642 // Only try to unspill the registers that are missing from the currentLiveRegs
5644 regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5645 regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
5646 cannotSpillMask &= ~currentLiveRegs;
5648 // Typically this will always be true and we will return
// Fast path: nothing was spilled away from the snapshot's live set.
5650 if (cannotSpillMask == 0)
// Scan only the integer registers for pending spills to undo.
5653 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5655 // Is this a register that we cannot leave in the spilled state?
5657 if ((cannotSpillMask & genRegMask(reg)) == 0)
5660 RegSet::SpillDsc * spill = regSet.rsSpillDesc[reg];
5662 // Was it spilled, if not then skip it.
5667 noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
// Reload the spilled tree back into the same register, keeping it marked used.
5669 regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5673 /*****************************************************************************
5675 * Generate code for a qmark colon
// Generates code for a GT_QMARK (?:) tree using conditional branches.
// First tries the CMOV-based fast path (genCodeForQmarkWithCMOV); if that
// declines, emits the branching pattern sketched in the comment below.
// The bulk of the complexity is keeping liveness, GC info, and spill state
// identical at the join point regardless of which arm executed: registers
// used by the qmark are pre-spilled, liveness is snapshotted on entry and
// at the end of the else arm, and the then arm must reproduce exactly that
// exit state (checkLiveness/unspillLiveness).
5678 void CodeGen::genCodeForQmark(GenTreePtr tree,
5682 GenTreePtr op1 = tree->gtOp.gtOp1;
5683 GenTreePtr op2 = tree->gtOp.gtOp2;
5685 regMaskTP regs = regSet.rsMaskUsed;
5686 regMaskTP needReg = destReg;
5688 noway_assert(compiler->compQmarkUsed);
5689 noway_assert(tree->gtOper == GT_QMARK);
5690 noway_assert(op1->OperIsCompare());
5691 noway_assert(op2->gtOper == GT_COLON);
5693 GenTreePtr thenNode = op2->AsColon()->ThenNode();
5694 GenTreePtr elseNode = op2->AsColon()->ElseNode();
5696 /* If elseNode is a Nop node you must reverse the
5697 thenNode and elseNode prior to reaching here!
5698 (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */
5700 noway_assert(!elseNode->IsNothingNode());
5702 /* Try to implement the qmark colon using a CMOV. If we can't for
5703 whatever reason, this will return false and we will implement
5704 it using regular branching constructs. */
5706 if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
5710 This is a ?: operator; generate code like this:
5713 jmp_if_true lab_true
5716 op1 (false = 'else' part)
5720 op2 (true = 'then' part)
5725 NOTE: If no 'then' part we do not generate the 'jmp lab_done'
5726 or the 'lab_done' label
5729 BasicBlock * lab_true;
5730 BasicBlock * lab_false;
5731 BasicBlock * lab_done;
5733 genLivenessSet entryLiveness;
5734 genLivenessSet exitLiveness;
5736 lab_true = genCreateTempLabel();
5737 lab_false = genCreateTempLabel();
5739 #if FEATURE_STACK_FP_X87
5740 /* Spill any register that hold partial values so that the exit liveness
5741 from sides is the same */
5743 regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
5745 // spillMask should be the whole FP stack
5746 noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
5749 SpillTempsStackFP(regSet.rsMaskUsedFloat);
5750 noway_assert(regSet.rsMaskUsedFloat == 0);
5753 /* Before we generate code for qmark, we spill all the currently used registers
5754 that conflict with the registers used in the qmark tree. This is to avoid
5755 introducing spills that only occur on either the 'then' or 'else' side of
5756 the tree, but not both identically. We need to be careful with enregistered
5757 variables that are used; see below.
5760 if (regSet.rsMaskUsed)
5762 /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
5763 variable), then it may not get spilled. However, the variable may
5764 then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
5765 may get spilled from one side and not the other. So unmark regSet.rsMaskVars
5766 before spilling regSet.rsMaskUsed */
5768 regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
5769 regMaskTP rsAdditional = RBM_NONE;
5771 // For each multi-use of an enregistered variable, we need to determine if
5772 // it can get spilled inside the qmark colon. This can only happen if
5773 // its life ends somewhere in the qmark colon. We have the following
5775 // 1) Variable is dead at the end of the colon -- needs to be spilled
5776 // 2) Variable is alive at the end of the colon -- needs to be spilled
5777 // iff it is assigned to in the colon. In order to determine that, we
5778 // examine the GTF_ASG flag to see if any assignments were made in the
5779 // colon. If there are any, we need to do a tree walk to see if this
5780 // variable is the target of an assignment. This treewalk should not
5781 // happen frequently.
5782 if (rsAdditionalCandidates)
5785 if (compiler->verbose)
5787 Compiler::printTreeID(tree);
5788 printf(": Qmark-Colon additional spilling candidates are ");
5789 dspRegMask(rsAdditionalCandidates); printf("\n");
5793 // If any candidates are not alive at the GT_QMARK node, then they
5794 // need to be spilled
5796 VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
5797 VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
5798 compiler->compCurLifeTree,
5801 VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow, VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
5803 VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
5804 while (iter.NextElem(compiler, &varIndex))
5806 // Find the variable in compiler->lvaTable
5807 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
5808 LclVarDsc *varDsc = compiler->lvaTable + varNum;
5810 #if !FEATURE_FP_REGALLOC
5811 if (varDsc->IsFloatRegType())
5815 noway_assert(varDsc->lvRegister);
// Build the register mask this variable occupies (one or, for longs
// split across a pair, two registers).
5819 if (varTypeIsFloating(varDsc->TypeGet()))
5821 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
5825 regBit = genRegMask(varDsc->lvRegNum);
5827 // For longs we may need to spill both regs
5828 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
5829 regBit |= genRegMask(varDsc->lvOtherReg);
5832 // Is it one of our reg-use vars? If not, we don't need to spill it.
5833 regBit &= rsAdditionalCandidates;
5837 // Is the variable live at the end of the colon?
5838 if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
5840 // Variable is alive at the end of the colon. Was it assigned
5841 // to inside the colon?
5843 if (!(op2->gtFlags & GTF_ASG))
5846 if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void *)(size_t)varNum) == Compiler::WALK_ABORT)
5848 // Variable was assigned to, so we need to spill it.
5850 rsAdditional |= regBit;
5852 if (compiler->verbose)
5854 Compiler::printTreeID(tree);
5855 printf(": Qmark-Colon candidate ");
5856 dspRegMask(regBit); printf("\n");
5857 printf(" is assigned to inside colon and will be spilled\n");
5864 // Variable is not alive at the end of the colon. We need to spill it.
5866 rsAdditional |= regBit;
5868 if (compiler->verbose)
5870 Compiler::printTreeID(tree);
5871 printf(": Qmark-Colon candidate ");
5872 dspRegMask(regBit); printf("\n");
5873 printf(" is alive at end of colon and will be spilled\n");
5880 if (compiler->verbose)
5882 Compiler::printTreeID(tree);
5883 printf(": Qmark-Colon approved additional spilling candidates are ");
5884 dspRegMask(rsAdditional); printf("\n");
5890 noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
5892 // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
5893 // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
5894 // we will have unbalanced spills and generate bad code.
5895 regMaskTP rsSpill = ((regSet.rsMaskUsed & ~(regSet.rsMaskVars|regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
5898 // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
5899 // 'bad' registers, causing spills. So, just force all used registers to get spilled
5900 // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
5901 // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
5902 // unspilled while generating that same tree.
5904 if (regSet.rsStressRegs() >= 1)
5906 rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
5912 // Remember which registers hold pointers. We will spill
5913 // them, but the code that follows will fetch reg vars from
5914 // the registers, so we need that gc compiler->info.
5915 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
5916 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
5918 // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
5919 // So, pretend there aren't any, and spill them anyway. This will only occur
5920 // if rsAdditional is non-empty.
5921 regMaskTP rsTemp = regSet.rsMaskVars;
5922 regSet.ClearMaskVars();
5924 regSet.rsSpillRegs(rsSpill);
5926 // Restore gc tracking masks.
5927 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
5928 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
5930 // Set regSet.rsMaskVars back to normal
5931 regSet.rsMaskVars = rsTemp;
5936 // Generate the conditional jump but without doing any StackFP fixups.
5937 genCondJump(op1, lab_true, lab_false, false);
5940 /* Save the current liveness, register status, and GC pointers */
5941 /* This is the liveness information upon entry */
5942 /* to both the then and else parts of the qmark */
5944 saveLiveness(&entryLiveness);
5946 /* Clear the liveness of any local variables that are dead upon */
5947 /* entry to the else part. */
5949 /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
5950 /* from the "colon or op2" liveSet */
5951 genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
5953 /* genCondJump() closes the current emitter block */
5955 genDefineTempLabel(lab_false);
5957 #if FEATURE_STACK_FP_X87
5960 QmarkStateStackFP tempFPState;
5961 bool bHasFPUState = !compCurFPState.IsEmpty();
5962 genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
5965 /* Does the operator yield a value? */
// Void-typed qmark: both arms are evaluated purely for side effects.
5967 if (tree->gtType == TYP_VOID)
5969 /* Generate the code for the else part of the qmark */
5971 genCodeForTree(elseNode, needReg, bestReg);
5973 /* The type is VOID, so we shouldn't have computed a value */
5975 noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
5977 /* Save the current liveness, register status, and GC pointers */
5978 /* This is the liveness information upon exit of the then part of the qmark */
5980 saveLiveness(&exitLiveness);
5982 /* Is there a 'then' part? */
5984 if (thenNode->IsNothingNode())
5986 #if FEATURE_STACK_FP_X87
5989 // We had FP state on entry just after the condition, so potentially, the else
5990 // node may have to do transition work.
5991 lab_done = genCreateTempLabel();
5993 /* Generate jmp lab_done */
5995 inst_JMP (EJ_jmp, lab_done);
5997 /* No 'then' - just generate the 'lab_true' label */
5999 genDefineTempLabel(lab_true);
6001 // We need to do this after defining the lab_false label
6002 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6003 genQMarkAfterThenBlockStackFP(&tempFPState);
6004 genDefineTempLabel(lab_done);
6007 #endif // FEATURE_STACK_FP_X87
6009 /* No 'then' - just generate the 'lab_true' label */
6010 genDefineTempLabel(lab_true);
6015 lab_done = genCreateTempLabel();
6017 /* Generate jmp lab_done */
6019 inst_JMP (EJ_jmp, lab_done);
6021 /* Restore the liveness that we had upon entry of the then part of the qmark */
6023 restoreLiveness(&entryLiveness);
6025 /* Clear the liveness of any local variables that are dead upon */
6026 /* entry to the then part. */
6027 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
6029 /* Generate lab_true: */
6031 genDefineTempLabel(lab_true);
6032 #if FEATURE_STACK_FP_X87
6033 // We need to do this after defining the lab_false label
6034 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6036 /* Enter the then part - trash all registers */
6038 regTracker.rsTrackRegClr();
6040 /* Generate the code for the then part of the qmark */
6042 genCodeForTree(thenNode, needReg, bestReg);
6044 /* The type is VOID, so we shouldn't have computed a value */
6046 noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
6048 unspillLiveness(&exitLiveness);
6050 /* Verify that the exit liveness information is the same for the two parts of the qmark */
6052 checkLiveness(&exitLiveness);
6053 #if FEATURE_STACK_FP_X87
6054 genQMarkAfterThenBlockStackFP(&tempFPState);
6056 /* Define the "result" label */
6058 genDefineTempLabel(lab_done);
6061 /* Join of the two branches - trash all registers */
6063 regTracker.rsTrackRegClr();
6065 /* We're just about done */
6067 genUpdateLife(tree);
6071 /* Generate code for a qmark that generates a value */
6073 /* Generate the code for the else part of the qmark */
6075 noway_assert(elseNode->IsNothingNode() == false);
6077 /* Compute the elseNode into any free register */
6078 genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6079 noway_assert(elseNode->gtFlags & GTF_REG_VAL);
6080 noway_assert(elseNode->gtRegNum != REG_NA);
6082 /* Record the chosen register */
// The else arm picks the result register; the then arm is later forced
// (RegSet::EXACT_REG) to produce its value in the same register.
6083 reg = elseNode->gtRegNum;
6084 regs = genRegMask(reg);
6086 /* Save the current liveness, register status, and GC pointers */
6087 /* This is the liveness information upon exit of the else part of the qmark */
6089 saveLiveness(&exitLiveness);
6091 /* Generate jmp lab_done */
6092 lab_done = genCreateTempLabel();
6094 // We would like to know here if the else node is really going to generate
6095 // code, as if it isn't, we're generating here a jump to the next instruction.
6096 // What you would really like is to be able to go back and remove the jump, but
6097 // we have no way of doing that right now.
6100 // We will use this to assert we don't emit instructions if we decide not to
6102 unsigned emittedInstructions = getEmitter()->emitInsCount;
6103 bool bSkippedJump = false;
// Optimization: if the then arm is just an enregistered variable already
// sitting in 'reg', the jump over it can be skipped entirely.
6106 #if FEATURE_STACK_FP_X87
6107 !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
6109 genIsEnregisteredIntVariable(thenNode) == reg)
6111 // For the moment, fix this easy case (enregistered else node), which
6112 // is the one that happens all the time.
6114 bSkippedJump = true;
6119 inst_JMP (EJ_jmp, lab_done);
6122 /* Restore the liveness that we had upon entry of the else part of the qmark */
6124 restoreLiveness(&entryLiveness);
6126 /* Clear the liveness of any local variables that are dead upon */
6127 /* entry to the then part. */
6128 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
6130 /* Generate lab_true: */
6131 genDefineTempLabel(lab_true);
6132 #if FEATURE_STACK_FP_X87
6135 // We need to do this after defining the lab_true label
6136 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
6138 /* Enter the then part - trash all registers */
6140 regTracker.rsTrackRegClr();
6142 /* Generate the code for the then part of the qmark */
6144 noway_assert(thenNode->IsNothingNode() == false);
6146 /* This must place a value into the chosen register */
6147 genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
6149 noway_assert(thenNode->gtFlags & GTF_REG_VAL);
6150 noway_assert(thenNode->gtRegNum == reg);
6152 unspillLiveness(&exitLiveness);
6154 /* Verify that the exit liveness information is the same for the two parts of the qmark */
6155 checkLiveness(&exitLiveness);
6156 #if FEATURE_STACK_FP_X87
6157 genQMarkAfterThenBlockStackFP(&tempFPState);
// If we skipped the jump, the then arm must not have emitted anything.
6161 noway_assert(bSkippedJump == false ||
6162 getEmitter()->emitInsCount == emittedInstructions);
6165 /* Define the "result" label */
6166 genDefineTempLabel(lab_done);
6168 /* Join of the two branches - trash all registers */
6170 regTracker.rsTrackRegClr();
6172 /* Check whether this subtree has freed up any variables */
6174 genUpdateLife(tree);
6176 genMarkTreeInReg(tree, reg);
6182 /*****************************************************************************
6184 * Generate code for a qmark colon using the CMOV instruction. It's OK
6185 * to return false when we can't easily implement it using a cmov (leading
6186 * genCodeForQmark to implement it using branches).
// Attempts to implement a GT_QMARK with a CMOVcc instruction instead of
// branches (xarch only). Returns true if it generated the code, false if
// the shape is unsupported so the caller must fall back to branching.
// Preconditions enforced: qmark over a compare with a GT_COLON; both arms
// must be GT_LCL_VAR or GT_CNS_INT (not both constants), and the compare
// operands must be integer-sized. Strategy: the "always" value is
// unconditionally materialized into the result register, then a CMOVcc
// conditionally overwrites it with the "predicate" local (from a register
// if enregistered, else from memory).
6189 bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree,
6193 #ifdef _TARGET_XARCH_
6194 GenTreePtr cond = tree->gtOp.gtOp1;
6195 GenTreePtr colon = tree->gtOp.gtOp2;
6196 // Warning: this naming of the local vars is backwards!
6197 GenTreePtr thenNode = colon->gtOp.gtOp1;
6198 GenTreePtr elseNode = colon->gtOp.gtOp2;
6199 GenTreePtr alwaysNode, predicateNode;
6201 regMaskTP needReg = destReg;
6203 noway_assert(tree->gtOper == GT_QMARK);
6204 noway_assert(cond->OperIsCompare());
6205 noway_assert(colon->gtOper == GT_COLON);
// Config knob to disable CMOV generation entirely (for testing).
6208 if (JitConfig.JitNoCMOV())
6214 /* Can only implement CMOV on processors that support it */
6216 if (!compiler->opts.compUseCMOV)
6221 /* thenNode better be a local or a constant */
6223 if ((thenNode->OperGet() != GT_CNS_INT) &&
6224 (thenNode->OperGet() != GT_LCL_VAR))
6229 /* elseNode better be a local or a constant or nothing */
6231 if ((elseNode->OperGet() != GT_CNS_INT) &&
6232 (elseNode->OperGet() != GT_LCL_VAR))
6237 /* can't handle two constants here */
6239 if ((thenNode->OperGet() == GT_CNS_INT) &&
6240 (elseNode->OperGet() == GT_CNS_INT))
6245 /* let's not handle comparisons of non-integer types */
6247 if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
6252 /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
6253 The biggest constraint is that cmov doesn't take an integer argument.
// CMOV's source must come from a register/memory, never an immediate,
// so a constant arm is always the unconditionally-materialized one.
6256 bool reverseCond = false;
6257 if (elseNode->OperGet() == GT_CNS_INT)
6259 // else node is a constant
6261 alwaysNode = elseNode;
6262 predicateNode = thenNode;
6267 alwaysNode = thenNode;
6268 predicateNode = elseNode;
6271 // If the live set in alwaysNode is not the same as in tree, then
6272 // the variable in predicate node dies here. This is a dangerous
6273 // case that we don't handle (genComputeReg could overwrite
6274 // the value of the variable in the predicate node).
6276 // This assert is just paranoid (we've already asserted it above)
6277 assert (predicateNode->OperGet() == GT_LCL_VAR);
6278 if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
6283 // Pass this point we are comitting to use CMOV.
6287 compiler->gtReverseCond(cond);
// Emit the compare; 'jumpKind' encodes which condition selects the
// predicate value.
6290 emitJumpKind jumpKind = genCondSetFlags(cond);
6292 // Compute the always node into any free register. If it's a constant,
6293 // we need to generate the mov instruction here (otherwise genComputeReg might
6294 // modify the flags, as in xor reg,reg).
6296 if (alwaysNode->OperGet() == GT_CNS_INT)
6298 reg = regSet.rsPickReg(needReg, bestReg);
6299 inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
6300 gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
6301 regTracker.rsTrackRegTrash(reg);
6305 genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6306 noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
6307 noway_assert(alwaysNode->gtRegNum != REG_NA);
6309 // Record the chosen register
6311 reg = alwaysNode->gtRegNum;
6314 regNumber regPredicate = REG_NA;
6316 // Is predicateNode an enregistered variable?
6318 if (genMarkLclVar(predicateNode))
6320 // Variable lives in a register
6322 regPredicate = predicateNode->gtRegNum;
6327 // Checks if the variable happens to be in any of the registers
6329 regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
// Maps each emitJumpKind to the corresponding CMOVcc instruction
// (table entries elided here).
6334 instruction EJtoCMOV[] =
6356 noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
6357 instruction cmov_ins = EJtoCMOV[jumpKind];
6359 noway_assert(insIsCMOV(cmov_ins));
6361 if (regPredicate != REG_NA)
6363 // regPredicate is in a register
6365 inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
6369 // regPredicate is in memory
6371 inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
6373 gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
6374 regTracker.rsTrackRegTrash(reg);
6376 genUpdateLife(alwaysNode);
6377 genUpdateLife(predicateNode);
6378 genCodeForTree_DONE_LIFE(tree, reg);
6386 #ifdef _TARGET_XARCH_
// x86-only: generates a GT_MUL using the one-operand mul/imul forms, which
// implicitly take EAX as one operand and clobber EDX:EAX with the result.
// op1 is forced into EAX, op2 is made addressable (avoiding EAX), EDX is
// freed or spilled as needed, the multiply is emitted, and the result is
// left in EAX. Handles the overflow-checked case (including the byte-sized
// variant, which needs a byte-addressable register).
6387 void CodeGen::genCodeForMultEAX(GenTreePtr tree)
6389 GenTreePtr op1 = tree->gtOp.gtOp1;
6390 GenTreePtr op2 = tree->gtGetOp2();
6391 bool ovfl = tree->gtOverflow();
6392 regNumber reg = DUMMY_INIT(REG_CORRUPT);
6395 noway_assert(tree->OperGet() == GT_MUL);
6397 /* We'll evaluate 'op1' first */
// Prefer EAX for op1 now, but never a register op2 has reserved.
6399 regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
6401 /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
6403 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
6404 noway_assert(op1->gtFlags & GTF_REG_VAL);
6406 // If op2 is a constant we need to load the constant into a register
6407 if (op2->OperKind() & GTK_CONST)
6409 genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
6410 noway_assert(op2->gtFlags & GTF_REG_VAL);
6411 regSet.rsMarkRegUsed(op2);
6412 addrReg = genRegMask(op2->gtRegNum);
6416 /* Make the second operand addressable */
6417 // Try to avoid EAX.
6418 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
6421 /* Make sure the first operand is still in a register */
6422 // op1 *must* go into EAX.
6423 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
6424 noway_assert(op1->gtFlags & GTF_REG_VAL);
6426 reg = op1->gtRegNum;
6428 // For 8 bit operations, we need to pick byte addressable registers
6430 if (ovfl && varTypeIsByte(tree->TypeGet()) &&
6431 !(genRegMask(reg) & RBM_BYTE_REGS))
6433 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6435 inst_RV_RV(INS_mov, byteReg, reg);
6437 regTracker.rsTrackRegTrash(byteReg);
6438 regSet.rsMarkRegFree (genRegMask(reg));
6441 op1->gtRegNum = reg;
6442 regSet.rsMarkRegUsed(op1);
6445 /* Make sure the operand is still addressable */
6446 addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
6448 /* Free up the operand, if it's a regvar */
6452 /* The register is about to be trashed */
6454 regTracker.rsTrackRegTrash(reg);
6456 // For overflow instructions, tree->TypeGet() is the accurate type,
6457 // and gives us the size for the operands.
6459 emitAttr opSize = emitTypeSize(tree->TypeGet());
6461 /* Compute the new value */
6463 noway_assert(op1->gtRegNum == REG_EAX);
6465 // Make sure Edx is free (unless used by op2 itself)
// The multiply will clobber EDX, so it must not hold anything live
// other than op2's own value.
6466 bool op2Released = false;
6468 if ((addrReg & RBM_EDX) == 0)
6470 // op2 does not use Edx, so make sure noone else does either
6471 regSet.rsGrabReg(RBM_EDX);
6473 else if (regSet.rsMaskMult & RBM_EDX)
6475 /* Edx is used by op2 and some other trees.
6476 Spill the other trees besides op2. */
6478 regSet.rsGrabReg(RBM_EDX);
6481 /* keepReg==RegSet::FREE_REG so that the other multi-used trees
6482 don't get marked as unspilled as well. */
6483 regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
// Unsigned multiplies use 'mul', signed use 'imul' (selection elided).
6488 if (tree->gtFlags & GTF_UNSIGNED)
6493 inst_TT(ins, op2, 0, 0, opSize);
6495 /* Both EAX and EDX are now trashed */
6497 regTracker.rsTrackRegTrash (REG_EAX);
6498 regTracker.rsTrackRegTrash (REG_EDX);
6500 /* Free up anything that was tied up by the operand */
6503 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
6505 /* The result will be where the first operand is sitting */
6507 /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
6508 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6510 reg = op1->gtRegNum;
6511 noway_assert(reg == REG_EAX);
6515 /* Do we need an overflow check */
6518 genCheckOverflow(tree);
6520 genCodeForTree_DONE(tree, reg);
6525 void CodeGen::genCodeForMult64(GenTreePtr tree,
// Generate code for a GT_MUL whose full 64-bit product is needed (ARM path):
// either the tree is marked GTF_MUL_64RSLT, or gtOverflow() requires the high
// 32 bits to detect overflow.  The low half of the product lands in regLo;
// when a 64-bit result is requested the pair (regLo, regHi) is marked on the
// tree, otherwise only regLo is.
// NOTE(review): this listing is elided — the remaining parameters
// (presumably destReg/bestReg), braces, and the selection of 'ins'/'regHi'
// are on lines not visible here; confirm against the full source.
6529 GenTreePtr op1 = tree->gtOp.gtOp1;
6530 GenTreePtr op2 = tree->gtGetOp2();
6532 noway_assert(tree->OperGet() == GT_MUL);
6534 /* Generate the first operand into some register */
6536 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6537 noway_assert(op1->gtFlags & GTF_REG_VAL);
6539 /* Generate the second operand into some register */
6541 genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6542 noway_assert(op2->gtFlags & GTF_REG_VAL);
6544 /* Make sure the first operand is still in a register */
6545 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6546 noway_assert(op1->gtFlags & GTF_REG_VAL);
6548 /* Free up the operands */
6549 genUpdateLife(tree);
// Pick the low-half register first, then lock it so the high-half pick
// cannot hand back the same register.
6554 regNumber regLo = regSet.rsPickReg(destReg, bestReg);
6557 regSet.rsLockReg(genRegMask(regLo));
6558 regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
6559 regSet.rsUnlockReg(genRegMask(regLo));
6562 if (tree->gtFlags & GTF_UNSIGNED)
// Emit the 32x32 => 64-bit multiply: operands are (destLo, destHi, src1, src2).
6567 getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
6568 regTracker.rsTrackRegTrash(regHi);
6569 regTracker.rsTrackRegTrash(regLo);
6571 /* Do we need an overflow check */
6573 if (tree->gtOverflow())
6575 // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
6577 regSet.rsLockReg(genRegMask(regLo));
6579 if (tree->gtFlags & GTF_MUL_64RSLT)
6580 regSet.rsLockReg(genRegMask(regHi));
6582 regNumber regTmpHi = regHi;
6583 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
// Signed case: the product fits in 32 bits iff regHi is the sign-extension
// of regLo.  "cmp regLo, 0x80000000" + "adc regTmpHi, regHi, 0" folds that
// condition into regTmpHi, which is zero exactly when there is no overflow.
6585 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
6586 regTmpHi = regSet.rsPickReg(RBM_ALLINT);
6587 getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
6588 regTracker.rsTrackRegTrash(regTmpHi);
// In both the signed and unsigned cases, overflow occurred iff regTmpHi != 0.
6590 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
6592 // Jump to the block which will throw the exception
6593 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
6594 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
6596 // Unlock regLo [and regHi] after generating code for the gtOverflow() case
6598 regSet.rsUnlockReg(genRegMask(regLo));
6600 if (tree->gtFlags & GTF_MUL_64RSLT)
6601 regSet.rsUnlockReg(genRegMask(regHi));
6604 genUpdateLife(tree);
// Publish the result: a register pair when the 64-bit result was requested,
// otherwise just the low register.
6606 if (tree->gtFlags & GTF_MUL_64RSLT)
6607 genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
6609 genMarkTreeInReg(tree, regLo);
6611 #endif // _TARGET_ARM_
6614 /*****************************************************************************
6616 * Generate code for a simple binary arithmetic or logical operator.
6617 * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
6620 void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree,
// Generate code for a simple binary arithmetic (GT_ADD/GT_SUB/GT_MUL) or
// logical (GT_AND/GT_OR/GT_XOR) operator, leaving the result in 'reg' and
// publishing it via genCodeForTree_DONE / genCodeForTree_DONE_LIFE.
// NOTE(review): this listing is elided — braces, 'else' arms, the remaining
// parameters (presumably destReg/bestReg) and some statements between the
// numbered lines are not visible here; confirm against the full source.
// FIX: the genMakeIndAddrMode call below had its "&regs" argument corrupted
// to "(R)s" by HTML-entity decoding ("&reg" -> registered-sign); restored.
6625 genTreeOps oper = tree->OperGet();
6626 const var_types treeType = tree->TypeGet();
6627 GenTreePtr op1 = tree->gtOp.gtOp1;
6628 GenTreePtr op2 = tree->gtGetOp2();
6629 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
6630 regNumber reg = DUMMY_INIT(REG_CORRUPT);
6631 regMaskTP needReg = destReg;
6633 /* Figure out what instruction to generate */
6638 case GT_AND: ins = INS_AND; isArith = false; break;
6639 case GT_OR : ins = INS_OR ; isArith = false; break;
6640 case GT_XOR: ins = INS_XOR; isArith = false; break;
6641 case GT_ADD: ins = INS_add; isArith = true; break;
6642 case GT_SUB: ins = INS_sub; isArith = true; break;
6643 case GT_MUL: ins = INS_MUL; isArith = true; break;
6648 #ifdef _TARGET_XARCH_
6649 /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
6651 if ((oper == GT_MUL) &&
6652 op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
6653 !op1->IsCnsIntOrI() && // op1 is not a constant
6654 (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
6655 !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
6656 !tree->gtOverflow() ) // 3 operand imul doesn't set flags
6658 /* Make the first operand addressable */
6660 regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
6662 /* Grab a register for the target */
6664 reg = regSet.rsPickReg(needReg, bestReg);
6666 /* Compute the value into the target: reg=op1*op2_icon */
// Multiplies by 3, 5 and 9 can be done with a single scaled-index lea:
// reg = regSrc + regSrc*{2,4,8}.
6669 if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
6672 if (op1->gtFlags & GTF_REG_VAL)
6674 regSrc = op1->gtRegNum;
6678 inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
6681 getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc, (op2->gtIntCon.gtIconVal & -2), 0);
6684 #endif // LEA_AVAILABLE
6686 inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
6689 /* The register has been trashed now */
6691 regTracker.rsTrackRegTrash(reg);
6693 /* The address is no longer live */
6695 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
6697 genCodeForTree_DONE(tree, reg);
6700 #endif // _TARGET_XARCH_
6706 // We only reach here for GT_ADD, GT_SUB and GT_MUL.
6707 assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
6709 ovfl = tree->gtOverflow();
6711 /* We record the accurate (small) types in trees only we need to
6712 * check for overflow. Otherwise we record genActualType()
6715 noway_assert(ovfl || (treeType == genActualType(treeType)));
6719 /* Can we use an 'lea' to compute the result?
6720 Can't use 'lea' for overflow as it doesn't set flags
6721 Can't use 'lea' unless we have at least two free registers */
6723 bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
6724 genCountBits(regSet.rsMaskLock) + // Locked registers
6725 2 // We will need two registers
6726 <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
6728 regMaskTP regs = RBM_NONE; // OUT argument
6731 genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
6735 /* Is the value now computed in some register? */
6737 if (tree->gtFlags & GTF_REG_VAL)
6739 genCodeForTree_REG_VAR1(tree);
6743 /* If we can reuse op1/2's register directly, and 'tree' is
6744 a simple expression (ie. not in scaled index form),
6745 might as well just use "add" instead of "lea" */
6747 // However, if we're in a context where we want to evaluate "tree" into a specific
6748 // register different from the reg we'd use in this optimization, then it doesn't
6749 // make sense to do the "add", since we'd also have to do a "mov."
6750 if (op1->gtFlags & GTF_REG_VAL)
6752 reg = op1->gtRegNum;
6754 if ((genRegMask(reg) & regSet.rsRegMaskFree()) &&
6755 (genRegMask(reg) & needReg))
6757 if (op2->gtFlags & GTF_REG_VAL)
6759 /* Simply add op2 to the register */
6761 inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
6763 if (tree->gtSetFlags())
6764 genFlagsEqualToReg(tree, reg);
6768 else if (op2->OperGet() == GT_CNS_INT)
6770 /* Simply add op2 to the register */
6772 genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
// Symmetric case: op2 already sits in a usable register, add op1 into it.
6779 if (op2->gtFlags & GTF_REG_VAL)
6781 reg = op2->gtRegNum;
6783 if ((genRegMask(reg) & regSet.rsRegMaskFree()) &&
6784 (genRegMask(reg) & needReg))
6786 if (op1->gtFlags & GTF_REG_VAL)
6788 /* Simply add op1 to the register */
6790 inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
6792 if (tree->gtSetFlags())
6793 genFlagsEqualToReg(tree, reg);
6800 // The expression either requires a scaled-index form, or the
6801 // op1 or op2's register can't be targeted, this can be
6802 // caused when op1 or op2 are enregistered variables.
6804 reg = regSet.rsPickReg(needReg, bestReg);
6805 size = emitActualTypeSize(treeType);
6807 /* Generate "lea reg, [addr-mode]" */
6809 inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
6811 #ifndef _TARGET_XARCH_
6812 // Don't call genFlagsEqualToReg on x86/x64
6813 // as it does not set the flags
6814 if (tree->gtSetFlags())
6815 genFlagsEqualToReg(tree, reg);
6819 /* The register has been trashed now */
6820 regTracker.rsTrackRegTrash(reg);
6822 genDoneAddressable(tree, regs, RegSet::FREE_REG);
6824 /* The following could be an 'inner' pointer!!! */
6826 noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
6828 if (treeType == TYP_BYREF)
6830 genUpdateLife(tree);
6832 gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
6833 gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
6836 genCodeForTree_DONE(tree, reg);
6841 #endif // LEA_AVAILABLE
// GC pointers may only flow through here as BYREF add/sub.
6843 noway_assert((varTypeIsGC(treeType) == false) ||
6844 (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
6847 /* The following makes an assumption about gtSetEvalOrder(this) */
6849 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
6851 /* Compute a useful register mask */
6852 needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6853 needReg = regSet.rsNarrowHint (needReg, regSet.rsRegMaskFree());
6855 // Determine what registers go live between op1 and op2
6856 // Don't bother checking if op1 is already in a register.
6857 // This is not just for efficiency; if it's already in a
6858 // register then it may already be considered "evaluated"
6859 // for the purposes of liveness, in which genNewLiveRegMask
6863 regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
6866 needReg = regSet.rsNarrowHint (needReg, ~newLiveMask);
6870 #if CPU_HAS_BYTE_REGS
6871 /* 8-bit operations can only be done in the byte-regs */
6872 if (varTypeIsByte(treeType))
6873 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6874 #endif // CPU_HAS_BYTE_REGS
6876 // Try selecting one of the 'bestRegs'
6877 needReg = regSet.rsNarrowHint(needReg, bestReg);
6879 /* Special case: small_val & small_mask */
6881 if ( varTypeIsSmall(op1->TypeGet()) &&
6882 op2->IsCnsIntOrI() &&
6885 size_t and_val = op2->gtIntCon.gtIconVal;
6887 var_types typ = op1->TypeGet();
6894 andMask = 0x000000FF;
6898 andMask = 0x0000FFFF;
6900 default: noway_assert(!"unexpected type"); return;
6903 // Is the 'and_val' completely contained within the bits found in 'andMask'
6904 if ((and_val & ~andMask) == 0)
6906 // We must use unsigned instructions when loading op1
6907 if (varTypeIsByte(typ))
6909 op1->gtType = TYP_UBYTE;
6911 else // varTypeIsShort(typ)
6913 assert(varTypeIsShort(typ));
6914 op1->gtType = TYP_CHAR;
6917 /* Generate the first operand into a scratch register */
6919 op1 = genCodeForCommaTree(op1);
6920 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6922 noway_assert(op1->gtFlags & GTF_REG_VAL);
6924 regNumber op1Reg = op1->gtRegNum;
6926 // Did we end up in an acceptable register?
6927 // and do we have an acceptable free register available to grab?
6929 if ( ((genRegMask(op1Reg) & needReg) == 0) &&
6930 ((regSet.rsRegMaskFree() & needReg) != 0) )
6932 // See if we can pick a register from bestReg
6935 // Grab an acceptable register
6937 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6938 newReg = regSet.rsGrabReg(bestReg);
6940 newReg = regSet.rsGrabReg(needReg);
6942 noway_assert(op1Reg != newReg);
6944 /* Update the value in the target register */
6946 regTracker.rsTrackRegCopy(newReg, op1Reg);
6948 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6950 /* The value has been transferred to 'reg' */
6952 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6953 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6955 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6957 /* The value is now in an appropriate register */
6959 op1->gtRegNum = newReg;
6961 noway_assert(op1->gtFlags & GTF_REG_VAL);
6964 /* Mark the register as 'used' */
6965 regSet.rsMarkRegUsed(op1);
6966 reg = op1->gtRegNum;
6968 if (and_val != andMask) // Does the "and" mask only cover some of the bits?
6970 /* "and" the value */
6972 inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
6975 /* Update the live set of register variables */
6977 if (compiler->opts.varNames) genUpdateLife(tree);
6980 /* Now we can update the register pointer information */
6983 gcInfo.gcMarkRegPtrVal(reg, treeType);
6985 genCodeForTree_DONE_LIFE(tree, reg);
6990 #ifdef _TARGET_XARCH_
6992 // Do we have to use the special "imul" instruction
6993 // which has eax as the implicit operand ?
6995 bool multEAX = false;
6999 if (tree->gtFlags & GTF_MUL_64RSLT)
7001 /* Only multiplying with EAX will leave the 64-bit
7002 * result in EDX:EAX */
7008 if (tree->gtFlags & GTF_UNSIGNED)
7010 /* "mul reg/mem" always has EAX as default operand */
7014 else if (varTypeIsSmall(treeType))
7016 /* Only the "imul with EAX" encoding has the 'w' bit
7017 * to specify the size of the operands */
7026 noway_assert(oper == GT_MUL);
7028 return genCodeForMultEAX(tree);
7030 #endif // _TARGET_XARCH_
7034 // Do we have to use the special 32x32 => 64 bit multiply
7036 bool mult64 = false;
7040 if (tree->gtFlags & GTF_MUL_64RSLT)
7046 // We always must use the 32x32 => 64 bit multiply
7047 // to detect overflow
7054 noway_assert(oper == GT_MUL);
7056 return genCodeForMult64(tree, destReg, bestReg);
7058 #endif // _TARGET_ARM_
7060 /* Generate the first operand into a scratch register */
7062 op1 = genCodeForCommaTree(op1);
7063 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
7065 noway_assert(op1->gtFlags & GTF_REG_VAL);
7067 regNumber op1Reg = op1->gtRegNum;
7069 // Setup needReg with the set of register that we require for op1 to be in
7071 needReg = RBM_ALLINT;
7073 /* Compute a useful register mask */
7074 needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
7075 needReg = regSet.rsNarrowHint (needReg, regSet.rsRegMaskFree());
7077 #if CPU_HAS_BYTE_REGS
7078 /* 8-bit operations can only be done in the byte-regs */
7079 if (varTypeIsByte(treeType))
7080 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
7081 #endif // CPU_HAS_BYTE_REGS
7083 // Did we end up in an acceptable register?
7084 // and do we have an acceptable free register available to grab?
7086 if ( ((genRegMask(op1Reg) & needReg) == 0) &&
7087 ((regSet.rsRegMaskFree() & needReg) != 0) )
7089 // See if we can pick a register from bestReg
7092 // Grab an acceptable register
7094 if ((bestReg & regSet.rsRegMaskFree()) != 0)
7095 newReg = regSet.rsGrabReg(bestReg);
7097 newReg = regSet.rsGrabReg(needReg);
7099 noway_assert(op1Reg != newReg);
7101 /* Update the value in the target register */
7103 regTracker.rsTrackRegCopy(newReg, op1Reg);
7105 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
7107 /* The value has been transferred to 'reg' */
7109 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
7110 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
7112 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
7114 /* The value is now in an appropriate register */
7116 op1->gtRegNum = newReg;
7118 noway_assert(op1->gtFlags & GTF_REG_VAL);
7119 op1Reg = op1->gtRegNum;
7123 /* Mark the register as 'used' */
7124 regSet.rsMarkRegUsed(op1);
7126 bool isSmallConst = false;
7129 if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
7131 isSmallConst = true;
7134 /* Make the second operand addressable */
7136 regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
7138 #if CPU_LOAD_STORE_ARCH
// Load/store architectures must have op1 back in a register before the ALU op.
7139 genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
7140 #else // !CPU_LOAD_STORE_ARCH
7141 /* Is op1 spilled and op2 in a register? */
7143 if ((op1->gtFlags & GTF_SPILLED) &&
7144 (op2->gtFlags & GTF_REG_VAL) &&
7147 noway_assert(ins == INS_add ||
7153 // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
7154 noway_assert(op2->gtOper != GT_LCL_VAR ||
7155 varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
7157 reg = op2->gtRegNum;
7158 regMaskTP regMask = genRegMask(reg);
7160 /* Is the register holding op2 available? */
7162 if (regMask & regSet.rsMaskVars)
7167 /* Get the temp we spilled into. */
7169 TempDsc * temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
7171 /* For 8bit operations, we need to make sure that op2 is
7172 in a byte-addressable registers */
7174 if (varTypeIsByte(treeType) &&
7175 !(regMask & RBM_BYTE_REGS))
7177 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7179 inst_RV_RV(INS_mov, byteReg, reg);
7180 regTracker.rsTrackRegTrash(byteReg);
7182 /* op2 couldn't have spilled as it was not sitting in
7183 RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
7184 noway_assert(op2->gtFlags & GTF_REG_VAL);
7186 regSet.rsUnlockReg (regMask);
7187 regSet.rsMarkRegFree(regMask);
7190 regMask = genRegMask(reg);
7191 op2->gtRegNum = reg;
7192 regSet.rsMarkRegUsed(op2);
7195 inst_RV_ST(ins, reg, temp, 0, treeType);
7197 regTracker.rsTrackRegTrash(reg);
7201 compiler->tmpRlsTemp(temp);
7203 /* 'add'/'sub' set all CC flags, others only ZF */
7205 /* If we need to check overflow, for small types, the
7206 * flags can't be used as we perform the arithmetic
7207 * operation (on small registers) and then sign extend it
7209 * NOTE : If we ever don't need to sign-extend the result,
7210 * we can use the flags
7213 if (tree->gtSetFlags())
7215 genFlagsEqualToReg(tree, reg);
7218 /* The result is where the second operand is sitting. Mark result reg as free */
7219 regSet.rsMarkRegFree(genRegMask(reg)) ;
7221 gcInfo.gcMarkRegPtrVal(reg, treeType);
7226 #endif // !CPU_LOAD_STORE_ARCH
7228 /* Make sure the first operand is still in a register */
7229 regSet.rsLockUsedReg(addrReg);
7230 genRecoverReg(op1, 0, RegSet::KEEP_REG);
7231 noway_assert(op1->gtFlags & GTF_REG_VAL);
7232 regSet.rsUnlockUsedReg(addrReg);
7234 reg = op1->gtRegNum;
7236 // For 8 bit operations, we need to pick byte addressable registers
7238 if (varTypeIsByte(treeType) &&
7239 !(genRegMask(reg) & RBM_BYTE_REGS))
7241 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7243 inst_RV_RV(INS_mov, byteReg, reg);
7245 regTracker.rsTrackRegTrash(byteReg);
7246 regSet.rsMarkRegFree (genRegMask(reg));
7249 op1->gtRegNum = reg;
7250 regSet.rsMarkRegUsed(op1);
7253 /* Make sure the operand is still addressable */
7254 addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
7256 /* Free up the operand, if it's a regvar */
7260 /* The register is about to be trashed */
7262 regTracker.rsTrackRegTrash(reg);
7264 bool op2Released = false;
7266 // For overflow instructions, tree->gtType is the accurate type,
7267 // and gives us the size for the operands.
7269 emitAttr opSize = emitTypeSize(treeType);
7271 /* Compute the new value */
// Constant RHS: route through the inc/dec/mul-by-immediate helpers.
7275 (op2->OperKind() & GTK_CONST)
7276 #if !CPU_HAS_FP_SUPPORT
7277 && (treeType == TYP_INT || treeType == TYP_I_IMPL)
7281 ssize_t ival = op2->gtIntCon.gtIconVal;
7285 genIncRegBy(reg, ival, tree, treeType, ovfl);
7287 else if (oper == GT_SUB)
7290 ((tree->gtFlags & GTF_UNSIGNED) ||
7291 (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. Therefore we can't use -ival.
7294 /* For unsigned overflow, we have to use INS_sub to set
7295 the flags correctly */
7297 genDecRegBy(reg, ival, tree);
7301 /* Else, we simply add the negative of the value */
7303 genIncRegBy(reg, -ival, tree, treeType, ovfl);
7306 else if (oper == GT_MUL)
7308 genMulRegBy(reg, ival, tree, treeType, ovfl);
7313 // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
7314 op2 = op2->gtEffectiveVal();
7315 if (varTypeIsByte(treeType) && op2->InReg())
7317 noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
7319 regNumber op2reg = op2->gtRegNum;
7320 regMaskTP op2regMask = genRegMask(op2reg);
7322 if (!(op2regMask & RBM_BYTE_REGS))
7324 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7326 inst_RV_RV(INS_mov, byteReg, op2reg);
7327 regTracker.rsTrackRegTrash(byteReg);
7329 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7332 op2->gtRegNum = byteReg;
7336 inst_RV_TT(ins, reg, op2, 0, opSize, flags);
7339 /* Free up anything that was tied up by the operand */
7342 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7344 /* The result will be where the first operand is sitting */
7346 /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
7347 genRecoverReg(op1, 0, RegSet::KEEP_REG);
7349 reg = op1->gtRegNum;
7351 /* 'add'/'sub' set all CC flags, others only ZF+SF */
7353 if (tree->gtSetFlags())
7354 genFlagsEqualToReg(tree, reg);
7358 #if !CPU_LOAD_STORE_ARCH
7360 #endif // !CPU_LOAD_STORE_ARCH
7362 /* Do we need an overflow check */
7365 genCheckOverflow(tree);
7367 genCodeForTree_DONE(tree, reg);
7371 /*****************************************************************************
7373 * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7374 * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
7377 void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree,
// Generate code for a read-modify-write assignment operator "x <op>= y":
// GT_ASG_AND / GT_ASG_OR / GT_ASG_XOR / GT_ASG_ADD / GT_ASG_SUB.
// The target (op1) may be a register variable, an enregistered local, or a
// memory location; the RHS (op2) may be a 32-bit constant or a general tree.
// NOTE(review): this listing is elided — braces, 'else' arms, the remaining
// parameters (presumably destReg/bestReg) and some statements between the
// numbered lines are not visible here; confirm against the full source.
7382 const genTreeOps oper = tree->OperGet();
7383 const var_types treeType = tree->TypeGet();
7384 GenTreePtr op1 = tree->gtOp.gtOp1;
7385 GenTreePtr op2 = tree->gtGetOp2();
7386 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
7387 regNumber reg = DUMMY_INIT(REG_CORRUPT);
7388 regMaskTP needReg = destReg;
7391 /* Figure out what instruction to generate */
7396 case GT_ASG_AND: ins = INS_AND; isArith = false; break;
7397 case GT_ASG_OR : ins = INS_OR ; isArith = false; break;
7398 case GT_ASG_XOR: ins = INS_XOR; isArith = false; break;
7399 case GT_ASG_ADD: ins = INS_add; isArith = true; break;
7400 case GT_ASG_SUB: ins = INS_sub; isArith = true; break;
7409 // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
7411 ovfl = tree->gtOverflow();
7413 // We can't use += with overflow if the value cannot be changed
7414 // in case of an overflow-exception which the "+" might cause
7415 noway_assert(!ovfl ||
7416 ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) &&
7417 !compiler->compCurBB->hasTryIndex()));
7419 /* Do not allow overflow instructions with refs/byrefs */
7421 noway_assert(!ovfl || !varTypeIsGC(treeType));
7423 // We disallow overflow and byte-ops here as it is too much trouble
7424 noway_assert(!ovfl || !varTypeIsByte(treeType));
7426 /* Is the second operand a constant? */
7428 if (op2->IsIntCnsFitsInI32())
7430 int ival = (int)op2->gtIntCon.gtIconVal;
7432 /* What is the target of the assignment? */
7434 switch (op1->gtOper)
// Case: target is a register variable — operate on it directly.
7440 reg = op1->gtRegVar.gtRegNum;
7442 /* No registers are needed for addressing */
7445 #if !CPU_LOAD_STORE_ARCH
7448 /* We're adding a constant to a register */
7450 if (oper == GT_ASG_ADD)
7451 genIncRegBy(reg, ival, tree, treeType, ovfl);
7453 ((tree->gtFlags & GTF_UNSIGNED) || ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 == 0x80000000. Therefore we can't use -ival.
7455 /* For unsigned overflow, we have to use INS_sub to set
7456 the flags correctly */
7457 genDecRegBy(reg, ival, tree);
7459 genIncRegBy(reg, -ival, tree, treeType, ovfl);
7465 /* Does the variable live in a register? */
7467 if (genMarkLclVar(op1))
7474 /* Make the target addressable for load/store */
7475 addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
7477 #if CPU_LOAD_STORE_ARCH
7478 // We always load from memory then store to memory
7480 /* For small types with overflow check, we need to
7481 sign/zero extend the result, so we need it in a reg */
7483 if (ovfl && genTypeSize(treeType) < sizeof(int))
7484 #endif // CPU_LOAD_STORE_ARCH
7486 // Load op1 into a reg
7488 reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
7490 inst_RV_TT(INS_mov, reg, op1);
7492 // Issue the add/sub and the overflow check
7494 inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
7495 regTracker.rsTrackRegTrash(reg);
7499 genCheckOverflow(tree);
7502 /* Store the (sign/zero extended) result back to
7503 the stack location of the variable */
7505 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7509 #if !CPU_LOAD_STORE_ARCH
7512 /* Add/subtract the new value into/from the target */
7514 if (op1->gtFlags & GTF_REG_VAL)
7516 reg = op1->gtRegNum;
7520 /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
7521 if (!ovfl && (ival == 1 || ival == -1) && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
7523 noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
7524 if (oper == GT_ASG_SUB)
7527 ins = (ival > 0) ? INS_inc : INS_dec;
// Apply the op directly to the memory/register target.
7532 inst_TT_IV(ins, op1, ival);
7535 if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
7537 if (tree->gtSetFlags())
7538 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
7543 #endif // !CPU_LOAD_STORE_ARCH
7544 } // end switch (op1->gtOper)
7546 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7548 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7550 } // end if (op2->IsIntCnsFitsInI32())
7551 } // end if (isArith)
7553 noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
7555 /* Is the target a register or local variable? */
7557 switch (op1->gtOper)
7561 /* Does the target variable live in a register? */
7563 if (!genMarkLclVar(op1))
7570 /* Get hold of the target register */
7572 reg = op1->gtRegVar.gtRegNum;
7574 /* Make sure the target of the store is available */
7576 if (regSet.rsMaskUsed & genRegMask(reg))
7578 regSet.rsSpillReg(reg);
7581 /* Make the RHS addressable */
7583 addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
7585 /* Compute the new value into the target register */
7587 #if CPU_HAS_BYTE_REGS
7588 // Fix 383833 X86 ILGEN
7590 if ((op2->gtFlags & GTF_REG_VAL) != 0)
7592 reg2 = op2->gtRegNum;
7599 // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers
7600 // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register
7602 if (varTypeIsByte(treeType) &&
7603 (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
7605 // We will force op2 into a register (via sign/zero extending load)
7606 // for the cases where op2 is in memory and thus could have
7607 // an unmapped page just beyond its location
7609 if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
7611 genCodeForTree(op2, 0);
7612 assert((op2->gtFlags & GTF_REG_VAL) != 0);
// Perform the op at 4-byte width, then (possibly) normalize the small result.
7615 inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
7617 bool canOmit = false;
7619 if (varTypeIsUnsigned(treeType))
7621 // When op2 is a byte sized constant we can omit the zero extend instruction
7622 if ((op2->gtOper == GT_CNS_INT) &&
7623 ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
7628 else // treeType is signed
7630 // When op2 is a positive 7-bit or smaller constant
7631 // we can omit the sign extension sequence.
7632 if ((op2->gtOper == GT_CNS_INT) &&
7633 ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
7641 // If reg is a byte reg then we can use a movzx/movsx instruction
7643 if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
7645 instruction extendIns = ins_Move_Extend(treeType, true);
7646 inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
7648 else // we can't encode a movzx/movsx instruction
7650 if (varTypeIsUnsigned(treeType))
7652 // otherwise, we must zero the upper 24 bits of 'reg'
7653 inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
7655 else // treeType is signed
7657 // otherwise, we must sign extend the result in the non-byteable register 'reg'
7658 // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
7659 // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly.
7661 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
7662 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
7668 #endif // CPU_HAS_BYTE_REGS
7670 inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
7673 /* The zero flag is now equal to the register value */
7675 if (tree->gtSetFlags())
7676 genFlagsEqualToReg(tree, reg);
7678 /* Remember that we trashed the target */
7680 regTracker.rsTrackRegTrash(reg);
7682 /* Free up anything that was tied up by the RHS */
7684 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7686 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7691 } // end switch (op1->gtOper)
7693 #if !CPU_LOAD_STORE_ARCH
7694 /* Special case: "x ^= -1" is actually "not(x)" */
7696 if (oper == GT_ASG_XOR)
7698 if (op2->gtOper == GT_CNS_INT &&
7699 op2->gtIntCon.gtIconVal == -1)
7701 addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
7702 inst_TT(INS_NOT, op1);
7703 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7705 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7709 #endif // !CPU_LOAD_STORE_ARCH
7711 /* Setup target mask for op2 (byte-regs for small operands) */
7714 needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS
7717 /* Is the second operand a constant? */
7719 if (op2->IsIntCnsFitsInI32())
7721 int ival = (int)op2->gtIntCon.gtIconVal;
7723 /* Make the target addressable */
7724 addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
7726 inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
7728 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
7730 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7734 /* Is the value or the address to be computed first? */
7736 if (tree->gtFlags & GTF_REVERSE_OPS)
// Reverse order: evaluate the RHS value first, then address the target.
7738 /* Compute the new value into a register */
7740 genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7742 /* Make the target addressable for load/store */
7743 addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
7744 regSet.rsLockUsedReg(addrReg);
7746 #if CPU_LOAD_STORE_ARCH
7747 // We always load from memory then store to memory
7749 /* For small types with overflow check, we need to
7750 sign/zero extend the result, so we need it in a reg */
7752 if (ovfl && genTypeSize(treeType) < sizeof(int))
7753 #endif // CPU_LOAD_STORE_ARCH
7755 reg = regSet.rsPickReg();
7756 regSet.rsLockReg(genRegMask(reg));
7758 noway_assert(genIsValidReg(reg));
7760 /* Generate "ldr reg, [var]" */
7762 inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
7764 if (op1->gtOper == GT_LCL_VAR)
7765 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7767 regTracker.rsTrackRegTrash(reg);
7769 /* Make sure the new value is in a register */
7771 genRecoverReg(op2, 0, RegSet::KEEP_REG);
7773 /* Compute the new value */
7775 inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7778 genCheckOverflow(tree);
7780 /* Move the new value back to the variable */
7781 /* Generate "str reg, [var]" */
7783 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7784 regSet.rsUnlockReg(genRegMask(reg));
7786 if (op1->gtOper == GT_LCL_VAR)
7787 regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
7789 #if !CPU_LOAD_STORE_ARCH
7792 /* Make sure the new value is in a register */
7794 genRecoverReg(op2, 0, RegSet::KEEP_REG);
7796 /* Add the new value into the target */
7798 inst_TT_RV(ins, op1, op2->gtRegNum);
7800 #endif // !CPU_LOAD_STORE_ARCH
7801 /* Free up anything that was tied up either side */
7802 regSet.rsUnlockUsedReg(addrReg);
7803 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7804 genReleaseReg (op2);
// Normal order: address the target first, then evaluate the RHS value.
7808 /* Make the target addressable */
7810 addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
7812 /* Compute the new value into a register */
7814 genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7815 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
7817 /* Make sure the target is still addressable */
7819 addrReg = genKeepAddressable(op1, addrReg);
7820 regSet.rsLockUsedReg(addrReg);
7822 #if CPU_LOAD_STORE_ARCH
7823 // We always load from memory then store to memory
7825 /* For small types with overflow check, we need to
7826 sign/zero extend the result, so we need it in a reg */
7828 if (ovfl && genTypeSize(treeType) < sizeof(int))
7829 #endif // CPU_LOAD_STORE_ARCH
7831 reg = regSet.rsPickReg();
7833 inst_RV_TT(INS_mov, reg, op1);
7835 inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7836 regTracker.rsTrackRegTrash(reg);
7839 genCheckOverflow(tree);
7841 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7843 if (op1->gtOper == GT_LCL_VAR)
7844 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7846 #if !CPU_LOAD_STORE_ARCH
7849 /* Add the new value into the target */
7851 inst_TT_RV(ins, op1, op2->gtRegNum);
7855 /* Free up anything that was tied up either side */
7856 regSet.rsUnlockUsedReg(addrReg);
7857 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7859 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
7860 genReleaseReg (op2);
7863 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7867 /*****************************************************************************
7869 * Generate code for GT_UMOD (unsigned integer modulo).
*
* Fast path: when the divisor is a constant accepted by
* fgIsUnsignedModOptimizable (presumably a power of two -- the AND-mask
* transform below is only correct for power-of-2 divisors; confirm against
* that predicate), "op1 % ival" is computed as "op1 & (ival - 1)".
* Otherwise falls back to genCodeForGeneralDivide.
*/
7872 void CodeGen::genCodeForUnsignedMod(GenTreePtr tree,
7876 assert(tree->OperGet() == GT_UMOD);
7878 GenTreePtr op1 = tree->gtOp.gtOp1;
7879 GenTreePtr op2 = tree->gtOp.gtOp2;
7880 const var_types treeType = tree->TypeGet();
7881 regMaskTP needReg = destReg;
7884 /* Is this a division by an integer constant? */
7887 if (compiler->fgIsUnsignedModOptimizable(op2))
7889 /* Generate the operand (the dividend) into some register */
7891 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7892 noway_assert(op1->gtFlags & GTF_REG_VAL);
7894 reg = op1->gtRegNum;
7896 /* Generate the appropriate sequence: "and reg, (divisor - 1)" */
7897 size_t ival = op2->gtIntCon.gtIconVal - 1;
7898 inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
7900 /* The register is now trashed */
7902 regTracker.rsTrackRegTrash(reg);
7904 genCodeForTree_DONE(tree, reg);
7908 genCodeForGeneralDivide(tree, destReg, bestReg);
7912 /*****************************************************************************
7914 * Generate code for GT_MOD (signed integer modulo).
*
* Fast path: when the divisor is a constant accepted by
* fgIsSignedModOptimizable (the AND-mask sequence below implies a power-of-2
* divisor -- confirm against that predicate), the remainder is computed with
* an AND, followed by a dec/or/inc fixup sequence that is executed only for
* negative dividends. Otherwise falls back to genCodeForGeneralDivide.
*/
7917 void CodeGen::genCodeForSignedMod(GenTreePtr tree,
7921 assert(tree->OperGet() == GT_MOD);
7923 GenTreePtr op1 = tree->gtOp.gtOp1;
7924 GenTreePtr op2 = tree->gtOp.gtOp2;
7925 const var_types treeType = tree->TypeGet();
7926 regMaskTP needReg = destReg;
7929 /* Is this a division by an integer constant? */
7932 if (compiler->fgIsSignedModOptimizable(op2))
7934 ssize_t ival = op2->gtIntCon.gtIconVal;
7935 BasicBlock * skip = genCreateTempLabel();
7937 /* Generate the operand (the dividend) into some register */
7939 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7940 noway_assert(op1->gtFlags & GTF_REG_VAL);
7942 reg = op1->gtRegNum;
7944 /* Mask off the low bits; the 0x80000000 bit is kept in the mask so the
sign information survives the AND and the jump below can distinguish
negative dividends (NOTE: flags are set by this instruction) */
7946 inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
7948 /* The register is now trashed */
7950 regTracker.rsTrackRegTrash(reg);
7952 /* Check and branch for a positive value -- no fixup needed in that case */
7953 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7954 inst_JMP(jmpGEL, skip);
7956 /* Negative dividend: generate the rest of the fixup sequence (dec/or/inc) */
7958 genIncRegBy(reg, -1, NULL, treeType);
7960 if ((treeType == TYP_LONG) && ((int)ival != ival))
// The OR constant does not fit in 32 bits: materialize it in a
// scratch register (any integer register other than 'reg') first
7962 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
7963 instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
7964 inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
7968 inst_RV_IV (INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
7970 genIncRegBy(reg, 1, NULL, treeType);
7972 /* Define the 'skip' label and we're done */
7974 genDefineTempLabel(skip);
7976 genCodeForTree_DONE(tree, reg);
7980 genCodeForGeneralDivide(tree, destReg, bestReg);
7984 /*****************************************************************************
7986 * Generate code for GT_UDIV (unsigned integer division).
*
* Fast path: when the divisor is a constant accepted by
* fgIsUnsignedDivOptimizable (presumably a power of two -- the logical
* right-shift below is only correct for power-of-2 divisors; confirm against
* that predicate), "op1 / ival" is computed as "op1 >> log2(ival)".
* Otherwise falls back to genCodeForGeneralDivide.
*/
7989 void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree,
7993 assert(tree->OperGet() == GT_UDIV);
7995 GenTreePtr op1 = tree->gtOp.gtOp1;
7996 GenTreePtr op2 = tree->gtOp.gtOp2;
7997 const var_types treeType = tree->TypeGet();
7998 regMaskTP needReg = destReg;
8001 /* Is this a division by an integer constant? */
8004 if (compiler->fgIsUnsignedDivOptimizable(op2))
8006 size_t ival = op2->gtIntCon.gtIconVal;
8008 /* Division by 1 must be handled elsewhere (it should have been folded) */
8010 noway_assert(ival != 1 || compiler->opts.MinOpts());
8012 /* Generate the operand (the dividend) into some register */
8014 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
8015 noway_assert(op1->gtFlags & GTF_REG_VAL);
8017 reg = op1->gtRegNum;
8019 /* Generate "shr reg, log2(value)" */
8021 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
8023 /* The register is now trashed */
8025 regTracker.rsTrackRegTrash(reg);
8027 genCodeForTree_DONE(tree, reg);
8031 genCodeForGeneralDivide(tree, destReg, bestReg);
8035 /*****************************************************************************
8037 * Generate code for GT_DIV (signed integer division).
*
* Fast path: when the divisor is a positive constant accepted by
* fgIsSignedDivOptimizable (a power of two, per the genLog2 shifts below),
* the quotient is produced with an arithmetic right shift plus a fixup for
* negative dividends (which would otherwise round toward negative infinity
* instead of toward zero). Two alternative fixup sequences appear below;
* they are selected by target-specific preprocessor conditionals that are
* not visible in this view: one uses the carry flag left by the shift
* (INS_ADDC), the other compares against zero and adds (divisor - 1) before
* shifting. Falls back to genCodeForGeneralDivide otherwise.
*/
8040 void CodeGen::genCodeForSignedDiv(GenTreePtr tree,
8044 assert(tree->OperGet() == GT_DIV);
8046 GenTreePtr op1 = tree->gtOp.gtOp1;
8047 GenTreePtr op2 = tree->gtOp.gtOp2;
8048 const var_types treeType = tree->TypeGet();
8049 regMaskTP needReg = destReg;
8052 /* Is this a division by an integer constant? */
8055 if (compiler->fgIsSignedDivOptimizable(op2))
8057 ssize_t ival_s = op2->gtIntConCommon.IconValue();
8058 assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
8059 size_t ival = static_cast<size_t>(ival_s);
8061 /* Division by 1 must be handled elsewhere (it should have been folded) */
8063 noway_assert(ival != 1);
8065 BasicBlock * onNegDivisee = genCreateTempLabel();
8067 /* Generate the operand (the dividend) into some register */
8069 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
8070 noway_assert(op1->gtFlags & GTF_REG_VAL);
8072 reg = op1->gtRegNum;
8076 /* Generate "sar reg, log2(value)", setting the flags (the carry holds
the last bit shifted out, used by the INS_ADDC fixup below) */
8078 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
8080 // Check and branch for a positive value, skipping the INS_ADDC instruction
8081 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
8082 inst_JMP(jmpGEL, onNegDivisee);
8084 // Add the carry flag to 'reg' (rounds the negative quotient toward zero)
8085 inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
8087 /* Define the 'onNegDivisee' label and we're done */
8089 genDefineTempLabel(onNegDivisee);
8091 /* The register is now trashed */
8093 regTracker.rsTrackRegTrash(reg);
8095 /* The result is the same as the operand */
8097 reg = op1->gtRegNum;
8101 /* Generate the following sequence (alternative fixup: bias negative
dividends by (divisor - 1) before the arithmetic shift) */
8110 instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
8112 // Check and branch for a positive value, skipping the INS_add instruction
8113 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
8114 inst_JMP(jmpGEL, onNegDivisee);
8116 inst_RV_IV(INS_add, reg, (int)ival-1, emitActualTypeSize(treeType));
8118 /* Define the 'onNegDivisee' label and we're done */
8120 genDefineTempLabel(onNegDivisee);
8122 /* Generate "sar reg, log2(value)" */
8124 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
8126 /* The register is now trashed */
8128 regTracker.rsTrackRegTrash(reg);
8130 /* The result is the same as the operand */
8132 reg = op1->gtRegNum;
8135 genCodeForTree_DONE(tree, reg);
8139 genCodeForGeneralDivide(tree, destReg, bestReg);
8143 /*****************************************************************************
8145 * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
8146 * (if op2 is not a power of 2 constant).
*
* On XARCH: the dividend is forced into EAX, EDX is grabbed and locked for
* the high half / remainder, and a div/idiv against the addressable divisor
* leaves the quotient in EAX and the remainder in EDX.
* On ARM: both operands are computed into registers, a udiv/sdiv produces
* the quotient, and for the mod forms the remainder is reconstructed as
* dividend - quotient * divisor (mul + sub).
*/
8149 void CodeGen::genCodeForGeneralDivide(GenTreePtr tree,
8153 assert(tree->OperGet() == GT_UMOD ||
8154 tree->OperGet() == GT_MOD ||
8155 tree->OperGet() == GT_UDIV ||
8156 tree->OperGet() == GT_DIV);
8158 GenTreePtr op1 = tree->gtOp.gtOp1;
8159 GenTreePtr op2 = tree->gtOp.gtOp2;
8160 const var_types treeType = tree->TypeGet();
8161 regMaskTP needReg = destReg;
8167 #if USE_HELPERS_FOR_INT_DIV
8168 noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
8171 #if defined(_TARGET_XARCH_)
8173 /* Which operand are we supposed to evaluate first? */
8175 if (tree->gtFlags & GTF_REVERSE_OPS)
8177 /* We'll evaluate 'op2' first */
8180 destReg &= ~op1->gtRsvdRegs;
8182 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8183 if (op1->gtOper == GT_LCL_VAR)
8185 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8186 noway_assert(varNum < compiler->lvaCount);
8187 LclVarDsc* varDsc = compiler->lvaTable + varNum;
8188 if (varDsc->lvRegister)
8190 destReg &= ~genRegMask(varDsc->lvRegNum);
8196 /* We'll evaluate 'op1' first */
8201 if (RBM_EAX & op2->gtRsvdRegs)
8202 op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8204 op1Mask = RBM_EAX; // EAX would be ideal
8206 /* Generate the dividend into EAX and hold on to it. freeOnly=true */
8208 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8211 /* We want to avoid using EAX or EDX for the second operand */
8213 destReg = regSet.rsMustExclude(destReg, RBM_EAX|RBM_EDX);
8215 /* Make the second operand addressable */
8216 op2 = genCodeForCommaTree(op2);
8218 /* Special case: if op2 is a local var we are done */
8220 if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
8222 if ((op2->gtFlags & GTF_REG_VAL) == 0)
8223 addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
8229 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8231 noway_assert(op2->gtFlags & GTF_REG_VAL);
8232 addrReg = genRegMask(op2->gtRegNum);
8235 /* Make sure we have the dividend in EAX */
8239 /* We've previously computed op1 into EAX */
8241 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
8245 /* Compute op1 into EAX and hold on to it */
8247 genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8250 noway_assert(op1->gtFlags & GTF_REG_VAL);
8251 noway_assert(op1->gtRegNum == REG_EAX);
8253 /* We can now safely (we think) grab EDX */
8255 regSet.rsGrabReg(RBM_EDX);
8256 regSet.rsLockReg(RBM_EDX);
8258 /* Convert the integer in EAX into a un/signed long in EDX:EAX */
8260 const genTreeOps oper = tree->OperGet();
8262 if (oper == GT_UMOD || oper == GT_UDIV)
8263 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
8267 /* Make sure the divisor is still addressable */
8269 addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
8271 /* Perform the division */
8273 if (oper == GT_UMOD || oper == GT_UDIV)
8274 inst_TT(INS_UNSIGNED_DIVIDE, op2);
8276 inst_TT(INS_SIGNED_DIVIDE, op2);
8278 /* Free up anything tied up by the divisor's address */
8280 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
8282 /* Unlock and free EDX */
8284 regSet.rsUnlockReg(RBM_EDX);
8286 /* Free up op1 (which is in EAX) as well */
8290 /* Both EAX and EDX are now trashed */
8292 regTracker.rsTrackRegTrash (REG_EAX);
8293 regTracker.rsTrackRegTrash (REG_EDX);
8295 /* Figure out which register the result is in: quotient in EAX, remainder in EDX */
8297 reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX
8300 /* Don't forget to mark the first operand as using EAX and EDX */
8302 op1->gtRegNum = reg;
8304 genCodeForTree_DONE(tree, reg);
8306 #elif defined(_TARGET_ARM_)
8308 /* Which operand are we supposed to evaluate first? */
8310 if (tree->gtFlags & GTF_REVERSE_OPS)
8312 /* We'll evaluate 'op2' first */
8315 destReg &= ~op1->gtRsvdRegs;
8317 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8318 if (op1->gtOper == GT_LCL_VAR)
8320 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8321 noway_assert(varNum < compiler->lvaCount);
8322 LclVarDsc* varDsc = compiler->lvaTable + varNum;
8323 if (varDsc->lvRegister)
8325 destReg &= ~genRegMask(varDsc->lvRegNum);
8331 /* We'll evaluate 'op1' first */
8334 regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8336 /* Generate the dividend into a register and hold on to it. */
8338 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8341 /* Evaluate the second operand into a register and hold onto it. */
8343 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8345 noway_assert(op2->gtFlags & GTF_REG_VAL);
8346 addrReg = genRegMask(op2->gtRegNum);
8350 // Recover op1 if spilled
8351 genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
8355 /* Compute op1 into any register and hold on to it */
8356 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8358 noway_assert(op1->gtFlags & GTF_REG_VAL);
8360 reg = regSet.rsPickReg(needReg, bestReg);
8362 // Perform the division
8364 const genTreeOps oper = tree->OperGet();
8366 if (oper == GT_UMOD || oper == GT_UDIV)
8371 getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
8373 if (oper == GT_UMOD || oper == GT_MOD)
// Remainder = dividend - quotient * divisor
8375 getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
8376 getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
8378 /* Free up op1 and op2 */
8382 genCodeForTree_DONE(tree, reg);
8385 #error "Unknown _TARGET_"
8390 /*****************************************************************************
8392 * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
*
* Constant shift counts are applied directly to the addressable target
* (with a load/modify/store sequence on load-store architectures).
* Variable shift counts are computed into the shift register (ECX on x86,
* RBM_SHIFT when the target defines one) and then applied to the target.
*/
8395 void CodeGen::genCodeForAsgShift(GenTreePtr tree,
8399 assert(tree->OperGet() == GT_ASG_LSH ||
8400 tree->OperGet() == GT_ASG_RSH ||
8401 tree->OperGet() == GT_ASG_RSZ);
8403 const genTreeOps oper = tree->OperGet();
8404 GenTreePtr op1 = tree->gtOp.gtOp1;
8405 GenTreePtr op2 = tree->gtOp.gtOp2;
8406 const var_types treeType = tree->TypeGet();
8407 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8408 regMaskTP needReg = destReg;
8415 case GT_ASG_LSH: ins = INS_SHIFT_LEFT_LOGICAL; break;
8416 case GT_ASG_RSH: ins = INS_SHIFT_RIGHT_ARITHM; break;
8417 case GT_ASG_RSZ: ins = INS_SHIFT_RIGHT_LOGICAL; break;
8422 noway_assert(!varTypeIsGC(treeType));
8425 /* Shifts by a constant amount are easier */
8427 if (op2->IsCnsIntOrI())
8429 /* Make the target addressable */
8431 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
8433 /* Are we shifting a register left by 1 bit? */
8435 if ((oper == GT_ASG_LSH) &&
8436 (op2->gtIntCon.gtIconVal == 1) &&
8437 (op1->gtFlags & GTF_REG_VAL))
8439 /* The target lives in a register */
8441 reg = op1->gtRegNum;
8443 /* "add reg, reg" is cheaper than "shl reg, 1" */
8445 inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
8449 #if CPU_LOAD_STORE_ARCH
8450 if ((op1->gtFlags & GTF_REG_VAL) == 0)
8452 regSet.rsLockUsedReg(addrReg);
8454 // Load op1 into a reg (load-store architecture: can't shift memory in place)
8456 reg = regSet.rsPickReg(RBM_ALLINT);
8458 inst_RV_TT(INS_mov, reg, op1);
8462 inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
8463 regTracker.rsTrackRegTrash(reg);
8465 /* Store the (sign/zero extended) result back to the stack location of the variable */
8467 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
8469 regSet.rsUnlockUsedReg(addrReg);
8472 #endif // CPU_LOAD_STORE_ARCH
8474 /* Shift by the constant value */
8476 inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
8480 /* If the target is a register, it has a new value */
8482 if (op1->gtFlags & GTF_REG_VAL)
8483 regTracker.rsTrackRegTrash(op1->gtRegNum);
8485 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8487 /* The zero flag is now equal to the target value */
8488 /* X86: But only if the shift count is != 0 */
8490 if (op2->gtIntCon.gtIconVal != 0)
8492 if (tree->gtSetFlags())
8494 if (op1->gtOper == GT_LCL_VAR)
8496 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
8498 else if (op1->gtOper == GT_REG_VAR)
8500 genFlagsEqualToReg(tree, op1->gtRegNum);
8506 // It is possible for the shift count to equal 0 with valid
8507 // IL, and not be optimized away, in the case where the node
8508 // is of a small type. The sequence of instructions looks like
8509 // ldsfld, shr, stsfld and executed on a char field. This will
8510 // never happen with code produced by our compilers, because the
8511 // compilers will insert a conv.u2 before the stsfld (which will
8512 // lead us down a different codepath in the JIT and optimize away
8513 // the shift by zero). This case is not worth optimizing and we
8514 // will just make sure to generate correct code for it.
8516 genFlagsEqualToNone();
8521 regMaskTP op2Regs = RBM_NONE;
8522 if (REG_SHIFT != REG_NA)
8523 op2Regs = RBM_SHIFT;
8527 if (tree->gtFlags & GTF_REVERSE_OPS)
8529 tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
8530 genCodeForTree(op2, tempRegs);
8531 regSet.rsMarkRegUsed(op2);
8533 tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
8534 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8536 genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
8540 /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
8541 regMaskTP excludeMask = op2->gtRsvdRegs;
8542 if (REG_SHIFT != REG_NA)
8543 excludeMask |= RBM_SHIFT;
8545 tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
8546 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8548 /* Load the shift count into the necessary register */
8549 genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
8552 /* Make sure the address registers are still here */
8553 addrReg = genKeepAddressable(op1, addrReg, op2Regs);
8555 /* Perform the shift */
8556 #ifdef _TARGET_XARCH_
8557 inst_TT_CL(ins, op1)
8559 noway_assert(op2->gtFlags & GTF_REG_VAL);
8560 op2Regs = genRegMask(op2->gtRegNum);
8562 regSet.rsLockUsedReg(addrReg | op2Regs);
8563 inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
8564 regSet.rsUnlockUsedReg(addrReg | op2Regs);
8566 /* Free the address registers */
8567 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8569 /* If the value is in a register, it's now trash */
8571 if (op1->gtFlags & GTF_REG_VAL)
8572 regTracker.rsTrackRegTrash(op1->gtRegNum);
8574 /* Release the op2 [RBM_SHIFT] operand */
8579 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
8583 /*****************************************************************************
8585 * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
*
* Constant shift counts use a single shift instruction (with a small-shift
* peephole: shifts-left by 1 or 2 become adds on non-ARM targets). Variable
* counts are loaded into the required shift register (ECX on XARCH) before
* shifting.
*/
8588 void CodeGen::genCodeForShift(GenTreePtr tree,
8592 assert(tree->OperIsShift());
8594 const genTreeOps oper = tree->OperGet();
8595 GenTreePtr op1 = tree->gtOp.gtOp1;
8596 GenTreePtr op2 = tree->gtOp.gtOp2;
8597 const var_types treeType = tree->TypeGet();
8598 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8599 regMaskTP needReg = destReg;
8605 case GT_LSH: ins = INS_SHIFT_LEFT_LOGICAL; break;
8606 case GT_RSH: ins = INS_SHIFT_RIGHT_ARITHM; break;
8607 case GT_RSZ: ins = INS_SHIFT_RIGHT_LOGICAL; break;
8612 /* Is the shift count constant? */
8614 if (op2->IsIntCnsFitsInI32())
8616 // TODO: Check to see if we could generate a LEA instead!
8618 /* Compute the left operand into any free register */
8620 genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
8622 noway_assert(op1->gtFlags & GTF_REG_VAL);
8623 reg = op1->gtRegNum;
8625 /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
8627 // On ARM, until proven otherwise by performance numbers, just do the shift.
8628 // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
8629 // It's smaller than two "add reg, reg".
8630 #ifndef _TARGET_ARM_
8633 emitAttr size = emitActualTypeSize(treeType);
8634 if (op2->gtIntConCommon.IconValue() == 1)
8636 /* "add reg, reg" is smaller and faster than "shl reg, 1" */
8637 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8639 else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
8641 /* two "add reg, reg" instructions are faster than "shl reg, 2" */
8642 inst_RV_RV(INS_add, reg, reg, treeType);
8643 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8646 goto DO_SHIFT_BY_CNS;
8649 #endif // _TARGET_ARM_
8651 #ifndef _TARGET_ARM_
8653 #endif // _TARGET_ARM_
8654 // If we are shifting 'reg' by zero bits and do not need the flags to be set
8655 // then we can just skip emitting the instruction as 'reg' is already correct.
8657 if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
8659 /* Generate the appropriate shift instruction */
8660 inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
8666 /* Calculate a useful register mask for computing op1 */
8667 needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
8668 regMaskTP op2RegMask;
8669 #ifdef _TARGET_XARCH_
8670 op2RegMask = RBM_ECX;
8672 op2RegMask = RBM_NONE;
8674 needReg = regSet.rsMustExclude(needReg, op2RegMask);
8678 /* Which operand are we supposed to evaluate first? */
8679 if (tree->gtFlags & GTF_REVERSE_OPS)
8681 /* Load the shift count [into ECX on XARCH] */
8682 tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
8683 genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8685 /* We must not target the register that is holding op2 */
8686 needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
8688 /* Now evaluate 'op1' into a free register */
8689 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8691 /* Recover op2 into ECX */
8692 genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
8696 /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
8697 tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
8698 genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8700 /* Load the shift count [into ECX on XARCH] */
8701 genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8704 noway_assert(op2->gtFlags & GTF_REG_VAL);
8705 #ifdef _TARGET_XARCH_
8706 noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
8708 // Check for the case of op1 being spilled during the evaluation of op2
8709 if (op1->gtFlags & GTF_SPILLED)
8711 // The register has been spilled -- reload it to any register except ECX
8712 regSet.rsLockUsedReg(op2RegMask);
8713 regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
8714 regSet.rsUnlockUsedReg(op2RegMask);
8717 noway_assert(op1->gtFlags & GTF_REG_VAL);
8718 reg = op1->gtRegNum;
8720 /* Perform the shift */
8722 getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
8724 inst_RV_CL(ins, reg);
8729 noway_assert(op1->gtFlags & GTF_REG_VAL);
8730 noway_assert(reg == op1->gtRegNum);
8732 /* The register is now trashed */
8734 regTracker.rsTrackRegTrash(reg);
8736 genCodeForTree_DONE(tree, reg);
8740 /*****************************************************************************
8742 * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree). Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
*
* Sets the condition flags via genCondSetFlags, then materializes the
* boolean result into a register: on ARM with a branch-over mov 0 / mov 1
* sequence, on XARCH with setcc (or sbb-based tricks when only the carry
* bit is needed).
*/
8745 void CodeGen::genCodeForRelop(GenTreePtr tree,
8749 assert(tree->OperGet() == GT_EQ ||
8750 tree->OperGet() == GT_NE ||
8751 tree->OperGet() == GT_LT ||
8752 tree->OperGet() == GT_LE ||
8753 tree->OperGet() == GT_GE ||
8754 tree->OperGet() == GT_GT);
8756 const genTreeOps oper = tree->OperGet();
8757 GenTreePtr op1 = tree->gtOp.gtOp1;
8758 const var_types treeType = tree->TypeGet();
8759 regMaskTP needReg = destReg;
8762 // Longs and float comparisons are converted to "?:"
8763 noway_assert(!compiler->fgMorphRelopToQmark(op1));
8765 // Check if we can use the currently set flags. Else set them
8767 emitJumpKind jumpKind = genCondSetFlags(tree);
8769 // Grab a register to materialize the bool value into
// (must be a byte-addressable register for the XARCH setcc below)
8771 bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
8773 // Check that the predictor did the right job
8774 noway_assert(bestReg);
8776 // If needReg is in bestReg then use it
8777 if (needReg & bestReg)
8778 reg = regSet.rsGrabReg(needReg & bestReg);
8780 reg = regSet.rsGrabReg(bestReg);
8782 #if defined(_TARGET_ARM_)
8785 // jump-if-true L_true
8792 BasicBlock * L_true;
8795 L_true = genCreateTempLabel();
8796 L_end = genCreateTempLabel();
8798 inst_JMP(jumpKind, L_true);
8799 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
8800 inst_JMP(EJ_jmp, L_end);
8801 genDefineTempLabel(L_true);
8802 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
8803 genDefineTempLabel(L_end);
8805 regTracker.rsTrackRegTrash(reg);
8807 #elif defined(_TARGET_XARCH_)
8808 regMaskTP regs = genRegMask(reg);
8809 noway_assert(regs & RBM_BYTE_REGS);
8811 // Set (lower byte of) reg according to the flags
8813 /* Look for the special case where we just want to transfer the carry bit */
8815 if (jumpKind == EJ_jb)
// carry set: sbb reg,reg gives -1, neg turns it into 1; carry clear gives 0
8817 inst_RV_RV(INS_SUBC, reg, reg);
8818 inst_RV (INS_NEG, reg, TYP_INT);
8819 regTracker.rsTrackRegTrash(reg);
8821 else if (jumpKind == EJ_jae)
// carry set: sbb reg,reg gives -1, +1 yields 0; carry clear yields 1
8823 inst_RV_RV(INS_SUBC, reg, reg);
8824 genIncRegBy(reg, 1, tree, TYP_INT);
8825 regTracker.rsTrackRegTrash(reg);
8829 inst_SET(jumpKind, reg);
8831 regTracker.rsTrackRegTrash(reg);
8833 if (treeType == TYP_INT)
8835 // Set the higher bytes to 0 (setcc only wrote the low byte)
8836 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
8840 noway_assert(treeType == TYP_BYTE);
8845 #endif // _TARGET_XXX
8847 genCodeForTree_DONE(tree, reg);
8850 void CodeGen::genCodeForBlkOp(GenTreePtr tree,
8853 genTreeOps oper = tree->OperGet();
8854 GenTreePtr op1 = tree->gtOp.gtOp1;
8855 GenTreePtr op2 = tree->gtGetOp2();
8856 regMaskTP needReg = destReg;
8857 regMaskTP regs = regSet.rsMaskUsed;
8858 GenTreePtr opsPtr[3];
8859 regMaskTP regsPtr[3];
8861 noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK);
8862 noway_assert(op1->IsList());
8865 if (tree->AsBlkOp()->IsVolatile())
8867 // Emit a memory barrier instruction before the InitBlk/CopyBlk
8868 instGen_MemoryBarrier();
8872 GenTreePtr destPtr, srcPtrOrVal;
8873 destPtr = op1->gtOp.gtOp1;
8874 srcPtrOrVal = op1->gtOp.gtOp2;
8875 noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
8876 noway_assert((oper == GT_COPYBLK &&
8877 (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet())))
8879 (oper == GT_INITBLK &&
8880 varTypeIsIntegral(srcPtrOrVal->TypeGet())));
8882 noway_assert(op1 && op1->IsList());
8883 noway_assert(destPtr && srcPtrOrVal);
8885 #if CPU_USES_BLOCK_MOVE
8886 regs = (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
8888 /* Some special code for block moves/inits for constant sizes */
8891 // Is this a fixed size COPYBLK?
8892 // or a fixed size INITBLK with a constant init value?
8894 if ((op2->IsCnsIntOrI()) &&
8895 ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI())))
8897 size_t length = (size_t)op2->gtIntCon.gtIconVal;
8899 instruction ins_P, ins_PR, ins_B;
8901 if (oper == GT_INITBLK)
8904 ins_PR = INS_r_stosp;
8907 /* Properly extend the init constant from a U1 to a U4 */
8908 initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal);
8910 /* If it is a non-zero value we have to replicate */
8911 /* the byte value four times to form the DWORD */
8912 /* Then we change this new value into the tree-node */
8916 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
8917 #ifdef _TARGET_64BIT_
8920 initVal = initVal | (initVal << 32);
8921 op1->gtOp.gtOp2->gtType = TYP_LONG;
8925 op1->gtOp.gtOp2->gtType = TYP_INT;
8927 #endif // _TARGET_64BIT_
8929 op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
8934 ins_PR = INS_r_movsp;
8938 // Determine if we will be using SSE2
8939 unsigned movqLenMin = 8;
8940 unsigned movqLenMax = 24;
8942 bool bWillUseSSE2 = false;
8943 bool bWillUseOnlySSE2 = false;
8944 bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
8946 #ifdef _TARGET_64BIT_
8948 // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
8949 // there is no point in wasting space on the bigger instructions
8951 #else // !_TARGET_64BIT_
8953 if (compiler->opts.compCanUseSSE2)
8955 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
8957 /* Adjust for BB weight */
8958 if (curBBweight == BB_ZERO_WEIGHT)
8960 // Don't bother with this optimization in
8961 // rarely run blocks
8962 movqLenMax = movqLenMin = 0;
8964 else if (curBBweight < BB_UNITY_WEIGHT)
8966 // Be less aggressive when we are inside a conditional
8969 else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2)
8971 // Be more aggressive when we are inside a loop
8975 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK))
8977 // Be more aggressive when optimizing for speed
8978 // InitBlk uses fewer instructions
8982 if (compiler->compCodeOpt() != Compiler::SMALL_CODE &&
8983 length >= movqLenMin &&
8984 length <= movqLenMax)
8986 bWillUseSSE2 = true;
8988 if ((length % 8) == 0)
8990 bWillUseOnlySSE2 = true;
8991 if (oper == GT_INITBLK && (initVal == 0))
8993 bNeedEvaluateCnst = false;
8994 noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT));
9000 #endif // !_TARGET_64BIT_
9002 const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2);
9003 /* Evaluate dest and src/val */
9005 if (op1->gtFlags & GTF_REVERSE_OPS)
9007 if (bNeedEvaluateCnst)
9009 genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9011 genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9012 if (bNeedEvaluateCnst)
9014 genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG);
9019 genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9020 if (bNeedEvaluateCnst)
9022 genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9024 genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG);
9027 bool bTrashedESI = false;
9028 bool bTrashedEDI = false;
9033 regNumber xmmReg = REG_XMM0;
9035 if (oper == GT_INITBLK)
9039 getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9040 getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9044 getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9048 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9049 length, (oper == GT_INITBLK) ? "initblk" : "copyblk", compiler->info.compFullName));
9053 if (oper == GT_INITBLK)
9055 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9059 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9060 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9068 noway_assert(bNeedEvaluateCnst);
9069 noway_assert(!bWillUseOnlySSE2);
9071 if (oper == GT_COPYBLK)
9073 inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9077 inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9080 if (length >= REGSIZE_BYTES)
9083 length -= REGSIZE_BYTES;
9087 else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9089 /* For small code, we can only use ins_DR to generate fast
9090 and small code. We also can't use "rep movsb" because
9091 we may not atomically reading and writing the DWORD */
9093 noway_assert(bNeedEvaluateCnst);
9097 else if (length <= 4 * REGSIZE_BYTES)
9099 noway_assert(bNeedEvaluateCnst);
9101 while (length >= REGSIZE_BYTES)
9104 length -= REGSIZE_BYTES;
9108 if (oper == GT_COPYBLK)
9114 noway_assert(bNeedEvaluateCnst);
9116 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9117 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9119 length &= (REGSIZE_BYTES - 1);
9123 regTracker.rsTrackRegTrash(REG_ECX);
9126 if (oper == GT_COPYBLK)
9130 /* Now take care of the remainder */
9132 #ifdef _TARGET_64BIT_
9135 noway_assert(bNeedEvaluateCnst);
9136 noway_assert(length < 8);
9138 instGen((oper == GT_INITBLK) ? INS_stosd : INS_movsd);
9142 if (oper == GT_COPYBLK)
9146 #endif // _TARGET_64BIT_
9150 noway_assert(bNeedEvaluateCnst);
9158 if (oper == GT_COPYBLK)
9162 noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9164 regTracker.rsTrackRegTrash(REG_EDI);
9166 regTracker.rsTrackRegTrash(REG_ESI);
9167 // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9169 genReleaseReg(op1->gtOp.gtOp1);
9170 if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2);
9176 // This a variable-sized COPYBLK/INITBLK,
9177 // or a fixed size INITBLK with a variable init value,
9180 // What order should the Dest, Val/Src, and Size be calculated
9182 compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX,
9183 opsPtr, regsPtr); // OUT arguments
9185 noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI)));
9186 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9187 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9188 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9190 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9191 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9193 noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
9194 (op1->gtOp.gtOp1->gtRegNum == REG_EDI));
9196 noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
9197 (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs));
9199 noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
9200 (op2->gtRegNum == REG_ECX));
9202 if (oper == GT_INITBLK)
9203 instGen(INS_r_stosb);
9205 instGen(INS_r_movsb);
9207 regTracker.rsTrackRegTrash(REG_EDI);
9208 regTracker.rsTrackRegTrash(REG_ECX);
9210 if (oper == GT_COPYBLK)
9211 regTracker.rsTrackRegTrash(REG_ESI);
9212 // else No need to trash EAX as it wasn't destroyed by the "rep stos"
9214 genReleaseReg(opsPtr[0]);
9215 genReleaseReg(opsPtr[1]);
9216 genReleaseReg(opsPtr[2]);
9219 #else // !CPU_USES_BLOCK_MOVE
9221 #ifndef _TARGET_ARM_
9222 // Currently only the ARM implementation is provided
9223 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9226 // Is this a fixed size COPYBLK?
9227 // or a fixed size INITBLK with a constant init value?
9229 if ((op2->OperGet() == GT_CNS_INT) &&
9230 ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9232 GenTreePtr dstOp = op1->gtOp.gtOp1;
9233 GenTreePtr srcOp = op1->gtOp.gtOp2;
9234 unsigned length = (unsigned)op2->gtIntCon.gtIconVal;
9235 unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
9236 unsigned initVal = 0;
9237 bool useLoop = false;
9239 if (oper == GT_INITBLK)
9241 /* Properly extend the init constant from a U1 to a U4 */
9242 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9244 /* If it is a non-zero value we have to replicate */
9245 /* the byte value four times to form the DWORD */
9246 /* Then we store this new value into the tree-node */
9250 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9251 op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
9255 // Will we be using a loop to implement this INITBLK/COPYBLK?
9256 if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) ||
9257 ((oper == GT_INITBLK) && (fullStoreCount >= 16)))
9267 /* Evaluate dest and src/val */
9269 if (op1->gtFlags & GTF_REVERSE_OPS)
9271 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9272 assert(srcOp->gtFlags & GTF_REG_VAL);
9274 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9275 assert(dstOp->gtFlags & GTF_REG_VAL);
9276 regDst = dstOp->gtRegNum;
9278 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9279 regSrc = srcOp->gtRegNum;
9283 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9284 assert(dstOp->gtFlags & GTF_REG_VAL);
9286 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9287 assert(srcOp->gtFlags & GTF_REG_VAL);
9288 regSrc = srcOp->gtRegNum;
9290 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9291 regDst = dstOp->gtRegNum;
9293 assert(dstOp->gtFlags & GTF_REG_VAL);
9294 assert(srcOp->gtFlags & GTF_REG_VAL);
9296 regDst = dstOp->gtRegNum;
9297 regSrc = srcOp->gtRegNum;
9298 usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
9299 bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9300 emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9303 if (oper == GT_COPYBLK)
9305 // Prefer a low register, but avoid one of the ones we've already grabbed
9306 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9307 usedRegs |= genRegMask(regTemp);
9308 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9309 srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9314 srcType = EA_PTRSIZE;
9317 instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
9318 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
9322 // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
9325 for (unsigned i = 0; i < fullStoreCount; i++)
9327 if (oper == GT_COPYBLK)
9329 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9330 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9331 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9332 regTracker.rsTrackRegTrash(regTemp);
9336 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9340 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9341 length -= finalOffset;
9343 else // We will use a loop to implement this INITBLK/COPYBLK
9345 unsigned pairStoreLoopCount = fullStoreCount / 2;
9347 // We need a second temp register for CopyBlk
9348 regNumber regTemp2 = REG_STK;
9349 if (oper == GT_COPYBLK)
9351 // Prefer a low register, but avoid one of the ones we've already grabbed
9352 regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9353 usedRegs |= genRegMask(regTemp2);
9356 // Pick and initialize the loop counter register
9357 regNumber regLoopIndex;
9358 regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9359 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9361 // Create and define the Basic Block for the loop top
9362 BasicBlock * loopTopBlock = genCreateTempLabel();
9363 genDefineTempLabel(loopTopBlock);
9366 if (oper == GT_COPYBLK)
9368 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9369 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9370 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9371 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9372 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9373 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9374 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9375 regTracker.rsTrackRegTrash(regSrc);
9376 regTracker.rsTrackRegTrash(regTemp);
9377 regTracker.rsTrackRegTrash(regTemp2);
9381 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9382 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
9385 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9386 regTracker.rsTrackRegTrash(regDst);
9387 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9388 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9389 inst_JMP(jmpGTS, loopTopBlock);
9391 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9393 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
9395 if (length & TARGET_POINTER_SIZE)
9397 if (oper == GT_COPYBLK)
9399 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9400 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9404 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9406 finalOffset = TARGET_POINTER_SIZE;
9407 length -= TARGET_POINTER_SIZE;
9415 if (length & sizeof(short))
9417 loadIns = ins_Load(TYP_USHORT); // INS_ldrh
9418 storeIns = ins_Store(TYP_USHORT); // INS_strh
9420 if (oper == GT_COPYBLK)
9422 getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9423 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9424 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9425 regTracker.rsTrackRegTrash(regTemp);
9429 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9431 length -= sizeof(short);
9432 finalOffset += sizeof(short);
9435 if (length & sizeof(char))
9437 loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
9438 storeIns = ins_Store(TYP_UBYTE); // INS_strb
9440 if (oper == GT_COPYBLK)
9442 getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9443 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9444 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9445 regTracker.rsTrackRegTrash(regTemp);
9449 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9451 length -= sizeof(char);
9453 assert(length == 0);
9455 genReleaseReg(dstOp);
9456 genReleaseReg(srcOp);
9461 // This a variable-sized COPYBLK/INITBLK,
9462 // or a fixed size INITBLK with a variable init value,
9465 // What order should the Dest, Val/Src, and Size be calculated
9467 compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2,
9468 opsPtr, regsPtr); // OUT arguments
9470 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9471 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9472 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
9474 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9475 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9477 noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
9478 (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0));
9480 noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
9481 (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1));
9483 noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
9484 (op2->gtRegNum == REG_ARG_2));
9486 regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9488 genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY
9489 /* GT_INITBLK */ : CORINFO_HELP_MEMSET,
9492 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9494 regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9495 genReleaseReg(opsPtr[0]);
9496 genReleaseReg(opsPtr[1]);
9497 genReleaseReg(opsPtr[2]);
9500 if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile())
9502 // Emit a memory barrier instruction after the CopyBlk
9503 instGen_MemoryBarrier();
9505 #endif // !CPU_USES_BLOCK_MOVE
9511 #pragma warning(push)
9512 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
9514 void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
9518 const genTreeOps oper = tree->OperGet();
9519 const var_types treeType = tree->TypeGet();
9520 GenTreePtr op1 = tree->gtOp.gtOp1;
9521 GenTreePtr op2 = tree->gtGetOp2();
9522 regNumber reg = DUMMY_INIT(REG_CORRUPT);
9523 regMaskTP regs = regSet.rsMaskUsed;
9524 regMaskTP needReg = destReg;
9525 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9529 GenTreePtr opsPtr[3];
9530 regMaskTP regsPtr[3];
9533 addrReg = 0xDEADCAFE;
9536 noway_assert(tree->OperKind() & GTK_SMPOP);
9541 genCodeForTreeSmpOpAsg(tree);
9547 genCodeForAsgShift(tree, destReg, bestReg);
9555 genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9559 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9560 #ifdef _TARGET_XARCH_
9561 // Note that the specialCase here occurs when the treeType specifies a byte sized operation
9562 // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9564 bool specialCase; specialCase = false;
9565 if (op1->gtOper == GT_REG_VAR)
9567 /* Get hold of the target register */
9569 reg = op1->gtRegVar.gtRegNum;
9570 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9572 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9574 inst_RV_RV(INS_mov, byteReg, reg);
9575 regTracker.rsTrackRegTrash(byteReg);
9577 inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9578 var_types op1Type = op1->TypeGet();
9579 instruction wideningIns = ins_Move_Extend(op1Type, true);
9580 inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
9581 regTracker.rsTrackRegTrash(reg);
9588 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9590 #else // not _TARGET_XARCH_
9591 if (op1->gtFlags & GTF_REG_VAL)
9593 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9597 // Fix 388382 ARM JitStress WP7
9598 var_types op1Type = op1->TypeGet();
9599 regNumber reg = regSet.rsPickFreeReg();
9600 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9601 regTracker.rsTrackRegTrash(reg);
9602 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9603 inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
9606 if (op1->gtFlags & GTF_REG_VAL)
9607 regTracker.rsTrackRegTrash(op1->gtRegNum);
9608 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9610 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9619 genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9623 genCodeForUnsignedMod(tree, destReg, bestReg);
9627 genCodeForSignedMod(tree, destReg, bestReg);
9631 genCodeForUnsignedDiv(tree, destReg, bestReg);
9635 genCodeForSignedDiv(tree, destReg, bestReg);
9641 genCodeForShift(tree, destReg, bestReg);
9647 /* Generate the operand into some register */
9649 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9650 noway_assert(op1->gtFlags & GTF_REG_VAL);
9652 reg = op1->gtRegNum;
9654 /* Negate/reverse the value in the register */
9656 inst_RV((oper == GT_NEG) ? INS_NEG
9657 : INS_NOT, reg, treeType);
9659 /* The register is now trashed */
9661 regTracker.rsTrackRegTrash(reg);
9663 genCodeForTree_DONE(tree, reg);
9667 case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
9669 /* Make sure the operand is addressable */
9671 addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9673 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9675 /* Figure out the size of the value being loaded */
9677 size = EA_ATTR(genTypeSize(tree->gtType));
9679 /* Pick a register for the value */
9681 if (needReg == RBM_ALLINT && bestReg == 0)
9683 /* Absent a better suggestion, pick a useless register */
9685 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9688 reg = regSet.rsPickReg(needReg, bestReg);
9690 if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9692 noway_assert(size == EA_PTRSIZE);
9693 getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
9697 (int)op1->gtIntCon.gtIconVal);
9701 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9703 inst_mov_RV_ST(reg, tree);
9707 if (tree->gtFlags & GTF_IND_VOLATILE)
9709 // Emit a memory barrier instruction after the load
9710 instGen_MemoryBarrier();
9714 /* Note the new contents of the register we used */
9716 regTracker.rsTrackRegTrash(reg);
9718 /* Update the live set of register variables */
9721 if (compiler->opts.varNames) genUpdateLife(tree);
9724 /* Now we can update the register pointer information */
9726 // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9727 gcInfo.gcMarkRegPtrVal(reg, treeType);
9729 genCodeForTree_DONE_LIFE(tree, reg);
9734 genCodeForNumericCast(tree, destReg, bestReg);
9740 /* Is this a test of a relational operator? */
9742 if (op1->OperIsCompare())
9744 /* Generate the conditional jump */
9748 genUpdateLife(tree);
9753 compiler->gtDispTree(tree);
9755 NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9759 genCodeForSwitch(tree);
9763 noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9764 if (op1 == 0) // endfinally
9768 #ifdef _TARGET_XARCH_
9769 /* Return using a pop-jmp sequence. As the "try" block calls
9770 the finally with a jmp, this leaves the x86 call-ret stack
9771 balanced in the normal flow of path. */
9773 noway_assert(isFramePointerRequired());
9774 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9775 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
9776 #elif defined(_TARGET_ARM_)
9777 // Nothing needed for ARM
9784 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9785 noway_assert(op1->gtFlags & GTF_REG_VAL);
9786 noway_assert(op1->gtRegNum == REG_INTRET);
9787 /* The return value has now been computed */
9788 reg = op1->gtRegNum;
9794 genCodeForTree_DONE(tree, reg);
9799 // TODO: this should be done AFTER we called exit mon so that
9800 // we are sure that we don't have to keep 'this' alive
9802 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9804 /* either it's an "empty" statement or the return statement
9805 of a synchronized method
9808 genPInvokeMethodEpilog();
9811 /* Is there a return value and/or an exit statement? */
9815 if (op1->gtType == TYP_VOID)
9817 //We're returning nothing, just generate the block (shared epilog calls).
9818 genCodeForTree(op1, 0);
9821 else if (op1->gtType == TYP_STRUCT)
9823 if (op1->gtOper == GT_CALL)
9825 // We have a return call() because we failed to tail call.
9826 // In any case, just generate the call and be done.
9827 assert(compiler->IsHfa(op1));
9828 genCodeForCall(op1, true);
9829 genMarkTreeInReg(op1, REG_FLOATRET);
9833 assert(op1->gtOper == GT_LCL_VAR);
9834 assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
9835 genLoadIntoFltRetRegs(op1);
9838 else if (op1->TypeGet() == TYP_FLOAT)
9840 // This can only occur when we are returning a non-HFA struct
9841 // that is composed of a single float field and we performed
9842 // struct promotion and enregistered the float field.
9844 genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
9845 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
9847 #endif // _TARGET_ARM_
9850 //we can now go through this code for compiler->genReturnBB. I've regularized all the code.
9852 //noway_assert(compiler->compCurBB != compiler->genReturnBB);
9854 noway_assert(op1->gtType != TYP_VOID);
9856 /* Generate the return value into the return register */
9858 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9860 /* The result must now be in the return register */
9862 noway_assert(op1->gtFlags & GTF_REG_VAL);
9863 noway_assert(op1->gtRegNum == REG_INTRET);
9866 /* The return value has now been computed */
9868 reg = op1->gtRegNum;
9870 genCodeForTree_DONE(tree, reg);
9874 //The profiling hook does not trash registers, so it's safe to call after we emit the code for
9875 //the GT_RETURN tree.
9876 #ifdef PROFILING_SUPPORTED
9877 if (compiler->compCurBB == compiler->genReturnBB)
9879 genProfilingLeaveCallback();
9883 if (compiler->opts.compStackCheckOnRet)
9885 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
9886 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
9887 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
9888 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
9890 BasicBlock * esp_check = genCreateTempLabel();
9891 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
9892 inst_JMP(jmpEqual, esp_check);
9893 getEmitter()->emitIns(INS_BREAKPOINT);
9894 genDefineTempLabel(esp_check);
9901 if (tree->gtFlags & GTF_REVERSE_OPS)
9903 if (tree->gtType == TYP_VOID)
9905 genEvalSideEffects(op2);
9906 genUpdateLife (op2);
9907 genEvalSideEffects(op1);
9908 genUpdateLife(tree);
9913 genCodeForTree(op2, needReg);
9916 noway_assert(op2->gtFlags & GTF_REG_VAL);
9918 regSet.rsMarkRegUsed(op2);
9920 // Do side effects of op1
9921 genEvalSideEffects(op1);
9923 // Recover op2 if spilled
9924 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
9926 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
9928 // set gc info if we need so
9929 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
9931 genUpdateLife(tree);
9932 genCodeForTree_DONE(tree, op2->gtRegNum);
9938 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
9940 /* Generate side effects of the first operand */
9942 genEvalSideEffects(op1);
9943 genUpdateLife (op1);
9945 /* Is the value of the second operand used? */
9947 if (tree->gtType == TYP_VOID)
9949 /* The right operand produces no result. The morpher is
9950 responsible for resetting the type of GT_COMMA nodes
9951 to TYP_VOID if op2 isn't meant to yield a result. */
9953 genEvalSideEffects(op2);
9954 genUpdateLife(tree);
9958 /* Generate the second operand, i.e. the 'real' value */
9960 genCodeForTree(op2, needReg);
9961 noway_assert(op2->gtFlags & GTF_REG_VAL);
9963 /* The result of 'op2' is also the final result */
9965 reg = op2->gtRegNum;
9967 /* Remember whether we set the flags */
9969 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
9971 genCodeForTree_DONE(tree, reg);
9976 genCodeForTree(op1, needReg);
9977 noway_assert(op1->gtFlags & GTF_REG_VAL);
9979 /* The result of 'op1' is also the final result */
9981 reg = op1->gtRegNum;
9983 /* Remember whether we set the flags */
9985 tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
9987 genCodeForTree_DONE(tree, reg);
9992 genCodeForQmark(tree, destReg, bestReg);
10002 /* Generate the operand into some register */
10004 genCodeForTree(op1, needReg);
10006 /* The result is the same as the operand */
10008 reg = op1->gtRegNum;
10010 genCodeForTree_DONE(tree, reg);
10015 switch (tree->gtIntrinsic.gtIntrinsicId)
10017 case CORINFO_INTRINSIC_Round:
10019 noway_assert(tree->gtType == TYP_INT);
10021 #if FEATURE_STACK_FP_X87
10022 genCodeForTreeFlt(op1);
10024 /* Store the FP value into the temp */
10025 TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10027 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10028 FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10029 inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10031 reg = regSet.rsPickReg(needReg, bestReg);
10032 regTracker.rsTrackRegTrash(reg);
10034 inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10036 compiler->tmpRlsTemp(temp);
10038 genCodeForTreeFloat(tree, needReg, bestReg);
10045 noway_assert(!"unexpected math intrinsic");
10049 genCodeForTree_DONE(tree, reg);
10054 reg = genLclHeap(op1);
10055 genCodeForTree_DONE(tree, reg);
10059 noway_assert(op1->IsList());
10061 /* If the value class doesn't have any fields that are GC refs or
10062 the target isn't on the GC-heap, we can merge it with CPBLK.
10063 GC fields cannot be copied directly, instead we will
10064 need to use a jit-helper for that. */
10065 assert(tree->AsCpObj()->gtGcPtrCount > 0);
10068 GenTreeCpObj* cpObjOp = tree->AsCpObj();
10070 #ifdef _TARGET_ARM_
10071 if (cpObjOp->IsVolatile())
10073 // Emit a memory barrier instruction before the CopyBlk
10074 instGen_MemoryBarrier();
10077 GenTreePtr srcObj = cpObjOp->Source();
10078 GenTreePtr dstObj = cpObjOp->Dest();
10080 noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
10083 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
10084 size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
10086 // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
10087 // The EE currently does not allow this. Let's assert it just to be safe.
10088 noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
10091 size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
10092 unsigned slots = cpObjOp->gtSlots;
10093 BYTE * gcPtrs = cpObjOp->gtGcPtrs;
10094 unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
10096 // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set
10097 if (gcPtrCount > 0)
10098 assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0);
10100 GenTreePtr treeFirst, treeSecond;
10101 regNumber regFirst, regSecond;
10103 // Check what order the object-ptrs have to be evaluated in ?
10105 if (op1->gtFlags & GTF_REVERSE_OPS)
10107 treeFirst = srcObj;
10108 treeSecond = dstObj;
10109 #if CPU_USES_BLOCK_MOVE
10110 regFirst = REG_ESI;
10111 regSecond = REG_EDI;
10113 regFirst = REG_ARG_1;
10114 regSecond = REG_ARG_0;
10119 treeFirst = dstObj;
10120 treeSecond = srcObj;
10121 #if CPU_USES_BLOCK_MOVE
10122 regFirst = REG_EDI;
10123 regSecond = REG_ESI;
10125 regFirst = REG_ARG_0;
10126 regSecond = REG_ARG_1;
10130 bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
10131 bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
10132 emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
10133 emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
10135 // Materialize the trees in the order desired
10137 #if CPU_USES_BLOCK_MOVE
10138 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10139 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10140 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
10142 // Grab ECX because it will be trashed by the helper
10144 regSet.rsGrabReg(RBM_ECX);
10146 while (blkSize >= TARGET_POINTER_SIZE)
10148 if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
10150 // Note that we can use movsd even if it is a GC pointer being transfered
10151 // because the value is not cached anywhere. If we did this in two moves,
10152 // we would have to make certain we passed the appropriate GC info on to
10154 instGen(INS_movsp);
10158 // This helper will act like a MOVSD
10159 // -- inputs EDI and ESI are byrefs
10160 // -- including incrementing of ESI and EDI by 4
10161 // -- helper will trash ECX
10163 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
10164 regSet.rsLockUsedReg(argRegs);
10165 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
10167 EA_PTRSIZE); // retSize
10168 regSet.rsUnlockUsedReg(argRegs);
10171 blkSize -= TARGET_POINTER_SIZE;
10174 // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
10176 regTracker.rsTrackRegTrash(REG_EDI);
10177 regTracker.rsTrackRegTrash(REG_ESI);
10178 regTracker.rsTrackRegTrash(REG_ECX);
10180 gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
10182 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
10183 it is a emitNoGChelper. However, we have to let the emitter know that
10184 the GC liveness has changed. We do this by creating a new label.
10187 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
10189 genDefineTempLabel(&dummyBB);
10191 #else // !CPU_USES_BLOCK_MOVE
10193 #ifndef _TARGET_ARM_
10194 // Currently only the ARM implementation is provided
10195 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
10203 if ((gcPtrCount > 0) && !dstIsOnStack)
10205 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10206 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
10207 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
10209 /* The helper is a Asm-routine that will trash R2,R3 and LR */
10211 /* Spill any callee-saved registers which are being used */
10212 regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
10216 regSet.rsSpillRegs(spillRegs);
10220 // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
10221 // We will also use it as the temp register for our load/store sequences
10223 assert(REG_R2 == REG_TMP_1);
10224 regTemp = regSet.rsGrabReg(RBM_R2);
10229 genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
10230 genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
10231 genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
10233 // Grab any temp register to use for our load/store sequences
10235 regTemp = regSet.rsGrabReg(RBM_ALLINT);
10236 helperUsed = false;
10238 assert(dstObj->gtFlags & GTF_REG_VAL);
10239 assert(srcObj->gtFlags & GTF_REG_VAL);
10241 regDst = dstObj->gtRegNum;
10242 regSrc = srcObj->gtRegNum;
10244 assert(regDst != regTemp);
10245 assert(regSrc != regTemp);
10247 instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
10248 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
10251 while (blkSize >= TARGET_POINTER_SIZE)
10253 CorInfoGCType gcType;
10254 CorInfoGCType gcTypeNext = TYPE_GC_NONE;
10255 var_types type = TYP_I_IMPL;
10257 #if FEATURE_WRITE_BARRIER
10258 gcType = (CorInfoGCType)(*gcPtrs++);
10259 if (blkSize > TARGET_POINTER_SIZE)
10260 gcTypeNext = (CorInfoGCType)(*gcPtrs);
10262 if (gcType == TYPE_GC_REF)
10264 else if (gcType == TYPE_GC_BYREF)
10269 assert(regDst == REG_ARG_0);
10270 assert(regSrc == REG_ARG_1);
10271 assert(regTemp == REG_R2);
10274 gcType = TYPE_GC_NONE;
10275 #endif // FEATURE_WRITE_BARRIER
10277 blkSize -= TARGET_POINTER_SIZE;
10279 emitAttr opSize = emitTypeSize(type);
10281 if (!helperUsed || (gcType == TYPE_GC_NONE))
10283 getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
10284 getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
10285 offset += TARGET_POINTER_SIZE;
10287 if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) ||
10288 ((offset >= 128) && (blkSize > 0)))
10290 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
10291 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
10297 assert(offset == 0);
10299 // The helper will act like this:
10300 // -- inputs R0 and R1 are byrefs
10301 // -- helper will perform copy from *R1 into *R0
10302 // -- helper will perform post increment of R0 and R1 by 4
10303 // -- helper will trash R2
10304 // -- helper will trash R3
10305 // -- calling the helper implicitly trashes LR
10307 assert(helperUsed);
10308 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
10309 regSet.rsLockUsedReg(argRegs);
10310 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
10312 EA_PTRSIZE); // retSize
10314 regSet.rsUnlockUsedReg(argRegs);
10315 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
10319 regTracker.rsTrackRegTrash(regDst);
10320 regTracker.rsTrackRegTrash(regSrc);
10321 regTracker.rsTrackRegTrash(regTemp);
10323 gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
10325 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
10326 it is a emitNoGChelper. However, we have to let the emitter know that
10327 the GC liveness has changed. We do this by creating a new label.
10330 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
10332 genDefineTempLabel(&dummyBB);
10334 #endif // !CPU_USES_BLOCK_MOVE
10336 assert(blkSize == 0);
10338 genReleaseReg(dstObj);
10339 genReleaseReg(srcObj);
10343 genCodeForTree_DONE(tree, reg);
10345 #ifdef _TARGET_ARM_
10346 if (tree->AsBlkOp()->IsVolatile())
10348 // Emit a memory barrier instruction after the CopyBlk
10349 instGen_MemoryBarrier();
10358 genCodeForBlkOp(tree, destReg);
10359 genCodeForTree_DONE(tree, REG_NA);
10368 genCodeForRelop(tree, destReg, bestReg);
10373 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10376 #ifdef _TARGET_XARCH_
10379 // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
10380 // For reference, op1 is the location. op2 is the addend or the value.
10381 if (op2->OperIsConst())
10383 noway_assert(op2->TypeGet() == TYP_INT);
10384 ssize_t cns = op2->gtIntCon.gtIconVal;
10386 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10391 instEmit_RM(INS_inc, op1, op1, 0); break;
10394 instEmit_RM(INS_dec, op1, op1, 0); break;
10396 assert((int)cns == cns); // By test above for AMD64.
10398 inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0); break;
10400 genReleaseReg(op1);
10404 //non-constant addend means it needs to go into a register.
10406 goto LockBinOpCommon;
10409 genFlagsEqualToNone(); // We didn't compute a result into a register.
10410 genUpdateLife(tree); // We didn't compute an operand into anything.
10414 ins = INS_xadd; goto LockBinOpCommon;
10416 ins = INS_xchg; goto LockBinOpCommon;
10419 //Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
10420 //is a constant, it needs to be in a register. This should be the output register if
10423 //For reference, gtOp1 is the location. gtOp2 is the addend or the value.
10425 GenTreePtr location = op1;
10426 GenTreePtr value = op2;
10428 //Again, a friendly reminder. IL calling convention is left to right.
10429 if (tree->gtFlags & GTF_REVERSE_OPS)
10431 // The atomic operations destroy this argument, so force it into a scratch register
10432 reg = regSet.rsPickFreeReg();
10433 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10435 // Must evaluate location into a register
10436 genCodeForTree(location, needReg, RBM_NONE);
10437 assert(location->gtFlags & GTF_REG_VAL);
10438 regSet.rsMarkRegUsed(location);
10439 regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10440 genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10441 regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10443 if (ins != INS_xchg)
10445 //xchg implies the lock prefix, but xadd and add require it.
10448 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10449 genReleaseReg(value);
10450 regTracker.rsTrackRegTrash(reg);
10451 genReleaseReg(location);
10456 if (genMakeIndAddrMode(location,
10458 false, /* not for LEA */
10463 genUpdateLife(location);
10465 reg = regSet.rsPickFreeReg();
10466 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10467 addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10469 if (ins != INS_xchg)
10471 //xchg implies the lock prefix, but xadd and add require it.
10475 // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10476 // inst_TT_RV(ins, location, reg);
10477 sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10479 genReleaseReg(value);
10480 regTracker.rsTrackRegTrash(reg);
10481 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10485 // Must evalute location into a register.
10486 genCodeForTree(location, needReg, RBM_NONE);
10487 assert(location->gtFlags && GTF_REG_VAL);
10488 regSet.rsMarkRegUsed(location);
10490 // xadd destroys this argument, so force it into a scratch register
10491 reg = regSet.rsPickFreeReg();
10492 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10493 regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10494 genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10495 regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10497 if (ins != INS_xchg)
10499 //xchg implies the lock prefix, but xadd and add require it.
10503 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10505 genReleaseReg(value);
10506 regTracker.rsTrackRegTrash(reg);
10507 genReleaseReg(location);
10511 //The flags are equal to the target of the tree (i.e. the result of the add), not to the
10512 //result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10513 //that information to set the flags. Doesn't seem like there is a good reason for that.
10514 //Therefore, trash the flags.
10515 genFlagsEqualToNone();
10517 if (ins == INS_add)
10519 genUpdateLife(tree); //If the operator was add, then we were called from the GT_LOCKADD
10520 //case. In that case we don't use the result, so we don't need to
10525 genCodeForTree_DONE(tree, reg);
10530 #else // !_TARGET_XARCH_
10536 NYI_ARM("LOCK instructions");
10539 case GT_ARR_LENGTH:
10541 // Make the corresponding ind(a + c) node, and do codegen for that.
10542 GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF,
10543 tree->gtArrLen.ArrRef(),
10544 compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10545 tree->SetOper(GT_IND);
10546 tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
10547 assert(tree->TypeGet() == TYP_INT);
10548 tree->gtOp.gtOp1 = addr;
10549 genCodeForTree(tree, destReg, bestReg);
10554 // All GT_OBJ nodes must have been morphed prior to this.
10555 noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10559 compiler->gtDispTree(tree);
10561 noway_assert(!"unexpected unary/binary operator");
10562 } // end switch (oper)
10567 #pragma warning(pop) // End suppress PREFast warning about overly large function
// genIntegerCast: emit code for an integer-to-integer GT_CAST and return the
// register that ends up holding the result.
//   - Widening cast (src smaller than dst): sign/zero-extend from the SOURCE
//     size using the move instruction picked by ins_Move_Extend.
//   - Narrowing / sign-changing cast: re-extend from the DESTINATION size.
// An unsigned cast whose value is already in the chosen register can be done
// with an AND mask instead of a movzx (also usable to set the flags).
// NOTE(review): this excerpt elides some original lines (the embedded line
// numbers jump), so not every brace/else arm is visible here.
10571 regNumber CodeGen::genIntegerCast(GenTree *tree,
10580 GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
10581 var_types dstType = tree->CastToType();
10582 var_types srcType = op1->TypeGet();
// Widening: the extension is determined by the (smaller) source type.
10584 if (genTypeSize(srcType) < genTypeSize(dstType))
10588 /* we need the source size */
10590 size = EA_ATTR(genTypeSize(srcType));
10592 noway_assert(size < EA_PTRSIZE);
10594 unsv = varTypeIsUnsigned(srcType);
10595 ins = ins_Move_Extend(srcType, op1->InReg());
/*
10598 Special case: for a cast of byte to char we first
10599 have to expand the byte (w/ sign extension), then
10600 mask off the high bits.
10601 Use 'movsx' followed by 'and'
*/
10603 if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
10605 noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
10611 // Narrowing cast, or sign-changing cast
10613 noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
10615 size = EA_ATTR(genTypeSize(dstType));
10617 unsv = varTypeIsUnsigned(dstType);
10618 ins = ins_Move_Extend(dstType, op1->InReg());
10621 noway_assert(size < EA_PTRSIZE);
// Prefer reusing op1's own register as the result register when it is free.
10623 // Set bestReg to the same register as op1 if op1 is a regVar and is available
10626 regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
10627 if ( (((op1RegMask & bestReg) != 0) || (bestReg == 0)) &&
10628 ((op1RegMask & regSet.rsRegMaskFree()) != 0) )
10630 bestReg = op1RegMask;
// x86: 1-byte moves require a byte-addressable register (AL/BL/CL/DL).
10634 /* Is the value sitting in a non-byte-addressable register? */
10636 if (op1->InReg() &&
10637 (size == EA_1BYTE) &&
10638 !isByteReg(op1->gtRegNum))
10642 // for unsigned values we can AND, so it needs not be a byte register
10644 reg = regSet.rsPickReg(needReg, bestReg);
10650 /* Move the value into a byte register */
10652 reg = regSet.rsGrabReg(RBM_BYTE_REGS);
10655 if (reg != op1->gtRegNum)
10657 /* Move the value into that register */
10659 regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
10660 inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
10662 /* The value has a new home now */
10664 op1->gtRegNum = reg;
10669 /* Pick a register for the value (general case) */
10671 reg = regSet.rsPickReg(needReg, bestReg);
10673 // if we (might) need to set the flags and the value is in the same register
10674 // and we have an unsigned value then use AND instead of MOVZX
10675 if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
10677 #ifdef _TARGET_X86_
10678 noway_assert(ins == INS_movzx);
// AND path: mask in place to the destination width; AND also sets the flags
// when the tree asked for them.
10684 if (ins == INS_AND)
10686 noway_assert(andv == false && unsv);
10688 /* Generate "and reg, MASK" */
10690 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10691 inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
10693 if (tree->gtSetFlags())
10694 genFlagsEqualToReg(tree, reg);
10698 #ifdef _TARGET_XARCH_
10699 noway_assert(ins == INS_movsx || ins == INS_movzx);
10702 /* Generate "movsx/movzx reg, [addr]" */
10704 inst_RV_ST(ins, size, reg, op1);
// byte -> char: after the movsx, clear the upper 16 bits (see special case
// comment above).
10706 /* Mask off high bits for cast from byte to char */
10710 #ifdef _TARGET_XARCH_
10711 noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
10713 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10714 inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
10716 if (tree->gtSetFlags())
10717 genFlagsEqualToReg(tree, reg);
// Whatever value the tracker thought was in 'reg' is gone now.
10721 regTracker.rsTrackRegTrash(reg);
// genCodeForNumericCast: emit code for a GT_CAST node. Dispatches on the
// SOURCE type:
//   - TYP_LONG: work on the lo/hi dword register pair; overflow-checked
//     long->int casts (conv.ovf.i8.i4 / u8.u4) verify the hi dword.
//   - float/double: x87 stack-FP path, or an inline SSE2 cvttsd2si when
//     compCanUseSSE2 is set.
//   - int-sized sources: range/mask overflow checks (for conv.ovf.*) or
//     defer to genIntegerCast.
// Overflow failures branch to the SCK_OVERFLOW throw-helper block.
// NOTE(review): this excerpt elides original lines (embedded line numbers
// jump), so several case labels / braces of the switch are not visible here.
10725 void CodeGen::genCodeForNumericCast(GenTreePtr tree,
10729 GenTreePtr op1 = tree->gtOp.gtOp1;
10730 var_types dstType = tree->CastToType();
10731 var_types baseType = TYP_INT;
10732 regNumber reg = DUMMY_INIT(REG_CORRUPT);
10733 regMaskTP needReg = destReg;
/*
10739 * Constant casts should have been folded earlier
10740 * If not finite don't bother
10741 * We don't do this optimization for debug code/no optimization
*/
10744 noway_assert((op1->gtOper != GT_CNS_INT &&
10745 op1->gtOper != GT_CNS_LNG &&
10746 op1->gtOper != GT_CNS_DBL) ||
10747 tree->gtOverflow() ||
10748 (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
10749 !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
10751 noway_assert(dstType != TYP_VOID);
10753 /* What type are we casting from? */
10755 switch (op1->TypeGet())
// --- TYP_LONG source ---------------------------------------------------
10759 /* Special case: the long is generated via the mod of long
10760 with an int. This is really an int and need not be
10761 converted to a reg pair. NOTE: the flag only indicates
10762 that this is a case to TYP_INT, it hasn't actually
10763 verified the second operand of the MOD! */
10765 if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) &&
10766 (op1->gtFlags & GTF_MOD_INT_RESULT))
10769 /* Verify that the op2 of the mod node is
10770 1) An integer tree, or
10771 2) A long constant that is small enough to fit in an integer
*/
10774 GenTreePtr modop2 = op1->gtOp.gtOp2;
10775 if ((genActualType(modop2->gtType) == TYP_INT) ||
10776 ((modop2->gtOper == GT_CNS_LNG) &&
10777 (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
10779 genCodeForTree(op1, destReg, bestReg);
10781 #ifdef _TARGET_64BIT_
10782 reg = op1->gtRegNum;
10783 #else // _TARGET_64BIT_
10784 reg = genRegPairLo(op1->gtRegPair);
10785 #endif //_TARGET_64BIT_
10787 genCodeForTree_DONE(tree, reg);
10792 /* Make the operand addressable. When gtOverflow() is true,
10793 hold on to the addrReg as we will need it to access the higher dword */
10795 op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1 twice!)
10796 // See, e.g., the TYP_INT case below...
10798 addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
10800 /* Load the lower half of the value into some register */
10802 if (op1->gtFlags & GTF_REG_VAL)
10804 /* Can we simply use the low part of the value? */
10805 reg = genRegPairLo(op1->gtRegPair);
10807 if (tree->gtOverflow())
10811 loMask = genRegMask(reg);
10812 if (loMask & regSet.rsRegMaskFree())
10816 // for cast overflow we need to preserve addrReg for testing the hiDword
10817 // so we lock it to prevent regSet.rsPickReg from picking it.
10818 if (tree->gtOverflow())
10819 regSet.rsLockUsedReg(addrReg);
10821 reg = regSet.rsPickReg(needReg, bestReg);
10823 if (tree->gtOverflow())
10824 regSet.rsUnlockUsedReg(addrReg);
10826 noway_assert(genStillAddressable(op1));
// Load the lo dword unless it already lives in 'reg'.
10829 if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
10831 /* Generate "mov reg, [addr-mode]" */
10832 inst_RV_TT(ins_Load(TYP_INT), reg, op1);
// Overflow-checked long -> int: validate the hi dword against the sign of
// the lo dword (or against zero for unsigned targets).
10835 /* conv.ovf.i8i4, or conv.ovf.u8u4 */
10837 if (tree->gtOverflow())
10839 regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair)
10842 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10843 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
10847 case TYP_INT: // conv.ovf.i8.i4
10848 /* Generate the following sequence
10850 test loDWord, loDWord // set flags
10852 pos: test hiDWord, hiDWord // set flags
10855 neg: cmp hiDWord, 0xFFFFFFFF
*/
10861 instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
10862 if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
10864 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
10865 goto UPPER_BITS_ZERO;
10868 #if CPU_LOAD_STORE_ARCH
10870 // We will generate code like
10879 // We load the tree op1 into regs when we generate code for if clause.
10880 // When we generate else clause, we see the tree is already loaded into reg, and start use it directly.
10881 // Well, when the code is run, we may execute else clause without going through if clause.
10883 genCodeForTree(op1, 0);
10889 neg = genCreateTempLabel();
10890 done = genCreateTempLabel();
10892 // Is the loDWord positive or negative
10893 inst_JMP(jmpLTS, neg);
10895 // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
10897 if (hiReg < REG_STK)
10899 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
10903 inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
10906 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10907 inst_JMP(EJ_jmp, done);
10909 // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
10911 genDefineTempLabel(neg);
10913 if (hiReg < REG_STK)
10915 inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
10919 inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
10921 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10925 genDefineTempLabel(done);
10929 case TYP_UINT: // conv.ovf.u8u4
10931 // Just check that the upper DWord is 0
10933 if (hiReg < REG_STK)
10935 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
10939 inst_TT_IV(INS_cmp, op1, 0, 4);
10942 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10946 noway_assert(!"Unexpected dstType");
// Overflow path kept the operand; release it now (FREE_REG path below is
// the non-overflow release).
10950 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
10953 regTracker.rsTrackRegTrash(reg);
10954 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
10956 genCodeForTree_DONE(tree, reg);
// --- floating-point source ----------------------------------------------
10970 #if FEATURE_STACK_FP_X87
10972 NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
10976 if (compiler->opts.compCanUseSSE2)
10978 // do the SSE2 based cast inline
10979 // getting the fp operand
10981 regMaskTP addrRegInt = 0;
10982 regMaskTP addrRegFlt = 0;
10984 // make the operand addressable
10985 // We don't want to collapse constant doubles into floats, as the SSE2 instruction
10986 // operates on doubles. Note that these (casts from constant doubles) usually get
10987 // folded, but we don't do it for some cases (infinitys, etc). So essentially this
10988 // shouldn't affect performance or size at all. We're fixing this for #336067
10989 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
10990 if (!addrRegFlt && !op1->IsRegVar())
10992 // we have the address
10994 inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
10995 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10996 genUpdateLife(op1);
10998 reg = regSet.rsPickReg(needReg);
// cvttsd2si: truncating double -> integer conversion.
10999 getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
11001 regTracker.rsTrackRegTrash(reg);
11002 genCodeForTree_DONE(tree, reg);
// Value is on the x87 stack: spill it through a temp slot so the SSE2
// instruction can read it.
11006 // we will need to use a temp to get it into the xmm reg
11007 var_types typeTemp = op1->TypeGet();
11008 TempDsc * temp = compiler->tmpGetTemp(typeTemp);
11010 size = EA_ATTR(genTypeSize(typeTemp));
11014 // On the fp stack; Take reg to top of stack
11016 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
11022 reg = regSet.PickRegFloat();
11023 if (!op1->IsRegVarDeath())
11025 // Load it on the fp stack
11026 genLoadStackFP(op1, reg);
11030 // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
11031 genLoadStackFP(op1, reg);
11032 FlatFPX87_MoveToTOS(&compCurFPState, reg);
11036 // pop it off the fp stack
11037 compCurFPState.Pop();
11039 getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
11041 reg = regSet.rsPickReg(needReg);
11043 inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
11044 getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
11046 // done..release the temp
11047 compiler->tmpRlsTemp(temp);
11049 // the reg is now trashed
11050 regTracker.rsTrackRegTrash(reg);
11051 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
11052 genUpdateLife(op1);
11053 genCodeForTree_DONE(tree, reg);
11059 genCodeForTreeFloat(tree, needReg, bestReg);
11060 #endif // FEATURE_STACK_FP_X87
11064 noway_assert(!"unexpected cast type");
// --- int-sized source, overflow-checked (conv.ovf.*) ---------------------
// Either test the bits that must be zero (mask check) or compare against
// the destination type's min/max, throwing SCK_OVERFLOW on violation.
11067 if (tree->gtOverflow())
11069 /* Compute op1 into a register, and free the register */
11071 genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
11072 reg = op1->gtRegNum;
11074 /* Do we need to compare the value, or just check masks */
11076 ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
11082 typeMask = ssize_t((int)0xFFFFFF80);
11083 typeMin = SCHAR_MIN; typeMax = SCHAR_MAX;
11084 unsv = (tree->gtFlags & GTF_UNSIGNED);
11087 typeMask = ssize_t((int)0xFFFF8000);
11088 typeMin = SHRT_MIN; typeMax = SHRT_MAX;
11089 unsv = (tree->gtFlags & GTF_UNSIGNED);
11092 typeMask = ssize_t((int)0x80000000L);
11093 #ifdef _TARGET_64BIT_
11094 unsv = (tree->gtFlags & GTF_UNSIGNED);
11095 typeMin = INT_MIN; typeMax = INT_MAX;
11096 #else // _TARGET_64BIT_
11097 noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
11099 #endif // _TARGET_64BIT_
11103 typeMask = ssize_t((int)0xFFFFFF00L);
11107 typeMask = ssize_t((int)0xFFFF0000L);
11111 #ifdef _TARGET_64BIT_
11112 typeMask = 0xFFFFFFFF00000000LL;
11113 #else // _TARGET_64BIT_
11114 typeMask = 0x80000000L;
11115 noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
11116 #endif // _TARGET_64BIT_
11119 NO_WAY("Unknown type");
11123 // If we just have to check a mask.
11124 // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
11129 inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
11130 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
11131 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
11135 // Check the value is in range.
11136 // This must be conv.ovf.i4i1, etc.
11138 // Compare with the MAX
11140 noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
11142 inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
11143 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
11144 genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
11146 // Compare with the MIN
11148 inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
11149 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
11150 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
11153 genCodeForTree_DONE(tree, reg);
// Non-overflow int casts: delegate to genIntegerCast.
11157 /* Make the operand addressable */
11159 addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
11161 reg = genIntegerCast(tree, needReg, bestReg);
11163 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11165 genCodeForTree_DONE(tree, reg);
11168 /*****************************************************************************
11170 * Generate code for a leaf node of type GT_ADDR
// genCodeForTreeSmpOp_GT_ADDR: generate code for a GT_ADDR (or GT_ARR_ELEM)
// node. Makes the operand addressable — via genMakeAddrArrElem for array
// elements — then emits an LEA into a scratch register, preferring one of the
// registers that already participate in the address mode. The result is a
// TYP_BYREF or TYP_I_IMPL value, reported to GC info accordingly.
// NOTE(review): excerpt elides some original lines (embedded numbers jump),
// so a few braces/else arms are not visible here.
11173 void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree,
11177 genTreeOps oper = tree->OperGet();
11178 const var_types treeType = tree->TypeGet();
11181 regMaskTP needReg = destReg;
// Poison values so a use before assignment is obvious in a debugger.
11185 reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
11186 addrReg = 0xDEADCAFE;
11189 // We should get here for ldloca, ldarga, ldsflda, ldelema,
11191 if (oper == GT_ARR_ELEM)
11197 op1 = tree->gtOp.gtOp1;
11200 // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11201 if (oper == GT_ARR_ELEM) {
11202 // To get the address of the array element,
11203 // we first call genMakeAddrArrElem to make the element addressable.
11204 // (That is, for example, we first emit code to calculate EBX, and EAX.)
11205 // And then use lea to obtain the address.
11206 // (That is, for example, we then emit
11207 // lea EBX, bword ptr [EBX+4*EAX+36]
11208 // to obtain the address of the array element.)
11209 addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11213 addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11216 noway_assert( treeType == TYP_BYREF || treeType == TYP_I_IMPL );
11218 // We want to reuse one of the scratch registers that were used
11219 // in forming the address mode as the target register for the lea.
11220 // If bestReg is unset or if it is set to one of the registers used to
11221 // form the address (i.e. addrReg), we calculate the scratch register
11222 // to use as the target register for the LEA
11224 bestReg = regSet.rsUseIfZero (bestReg, addrReg);
11225 bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11227 /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11228 it since keepReg==false.
11229 If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11230 So this is guaranteed not to spill addrReg */
11232 reg = regSet.rsPickReg(needReg, bestReg);
11234 // Slight workaround, force the inst routine to think that
11235 // value being loaded is an int (since that is what
11236 // LEA will return) otherwise it would try to allocate
11237 // two registers for a long etc.
11238 noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
11239 op1->gtType = treeType;
11241 inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11243 // The Lea instruction above better not have tried to put the
11244 // 'value' pointed to by 'op1' in a register, LEA will not work.
11245 noway_assert(!(op1->gtFlags & GTF_REG_VAL));
11247 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11248 // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
// The LEA result is a byref/native int, never a GCref object pointer.
11249 noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11251 regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
11252 gcInfo.gcMarkRegPtrVal(reg, treeType);
11254 genCodeForTree_DONE(tree, reg);
11258 #ifdef _TARGET_ARM_
11260 /*****************************************************************************
11262 * Move (load/store) between float ret regs and struct promoted variable.
11264 * varDsc - The struct variable to be loaded from or stored into.
11265 * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
// genLdStFltRetRegsPromotedVar: copy between the ARM float return registers
// (starting at REG_FLOATRET) and the promoted fields of a struct variable
// (used for HFA-style returns — see the callers below).
//   isLoadIntoFlt == true : load struct fields INTO the float return regs.
//   isLoadIntoFlt == false: store FROM the float return regs into the fields.
// Enregistered fields are copied reg-to-reg; fields in memory go through a
// float load/store. curReg advances one slot per float field, two per double.
// NOTE(review): excerpt elides some original lines, so not every brace/else
// arm is visible here.
11268 void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
11270 regNumber curReg = REG_FLOATRET;
11272 unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11273 for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11275 LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11277 // Is the struct field promoted and sitting in a register?
11278 if (varDscFld->lvRegister)
11280 // Move from the struct field into curReg if load
11281 // else move into struct field from curReg if store
11282 regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11283 regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
// Skip the copy entirely when source and destination already coincide.
11284 if (srcReg != dstReg)
11286 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11287 regTracker.rsTrackRegCopy(dstReg, srcReg);
11292 // This field is in memory, do a move between the field and float registers.
11293 emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11296 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11297 regTracker.rsTrackRegTrash(curReg);
11301 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11305 // Advance the current reg.
11306 curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
// genLoadIntoFltRetRegs: (ARM) load a TYP_STRUCT local variable — an HFA
// being returned — into the float return register(s):
//   - promoted struct: copy field-by-field (genLdStFltRetRegsPromotedVar),
//   - small struct: a single float/double load (condition partially elided
//     in this excerpt — presumably slots <= 2; TODO confirm against the
//     full source),
//   - otherwise: take the variable's address and vldm the slots at once.
// Finally marks the tree as living in REG_FLOATRET.
11310 void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
11312 assert(tree->TypeGet() == TYP_STRUCT);
11313 assert(tree->gtOper == GT_LCL_VAR);
11314 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11315 int slots = varDsc->lvSize() / REGSIZE_BYTES;
11316 if (varDsc->lvPromoted)
11318 genLdStFltRetRegsPromotedVar(varDsc, true);
11324 // Use the load float/double instruction.
11326 ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE),
11330 (slots == 1) ? EA_4BYTE : EA_8BYTE);
11334 // Use the load store multiple instruction.
11335 regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11336 inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11337 regTracker.rsTrackRegTrash(reg);
11338 getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11341 genMarkTreeInReg(tree, REG_FLOATRET);
// genStoreFromFltRetRegs: (ARM) generate code for "local = (HFA) call()":
// emit the call, then copy the float return register(s) back into the
// struct local — field-by-field if it is promoted, via a single
// float/double store for small HFAs, or a vstm multiple-register store
// through the local's address otherwise.
// NOTE(review): excerpt elides some original lines, so a few braces/else
// arms and call arguments are not visible here.
11344 void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
11346 assert(tree->TypeGet() == TYP_STRUCT);
11347 assert(tree->OperGet() == GT_ASG);
11349 // LHS should be lcl var or fld.
11350 GenTreePtr op1 = tree->gtOp.gtOp1;
11352 // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
11353 // handling multiple levels of inlined functions that return HFA on the right-hand-side.
11354 // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
11355 // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
11356 // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
11357 // as a regular assert().
11358 noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
11359 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11360 assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
11362 // The RHS should be a call.
11363 GenTreePtr op2 = tree->gtOp.gtOp2;
11364 assert(op2->gtOper == GT_CALL);
11366 // Generate code for call and copy the return registers into the local.
11367 regMaskTP retMask = genCodeForCall(op2, true);
// Sanity-check the returned register mask: it must be a contiguous run of
// float regs starting at REG_FLOATRET, no wider than MAX_HFA_RET_SLOTS.
11369 // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
11371 regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
11372 assert((mask & (mask - 1)) == 0);
11373 assert(mask <= (1 << MAX_HFA_RET_SLOTS));
11374 assert((retMask & (((regMaskTP) RBM_FLOATRET) - 1)) == 0);
11377 int slots = genCountBits(retMask & RBM_ALLFLOAT);
11379 LclVarDsc* varDsc = &compiler->lvaTable[varNum];
11381 if (varDsc->lvPromoted)
11383 genLdStFltRetRegsPromotedVar(varDsc, false);
// Small HFA: one direct float/double store (enclosing condition elided in
// this excerpt — presumably slots <= 2; TODO confirm against full source).
11390 ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE),
11394 (slots == 1) ? EA_4BYTE : EA_8BYTE);
// General case: materialize the local's address and vstm all slots at once.
11398 regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11399 inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
11400 regTracker.rsTrackRegTrash(reg);
11401 getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11406 #endif // _TARGET_ARM_
11408 /*****************************************************************************
11410 * Generate code for a GT_ASG tree
11414 #pragma warning(push)
11415 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
11417 void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
11419 noway_assert(tree->gtOper == GT_ASG);
11421 GenTreePtr op1 = tree->gtOp.gtOp1;
11422 GenTreePtr op2 = tree->gtOp.gtOp2;
11423 regMaskTP needReg = RBM_ALLINT;
11424 regMaskTP bestReg = RBM_CORRUPT;
11425 regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
11426 bool ovfl = false; // Do we need an overflow check
11427 bool volat = false; // Is this a volatile store
11430 #ifdef DEBUGGING_SUPPORT
11431 unsigned lclVarNum = compiler->lvaCount;
11432 unsigned lclILoffs = DUMMY_INIT(0);
11435 #ifdef _TARGET_ARM_
11436 if (tree->gtType == TYP_STRUCT)
11438 // We use copy block to assign structs, however to receive HFAs in registers
11439 // from a CALL, we use assignment, var = (hfa) call();
11440 assert(compiler->IsHfa(tree));
11441 genStoreFromFltRetRegs(tree);
11447 if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11449 if (varTypeIsFloating(op1))
11450 assert(!"Bad IL: Illegal assignment of integer into float!");
11452 assert(!"Bad IL: Illegal assignment of float into integer!");
11456 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11458 op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
11461 /* Is the target a register or local variable? */
11462 switch (op1->gtOper)
11465 LclVarDsc * varDsc;
11468 varNum = op1->gtLclVarCommon.gtLclNum;
11469 noway_assert(varNum < compiler->lvaCount);
11470 varDsc = compiler->lvaTable + varNum;
11472 #ifdef DEBUGGING_SUPPORT
11473 /* For non-debuggable code, every definition of a lcl-var has
11474 * to be checked to see if we need to open a new scope for it.
11475 * Remember the local var info to call siCheckVarScope
11476 * AFTER code generation of the assignment.
11478 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11480 lclVarNum = varNum;
11481 lclILoffs = op1->gtLclVar.gtLclILoffs;
11485 /* Check against dead store ? (with min opts we may have dead stores) */
11487 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11489 /* Does this variable live in a register? */
11491 if (genMarkLclVar(op1))
11498 /* Get hold of the target register */
11502 op1Reg = op1->gtRegVar.gtRegNum;
11504 /* Compute the RHS (hopefully) into the variable's register.
11505 For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11506 as variables are kept alive everywhere. So we have to be
11507 careful if we want to compute the value directly into
11508 the variable's register. */
11511 bool needToUpdateRegSetCheckLevel;
11512 needToUpdateRegSetCheckLevel = false;
11515 // We should only be accessing lvVarIndex if varDsc is tracked.
11516 assert(varDsc->lvTracked);
11518 if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11520 noway_assert(compiler->opts.compDbgCode);
11522 /* The predictor might expect us to generate op2 directly
11523 into the var's register. However, since the variable is
11524 already alive, first kill it and its register. */
11526 if (rpCanAsgOperWithoutReg(op2, true))
11528 genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11529 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11531 needToUpdateRegSetCheckLevel = true;
11537 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11542 /* Special cases: op2 is a GT_CNS_INT */
11544 if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11546 /* Save the old life status */
11548 VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11549 VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11551 /* Set a flag to avoid printing the message
11552 and remember that life was changed. */
11554 genTempLiveChg = false;
11559 if (needToUpdateRegSetCheckLevel)
11560 compiler->compRegSetCheckLevel++;
11562 genCodeForTree(op2, needReg, genRegMask(op1Reg));
11564 if (needToUpdateRegSetCheckLevel)
11565 compiler->compRegSetCheckLevel--;
11566 noway_assert(compiler->compRegSetCheckLevel>=0);
11568 noway_assert(op2->gtFlags & GTF_REG_VAL);
11570 /* Make sure the value ends up in the right place ... */
11572 if (op2->gtRegNum != op1Reg)
11574 /* Make sure the target of the store is available */
11576 if (regSet.rsMaskUsed & genRegMask(op1Reg))
11577 regSet.rsSpillReg(op1Reg);
11579 #ifdef _TARGET_ARM_
11580 if (op1->TypeGet() == TYP_FLOAT)
11582 // This can only occur when we are returning a non-HFA struct
11583 // that is composed of a single float field.
11585 inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11588 #endif // _TARGET_ARM_
11590 inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11593 /* The value has been transferred to 'op1Reg' */
11595 regTracker.rsTrackRegCopy (op1Reg, op2->gtRegNum);
11597 if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11598 gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11600 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11604 // First we need to remove it from the original reg set mask (or else trigger an
11605 // assert when we add it to the other reg set mask).
11606 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11607 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11609 // The emitter has logic that tracks the GCness of registers and asserts if you
11610 // try to do bad things to a GC pointer (like lose its GCness).
11612 // An explict cast of a GC pointer to an int (which is legal if the
11613 // pointer is pinned) is encoded as an assignment of a GC source
11614 // to a integer variable. Unfortunately if the source was the last
11615 // use, and the source register gets reused by the destination, no
11616 // code gets emitted (That is where we are at right now). The emitter
11617 // thinks the register is a GC pointer (it did not see the cast).
11618 // This causes asserts, as well as bad GC info since we will continue
11619 // to report the register as a GC pointer even if we do arithmetic
11620 // with it. So force the emitter to see the change in the type
11621 // of variable by placing a label.
11622 // We only have to do this check at this point because in the
11623 // CAST morphing, we create a temp and assignment whenever we
11624 // have a cast that loses its GCness.
11626 if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11628 void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
11635 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11640 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11641 // to worry about it being enregistered.
11642 noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11652 assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11654 if (op1->gtFlags & GTF_IND_VOLATILE)
11665 /* Is the value being assigned a simple one? */
11668 switch (op2->gtOper)
11672 if (!genMarkLclVar(op2))
11679 /* Is the target a byte/short/char value? */
11681 if (varTypeIsSmall(op1->TypeGet()))
11684 if (tree->gtFlags & GTF_REVERSE_OPS)
11687 /* Make the target addressable */
11689 op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
11691 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11693 /* Does the write barrier helper do the assignment? */
11695 regGC = WriteBarrier(op1, op2, addrReg);
11697 if (regGC == RBM_NONE)
11699 // No, assignment was not done by the WriteBarrier
11701 #ifdef _TARGET_ARM_
11704 // Emit a memory barrier instruction before the store
11705 instGen_MemoryBarrier();
11709 /* Move the value into the target */
11711 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11713 // This is done in WriteBarrier when (regGC != RBM_NONE)
11715 /* Free up anything that was tied up by the LHS */
11716 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11719 /* Free up the RHS */
11720 genUpdateLife(op2);
11722 /* Remember that we've also touched the op2 register */
11724 addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11730 ssize_t ival; ival = op2->gtIntCon.gtIconVal;
11731 emitAttr size; size = emitTypeSize(tree->TypeGet());
11733 ins = ins_Store(op1->TypeGet());
11735 // If we are storing a constant into a local variable
11736 // we extend the size of the store here
11737 // this normally takes place in CodeGen::inst_TT_IV on x86.
11739 if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11741 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11742 LclVarDsc * varDsc = compiler->lvaTable + varNum;
11744 // Fix the immediate by sign extending if needed
11745 if (!varTypeIsUnsigned(varDsc->TypeGet()))
11747 if (size == EA_1BYTE)
11749 if ((ival & 0x7f) != ival)
11750 ival = ival | 0xffffff00;
11754 assert(size == EA_2BYTE);
11755 if ((ival & 0x7fff) != ival)
11756 ival = ival | 0xffff0000;
11760 // A local stack slot is at least 4 bytes in size, regardless of
11761 // what the local var is typed as, so auto-promote it here
11762 // unless it is a field of a promoted struct
11763 if (!varDsc->lvIsStructField)
11765 size = EA_SET_SIZE(size, EA_4BYTE);
11766 ins = ins_Store(TYP_INT);
11770 /* Make the target addressable */
11772 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11774 #ifdef _TARGET_ARM_
11777 // Emit a memory barrier instruction before the store
11778 instGen_MemoryBarrier();
11782 /* Move the value into the target */
11784 noway_assert(op1->gtOper != GT_REG_VAR);
11785 if (compiler->opts.compReloc && op2->IsIconHandle())
11787 /* The constant is actually a handle that may need relocation
11788 applied to it. genComputeReg will do the right thing (see
11789 code in genCodeForTreeConst), so we'll just call it to load
11790 the constant into a register. */
11792 genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11793 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11794 noway_assert(op2->gtFlags & GTF_REG_VAL);
11795 inst_TT_RV(ins, op1, op2->gtRegNum);
11796 genReleaseReg(op2);
11800 regSet.rsLockUsedReg(addrReg);
11804 bool copyIconFromReg = true;
11805 regNumber iconReg = REG_NA;
11807 #ifdef _TARGET_ARM_
11808 // Only if the constant can't be encoded in a small instruction,
11809 // look for another register to copy the value from. (Assumes
11810 // target is a small register.)
11811 if ((op1->gtFlags & GTF_REG_VAL) &&
11812 !isRegPairType(tree->gtType) &&
11813 arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11815 copyIconFromReg = false;
11817 #endif // _TARGET_ARM_
11819 if (copyIconFromReg)
11821 iconReg = regTracker.rsIconIsInReg(ival);
11822 if (iconReg == REG_NA)
11823 copyIconFromReg = false;
11826 if (copyIconFromReg &&
11827 (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) || (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11829 /* Move the value into the target */
11831 inst_TT_RV(ins, op1, iconReg, 0, size);
11834 #endif // REDUNDANT_LOAD
11836 inst_TT_IV(ins, op1, ival, 0, size);
11839 regSet.rsUnlockUsedReg(addrReg);
11842 /* Free up anything that was tied up by the LHS */
11844 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11851 bool isWriteBarrier = false;
11852 regMaskTP needRegOp1 = RBM_ALLINT;
11853 RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11855 /* Is the LHS more complex than the RHS? */
11857 if (tree->gtFlags & GTF_REVERSE_OPS)
11859 /* Is the target a byte/short/char value? */
11861 if (varTypeIsSmall(op1->TypeGet()))
11863 noway_assert(op1->gtOper != GT_LCL_VAR ||
11864 (op1->gtFlags & GTF_VAR_CAST) ||
11865 // TODO: Why does this have to be true?
11866 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11867 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11869 if (op2->gtOper == GT_CAST && !op2->gtOverflow())
11871 /* Special case: cast to small type */
11873 if (op2->CastToType() >= op1->gtType)
11875 /* Make sure the cast operand is not > int */
11877 if (op2->CastFromType() <= TYP_INT)
11879 /* Cast via a non-smaller type */
11881 op2 = op2->gtCast.CastOp();
11886 if (op2->gtOper == GT_AND &&
11887 op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11890 switch (op1->gtType)
11892 case TYP_BYTE : mask = 0x000000FF; break;
11893 case TYP_SHORT: mask = 0x0000FFFF; break;
11894 case TYP_CHAR : mask = 0x0000FFFF; break;
11895 default: goto SIMPLE_SMALL;
11898 if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11900 /* Redundant AND */
11902 op2 = op2->gtOp.gtOp1;
11906 /* Must get the new value into a byte register */
11909 if (varTypeIsByte(op1->TypeGet()))
11910 genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11917 /* Generate the RHS into a register */
11919 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11920 if (isWriteBarrier)
11922 #if NOGC_WRITE_BARRIERS
11923 // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11924 needReg = Target::exclude_WriteBarrierReg(needReg);
11925 mustReg = RegSet::EXACT_REG;
11926 #else // !NOGC_WRITE_BARRIERS
11927 // This code should be generic across architectures.
11929 // For the standard JIT Helper calls
11930 // op1 goes into REG_ARG_0 and
11931 // op2 goes into REG_ARG_1
11933 needRegOp1 = RBM_ARG_0;
11934 needReg = RBM_ARG_1;
11935 #endif // !NOGC_WRITE_BARRIERS
11937 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11940 noway_assert(op2->gtFlags & GTF_REG_VAL);
11942 /* Make the target addressable */
11944 op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
11945 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11947 /* Make sure the RHS register hasn't been spilled;
11948 keep the register marked as "used", otherwise
11949 we might get the pointer lifetimes wrong.
11952 if (varTypeIsByte(op1->TypeGet()))
11953 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11955 genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11956 noway_assert(op2->gtFlags & GTF_REG_VAL);
11958 /* Lock the RHS temporarily (lock only already used) */
11960 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11962 /* Make sure the LHS is still addressable */
11964 addrReg = genKeepAddressable(op1, addrReg);
11966 /* We can unlock (only already used ) the RHS register */
11968 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11970 /* Does the write barrier helper do the assignment? */
11972 regGC = WriteBarrier(op1, op2, addrReg);
11976 // Yes, assignment done by the WriteBarrier
11977 noway_assert(isWriteBarrier);
11981 #ifdef _TARGET_ARM_
11984 // Emit a memory barrier instruction before the store
11985 instGen_MemoryBarrier();
11989 /* Move the value into the target */
11991 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11994 /* Update the current liveness info */
11997 if (compiler->opts.varNames) genUpdateLife(tree);
12000 // If op2 register is still in use, free it. (Might not be in use, if
12001 // a full-call write barrier was done, and the register was a caller-saved
12003 regMaskTP op2RM = genRegMask(op2->gtRegNum);
12004 if (op2RM & regSet.rsMaskUsed) regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
12006 // This is done in WriteBarrier when (regGC != 0)
12009 /* Free up anything that was tied up by the LHS */
12010 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
12015 /* Make the target addressable */
12017 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
12019 if (isWriteBarrier)
12021 #if NOGC_WRITE_BARRIERS
12022 /* Try to avoid RBM_TMP_0 */
12023 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
12024 mustReg = RegSet::EXACT_REG; // For op2
12025 #else // !NOGC_WRITE_BARRIERS
12026 // This code should be generic across architectures.
12028 // For the standard JIT Helper calls
12029 // op1 goes into REG_ARG_0 and
12030 // op2 goes into REG_ARG_1
12032 needRegOp1 = RBM_ARG_0;
12033 needReg = RBM_ARG_1;
12034 mustReg = RegSet::EXACT_REG; // For op2
12035 #endif // !NOGC_WRITE_BARRIERS
12038 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
12040 op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
12042 addrReg = genMakeAddressable(op1,
12044 RegSet::KEEP_REG, true);
12046 #if CPU_HAS_BYTE_REGS
12047 /* Is the target a byte value? */
12048 if (varTypeIsByte(op1->TypeGet()))
12050 /* Must get the new value into a byte register */
12051 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
12052 mustReg = RegSet::EXACT_REG;
12054 if (op2->gtType >= op1->gtType)
12055 op2->gtFlags |= GTF_SMALL_OK;
12059 #if NOGC_WRITE_BARRIERS
12060 /* For WriteBarrier we can't use REG_WRITE_BARRIER */
12061 if (isWriteBarrier)
12062 needReg = Target::exclude_WriteBarrierReg(needReg);
12064 /* Also avoid using the previously computed addrReg(s) */
12065 bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
12067 /* If we have a reg available to grab then use bestReg */
12068 if (bestReg & regSet.rsRegMaskCanGrab())
12071 mustReg = RegSet::EXACT_REG;
12074 /* Generate the RHS into a register */
12075 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
12076 noway_assert(op2->gtFlags & GTF_REG_VAL);
12078 /* Make sure the target is still addressable */
12079 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
12080 noway_assert(op2->gtFlags & GTF_REG_VAL);
12082 /* Does the write barrier helper do the assignment? */
12084 regGC = WriteBarrier(op1, op2, addrReg);
12088 // Yes, assignment done by the WriteBarrier
12089 noway_assert(isWriteBarrier);
12093 assert(!isWriteBarrier);
12095 #ifdef _TARGET_ARM_
12098 // Emit a memory barrier instruction before the store
12099 instGen_MemoryBarrier();
12103 /* Move the value into the target */
12105 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
12108 /* The new value is no longer needed */
12110 genReleaseReg(op2);
12112 /* Update the current liveness info */
12115 if (compiler->opts.varNames) genUpdateLife(tree);
12118 // This is done in WriteBarrier when (regGC != 0)
12121 /* Free up anything that was tied up by the LHS */
12122 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
12126 addrReg = RBM_NONE;
12130 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
12131 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
12134 #ifdef DEBUGGING_SUPPORT
12135 /* For non-debuggable code, every definition of a lcl-var has
12136 * to be checked to see if we need to open a new scope for it.
12138 if (lclVarNum < compiler->lvaCount)
12139 siCheckVarScope(lclVarNum, lclILoffs);
12143 #pragma warning(pop)
12146 /*****************************************************************************
12148 * Generate code to complete the assignment operation
12151 void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree,
12156 const var_types treeType = tree->TypeGet();
12157 GenTreePtr op1 = tree->gtOp.gtOp1;
12158 GenTreePtr op2 = tree->gtOp.gtOp2;
12161 if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR) genUpdateLife(op1);
12162 genUpdateLife(tree);
12166 if (op1->gtOper == GT_LCL_VAR)
12167 regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12169 /* Have we just assigned a value that is in a register? */
12171 if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
12173 regTracker.rsTrackRegAssign(op1, op2);
12178 noway_assert(addrReg != 0xDEADCAFE);
12180 gcInfo.gcMarkRegSetNpt(addrReg);
12184 noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12186 /* If GTF_REG_VAL is not set, and it is a small type, then
12187 we must have loaded it up from memory, done the increment,
12188 checked for overflow, and then stored it back to memory */
12190 bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) &&
12191 !(op1->gtFlags & GTF_REG_VAL);
12195 // For small sizes, reg should be set as we sign/zero extend it.
12197 noway_assert(genIsValidReg(reg) ||
12198 genTypeSize(treeType) == sizeof(int));
12200 /* Currently we don't morph x=x+y into x+=y in try blocks
12201 * if we need overflow check, as x+y may throw an exception.
12202 * We can do it if x is not live on entry to the catch block.
12204 noway_assert(!compiler->compCurBB->hasTryIndex());
12206 genCheckOverflow(tree);
12212 /*****************************************************************************
12214 * Generate code for a special op tree
12217 void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
12221 genTreeOps oper = tree->OperGet();
12222 regNumber reg = DUMMY_INIT(REG_CORRUPT);
12223 regMaskTP regs = regSet.rsMaskUsed;
12225 noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
12230 regs = genCodeForCall(tree, true);
12232 /* If the result is in a register, make sure it ends up in the right place */
12234 if (regs != RBM_NONE)
12236 genMarkTreeInReg(tree, genRegNumFromMask(regs));
12239 genUpdateLife(tree);
12243 NO_WAY("should not see this operator in this phase");
12246 case GT_ARR_BOUNDS_CHECK:
12248 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
12249 // MUST NEVER CHECK-IN WITH THIS ENABLED.
12250 // This is just for convenience in doing performance investigations and requires x86ret builds
12251 if (!JitConfig.JitNoRngChk())
12253 genRangeCheck(tree);
12258 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
12263 #if defined(_TARGET_XARCH_)
12264 // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
12266 // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
12267 // right now, just get the trees evaluated.
12269 // As a friendly reminder. IL args are evaluated left to right.
12271 GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
12272 GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
12273 GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
12277 // This little piggy (on the left) went to market.
12278 bool isAddr = genMakeIndAddrMode(location,
12280 false, /* not for LEA */
12287 genCodeForTree(location, RBM_NONE, RBM_NONE);
12288 assert(location->gtFlags && GTF_REG_VAL);
12289 addrReg = genRegMask(location->gtRegNum);
12290 regSet.rsMarkRegUsed(location);
12293 // This little piggy (in the middle) went home.
12294 // We must have a reg for the Value, but it doesn't really matter which register.
12296 // Try to avoid EAX and the address regsiter if possible.
12297 genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
12299 // This little piggy (on the right) had roast beef
12300 // cmpxchg uses EAX as an implicit operand to hold the comparand
12301 // We're going to destroy EAX in this operation, so we better not be keeping
12302 // anything important in it.
12305 if (RBM_EAX & regSet.rsMaskVars)
12307 //We have a variable enregistered in EAX. Make sure it goes dead in this tree.
12308 for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
12310 const LclVarDsc & varDesc = compiler->lvaTable[varNum];
12311 if (!varDesc.lvIsRegCandidate())
12313 if (!varDesc.lvRegister)
12315 if (isFloatRegType(varDesc.lvType))
12317 if (varDesc.lvRegNum != REG_EAX)
12319 //I suppose I should technically check lvOtherReg.
12321 //OK, finally. Let's see if this local goes dead.
12322 //If the variable isn't going dead during this tree, we've just trashed a local with
12324 noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
12330 genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
12332 //Oh, no more piggies.
12333 //* Author's note. I believe in bounty and chose to omit the piggy who got none.
12336 //By this point we've evaluated everything. However the odds are that we've spilled something by
12337 //now. Let's recover all the registers and force them to stay.
12339 //Well, we just computed comparand, so it's still in EAX.
12340 noway_assert(comparand->gtRegNum == REG_EAX);
12341 regSet.rsLockUsedReg(RBM_EAX);
12343 //Stick it anywhere other than EAX.
12344 genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
12345 reg = value->gtRegNum;
12346 noway_assert(reg != REG_EAX);
12347 regSet.rsLockUsedReg(genRegMask(reg));
12351 addrReg = genKeepAddressable(/*location*/tree, addrReg, 0/*avoidMask*/);
12355 genRecoverReg(location, ~(RBM_EAX|genRegMask(reg)), RegSet::KEEP_REG);
12358 regSet.rsUnlockUsedReg(genRegMask(reg));
12359 regSet.rsUnlockUsedReg(RBM_EAX);
12364 sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
12365 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
12369 instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
12370 genReleaseReg(location);
12373 genReleaseReg(value);
12374 genReleaseReg(comparand);
12376 //EAX and the value register are both trashed at this point.
12377 regTracker.rsTrackRegTrash(REG_EAX);
12378 regTracker.rsTrackRegTrash(reg);
12382 //Until I try to optimize a cmp after a cmpxchg, just trash the flags for safety's sake.
12383 genFlagsEqualToNone();
12385 #else // not defined(_TARGET_XARCH_)
12386 NYI("GT_CMPXCHG codegen");
12393 compiler->gtDispTree(tree);
12395 noway_assert(!"unexpected operator");
12396 NO_WAY("unexpected operator");
12399 noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
12400 genCodeForTree_DONE(tree, reg);
12404 /*****************************************************************************
12406 * Generate code for the given tree. tree->gtRegNum will be set to the
12407 * register where the tree lives.
12409 * If 'destReg' is non-zero, we'll do our best to compute the value into a
12410 * register that is in that register set.
12411 * Use genComputeReg() if you need the tree in a specific register.
12412 * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12413 * the register can only be used for read, but not for write.
12414 * Use genMakeAddressable() if you only need the tree to be accessible
12415 * using a complex addressing mode, and do not necessarily need the tree
12416 * materialized in a register.
12418 * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12420 * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
12421 * register will not be consumed right away and could possibly be spilled.
12424 void CodeGen::genCodeForTree(GenTreePtr tree,
12429 if (compiler->verbose)
12431 printf("Generating code for tree ");
12432 Compiler::printTreeID(tree);
12433 printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12435 genStressRegs(tree);
12438 noway_assert(tree);
12439 noway_assert(tree->gtOper != GT_STMT);
12440 assert(tree->IsNodeProperlySized());
12442 // When assigning to a enregistered local variable we receive
12443 // a hint that we should target the register that is used to
12444 // hold the enregistered local variable.
12445 // When receiving this hint both destReg and bestReg masks are set
12446 // to the register that is used by the enregistered local variable.
12448 // However it is possible to us to have a different local variable
12449 // targeting the same register to become alive (and later die)
12450 // as we descend the expression tree.
12452 // To handle such cases we will remove any registers that are alive from the
12453 // both the destReg and bestReg masks.
12455 regMaskTP liveMask = genLiveMask(tree);
12457 // This removes any registers used to hold enregistered locals
12458 // from the destReg and bestReg masks.
12459 // After this either mask could become 0
12461 destReg &= ~liveMask;
12462 bestReg &= ~liveMask;
12464 /* 'destReg' of 0 really means 'any' */
12466 destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
12468 if (destReg != RBM_ALL(tree->TypeGet()))
12469 bestReg = regSet.rsUseIfZero(bestReg, destReg);
12471 // Long, float, and double have their own codegen functions
12472 switch (tree->TypeGet())
12476 #if ! CPU_HAS_FP_SUPPORT
12479 genCodeForTreeLng(tree, destReg, /*avoidReg*/RBM_NONE);
12483 #if CPU_HAS_FP_SUPPORT
12487 // For comma nodes, we'll get back here for the last node in the comma list.
12488 if (tree->gtOper != GT_COMMA)
12490 genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12499 noway_assert(!"These types are only used as markers in GT_CAST nodes");
12507 /* Is the value already in a register? */
12509 if (tree->gtFlags & GTF_REG_VAL)
12511 genCodeForTree_REG_VAR1(tree);
12515 /* We better not have a spilled value here */
12517 noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12519 /* Figure out what kind of a node we have */
12521 unsigned kind = tree->OperKind();
12523 if (kind & GTK_CONST)
12525 /* Handle constant nodes */
12527 genCodeForTreeConst(tree, destReg, bestReg);
12529 else if (kind & GTK_LEAF)
12531 /* Handle leaf nodes */
12533 genCodeForTreeLeaf(tree, destReg, bestReg);
12535 else if (kind & GTK_SMPOP)
12537 /* Handle 'simple' unary/binary operators */
12539 genCodeForTreeSmpOp(tree, destReg, bestReg);
12543 /* Handle special operators */
12545 genCodeForTreeSpecialOp(tree, destReg, bestReg);
12550 /*****************************************************************************
12552 * Generate code for all the basic blocks in the function.
12556 #pragma warning(push)
12557 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
12559 void CodeGen::genCodeForBBlist()
12562 LclVarDsc * varDsc;
12564 unsigned savedStkLvl;
12567 genInterruptibleUsed = true;
12568 unsigned stmtNum = 0;
12569 unsigned totalCostEx = 0;
12570 unsigned totalCostSz = 0;
12572 // You have to be careful if you create basic blocks from now on
12573 compiler->fgSafeBasicBlockCreation = false;
12575 // This stress mode is not compatible with fully interruptible GC
12576 if (genInterruptible && compiler->opts.compStackCheckOnCall)
12578 compiler->opts.compStackCheckOnCall = false;
12581 // This stress mode is not compatible with fully interruptible GC
12582 if (genInterruptible && compiler->opts.compStackCheckOnRet)
12584 compiler->opts.compStackCheckOnRet = false;
12588 // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
12589 genPrepForEHCodegen();
12591 assert(!compiler->fgFirstBBScratch || compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12593 /* Initialize the spill tracking logic */
12595 regSet.rsSpillBeg();
12597 /* Initialize the line# tracking logic */
12599 #ifdef DEBUGGING_SUPPORT
12600 if (compiler->opts.compScopeInfo)
12607 #ifdef _TARGET_X86_
12608 if (compiler->compTailCallUsed)
12610 noway_assert(isFramePointerUsed());
12611 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12615 if (compiler->opts.compDbgEnC)
12617 noway_assert(isFramePointerUsed());
12618 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12621 /* If we have any pinvoke calls, we might potentially trash everything */
12623 if (compiler->info.compCallUnmanaged)
12625 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
12626 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12629 /* Initialize the pointer tracking code */
12631 gcInfo.gcRegPtrSetInit();
12632 gcInfo.gcVarPtrSetInit();
12634 /* If any arguments live in registers, mark those regs as such */
12636 for (varNum = 0, varDsc = compiler->lvaTable;
12637 varNum < compiler->lvaCount;
12638 varNum++ , varDsc++)
12640 /* Is this variable a parameter assigned to a register? */
12642 if (!varDsc->lvIsParam || !varDsc->lvRegister)
12645 /* Is the argument live on entry to the method? */
12647 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12650 #if CPU_HAS_FP_SUPPORT
12651 /* Is this a floating-point argument? */
12653 if (varDsc->IsFloatRegType())
12656 noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12659 /* Mark the register as holding the variable */
12661 if (isRegPairType(varDsc->lvType))
12663 regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12665 if (varDsc->lvOtherReg != REG_STK)
12666 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12670 regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12674 unsigned finallyNesting = 0;
12676 // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12677 // allocation at the start of each basic block.
12678 VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12680 /*-------------------------------------------------------------------------
12682 * Walk the basic blocks and generate code for each one
12686 BasicBlock * block;
12687 BasicBlock * lblk; /* previous block */
12689 for (lblk = NULL, block = compiler->fgFirstBB;
12691 lblk = block, block = block->bbNext)
12694 if (compiler->verbose)
12696 printf("\n=============== Generating ");
12697 block->dspBlockHeader(compiler, true, true);
12698 compiler->fgDispBBLiveness(block);
12702 VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
12704 regMaskTP gcrefRegs = 0;
12705 regMaskTP byrefRegs = 0;
12707 /* Does any other block jump to this point ? */
12709 if (block->bbFlags & BBF_JMP_TARGET)
12711 /* Someone may jump here, so trash all regs */
12713 regTracker.rsTrackRegClr();
12715 genFlagsEqualToNone();
12719 /* No jump, but pointers always need to get trashed for proper GC tracking */
12721 regTracker.rsTrackRegClrPtr();
12724 /* No registers are used or locked on entry to a basic block */
12726 regSet.rsMaskUsed = RBM_NONE;
12727 regSet.rsMaskMult = RBM_NONE;
12728 regSet.rsMaskLock = RBM_NONE;
12730 // If we need to reserve registers such that they are not used
12731 // by CodeGen in this BasicBlock we do so here.
12732 // On the ARM when we have large frame offsets for locals we
12733 // will have RBM_R10 in the regSet.rsMaskResvd set,
12734 // additionally if a LocAlloc or alloca is used RBM_R9 is in
12735 // the regSet.rsMaskResvd set and we lock these registers here.
12737 if (regSet.rsMaskResvd != RBM_NONE)
12739 regSet.rsLockReg(regSet.rsMaskResvd);
12740 regSet.rsSetRegsModified(regSet.rsMaskResvd);
12743 /* Figure out which registers hold variables on entry to this block */
12745 regMaskTP specialUseMask = regSet.rsMaskResvd;
12747 specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE|RBM_FPBASE
12749 regSet.ClearMaskVars();
12750 VarSetOps::ClearD(compiler, compiler->compCurLife);
12751 VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12753 #if FEATURE_STACK_FP_X87
12754 VarSetOps::AssignNoCopy(compiler,
12756 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12757 genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
12758 genFPdeadRegCnt = 0;
12760 gcInfo.gcResetForBB();
12762 genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12763 #if FEATURE_STACK_FP_X87
12764 VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12767 // We should never enregister variables in any of the specialUseMask registers
12768 noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12770 VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
12771 while (iter.NextElem(compiler, &varIndex))
12773 varNum = compiler->lvaTrackedToVarNum[varIndex];
12774 varDsc = compiler->lvaTable + varNum;
12775 assert(varDsc->lvTracked);
12776 /* Ignore the variable if it's not in a reg */
12778 if (!varDsc->lvRegister)
12780 if (isFloatRegType(varDsc->lvType))
12783 /* Get hold of the index and the bitmask for the variable */
12784 regNumber regNum = varDsc->lvRegNum;
12785 regMaskTP regMask = genRegMask(regNum);
12787 regSet.AddMaskVars(regMask);
12789 if (varDsc->lvType == TYP_REF)
12790 gcrefRegs |= regMask;
12791 else if (varDsc->lvType == TYP_BYREF)
12792 byrefRegs |= regMask;
12794 /* Mark the register holding the variable as such */
12796 if (varTypeIsMultiReg(varDsc))
12798 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12799 if (varDsc->lvOtherReg != REG_STK)
12801 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12802 regMask |= genRegMask(varDsc->lvOtherReg);
12807 regTracker.rsTrackRegLclVar(regNum, varNum);
12811 gcInfo.gcPtrArgCnt = 0;
12813 #if FEATURE_STACK_FP_X87
12815 regSet.rsMaskUsedFloat =
12816 regSet.rsMaskRegVarFloat =
12817 regSet.rsMaskLockedFloat = RBM_NONE;
12819 memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12820 memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12822 // Setup fp state on block entry
12823 genSetupStateStackFP(block);
12826 if (compiler->verbose)
12831 #endif // FEATURE_STACK_FP_X87
12833 /* Make sure we keep track of what pointers are live */
12835 noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12836 gcInfo.gcRegGCrefSetCur = gcrefRegs;
12837 gcInfo.gcRegByrefSetCur = byrefRegs;
12839 /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12840 represent the exception object (TYP_REF).
12841 We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12842 to the block, it will be the first thing evaluated
12843 (thanks to GTF_ORDER_SIDEEFF).
12846 if (handlerGetsXcptnObj(block->bbCatchTyp))
12848 GenTreePtr firstStmt = block->FirstNonPhiDef();
12849 if (firstStmt != NULL)
12851 GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
12852 if (compiler->gtHasCatchArg(firstTree))
12854 gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12859 /* Start a new code output block */
12861 #if FEATURE_EH_FUNCLETS
12862 #if defined(_TARGET_ARM_)
12863 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
12864 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
12865 // calls the funclet during non-exceptional control flow.
12866 if (block->bbFlags & BBF_FINALLY_TARGET)
12868 assert(block->bbFlags & BBF_JMP_TARGET);
12870 // Create a label that we'll use for computing the start of an EH region, if this block is
12871 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
12872 // determining the EH regions, then this NOP would end up outside of the region, if this
12873 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
12874 // would be executed, which we would prefer not to do.
12877 if (compiler->verbose)
12879 printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
12883 block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(
12884 gcInfo.gcVarPtrSetCur,
12885 gcInfo.gcRegGCrefSetCur,
12886 gcInfo.gcRegByrefSetCur);
12890 #endif // defined(_TARGET_ARM_)
12892 genUpdateCurrentFunclet(block);
12893 #endif // FEATURE_EH_FUNCLETS
12895 #ifdef _TARGET_XARCH_
12896 if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12898 getEmitter()->emitLoopAlign();
12903 if (compiler->opts.dspCode)
12904 printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12907 block->bbEmitCookie = NULL;
12909 if (block->bbFlags & (BBF_JMP_TARGET|BBF_HAS_LABEL))
12911 /* Mark a label and update the current set of live GC refs */
12913 block->bbEmitCookie = getEmitter()->emitAddLabel(
12914 gcInfo.gcVarPtrSetCur,
12915 gcInfo.gcRegGCrefSetCur,
12916 gcInfo.gcRegByrefSetCur,
12917 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12918 /*isFinally*/block->bbFlags & BBF_FINALLY_TARGET
12925 if (block == compiler->fgFirstColdBlock)
12928 if (compiler->verbose)
12930 printf("\nThis is the start of the cold region of the method\n");
12933 // We should never have a block that falls through into the Cold section
12934 noway_assert(!lblk->bbFallsThrough());
12936 // We require the block that starts the Cold section to have a label
12937 noway_assert(block->bbEmitCookie);
12938 getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12941 /* Both stacks are always empty on entry to a basic block */
12944 #if FEATURE_STACK_FP_X87
12945 genResetFPstkLevel();
12946 #endif // FEATURE_STACK_FP_X87
12948 #if !FEATURE_FIXED_OUT_ARGS
12949 /* Check for inserted throw blocks and adjust genStackLevel */
12951 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
12953 noway_assert(block->bbFlags & BBF_JMP_TARGET);
12955 genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
12959 #ifdef _TARGET_X86_
12960 getEmitter()->emitMarkStackLvl(genStackLevel);
12961 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
12963 #else // _TARGET_X86_
12964 NYI("Need emitMarkStackLvl()");
12965 #endif // _TARGET_X86_
12968 #endif // !FEATURE_FIXED_OUT_ARGS
12970 savedStkLvl = genStackLevel;
12972 /* Tell everyone which basic block we're working on */
12974 compiler->compCurBB = block;
12976 #ifdef DEBUGGING_SUPPORT
12977 siBeginBlock(block);
12979 // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12980 if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12981 genIPmappingAdd((IL_OFFSETX) ICorDebugInfo::NO_MAPPING, true);
12983 bool firstMapping = true;
12984 #endif // DEBUGGING_SUPPORT
12986 /*---------------------------------------------------------------------
12988 * Generate code for each statement-tree in the block
12992 #if FEATURE_EH_FUNCLETS
12993 if (block->bbFlags & BBF_FUNCLET_BEG)
12995 genReserveFuncletProlog(block);
12997 #endif // FEATURE_EH_FUNCLETS
12999 for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
13001 noway_assert(stmt->gtOper == GT_STMT);
13003 #if defined(DEBUGGING_SUPPORT)
13005 /* Do we have a new IL-offset ? */
13007 if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
13009 /* Create and append a new IP-mapping entry */
13010 genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping);
13011 firstMapping = false;
13014 #endif // DEBUGGING_SUPPORT
13017 if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
13019 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
13020 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
13022 while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
13024 genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
13030 /* Get hold of the statement tree */
13031 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
13035 if (compiler->verbose)
13037 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
13038 printf("Holding variables: ");
13039 dspRegMask(regSet.rsMaskVars); printf("\n\n");
13040 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
13042 #if FEATURE_STACK_FP_X87
13046 printf("Execution Order:\n");
13047 for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList;
13049 treeNode = treeNode->gtNext)
13051 compiler->gtDispTree(treeNode, 0, NULL, true);
13055 totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
13056 totalCostSz += stmt->gtCostSz;
13059 compiler->compCurStmt = stmt;
13061 compiler->compCurLifeTree = NULL;
13062 switch (tree->gtOper)
13065 // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
13066 // reported as alive even though not used within the caller for managed debugger sake. So
13067 // consider the return value of the method as used if generating debuggable code.
13068 genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
13069 genUpdateLife (tree);
13070 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
13076 // Just do the side effects
13077 genEvalSideEffects(tree);
13081 /* Generate code for the tree */
13083 genCodeForTree(tree, 0);
13087 regSet.rsSpillChk();
13089 /* The value of the tree isn't used, unless it's a return stmt */
13091 if (tree->gtOper != GT_RETURN)
13092 gcInfo.gcMarkRegPtrVal(tree);
13094 #if FEATURE_STACK_FP_X87
13095 genEndOfStatement();
13099 /* Make sure we didn't bungle pointer register tracking */
13101 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur);
13102 regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
13104 // If return is a GC-type, clear it. Note that if a common
13105 // epilog is generated (compiler->genReturnBB) it has a void return
13106 // even though we might return a ref. We can't use the compRetType
13107 // as the determiner because something we are tracking as a byref
13108 // might be used as a return value of a int function (which is legal)
13109 if (tree->gtOper == GT_RETURN &&
13110 (varTypeIsGC(compiler->info.compRetType) ||
13111 (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
13113 nonVarPtrRegs &= ~RBM_INTRET;
13116 // When profiling, the first statement in a catch block will be the
13117 // harmless "inc" instruction (does not interfere with the exception
13120 if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) &&
13121 (stmt == block->bbTreeList) &&
13122 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
13124 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
13129 printf("Regset after tree=");
13130 Compiler::printTreeID(tree);
13131 printf(" BB%02u gcr=", block->bbNum);
13132 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
13133 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
13135 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
13136 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
13137 printf(", regVars=");
13138 printRegMaskInt(regSet.rsMaskVars);
13139 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
13143 noway_assert(nonVarPtrRegs == 0);
13146 noway_assert(stmt->gtOper == GT_STMT);
13148 #ifdef DEBUGGING_SUPPORT
13149 genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
13152 } //-------- END-FOR each statement-tree of the current block ---------
13154 #ifdef DEBUGGING_SUPPORT
13156 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
13160 /* Is this the last block, and are there any open scopes left ? */
13162 bool isLastBlockProcessed = (block->bbNext == NULL);
13163 if (block->isBBCallAlwaysPair())
13165 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
13168 if (isLastBlockProcessed && siOpenScopeList.scNext)
13170 /* This assert no longer holds, because we may insert a throw
13171 block to demarcate the end of a try or finally region when they
13172 are at the end of the method. It would be nice if we could fix
13173 our code so that this throw block will no longer be necessary. */
13175 //noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
13177 siCloseAllOpenScopes();
13181 #endif // DEBUGGING_SUPPORT
13183 genStackLevel -= savedStkLvl;
13185 gcInfo.gcMarkRegSetNpt(gcrefRegs|byrefRegs);
13187 if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
13188 compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
13190 /* Both stacks should always be empty on exit from a basic block */
13192 noway_assert(genStackLevel == 0);
13193 #if FEATURE_STACK_FP_X87
13194 noway_assert(genGetFPstkLevel() == 0);
13196 // Do the FPState matching that may have to be done
13197 genCodeForEndBlockTransitionStackFP(block);
13200 noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
13202 /* Do we need to generate a jump or return? */
13204 switch (block->bbJumpKind)
13207 inst_JMP(EJ_jmp, block->bbJumpDest);
13211 genExitCode(block);
13215 // If we have a throw at the end of a function or funclet, we need to emit another instruction
13216 // afterwards to help the OS unwinder determine the correct context during unwind.
13217 // We insert an unexecuted breakpoint instruction in several situations
13218 // following a throw instruction:
13219 // 1. If the throw is the last instruction of the function or funclet. This helps
13220 // the OS unwinder determine the correct context during an unwind from the
13221 // thrown exception.
13222 //      2. If this is the last block of the hot section.
13223 // 3. If the subsequent block is a special throw block.
13224 if ((block->bbNext == NULL)
13225 #if FEATURE_EH_FUNCLETS
13226 || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
13227 #endif // FEATURE_EH_FUNCLETS
13228 || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext))
13229 || block->bbNext == compiler->fgFirstColdBlock
13232 instGen(INS_BREAKPOINT); // This should never get executed
13237 case BBJ_CALLFINALLY:
13239 #if defined(_TARGET_X86_)
13241 /* If we are about to invoke a finally locally from a try block,
13242 we have to set the hidden slot corresponding to the finally's
13243 nesting level. When invoked in response to an exception, the
13244 EE usually does it.
13246 We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
13248 This code depends on this order not being messed up.
13255 step: mov [ebp- n ],0
13260 noway_assert(isFramePointerUsed());
13262 // Get the nesting level which contains the finally
13263 compiler->fgGetNestingLevel(block, &finallyNesting);
13265 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13266 unsigned filterEndOffsetSlotOffs;
13267 filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
13269 unsigned curNestingSlotOffs;
13270 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
13272 // Zero out the slot for the next nesting level
13273 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
13274 compiler->lvaShadowSPslotsVar, curNestingSlotOffs - sizeof(void*));
13276 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK,
13277 compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
13279 // Now push the address of where the finally funclet should
13280 // return to directly.
13281 if ( !(block->bbFlags & BBF_RETLESS_CALL) )
13283 assert(block->isBBCallAlwaysPair());
13284 getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13288 // EE expects a DWORD, so we give him 0
13289 inst_IV(INS_push_hide, 0);
13292 // Jump to the finally BB
13293 inst_JMP(EJ_jmp, block->bbJumpDest);
13295 #elif defined(_TARGET_ARM_)
13297 // Now set REG_LR to the address of where the finally funclet should
13298 // return to directly.
13300 BasicBlock * bbFinallyRet; bbFinallyRet = NULL;
13302 // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13303 // we would have otherwise created retless calls.
13304 assert(block->isBBCallAlwaysPair());
13306 assert(block->bbNext != NULL);
13307 assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
13308 assert(block->bbNext->bbJumpDest != NULL);
13309 assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13311 bbFinallyRet = block->bbNext->bbJumpDest;
13312 bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13315 // We don't know the address of finally funclet yet. But adr requires that the offset
13316 // to the finally funclet from the current IP be within 4095 bytes. So this code is disabled
13318 getEmitter()->emitIns_J_R (INS_adr,
13323 // Load the address where the finally funclet should return into LR.
13324 // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13326 getEmitter()->emitIns_R_L (INS_movw,
13327 EA_4BYTE_DSP_RELOC,
13330 getEmitter()->emitIns_R_L (INS_movt,
13331 EA_4BYTE_DSP_RELOC,
13334 regTracker.rsTrackRegTrash(REG_LR);
13337 // Jump to the finally BB
13338 inst_JMP(EJ_jmp, block->bbJumpDest);
13343 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13344 // jump target using bbJumpDest - that is already used to point
13345 // to the finally block. So just skip past the BBJ_ALWAYS unless the
13346 // block is RETLESS.
13347 if ( !(block->bbFlags & BBF_RETLESS_CALL) )
13349 assert(block->isBBCallAlwaysPair());
13352 block = block->bbNext;
13356 #ifdef _TARGET_ARM_
13358 case BBJ_EHCATCHRET:
13359 // set r0 to the address the VM should return to after the catch
13360 getEmitter()->emitIns_R_L (INS_movw,
13361 EA_4BYTE_DSP_RELOC,
13364 getEmitter()->emitIns_R_L (INS_movt,
13365 EA_4BYTE_DSP_RELOC,
13368 regTracker.rsTrackRegTrash(REG_R0);
13372 case BBJ_EHFINALLYRET:
13373 case BBJ_EHFILTERRET:
13374 genReserveFuncletEpilog(block);
13377 #else // _TARGET_ARM_
13379 case BBJ_EHFINALLYRET:
13380 case BBJ_EHFILTERRET:
13381 case BBJ_EHCATCHRET:
13384 #endif // _TARGET_ARM_
13392 noway_assert(!"Unexpected bbJumpKind");
13397 compiler->compCurBB = 0;
13400 } //------------------ END-FOR each block of the method -------------------
13402 /* Nothing is live at this point */
13403 genUpdateLife(VarSetOps::MakeEmpty(compiler));
13405 /* Finalize the spill tracking logic */
13407 regSet.rsSpillEnd();
13409 /* Finalize the temp tracking logic */
13411 compiler->tmpEnd();
13414 if (compiler->verbose)
13417 printf("totalCostEx = %6d, totalCostSz = %5d ",
13418 totalCostEx, totalCostSz);
13419 printf("%s\n", compiler->info.compFullName);
13424 #pragma warning(pop)
13427 /*****************************************************************************
13429 * Generate code for a long operation.
13430 * needReg is a recommendation of which registers to use for the tree.
13431 * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
13432 * without loading the stack part into a register. Note that only leaf
13433 * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13434 * enregistered so that we can know the memory location of the other half.
13438 #pragma warning(push)
13439 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
13441 void CodeGen::genCodeForTreeLng(GenTreePtr tree,
13443 regMaskTP avoidReg)
13448 regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13453 noway_assert(tree);
13454 noway_assert(tree->gtOper != GT_STMT);
13455 noway_assert(genActualType(tree->gtType) == TYP_LONG);
13457 /* Figure out what kind of a node we have */
13459 oper = tree->OperGet();
13460 kind = tree->OperKind();
13462 if (tree->gtFlags & GTF_REG_VAL)
13465 regPair = tree->gtRegPair;
13467 gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13472 /* Is this a constant node? */
13474 if (kind & GTK_CONST)
13478 /* Pick a register pair for the value */
13480 regPair = regSet.rsPickRegPair(needReg);
13482 /* Load the value into the registers */
13484 #if ! CPU_HAS_FP_SUPPORT
13485 if (oper == GT_CNS_DBL)
13487 noway_assert(sizeof(__int64) == sizeof(double));
13489 noway_assert(sizeof(tree->gtLngCon.gtLconVal) ==
13490 sizeof(tree->gtDblCon.gtDconVal));
13492 lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13497 noway_assert(oper == GT_CNS_LNG);
13499 lval = tree->gtLngCon.gtLconVal;
13502 genSetRegToIcon(genRegPairLo(regPair), int(lval ));
13503 genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13507 /* Is this a leaf node? */
13509 if (kind & GTK_LEAF)
13517 /* This case has to consider the case in which an int64 LCL_VAR
13518 * may both be enregistered and also have a cached copy of itself
13519 * in a different set of registers.
13520 * We want to return the registers that have the most in common
13521 * with the needReg mask
13524 /* Does the var have a copy of itself in the cached registers?
13525 * And are these cached registers both free?
13526 * If so use these registers if they match any needReg.
13529 regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13531 if ( ( regPair != REG_PAIR_NONE) &&
13532 ( (regSet.rsRegMaskFree() & needReg) == needReg ) &&
13533 ((genRegPairMask(regPair) & needReg) != RBM_NONE ))
13538 /* Does the variable live in a register?
13539 * If so use these registers.
13541 if (genMarkLclVar(tree))
13544 /* If tree is not an enregistered variable then
13545 * be sure to use any cached register that contain
13546 * a copy of this local variable
13548 if (regPair != REG_PAIR_NONE)
13557 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13558 // to worry about it being enregistered.
13559 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13565 /* Pick a register pair for the value */
13567 regPair = regSet.rsPickRegPair(needReg);
13569 /* Load the value into the registers */
13571 instruction loadIns;
13573 loadIns = ins_Load(TYP_INT); // INS_ldr
13574 regLo = genRegPairLo(regPair);
13575 regHi = genRegPairHi(regPair);
13576 // assert(regLo != regHi); // regpair property
13578 #if CPU_LOAD_STORE_ARCH
13580 regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13581 inst_RV_TT(INS_lea, regAddr, tree, 0);
13582 regTracker.rsTrackRegTrash(regAddr);
13584 if (regLo != regAddr)
13586 // assert(regLo != regAddr); // forced by if statement
13587 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13588 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13592 // assert(regHi != regAddr); // implied by regpair property and the if statement
13593 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13594 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13598 inst_RV_TT(loadIns, regLo, tree, 0);
13599 inst_RV_TT(loadIns, regHi, tree, 4);
13602 #ifdef _TARGET_ARM_
13603 if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13605 // Emit a memory barrier instruction after the load
13606 instGen_MemoryBarrier();
13610 regTracker.rsTrackRegTrash(regLo);
13611 regTracker.rsTrackRegTrash(regHi);
13617 compiler->gtDispTree(tree);
13619 noway_assert(!"unexpected leaf");
13623 /* Is it a 'simple' unary/binary operator? */
13625 if (kind & GTK_SMPOP)
13631 bool setCarry = false;
13634 GenTreePtr op1 = tree->gtOp.gtOp1;
13635 GenTreePtr op2 = tree->gtGetOp2();
13641 #ifdef DEBUGGING_SUPPORT
13642 unsigned lclVarNum = compiler->lvaCount;
13643 unsigned lclVarILoffs = DUMMY_INIT(0);
13646 /* Is the target a local ? */
13648 if (op1->gtOper == GT_LCL_VAR)
13650 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
13651 LclVarDsc * varDsc;
13653 noway_assert(varNum < compiler->lvaCount);
13654 varDsc = compiler->lvaTable + varNum;
13656 // No dead stores, (with min opts we may have dead stores)
13657 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13659 #ifdef DEBUGGING_SUPPORT
13660 /* For non-debuggable code, every definition of a lcl-var has
13661 * to be checked to see if we need to open a new scope for it.
13662 * Remember the local var info to call siCheckVarScope
13663 * AFTER codegen of the assignment.
13665 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
13667 lclVarNum = varNum;
13668 lclVarILoffs = op1->gtLclVar.gtLclILoffs;
13672 /* Has the variable been assigned to a register (pair) ? */
13674 if (genMarkLclVar(op1))
13676 noway_assert(op1->gtFlags & GTF_REG_VAL);
13677 regPair = op1->gtRegPair;
13678 regLo = genRegPairLo(regPair);
13679 regHi = genRegPairHi(regPair);
13680 noway_assert(regLo != regHi);
13682 /* Is the value being assigned a constant? */
13684 if (op2->gtOper == GT_CNS_LNG)
13686 /* Move the value into the target */
13688 genMakeRegPairAvailable(regPair);
13691 if (regLo == REG_STK)
13693 ins = ins_Store(TYP_INT);
13697 // Always do the stack first (in case it grabs a register it can't
13698 // clobber regLo this way)
13699 if (regHi == REG_STK)
13701 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13705 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal ), 0);
13707 // The REG_STK case has already been handled
13708 if (regHi != REG_STK)
13711 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13714 goto DONE_ASSG_REGS;
13717 /* Compute the RHS into desired register pair */
13719 if (regHi != REG_STK)
13721 genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13722 noway_assert(op2->gtFlags & GTF_REG_VAL);
13723 noway_assert(op2->gtRegPair == regPair);
13731 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13733 noway_assert(op2->gtFlags & GTF_REG_VAL);
13735 curPair = op2->gtRegPair;
13736 curLo = genRegPairLo(curPair);
13737 curHi = genRegPairHi(curPair);
13739 /* move high first, target is on stack */
13740 inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13742 if (regLo != curLo)
13744 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13745 regSet.rsSpillReg(regLo);
13746 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13747 regTracker.rsTrackRegCopy(regLo, curLo);
13751 genReleaseRegPair(op2);
13752 goto DONE_ASSG_REGS;
13757 /* Is the value being assigned a constant? */
13759 if (op2->gtOper == GT_CNS_LNG)
13761 /* Make the target addressable */
13763 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13765 /* Move the value into the target */
13767 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal ), 0);
13768 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13770 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13776 /* Catch a case where we can avoid generating op reg, mem. Better pairing
13781 * To avoid problems with order of evaluation, only do this if op2 is
13782 * a non-enregistered local variable
13785 if (GenTree::OperIsCommutative(oper) &&
13786 op1->gtOper == GT_LCL_VAR &&
13787 op2->gtOper == GT_LCL_VAR)
13789 regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13791 /* Is op2 a non-enregistered local variable? */
13792 if (regPair == REG_PAIR_NONE)
13794 regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13796 /* Is op1 an enregistered local variable? */
13797 if (regPair != REG_PAIR_NONE)
13799 /* Swap the operands */
13800 GenTreePtr op = op1;
13808 /* Eliminate worthless assignment "lcl = lcl" */
13810 if (op2->gtOper == GT_LCL_VAR &&
13811 op1->gtOper == GT_LCL_VAR && op2->gtLclVarCommon.gtLclNum ==
13812 op1->gtLclVarCommon.gtLclNum)
13814 genUpdateLife(op2);
13819 if (op2->gtOper == GT_CAST &&
13820 TYP_ULONG == op2->CastToType() &&
13821 op2->CastFromType() <= TYP_INT &&
13822 // op1,op2 need to be materialized in the correct order.
13823 (tree->gtFlags & GTF_REVERSE_OPS))
13825 /* Generate the small RHS into a register pair */
13827 GenTreePtr smallOpr = op2->gtOp.gtOp1;
13829 genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13831 /* Make the target addressable */
13833 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13835 /* Make sure everything is still addressable */
13837 genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13838 noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
13839 regHi = smallOpr->gtRegNum;
13840 addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13842 // conv.ovf.u8 could overflow if the original number was negative
13843 if (op2->gtOverflow())
13845 noway_assert((op2->gtFlags & GTF_UNSIGNED) == 0); // conv.ovf.u8.un should be bashed to conv.u8.un
13846 instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
13847 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13848 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13851 /* Move the value into the target */
13853 inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13854 inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
13856 /* Free up anything that was tied up by either side */
13858 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13859 genReleaseReg (smallOpr);
13862 if (op1->gtOper == GT_LCL_VAR)
13864 /* clear this local from reg table */
13865 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13867 /* mark RHS registers as containing the local var */
13868 regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13875 /* Is the LHS more complex than the RHS? */
13877 if (tree->gtFlags & GTF_REVERSE_OPS)
13879 /* Generate the RHS into a register pair */
13881 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13882 noway_assert(op2->gtFlags & GTF_REG_VAL);
13884 /* Make the target addressable */
13885 op1 = genCodeForCommaTree(op1);
13886 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13888 /* Make sure the RHS register hasn't been spilled */
13890 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13894 /* Make the target addressable */
13896 op1 = genCodeForCommaTree(op1);
13897 addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13899 /* Generate the RHS into a register pair */
13901 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13904 /* Lock 'op2' and make sure 'op1' is still addressable */
13906 noway_assert(op2->gtFlags & GTF_REG_VAL);
13907 regPair = op2->gtRegPair;
13909 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13911 /* Move the value into the target */
13913 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13914 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13916 /* Free up anything that was tied up by either side */
13918 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13919 genReleaseRegPair(op2);
13925 if (op1->gtOper == GT_LCL_VAR)
13927 /* Clear this local from reg table */
13929 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13931 if ((op2->gtFlags & GTF_REG_VAL) &&
13932 /* constant has precedence over local */
13933 // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13934 tree->gtOper == GT_ASG)
13938 /* mark RHS registers as containing the local var */
13940 regNo = genRegPairLo(op2->gtRegPair);
13941 if (regNo != REG_STK)
13942 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13944 regNo = genRegPairHi(op2->gtRegPair);
13945 if (regNo != REG_STK)
13947 /* For partially enregistered longs, we might have
13948 stomped on op2's hiReg */
13949 if (!(op1->gtFlags & GTF_REG_VAL) ||
13950 regNo != genRegPairLo(op1->gtRegPair))
13952 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13962 genUpdateLife(op1);
13963 genUpdateLife(tree);
13965 #ifdef DEBUGGING_SUPPORT
13966 /* For non-debuggable code, every definition of a lcl-var has
13967 * to be checked to see if we need to open a new scope for it.
13969 if (lclVarNum < compiler->lvaCount)
13970 siCheckVarScope(lclVarNum, lclVarILoffs);
13976 case GT_SUB: insLo = INS_sub; insHi = INS_SUBC; setCarry = true; goto BINOP_OVF;
13977 case GT_ADD: insLo = INS_add; insHi = INS_ADDC; setCarry = true; goto BINOP_OVF;
13982 ovfl = tree->gtOverflow();
13985 case GT_AND: insLo = insHi = INS_AND; goto BINOP;
13986 case GT_OR : insLo = insHi = INS_OR ; goto BINOP;
13987 case GT_XOR: insLo = insHi = INS_XOR; goto BINOP;
13989 BINOP: ovfl = false; goto _BINOP;
13993 /* The following makes an assumption about gtSetEvalOrder(this) */
13995 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13997 /* Special case: check for "(long(intval) << 32) | longval" */
13999 if (oper == GT_OR && op1->gtOper == GT_LSH)
14001 GenTreePtr lshLHS = op1->gtOp.gtOp1;
14002 GenTreePtr lshRHS = op1->gtOp.gtOp2;
14004 if (lshLHS->gtOper == GT_CAST &&
14005 lshRHS->gtOper == GT_CNS_INT &&
14006 lshRHS->gtIntCon.gtIconVal == 32 &&
14007 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
14010 /* Throw away the cast of the shift operand. */
14012 op1 = lshLHS->gtCast.CastOp();
14014 /* Special case: check op2 for "ulong(intval)" */
14015 if ((op2->gtOper == GT_CAST) &&
14016 (op2->CastToType() == TYP_ULONG) &&
14017 genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
14019 /* Throw away the cast of the second operand. */
14021 op2 = op2->gtCast.CastOp();
14022 goto SIMPLE_OR_LONG;
14024 /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
14025 else if (op2->gtOper == GT_AND)
14027 GenTreePtr andLHS; andLHS = op2->gtOp.gtOp1;
14028 GenTreePtr andRHS; andRHS = op2->gtOp.gtOp2;
14030 if (andLHS->gtOper == GT_CAST &&
14031 andRHS->gtOper == GT_CNS_LNG &&
14032 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
14033 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
14035 /* Throw away the cast of the second operand. */
14037 op2 = andLHS->gtCast.CastOp();
14040 // Load the high DWORD, ie. op1
14042 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
14044 noway_assert(op1->gtFlags & GTF_REG_VAL);
14045 regHi = op1->gtRegNum;
14046 regSet.rsMarkRegUsed(op1);
14048 // Load the low DWORD, ie. op2
14050 genCodeForTree(op2, needReg & ~genRegMask(regHi));
14052 noway_assert(op2->gtFlags & GTF_REG_VAL);
14053 regLo = op2->gtRegNum;
14055 /* Make sure regHi is still around. Also, force
14056 regLo to be excluded in case regLo==regHi */
14058 genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
14059 regHi = op1->gtRegNum;
14061 regPair = gen2regs2pair(regLo, regHi);
14066 /* Generate the following sequence:
14067 Prepare op1 (discarding shift)
14068 Compute op2 into some regpair
14072 /* First, make op1 addressable */
14074 /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
14076 It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
14077 which the long value is computed is from needReg. But at this point the safest fix is
14078 to exclude regSet.rsMaskResvd.
14080 Note that needReg could be the set of free registers (excluding reserved ones). If we don't
14081 exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a reg from
14082 reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
14084 regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
14086 addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
14088 genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
14090 noway_assert(op2->gtFlags & GTF_REG_VAL);
14091 regPair = op2->gtRegPair;
14092 regHi = genRegPairHi(regPair);
14094 /* The operand might have interfered with the address */
14096 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
14098 /* Now compute the result */
14100 inst_RV_TT(insHi, regHi, op1, 0);
14102 regTracker.rsTrackRegTrash(regHi);
14104 /* Free up anything that was tied up by the LHS */
14106 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
14108 /* The result is where the second operand is sitting */
14110 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
14112 regPair = op2->gtRegPair;
14117 /* Special case: check for "longval | (long(intval) << 32)" */
14119 if (oper == GT_OR && op2->gtOper == GT_LSH)
14121 GenTreePtr lshLHS = op2->gtOp.gtOp1;
14122 GenTreePtr lshRHS = op2->gtOp.gtOp2;
14124 if (lshLHS->gtOper == GT_CAST &&
14125 lshRHS->gtOper == GT_CNS_INT &&
14126 lshRHS->gtIntCon.gtIconVal == 32 &&
14127 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
14130 /* We throw away the cast of the shift operand. */
14132 op2 = lshLHS->gtCast.CastOp();
14134 /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
14136 if (op1->gtOper == GT_AND)
14138 GenTreePtr andLHS = op1->gtOp.gtOp1;
14139 GenTreePtr andRHS = op1->gtOp.gtOp2;
14141 if (andLHS->gtOper == GT_CAST &&
14142 andRHS->gtOper == GT_CNS_LNG &&
14143 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
14144 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
14146 /* Throw away the cast of the first operand. */
14148 op1 = andLHS->gtCast.CastOp();
14150 // Load the low DWORD, ie. op1
14152 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
14154 noway_assert(op1->gtFlags & GTF_REG_VAL);
14155 regLo = op1->gtRegNum;
14156 regSet.rsMarkRegUsed(op1);
14158 // Load the high DWORD, ie. op2
14160 genCodeForTree(op2, needReg & ~genRegMask(regLo));
14162 noway_assert(op2->gtFlags & GTF_REG_VAL);
14163 regHi = op2->gtRegNum;
14165 /* Make sure regLo is still around. Also, force
14166 regHi to be excluded in case regLo==regHi */
14168 genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
14169 regLo = op1->gtRegNum;
14171 regPair = gen2regs2pair(regLo, regHi);
14176 /* Generate the following sequence:
14177 Compute op1 into some regpair
14178 Make op2 (ignoring shift) addressable
14182 // First, generate the first operand into some register
14184 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14185 noway_assert(op1->gtFlags & GTF_REG_VAL);
14187 /* Make the second operand addressable */
14189 addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
14191 /* Make sure the result is in a free register pair */
14193 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
14194 regPair = op1->gtRegPair;
14195 regHi = genRegPairHi(regPair);
14197 /* The operand might have interfered with the address */
14199 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
14201 /* Compute the new value */
14203 inst_RV_TT(insHi, regHi, op2, 0);
14205 /* The value in the high register has been trashed */
14207 regTracker.rsTrackRegTrash(regHi);
14213 /* Generate the first operand into registers */
14215 if ( (genCountBits(needReg) == 2) &&
14216 ((regSet.rsRegMaskFree() & needReg) == needReg ) &&
14217 ((op2->gtRsvdRegs & needReg) == RBM_NONE) &&
14218 (!(tree->gtFlags & GTF_ASG)) )
14220 regPair = regSet.rsPickRegPair(needReg);
14221 genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14225 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
14227 noway_assert(op1->gtFlags & GTF_REG_VAL);
14229 regPair = op1->gtRegPair;
14230 op1Mask = genRegPairMask(regPair);
14232 /* Make the second operand addressable */
14233 regMaskTP needReg2;
14234 needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
14235 addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
14237 // TODO: If 'op1' got spilled and 'op2' happens to be
14238 // TODO: in a register, and we have add/mul/and/or/xor,
14239 // TODO: reverse the operands since we can perform the
14240 // TODO: operation directly with the spill temp, e.g.
14241 // TODO: 'add regHi, [temp]'.
14243 /* Make sure the result is in a free register pair */
14245 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
14246 regPair = op1->gtRegPair;
14247 op1Mask = genRegPairMask(regPair);
14249 regLo = genRegPairLo(regPair);
14250 regHi = genRegPairHi(regPair);
14252 /* Make sure that we don't spill regLo/regHi below */
14253 regSet.rsLockUsedReg(op1Mask);
14255 /* The operand might have interfered with the address */
14257 addrReg = genKeepAddressable(op2, addrReg);
14259 /* The value in the register pair is about to be trashed */
14261 regTracker.rsTrackRegTrash(regLo);
14262 regTracker.rsTrackRegTrash(regHi);
14264 /* Compute the new value */
14269 if (op2->gtOper == GT_CNS_LNG)
14271 __int64 icon = op2->gtLngCon.gtLconVal;
14273 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14278 if ((int)(icon ) == -1)
14280 if ((int)(icon >> 32) == -1)
14283 if (!(icon & I64(0x00000000FFFFFFFF)))
14285 genSetRegToIcon(regLo, 0);
14289 if (!(icon & I64(0xFFFFFFFF00000000)))
14291 /* Just to always set low first*/
14295 inst_RV_TT(insLo, regLo, op2, 0);
14298 genSetRegToIcon(regHi, 0);
14306 if (!(icon & I64(0x00000000FFFFFFFF)))
14308 if (!(icon & I64(0xFFFFFFFF00000000)))
14316 // Fix 383813 X86/ARM ILGEN
14317 // Fix 383793 ARM ILGEN
14318 // Fix 383911 ARM ILGEN
14319 regMaskTP newMask; newMask = addrReg & ~op1Mask;
14320 regSet.rsLockUsedReg(newMask);
14324 insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14325 inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14329 insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14330 inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14333 regSet.rsUnlockUsedReg(newMask);
14334 regSet.rsUnlockUsedReg(op1Mask);
14338 /* Free up anything that was tied up by the LHS */
14340 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14342 /* The result is where the first operand is sitting */
14344 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14346 regPair = op1->gtRegPair;
14349 genCheckOverflow(tree);
14355 regPair = genCodeForLongModInt(tree, needReg);
14360 /* Special case: both operands promoted from int */
14362 assert(tree->gtIsValid64RsltMul());
14364 /* Change to an integer multiply temporarily */
14366 tree->gtType = TYP_INT;
14368 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14369 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14370 tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14372 assert(tree->gtFlags & GTF_MUL_64RSLT);
14374 #if defined(_TARGET_X86_)
14375 // imul on x86 requires EDX:EAX
14376 genComputeReg(tree, (RBM_EAX|RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14377 noway_assert(tree->gtFlags & GTF_REG_VAL);
14378 noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
14379 #elif defined(_TARGET_ARM_)
14380 genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14381 noway_assert(tree->gtFlags & GTF_REG_VAL);
14383 assert(!"Unsupported target for 64-bit multiply codegen");
14386 /* Restore gtType, op1 and op2 from the change above */
14388 tree->gtType = TYP_LONG;
14389 tree->gtOp.gtOp1 = op1;
14390 tree->gtOp.gtOp2 = op2;
14392 #if defined(_TARGET_X86_)
14393 /* The result is now in EDX:EAX */
14394 regPair = REG_PAIR_EAXEDX;
14395 #elif defined(_TARGET_ARM_)
14396 regPair = tree->gtRegPair;
14400 case GT_LSH: helper = CORINFO_HELP_LLSH; goto SHIFT;
14401 case GT_RSH: helper = CORINFO_HELP_LRSH; goto SHIFT;
14402 case GT_RSZ: helper = CORINFO_HELP_LRSZ; goto SHIFT;
14406 noway_assert(op1->gtType == TYP_LONG);
14407 noway_assert(genActualType(op2->gtType) == TYP_INT);
14409 /* Is the second operand a constant? */
14411 if (op2->gtOper == GT_CNS_INT)
14413 unsigned int count = op2->gtIntCon.gtIconVal;
14415 /* Compute the left operand into a free register pair */
14417 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14418 noway_assert(op1->gtFlags & GTF_REG_VAL);
14420 regPair = op1->gtRegPair;
14421 regLo = genRegPairLo(regPair);
14422 regHi = genRegPairHi(regPair);
14424 /* Assume the value in the register pair is trashed. In some cases, though,
14425 a register might be set to zero, and we can use that information to improve
14426 some code generation.
14429 regTracker.rsTrackRegTrash(regLo);
14430 regTracker.rsTrackRegTrash(regHi);
14432 /* Generate the appropriate shift instructions */
14439 // regHi, regLo are correct
14441 else if (count < 32)
14443 #if defined(_TARGET_XARCH_)
14444 inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14445 #elif defined(_TARGET_ARM_)
14446 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14447 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14450 #endif // _TARGET_*
14451 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14453 else // count >= 32
14455 assert(count >= 32);
14458 #if defined(_TARGET_ARM_)
14461 // mov low dword into high dword (i.e. shift left by 32-bits)
14462 inst_RV_RV(INS_mov, regHi, regLo);
14466 assert(count > 32 && count < 64);
14467 getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo, count - 32);
14470 // mov low dword into high dword (i.e. shift left by 32-bits)
14471 inst_RV_RV(INS_mov, regHi, regLo);
14474 // Shift high dword left by count - 32
14475 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14477 #endif // _TARGET_*
14479 else // count >= 64
14481 assert(count >= 64);
14482 genSetRegToIcon(regHi, 0);
14484 genSetRegToIcon(regLo, 0);
14491 // regHi, regLo are correct
14493 else if (count < 32)
14495 #if defined(_TARGET_XARCH_)
14496 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14497 #elif defined(_TARGET_ARM_)
14498 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14499 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14502 #endif // _TARGET_*
14503 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14505 else // count >= 32
14507 assert(count >= 32);
14510 #if defined(_TARGET_ARM_)
14513 // mov high dword into low dword (i.e. shift right by 32-bits)
14514 inst_RV_RV(INS_mov, regLo, regHi);
14518 assert(count > 32 && count < 64);
14519 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi, count - 32);
14522 // mov high dword into low dword (i.e. shift right by 32-bits)
14523 inst_RV_RV(INS_mov, regLo, regHi);
14526 // Shift low dword right by count - 32
14527 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14529 #endif // _TARGET_*
14532 // Propagate sign bit in high dword
14533 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14537 // Propagate the sign from the high dword
14538 inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14546 // regHi, regLo are correct
14548 else if (count < 32)
14550 #if defined(_TARGET_XARCH_)
14551 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14552 #elif defined(_TARGET_ARM_)
14553 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14554 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14557 #endif // _TARGET_*
14558 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14560 else // count >= 32
14562 assert(count >= 32);
14565 #if defined(_TARGET_ARM_)
14568 // mov high dword into low dword (i.e. shift right by 32-bits)
14569 inst_RV_RV(INS_mov, regLo, regHi);
14573 assert(count > 32 && count < 64);
14574 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi, count - 32);
14577 // mov high dword into low dword (i.e. shift right by 32-bits)
14578 inst_RV_RV(INS_mov, regLo, regHi);
14581 // Shift low dword right by count - 32
14582 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14584 #endif // _TARGET_*
14586 else // count >= 64
14588 assert(count >= 64);
14589 genSetRegToIcon(regLo, 0);
14591 genSetRegToIcon(regHi, 0);
14596 noway_assert(!"Illegal oper for long shift");
14603 /* Which operand are we supposed to compute first? */
14605 assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
14607 if (tree->gtFlags & GTF_REVERSE_OPS)
14609 /* The second operand can't be a constant */
14611 noway_assert(op2->gtOper != GT_CNS_INT);
14613 /* Load the shift count, hopefully into RBM_SHIFT */
14614 RegSet::ExactReg exactReg;
14615 if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14616 exactReg = RegSet::EXACT_REG;
14618 exactReg = RegSet::ANY_REG;
14619 genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14621 /* Compute the left operand into REG_LNGARG_0 */
14623 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14624 noway_assert(op1->gtFlags & GTF_REG_VAL);
14626 /* Lock op1 so that it doesn't get trashed */
14628 regSet.rsLockUsedReg(RBM_LNGARG_0);
14630 /* Make sure the shift count wasn't displaced */
14632 genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14636 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14640 /* Compute the left operand into REG_LNGARG_0 */
14642 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14643 noway_assert(op1->gtFlags & GTF_REG_VAL);
14645 /* Compute the shift count into RBM_SHIFT */
14647 genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14651 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14653 /* Make sure the value hasn't been displaced */
14655 genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14657 /* Lock op1 so that it doesn't get trashed */
14659 regSet.rsLockUsedReg(RBM_LNGARG_0);
14662 #ifndef _TARGET_X86_
14663 /* The generic helper is a C-routine and so it follows the full ABI */
14665 /* Spill any callee-saved registers which are being used */
14666 regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14668 /* But do not spill our argument registers. */
14669 spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14673 regSet.rsSpillRegs(spillRegs);
14676 #endif // !_TARGET_X86_
14678 /* Perform the shift by calling a helper function */
14680 noway_assert(op1->gtRegPair == REG_LNGARG_0);
14681 noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14682 noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14684 genEmitHelperCall(helper,
14686 EA_8BYTE); // retSize
14688 #ifdef _TARGET_X86_
14689 /* The value in the register pair is trashed */
14691 regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14692 regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14693 #else // _TARGET_X86_
14694 /* The generic helper is a C-routine and so it follows the full ABI */
14695 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14696 #endif // _TARGET_X86_
14698 /* Release both operands */
14700 regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14701 genReleaseRegPair(op1);
14702 genReleaseReg (op2);
14706 noway_assert(op1->gtFlags & GTF_REG_VAL);
14707 regPair = op1->gtRegPair;
14713 /* Generate the operand into some register pair */
14715 genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14716 noway_assert(op1->gtFlags & GTF_REG_VAL);
14718 regPair = op1->gtRegPair;
14720 /* Figure out which registers the value is in */
14722 regLo = genRegPairLo(regPair);
14723 regHi = genRegPairHi(regPair);
14725 /* The value in the register pair is about to be trashed */
14727 regTracker.rsTrackRegTrash(regLo);
14728 regTracker.rsTrackRegTrash(regHi);
14730 if (oper == GT_NEG)
14732 /* Unary "neg": negate the value in the register pair */
14734 #ifdef _TARGET_ARM_
14736 // ARM doesn't have an opcode that sets the carry bit like
14737 // x86, so we can't use neg/addc/neg. Instead we use subtract
14738 // with carry. Too bad this uses an extra register.
14740 // Lock regLo and regHi so we don't pick them, and then pick
14741 // a third register to be our 0.
14742 regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14743 regSet.rsLockReg(regPairMask);
14744 regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14745 regNumber regZero = genGetRegSetToIcon(0, regBest);
14746 regSet.rsUnlockReg(regPairMask);
14748 inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14749 getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
14751 #elif defined(_TARGET_XARCH_)
14753 inst_RV (INS_NEG, regLo, TYP_LONG);
14754 inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14755 inst_RV (INS_NEG, regHi, TYP_LONG);
14757 NYI("GT_NEG on TYP_LONG");
14762 /* Unary "not": flip all the bits in the register pair */
14764 inst_RV (INS_NOT, regLo, TYP_LONG);
14765 inst_RV (INS_NOT, regHi, TYP_LONG);
14772 case GT_ASG_OR : insLo = insHi = INS_OR ; goto ASG_OPR;
14773 case GT_ASG_XOR: insLo = insHi = INS_XOR; goto ASG_OPR;
14774 case GT_ASG_AND: insLo = insHi = INS_AND; goto ASG_OPR;
14775 case GT_ASG_SUB: insLo = INS_sub; insHi = INS_SUBC; goto ASG_OPR;
14776 case GT_ASG_ADD: insLo = INS_add; insHi = INS_ADDC; goto ASG_OPR;
14780 if (op2->gtOper == GT_CNS_LNG)
14782 __int64 lval = op2->gtLngCon.gtLconVal;
14784 /* Make the target addressable */
14786 addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
14788 /* Optimize some special cases */
14793 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14798 if ((int)(lval ) == -1) doLo = false;
14799 if ((int)(lval >> 32) == -1) doHi = false;
14804 if (!(lval & 0x00000000FFFFFFFF)) doLo = false;
14805 if (!(lval & 0xFFFFFFFF00000000)) doHi = false;
14809 if (doLo) inst_TT_IV(insLo, op1, (int)(lval ), 0);
14810 if (doHi) inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
14812 bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
14814 tree->gtFlags |= GTF_ZSF_SET;
14816 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
14817 goto DONE_ASSG_REGS;
14820 /* TODO: allow non-const long assignment operators */
14822 noway_assert(!"non-const long asgop NYI");
14824 #endif // LONG_ASG_OPS
14832 regMaskTP availMask = RBM_ALLINT & ~needReg;
14834 /* Make sure the operand is addressable */
14836 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14838 GenTreePtr addr = oper == GT_IND ? op1 : tree;
14840 /* Pick a register for the value */
14842 regPair = regSet.rsPickRegPair(needReg);
14843 tmpMask = genRegPairMask(regPair);
14845 /* Is there any overlap between the register pair and the address? */
14849 if (tmpMask & addrReg)
14851 /* Does one or both of the target registers overlap? */
14853 if ((tmpMask & addrReg) != tmpMask)
14855 /* Only one register overlaps */
14857 noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14859 /* If the low register overlaps, load the upper half first */
14861 if (addrReg & genRegMask(genRegPairLo(regPair)))
14868 /* The register completely overlaps with the address */
14870 noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14872 /* Can we pick another pair easily? */
14874 regFree = regSet.rsRegMaskFree() & ~addrReg;
14876 regFree &= needReg;
14878 /* More than one free register available? */
14880 if (regFree && !genMaxOneBit(regFree))
14882 regPair = regSet.rsPickRegPair(regFree);
14883 tmpMask = genRegPairMask(regPair);
14887 // printf("Overlap: needReg = %08X\n", needReg);
14889 // Reg-prediction won't allow this
14890 noway_assert((regSet.rsMaskVars & addrReg) == 0);
14892 // Grab one fresh reg, and use any one of addrReg
14894 if (regFree) // Try to follow 'needReg'
14895 regLo = regSet.rsGrabReg(regFree);
14896 else // Pick any reg besides addrReg
14897 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14899 unsigned regBit = 0x1;
14902 for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14904 // Found one of addrReg. Use it.
14905 if (regBit & addrReg)
14908 noway_assert(genIsValidReg(regNo)); // Should have found regNo
14910 regPair = gen2regs2pair(regLo, regNo);
14911 tmpMask = genRegPairMask(regPair);
14916 /* Make sure the value is still addressable */
14918 noway_assert(genStillAddressable(tree));
14920 /* Figure out which registers the value is in */
14922 regLo = genRegPairLo(regPair);
14923 regHi = genRegPairHi(regPair);
14925 /* The value in the register pair is about to be trashed */
14927 regTracker.rsTrackRegTrash(regLo);
14928 regTracker.rsTrackRegTrash(regHi);
14930 /* Load the target registers from where the value is */
14934 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14935 regSet.rsLockReg(genRegMask(regHi));
14936 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14937 regSet.rsUnlockReg(genRegMask(regHi));
14941 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14942 regSet.rsLockReg(genRegMask(regLo));
14943 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14944 regSet.rsUnlockReg(genRegMask(regLo));
14947 #ifdef _TARGET_ARM_
14948 if (tree->gtFlags & GTF_IND_VOLATILE)
14950 // Emit a memory barrier instruction after the load
14951 instGen_MemoryBarrier();
14955 genUpdateLife(tree);
14956 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14963 /* What are we casting from? */
14965 switch (op1->gtType)
14975 regMaskTP hiRegMask;
14976 regMaskTP loRegMask;
14978 // For an unsigned cast we don't need to sign-extend the 32 bit value
14979 if (tree->gtFlags & GTF_UNSIGNED)
14981 // Does needReg have exactly two bits on and thus
14982 // specifies the exact register pair that we want to use
14983 if (!genMaxOneBit(needReg))
14985 regPair = regSet.rsFindRegPairNo(needReg);
14986 if (needReg != genRegPairMask(regPair))
14987 goto ANY_FREE_REG_UNSIGNED;
14988 loRegMask = genRegMask(genRegPairLo(regPair));
14989 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14990 goto ANY_FREE_REG_UNSIGNED;
14991 hiRegMask = genRegMask(genRegPairHi(regPair));
14995 ANY_FREE_REG_UNSIGNED:
14996 loRegMask = needReg;
14997 hiRegMask = needReg;
15000 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
15001 noway_assert(op1->gtFlags & GTF_REG_VAL);
15003 regLo = op1->gtRegNum;
15004 loRegMask = genRegMask(regLo);
15005 regSet.rsLockUsedReg(loRegMask);
15006 regHi = regSet.rsPickReg(hiRegMask);
15007 regSet.rsUnlockUsedReg(loRegMask);
15009 regPair = gen2regs2pair(regLo, regHi);
15011 // Move 0 to the higher word of the ULong
15012 genSetRegToIcon(regHi, 0, TYP_INT);
15014 /* We can now free up the operand */
15015 genReleaseReg(op1);
15019 #ifdef _TARGET_XARCH_
15020 /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
15021 and we need the result to be in those registers.
15022 cdq is smaller so we use it for SMALL_CODE
15025 if ((needReg & (RBM_EAX|RBM_EDX)) == (RBM_EAX|RBM_EDX) &&
15026 (regSet.rsRegMaskFree() & RBM_EDX) )
15028 genCodeForTree(op1, RBM_EAX);
15029 regSet.rsMarkRegUsed(op1);
15031 /* If we have to spill EDX, might as well use the faster
15032 sar as the spill will increase code size anyway */
15034 if (op1->gtRegNum != REG_EAX ||
15035 !(regSet.rsRegMaskFree() & RBM_EDX))
15037 hiRegMask = regSet.rsRegMaskFree();
15038 goto USE_SAR_FOR_CAST;
15041 regSet.rsGrabReg (RBM_EDX);
15042 regTracker.rsTrackRegTrash(REG_EDX);
15044 /* Convert the int in EAX into a long in EDX:EAX */
15048 /* The result is in EDX:EAX */
15050 regPair = REG_PAIR_EAXEDX;
15055 /* use the sar instruction to sign-extend a 32-bit integer */
15057 // Does needReg have exactly two bits on and thus
15058 // specifies the exact register pair that we want to use
15059 if (!genMaxOneBit(needReg))
15061 regPair = regSet.rsFindRegPairNo(needReg);
15062 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
15063 goto ANY_FREE_REG_SIGNED;
15064 loRegMask = genRegMask(genRegPairLo(regPair));
15065 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
15066 goto ANY_FREE_REG_SIGNED;
15067 hiRegMask = genRegMask(genRegPairHi(regPair));
15071 ANY_FREE_REG_SIGNED:
15072 loRegMask = needReg;
15073 hiRegMask = RBM_NONE;
15076 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
15077 #ifdef _TARGET_XARCH_
15080 noway_assert(op1->gtFlags & GTF_REG_VAL);
15082 regLo = op1->gtRegNum;
15083 loRegMask = genRegMask(regLo);
15084 regSet.rsLockUsedReg(loRegMask);
15085 regHi = regSet.rsPickReg(hiRegMask);
15086 regSet.rsUnlockUsedReg(loRegMask);
15088 regPair = gen2regs2pair(regLo, regHi);
15090 /* Copy the lo32 bits from regLo to regHi and sign-extend it */
15092 #ifdef _TARGET_ARM_
15093 // Use one instruction instead of two
15094 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
15096 inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
15097 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
15100 /* The value in the upper register is trashed */
15102 regTracker.rsTrackRegTrash(regHi);
15105 /* We can now free up the operand */
15106 genReleaseReg(op1);
15108 // conv.ovf.u8 could overflow if the original number was negative
15109 if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
15111 regNumber hiReg = genRegPairHi(regPair);
15112 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
15113 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
15114 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
15123 /* Load the FP value onto the coprocessor stack */
15125 genCodeForTreeFlt(op1);
15127 /* Allocate a temp for the long value */
15129 temp = compiler->tmpGetTemp(TYP_LONG);
15131 /* Store the FP value into the temp */
15133 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
15136 /* Pick a register pair for the value */
15138 regPair = regSet.rsPickRegPair(needReg);
15140 /* Figure out which registers the value is in */
15142 regLo = genRegPairLo(regPair);
15143 regHi = genRegPairHi(regPair);
15145 /* The value in the register pair is about to be trashed */
15147 regTracker.rsTrackRegTrash(regLo);
15148 regTracker.rsTrackRegTrash(regHi);
15150 /* Load the converted value into the registers */
15152 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
15153 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
15155 /* We no longer need the temp */
15157 compiler->tmpRlsTemp(temp);
15160 NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
15166 noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
15168 genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
15169 regPair = op1->gtRegPair;
15171 // Do we need to set the sign-flag, or can we checked if it is set?
15172 // and not do this "test" if so.
15174 if (op1->gtFlags & GTF_REG_VAL)
15176 regNumber hiReg = genRegPairHi(op1->gtRegPair);
15177 noway_assert(hiReg != REG_STK);
15178 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
15182 inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
15185 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
15186 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
15192 compiler->gtDispTree(tree);
15194 NO_WAY("unexpected cast to long");
15202 * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
15203 * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
15204 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
15207 // TODO: this should be done AFTER we called exit mon so that
15208 // we are sure that we don't have to keep 'this' alive
15210 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15212 /* either it's an "empty" statement or the return statement
15213 of a synchronized method
15216 genPInvokeMethodEpilog();
15219 #if CPU_LONG_USES_REGPAIR
15220 /* There must be a long return value */
15224 /* Evaluate the return value into EDX:EAX */
15226 genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
15228 noway_assert(op1->gtFlags & GTF_REG_VAL);
15229 noway_assert(op1->gtRegPair == REG_LNGRET);
15232 NYI("64-bit return");
15235 //The profiling hook does not trash registers, so it's safe to call after we emit the code for
15236 //the GT_RETURN tree.
15237 #ifdef PROFILING_SUPPORTED
15238 /* XXX Thu 7/5/2007
15239 * Oh look. More cloned code from the regular processing of GT_RETURN.
15241 if (compiler->compCurBB == compiler->genReturnBB)
15243 genProfilingLeaveCallback();
15249 noway_assert(!"inliner-generated ?: for longs NYI");
15250 NO_WAY("inliner-generated ?: for longs NYI");
15255 if (tree->gtFlags & GTF_REVERSE_OPS)
15258 genCodeForTreeLng(op2, needReg, avoidReg);
15259 genUpdateLife (op2);
15261 noway_assert(op2->gtFlags & GTF_REG_VAL);
15263 regSet.rsMarkRegPairUsed(op2);
15265 // Do side effects of op1
15266 genEvalSideEffects(op1);
15268 // Recover op2 if spilled
15269 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
15271 genReleaseRegPair(op2);
15273 genUpdateLife (tree);
15275 regPair = op2->gtRegPair;
15279 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
15281 /* Generate side effects of the first operand */
15283 genEvalSideEffects(op1);
15284 genUpdateLife (op1);
15286 /* Is the value of the second operand used? */
15288 if (tree->gtType == TYP_VOID)
15290 /* The right operand produces no result */
15292 genEvalSideEffects(op2);
15293 genUpdateLife(tree);
15297 /* Generate the second operand, i.e. the 'real' value */
15299 genCodeForTreeLng(op2, needReg, avoidReg);
15301 /* The result of 'op2' is also the final result */
15303 regPair = op2->gtRegPair;
15310 /* Generate the operand, i.e. the 'real' value */
15312 genCodeForTreeLng(op1, needReg, avoidReg);
15314 /* The result of 'op1' is also the final result */
15316 regPair = op1->gtRegPair;
15325 genCodeForTreeLng(op1, needReg, avoidReg);
15326 regPair = op1->gtRegPair;
15334 compiler->gtDispTree(tree);
15336 noway_assert(!"unexpected 64-bit operator");
15339 /* See what kind of a special operator we have here */
15345 retMask = genCodeForCall(tree, true);
15346 if (retMask == RBM_NONE)
15347 regPair = REG_PAIR_NONE;
15349 regPair = regSet.rsFindRegPairNo(retMask);
15354 compiler->gtDispTree(tree);
15356 NO_WAY("unexpected long operator");
15361 genUpdateLife(tree);
15363 /* Here we've computed the value of 'tree' into 'regPair' */
15365 noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15367 genMarkTreeInRegPair(tree, regPair);
15370 #pragma warning(pop)
15374 /*****************************************************************************
15376 *  Generate code for an unsigned modulo (GT_UMOD) of a long value by an
15376 *  int-sized constant divisor (the divisor must be in the range 2..0x3fffffff).
15379 regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree,
15382 #ifdef _TARGET_X86_
15387 genTreeOps oper = tree->OperGet();
15388 GenTreePtr op1 = tree->gtOp.gtOp1;
15389 GenTreePtr op2 = tree->gtOp.gtOp2;
15391 /* Codegen only for Unsigned MOD */
15392 noway_assert(oper == GT_UMOD);
15394 /* op2 must be a long constant in the range 2 to 0x3fffffff */
15396 noway_assert((op2->gtOper == GT_CNS_LNG) &&
15397 (op2->gtLngCon.gtLconVal >= 2) &&
15398 (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15399 int val = (int) op2->gtLngCon.gtLconVal;
15401 op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15403 op2->gtType = TYP_INT;
15404 op2->gtIntCon.gtIconVal = val;
15406 /* Which operand are we supposed to compute first? */
15408 if (tree->gtFlags & GTF_REVERSE_OPS)
15410 /* Compute the second operand into a scratch register, other
15413 needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15415 /* Special case: if op2 is a local var we are done */
15417 if (op2->gtOper == GT_LCL_VAR ||
15418 op2->gtOper == GT_LCL_FLD ||
15419 op2->gtOper == GT_CLS_VAR)
15421 addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15425 genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15427 noway_assert(op2->gtFlags & GTF_REG_VAL);
15428 addrReg = genRegMask(op2->gtRegNum);
15431 /* Compute the first operand into EAX:EDX */
15433 genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15434 noway_assert(op1->gtFlags & GTF_REG_VAL);
15435 noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15437 /* And recover the second argument while locking the first one */
15439 addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15443 /* Compute the first operand into EAX:EDX */
15445 genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15446 noway_assert(op1->gtFlags & GTF_REG_VAL);
15447 noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15449 /* Compute the second operand into a scratch register, other
15452 needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15454 /* Special case: if op2 is a local var we are done */
15456 if (op2->gtOper == GT_LCL_VAR ||
15457 op2->gtOper == GT_LCL_FLD ||
15458 op2->gtOper == GT_CLS_VAR)
15460 addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15464 genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15466 noway_assert(op2->gtFlags & GTF_REG_VAL);
15467 addrReg = genRegMask(op2->gtRegNum);
15470 /* Recover the first argument */
15472 genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15474 /* And recover the second argument while locking the first one */
15476 addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15479 /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
15480 contains the 32bit divisor. We want to generate the following code:
15482 ==========================
15485 cmp edx, op2->gtRegNum
15496 ==========================
15497 This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
15500 BasicBlock * lab_no_overflow = genCreateTempLabel();
15502 // grab a temporary register other than eax, edx, and op2->gtRegNum
15504 regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15506 // EAX and tempReg will be trashed by the mov instructions. Doing
15507 // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15509 regTracker.rsTrackRegTrash (REG_PAIR_TMP_LO);
15510 regTracker.rsTrackRegTrash (tempReg);
15512 inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15513 inst_JMP(EJ_jb ,lab_no_overflow);
15515 inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15516 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15517 genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15518 inst_TT(INS_UNSIGNED_DIVIDE, op2);
15519 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15521 // Jump point for no overflow divide
15523 genDefineTempLabel(lab_no_overflow);
15525 // Issue the divide instruction
15527 inst_TT(INS_UNSIGNED_DIVIDE, op2);
15529 /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15531 regTracker.rsTrackRegTrash (REG_PAIR_TMP_LO);
15532 regTracker.rsTrackRegTrash (REG_PAIR_TMP_HI);
15533 regTracker.rsTrackRegTrash (tempReg);
15534 regTracker.rsTrackRegTrash (op2->gtRegNum);
15537 if (tree->gtFlags & GTF_MOD_INT_RESULT)
15539 /* We don't need to normalize the result, because the caller wants
15542 regPair = REG_PAIR_TMP_REVERSE;
15546 /* The result is now in EDX, we now have to normalize it, i.e. we have
15548 mov eax, edx; xor edx, edx (for UMOD)
15551 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15553 genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15555 regPair = REG_PAIR_TMP;
15558 genReleaseRegPair(op1);
15559 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15563 #else // !_TARGET_X86_
15565 NYI("codegen for LongModInt");
15567 return REG_PAIR_NONE;
15569 #endif // !_TARGET_X86_
15572 // Given a tree, return the number of registers that are currently
15573 // used to hold integer enregistered local variables.
15574 // Note that, an enregistered TYP_LONG can take 1 or 2 registers.
15575 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
15577 unsigned regCount = 0;
// Iterate over every tracked local that is live at the current point
// (compCurLife is the current live-variable set).
15579 VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
15580 while (iter.NextElem(compiler, &varNum))
// Map the tracked-variable index back to its lvaTable descriptor.
15582 unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
15583 LclVarDsc * varDsc = &compiler->lvaTable[lclNum];
// Only enregistered, non-floating-point locals contribute to the count.
15585 if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15589 if (varTypeIsLong(varDsc->TypeGet()))
15591 // For enregistered LONG/ULONG, the lower half should always be in a register.
15592 noway_assert(varDsc->lvRegNum != REG_STK);
15594 // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as well.
15595 if (varDsc->lvOtherReg != REG_STK)
15607 /*****************************************************************************/
15608 /*****************************************************************************/
15609 #if CPU_HAS_FP_SUPPORT
15610 /*****************************************************************************
15612 * Generate code for a floating-point operation.
// Generate code for a floating-point tree. Delegates the actual FP codegen to
// genCodeForTreeFloat; for GT_RETURN nodes in the method's return block it also
// emits the PInvoke method epilog and the profiler leave callback.
15615 void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
15616 regMaskTP needReg, /* = RBM_ALLFLOAT */
15617 regMaskTP bestReg) /* = RBM_NONE */
15619 genCodeForTreeFloat(tree, needReg, bestReg);
15621 if (tree->OperGet() == GT_RETURN)
15623 //Make sure to get ALL THE EPILOG CODE
15625 // TODO: this should be done AFTER we called exit mon so that
15626 // we are sure that we don't have to keep 'this' alive
// Only the method's designated return block needs the PInvoke epilog.
15628 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15630 /* either it's an "empty" statement or the return statement
15631 of a synchronized method
15634 genPInvokeMethodEpilog();
15637 //The profiling hook does not trash registers, so it's safe to call after we emit the code for
15638 //the GT_RETURN tree.
15639 #ifdef PROFILING_SUPPORTED
15640 /* XXX Thu 7/5/2007
15641 * Oh look. More cloned code from the regular processing of GT_RETURN.
15643 if (compiler->compCurBB == compiler->genReturnBB)
15645 genProfilingLeaveCallback();
15651 /*****************************************************************************/
15652 #endif//CPU_HAS_FP_SUPPORT
15654 /*****************************************************************************
15656 * Generate a table switch - the switch value (0-based) is in register 'reg'.
// Emit code for a table switch. 'reg' holds the zero-based switch value,
// jumpTab[] holds the case targets with jumpTab[jumpCnt - 1] being the
// default case. Small switches are lowered to a compare/branch sequence;
// larger ones get a real jump table in the data section.
15659 void CodeGen::genTableSwitch(regNumber reg,
15661 BasicBlock ** jumpTab)
15663 unsigned jmpTabBase;
15667 //In debug code, we don't optimize away the trivial switch statements. So we can get here with a
15668 //BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
15669 noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15670 inst_JMP(EJ_jmp, jumpTab[0]);
15674 noway_assert(jumpCnt >= 2);
15676 /* Is the number of cases right for a test and jump switch? */
// fFirstCaseFollows/fDefaultFollows: whether fall-through reaches that target,
// letting us skip an explicit jump to it.
15678 const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15679 const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15680 const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
15683 unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15685 // This means really just a single cmp/jcc (aka a simple if/else)
15686 if (fFirstCaseFollows || fDefaultFollows)
15687 minSwitchTabJumpCnt++;
15689 #ifdef _TARGET_ARM_
15690 // On the ARM for small switch tables we will
15691 // generate a sequence of compare and branch instructions
15692 // because the code to load the base of the switch
15693 // table is huge and hideous due to the relocation... :(
15695 minSwitchTabJumpCnt++;
15696 if (fHaveScratchReg)
15697 minSwitchTabJumpCnt++;
15699 #endif // _TARGET_ARM_
// Below the threshold: emit a compare-and-branch chain instead of a table.
15701 if (jumpCnt < minSwitchTabJumpCnt)
15703 /* Does the first case label follow? */
15704 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15706 if (fFirstCaseFollows)
15708 /* Check for the default case */
// Single unsigned compare covers both "above range" and negative values.
15709 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15710 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15711 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15713 /* No need to jump to the first case */
15718 /* Generate a series of "dec reg; jmp label" */
15720 // Make sure that we can trash the register so
15721 // that we can generate a series of compares and jumps
15723 if ((jumpCnt > 0) && !fHaveScratchReg)
15725 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15726 inst_RV_RV(INS_mov, tmpReg, reg);
15727 regTracker.rsTrackRegTrash(tmpReg);
// Each iteration: subtract 1 and branch if the value just hit zero,
// i.e. jump to the case whose index matches the original value.
15731 while (jumpCnt > 0)
15733 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15734 inst_JMP(jmpEqual, *jumpTab++);
15740 /* Check for case0 first */
15741 instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15742 inst_JMP(jmpEqual, *jumpTab);
15744 /* No need to jump to the first case or the default */
15749 /* Generate a series of "dec reg; jmp label" */
15751 // Make sure that we can trash the register so
15752 // that we can generate a series of compares and jumps
15754 if ((jumpCnt > 0) && !fHaveScratchReg)
15756 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15757 inst_RV_RV(INS_mov, tmpReg, reg);
15758 regTracker.rsTrackRegTrash(tmpReg);
15762 while (jumpCnt > 0)
15764 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15765 inst_JMP(jmpEqual, *jumpTab++);
15769 if (!fDefaultFollows)
15771 inst_JMP(EJ_jmp, *jumpTab);
// If the fall-through block landed in a different (hot/cold) region, we
// still need an explicit jump to reach it.
15775 if ((fFirstCaseFollows || fDefaultFollows) && compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15777 inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
// Table-based path: range-check into the default, then index the jump table.
15783 /* First take care of the default case */
15785 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15786 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15787 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15789 /* Generate the jump table contents */
// jumpCnt - 1 entries: the default case is handled by the range check above.
15791 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15794 if (compiler->opts.dspCode)
15795 printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15798 for (unsigned index = 0; index < jumpCnt - 1; index++)
15800 BasicBlock* target = jumpTab[index];
15802 noway_assert(target->bbFlags & BBF_JMP_TARGET);
15805 if (compiler->opts.dspCode)
15806 printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15809 getEmitter()->emitDataGenData(index, target);
15812 getEmitter()->emitDataGenEnd();
15814 #ifdef _TARGET_ARM_
15815 // We need to load the address of the table into a register.
15816 // The data section might get placed a long distance away, so we
15817 // can't safely do a PC-relative ADR. :(
15818 // Pick any register except the index register.
15820 regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
// movw/movt pair materializes the full 32-bit table address with relocations.
15821 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
15822 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
15823 regTracker.rsTrackRegTrash(regTabBase);
15825 // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2]
15826 getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15828 #else // !_TARGET_ARM_
// Non-ARM: a single indirect jump through the table suffices.
15830 getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15835 /*****************************************************************************
15837 * Generate code for a switch statement.
// Generate code for a GT_SWITCH node: evaluate the switch value into a
// register and hand it, together with the basic block's jump table, to
// genTableSwitch (or the x87 FP-stack variant when the FP stack is non-empty).
15840 void CodeGen::genCodeForSwitch(GenTreePtr tree)
15843 BasicBlock * * jumpTab;
15848 noway_assert(tree->gtOper == GT_SWITCH);
15849 oper = tree->gtOp.gtOp1;
15850 noway_assert(genActualTypeIsIntOrI(oper->gtType));
15852 /* Get hold of the jump table */
// The jump table lives on the enclosing BBJ_SWITCH basic block.
15854 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15856 jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15857 jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15859 /* Compute the switch value into some register */
15861 genCodeForTree(oper, 0);
15863 /* Get hold of the register the value is in */
15865 noway_assert(oper->gtFlags & GTF_REG_VAL);
15866 reg = oper->gtRegNum;
15868 #if FEATURE_STACK_FP_X87
// A non-empty x87 FP stack needs the FP-aware switch helper so the stack
// state can be kept consistent across the branch targets.
15869 if (!compCurFPState.IsEmpty())
15871 return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15874 #endif // FEATURE_STACK_FP_X87
15876 return genTableSwitch(reg, jumpCnt, jumpTab);
15880 /*****************************************************************************/
15881 /*****************************************************************************
15882 * Emit a call to a helper function.
// Emit a call to a JIT helper function. Calls directly when the VM supplies
// a reachable address; otherwise calls indirectly through the helper's
// address cell (pAddr). Afterwards, marks the callee-trash registers dirty.
15886 void CodeGen::genEmitHelperCall(unsigned helper,
15890 // Can we call the helper function directly
15892 void * addr = NULL, **pAddr = NULL;
15894 // Don't ask VM if it hasn't requested ELT hooks
15895 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
// DEBUG-only ELT hook path: route the profiler enter/leave/tailcall helpers
// straight to the profiler's method handle instead of asking the VM.
15896 if (!compiler->compProfilerHookNeeded &&
15897 compiler->opts.compJitELTHookEnabled &&
15898 (helper == CORINFO_HELP_PROF_FCN_ENTER ||
15899 helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15900 helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15902 addr = compiler->compProfilerMethHnd;
// Ask the VM for the helper's entry point; pAddr receives the indirection
// cell when no direct address is available.
15907 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15911 #ifdef _TARGET_ARM_
// ARM BL has a limited immediate range; fall back to an indirect call
// through a register when the target is unknown or out of range.
15912 if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15914 // Load the address into a register and call through a register
15915 regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
15918 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
// No direct address: load the real target out of the indirection cell.
15922 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15923 regTracker.rsTrackRegTrash(indCallReg);
15926 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
15927 compiler->eeFindHelper(helper),
15928 INDEBUG_LDISASM_COMMA(nullptr)
15932 gcInfo.gcVarPtrSetCur,
15933 gcInfo.gcRegGCrefSetCur,
15934 gcInfo.gcRegByrefSetCur,
15935 BAD_IL_OFFSET, // ilOffset
15936 indCallReg, // ireg
15937 REG_NA, 0, 0, // xreg, xmul, disp
15939 emitter::emitNoGChelper(helper),
15940 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Direct call: the helper address fits in the BL immediate.
15944 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN,
15945 compiler->eeFindHelper(helper),
15946 INDEBUG_LDISASM_COMMA(nullptr)
15950 gcInfo.gcVarPtrSetCur,
15951 gcInfo.gcRegGCrefSetCur,
15952 gcInfo.gcRegByrefSetCur,
15953 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* ilOffset, ireg, xreg, xmul, disp */
15954 false, /* isJump */
15955 emitter::emitNoGChelper(helper),
15956 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Non-ARM path: direct token call, or indirect-through-token when the VM
// only gave us an address cell.
15961 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
15965 callType = emitter::EC_FUNC_TOKEN_INDIR;
15969 getEmitter()->emitIns_Call(callType,
15970 compiler->eeFindHelper(helper),
15971 INDEBUG_LDISASM_COMMA(nullptr)
15975 gcInfo.gcVarPtrSetCur,
15976 gcInfo.gcRegGCrefSetCur,
15977 gcInfo.gcRegByrefSetCur,
15978 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* ilOffset, ireg, xreg, xmul, disp */
15979 false, /* isJump */
15980 emitter::emitNoGChelper(helper));
// The helper clobbers the callee-trash set; reflect that in the tracker.
15984 regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15985 regTracker.rsTrashRegsForGCInterruptability();
15988 /*****************************************************************************
15990 * Push the given registers.
15991 * This function does not check if the register is marked as used, etc.
// Push the given registers onto the stack, recording through *byrefRegs and
// *noRefRegs which of them held byrefs / non-GC values so genPopRegs() can
// restore the proper GC tracking. Returns the mask of registers pushed.
// This function does not check if the register is marked as used, etc.
15994 regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP * byrefRegs, regMaskTP * noRefRegs)
15996 *byrefRegs = RBM_NONE;
15997 *noRefRegs = RBM_NONE;
15999 // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
16001 if (regs == RBM_NONE)
16004 #if FEATURE_FIXED_OUT_ARGS
// With a fixed outgoing-arg area there should be no push/pop of real regs.
16006 NYI("Don't call genPushRegs with real regs!");
16009 #else // FEATURE_FIXED_OUT_ARGS
// GC refs and byrefs must occupy the same stack-slot size as native ints
// for the push/pop pairing below to be valid.
16011 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
16012 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
16014 regMaskTP pushedRegs = regs;
// Walk integer registers in ascending order until every requested bit is done.
16016 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
16018 regMaskTP regBit = regMaskTP(1) << reg;
16020 if ((regBit & regs) == RBM_NONE)
// Classify what the register currently holds so the pop can re-mark it.
16024 if (regBit & gcInfo.gcRegGCrefSetCur)
16029 if (regBit & gcInfo.gcRegByrefSetCur)
16031 *byrefRegs |= regBit;
16035 if (noRefRegs != NULL)
16037 *noRefRegs |= regBit;
16045 inst_RV(INS_push, reg, type);
// The value now lives on the stack, so the register no longer holds a GC ptr.
16048 gcInfo.gcMarkRegSetNpt(regBit);
16055 #endif // FEATURE_FIXED_OUT_ARGS
16059 /*****************************************************************************
16061 * Pop the registers pushed by genPushRegs()
// Pop the registers pushed by genPushRegs(), restoring GC-ref/byref tracking
// using the byrefRegs/noRefRegs masks that genPushRegs() produced.
16064 void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
16066 if (regs == RBM_NONE)
16069 #if FEATURE_FIXED_OUT_ARGS
16071 NYI("Don't call genPopRegs with real regs!");
16073 #else // FEATURE_FIXED_OUT_ARGS
// The byref/noRef masks must be subsets of the registers being popped.
16075 noway_assert((regs & byrefRegs) == byrefRegs);
16076 noway_assert((regs & noRefRegs) == noRefRegs);
16077 // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
// None of these registers may currently be tracked as holding GC pointers.
16078 noway_assert((regs & (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur)) == RBM_NONE);
16080 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
16081 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
16083 // Walk the registers in the reverse order as genPushRegs()
16084 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
16086 regMaskTP regBit = regMaskTP(1) << reg;
16088 if ((regBit & regs) == RBM_NONE)
// Recover the type classification that genPushRegs() recorded for this reg.
16092 if (regBit & byrefRegs)
16097 if (regBit & noRefRegs)
16106 inst_RV(INS_pop, reg, type);
// Re-mark the register as a live GC ref/byref if it held one before the push.
16109 if (type != TYP_INT)
16110 gcInfo.gcMarkRegPtrVal(reg, type);
16115 #endif // FEATURE_FIXED_OUT_ARGS
16119 /*****************************************************************************
16121 * Push the given argument list, right to left; returns the total amount of
16125 #if !FEATURE_FIXED_OUT_ARGS
16127 #pragma warning(push)
16128 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
16130 size_t CodeGen::genPushArgList(GenTreePtr call)
16132 GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
16136 GenTreeArgList* args;
16137 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16138 // so we can iterate over this argument list more uniformly.
16139 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16140 GenTreeArgList firstForObjp(/*temp dummy arg*/call, call->gtCall.gtCallArgs);
16141 if (call->gtCall.gtCallObjp == NULL)
16143 args = call->gtCall.gtCallArgs;
16147 firstForObjp.Current() = call->gtCall.gtCallObjp;
16148 args = &firstForObjp;
16155 for (; args; args = args->Rest())
16157 addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
16159 /* Get hold of the next argument value */
16160 curr = args->Current();
16162 if (curr->IsArgPlaceHolderNode())
16164 assert(curr->gtFlags & GTF_LATE_ARG);
16170 // If we have a comma expression, eval the non-last, then deal with the last.
16171 if (!(curr->gtFlags & GTF_LATE_ARG))
16172 curr = genCodeForCommaTree(curr);
16174 /* See what type of a value we're passing */
16175 type = curr->TypeGet();
16177 opsz = genTypeSize(genActualType(type));
16187 /* Don't want to push a small value, make it a full word */
16189 genCodeForTree(curr, 0);
16191 __fallthrough; // now the value should be in a register ...
16196 #if ! CPU_HAS_FP_SUPPORT
16200 if (curr->gtFlags & GTF_LATE_ARG)
16202 assert(curr->gtOper == GT_ASG);
16203 /* one more argument will be passed in a register */
16204 noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
16206 /* arg is passed in the register, nothing on the stack */
16212 /* Is this value a handle? */
16214 if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
16216 /* Emit a fixup for the push instruction */
16218 inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
16226 /* Is the value a constant? */
16228 if (curr->gtOper == GT_CNS_INT)
16232 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16236 inst_RV(INS_push, reg, TYP_INT);
16241 inst_IV(INS_push, curr->gtIntCon.gtIconVal);
16244 /* If the type is TYP_REF, then this must be a "null". So we can
16245 treat it as a TYP_INT as we don't need to report it as a GC ptr */
16247 noway_assert(curr->TypeGet() == TYP_INT ||
16248 (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
16257 if (curr->gtFlags & GTF_LATE_ARG)
16259 /* This must be a register arg temp assignment */
16261 noway_assert(curr->gtOper == GT_ASG);
16263 /* Evaluate it to the temp */
16265 genCodeForTree(curr, 0);
16267 /* Increment the current argument register counter */
16269 intRegState.rsCurRegArgNum++;
16275 /* This is a 32-bit integer non-register argument */
16277 addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
16278 inst_TT(INS_push, curr);
16280 genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
16286 #if !CPU_HAS_FP_SUPPORT
16290 /* Is the value a constant? */
16292 if (curr->gtOper == GT_CNS_LNG)
16294 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
16296 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal ));
16303 addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
16305 inst_TT(INS_push, curr, sizeof(int));
16307 inst_TT(INS_push, curr);
16312 #if CPU_HAS_FP_SUPPORT
16316 #if FEATURE_STACK_FP_X87
16317 addrReg = genPushArgumentStackFP(curr);
16326 /* Is this a nothing node, deferred register argument? */
16328 if (curr->gtFlags & GTF_LATE_ARG)
16330 GenTree* arg = curr;
16331 if (arg->gtOper == GT_COMMA)
16333 while (arg->gtOper == GT_COMMA)
16335 GenTreePtr op1 = arg->gtOp.gtOp1;
16336 genEvalSideEffects(op1);
16337 genUpdateLife(op1);
16338 arg = arg->gtOp.gtOp2;
16340 if (!arg->IsNothingNode())
16342 genEvalSideEffects(arg);
16343 genUpdateLife(arg);
16347 /* increment the register count and continue with the next argument */
16349 intRegState.rsCurRegArgNum++;
16351 noway_assert(opsz == 0);
16361 GenTree* arg = curr;
16362 while (arg->gtOper == GT_COMMA)
16364 GenTreePtr op1 = arg->gtOp.gtOp1;
16365 genEvalSideEffects(op1);
16366 genUpdateLife(op1);
16367 arg = arg->gtOp.gtOp2;
16370 noway_assert(arg->gtOper == GT_OBJ
16371 || arg->gtOper == GT_MKREFANY
16372 || arg->gtOper == GT_IND);
16373 noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
16374 noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
16376 if (arg->gtOper == GT_MKREFANY)
16378 GenTreePtr op1 = arg->gtOp.gtOp1;
16379 GenTreePtr op2 = arg->gtOp.gtOp2;
16381 addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
16383 /* Is this value a handle? */
16384 if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
16386 /* Emit a fixup for the push instruction */
16388 inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
16393 regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
16394 inst_TT(INS_push, op2);
16396 genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
16399 addrReg = genKeepAddressable(op1, addrReg);
16400 inst_TT(INS_push, op1);
16402 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
16404 opsz = 2*TARGET_POINTER_SIZE;
16408 noway_assert(arg->gtOper == GT_OBJ);
16410 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
16411 arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16413 GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
16414 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
16415 LclVarDsc * varDsc = &compiler->lvaTable[structLclNum];
16417 // As much as we would like this to be a noway_assert, we can't because
16418 // there are some weird casts out there, and backwards compatibility
16419 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16420 // lvPromoted in general currently do not require the local to be
16421 // TYP_STRUCT, so this assert is really more about how we wish the world
16422 // was then some JIT invariant.
16423 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16425 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
16427 if (varDsc->lvPromoted &&
16428 promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
16430 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16434 // Get the number of BYTES to copy to the stack
16435 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
16436 size_t bytesToBeCopied = opsz;
16438 // postponedFields is true if we have any postponed fields
16439 // Any field that does not start on a 4-byte boundary is a postponed field
16440 // Such a field is required to be a short or a byte
16442 // postponedRegKind records the kind of scratch register we will
16443 // need to process the postponed fields
16444 // RBM_NONE means that we don't need a register
16446 // expectedAlignedOffset records the aligned offset that
16447 // has to exist for a push to cover the postponed fields.
16448 // Since all promoted structs have the tightly packed property
16449 // we are guaranteed that we will have such a push
16451 bool postponedFields = false;
16452 regMaskTP postponedRegKind = RBM_NONE;
16453 size_t expectedAlignedOffset = UINT_MAX;
16455 VARSET_TP* deadVarBits = NULL;
16456 compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
16458 // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
16460 for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
16461 varNum >= (int) varDsc->lvFieldLclStart;
16464 LclVarDsc * fieldVarDsc = compiler->lvaTable + varNum;
16466 if (fieldVarDsc->lvExactSize == 2*sizeof(unsigned))
16468 noway_assert(fieldVarDsc->lvFldOffset % (2*sizeof(unsigned)) == 0);
16469 noway_assert(fieldVarDsc->lvFldOffset + (2*sizeof(unsigned)) == bytesToBeCopied);
16472 // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
16473 // For packed structs we will go back and store the unaligned bytes and shorts
16474 // in the next loop
16476 if (fieldVarDsc->lvStackAligned())
16478 if (fieldVarDsc->lvExactSize != 2*sizeof(unsigned) &&
16479 fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
16481 // Might need 4-bytes paddings for fields other than LONG and DOUBLE.
16482 // Just push some junk (i.e EAX) on the stack.
16483 inst_RV(INS_push, REG_EAX, TYP_INT);
16486 bytesToBeCopied -= sizeof(void*);
16489 // If we have an expectedAlignedOffset make sure that this push instruction
16490 // is what we expect to cover the postponedFields
16492 if (expectedAlignedOffset != UINT_MAX)
16494 // This push must be for a small field
16495 noway_assert(fieldVarDsc->lvExactSize < 4);
16496 // The fldOffset for this push should be equal to the expectedAlignedOffset
16497 noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
16498 expectedAlignedOffset = UINT_MAX;
16501 // Push the "upper half" of LONG var first
16503 if (isRegPairType(fieldVarDsc->lvType))
16505 if (fieldVarDsc->lvOtherReg != REG_STK)
16508 fieldVarDsc->lvOtherReg,
16512 // Prepare the set of vars to be cleared from gcref/gcbyref set
16513 // in case they become dead after genUpdateLife.
16514 // genDoneAddressable() will remove dead gc vars by calling gcInfo.gcMarkRegSetNpt.
16515 // Although it is not addrReg, we just borrow the name here.
16516 addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16520 getEmitter()->emitIns_S(INS_push,
16527 bytesToBeCopied -= sizeof(void*);
16530 // Push the "upper half" of DOUBLE var if it is not enregistered.
16532 if (fieldVarDsc->lvType == TYP_DOUBLE)
16534 if (!fieldVarDsc->lvRegister)
16536 getEmitter()->emitIns_S(INS_push,
16543 bytesToBeCopied -= sizeof(void*);
16547 // Push the field local.
16550 if (fieldVarDsc->lvRegister)
16552 if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16555 fieldVarDsc->lvRegNum,
16556 genActualType(fieldVarDsc->TypeGet()));
16559 // Prepare the set of vars to be cleared from gcref/gcbyref set
16560 // in case they become dead after genUpdateLife.
16561 // genDoneAddressable() will remove dead gc vars by calling gcInfo.gcMarkRegSetNpt.
16562 // Although it is not addrReg, we just borrow the name here.
16563 addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16567 // Must be TYP_FLOAT or TYP_DOUBLE
16568 noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16570 noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
16571 fieldVarDsc->lvExactSize == 2*sizeof(unsigned));
16573 inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16576 if (fieldVarDsc->lvExactSize == 2*sizeof(unsigned))
16581 #if FEATURE_STACK_FP_X87
16582 GenTree* fieldTree = new (compiler, GT_REG_VAR) GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16583 fieldTree->gtOper = GT_REG_VAR;
16584 fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
16585 fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16586 if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
16588 if (fieldVarDsc->lvTracked &&
16589 (deadVarBits == NULL || VarSetOps::IsMember(compiler, *deadVarBits, fieldVarDsc->lvVarIndex)))
16591 fieldTree->gtFlags |= GTF_VAR_DEATH;
16594 genCodeForTreeStackFP_Leaf(fieldTree);
16596 // Take reg to top of stack
16598 FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16600 // Pop it off to stack
16601 compCurFPState.Pop();
16603 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize), REG_NA, REG_SPBASE, 0);
16605 NYI_FLAT_FP_X87("FP codegen");
16611 getEmitter()->emitIns_S(INS_push,
16612 (fieldVarDsc->TypeGet() == TYP_REF)?EA_GCREF:EA_4BYTE,
16618 bytesToBeCopied -= sizeof(void*);
16620 else // not stack aligned
16622 noway_assert(fieldVarDsc->lvExactSize < 4);
16624 // We will need to use a store byte or store word
16625 // to set this unaligned location
16626 postponedFields = true;
16628 if (expectedAlignedOffset != UINT_MAX)
16630 // This should never change until it is set back to UINT_MAX by an aligned offset
16631 noway_assert(expectedAlignedOffset == roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
16634 expectedAlignedOffset = roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
16636 noway_assert(expectedAlignedOffset < bytesToBeCopied);
16638 if (fieldVarDsc->lvRegister)
16640 // Do we need to use a byte-able register?
16641 if (fieldVarDsc->lvExactSize == 1)
16643 // Did we enregister fieldVarDsc2 in a non byte-able register?
16644 if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16646 // then we will need to grab a byte-able register
16647 postponedRegKind = RBM_BYTE_REGS;
16651 else // not enregistered
16653 if (fieldVarDsc->lvExactSize == 1)
16655 // We will need to grab a byte-able register
16656 postponedRegKind = RBM_BYTE_REGS;
16660 // We will need to grab any scratch register
16661 if (postponedRegKind != RBM_BYTE_REGS)
16662 postponedRegKind = RBM_ALLINT;
16668 // Now we've pushed all of the aligned fields.
16670 // We should have pushed bytes equal to the entire struct
16671 noway_assert(bytesToBeCopied == 0);
16673 // We should have seen a push that covers every postponed field
16674 noway_assert(expectedAlignedOffset == UINT_MAX);
16676 // Did we have any postponed fields?
16677 if (postponedFields)
16679 regNumber regNum = REG_STK; // means no register
16681 // If we needed a scratch register then grab it here
16683 if (postponedRegKind != RBM_NONE)
16684 regNum = regSet.rsGrabReg(postponedRegKind);
16686 // Forward loop, starts from the lowest field offset
16688 for (unsigned varNum = varDsc->lvFieldLclStart;
16689 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
16692 LclVarDsc * fieldVarDsc = compiler->lvaTable + varNum;
16694 // All stack aligned fields have already been pushed
16695 if (fieldVarDsc->lvStackAligned())
16698 // We have a postponed field
16700 // It must be a byte or a short
16701 noway_assert(fieldVarDsc->lvExactSize < 4);
16703 // Is the field enregistered?
16704 if (fieldVarDsc->lvRegister)
16706 // Frequently we can just use that register
16707 regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16709 // Do we need to use a byte-able register?
16710 if (fieldVarDsc->lvExactSize == 1)
16712 // Did we enregister the field in a non byte-able register?
16713 if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16715 // then we will need to use the byte-able register 'regNum'
16716 noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16718 // Copy the register that contains fieldVarDsc into 'regNum'
16719 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum, fieldVarDsc->lvRegNum);
16720 regTracker.rsTrackRegLclVar(regNum, varNum);
16722 // tmpRegNum is the register that we will extract the byte value from
16723 tmpRegNum = regNum;
16725 noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
16728 getEmitter()->emitIns_AR_R (ins_Store(fieldVarDsc->TypeGet()),
16729 (emitAttr)fieldVarDsc->lvExactSize,
16732 fieldVarDsc->lvFldOffset);
16734 else // not enregistered
16736 // We will copy the non-enregister fieldVar into our scratch register 'regNum'
16738 noway_assert(regNum != REG_STK);
16739 getEmitter()->emitIns_R_S (ins_Load(fieldVarDsc->TypeGet()),
16740 (emitAttr)fieldVarDsc->lvExactSize,
16745 regTracker.rsTrackRegLclVar(regNum, varNum);
16747 // Store the value (byte or short) into the stack
16749 getEmitter()->emitIns_AR_R (ins_Store(fieldVarDsc->TypeGet()),
16750 (emitAttr)fieldVarDsc->lvExactSize,
16753 fieldVarDsc->lvFldOffset);
16757 genUpdateLife(structLocalTree);
16764 genCodeForTree(arg->gtObj.gtOp1, 0);
16765 noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
16766 regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16767 // Get the number of DWORDS to copy to the stack
16768 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
16769 unsigned slots = (unsigned)(opsz / sizeof(void*));
16771 BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16773 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16775 BOOL bNoneGC = TRUE;
16776 for (int i = slots - 1; i >= 0; --i)
16778 if (gcLayout[i] != TYPE_GC_NONE)
16785 /* passing large structures using movq instead of pushes does not increase codesize very much */
16786 unsigned movqLenMin = 8;
16787 unsigned movqLenMax = 64;
16788 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16790 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16792 // Don't bother with this optimization in
16793 // rarely run blocks or when optimizing for size
16794 movqLenMax = movqLenMin = 0;
16796 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16798 // Be more aggressive when optimizing for speed
16802 /* Adjust for BB weight */
16803 if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT)/2)
16805 // Be more aggressive when we are inside a loop
16809 if (compiler->opts.compCanUseSSE2 && bNoneGC &&
16810 (opsz >= movqLenMin) && (opsz <= movqLenMax))
16812 JITLOG_THIS(compiler, (LL_INFO10000, "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16813 opsz, compiler->info.compFullName));
16815 int stkDisp = (int)(unsigned)opsz;
16817 regNumber xmmReg = REG_XMM0;
16821 stkDisp -= sizeof(void*);
16822 getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
16826 inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16827 genStackLevel += stkDisp;
16829 while (curDisp < stkDisp)
16831 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16832 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16833 curDisp += 2 * sizeof(void*);
16835 noway_assert(curDisp == stkDisp);
16839 for (int i = slots-1; i >= 0; --i)
16841 emitAttr fieldSize;
16842 if (gcLayout[i] == TYPE_GC_NONE)
16843 fieldSize = EA_4BYTE;
16844 else if (gcLayout[i] == TYPE_GC_REF)
16845 fieldSize = EA_GCREF;
16848 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16849 fieldSize = EA_BYREF;
16851 getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i*sizeof(void*));
16855 gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
16863 noway_assert(!"unhandled/unexpected arg type");
16864 NO_WAY("unhandled/unexpected arg type");
16867 /* Update the current set of live variables */
16869 genUpdateLife(curr);
16871 /* Update the current set of register pointers */
16873 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
16874 genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16876 /* Remember how much stuff we've pushed on the stack */
16880 /* Update the current argument stack offset */
16883 /* Continue with the next argument, if any more are present */
16887 /* Move the deferred arguments to registers */
16889 for (args = regArgs; args; args = args->Rest())
16891 curr = args->Current();
16893 assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
16895 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16896 assert(curArgTabEntry);
16897 regNumber regNum = curArgTabEntry->regNum;
16899 noway_assert(isRegParamType(curr->TypeGet()));
16900 noway_assert(curr->gtType != TYP_VOID);
16902 /* Evaluate the argument to a register [pair] */
16904 if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16906 /* Check if this is the guess area for the resolve interface call
16907 * Pass a size of EA_OFFSET*/
16908 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16910 getEmitter()->emitIns_R_C(ins_Load(TYP_INT),
16913 curr->gtClsVar.gtClsVarHnd,
16915 regTracker.rsTrackRegTrash(regNum);
16917 /* The value is now in the appropriate register */
16919 genMarkTreeInReg(curr, regNum);
16923 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16926 noway_assert(curr->gtRegNum == regNum);
16928 /* If the register is already marked as used, it will become
16929 multi-used. However, since it is a callee-trashed register,
16930 we will have to spill it before the call anyway. So do it now */
16932 if (regSet.rsMaskUsed & genRegMask(regNum))
16934 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16935 regSet.rsSpillReg(regNum);
16938 /* Mark the register as 'used' */
16940 regSet.rsMarkRegUsed(curr);
16944 noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16948 /* If any of the previously loaded arguments were spilled - reload them */
16950 for (args = regArgs; args; args = args->Rest())
16952 curr = args->Current();
16955 if (curr->gtFlags & GTF_SPILLED)
16957 if (isRegPairType(curr->gtType))
16959 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16963 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16968 /* Return the total size pushed */
16973 #pragma warning(pop)
16976 #else // FEATURE_FIXED_OUT_ARGS
16979 // ARM and AMD64 uses this method to pass the stack based args
16981 // returns size pushed (always zero)
// genPushArgList: place each argument of 'call' for a fixed-out-args target.
// Arguments are either stored directly into the outgoing argument area
// (compiler->lvaOutgoingArgSpaceVar) at the slot offset recorded in the
// call's fgArgTabEntry, or deferred (GTF_LATE_ARG) to the late-arg list,
// which SetupLateArgs(call) processes at the end.
// Per the header comment above: used by ARM and AMD64 for stack-based args;
// the returned "size pushed" is always zero on these targets.
// NOTE(review): this listing is an excerpt — some structural lines (braces,
// else branches, switch labels) between the numbered lines are not shown.
16982 size_t CodeGen::genPushArgList(GenTreePtr call)
16985 GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
16990 GenTreeArgList* args;
16991 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16992 // so we can iterate over this argument list more uniformly.
16993 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16994 GenTreeArgList objpArgList(/*temp dummy arg*/call, call->gtCall.gtCallArgs);
16995 if (call->gtCall.gtCallObjp == NULL)
16997 args = call->gtCall.gtCallArgs;
17001 objpArgList.Current() = call->gtCall.gtCallObjp;
17002 args = &objpArgList;
// Walk every argument, including the 'this' argument (if any) prepended above.
17005 for (; args; args = args->Rest())
17007 /* Get hold of the next argument value */
17008 curr = args->Current();
// Look up this argument's table entry to learn its target register
// (REG_STK if it goes to the stack) and its outgoing-arg-space slot offset.
17010 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
17011 assert(curArgTabEntry);
17012 regNumber regNum = curArgTabEntry->regNum;
17013 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
17015 /* See what type of a value we're passing */
17016 type = curr->TypeGet();
17018 // This holds the set of registers corresponding to enregistered promoted struct field variables
17019 // that go dead after this use of the variable in the argument list.
17020 regMaskTP deadFieldVarRegs = RBM_NONE;
17022 argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
// Placeholder nodes stand in for arguments that are evaluated via the
// late-arg list; there is nothing to store for them here.
17024 if (curr->IsArgPlaceHolderNode())
17026 assert(curr->gtFlags & GTF_LATE_ARG);
17030 if (varTypeIsSmall(type))
17032 // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
// Presumably the TYP_LONG/TYP_DOUBLE case (the switch label is elided in
// this excerpt): such values occupy two pointer-sized slots on ARM.
17042 #if defined(_TARGET_ARM_)
17044 argSize = (TARGET_POINTER_SIZE * 2);
17046 /* Is the value a constant? */
17048 if (curr->gtOper == GT_CNS_LNG)
17050 assert((curr->gtFlags & GTF_LATE_ARG) == 0);
// Store a 64-bit constant as two 32-bit immediates into consecutive slots.
17052 int hiVal = (int) (curr->gtLngCon.gtLconVal >> 32);
17053 int loVal = (int) (curr->gtLngCon.gtLconVal & 0xffffffff);
17055 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal,
17056 compiler->lvaOutgoingArgSpaceVar, argOffset);
17058 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal,
17059 compiler->lvaOutgoingArgSpaceVar, argOffset + 4);
17065 genCodeForTree(curr, 0);
17067 if (curr->gtFlags & GTF_LATE_ARG)
17069 // The arg was assigned into a temp and
17070 // will be moved to the correct register or slot later
17072 argSize = 0; // nothing is passed on the stack
17076 // The arg is passed in the outgoing argument area of the stack frame
17078 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
17079 assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
17081 if (type == TYP_LONG)
// The long lives in a register pair; the high half may have been
// spilled (REG_STK), in which case reload it into a free register.
17083 regNumber regLo = genRegPairLo(curr->gtRegPair);
17084 regNumber regHi = genRegPairHi(curr->gtRegPair);
17086 assert(regLo != REG_STK);
17087 inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
17088 if (regHi == REG_STK)
17090 regHi = regSet.rsPickFreeReg();
17091 inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
17092 regTracker.rsTrackRegTrash(regHi);
17094 inst_SA_RV(ins_Store(TYP_INT), argOffset+4, regHi, TYP_INT);
17096 else // (type == TYP_DOUBLE)
17098 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
17104 #elif defined(_TARGET_64BIT_)
17107 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
// Pointer-sized integer (and GC ref/byref) arguments — elided labels above.
17115 /* Is the value a constant? */
17117 if (curr->gtOper == GT_CNS_INT)
17119 assert(!(curr->gtFlags & GTF_LATE_ARG));
// If the constant already lives in a register, store from that register;
// otherwise store the immediate directly (with relocation info for handles).
17122 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
17126 inst_SA_RV(ins_Store(type), argOffset, reg, type);
17131 bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
17132 emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
17133 instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
17134 compiler->lvaOutgoingArgSpaceVar, argOffset);
17139 /* This is passed as a pointer-sized integer argument */
17141 genCodeForTree(curr, 0);
17142 if (curr->gtFlags & GTF_LATE_ARG)
17144 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
17146 #ifdef _TARGET_ARM_
17147 argSize = 0; // nothing is passed on the stack
17152 // The arg is passed in the outgoing argument area of the stack frame
17154 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
17155 assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
17156 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
// Once stored, if the register is no longer marked in-use it cannot be
// holding a live GC pointer — clear it from the GC register sets.
17158 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
17159 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
17164 /* Is this a nothing node, deferred register argument? */
17166 if (curr->gtFlags & GTF_LATE_ARG)
17168 /* Handle side-effects */
17170 if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
17172 #ifdef _TARGET_ARM_
17174 GenTreePtr curArgNode = curArgTabEntry->node;
17175 var_types curRegArgType = curArgNode->gtType;
17176 assert(curRegArgType != TYP_UNDEF);
17178 if (curRegArgType == TYP_STRUCT)
17180 // If the RHS of the COPYBLK is a promoted struct local, then the use of that
17181 // is an implicit use of all its field vars. If these are last uses, remember that,
17182 // so we can later update the GC compiler->info.
17183 if (curr->OperIsCopyBlkOp())
17184 deadFieldVarRegs |= genFindDeadFieldRegs(curr);
17187 #endif // _TARGET_ARM_
17189 genCodeForTree(curr, 0);
17193 assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
// TYP_STRUCT arguments — elided switch label; ARM handles them inline below,
// other fixed-out-args targets take a different (elided) path.
17196 #if defined(_TARGET_ARM_)
17197 argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
// Evaluate and discard any COMMA prefixes (side effects only), keeping
// liveness up to date as each is retired.
17202 for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
17204 GenTreePtr op1 = arg->gtOp.gtOp1;
17206 genEvalSideEffects(op1);
17207 genUpdateLife(op1);
17212 #ifdef _TARGET_ARM_
17216 GenTree* arg = curr;
17217 while (arg->gtOper == GT_COMMA)
17219 GenTreePtr op1 = arg->gtOp.gtOp1;
17220 genEvalSideEffects(op1);
17221 genUpdateLife(op1);
17222 arg = arg->gtOp.gtOp2;
17224 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
17226 CORINFO_CLASS_HANDLE clsHnd;
17229 BYTE* gcLayout = NULL;
17231 // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
17232 // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
17233 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
17234 // table entry for the promoted struct local. As we fill slots with the contents of a
17235 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
17236 // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
17237 // variable number of the next field variable to be copied.
17238 LclVarDsc* promotedStructLocalVarDesc = NULL;
17239 GenTreePtr structLocalTree = NULL;
17240 unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
17241 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
17242 unsigned promotedStructOffsetOfFirstStackSlot = 0;
17243 unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
17245 if (arg->OperGet() == GT_OBJ)
// Size, alignment, and per-slot GC layout come from the VM via the class handle.
17247 clsHnd = arg->gtObj.gtClass;
17248 unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
17249 argAlign = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
17250 argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
17252 slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
17254 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17256 compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
17258 // Are we loading a promoted struct local var?
17259 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
17260 arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17262 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
17263 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17264 LclVarDsc * varDsc = &compiler->lvaTable[structLclNum];
17266 // As much as we would like this to be a noway_assert, we can't because
17267 // there are some weird casts out there, and backwards compatiblity
17268 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
17269 // lvPromoted in general currently do not require the local to be
17270 // TYP_STRUCT, so this assert is really more about how we wish the world
17271 // was then some JIT invariant.
17272 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
17274 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17276 if (varDsc->lvPromoted &&
17277 promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
17279 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17280 promotedStructLocalVarDesc = varDsc;
17281 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
// MKREFANY: a TypedReference is always two pointer-sized slots (ptr + type).
17287 noway_assert(arg->OperGet() == GT_MKREFANY);
17290 argAlign = TARGET_POINTER_SIZE;
17291 argSize = 2*TARGET_POINTER_SIZE;
17295 // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
17296 noway_assert(regNum == REG_STK);
17298 // This code passes a TYP_STRUCT by value using the outgoing arg space var
17300 if (arg->OperGet() == GT_OBJ)
17302 regNumber regSrc = REG_STK;
17303 regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
17304 int cStackSlots = 0;
// Not a promoted local: materialize the struct's address in a register
// so slots can be loaded from [regSrc + i*ptrsize] below.
17306 if (promotedStructLocalVarDesc == NULL)
17308 genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
17309 noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
17310 regSrc = arg->gtObj.gtOp1->gtRegNum;
17313 // The number of bytes to add "argOffset" to get the arg offset of the current slot.
17314 int extraArgOffset = 0;
// Copy the struct one pointer-sized slot at a time, honoring the GC
// category (none/ref/byref) recorded for each slot in gcLayout.
17316 for (unsigned i = 0; i < slots; i++)
17318 emitAttr fieldSize;
17319 if (gcLayout[i] == TYPE_GC_NONE)
17320 fieldSize = EA_PTRSIZE;
17321 else if (gcLayout[i] == TYPE_GC_REF)
17322 fieldSize = EA_GCREF;
17325 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17326 fieldSize = EA_BYREF;
17329 // Pass the argument using the lvaOutgoingArgSpaceVar
17331 if (promotedStructLocalVarDesc != NULL)
17333 if (argOffsetOfFirstStackSlot == UINT32_MAX) argOffsetOfFirstStackSlot = argOffset;
// Pass MAX_REG_ARG as the "current register" so the helper writes this
// slot to the stack rather than to an argument register.
17335 regNumber maxRegArg = regNumber(MAX_REG_ARG);
17336 bool filledExtraSlot =
17337 genFillSlotFromPromotedStruct(arg,
17339 promotedStructLocalVarDesc,
17341 &nextPromotedStructFieldVar,
17342 &bytesOfNextSlotOfCurPromotedStruct,
17343 /*pCurRegNum*/ &maxRegArg,
17344 /*argOffset*/ argOffset + extraArgOffset,
17345 /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
17346 argOffsetOfFirstStackSlot,
17349 extraArgOffset += TARGET_POINTER_SIZE;
17350 // If we filled an extra slot with an 8-byte value, skip a slot.
17351 if (filledExtraSlot)
17355 extraArgOffset += TARGET_POINTER_SIZE;
// Non-promoted path: load the slot through regSrc into a scratch
// register, then store it into the outgoing arg space.
17360 if (regTmp == REG_STK)
17362 regTmp = regSet.rsPickFreeReg();
17365 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
17369 i*TARGET_POINTER_SIZE);
17371 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17374 compiler->lvaOutgoingArgSpaceVar,
17375 argOffset+cStackSlots*TARGET_POINTER_SIZE);
17376 regTracker.rsTrackRegTrash(regTmp);
// Release the struct-address register taken with KEEP_REG above, and
// retire liveness for the struct local (its use is now complete).
17381 if (promotedStructLocalVarDesc == NULL)
17383 regSet.rsMarkRegFree(genRegMask(regSrc));
17385 if (structLocalTree != NULL) genUpdateLife(structLocalTree);
17389 assert(arg->OperGet() == GT_MKREFANY);
17390 PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
17391 argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
17395 #endif // _TARGET_ARM_
17398 assert(!"unhandled/unexpected arg type");
17399 NO_WAY("unhandled/unexpected arg type");
17402 /* Update the current set of live variables */
17404 genUpdateLife(curr);
17406 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
17407 // register holding gc pointers.
17408 if (deadFieldVarRegs != 0)
17409 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
17411 /* Update the current argument stack offset */
17413 argOffset += argSize;
17415 /* Continue with the next argument, if any more are present */
// All register args were deferred with GTF_LATE_ARG; evaluate and place them now.
17421 SetupLateArgs(call);
17424 /* Return the total size pushed */
17429 #ifdef _TARGET_ARM_
17430 bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
17431 fgArgTabEntryPtr curArgTabEntry,
17432 LclVarDsc* promotedStructLocalVarDesc,
17433 emitAttr fieldSize,
17434 unsigned* pNextPromotedStructFieldVar,
17435 unsigned* pBytesOfNextSlotOfCurPromotedStruct,
17436 regNumber* pCurRegNum,
17438 int fieldOffsetOfFirstStackSlot,
17439 int argOffsetOfFirstStackSlot,
17440 regMaskTP* deadFieldVarRegs,
17441 regNumber* pRegTmp)
17443 unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
17444 unsigned limitPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
17445 unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
17447 regNumber curRegNum = *pCurRegNum;
17448 regNumber regTmp = *pRegTmp;
17449 bool filledExtraSlot = false;
17451 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17453 // We've already finished; just return.
17454 // We can reach this because the calling loop computes a # of slots based on the size of the struct.
17455 // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
17456 // the fourth slot, even though we've copied all the fields.
17460 LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17462 // Does this field fill an entire slot, and does it go at the start of the slot?
17463 // If so, things are easier...
17465 bool oneFieldFillsSlotFromStart =
17466 (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
17467 && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
17468 && (nextPromotedStructFieldVar+1 == limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
17469 || compiler->lvaTable[nextPromotedStructFieldVar+1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
17471 // Compute the proper size.
17472 if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
17474 switch (fieldVarDsc->lvExactSize)
17476 case 1: fieldSize = EA_1BYTE; break;
17477 case 2: fieldSize = EA_2BYTE; break;
17479 // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
17480 // in which case we should not have promoted the struct variable.
17481 noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
17483 // If the current reg number is not aligned, align it, and return to the calling loop, which will
17484 // consider that a filled slot and move on to the next argument register.
17485 if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
17487 // We must update the slot target, however!
17488 bytesOfNextSlotOfCurPromotedStruct += 4;
17489 *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17492 // Dest is an aligned pair of arg regs, if the struct type demands it.
17493 noway_assert((curRegNum % 2) == 0);
17494 // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
17496 default: assert(fieldVarDsc->lvExactSize == 4); break;
17501 // If the gc layout said it's a GC ref or byref, then the field size must be 4.
17502 noway_assert(fieldVarDsc->lvExactSize == 4);
17505 // We may need the type of the field to influence instruction selection.
17506 // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
17507 // If the fieldVarDsc is enregistered float we must use the field's exact type
17508 // however if it is in memory we can use an integer type TYP_I_IMPL
17510 var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
17511 if ((fieldVarDsc->lvType == TYP_LONG) ||
17512 (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
17514 fieldTypeForInstr = TYP_I_IMPL;
17517 // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
17518 if (curArgTabEntry->isHfaRegArg)
17520 assert(oneFieldFillsSlotFromStart);
17522 // Is the field variable promoted?
17523 if (fieldVarDsc->lvRegister)
17525 // Move the field var living in register to dst, if they are different registers.
17526 regNumber srcReg = fieldVarDsc->lvRegNum;
17527 regNumber dstReg = curRegNum;
17528 if (srcReg != dstReg)
17530 inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
17531 assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
17536 // Move the field var living in stack to dst.
17537 getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
17538 fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE,
17540 nextPromotedStructFieldVar,
17542 assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
17545 // Mark the arg as used and using reg val.
17546 genMarkTreeInReg(arg, curRegNum);
17547 regSet.SetUsedRegFloat(arg, true);
17549 // Advance for double.
17550 if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
17552 bytesOfNextSlotOfCurPromotedStruct += 4;
17553 curRegNum = REG_NEXT(curRegNum);
17554 arg->gtRegNum = curRegNum;
17555 regSet.SetUsedRegFloat(arg, true);
17556 filledExtraSlot = true;
17558 arg->gtRegNum = curArgTabEntry->regNum;
17561 bytesOfNextSlotOfCurPromotedStruct += 4;
17562 nextPromotedStructFieldVar++;
17566 if (oneFieldFillsSlotFromStart)
17568 // If we write to the stack, offset in outgoing args at which we'll write.
17569 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17570 assert(fieldArgOffset >= 0);
17572 // Is the source a register or memory?
17573 if (fieldVarDsc->lvRegister)
17575 if (fieldTypeForInstr == TYP_DOUBLE)
17577 fieldSize = EA_8BYTE;
17580 // Are we writing to a register or to the stack?
17581 if (curRegNum != MAX_REG_ARG)
17583 // Source is register and Dest is register.
17585 instruction insCopy = INS_mov;
17587 if (varTypeIsFloating(fieldTypeForInstr))
17589 if (fieldTypeForInstr == TYP_FLOAT)
17591 insCopy = INS_vmov_f2i;
17595 assert(fieldTypeForInstr == TYP_DOUBLE);
17596 insCopy = INS_vmov_d2i;
17600 // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
17601 // register (which may become a tmp register, if its held in the argument register that the first
17602 // register to be copied will overwrite).
17603 regNumber otherRegNum = REG_STK;
17604 if (fieldVarDsc->lvType == TYP_LONG)
17606 otherRegNum = fieldVarDsc->lvOtherReg;
17607 // Are we about to overwrite?
17608 if (otherRegNum == curRegNum)
17610 if (regTmp == REG_STK)
17612 regTmp = regSet.rsPickFreeReg();
17614 // Copy the second register to the temp reg.
17615 getEmitter()->emitIns_R_R(INS_mov,
17619 regTracker.rsTrackRegCopy(regTmp, otherRegNum);
17620 otherRegNum = regTmp;
17624 if (fieldVarDsc->lvType == TYP_DOUBLE)
17626 assert(curRegNum <= REG_R2);
17627 getEmitter()->emitIns_R_R_R(insCopy,
17630 genRegArgNext(curRegNum),
17631 fieldVarDsc->lvRegNum);
17632 regTracker.rsTrackRegTrash(curRegNum);
17633 regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
17637 // Now do the first register.
17638 // It might be the case that it's already in the desired register; if so do nothing.
17639 if (curRegNum != fieldVarDsc->lvRegNum)
17641 getEmitter()->emitIns_R_R(insCopy,
17644 fieldVarDsc->lvRegNum);
17645 regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
17649 // In either case, mark the arg register as used.
17650 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17652 // Is there a second half of the value?
17653 if (fieldVarDsc->lvExactSize == 8)
17655 curRegNum = genRegArgNext(curRegNum);
17656 // The second dest reg must also be an argument register.
17657 noway_assert(curRegNum < MAX_REG_ARG);
17659 // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17660 if (fieldVarDsc->lvType == TYP_LONG)
17662 // Copy the second register into the next argument register
17664 // If it's a register variable for a TYP_LONG value, then otherReg now should
17665 // hold the second register or it might say that it's in the stack.
17666 if (otherRegNum == REG_STK)
17668 // Apparently when we partially enregister, we allocate stack space for the full
17669 // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
17670 // parameter, to get the high half.
17671 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17674 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17675 regTracker.rsTrackRegTrash(curRegNum);
17679 // The other half is in a register.
17680 // Again, it might be the case that it's already in the desired register; if so do nothing.
17681 if (curRegNum != otherRegNum)
17683 getEmitter()->emitIns_R_R(INS_mov,
17687 regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
17692 // Also mark the 2nd arg register as used.
17693 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
17694 // Record the fact that we filled in an extra register slot
17695 filledExtraSlot = true;
17700 // Source is register and Dest is memory (OutgoingArgSpace).
17702 // Now write the srcReg into the right location in the outgoing argument list.
17703 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17705 fieldVarDsc->lvRegNum,
17706 compiler->lvaOutgoingArgSpaceVar,
17709 if (fieldVarDsc->lvExactSize == 8)
17711 // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17712 if (fieldVarDsc->lvType == TYP_LONG)
17714 if (fieldVarDsc->lvOtherReg == REG_STK)
17716 // Source is stack.
17717 if (regTmp == REG_STK)
17719 regTmp = regSet.rsPickFreeReg();
17721 // Apparently if we partially enregister, we allocate stack space for the full
17722 // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
17723 // parameter, to get the high half.
17724 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17727 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17728 regTracker.rsTrackRegTrash(regTmp);
17729 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17732 compiler->lvaOutgoingArgSpaceVar,
17733 fieldArgOffset + TARGET_POINTER_SIZE);
17737 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17739 fieldVarDsc->lvOtherReg,
17740 compiler->lvaOutgoingArgSpaceVar,
17741 fieldArgOffset + TARGET_POINTER_SIZE);
17744 // Record the fact that we filled in an extra register slot
17745 filledExtraSlot = true;
17748 assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
17749 // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
17750 if (arg->gtFlags & GTF_VAR_DEATH)
17752 *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
17753 // We don't bother with the second reg of a register pair, since if it has one,
17754 // it obviously doesn't hold a pointer.
17759 // Source is in memory.
17761 if (curRegNum != MAX_REG_ARG)
17764 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17767 nextPromotedStructFieldVar, 0);
17768 regTracker.rsTrackRegTrash(curRegNum);
17770 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17772 if (fieldVarDsc->lvExactSize == 8)
17774 noway_assert(fieldSize == EA_4BYTE);
17775 curRegNum = genRegArgNext(curRegNum);
17776 noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
17777 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL),
17780 nextPromotedStructFieldVar,
17781 TARGET_POINTER_SIZE);
17782 regTracker.rsTrackRegTrash(curRegNum);
17783 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17784 // Record the fact that we filled in an extra stack slot
17785 filledExtraSlot = true;
17791 if (regTmp == REG_STK)
17793 regTmp = regSet.rsPickFreeReg();
17795 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17798 nextPromotedStructFieldVar, 0);
17800 // Now write regTmp into the right location in the outgoing argument list.
17801 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17804 compiler->lvaOutgoingArgSpaceVar,
17806 // We overwrote "regTmp", so erase any previous value we recorded that it contained.
17807 regTracker.rsTrackRegTrash(regTmp);
17809 if (fieldVarDsc->lvExactSize == 8)
17811 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17814 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17816 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
17819 compiler->lvaOutgoingArgSpaceVar,
17820 fieldArgOffset + TARGET_POINTER_SIZE);
17821 // Record the fact that we filled in an extra stack slot
17822 filledExtraSlot = true;
17827 // Bump up the following if we filled in an extra slot
17828 if (filledExtraSlot)
17829 bytesOfNextSlotOfCurPromotedStruct += 4;
17831 // Go to the next field.
17832 nextPromotedStructFieldVar++;
17833 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17835 fieldVarDsc = NULL;
17839 // The next field should have the same parent variable, and we should have put the field vars in order sorted by offset.
17840 assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField
17841 && fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl
17842 && fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17843 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17845 bytesOfNextSlotOfCurPromotedStruct += 4;
17847 else // oneFieldFillsSlotFromStart == false
17849 // The current slot should contain more than one field.
17850 // We'll construct a word in memory for the slot, then load it into a register.
17851 // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current slot,
17852 // in which case we'll just skip this loop altogether.)
17853 while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
17855 // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
17856 // whose fields have their natural alignment, and alignment == size on ARM).
17857 noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
17859 // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
17860 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17861 noway_assert(argOffset == INT32_MAX || (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
17863 if (fieldVarDsc->lvRegister)
17865 if (curRegNum != MAX_REG_ARG)
17867 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17869 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17871 fieldVarDsc->lvRegNum,
17872 compiler->lvaPromotedStructAssemblyScratchVar,
17873 fieldVarDsc->lvFldOffset % 4);
17877 // Dest is stack; write directly.
17878 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17880 fieldVarDsc->lvRegNum,
17881 compiler->lvaOutgoingArgSpaceVar,
17887 // Source is in memory.
17889 // Make sure we have a temporary register to use...
17890 if (regTmp == REG_STK)
17892 regTmp = regSet.rsPickFreeReg();
17894 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr),
17897 nextPromotedStructFieldVar, 0);
17898 regTracker.rsTrackRegTrash(regTmp);
17900 if (curRegNum != MAX_REG_ARG)
17902 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17904 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17907 compiler->lvaPromotedStructAssemblyScratchVar,
17908 fieldVarDsc->lvFldOffset % 4);
17912 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr),
17915 compiler->lvaOutgoingArgSpaceVar,
17919 // Go to the next field.
17920 nextPromotedStructFieldVar++;
17921 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17923 fieldVarDsc = NULL;
17927 // The next field should have the same parent variable, and we should have put the field vars in order sorted by offset.
17928 noway_assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField
17929 && fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl
17930 && fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17931 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17934 // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to write to
17935 // an argument register, do so.
17936 if (curRegNum != MAX_REG_ARG)
17938 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17940 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL),
17943 compiler->lvaPromotedStructAssemblyScratchVar, 0);
17944 regTracker.rsTrackRegTrash(curRegNum);
17945 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17947 // We've finished a slot; set the goal of the next slot.
17948 bytesOfNextSlotOfCurPromotedStruct += 4;
17952 // Write back the updates.
17953 *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
17954 *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17955 *pCurRegNum = curRegNum;
17958 return filledExtraSlot;
17960 #endif // _TARGET_ARM_
17962 regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
17964 noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
17965 GenTreePtr lst = cpBlk->gtOp.gtOp1;
17966 noway_assert(lst->OperGet() == GT_LIST); // Well-formedness.
17967 GenTreePtr rhs = lst->gtOp.gtOp2;
17969 if (rhs->OperGet() == GT_ADDR)
17971 rhs = rhs->gtOp.gtOp1;
17972 if (rhs->OperGet() == GT_LCL_VAR)
17974 LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
17975 if (rhsDsc->lvPromoted)
17977 // It is promoted; iterate over its field vars.
17978 unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
17979 for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
17981 LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
17982 // Did the variable go dead, and is it enregistered?
17983 if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
17985 // Add the register number to the set of registers holding field vars that are going dead.
17986 res |= genRegMask(fieldVarDsc->lvRegNum);
17996 void CodeGen::SetupLateArgs(GenTreePtr call)
17998 GenTreeArgList* lateArgs;
18001 /* Generate the code to move the late arguments into registers */
18003 for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
18005 curr = lateArgs->Current();
18008 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
18009 assert(curArgTabEntry);
18010 regNumber regNum = curArgTabEntry->regNum;
18011 unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18013 assert(isRegParamType(curr->TypeGet()));
18014 assert(curr->gtType != TYP_VOID);
18016 /* If the register is already marked as used, it will become
18017 multi-used. However, since it is a callee-trashed register,
18018 we will have to spill it before the call anyway. So do it now */
18021 // Remember which registers hold pointers. We will spill
18022 // them, but the code that follows will fetch reg vars from
18023 // the registers, so we need that gc compiler->info.
18024 // Also regSet.rsSpillReg doesn't like to spill enregistered
18025 // variables, but if this is their last use that is *exactly*
18026 // what we need to do, so we have to temporarily pretend
18027 // they are no longer live.
18028 // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
18029 // when their last use is about to occur?
18030 // It is because this is the second operand to be evaluated
18031 // of some parent binary op, and the first operand is
18032 // live across this tree, and thought it could re-use the
18033 // variables register (like a GT_REG_VAR). This probably
18034 // is caused by RegAlloc assuming the first operand would
18035 // evaluate into another register.
18036 regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
18037 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
18038 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
18039 regSet.RemoveMaskVars(rsTemp);
18041 regNumber regNum2 = regNum;
18042 for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
18044 if (regSet.rsMaskUsed & genRegMask(regNum2))
18046 assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
18047 regSet.rsSpillReg(regNum2);
18049 if (isValidIntArgReg(regNum2))
18051 regNum2 = genRegArgNext(regNum2);
18055 regNum2 = genRegArgNextFloat(regNum2);
18057 assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
18060 // Restore gc tracking masks.
18061 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
18062 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
18064 // Set maskvars back to normal
18065 regSet.AddMaskVars(rsTemp);
18068 /* Evaluate the argument to a register */
18070 /* Check if this is the guess area for the resolve interface call
18071 * Pass a size of EA_OFFSET*/
18072 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
18074 getEmitter()->emitIns_R_C(ins_Load(TYP_INT),
18077 curr->gtClsVar.gtClsVarHnd,
18079 regTracker.rsTrackRegTrash(regNum);
18081 /* The value is now in the appropriate register */
18083 genMarkTreeInReg(curr, regNum);
18085 regSet.rsMarkRegUsed(curr);
18087 #ifdef _TARGET_ARM_
18088 else if (curr->gtType == TYP_STRUCT)
18090 GenTree* arg = curr;
18091 while (arg->gtOper == GT_COMMA)
18093 GenTreePtr op1 = arg->gtOp.gtOp1;
18094 genEvalSideEffects(op1);
18095 genUpdateLife(op1);
18096 arg = arg->gtOp.gtOp2;
18098 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_MKREFANY));
18100 // This code passes a TYP_STRUCT by value using
18101 // the argument registers first and
18102 // then the lvaOutgoingArgSpaceVar area.
18105 // We prefer to choose low registers here to reduce code bloat
18106 regMaskTP regNeedMask = RBM_LOW_REGS;
18107 unsigned firstStackSlot = 0;
18108 unsigned argAlign = TARGET_POINTER_SIZE;
18109 size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
18111 unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
18114 if (regNum == REG_STK)
18116 firstStackSlot = 0;
18120 if (argAlign == (TARGET_POINTER_SIZE * 2))
18122 assert((regNum & 1) == 0);
18125 // firstStackSlot is an index of the first slot of the struct
18126 // that is on the stack, in the range [0,slots]. If it is 'slots',
18127 // then the entire struct is in registers. It is also equal to
18128 // the number of slots of the struct that are passed in registers.
18130 if (curArgTabEntry->isHfaRegArg)
18132 // HFA arguments that have been decided to go into registers fit the reg space.
18133 assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
18134 assert(regNum + slots - 1 <= LAST_FP_ARGREG && "HFA argument doesn't fit entirely in FP argument registers");
18135 firstStackSlot = slots;
18137 else if (regNum + slots > MAX_REG_ARG)
18139 firstStackSlot = MAX_REG_ARG - regNum;
18140 assert(firstStackSlot > 0);
18144 firstStackSlot = slots;
18147 if (curArgTabEntry->isHfaRegArg)
18149 // Mask out the registers used by an HFA arg from the ones used to compute tree into.
18150 for (unsigned i = regNum; i < regNum + slots; i ++)
18152 regNeedMask &= ~genRegMask(regNumber(i));
18157 // This holds the set of registers corresponding to enregistered promoted struct field variables
18158 // that go dead after this use of the variable in the argument list.
18159 regMaskTP deadFieldVarRegs = RBM_NONE;
18161 // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
18162 // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
18163 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
18164 // table entry for the promoted struct local. As we fill slots with the contents of a
18165 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
18166 // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're working
18167 // on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're done),
18168 // and "nextPromotedStructFieldVar" will be the local
18169 // variable number of the next field variable to be copied.
18170 LclVarDsc* promotedStructLocalVarDesc = NULL;
18171 unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
18172 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
18173 GenTreePtr structLocalTree = NULL;
18175 BYTE * gcLayout = NULL;
18176 regNumber regSrc = REG_NA;
18177 if (arg->gtOper == GT_OBJ)
18179 // Are we loading a promoted struct local var?
18180 if (arg->gtObj.gtOp1->gtOper == GT_ADDR &&
18181 arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18183 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
18184 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
18185 LclVarDsc * varDsc = &compiler->lvaTable[structLclNum];
18187 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
18189 if (varDsc->lvPromoted &&
18190 promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
18192 // Fix 388395 ARM JitStress WP7
18193 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
18195 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
18196 promotedStructLocalVarDesc = varDsc;
18197 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18201 if (promotedStructLocalVarDesc == NULL)
18203 // If it's not a promoted struct variable, set "regSrc" to the address
18204 // of the struct local.
18205 genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
18206 noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
18207 regSrc = arg->gtObj.gtOp1->gtRegNum;
18208 // Remove this register from the set of registers that we pick from, unless slots equals 1
18210 regNeedMask &= ~genRegMask(regSrc);
18213 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
18214 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
18216 else if (arg->gtOper == GT_LCL_VAR)
18218 // Move the address of the LCL_VAR in arg into reg
18220 unsigned varNum = arg->gtLclVarCommon.gtLclNum;
18222 // Are we loading a promoted struct local var?
18223 structLocalTree = arg;
18224 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
18225 LclVarDsc * varDsc = &compiler->lvaTable[structLclNum];
18227 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
18229 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
18231 if (varDsc->lvPromoted &&
18232 promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
18234 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
18235 promotedStructLocalVarDesc = varDsc;
18236 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18239 if (promotedStructLocalVarDesc == NULL)
18241 regSrc = regSet.rsPickFreeReg(regNeedMask);
18242 // Remove this register from the set of registers that we pick from, unless slots equals 1
18244 regNeedMask &= ~genRegMask(regSrc);
18246 getEmitter()->emitIns_R_S(INS_lea,
18250 regTracker.rsTrackRegTrash(regSrc);
18251 gcLayout = compiler->lvaGetGcLayout(varNum);
18254 else if (arg->gtOper == GT_MKREFANY)
18256 assert(slots == 2);
18257 assert((firstStackSlot == 1) || (firstStackSlot == 2));
18258 assert(argOffset == 0); // ???
18259 PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
18261 // Adjust argOffset if part of this guy was pushed onto the stack
18262 if (firstStackSlot < slots)
18264 argOffset += TARGET_POINTER_SIZE;
18267 // Skip the copy loop below because we have already placed the argument in the right place
18273 assert(!"Unsupported TYP_STRUCT arg kind");
18274 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
18277 if (promotedStructLocalVarDesc != NULL)
18279 // We must do do the stack parts first, since those might need values
18280 // from argument registers that will be overwritten in the portion of the
18281 // loop that writes into the argument registers.
18282 bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot+1) * TARGET_POINTER_SIZE;
18283 // Now find the var number of the first that starts in the first stack slot.
18284 unsigned fieldVarLim = promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
18285 while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset < (firstStackSlot*TARGET_POINTER_SIZE)
18286 && nextPromotedStructFieldVar < fieldVarLim)
18288 nextPromotedStructFieldVar++;
18290 // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the first stack slot is after
18292 assert(nextPromotedStructFieldVar < fieldVarLim|| firstStackSlot >= slots);
18295 if (slots > 0) // the mkref case may have set "slots" to zero.
18297 // First pass the stack portion of the struct (if any)
18299 for (unsigned i = firstStackSlot; i < slots; i++)
18301 emitAttr fieldSize;
18302 if (gcLayout[i] == TYPE_GC_NONE)
18303 fieldSize = EA_PTRSIZE;
18304 else if (gcLayout[i] == TYPE_GC_REF)
18305 fieldSize = EA_GCREF;
18308 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
18309 fieldSize = EA_BYREF;
18312 regNumber maxRegArg = regNumber(MAX_REG_ARG);
18313 if (promotedStructLocalVarDesc != NULL)
18315 regNumber regTmp = REG_STK;
18317 bool filledExtraSlot =
18318 genFillSlotFromPromotedStruct(arg,
18320 promotedStructLocalVarDesc,
18322 &nextPromotedStructFieldVar,
18323 &bytesOfNextSlotOfCurPromotedStruct,
18324 /*pCurRegNum*/&maxRegArg,
18326 /*fieldOffsetOfFirstStackSlot*/ firstStackSlot * TARGET_POINTER_SIZE,
18327 /*argOffsetOfFirstStackSlot*/ 0, // is always zero in this "spanning" case.
18330 if (filledExtraSlot)
18333 argOffset += TARGET_POINTER_SIZE;
18336 else // (promotedStructLocalVarDesc == NULL)
18338 // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc
18339 // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
18340 // to regSet.rsPickFreeReg, so we need to be a little more forceful.
18341 // Otherwise, just re-use the same register.
18343 regNumber regTmp = regSrc;
18346 regMaskTP regSrcUsed;
18347 regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed);
18349 regTmp = regSet.rsPickFreeReg(regNeedMask);
18351 noway_assert(regTmp != regSrc);
18353 regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
18356 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
18360 i * TARGET_POINTER_SIZE);
18362 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL),
18365 compiler->lvaOutgoingArgSpaceVar,
18367 regTracker.rsTrackRegTrash(regTmp);
18369 argOffset += TARGET_POINTER_SIZE;
18372 // Now pass the register portion of the struct
18375 bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
18376 if (promotedStructLocalVarDesc != NULL)
18377 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
18379 // Create a nested loop here so that the first time thru the loop
18380 // we setup all of the regArg registers except for possibly
18381 // the one that would overwrite regSrc. Then in the final loop
18382 // (if necessary) we just setup regArg/regSrc with the overwrite
18384 bool overwriteRegSrc=false;
18385 bool needOverwriteRegSrc=false;
18387 if (needOverwriteRegSrc)
18388 overwriteRegSrc = true;
18390 for (unsigned i = 0; i < firstStackSlot; i++)
18392 regNumber regArg = (regNumber) (regNum+i);
18394 if (overwriteRegSrc == false)
18396 if (regArg == regSrc)
18398 needOverwriteRegSrc=true;
18404 if (regArg != regSrc)
18408 emitAttr fieldSize;
18409 if (gcLayout[i] == TYPE_GC_NONE)
18410 fieldSize = EA_PTRSIZE;
18411 else if (gcLayout[i] == TYPE_GC_REF)
18412 fieldSize = EA_GCREF;
18415 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
18416 fieldSize = EA_BYREF;
18419 regNumber regTmp = REG_STK;
18420 if (promotedStructLocalVarDesc != NULL)
18422 bool filledExtraSlot =
18423 genFillSlotFromPromotedStruct(arg,
18425 promotedStructLocalVarDesc,
18427 &nextPromotedStructFieldVar,
18428 &bytesOfNextSlotOfCurPromotedStruct,
18429 /*pCurRegNum*/®Arg,
18430 /*argOffset*/ INT32_MAX,
18431 /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
18432 /*argOffsetOfFirstStackSlot*/ INT32_MAX,
18435 if (filledExtraSlot)
18440 getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
18444 i*TARGET_POINTER_SIZE);
18446 regTracker.rsTrackRegTrash(regArg);
18448 } while (needOverwriteRegSrc != overwriteRegSrc);
18451 if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
18453 regSet.rsMarkRegFree(genRegMask(regSrc));
18456 if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs used.
18458 arg->gtFlags |= GTF_REG_VAL;
18459 for (unsigned i = 1; i < firstStackSlot; i++)
18461 arg->gtRegNum = (regNumber)(regNum + i);
18462 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true)
18463 : regSet.rsMarkRegUsed(arg);
18465 arg->gtRegNum = regNum;
18466 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true)
18467 : regSet.rsMarkRegUsed(arg);
18470 // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
18471 // so update liveness.
18472 genUpdateLife(arg);
18474 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
18475 // register holding gc pointers.
18476 if (deadFieldVarRegs != RBM_NONE)
18477 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
18479 else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
18481 if (curArgTabEntry->regNum == REG_STK)
18483 // The arg is passed in the outgoing argument area of the stack frame
18484 genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
18485 assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
18487 inst_SA_RV(ins_Store(TYP_INT), argOffset+0, genRegPairLo(curr->gtRegPair), TYP_INT);
18488 inst_SA_RV(ins_Store(TYP_INT), argOffset+4, genRegPairHi(curr->gtRegPair), TYP_INT);
18492 assert(regNum < REG_ARG_LAST);
18493 regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
18494 genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
18495 assert(curr->gtRegPair == regPair);
18496 regSet.rsMarkRegPairUsed(curr);
18499 #endif // _TARGET_ARM_
18500 else if (curArgTabEntry->regNum == REG_STK)
18502 // The arg is passed in the outgoing argument area of the stack frame
18504 genCodeForTree(curr, 0);
18505 assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
18507 inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
18509 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
18510 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
18514 if (!varTypeIsFloating(curr->gtType))
18516 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
18517 assert(curr->gtRegNum == regNum);
18518 regSet.rsMarkRegUsed(curr);
18520 else // varTypeIsFloating(curr->gtType)
18522 if (genIsValidFloatReg(regNum))
18524 genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG, false);
18525 assert(curr->gtRegNum == regNum);
18526 regSet.rsMarkRegUsed(curr);
18530 genCodeForTree(curr, 0);
18531 // If we are loading a floating point type into integer registers
18532 // then it must be for varargs.
18533 // genCodeForTree will load it into a floating point register,
18534 // now copy it into the correct integer register(s)
18535 if (curr->TypeGet() == TYP_FLOAT)
18537 assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
18538 regSet.rsSpillRegIfUsed(regNum);
18539 #ifdef _TARGET_ARM_
18540 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
18542 #error "Unsupported target"
18544 regTracker.rsTrackRegTrash(regNum);
18546 curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
18547 curr->gtRegNum = regNum;
18548 regSet.rsMarkRegUsed(curr);
18552 assert(curr->TypeGet() == TYP_DOUBLE);
18553 regNumber intRegNumLo = regNum;
18554 curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
18555 #ifdef _TARGET_ARM_
18556 regNumber intRegNumHi = regNumber(intRegNumLo + 1);
18557 assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
18558 assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
18559 regSet.rsSpillRegIfUsed(intRegNumHi);
18560 regSet.rsSpillRegIfUsed(intRegNumLo);
18562 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
18563 regTracker.rsTrackRegTrash(intRegNumLo);
18564 regTracker.rsTrackRegTrash(intRegNumHi);
18565 curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
18566 regSet.rsMarkRegPairUsed(curr);
18568 #error "Unsupported target"
18576 /* If any of the previously loaded arguments were spilled - reload them */
18578 for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
18580 curr = lateArgs->Current();
18583 if (curr->gtFlags & GTF_SPILLED)
18585 if (isRegPairType(curr->gtType))
18587 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
18591 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
18598 #ifdef _TARGET_ARM_
18600 // 'Push' a single GT_MKREFANY argument onto a call's argument list
18601 // The argument is passed as described by the fgArgTabEntry
18602 // If any part of the struct is to be passed in a register the
// regNum value will be equal to the register used to pass the
// first part of the struct.
18605 // If any part is to go onto the stack, we first generate the
18606 // value into a register specified by 'regNeedMask' and
18607 // then store it to the out going argument area.
18608 // When this method returns, both parts of the TypeReference have
18609 // been pushed onto the stack, but *no* registers have been marked
18610 // as 'in-use', that is the responsibility of the caller.
18612 void CodeGen::PushMkRefAnyArg ( GenTreePtr mkRefAnyTree,
18613 fgArgTabEntryPtr curArgTabEntry,
18614 regMaskTP regNeedMask)
18616 regNumber regNum = curArgTabEntry->regNum;
18618 assert(mkRefAnyTree->gtOper == GT_MKREFANY);
18619 regMaskTP arg1RegMask = 0;
18620 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18622 // Construct the TypedReference directly into the argument list of the call by
18623 // 'pushing' the first field of the typed reference: the pointer.
18624 // Do this by directly generating it into the argument register or outgoing arg area of the stack.
18625 // Mark it as used so we don't trash it while generating the second field.
18627 if (regNum == REG_STK)
18629 genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18630 noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
18631 regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
18632 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
18633 regTracker.rsTrackRegTrash(tmpReg1);
18634 argOffset += TARGET_POINTER_SIZE;
18639 assert(regNum <= REG_ARG_LAST);
18640 arg1RegMask = genRegMask(regNum);
18641 genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
18642 regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
18645 // Now 'push' the second field of the typed reference: the method table.
18646 if (regNum2 == REG_STK)
18648 genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18649 noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
18650 regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
18651 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
18652 regTracker.rsTrackRegTrash(tmpReg2);
18656 assert(regNum2 <= REG_ARG_LAST);
18657 // We don't have to mark this register as being in use here because it will
18658 // be done by the caller, and we don't want to double-count it.
18659 genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
18662 // Now that we are done generating the second part of the TypeReference, we can mark
18663 // the first register as free.
18664 // The caller in the shared path we will re-mark all registers used by this argument
18665 // as being used, so we don't want to double-count this one.
18666 if (arg1RegMask != 0)
18668 GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
18669 if (op1->gtFlags & GTF_SPILLED)
18671 /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
18673 regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
18677 regSet.rsMarkRegFree(arg1RegMask);
18681 #endif // _TARGET_ARM_
18683 #endif // FEATURE_FIXED_OUT_ARGS
18686 regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
18688 assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
18690 regMaskTP fptrRegs;
18692 /* Loading the indirect call target might cause one or more of the previously
18693 loaded argument registers to be spilled. So, we save information about all
18694 the argument registers, and unspill any of them that get spilled, after
18695 the call target is loaded.
18706 regArgTab[MAX_REG_ARG];
18708 /* Record the previously loaded arguments, if any */
18711 regMaskTP prefRegs = regSet.rsRegMaskFree();
18712 regMaskTP argRegs = RBM_NONE;
18713 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18716 regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18717 GenTreePtr argTree = regSet.rsUsedTree[regNum];
18718 regArgTab[regIndex].node = argTree;
18719 if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
18721 assert(argTree->gtFlags & GTF_REG_VAL);
18722 if (isRegPairType(argTree->gtType))
18724 regPairNo regPair = argTree->gtRegPair;
18725 assert(regNum == genRegPairHi(regPair) ||
18726 regNum == genRegPairLo(regPair));
18727 regArgTab[regIndex].regPair = regPair;
18728 mask = genRegPairMask(regPair);
18732 assert(regNum == argTree->gtRegNum);
18733 regArgTab[regIndex].regNum = regNum;
18734 mask = genRegMask(regNum);
18736 assert(!(prefRegs & mask));
18741 /* Record the register(s) used for the indirect call func ptr */
18742 fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18744 /* If any of the previously loaded arguments were spilled, reload them */
18746 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18748 GenTreePtr argTree = regArgTab[regIndex].node;
18749 if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18751 assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18752 if (isRegPairType(argTree->gtType))
18754 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18758 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18763 /* Make sure the target is still addressable while avoiding the argument registers */
18765 fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
18770 /*****************************************************************************
18772 * Generate code for a call. If the call returns a value in register(s), the
18773 * register mask that describes where the result will be found is returned;
18774 * otherwise, RBM_NONE is returned.
18778 #pragma warning(push)
18779 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
18781 regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
18788 emitter::EmitCallType emitCallType;
18790 unsigned saveStackLvl;
18792 BasicBlock * returnLabel = DUMMY_INIT(NULL);
18793 LclVarDsc * frameListRoot = NULL;
18795 unsigned savCurIntArgReg;
18796 unsigned savCurFloatArgReg;
18800 regMaskTP fptrRegs = RBM_NONE;
18801 regMaskTP vptrMask = RBM_NONE;
18804 unsigned stackLvl = getEmitter()->emitCurStackLvl;
18806 if (compiler->verbose)
18808 printf("\t\t\t\t\t\t\tBeg call ");
18809 Compiler::printTreeID(call);
18810 printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18814 gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
18815 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
18817 CORINFO_SIG_INFO* sigInfo = nullptr;
18819 #ifdef DEBUGGING_SUPPORT
18820 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18822 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18826 /* Make some sanity checks on the call node */
18829 noway_assert(call->IsCall());
18830 // "this" only makes sense for user functions
18831 noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18832 // tailcalls won't be done for helpers, caller-pop args, and check that
18833 // the global flag is set
18834 noway_assert(!call->gtCall.IsTailCall() ||
18835 (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18838 // Pass the call signature information down into the emitter so the emitter can associate
18839 // native call sites with the signatures they were generated from.
18840 if (callType != CT_HELPER)
18842 sigInfo = call->gtCall.callSig;
18846 unsigned pseudoStackLvl = 0;
18848 if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18850 noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18852 pseudoStackLvl = genStackLevel;
18854 noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18855 "so fgAddCodeRef() should have set isFramePointerRequired()");
18858 /* Mark the current stack level and list of pointer arguments */
18860 saveStackLvl = genStackLevel;
18863 /*-------------------------------------------------------------------------
18864 * Set up the registers and arguments
18867 /* We'll keep track of how much we've pushed on the stack */
18871 /* We need to get a label for the return address with the proper stack depth. */
18872 /* For the callee pops case (the default) that is before the args are pushed. */
18874 if ((call->gtFlags & GTF_CALL_UNMANAGED) &&
18875 !(call->gtFlags & GTF_CALL_POP_ARGS))
18877 returnLabel = genCreateTempLabel();
18881 Make sure to save the current argument register status
18882 in case we have nested calls.
18885 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18886 savCurIntArgReg = intRegState.rsCurRegArgNum;
18887 savCurFloatArgReg = floatRegState.rsCurRegArgNum;
18888 intRegState.rsCurRegArgNum = 0;
18889 floatRegState.rsCurRegArgNum = 0;
18891 /* Pass the arguments */
18893 if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
18895 argSize += genPushArgList(call);
18898 /* We need to get a label for the return address with the proper stack depth. */
18899 /* For the caller pops case (cdecl) that is after the args are pushed. */
18901 if (call->gtFlags & GTF_CALL_UNMANAGED)
18903 if (call->gtFlags & GTF_CALL_POP_ARGS)
18904 returnLabel = genCreateTempLabel();
18906 /* Make sure that we now have a label */
18907 noway_assert(returnLabel != DUMMY_INIT(NULL));
18910 if (callType == CT_INDIRECT)
18912 fptrRegs = genLoadIndirectCallTarget(call);
18915 /* Make sure any callee-trashed registers are saved */
18917 regMaskTP calleeTrashedRegs = RBM_NONE;
18919 #if GTF_CALL_REG_SAVE
18920 if (call->gtFlags & GTF_CALL_REG_SAVE)
18922 /* The return value reg(s) will definitely be trashed */
18924 switch (call->gtType)
18929 #if!CPU_HAS_FP_SUPPORT
18932 calleeTrashedRegs = RBM_INTRET;
18936 #if!CPU_HAS_FP_SUPPORT
18939 calleeTrashedRegs = RBM_LNGRET;
18943 #if CPU_HAS_FP_SUPPORT
18947 calleeTrashedRegs = 0;
18951 noway_assert(!"unhandled/unexpected type");
18957 calleeTrashedRegs = RBM_CALLEE_TRASH;
18960 /* Spill any callee-saved registers which are being used */
18962 regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18964 /* We need to save all GC registers to the InlinedCallFrame.
18965 Instead, just spill them to temps. */
18967 if (call->gtFlags & GTF_CALL_UNMANAGED)
18968 spillRegs |= (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18970 // Ignore fptrRegs as it is needed only to perform the indirect call
18972 spillRegs &= ~fptrRegs;
18974 /* Do not spill the argument registers.
18975 Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18977 noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
18978 spillRegs &= ~call->gtCall.gtCallRegUsedMask;
18982 regSet.rsSpillRegs(spillRegs);
18985 #if FEATURE_STACK_FP_X87
18987 SpillForCallStackFP();
18989 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18992 regNumber regReturn = regSet.PickRegFloat();
18994 // Assign reg to tree
18995 genMarkTreeInReg(call, regReturn);
18998 regSet.SetUsedRegFloat(call, true);
19001 compCurFPState.Push(regReturn);
19004 SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
19007 /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
19009 retSize = EA_PTRSIZE;
19014 if (call->gtType == TYP_REF ||
19015 call->gtType == TYP_ARRAY)
19017 retSize = EA_GCREF;
19019 else if (call->gtType == TYP_BYREF)
19021 retSize = EA_BYREF;
19026 /*-------------------------------------------------------------------------
19027 * For caller-pop calls, the GC info will report the arguments as pending
19028 arguments as the caller explicitly pops them. Also should be
19029 reported as non-GC arguments as they effectively go dead at the
19030 call site (callee owns them)
19033 args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize)
19036 /*-------------------------------------------------------------------------
19037 * Generate the profiling hooks for the call
19040 /* Treat special cases first */
19042 #ifdef PROFILING_SUPPORTED
19044 /* fire the event at the call site */
19045 /* alas, right now I can only handle calls via a method handle */
19046 if (compiler->compIsProfilerHookNeeded() &&
19047 (callType == CT_USER_FUNC) &&
19048 call->gtCall.IsTailCall())
19050 unsigned saveStackLvl2 = genStackLevel;
19053 // Push the profilerHandle
19055 #ifdef _TARGET_X86_
19056 regMaskTP byrefPushedRegs;
19057 regMaskTP norefPushedRegs;
19058 regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
19060 if (compiler->compProfilerMethHndIndirected)
19062 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
19063 (ssize_t)compiler->compProfilerMethHnd);
19067 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
19071 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
19072 sizeof(int) * 1, // argSize
19073 EA_UNKNOWN); // retSize
19076 // Adjust the number of stack slots used by this managed method if necessary.
19078 if (compiler->fgPtrArgCntMax < 1)
19080 compiler->fgPtrArgCntMax = 1;
19083 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
19085 // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
19086 // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
19087 // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
19089 // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want to disturb them
19090 // and hence argument registers are locked here.
19091 regMaskTP usedMask = RBM_NONE;
19092 regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
19094 regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
19095 regSet.rsLockReg(genRegMask(scratchReg));
19097 emitAttr attr = EA_UNKNOWN;
19098 if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
19101 gcInfo.gcMarkRegSetGCref(scratchReg);
19103 else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
19106 gcInfo.gcMarkRegSetByref(scratchReg);
19113 getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
19114 regTracker.rsTrackRegTrash(scratchReg);
19116 if (compiler->compProfilerMethHndIndirected)
19118 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
19119 regTracker.rsTrackRegTrash(REG_R0);
19123 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
19126 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
19128 EA_UNKNOWN); // retSize
19130 // Restore back to the state that existed before profiler callback
19131 gcInfo.gcMarkRegSetNpt(scratchReg);
19132 getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
19133 regTracker.rsTrackRegTrash(REG_R0);
19134 regSet.rsUnlockReg(genRegMask(scratchReg));
19135 regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
19137 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
19138 #endif //_TARGET_X86_
19141 /* Restore the stack level */
19142 genStackLevel = saveStackLvl2;
19145 #endif // PROFILING_SUPPORTED
19150 /*-------------------------------------------------------------------------
19151 * Generate an ESP check for the call
19154 if (compiler->opts.compStackCheckOnCall
19155 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
19156 //check the stacks as frequently as possible
19157 && !call->IsHelperCall()
19159 && call->gtCall.gtCallType == CT_USER_FUNC
19163 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
19164 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
19168 /*-------------------------------------------------------------------------
19169 * Generate the call
19172 bool fPossibleSyncHelperCall = false;
19173 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
19175 bool fTailCallTargetIsVSD = false;
19177 bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
19179 /* Check for Delegate.Invoke. If so, we inline it. We get the
19180 target-object and target-function from the delegate-object, and do
19184 if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
19186 noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
19188 assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) & (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL));
19190 /* Find the offsets of the 'this' pointer and new target */
19192 CORINFO_EE_INFO * pInfo;
19193 unsigned instOffs; // offset of new 'this' pointer
19194 unsigned firstTgtOffs; // offset of first target to invoke
19195 const regNumber regThis = genGetThisArgReg(call);
19197 pInfo = compiler->eeGetEEInfo();
19198 instOffs = pInfo->offsetOfDelegateInstance;
19199 firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
19201 // Grab an available register to use for the CALL indirection
19202 regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19204 // Save the invoke-target-function in indCallReg
19205 // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
19206 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
19207 regTracker.rsTrackRegTrash(indCallReg);
19209 /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
19211 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
19212 regTracker.rsTrackRegTrash(regThis);
19213 noway_assert(instOffs < 127);
19215 /* Call through indCallReg */
19217 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19219 INDEBUG_LDISASM_COMMA(sigInfo)
19223 gcInfo.gcVarPtrSetCur,
19224 gcInfo.gcRegGCrefSetCur,
19225 gcInfo.gcRegByrefSetCur,
19231 /*-------------------------------------------------------------------------
19232 * Virtual and interface calls
19235 switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
19237 case GTF_CALL_VIRT_STUB:
19239 regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
19241 // An x86 JIT which uses full stub dispatch must generate only
19242 // the following stub dispatch calls:
19244 // (1) isCallRelativeIndirect:
19245 // call dword ptr [rel32] ; FF 15 ---rel32----
19246 // (2) isCallRelative:
19247 // call abc ; E8 ---rel32----
19248 // (3) isCallRegisterIndirect:
19250 // call dword ptr [eax] ; FF 10
19252 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
19253 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
19256 // Please do not insert any Random NOPs while constructing this VSD call
19258 getEmitter()->emitDisableRandomNops();
19262 // This is code to set up an indirect call to a stub address computed
19263 // via dictionary lookup. However the dispatch stub receivers aren't set up
19264 // to accept such calls at the moment.
19265 if (callType == CT_INDIRECT)
19269 // -------------------------------------------------------------------------
19270 // The importer decided we needed a stub call via a computed
19271 // stub dispatch address, i.e. an address which came from a dictionary lookup.
19272 // - The dictionary lookup produces an indirected address, suitable for call
19273 // via "call [REG_VIRTUAL_STUB_PARAM]"
19275 // This combination will only be generated for shared generic code and when
19276 // stub dispatch is active.
19278 // No need to null check the this pointer - the dispatch code will deal with this.
19280 noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19282 // Now put the address in REG_VIRTUAL_STUB_PARAM.
19283 // This is typically a nop when the register used for
19284 // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
19286 inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
19287 regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
19289 #if defined(_TARGET_XARCH_)
19290 #if defined(_TARGET_X86_)
19291 // Emit enough bytes of nops so that this sequence can be distinguished
19292 // from other virtual stub dispatch calls.
19294 // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
19295 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
19297 getEmitter()->emitIns_Nop(3);
19299 #endif // _TARGET_X86_
19301 // Make the virtual stub call:
19302 // call [REG_VIRTUAL_STUB_PARAM]
19304 emitCallType = emitter::EC_INDIR_ARD;
19306 indReg = REG_VIRTUAL_STUB_PARAM;
19307 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19309 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
19311 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19313 // Make the virtual stub call:
19314 // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
19317 emitCallType = emitter::EC_INDIR_R;
19319 // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
19321 indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
19322 assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
19323 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
19324 regTracker.rsTrackRegTrash(indReg);
19327 #error "Unknown target for VSD call"
19330 getEmitter()->emitIns_Call(emitCallType,
19332 INDEBUG_LDISASM_COMMA(sigInfo)
19336 gcInfo.gcVarPtrSetCur,
19337 gcInfo.gcRegGCrefSetCur,
19338 gcInfo.gcRegByrefSetCur,
19344 // -------------------------------------------------------------------------
19345 // Check for a direct stub call.
19348 // Get stub addr. This will return NULL if virtual call stubs are not active
19349 void *stubAddr = NULL;
19351 stubAddr = (void *) call->gtCall.gtStubCallStubAddr;
19353 noway_assert(stubAddr != NULL);
19355 // -------------------------------------------------------------------------
19356 // Direct stub calls, though the stubAddr itself may still need to be
19357 // accessed via an indirection.
19360 // No need to null check - the dispatch code will deal with null this.
19362 emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
19363 void* addr = stubAddr;
19365 regNumber callReg = REG_NA;
19367 if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
19369 #if CPU_LOAD_STORE_ARCH
19370 callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
19371 noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
19373 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC,REG_VIRTUAL_STUB_PARAM,(ssize_t)stubAddr);
19374 // The stub will write-back to this register, so don't track it
19375 regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
19376 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE,REG_JUMP_THUNK_PARAM,REG_VIRTUAL_STUB_PARAM, 0);
19377 regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
19378 callTypeStubAddr = emitter::EC_INDIR_R;
19379 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19381 INDEBUG_LDISASM_COMMA(sigInfo)
19385 gcInfo.gcVarPtrSetCur,
19386 gcInfo.gcRegGCrefSetCur,
19387 gcInfo.gcRegByrefSetCur,
19389 REG_JUMP_THUNK_PARAM);
19392 // emit an indirect call
19393 callTypeStubAddr = emitter::EC_INDIR_C;
19395 disp = (ssize_t) stubAddr;
19399 #if CPU_LOAD_STORE_ARCH
19400 if (callTypeStubAddr != emitter::EC_INDIR_R)
19403 getEmitter()->emitIns_Call(callTypeStubAddr,
19404 call->gtCall.gtCallMethHnd,
19405 INDEBUG_LDISASM_COMMA(sigInfo)
19409 gcInfo.gcVarPtrSetCur,
19410 gcInfo.gcRegGCrefSetCur,
19411 gcInfo.gcRegByrefSetCur,
19420 else // tailCall is true
19423 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
19424 // moves the this pointer out of its usual place and into the argument list.
19425 #ifdef _TARGET_X86_
19427 // Generate "cmp ECX, [ECX]" to trap null pointers
19428 const regNumber regThis = genGetThisArgReg(call);
19429 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19431 #endif // _TARGET_X86_
19433 if (callType == CT_INDIRECT)
19435 noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19437 // Now put the address in EAX.
19438 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
19439 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19441 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19445 // importer/EE should guarantee the indirection
19446 noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
19448 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, ssize_t(call->gtCall.gtStubCallStubAddr));
19451 fTailCallTargetIsVSD = true;
19455 // OK to start inserting random NOPs again
19457 getEmitter()->emitEnableRandomNops();
19461 case GTF_CALL_VIRT_VTABLE:
19462 // stub dispatching is off or this is not a virtual call (could be a tailcall)
19465 unsigned vtabOffsOfIndirection;
19466 unsigned vtabOffsAfterIndirection;
19468 noway_assert(callType == CT_USER_FUNC);
19470 vptrReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19471 vptrMask = genRegMask(vptrReg);
19473 /* The register no longer holds a live pointer value */
19474 gcInfo.gcMarkRegSetNpt(vptrMask);
19476 // MOV vptrReg, [REG_CALL_THIS + offs]
19477 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE,
19478 vptrReg, genGetThisArgReg(call), VPTR_OFFS);
19479 regTracker.rsTrackRegTrash(vptrReg);
19481 noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
19483 /* Get hold of the vtable offset (note: this might be expensive) */
19485 compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
19487 /* Get the appropriate vtable chunk */
19489 /* The register no longer holds a live pointer value */
19490 gcInfo.gcMarkRegSetNpt(vptrMask);
19492 // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
19493 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE,
19494 vptrReg, vptrReg, vtabOffsOfIndirection);
19496 /* Call through the appropriate vtable slot */
19500 /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
19502 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19503 vptrReg, vtabOffsAfterIndirection);
19507 #if CPU_LOAD_STORE_ARCH
19508 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, vtabOffsAfterIndirection);
19510 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19511 call->gtCall.gtCallMethHnd,
19512 INDEBUG_LDISASM_COMMA(sigInfo)
19516 gcInfo.gcVarPtrSetCur,
19517 gcInfo.gcRegGCrefSetCur,
19518 gcInfo.gcRegByrefSetCur,
19522 getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL,
19523 call->gtCall.gtCallMethHnd,
19524 INDEBUG_LDISASM_COMMA(sigInfo)
19528 gcInfo.gcVarPtrSetCur,
19529 gcInfo.gcRegGCrefSetCur,
19530 gcInfo.gcRegByrefSetCur,
19535 vtabOffsAfterIndirection); // disp
19536 #endif // CPU_LOAD_STORE_ARCH
19541 case GTF_CALL_NONVIRT:
19543 //------------------------ Non-virtual/Indirect calls -------------------------
19544 // Lots of cases follow
19545 // - Direct P/Invoke calls
19546 // - Indirect calls to P/Invoke functions via the P/Invoke stub
19547 // - Direct Helper calls
19548 // - Indirect Helper calls
19549 // - Direct calls to known addresses
19550 // - Direct calls where address is accessed by one or two indirections
19551 // - Indirect calls to computed addresses
19552 // - Tailcall versions of all of the above
19554 CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
19557 //------------------------------------------------------
19558 // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
19560 // For (final and private) functions which were called with
19561 // invokevirtual, but which we call directly, we need to
19562 // dereference the object pointer to make sure it's not NULL.
19565 if (call->gtFlags & GTF_CALL_NULLCHECK)
19567 /* Generate "cmp ECX, [ECX]" to trap null pointers */
19568 const regNumber regThis = genGetThisArgReg(call);
19569 #if CPU_LOAD_STORE_ARCH
19570 regNumber indReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
19571 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
19572 regTracker.rsTrackRegTrash(indReg);
19574 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19578 if (call->gtFlags & GTF_CALL_UNMANAGED)
19580 //------------------------------------------------------
19581 // Non-virtual/Indirect calls: PInvoke calls.
19583 noway_assert(compiler->info.compCallUnmanaged != 0);
19585 /* args shouldn't be greater than 64K */
19587 noway_assert((argSize&0xffff0000) == 0);
19589 /* Remember the varDsc for the callsite-epilog */
19591 frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
19593 // exact codegen is required
19594 getEmitter()->emitDisableRandomNops();
19598 regNumber indCallReg = REG_NA;
19600 if (callType == CT_INDIRECT)
19602 noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19604 if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
19605 indCallReg = call->gtCall.gtCallAddr->gtRegNum;
19607 nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
19612 noway_assert(callType == CT_USER_FUNC);
19616 tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19620 if (callType == CT_INDIRECT)
19622 /* Double check that the callee didn't use/trash the
19623 registers holding the call target.
19625 noway_assert(tcbReg != indCallReg);
19627 if (indCallReg == REG_NA)
19629 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19631 /* Please note that this even works with tcbReg == REG_EAX.
19632 tcbReg contains an interesting value only if frameListRoot is
19633 an enregistered local that stays alive across the call
19634 (certainly not EAX). If frameListRoot has been moved into
19635 EAX, we can trash it since it won't survive across the call
19639 inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
19640 regTracker.rsTrackRegTrash(indCallReg);
19643 emitCallType = emitter::EC_INDIR_R;
19647 noway_assert(callType == CT_USER_FUNC);
19650 addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
19653 #if CPU_LOAD_STORE_ARCH
19654 // Load the address into a register, indirect it and call through a register
19655 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19656 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19657 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19658 regTracker.rsTrackRegTrash(indCallReg);
19659 // Now make the call "call indCallReg"
19661 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19662 methHnd, // methHnd
19663 INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19667 gcInfo.gcVarPtrSetCur,
19668 gcInfo.gcRegGCrefSetCur,
19669 gcInfo.gcRegByrefSetCur,
19673 emitCallType = emitter::EC_INDIR_R;
19676 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19677 indCallReg = REG_NA;
19682 // Double-indirection. Load the address into a register
19683 // and call indirectly through a register
19684 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19686 #if CPU_LOAD_STORE_ARCH
19687 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
19688 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19689 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19690 regTracker.rsTrackRegTrash(indCallReg);
19692 emitCallType = emitter::EC_INDIR_R;
19695 getEmitter()->emitIns_R_AI(INS_mov,
19699 regTracker.rsTrackRegTrash(indCallReg);
19700 emitCallType = emitter::EC_INDIR_ARD;
19702 #endif // CPU_LOAD_STORE_ARCH
19706 getEmitter()->emitIns_Call(emitCallType,
19707 compiler->eeMarkNativeTarget(methHnd),
19708 INDEBUG_LDISASM_COMMA(sigInfo)
19712 gcInfo.gcVarPtrSetCur,
19713 gcInfo.gcRegGCrefSetCur,
19714 gcInfo.gcRegByrefSetCur,
19718 if (callType == CT_INDIRECT)
19719 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19721 getEmitter()->emitEnableRandomNops();
19723 // Done with PInvoke calls
19727 if (callType == CT_INDIRECT)
19729 noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
19731 if (call->gtCall.gtCallCookie)
19733 //------------------------------------------------------
19734 // Non-virtual indirect calls via the P/Invoke stub
19736 GenTreePtr cookie = call->gtCall.gtCallCookie;
19737 GenTreePtr target = call->gtCall.gtCallAddr;
19739 noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19741 noway_assert(cookie->gtOper == GT_CNS_INT ||
19742 cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
19744 noway_assert(args == argSize);
19746 #if defined(_TARGET_X86_)
19747 /* load eax with the real target */
19749 inst_RV_TT(INS_mov, REG_EAX, target);
19750 regTracker.rsTrackRegTrash(REG_EAX);
19752 if (cookie->gtOper == GT_CNS_INT)
19753 inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19755 inst_TT(INS_push, cookie);
19757 /* Keep track of ESP for EBP-less frames */
19760 argSize += sizeof(void *);
19762 #elif defined(_TARGET_ARM_)
19764 // Ensure that we spill these registers (if caller saved) in the prolog
19765 regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19767 // ARM: load r12 with the real target
19768 // X64: load r10 with the real target
19769 inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19770 regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19772 // ARM: load r4 with the pinvoke VASigCookie
19773 // X64: load r11 with the pinvoke VASigCookie
19774 if (cookie->gtOper == GT_CNS_INT)
19775 inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal, EA_HANDLE_CNS_RELOC);
19777 inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19778 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19780 noway_assert(args == argSize);
19782 // Ensure that we don't trash any of these registers if we have to load
19783 // the helper call target into a register to invoke it.
19784 regMaskTP regsUsed;
19785 regSet.rsLockReg(call->gtCall.gtCallRegUsedMask|RBM_PINVOKE_TARGET_PARAM|RBM_PINVOKE_COOKIE_PARAM, ®sUsed);
19787 NYI("Non-virtual indirect calls via the P/Invoke stub");
19791 noway_assert((size_t)(int)args == args);
19793 genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19795 #if defined(_TARGET_ARM_)
19796 regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask|RBM_PINVOKE_TARGET_PARAM|RBM_PINVOKE_COOKIE_PARAM, regsUsed);
19799 #ifdef _TARGET_ARM_
19800 // genEmitHelperCall doesn't record all registers a helper call would trash.
19801 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19807 //------------------------------------------------------
19808 // Non-virtual indirect calls
19812 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
19813 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19816 instEmit_indCall(call, args, retSize);
19819 genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19821 // Done with indirect calls
19825 //------------------------------------------------------
19826 // Non-virtual direct/indirect calls: Work out if the address of the
19827 // call is known at JIT time (if not it is either an indirect call
19828 // or the address must be accessed via an single/double indirection)
19830 noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19833 InfoAccessType accessType;
19835 helperNum = compiler->eeGetHelperNum(methHnd);
19837 if (callType == CT_HELPER)
19839 noway_assert(helperNum != CORINFO_HELP_UNDEF);
19842 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19844 accessType = IAT_VALUE;
19848 accessType = IAT_PVALUE;
19854 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19856 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
19858 if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19859 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19861 if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19862 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19864 CORINFO_CONST_LOOKUP addrInfo;
19865 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19867 accessType = addrInfo.accessType;
19868 addr = addrInfo.addr;
19873 noway_assert(callType == CT_USER_FUNC);
19875 switch (accessType)
19878 //------------------------------------------------------
19879 // Non-virtual direct calls to known addresses
19881 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19885 //------------------------------------------------------
19886 // Non-virtual direct calls to addresses accessed by
19887 // a single indirection.
19889 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19890 #if CPU_LOAD_STORE_ARCH
19892 regNumber indReg = REG_TAILCALL_ADDR;
19893 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19894 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19895 regTracker.rsTrackRegTrash(indReg);
19898 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR,
19900 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19905 //------------------------------------------------------
19906 // Non-virtual direct calls to addresses accessed by
19907 // a double indirection.
19909 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19910 #if CPU_LOAD_STORE_ARCH
19912 regNumber indReg = REG_TAILCALL_ADDR;
19913 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19914 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19915 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19916 regTracker.rsTrackRegTrash(indReg);
19919 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR,
19921 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19922 REG_TAILCALL_ADDR, 0);
19923 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19928 noway_assert(!"Bad accessType");
19934 switch (accessType)
19936 regNumber indCallReg;
19939 //------------------------------------------------------
19940 // Non-virtual direct calls to known addresses
19942 // The vast majority of calls end up here.... Wouldn't
19943 // it be nice if they all did!
19944 #ifdef _TARGET_ARM_
19945 if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19947 // Load the address into a register and call through a register
19948 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19949 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19951 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19953 INDEBUG_LDISASM_COMMA(sigInfo)
19957 gcInfo.gcVarPtrSetCur,
19958 gcInfo.gcRegGCrefSetCur,
19959 gcInfo.gcRegByrefSetCur,
19961 indCallReg, // ireg
19962 REG_NA, 0, 0, // xreg, xmul, disp
19964 emitter::emitNoGChelper(helperNum));
19969 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN,
19971 INDEBUG_LDISASM_COMMA(sigInfo)
19975 gcInfo.gcVarPtrSetCur,
19976 gcInfo.gcRegGCrefSetCur,
19977 gcInfo.gcRegByrefSetCur,
19979 REG_NA, REG_NA, 0, 0, /* ireg, xreg, xmul, disp */
19980 false, /* isJump */
19981 emitter::emitNoGChelper(helperNum));
19986 //------------------------------------------------------
19987 // Non-virtual direct calls to addresses accessed by
19988 // a single indirection.
19990 #if CPU_LOAD_STORE_ARCH
19991 // Load the address into a register, load indirect and call through a register
19992 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
19994 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19995 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19996 regTracker.rsTrackRegTrash(indCallReg);
19998 emitCallType = emitter::EC_INDIR_R;
20002 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
20003 indCallReg = REG_NA;
20005 #endif // CPU_LOAD_STORE_ARCH
20007 getEmitter()->emitIns_Call( emitCallType,
20009 INDEBUG_LDISASM_COMMA(sigInfo)
20013 gcInfo.gcVarPtrSetCur,
20014 gcInfo.gcRegGCrefSetCur,
20015 gcInfo.gcRegByrefSetCur,
20017 indCallReg, // ireg
20018 REG_NA, 0, 0, // xreg, xmul, disp
20019 false, /* isJump */
20020 emitter::emitNoGChelper(helperNum));
20025 //------------------------------------------------------
20026 // Non-virtual direct calls to addresses accessed by
20027 // a double indirection.
20029 // Double-indirection. Load the address into a register
20030 // and call indirectly through the register
20032 noway_assert(helperNum == CORINFO_HELP_UNDEF);
20034 // Grab an available register to use for the CALL indirection
20035 indCallReg = regSet.rsGrabReg(RBM_ALLINT);
20037 #if CPU_LOAD_STORE_ARCH
20038 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
20039 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
20040 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
20041 regTracker.rsTrackRegTrash(indCallReg);
20043 emitCallType = emitter::EC_INDIR_R;
20047 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC,
20050 regTracker.rsTrackRegTrash(indCallReg);
20052 emitCallType = emitter::EC_INDIR_ARD;
20054 #endif // CPU_LOAD_STORE_ARCH
20056 getEmitter()->emitIns_Call(emitCallType,
20058 INDEBUG_LDISASM_COMMA(sigInfo)
20062 gcInfo.gcVarPtrSetCur,
20063 gcInfo.gcRegGCrefSetCur,
20064 gcInfo.gcRegByrefSetCur,
20066 indCallReg, // ireg
20067 REG_NA, 0, 0, // xreg, xmul, disp
20069 emitter::emitNoGChelper(helperNum));
20074 noway_assert(!"Bad accessType");
20078 // tracking of region protected by the monitor in synchronized methods
20079 if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
20081 fPossibleSyncHelperCall = true;
20088 noway_assert(!"strange call type");
20093 /*-------------------------------------------------------------------------
20094 * For tailcalls, REG_INTRET contains the address of the target function,
20095 * enregistered args are in the correct registers, and the stack arguments
20096 * have been pushed on the stack. Now call the stub-sliding helper
20102 if (compiler->info.compCallUnmanaged)
20103 genPInvokeMethodEpilog();
20105 #ifdef _TARGET_X86_
20106 noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
20109 // Push the count of the incoming stack arguments
20111 unsigned nOldStkArgs = (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void *)))/sizeof(void*));
20112 getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
20113 genSinglePush(); // Keep track of ESP for EBP-less frames
20114 args += sizeof(void*);
20116 // Push the count of the outgoing stack arguments
20118 getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize/sizeof(void*));
20119 genSinglePush(); // Keep track of ESP for EBP-less frames
20120 args += sizeof(void*);
20122 // Push info about the callee-saved registers to be restored
20123 // For now, we always spill all registers if compiler->compTailCallUsed
20125 DWORD calleeSavedRegInfo =
20126 1 | // always restore EDI,ESI,EBX
20127 (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
20128 getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
20129 genSinglePush(); // Keep track of ESP for EBP-less frames
20130 args += sizeof(void*);
20132 // Push the address of the target function
20134 getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
20135 genSinglePush(); // Keep track of ESP for EBP-less frames
20136 args += sizeof(void*);
20138 #else // _TARGET_X86_
20141 retSize = EA_UNKNOWN;
20143 #endif // _TARGET_X86_
20145 if (compiler->getNeedsGSSecurityCookie())
20147 genEmitGSCookieCheck(true);
20150 // TailCall helper does not poll for GC. An explicit GC poll
20151 // Should have been placed in when we morphed this into a tail call.
20152 noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
20154 // Now call the helper
20156 genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
20160 /*-------------------------------------------------------------------------
20162 * Trash registers, pop arguments if needed, etc
20165 /* Mark the argument registers as free */
20167 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
20169 for (areg = 0; areg < MAX_REG_ARG; areg++)
20171 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
20173 // Is this one of the used argument registers?
20174 if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
20177 #ifdef _TARGET_ARM_
20178 if (regSet.rsUsedTree[areg] == NULL)
20180 noway_assert(areg % 2 == 1 && (((areg+1) >= MAX_REG_ARG) ||
20181 (regSet.rsUsedTree[areg+1]->TypeGet() == TYP_STRUCT) ||
20182 (genTypeStSz(regSet.rsUsedTree[areg+1]->TypeGet()) == 2)));
20187 regSet.rsMarkRegFree(curArgMask);
20189 // We keep regSet.rsMaskVars current during codegen, so we have to remove any
20190 // that have been copied into arg regs.
20192 regSet.RemoveMaskVars(curArgMask);
20193 gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
20194 gcInfo.gcRegByrefSetCur &= ~(curArgMask);
20197 //-------------------------------------------------------------------------
20198 // free up the FP args
20200 #if !FEATURE_STACK_FP_X87
20201 for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
20203 regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
20204 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
20206 // Is this one of the used argument registers?
20207 if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
20210 regSet.rsMaskUsed &= ~curArgMask;
20211 regSet.rsUsedTree[argRegNum] = NULL;
20213 #endif // !FEATURE_STACK_FP_X87
20215 /* restore the old argument register status */
20217 intRegState.rsCurRegArgNum = savCurIntArgReg;
20218 floatRegState.rsCurRegArgNum = savCurFloatArgReg;
20220 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
20222 /* Mark all trashed registers as such */
20224 if (calleeTrashedRegs)
20225 regTracker.rsTrashRegSet(calleeTrashedRegs);
20227 regTracker.rsTrashRegsForGCInterruptability();
20231 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
20233 if (compiler->verbose)
20235 printf("\t\t\t\t\t\t\tEnd call ");
20236 Compiler::printTreeID(call);
20237 printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
20239 noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
20244 #if FEATURE_STACK_FP_X87
20245 /* All float temps must be spilled around function calls */
20246 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20248 noway_assert(compCurFPState.m_uStackSize == 1);
20252 noway_assert(compCurFPState.m_uStackSize == 0);
20255 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20257 #ifdef _TARGET_ARM_
20258 if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
20260 // Result return for vararg methods is in r0, r1, but our callers would
20261 // expect the return in s0, s1 because of floating type. Do the move now.
20262 if (call->gtType == TYP_FLOAT)
20264 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
20268 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
20272 genMarkTreeInReg(call, REG_FLOATRET);
20276 /* The function will pop all arguments before returning */
20278 genStackLevel = saveStackLvl;
20280 /* No trashed registers may possibly hold a pointer at this point */
20283 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & ~regSet.rsMaskVars & ~vptrMask;
20286 // A reg may be dead already. The assertion is too strong.
20290 // use compiler->compCurLife
20291 for (varNum = 0, varDsc = compiler->lvaTable;
20292 varNum < compiler->lvaCount && ptrRegs != 0;
20293 varNum++ , varDsc++)
20295 /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
20297 if (!varDsc->lvTracked)
20299 if (!varDsc->lvRegister)
20301 if (varDsc->IsFloatRegType())
20304 /* Get hold of the index and the bitmask for the variable */
20306 unsigned varIndex = varDsc->lvVarIndex;
20308 /* Is this variable live currently? */
20310 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
20312 regNumber regNum = varDsc->lvRegNum;
20313 regMaskTP regMask = genRegMask(regNum);
20315 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
20316 ptrRegs &= ~regMask;
20321 printf("Bad call handling for ");
20322 Compiler::printTreeID(call);
20324 noway_assert(!"A callee trashed reg is holding a GC pointer");
20329 #if defined(_TARGET_X86_)
20330 //-------------------------------------------------------------------------
20331 // Create a label for tracking of region protected by the monitor in synchronized methods.
20332 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
20333 // so the GC state vars have been updated before creating the label.
20335 if (fPossibleSyncHelperCall)
20337 switch (helperNum) {
20338 case CORINFO_HELP_MON_ENTER:
20339 case CORINFO_HELP_MON_ENTER_STATIC:
20340 noway_assert(compiler->syncStartEmitCookie == NULL);
20341 compiler->syncStartEmitCookie = getEmitter()->emitAddLabel(
20342 gcInfo.gcVarPtrSetCur,
20343 gcInfo.gcRegGCrefSetCur,
20344 gcInfo.gcRegByrefSetCur);
20345 noway_assert(compiler->syncStartEmitCookie != NULL);
20347 case CORINFO_HELP_MON_EXIT:
20348 case CORINFO_HELP_MON_EXIT_STATIC:
20349 noway_assert(compiler->syncEndEmitCookie == NULL);
20350 compiler->syncEndEmitCookie = getEmitter()->emitAddLabel(
20351 gcInfo.gcVarPtrSetCur,
20352 gcInfo.gcRegGCrefSetCur,
20353 gcInfo.gcRegByrefSetCur);
20354 noway_assert(compiler->syncEndEmitCookie != NULL);
20360 #endif // _TARGET_X86_
20362 if (call->gtFlags & GTF_CALL_UNMANAGED)
20364 genDefineTempLabel(returnLabel);
20366 #ifdef _TARGET_X86_
20367 if (getInlinePInvokeCheckEnabled())
20369 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
20370 BasicBlock * esp_check;
20372 CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
20373 /* mov ecx, dword ptr [frame.callSiteTracker] */
20375 getEmitter()->emitIns_R_S (INS_mov,
20378 compiler->lvaInlinedPInvokeFrameVar,
20379 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20380 regTracker.rsTrackRegTrash(REG_ARG_0);
20382 /* Generate the conditional jump */
20384 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
20388 getEmitter()->emitIns_R_I (INS_add,
20396 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
20398 esp_check = genCreateTempLabel();
20400 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20401 inst_JMP(jmpEqual, esp_check);
20403 getEmitter()->emitIns(INS_BREAKPOINT);
20405 /* genCondJump() closes the current emitter block */
20407 genDefineTempLabel(esp_check);
20412 /* Are we supposed to pop the arguments? */
20414 #if defined(_TARGET_X86_)
20415 if (call->gtFlags & GTF_CALL_UNMANAGED)
20417 if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
20418 compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
20420 // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
20421 // take care of the cdecl argument popping here as well but the stack depth tracking logic
20422 // makes this very hard, i.e. it needs to "see" the actual pop.
20424 CORINFO_EE_INFO *pInfo = compiler->eeGetEEInfo();
20426 if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
20428 /* mov esp, dword ptr [frame.callSiteTracker] */
20429 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
20432 compiler->lvaInlinedPInvokeFrameVar,
20433 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20437 /* mov ecx, dword ptr [frame.callSiteTracker] */
20438 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
20441 compiler->lvaInlinedPInvokeFrameVar,
20442 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
20443 regTracker.rsTrackRegTrash(REG_ARG_0);
20445 /* lea esp, [ecx + argSize] */
20446 getEmitter()->emitIns_R_AR (INS_lea,
20454 #endif // _TARGET_X86_
20456 if (call->gtFlags & GTF_CALL_POP_ARGS)
20458 noway_assert(args == (size_t)-(int)argSize);
20462 genAdjustSP(argSize);
20466 if (pseudoStackLvl)
20468 noway_assert(call->gtType == TYP_VOID);
20477 /* What does the function return? */
20481 switch (call->gtType)
20486 gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
20491 #if!CPU_HAS_FP_SUPPORT
20494 retVal = RBM_INTRET;
20497 #ifdef _TARGET_ARM_
20500 assert(call->gtCall.gtRetClsHnd != NULL);
20501 assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
20502 int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
20503 assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
20504 assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
20505 retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
20511 #if!CPU_HAS_FP_SUPPORT
20514 retVal = RBM_LNGRET;
20517 #if CPU_HAS_FP_SUPPORT
20528 noway_assert(!"unexpected/unhandled fn return type");
20531 // We now have to generate the "call epilog" (if it was a call to unmanaged code).
20532 /* if it is a call to unmanaged code, frameListRoot must be set */
20534 noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
20537 genPInvokeCallEpilog(frameListRoot, retVal);
20539 if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
20541 if (frameListRoot->lvRegister)
20543 bool isBorn = false;
20544 bool isDying = true;
20545 genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
20550 if (compiler->opts.compStackCheckOnCall
20551 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
20552 //check the stack as frequently as possible
20553 && !call->IsHelperCall()
20555 && call->gtCall.gtCallType == CT_USER_FUNC
20559 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
20562 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
20563 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
20564 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
20565 regTracker.rsTrackRegTrash(REG_ARG_0);
20568 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
20570 BasicBlock * esp_check = genCreateTempLabel();
20571 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20572 inst_JMP(jmpEqual, esp_check);
20573 getEmitter()->emitIns(INS_BREAKPOINT);
20574 genDefineTempLabel(esp_check);
20578 #if FEATURE_STACK_FP_X87
20579 UnspillRegVarsStackFp();
20580 #endif // FEATURE_STACK_FP_X87
20582 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20584 // Restore return node if necessary
20585 if (call->gtFlags & GTF_SPILLED)
20587 UnspillFloat(call);
20591 #if FEATURE_STACK_FP_X87
20592 regSet.SetUsedRegFloat(call, false);
20596 #if FEATURE_STACK_FP_X87
20598 if (compiler->verbose)
20608 #pragma warning(pop)
20612 /*****************************************************************************
20614 * Create and record GC Info for the function.
20616 #ifdef JIT32_GCENCODER
// Create and record the GC info for the method, dispatching on the encoder
// that this build uses: the JIT32 encoder (x86) returns a pointer to the
// allocated info block, while the GcInfoEncoder-based path emits directly.
// NOTE(review): this view of the file is elided — the return-type line,
// braces, and the matching #else/#endif are not visible here; confirm
// against the full source before editing.
20621 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
20623 #ifdef JIT32_GCENCODER
// JIT32 path: forward all sizes (including epilogSize) to the x86 encoder.
20624 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
// Non-JIT32 path: the GcInfoEncoder does not take epilogSize.
20626 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
20630 #ifdef JIT32_GCENCODER
// Build and publish the JIT32-format GC info block for the method:
// size the header and pointer tables, allocate the block from the EE via
// allocGCInfo, then serialize header + pointer table into it, verifying the
// predicted sizes along the way. Returns a pointer to the allocated block.
// NOTE(review): this view of the file is elided — several lines (braces,
// #ifdef guards, trailing arguments of multi-line calls, the final return)
// are missing between the lines shown; confirm against the full source.
20631 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
// Scratch buffer used to measure the encoded header size before allocating.
20633 BYTE headerBuf[64];
20638 size_t headerSize =
// First gcInfoBlockHdrSave call computes the header's encoded size.
20640 compiler->compInfoBlkSize = gcInfo.gcInfoBlockHdrSave(headerBuf,
// Predict the pointer-table size (and the arg-table offset within it).
20648 size_t argTabOffset = 0;
20649 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
// Size-statistics accounting, split by interruptible vs non-interruptible
// methods (only compiled in under DISPLAY_SIZES — see #endif below).
20653 if (genInterruptible)
20655 gcHeaderISize += compiler->compInfoBlkSize;
20656 gcPtrMapISize += ptrMapSize;
20660 gcHeaderNSize += compiler->compInfoBlkSize;
20661 gcPtrMapNSize += ptrMapSize;
20664 #endif // DISPLAY_SIZES
// Total info block = encoded header + pointer table.
20666 compiler->compInfoBlkSize += ptrMapSize;
20668 /* Allocate the info block for the method */
20670 compiler->compInfoBlkAddr = (BYTE *) compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
// Disabled diagnostic dump of per-method size ratios; 'dataSize' is
// undefined here (see the TODO below), which is why this stays #if 0.
20672 #if 0 // VERBOSE_SIZES
20673 // TODO-Review: 'dataSize', below, is not defined
20675 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
20677 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
20678 compiler->info.compILCodeSize,
20679 compiler->compInfoBlkSize,
20680 codeSize + dataSize,
20681 codeSize + dataSize - prologSize - epilogSize,
20682 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
20683 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
20684 compiler->info.compClassName,
20685 compiler->info.compMethodName);
20690 /* Fill in the info block and return it to the caller */
// Remember the block start so the asserts below can check we wrote exactly
// headerSize + ptrMapSize bytes.
20692 void* infoPtr = compiler->compInfoBlkAddr;
20694 /* Create the method info block: header followed by GC tracking tables */
// Second gcInfoBlockHdrSave call (mask -1) actually writes the header.
20696 compiler->compInfoBlkAddr += gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1,
20703 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
20704 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
20705 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
// Debug-only hex dump of the finished block: header bytes, then the
// pointer table (presumably under DUMP_GC_TABLES — see #endif below).
20711 BYTE * temp = (BYTE *)infoPtr;
20712 unsigned size = compiler->compInfoBlkAddr - temp;
20713 BYTE * ptab = temp + headerSize;
20715 noway_assert(size == headerSize + ptrMapSize);
20717 printf("Method info block - header [%u bytes]:", headerSize);
20719 for (unsigned i = 0; i < size; i++)
20723 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
20724 printf("\n  %04X: %*c", i & ~0xF, 3*(i&0xF), ' ');
20729 printf("\n  %04X: ", i);
20732 printf("%02X ", *temp++);
// Optional structured dump: decode the header and pointer table back out
// of the encoded block and print them (controlled by opts.dspGCtbls).
20742 if (compiler->opts.dspGCtbls)
20744 const BYTE *base = (BYTE *)infoPtr;
20746 unsigned methodSize;
20747 InfoHdr dumpHeader;
20749 printf("GC Info for method %s\n", compiler->info.compFullName);
20750 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
20752 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
20753 // printf("size of header encoding is %3u\n", size);
20756 if (compiler->opts.dspGCtbls)
20759 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
20760 // printf("size of pointer table is %3u\n", size);
20762 noway_assert(compiler->compInfoBlkAddr == (base+size));
// Stress mode: decode pointer liveness at every code offset to exercise
// the decoder against the freshly encoded tables.
20768 if (jitOpts.testMask & 128)
20770 for (unsigned offs = 0; offs < codeSize; offs++)
20772 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
20776 #endif // DUMP_GC_TABLES
20778 /* Make sure we ended up generating the expected number of bytes */
20780 noway_assert(compiler->compInfoBlkAddr == (BYTE *)infoPtr + compiler->compInfoBlkSize);
20785 #else // JIT32_GCENCODER
// Build and publish GC info via the GcInfoEncoder (non-JIT32 targets).
// The encoder requires a two-pass protocol: first request slot IDs for every
// stack slot / register we will report, finalize them, then declare the
// actual live ranges, and finally Build/Emit. The encoder allocates the
// final buffer itself through ICorJitInfo::allocGCInfo.
// NOTE(review): elided view — the closing brace is not visible here.
20787 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
// Wrap the GC allocator so zero-byte allocations are permitted.
20789 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
20790 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20791 assert(gcInfoEncoder);
20793 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20794 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20796 // First we figure out the encoder ID's for the stack slots and registers.
20797 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
20798 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20799 gcInfoEncoder->FinalizeSlotIds();
20800 // Now we can actually use those slot ID's to declare live ranges.
20801 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
20803 gcInfoEncoder->Build();
20805 //GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
20806 //let's save the values anyway for debugging purposes
20807 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20808 compiler->compInfoBlkSize = 0; //not exposed by the GCEncoder interface
20813 /*****************************************************************************
// Generate code for a localloc: allocate 'size' bytes on the stack and
// return (in regCnt) the register holding the address of the new space
// (or zero/null when the requested size is 0). Handles four shapes:
//   1) constant size 0           -> just return null,
//   2) small constant + initMem  -> inline "push 0" sequence,
//   3) constant < page size, no init -> single SP adjust (after touching SP),
//   4) everything else           -> a loop that pushes zeros (initMem) or
//      probes the guard page one page at a time (no init).
// On FEATURE_FIXED_OUT_ARGS targets the outgoing-arg area is popped off
// before allocating and re-established afterwards so the new space sits
// below the frame but above the outgoing args.
// NOTE(review): this view of the file is elided — braces, #else/#endif
// lines, and some statements between the lines shown are missing; the
// comments below describe only what the visible lines establish.
20817 regNumber CodeGen::genLclHeap(GenTreePtr size)
// The size operand must be a (possibly pointer-sized) integer.
20819 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
20821 // regCnt is a register used to hold both
20822 // the amount to stack alloc (either in bytes or pointer sized words)
20823 // and the final stack alloc address to return as the result
20825 regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
20826 var_types type = genActualType(size->gtType);
20827 emitAttr easz = emitTypeSize(type);
// Optional stress check: verify SP still matches the saved ESP-check slot
// before we start moving it (breakpoint if not).
20831 if (compiler->opts.compStackCheckOnRet)
20833 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20834 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20836 BasicBlock * esp_check = genCreateTempLabel();
20837 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20838 inst_JMP(jmpEqual, esp_check);
20839 getEmitter()->emitIns(INS_BREAKPOINT);
20840 genDefineTempLabel(esp_check);
// localloc requires a frame pointer and an empty operand stack.
20844 noway_assert(isFramePointerUsed());
20845 noway_assert(genStackLevel == 0); // Can't have anything on the stack
// endLabel is only created for non-constant sizes (zero-size bail-out).
20847 BasicBlock* endLabel = NULL;
20848 #if FEATURE_FIXED_OUT_ARGS
20849 bool stackAdjusted = false;
// ---- Constant-size path -------------------------------------------------
20852 if (size->IsCnsIntOrI())
20854 #if FEATURE_FIXED_OUT_ARGS
20855 // If we have an outgoing arg area then we must adjust the SP
20856 // essentially popping off the outgoing arg area,
20857 // We will restore it right before we return from this method
20859 if (compiler->lvaOutgoingArgSpaceSize > 0)
20861 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
20862 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20863 stackAdjusted = true;
20866 size_t amount = size->gtIntCon.gtIconVal;
20868 // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
// Round up to stack alignment, then convert bytes -> pointer-sized words.
20869 amount += (STACK_ALIGN - 1);
20870 amount &= ~(STACK_ALIGN - 1);
20871 amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
20872 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20874 /* If amount is zero then return null in RegCnt */
20877 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20878 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20882 /* For small allocations we will generate up to six push 0 inline */
20885 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20886 #if CPU_LOAD_STORE_ARCH
// Load/store architectures cannot "push immediate 0", so zero a register
// and push that instead.
20887 regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20888 // Set 'regZero' to zero
20889 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
// Emit one push per pointer-sized word (zero-initializing as we go).
20892 while (amount != 0)
20894 #if CPU_LOAD_STORE_ARCH
20895 inst_IV(INS_push, (unsigned) genRegMask(regZero));
20897 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
// The result is the new SP: copy it into regCnt.
20902 regTracker.rsTrackRegTrash(regCnt);
20903 // --- move regCnt, ESP
20904 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
// No zero-init requested: if the amount fits in one page we can adjust SP
// directly after a single guard-page touch.
20909 if (!compiler->info.compInitMem)
20911 // Re-bias amount to be number of bytes to adjust the SP
20912 amount <<= STACK_ALIGN_SHIFT;
20913 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20914 if (amount < compiler->eeGetPageSize()) // must be < not <=
20916 // Since the size is a page or less, simply adjust ESP
20918 // ESP might already be in the guard page, must touch it BEFORE
20919 // the alloc, not after.
20920 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20921 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL)
20922 #if CPU_LOAD_STORE_ARCH
// ARM: touch [SP] with a load; x86 uses a TEST against [ESP] below.
20923 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20924 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
20925 regTracker.rsTrackRegTrash(regTmp);
20927 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
// Compute the new SP in regCnt, then commit it to SP.
20929 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
20930 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
20931 regTracker.rsTrackRegTrash(regCnt);
// ---- Variable-size path -------------------------------------------------
20938 // Compute the size of the block to allocate
20939 genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
20940 noway_assert(size->gtFlags & GTF_REG_VAL);
20941 regCnt = size->gtRegNum;
20943 #if FEATURE_FIXED_OUT_ARGS
20944 // If we have an outgoing arg area then we must adjust the SP
20945 // essentially popping off the outgoing arg area,
20946 // We will restore it right before we return from this method
20948 if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
20950 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
20951 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20952 stackAdjusted = true;
20956 // Perform alignment if we don't have a GT_CNS size
20958 if (!size->IsCnsIntOrI())
20960 endLabel = genCreateTempLabel();
20962 // If 0 we bail out
20963 instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
20964 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20965 inst_JMP(jmpEqual, endLabel);
20967 // Align to STACK_ALIGN
20968 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
20970 if (compiler->info.compInitMem)
20972 // regCnt will be the number of pointer-sized words to locAlloc
20973 // If the shift right won't do the 'and' do it here
20974 #if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
20975 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20977 // --- shr regCnt, 2 ---
20978 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
20982 // regCnt will be the total number of bytes to locAlloc
20984 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20988 BasicBlock* loop; loop = genCreateTempLabel();
// Variable size + initMem: push zeros in a loop (x86 one word per
// iteration, ARM two registers per iteration).
20990 if (compiler->info.compInitMem)
20992 // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
20994 /* Since we have to zero out the allocated memory AND ensure that
20995 ESP is always valid by tickling the pages, we will just push 0's
20998 #if defined(_TARGET_ARM_)
20999 regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
21000 regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
21001 // Set 'regZero1' and 'regZero2' to zero
21002 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
21003 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
21007 genDefineTempLabel(loop);
21009 #if defined(_TARGET_X86_)
21011 inst_IV(INS_push_hide, 0); // --- push 0
21013 inst_RV(INS_dec, regCnt, type);
21015 #elif defined(_TARGET_ARM_)
21017 inst_IV(INS_push, (unsigned) (genRegMask(regZero1) | genRegMask(regZero2)));
21019 inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
21022 assert(!"Codegen missing");
// Loop until the word count reaches zero.
21025 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
21026 inst_JMP(jmpNotEqual, loop);
21028 // Move the final value of ESP into regCnt
21029 inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
21030 regTracker.rsTrackRegTrash(regCnt);
// Variable size, no initMem: probe the guard page one page at a time.
21034 // At this point 'regCnt' is set to the total number of bytes to locAlloc
21036 /* We don't need to zero out the allocated memory. However, we do have
21037 to tickle the pages to ensure that ESP is always valid and is
21038 in sync with the "stack guard page".  Note that in the worst
21039 case ESP is on the last byte of the guard page.  Thus you must
21040 touch ESP+0 first not ESP+x01000.
21042 Another subtlety is that you don't want ESP to be exactly on the
21043 boundary of the guard page because PUSH is predecrement, thus
21044 call setup would not touch the guard page but just beyond it */
21046 /* Note that we go through a few hoops so that ESP never points to
21047 illegal pages at any time during the ticking process
21050 add   REG, ESP    // reg now holds ultimate ESP
21051 jb    loop        // result is smaller than orignial ESP (no wrap around)
21052 xor   REG, REG,   // Overflow, pick lowest possible number
21054 test  ESP, [ESP+0]     // X86 - tickle the page
21055 ldr   REGH,[ESP+0]     // ARM - tickle the page
21057 sub   REGH, PAGE_SIZE
// Compute the target (final) SP in regCnt; on under/overflow clamp to 0
// so the probe loop still terminates safely.
21065 #ifdef _TARGET_ARM_
21066 inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
21067 inst_JMP(EJ_hs, loop);
21069 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
21070 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
21071 inst_JMP(EJ_jb, loop);
21073 regTracker.rsTrackRegTrash(regCnt);
21075 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
21077 genDefineTempLabel(loop);
21079 // This is a workaround to avoid the emitter trying to track the
21080 // decrement of the ESP - we do the subtraction in another reg
21081 // instead of adjusting ESP directly.
21083 regNumber regTemp = regSet.rsPickReg();
21085 // Tickle the decremented value, and move back to ESP,
21086 // note that it has to be done BEFORE the update of ESP since
21087 // ESP might already be on the guard page.  It is OK to leave
21088 // the final value of ESP on the guard page
21090 #if CPU_LOAD_STORE_ARCH
21091 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
21093 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
// Step SP down by exactly one page per iteration, touching each page.
21096 inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
21097 regTracker.rsTrackRegTrash(regTemp);
21099 inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
21100 inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
// Re-acquire the register holding the target SP and loop while SP is
// still (unsigned) above it.
21102 genRecoverReg(size, RBM_ALLINT, RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
21103 noway_assert(size->gtFlags & GTF_REG_VAL);
21104 regCnt = size->gtRegNum;
21105 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
21106 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
21107 inst_JMP(jmpGEU, loop);
21109 // Move the final value to ESP
21110 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
21112 regSet.rsMarkRegFree(genRegMask(regCnt));
// ---- Common tail --------------------------------------------------------
21116 noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
21118 if (endLabel != NULL)
21119 genDefineTempLabel(endLabel);
21121 #if FEATURE_FIXED_OUT_ARGS
21122 // If we have an outgoing arg area then we must readjust the SP
21126 assert(compiler->lvaOutgoingArgSpaceSize > 0);
21127 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
21128 inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
// Record the post-localloc SP so EH/unwind code can restore it.
21132 /* Write the lvaShadowSPfirst stack frame slot */
21133 noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
21134 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
21135 compiler->lvaLocAllocSPvar, 0);
21138 // Don't think it is worth it the codegen complexity to embed this
21139 // when it's possible in each of the customized allocas.
21140 if (compiler->opts.compNeedStackProbes)
21142 genGenerateStackProbe();
// Update the saved ESP-check slot with the new SP for later verification.
21148 if (compiler->opts.compStackCheckOnRet)
21150 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
21151 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
21159 /*****************************************************************************/
21160 #ifdef DEBUGGING_SUPPORT
21161 /*****************************************************************************
21164 * Called for every scope info piece to record by the main genSetScopeInfo()
21167 void CodeGen::genSetScopeInfo (unsigned which,
21168 UNATIVE_OFFSET startOffs,
21169 UNATIVE_OFFSET length,
21173 Compiler::siVarLoc& varLoc)
21175 /* We need to do some mapping while reporting back these variables */
21177 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
21178 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
21180 // Non-x86 platforms are allowed to access all arguments directly
21181 // so we don't need this code.
21182 #ifdef _TARGET_X86_
21184 // Is this a varargs function?
21186 if (compiler->info.compIsVarArgs &&
21187 varNum != compiler->lvaVarargsHandleArg &&
21188 varNum < compiler->info.compArgsCount &&
21189 !compiler->lvaTable[varNum].lvIsRegArg)
21191 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
21193 // All stack arguments (except the varargs handle) have to be
21194 // accessed via the varargs cookie. Discard generated info,
21195 // and just find its position relative to the varargs handle
21197 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
21198 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
21200 noway_assert(!compiler->opts.compDbgCode);
21204 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
21205 // arguments of vararg functions to avoid reporting them to GC.
21206 noway_assert(!compiler->lvaTable[varNum].lvRegister);
21207 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
21208 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
21210 noway_assert(cookieOffset < varOffset);
21211 unsigned offset = varOffset - cookieOffset;
21212 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void *);
21213 noway_assert(offset < stkArgSize);
21214 offset = stkArgSize - offset;
21216 varLoc.vlType = Compiler::VLT_FIXED_VA;
21217 varLoc.vlFixedVarArg.vlfvOffset = offset;
21220 #endif // _TARGET_X86_
21222 VarName name = NULL;
21226 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
21228 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
21230 name = compiler->info.compVarScopes[scopeNum].vsdName;
21234 // Hang on to this compiler->info.
21236 TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which];
21238 tlvi.tlviVarNum = ilVarNum;
21239 tlvi.tlviLVnum = LVnum;
21240 tlvi.tlviName = name;
21241 tlvi.tlviStartPC = startOffs;
21242 tlvi.tlviLength = length;
21243 tlvi.tlviAvailable = avail;
21244 tlvi.tlviVarLoc = varLoc;
21248 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
21251 /*****************************************************************************/
21252 #endif // DEBUGGING_SUPPORT
21253 /*****************************************************************************/
21255 /*****************************************************************************
21257 * Return non-zero if the given register is free after the given tree is
21258 * evaluated (i.e. the register is either not used at all, or it holds a
21259 * register variable which is not live after the given node).
21260 * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
21261 * constant operand, and one that's in a register. Thus, the only thing we
21262 * need to determine is whether the register holding op1 is dead.
21264 bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
21267 regMaskTP mask = genRegMask(reg);
21269 if (regSet.rsMaskUsed & mask)
21272 assert(tree->gtOper == GT_ADD);
21273 GenTreePtr regValTree = tree->gtOp.gtOp1;
21274 if (!tree->gtOp.gtOp2->IsCnsIntOrI())
21276 regValTree = tree->gtOp.gtOp2;
21277 assert(tree->gtOp.gtOp1->IsCnsIntOrI());
21279 assert(regValTree->gtFlags & GTF_REG_VAL);
21281 /* At this point, the only way that the register will remain live
21282 * is if it is itself a register variable that isn't dying.
21284 assert(regValTree->gtRegNum == reg);
21285 if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
21291 /*****************************************************************************/
21293 // This method calculates the USE and DEF values for a statement.
21294 // It also calls fgSetRngChkTarget for the statement.
21296 // We refactor out this code from fgPerBlockLocalVarLiveness
21297 // and add QMARK logics to it.
21299 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
21301 // The usage of this method is very limited.
21302 // We should only call it for the first node in the statement or
21303 // for the node after the GTF_RELOP_QMARK node.
21305 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
21309 Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
21310 when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
21313 Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
21316 The algorithm we use is:
21317 (1) We walk these nodes according the the evaluation order (i.e. from node 1 to node 11).
21318 (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
21319 We cache copies of current fgCurDefSet and fgCurUseSet.
21320 (The fact that it is recursively calling itself is for nested QMARK case,
21321 where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
21322 (3) We walk the thenTree.
21323 (4) When we see GT_COLON node, we know that we just finished the thenTree.
21324 We then make a copy of the current fgCurDefSet and fgCurUseSet,
21325 restore them to the ones before the thenTree, and then continue walking
21327 (5) When we see the GT_QMARK node, we know we just finished the elseTree.
21328 So we combine the results from the thenTree and elseTree and then return.
21331 +--------------------+
21333 +----------+---------+
21339 +---------------------+ +--------------------+
21340 | GT_<cond> 3 | | GT_COLON 7 |
21341 | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
21342 +----------+----------+ +---------+----------+
21348 2 1 thenTree 6 elseTree 10
21351 +----------------+ / / \ / \
21352 |prevExpr->gtNext+------/ / \ / \
21353 +----------------+ / \ / \
21359 GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
21360 GenTreePtr relopNode, // The node before the startNode.
21361 // (It should either be NULL or
21362 // a GTF_RELOP_QMARK node.)
21363 GenTreePtr asgdLclVar
21368 VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
21369 VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore then before entering the elseTree.
21371 bool heapUse_BeforeSplit = fgCurHeapUse;
21372 bool heapDef_BeforeSplit = fgCurHeapDef;
21373 bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
21375 VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store the USE and DEF sets after
21376 VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
21378 bool heapUse_AfterThenTree = fgCurHeapUse;
21379 bool heapDef_AfterThenTree = fgCurHeapDef;
21380 bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
21382 // relopNode is either NULL or a GTF_RELOP_QMARK node.
21383 assert(!relopNode ||
21384 (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK)
21387 // If relopNode is NULL, then the startNode must be the 1st node of the statement.
21388 // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
21389 assert( (!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
21390 (relopNode && startNode == relopNode->gtNext)
21393 for (tree = startNode; tree; tree = tree->gtNext)
21395 switch (tree->gtOper)
21400 // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
21401 noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
21403 // By the time we see a GT_QMARK, we must have finished processing the elseTree.
21404 // So it's the time to combine the results
21405 // from the the thenTree and the elseTree, and then return.
21407 VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
21408 VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
21410 fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
21411 fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
21412 fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
21414 // Return the GT_QMARK node itself so the caller can continue from there.
21415 // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
21416 // in the "for" statement.
21420 // By the time we see GT_COLON, we must have just walked the thenTree.
21421 // So we need to do two things here.
21422 // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
21423 // with the result from the elseTree.
21424 // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
21425 // and then continue walking the elseTree.
21426 VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
21427 VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
21429 heapDef_AfterThenTree = fgCurHeapDef;
21430 heapHavoc_AfterThenTree = fgCurHeapHavoc;
21431 heapUse_AfterThenTree = fgCurHeapUse;
21433 VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
21434 VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
21436 fgCurHeapDef = heapDef_BeforeSplit;
21437 fgCurHeapHavoc = heapHavoc_BeforeSplit;
21438 fgCurHeapUse = heapUse_BeforeSplit;
21444 case GT_LCL_VAR_ADDR:
21445 case GT_LCL_FLD_ADDR:
21446 case GT_STORE_LCL_VAR:
21447 case GT_STORE_LCL_FLD:
21448 fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
21452 // For Volatile indirection, first mutate the global heap
21453 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
21454 // This models Volatile reads as def-then-use of the heap.
21455 // and allows for a CSE of a subsequent non-volatile read
21456 if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
21458 // For any Volatile indirection, we must handle it as a
21459 // definition of the global heap
21460 fgCurHeapDef = true;
21463 // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
21464 // Otherwise, we treat it as a use here.
21465 if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
21467 fgCurHeapUse = true;
21472 // For Volatile indirection, first mutate the global heap
21473 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
21474 // This models Volatile reads as def-then-use of the heap.
21475 // and allows for a CSE of a subsequent non-volatile read
21476 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
21478 // For any Volatile indirection, we must handle it as a
21479 // definition of the global heap
21480 fgCurHeapDef = true;
21483 // If the GT_IND is the lhs of an assignment, we'll handle it
21484 // as a heap def, when we get to assignment.
21485 // Otherwise, we treat it as a use here.
21486 if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
21488 GenTreeLclVarCommon* dummyLclVarTree = NULL;
21489 bool dummyIsEntire = false;
21490 GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
21491 if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/0, &dummyLclVarTree, &dummyIsEntire))
21495 fgCurHeapUse = true;
21500 // Defines a local addr
21501 assert(dummyLclVarTree != nullptr);
21502 fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
21507 // These should have been morphed away to become GT_INDs:
21513 // We'll assume these are use-then-defs of the heap.
21520 fgCurHeapUse = true;
21522 fgCurHeapDef = true;
21523 fgCurHeapHavoc = true;
21526 case GT_MEMORYBARRIER:
21527 // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
21528 fgCurHeapDef = true;
21531 // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
21534 GenTreeCall* call = tree->AsCall();
21535 bool modHeap = true;
21536 if (call->gtCallType == CT_HELPER)
21538 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
21540 if ( !s_helperCallProperties.MutatesHeap(helpFunc)
21541 && !s_helperCallProperties.MayRunCctor(helpFunc))
21550 fgCurHeapUse = true;
21552 fgCurHeapDef = true;
21553 fgCurHeapHavoc = true;
21557 // If this is a p/invoke unmanaged call or if this is a tail-call
21558 // and we have an unmanaged p/invoke call in the method,
21559 // then we're going to run the p/invoke epilog.
21560 // So we mark the FrameRoot as used by this instruction.
21561 // This ensures that the block->bbVarUse will contain
21562 // the FrameRoot local var if is it a tracked variable.
21564 if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
21566 /* Get the TCB local and mark it as used */
21568 noway_assert(info.compLvFrameListRoot < lvaCount);
21570 LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
21572 if (varDsc->lvTracked)
21574 if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
21576 VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
21585 // Determine whether it defines a heap location.
21586 if (tree->OperIsAssignment() || tree->OperIsBlkOp())
21588 GenTreeLclVarCommon* dummyLclVarTree = NULL;
21589 if (!tree->DefinesLocal(this, &dummyLclVarTree))
21591 // If it doesn't define a local, then it might update the heap.
21592 fgCurHeapDef = true;
21596 // Are we seeing a GT_<cond> for a GT_QMARK node?
21597 if ( (tree->OperKind() & GTK_RELOP) &&
21598 (tree->gtFlags & GTF_RELOP_QMARK)
21600 // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
21601 // Recursively call fgLegacyPerStatementLocalVarLiveness.
21602 // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the current
21603 // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
21604 // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON node.
21605 tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
21607 // We must have been returned here after seeing a GT_QMARK node.
21608 noway_assert(tree->gtOper == GT_QMARK);
21619 /*****************************************************************************/
21621 /*****************************************************************************
21622 * Initialize the TCB local and the NDirect stub, afterwards "push"
21623 * the hoisted NDirect stub.
21625 * 'initRegs' is the set of registers which will be zeroed out by the prolog
21626 * typically initRegs is zero
21628 * The layout of the NDirect Inlined Call Frame is as follows:
21629 * (see VM/frames.h and VM/JITInterface.cpp for more information)
21631 * offset field name when set
21632 * --------------------------------------------------------------
21633 * +00h vptr for class InlinedCallFrame method prolog
21634 * +04h m_Next method prolog
21635 * +08h m_Datum call site
21636 * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
21637 * +10h m_pCallerReturnAddress call site
21638 * +14h m_pCalleeSavedRegisters not set by JIT
21639 * +18h JIT retval spill area (int) before call_gc
21640 * +1ch JIT retval spill area (long) before call_gc
21641 * +20h Saved value of EBP method prolog
21644 regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
21646 assert(compiler->compGeneratingProlog);
21647 noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21648 noway_assert(compiler->info.compCallUnmanaged);
21650 CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
21651 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21653 /* let's find out if compLvFrameListRoot is enregistered */
21655 LclVarDsc * varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21657 noway_assert(!varDsc->lvIsParam);
21658 noway_assert(varDsc->lvType == TYP_I_IMPL);
21660 DWORD threadTlsIndex, *pThreadTlsIndex;
21662 threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**) &pThreadTlsIndex);
21663 #if defined(_TARGET_X86_)
21664 if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
21669 // Instead of calling GetThread(), and getting GS cookie and
21670 // InlinedCallFrame vptr through indirections, we'll call only one helper.
21671 // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
21672 // and uses REG_PINVOKE_SCRATCH as scratch register.
21673 getEmitter()->emitIns_R_S (INS_lea,
21676 compiler->lvaInlinedPInvokeFrameVar,
21677 pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21678 regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21680 // We're about to trask REG_PINVOKE_TCB, it better not be in use!
21681 assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
21683 // Don't use the argument registers (including the special argument in
21684 // REG_PINVOKE_FRAME) for computing the target address.
21685 regSet.rsLockReg(RBM_ARG_REGS|RBM_PINVOKE_FRAME);
21687 genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
21689 regSet.rsUnlockReg(RBM_ARG_REGS|RBM_PINVOKE_FRAME);
21691 if (varDsc->lvRegister)
21693 regNumber regTgt = varDsc->lvRegNum;
21695 // we are about to initialize it. So turn the bit off in initRegs to prevent
21696 // the prolog reinitializing it.
21697 initRegs &= ~genRegMask(regTgt);
21699 if (regTgt != REG_PINVOKE_TCB)
21701 // move TCB to the its register if necessary
21702 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
21703 regTracker.rsTrackRegTrash(regTgt);
21708 // move TCB to its stack location
21709 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21712 compiler->info.compLvFrameListRoot,
21716 // We are done, the rest of this function deals with the inlined case.
21722 if (varDsc->lvRegister)
21724 regTCB = varDsc->lvRegNum;
21726 // we are about to initialize it. So turn the bit off in initRegs to prevent
21727 // the prolog reinitializing it.
21728 initRegs &= ~genRegMask(regTCB);
21730 else // varDsc is allocated on the Stack
21732 regTCB = REG_PINVOKE_TCB;
21735 /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
21737 // TODO-ARM-CQ: should we inline TlsGetValue here?
21738 #if !defined(_TARGET_ARM_) && !defined(_TARGET_AMD64_)
21739 #define WIN_NT_TLS_OFFSET (0xE10)
21740 #define WIN_NT5_TLS_HIGHOFFSET (0xf94)
21742 if (threadTlsIndex < 64)
21744 // mov reg, FS:[0xE10+threadTlsIndex*4]
21745 getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
21749 WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
21750 regTracker.rsTrackRegTrash(regTCB);
21754 noway_assert(pInfo->osMajor >= 5);
21756 DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
21757 threadTlsIndex -= 64;
21759 // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
21760 // mov reg, [reg+threadTlsIndex*4]
21762 getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
21767 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL),
21771 threadTlsIndex*sizeof(int));
21772 regTracker.rsTrackRegTrash(regTCB);
21776 /* save TCB in local var if not enregistered */
21778 if (!varDsc->lvRegister)
21780 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21783 compiler->info.compLvFrameListRoot,
21787 /* set frame's vptr */
21789 const void * inlinedCallFrameVptr, **pInlinedCallFrameVptr;
21790 inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**) &pInlinedCallFrameVptr);
21791 noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
21793 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t) inlinedCallFrameVptr,
21794 compiler->lvaInlinedPInvokeFrameVar,
21795 pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
21796 REG_PINVOKE_SCRATCH);
21798 // Set the GSCookie
21799 GSCookie gsCookie, * pGSCookie;
21800 compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
21801 noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
21803 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t) gsCookie,
21804 compiler->lvaInlinedPInvokeFrameVar,
21805 pInfo->inlinedCallFrameInfo.offsetOfGSCookie,
21806 REG_PINVOKE_SCRATCH);
21808 /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
21809 set next field in frame */
21811 getEmitter()->emitIns_R_AR (ins_Load(TYP_I_IMPL),
21813 REG_PINVOKE_SCRATCH,
21815 pInfo->offsetOfThreadFrame);
21816 regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21818 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21820 REG_PINVOKE_SCRATCH,
21821 compiler->lvaInlinedPInvokeFrameVar,
21822 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21824 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
21826 /* set EBP value in frame */
21827 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
21829 genFramePointerReg(),
21830 compiler->lvaInlinedPInvokeFrameVar,
21831 pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
21833 /* reset track field in frame */
21834 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
21835 compiler->lvaInlinedPInvokeFrameVar,
21836 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress,
21837 REG_PINVOKE_SCRATCH);
21839 /* get address of our frame */
21841 getEmitter()->emitIns_R_S (INS_lea,
21843 REG_PINVOKE_SCRATCH,
21844 compiler->lvaInlinedPInvokeFrameVar,
21845 pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21846 regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21848 /* now "push" our N/direct frame */
21850 getEmitter()->emitIns_AR_R (ins_Store(TYP_I_IMPL),
21852 REG_PINVOKE_SCRATCH,
21854 pInfo->offsetOfThreadFrame);
21860 /*****************************************************************************
21861 * Unchain the InlinedCallFrame.
21862 * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
21865 void CodeGen::genPInvokeMethodEpilog()
21867 noway_assert(compiler->info.compCallUnmanaged);
21868 noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21869 noway_assert(compiler->compCurBB == compiler->genReturnBB ||
21870 (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
21871 (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
21873 CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
21874 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21876 getEmitter()->emitDisableRandomNops();
21877 //debug check to make sure that we're not using ESI and/or EDI across this call, except for
21878 //compLvFrameListRoot.
21879 unsigned regTrashCheck = 0;
21881 /* XXX Tue 5/29/2007
21882 * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
21883 * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
21885 LclVarDsc * varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21887 regNumber reg2 = REG_PINVOKE_FRAME;
21891 // Two cases for epilog invocation:
21894 // We can trash the ESI/EDI registers.
21897 // When tail called, we'd like to preserve enregistered args,
21898 // in ESI/EDI so we can pass it to the callee.
21900 // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
21901 // Instead use the reserved local variable slot.
21903 if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21905 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21907 #if FEATURE_FIXED_OUT_ARGS
21908 // Save the register in the reserved local var slot.
21909 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->lvaPInvokeFrameRegSaveVar, 0);
21911 inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
21914 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21916 #if FEATURE_FIXED_OUT_ARGS
21917 // Save the register in the reserved local var slot.
21918 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21920 inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
21925 if (varDsc->lvRegister)
21927 reg = varDsc->lvRegNum;
21929 reg2 = REG_PINVOKE_TCB;
21931 regTrashCheck |= genRegMask(reg2);
21935 /* mov esi, [tcb address] */
21937 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
21940 compiler->info.compLvFrameListRoot,
21942 regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21943 reg = REG_PINVOKE_TCB;
21945 regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
21948 /* mov edi, [ebp-frame.next] */
21950 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
21953 compiler->lvaInlinedPInvokeFrameVar,
21954 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21955 regTracker.rsTrackRegTrash(reg2);
21957 /* mov [esi+offsetOfThreadFrame], edi */
21959 getEmitter()->emitIns_AR_R (ins_Store(TYP_I_IMPL),
21963 pInfo->offsetOfThreadFrame);
21965 noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
21967 if (compiler->genReturnLocal != BAD_VAR_NUM &&
21968 compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
21969 compiler->lvaTable[compiler->genReturnLocal].lvRegister)
21971 //really make sure we're not clobbering compiler->genReturnLocal.
21972 noway_assert(!(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum)
21973 & ( (varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0)
21974 | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
21977 (void)regTrashCheck;
21979 // Restore the registers ESI and EDI.
21980 if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21982 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21984 #if FEATURE_FIXED_OUT_ARGS
21985 // Restore the register from the reserved local var slot.
21986 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21988 inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
21990 regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21992 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21994 #if FEATURE_FIXED_OUT_ARGS
21995 // Restore the register from the reserved local var slot.
21996 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->lvaPInvokeFrameRegSaveVar, 0);
21998 inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
22000 regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
22003 getEmitter()->emitEnableRandomNops();
22007 /*****************************************************************************
22008 This function emits the call-site prolog for direct calls to unmanaged code.
22009 It does all the necessary setup of the InlinedCallFrame.
22010 frameListRoot specifies the local containing the thread control block.
22011 argSize or methodToken is the value to be copied into the m_datum
22012 field of the frame (methodToken may be indirected & have a reloc)
22013 The function returns the register now containing the thread control block,
22014 (it could be either enregistered or loaded into one of the scratch registers)
22017 regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
22019 CORINFO_METHOD_HANDLE methodToken,
22020 BasicBlock* returnLabel)
22022 // Some stack locals might be 'cached' in registers, we need to trash them
22023 // from the regTracker *and* also ensure the gc tracker does not consider
22024 // them live (see the next assert). However, they might be live reg vars
22025 // that are non-pointers CSE'd from pointers.
22026 // That means the register will be live in rsMaskVars, so we can't just
22027 // call gcMarkSetNpt().
22029 regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
22030 gcInfo.gcRegGCrefSetCur &= ~deadRegs;
22031 gcInfo.gcRegByrefSetCur &= ~deadRegs;
22034 deadRegs &= regSet.rsMaskVars;
22037 for (LclVarDsc * varDsc = compiler->lvaTable;
22038 ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs);
22041 if (!varDsc->lvTracked || !varDsc->lvRegister)
22044 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
22047 regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
22048 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
22049 varRegMask |= genRegMask(varDsc->lvOtherReg);
22051 if (varRegMask & deadRegs)
22053 // We found the enregistered var that should not be live if it
22054 // was a GC pointer.
22055 noway_assert(!varTypeIsGC(varDsc));
22056 deadRegs &= ~varRegMask;
22063 /* Since we are using the InlinedCallFrame, we should have spilled all
22064 GC pointers to it - even from callee-saved registers */
22066 noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
22068 /* must specify only one of these parameters */
22069 noway_assert((argSize == 0) || (methodToken == NULL));
22071 /* We are about to call unmanaged code directly.
22072 Before we can do that we have to emit the following sequence:
22074 mov dword ptr [frame.callTarget], MethodToken
22075 mov dword ptr [frame.callSiteTracker], esp
22076 mov reg, dword ptr [tcb_address]
22077 mov byte ptr [tcb+offsetOfGcState], 0
22081 CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
22083 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
22085 /* mov dword ptr [frame.callSiteTarget], value */
22087 if (methodToken == NULL)
22089 /* mov dword ptr [frame.callSiteTarget], argSize */
22090 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize,
22091 compiler->lvaInlinedPInvokeFrameVar,
22092 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22096 void * embedMethHnd, * pEmbedMethHnd;
22098 embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(
22102 noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
22104 if (embedMethHnd != NULL)
22106 /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
22108 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t) embedMethHnd,
22109 compiler->lvaInlinedPInvokeFrameVar,
22110 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22114 /* mov reg, dword ptr [MethodDescIndir]
22115 mov dword ptr [frame.callSiteTarget], reg */
22117 regNumber reg = regSet.rsPickFreeReg();
22119 #if CPU_LOAD_STORE_ARCH
22120 instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22122 (ssize_t) pEmbedMethHnd);
22123 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
22124 #else // !CPU_LOAD_STORE_ARCH
22125 #ifdef _TARGET_AMD64_
22126 if (reg != REG_RAX)
22128 instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22130 (ssize_t) pEmbedMethHnd);
22131 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
22134 #endif // _TARGET_AMD64_
22136 getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC,
22137 reg, (ssize_t) pEmbedMethHnd);
22139 #endif // !CPU_LOAD_STORE_ARCH
22140 regTracker.rsTrackRegTrash(reg);
22141 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
22144 compiler->lvaInlinedPInvokeFrameVar,
22145 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
22149 regNumber tcbReg = REG_NA;
22151 if (frameListRoot->lvRegister)
22153 tcbReg = frameListRoot->lvRegNum;
22157 tcbReg = regSet.rsGrabReg(RBM_ALLINT);
22159 /* mov reg, dword ptr [tcb address] */
22161 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
22164 (unsigned)(frameListRoot - compiler->lvaTable),
22166 regTracker.rsTrackRegTrash(tcbReg);
22169 #ifdef _TARGET_X86_
22170 /* mov dword ptr [frame.callSiteTracker], esp */
22172 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
22175 compiler->lvaInlinedPInvokeFrameVar,
22176 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
22177 #endif // _TARGET_X86_
22179 /* mov dword ptr [frame.callSiteReturnAddress], label */
22181 #if CPU_LOAD_STORE_ARCH
22182 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
22183 getEmitter()->emitIns_J_R (INS_adr,
22187 regTracker.rsTrackRegTrash(tmpReg);
22188 getEmitter()->emitIns_S_R (ins_Store(TYP_I_IMPL),
22191 compiler->lvaInlinedPInvokeFrameVar,
22192 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
22193 #else // !CPU_LOAD_STORE_ARCH
22194 // TODO-AMD64-CQ: Consider changing to a rip relative sequence on x64.
22195 getEmitter()->emitIns_J_S (ins_Store(TYP_I_IMPL),
22198 compiler->lvaInlinedPInvokeFrameVar,
22199 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
22200 #endif // !CPU_LOAD_STORE_ARCH
22202 #if CPU_LOAD_STORE_ARCH
22203 instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
22205 noway_assert(tmpReg != tcbReg);
22207 getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE),
22211 pInfo->offsetOfGCState);
22212 #else // !CPU_LOAD_STORE_ARCH
22213 /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
22215 getEmitter()->emitIns_I_AR (ins_Store(TYP_BYTE),
22219 pInfo->offsetOfGCState);
22220 #endif // !CPU_LOAD_STORE_ARCH
22225 /*****************************************************************************
22227 First we have to mark in the hoisted NDirect stub that we are back
22228 in managed code. Then we have to check (a global flag) whether GC is
22229 pending or not. If so, we just call into a jit-helper.
22230 Right now we have this call always inlined, i.e. we always skip around
22231 the jit-helper call.
22233 The tcb address is a regular local (initialized in the prolog), so it is either
22234 enregistered or in the frame:
22236 tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
22237 mov byte ptr[tcb_reg+offsetOfGcState], 1
22238 cmp 'global GC pending flag', 0
22240 [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
22241 [mov [EBP+spill_area+0], eax] ; spill the int return value if any
22242 [mov [EBP+spill_area+4], edx] ; spill the long return value if any
22244 [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
22245 [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
// Emits the epilog that follows an inlined ("hoisted") P/Invoke call:
//  1) flips the thread back to managed (cooperative) mode by storing 1 into
//     the TCB's GC-state byte (pInfo->offsetOfGCState),
//  2) compares the runtime's global capture-thread flag against 0 and, if it
//     is set, calls CORINFO_HELP_STOP_FOR_GC (saving/restoring the scalar
//     return value around the helper on targets where the helper clobbers it),
//  3) marks the InlinedCallFrame inactive by zeroing its return-address slot.
// 'frameListRoot' is the local variable caching the TCB (thread) address.
// NOTE(review): this excerpt elides several original lines (the second
// parameter -- presumably the 'retVal' mask used below -- the reg2/reg3
// declarations, braces/else arms, and some emitIns argument lines); the
// comments below describe only what is visible here.
22249 void CodeGen::genPInvokeCallEpilog(LclVarDsc * frameListRoot,
22252 BasicBlock * clab_nostop;
22253 CORINFO_EE_INFO * pInfo = compiler->eeGetEEInfo();
22256 #ifdef _TARGET_ARM_
22261 #ifdef _TARGET_AMD64_
// AMD64 spills the return value to a temp (instead of x86's push/pop) around
// the stop-for-GC helper; see the RBM_INTRET handling further down.
22262 TempDsc * retTmp = NULL;
// Keep the emitter from injecting random NOPs into this GC-sensitive window.
22265 getEmitter()->emitDisableRandomNops();
// Get the TCB address into a register: either it is enregistered for the
// whole method (must then be callee-saved to survive the call), or we reload
// it from frameListRoot's frame slot.
22267 if (frameListRoot->lvRegister)
22269 /* make sure that register is live across the call */
22271 reg2 = frameListRoot->lvRegNum;
22272 noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
22276 /* mov reg2, dword ptr [tcb address] */
22277 #ifdef _TARGET_ARM_
22283 getEmitter()->emitIns_R_S (ins_Load(TYP_I_IMPL),
22286 (unsigned)(frameListRoot - compiler->lvaTable),
22288 regTracker.rsTrackRegTrash(reg2);
// Back in managed code: write 1 into the thread's GC-state byte. ARM has no
// store-immediate-to-memory form, so it materializes the 1 in reg3 first.
22292 #ifdef _TARGET_ARM_
22294 /* strb [r2+offsetOfGcState], r3 */
22295 instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
22296 getEmitter()->emitIns_AR_R (ins_Store(TYP_BYTE),
22300 pInfo->offsetOfGCState);
22302 /* mov byte ptr [tcb+offsetOfGcState], 1 */
22303 getEmitter()->emitIns_I_AR (ins_Store(TYP_BYTE),
22307 pInfo->offsetOfGCState);
22310 /* test global flag (we return to managed code) */
// The EE hands back either the flag's direct address or the address of a
// pointer to it; exactly one of the two is non-null (asserted below).
22312 LONG * addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
22314 addrOfCaptureThreadGlobal = compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**) &pAddrOfCaptureThreadGlobal);
22315 noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
22317 // Can we directly use addrOfCaptureThreadGlobal?
22319 if (addrOfCaptureThreadGlobal)
22321 #ifdef _TARGET_ARM_
22322 instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22324 (ssize_t)addrOfCaptureThreadGlobal);
22325 getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22330 regTracker.rsTrackRegTrash(reg3);
22331 getEmitter()->emitIns_R_I (INS_cmp,
// AMD64: use a RIP-relative cmp when the address is within rel32 range,
// otherwise materialize the address in reg3 and compare indirectly.
22335 #elif defined(_TARGET_AMD64_)
22337 if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(addrOfCaptureThreadGlobal))
22339 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
22341 getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, reg3, 0);
22345 getEmitter()->emitIns_I_AI(INS_cmp, EA_4BYTE_DSP_RELOC, 0, (ssize_t)addrOfCaptureThreadGlobal);
22349 getEmitter()->emitIns_C_I (INS_cmp,
22352 (ssize_t) addrOfCaptureThreadGlobal,
// Indirect case: load the pointer to the flag, then compare through it.
22358 #ifdef _TARGET_ARM_
22359 instGen_Set_Reg_To_Imm (EA_HANDLE_CNS_RELOC,
22361 (ssize_t)pAddrOfCaptureThreadGlobal);
22362 getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22367 regTracker.rsTrackRegTrash(reg3);
22368 getEmitter()->emitIns_R_R_I (ins_Load(TYP_INT),
22373 getEmitter()->emitIns_R_I (INS_cmp,
22377 #else // !_TARGET_ARM_
22379 #ifdef _TARGET_AMD64_
22380 if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(pAddrOfCaptureThreadGlobal))
22382 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_ECX, (ssize_t)pAddrOfCaptureThreadGlobal);
22383 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ECX, REG_ECX, 0);
22384 regTracker.rsTrackRegTrash(REG_ECX);
22387 #endif // _TARGET_AMD64_
22389 getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
22390 (ssize_t)pAddrOfCaptureThreadGlobal);
22391 regTracker.rsTrackRegTrash(REG_ECX);
22394 getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
22395 #endif // !_TARGET_ARM_
// Common case: flag is zero -> jump over the stop-for-GC helper call.
22399 clab_nostop = genCreateTempLabel();
22401 /* Generate the conditional jump */
22402 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
22403 inst_JMP(jmpEqual, clab_nostop);
22405 #ifdef _TARGET_ARM_
22406 // The helper preserves the return value on ARM
// On non-ARM targets the helper may clobber the scalar return registers:
// x86 saves EAX (and EDX for long returns) with push/pop; AMD64 spills
// REG_INTRET into the 'retTmp' temp allocated here.
22408 /* save return value (if necessary) */
22409 if (retVal != RBM_NONE)
22411 if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
22413 #ifdef _TARGET_AMD64_
22414 retTmp = compiler->tmpGetTemp(TYP_LONG);
22415 inst_ST_RV(INS_mov, retTmp, 0, REG_INTRET, TYP_LONG);
22416 #elif defined(_TARGET_X86_)
22419 inst_RV(INS_push, REG_INTRET, TYP_INT);
22421 if (retVal == RBM_LNGRET)
22425 inst_RV(INS_push, REG_EDX, TYP_INT);
22427 #endif // _TARGET_AMD64_
22432 /* emit the call to the EE-helper that stops for GC (or other reasons) */
22434 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC,
22436 EA_UNKNOWN); /* retSize */
22438 #ifdef _TARGET_ARM_
22439 // The helper preserves the return value on ARM
// Mirror image of the save sequence above: reload/pop the return value and
// tell the register tracker the registers were rewritten.
22441 /* restore return value (if necessary) */
22443 if (retVal != RBM_NONE)
22445 if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
22447 #ifdef _TARGET_AMD64_
22449 assert(retTmp != NULL);
22450 inst_RV_ST(INS_mov, REG_INTRET, retTmp, 0, TYP_LONG);
22451 regTracker.rsTrackRegTrash(REG_INTRET);
22452 compiler->tmpRlsTemp(retTmp);
22454 #elif defined(_TARGET_X86_)
22455 if (retVal == RBM_LNGRET)
22459 inst_RV(INS_pop, REG_EDX, TYP_INT);
22460 regTracker.rsTrackRegTrash(REG_EDX);
22465 inst_RV(INS_pop, REG_INTRET, TYP_INT);
22466 regTracker.rsTrackRegTrash(REG_INTRET);
22467 #endif // _TARGET_AMD64_
22472 /* genCondJump() closes the current emitter block */
22474 genDefineTempLabel(clab_nostop);
22476 // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
22477 // the above code. So the process is:
22478 // 1) Return to cooperative mode
22479 // 2) Check to see if we need to stop for GC
22480 // 3) Return from the p/invoke (as far as the stack walker is concerned).
22482 /* mov dword ptr [frame.callSiteTracker], 0 */
// NOTE(review): the comment above looks stale -- the store below targets
// inlinedCallFrameInfo.offsetOfReturnAddress, i.e. it zeroes
// frame.callSiteReturnAddress, not callSiteTracker; confirm which was intended.
22484 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0,
22485 compiler->lvaInlinedPInvokeFrameVar,
22486 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
// Re-enable the random-NOP insertion disabled at the top of this function.
22488 getEmitter()->emitEnableRandomNops();
22491 /*****************************************************************************/
22493 /*****************************************************************************
22494 * TRACKING OF FLAGS
22495 *****************************************************************************/
// Forget any record of what the condition-flags register currently mirrors:
// clear both the register association (genFlagsEqReg) and the variable
// association (genFlagsEqVar), and reset the captured emitter location.
22497 void CodeGen::genFlagsEqualToNone()
22499 genFlagsEqReg = REG_NA;
22500 genFlagsEqVar = (unsigned)-1;
22501 genFlagsEqLoc.Init();
22504 /*****************************************************************************
22506 * Record the fact that the flags register has a value that reflects the
22507 * contents of the given register.
// Record that the condition flags currently reflect the value of 'reg'
// (the second parameter is elided in this excerpt), captured at the present
// emitter location. 'tree' is the node whose codegen set the flags; it is
// tagged with GTF_ZSF_SET so consumers know ZF/SF are valid for it.
22510 void CodeGen::genFlagsEqualToReg(GenTreePtr tree,
// Remember where in the instruction stream the flags were produced, so a
// later query can tell whether any code has been emitted since.
22513 genFlagsEqLoc.CaptureLocation(getEmitter());
22514 genFlagsEqReg = reg;
22516 /* previous setting of flags by a var becomes invalid */
22518 genFlagsEqVar = 0xFFFFFFFF;
22520 /* Set appropriate flags on the tree */
22524 tree->gtFlags |= GTF_ZSF_SET;
22525 assert(tree->gtSetFlags());
22529 /*****************************************************************************
22531 * Record the fact that the flags register has a value that reflects the
22532 * contents of the given local variable.
// Record that the condition flags currently reflect the value of local
// variable 'var' (the second parameter is elided in this excerpt), captured
// at the present emitter location. Mirror image of genFlagsEqualToReg:
// setting the variable association invalidates the register one.
22535 void CodeGen::genFlagsEqualToVar(GenTreePtr tree,
22538 genFlagsEqLoc.CaptureLocation(getEmitter());
22539 genFlagsEqVar = var;
22541 /* previous setting of flags by a register becomes invalid */
22543 genFlagsEqReg = REG_NA;
22545 /* Set appropriate flags on the tree */
22549 tree->gtFlags |= GTF_ZSF_SET;
22550 assert(tree->gtSetFlags());
22554 /*****************************************************************************
22556 * Return an indication of whether the flags register is set to the current
22557 * value of the given register/variable. The return value is as follows:
22560 * true .. the zero flag (ZF) and sign flag (SF) are set
// Returns whether the flags register still reflects the current value of
// 'reg': true only if 'reg' is the recorded flags source AND nothing has
// been emitted since the flags were captured (IsCurrentLocation check).
// NOTE(review): the return statements are elided in this excerpt.
22563 bool CodeGen::genFlagsAreReg(regNumber reg)
22565 if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
// Variable counterpart of genFlagsAreReg: true only if 'var' is the recorded
// flags source AND no code has been emitted since the flags were captured.
// NOTE(review): the return statements are elided in this excerpt.
22573 bool CodeGen::genFlagsAreVar(unsigned var)
22575 if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
22583 /*****************************************************************************
22584 * This utility function returns true iff the execution path from "from"
22585 * (inclusive) to "to" (exclusive) contains a death of the given var
22588 CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
22591 for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
22593 if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
22595 unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
22596 if (dyingVarNum == varNum) return true;
22597 LclVarDsc * varDsc = &(compiler->lvaTable[varNum]);
22598 if (varDsc->lvPromoted)
22600 assert(varDsc->lvType == TYP_STRUCT);
22601 unsigned firstFieldNum = varDsc->lvFieldLclStart;
22602 if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
22609 assert(tree != NULL);
22613 #endif // LEGACY_BACKEND