1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
22 #error AMD64 must be !LEGACY_BACKEND
26 #error ARM64 must be !LEGACY_BACKEND
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
36 /*****************************************************************************
38 * Determine what variables die between beforeSet and afterSet, and
39 * update the liveness globals accordingly:
40 * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
// Compute the variables that die between 'beforeSet' and 'afterSet' (set difference),
// then update liveness state: compCurLife, gcVarPtrSetCur, and for enregistered
// variables also regSet.rsMaskVars and the GC register sets.
// NOTE(review): listing is elided in places — some statements/braces are not visible here.
43 void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
48     VARSET_TP deadSet(VarSetOps::Diff(compiler, beforeSet, afterSet));
// Nothing to do if no variable dies across this transition.
50     if (VarSetOps::IsEmpty(compiler, deadSet))
53     /* iterate through the dead variables */
55     VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
56     while (iter.NextElem(&varIndex))
// Map the tracked index back to the lvaTable slot.
58         varNum = compiler->lvaTrackedToVarNum[varIndex];
59         varDsc = compiler->lvaTable + varNum;
61         /* Remove this variable from the 'deadSet' bit set */
63         noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
65         VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
// If this is a tracked-stack-pointer local it must currently be in the GC var set.
67         noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
68                      VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
70         VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
72         /* We are done if the variable is not enregistered */
74         if (!varDsc->lvRegister)
77             if (compiler->verbose)
79                 printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
85 #if !FEATURE_FP_REGALLOC
86         // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
87         if (!varDsc->IsFloatRegType())
90             /* Get hold of the appropriate register bit(s) */
92             if (varTypeIsFloating(varDsc->TypeGet()))
94                 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
98                 regBit = genRegMask(varDsc->lvRegNum);
// A long enregistered in a pair contributes both halves to the mask
// (unless the upper half lives on the stack).
99                 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
100                     regBit |= genRegMask(varDsc->lvOtherReg);
104             if (compiler->verbose)
106                 printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
107                        compiler->compRegVarName(varDsc->lvRegNum));
// The dying variable's register(s) must currently be in the variable mask.
110             noway_assert((regSet.rsMaskVars & regBit) != 0);
112             regSet.RemoveMaskVars(regBit);
114             // Remove GC tracking if any for this register
116             if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
117                 gcInfo.gcMarkRegSetNpt(regBit);
122 /*****************************************************************************
124 * Change the given enregistered local variable node to a register variable node
// Rewrite a GT_LCL_VAR node in place into a GT_REG_VAR node for a variable that
// lives in a register, preserving liveness flags and the value number pair.
127 void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
129     noway_assert(tree->gtOper == GT_LCL_VAR);
130     noway_assert(varDsc->lvRegister);
132     if (isRegPairType(varDsc->lvType))
134         /* Check for the case of a variable that was narrowed to an int */
136         if (isRegPairType(tree->gtType))
// Full-width use of a long variable: mark the tree with the register pair.
138             genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
// Otherwise this must be a cast-narrowed (TYP_INT) use of the long variable.
142         noway_assert(tree->gtFlags & GTF_VAR_CAST);
143         noway_assert(tree->gtType == TYP_INT);
147     noway_assert(!isRegPairType(tree->gtType));
150     /* It's a register variable -- modify the node */
// Save liveness bits and the ValueNumPair across SetOper, which clears them.
152     unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
154     ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
155     tree->SetOper(GT_REG_VAR);
156     tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
158     tree->gtFlags |= livenessFlags;
160     tree->gtRegNum          = varDsc->lvRegNum;
161     tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
162     tree->gtRegVar.SetLclNum(varNum);
// Snapshot the current liveness state (live var set, GC var set, variable register
// mask, and GC/byref register sets) into 'ls' so it can be restored/checked later.
166 void CodeGen::saveLiveness(genLivenessSet* ls)
168     VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
169     VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
// Register masks are narrowed to regMaskSmall for storage in the snapshot.
170     ls->maskVars  = (regMaskSmall)regSet.rsMaskVars;
171     ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
172     ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
// Restore liveness state previously captured by saveLiveness().
176 void CodeGen::restoreLiveness(genLivenessSet* ls)
178     VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
179     VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
180     regSet.rsMaskVars       = ls->maskVars;
181     gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
182     gcInfo.gcRegByrefSetCur = ls->byRefRegs;
// Debug-only consistency check: assert the current liveness state matches the
// snapshot in 'ls' (counterpart of saveLiveness/restoreLiveness).
186 void CodeGen::checkLiveness(genLivenessSet* ls)
188     assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
189     assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
190     assert(regSet.rsMaskVars == ls->maskVars);
191     assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
192     assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
// If the local variable referenced by 'tree' is enregistered, bash the node to a
// GT_REG_VAR via genBashLclVar. Returns whether the variable lives in a register
// (the return statements are not visible in this elided listing — confirm).
196 bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
201     assert(tree->gtOper == GT_LCL_VAR);
203     /* Does the variable live in a register? */
205     varNum = tree->gtLclVarCommon.gtLclNum;
206     assert(varNum < compiler->lvaCount);
207     varDsc = compiler->lvaTable + varNum;
209     if (varDsc->lvRegister)
211         genBashLclVar(tree, varNum, varDsc);
// If 'tree' can be expressed as an addressing mode, return its base address tree.
// Uses genCreateAddrMode in analysis-only mode (no code generation).
// NOTE(review): several arguments of the call are elided from this listing.
221 GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
229     if (genCreateAddrMode(tree,     // address
232                           RBM_NONE, // reg mask
236 #if SCALED_ADDR_MODES
239                           &cns,  // displacement
240                           true)) // don't generate code
246 #if FEATURE_STACK_FP_X87
// Reset the tracked x87 FP stack depth to 'newValue' (default 0).
248 void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
250     genFPstkLevel = newValue;
// Return the current tracked x87 FP stack depth.
254 unsigned CodeGenInterface::genGetFPstkLevel()
256     return genFPstkLevel;
// Increase the tracked x87 FP stack depth by 'inc' (default 1).
// The assert guards against unsigned wrap-around on the addition.
260 void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
262     noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
263     genFPstkLevel += inc;
// Decrease the tracked x87 FP stack depth by 'dec' (default 1).
// The assert guards against unsigned underflow on the subtraction.
267 void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
269     noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
270     genFPstkLevel -= dec;
273 #endif // FEATURE_STACK_FP_X87
275 /*****************************************************************************
277 * Generate code that will set the given register to the integer constant.
// Emit code to load the integer constant 'val' into 'reg', exploiting register
// tracking to avoid redundant loads: reuse the register if it already holds the
// value, copy/LEA/add from another register holding the value (or a nearby value),
// or fall back to a direct immediate move. Updates regTracker and GC info.
// NOTE(review): listing is elided — several branches/braces are not visible here.
280 void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
// A TYP_REF register may only be set to null this way.
282     noway_assert(type != TYP_REF || val == NULL);
284     /* Does the reg already hold this constant? */
286     if (!regTracker.rsIconIsInReg(val, reg))
290             instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
293         // If we can set a register to a constant with a small encoding, then do that.
294         else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
296             instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
301             /* See if a register holds the value or a close value? */
302             bool      constantLoaded = false;
// srcReg (if any) holds val + delta for some small delta.
304             regNumber srcReg         = regTracker.rsIconIsInReg(val, &delta);
306             if (srcReg != REG_NA)
// Exact match: a plain register-to-register move suffices.
310                     inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
311                     constantLoaded = true;
315 #if defined(_TARGET_XARCH_)
316                 /* delta should fit inside a byte */
317                 if (delta == (signed char)delta)
319                     /* use an lea instruction to set reg */
320                     getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
321                     constantLoaded = true;
323 #elif defined(_TARGET_ARM_)
324                 /* We found a register 'regS' that has the value we need, modulo a small delta.
325                    That is, the value we need is 'regS + delta'.
326                    We one to generate one of the following instructions, listed in order of preference:
328                         adds  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
330                         subs  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and
332                         adds  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
333                         subs  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
334                         mov   regD, icon         ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
335                         movw  regD, icon         ; 4 bytes. 0<=icon<=65535
336                         add.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
337                         sub.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
338                         addw  regD, regS, delta  ; 4 bytes. 0<=delta<=4095
339                         subw  regD, regS, delta  ; 4 bytes. -4095<=delta<=0
341                    If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
342                    than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
343                    can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
344                    0x80000000. In this case, delta will be 1.
348                 regMaskTP regMask    = genRegMask(reg);
349                 regMaskTP srcRegMask = genRegMask(srcReg);
// Candidate for the 2-byte adds/subs regD, delta form.
351                 if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
352                     (unsigned_abs(delta) <= 255))
// Candidate for the 2-byte adds/subs regD, regS, delta form.
356                 else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
357                          (unsigned_abs(delta) <= 7))
361                 else if (arm_Valid_Imm_For_Mov(val))
363                     // fall through to general "!constantLoaded" case below
365                 else if (arm_Valid_Imm_For_Add(delta, flags))
372                     getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
373                     constantLoaded = true;
376                     assert(!"Codegen missing");
381             if (!constantLoaded) // Have we loaded it yet?
386                 /* or reg,-1 takes 3 bytes */
387                 inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
390                 /* For SMALL_CODE it is smaller to push a small immediate and
391                    then pop it into the dest register */
392                 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
394                     /* "mov" has no s(sign)-bit and so always takes 6 bytes,
395                        whereas push+pop takes 2+1 bytes */
397                     inst_IV(INS_push, val);
400                     inst_RV(INS_pop, reg, type);
404 #endif // _TARGET_X86_
// General fallback: load the immediate directly.
406                 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
// Record the constant in the register tracker and update GC-ness of 'reg'.
411     regTracker.rsTrackRegIntCns(reg, val);
412     gcInfo.gcMarkRegPtrVal(reg, type);
415 /*****************************************************************************
417 * Find an existing register set to the given integer constant, or
418 * pick a register and generate code that will set it to the integer constant.
420 * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
421 * to pick some register to set. NOTE that this means the returned regNumber
422 * might *not* be in regBest. It also implies that you should lock any registers
423 * you don't want spilled (not just mark as used).
// Return a register containing constant 'val': reuse one that already holds it,
// otherwise pick a register (preferring 'regBest', avoiding RBM_TMP_0) and load it.
// Per the banner comment above, the returned register may NOT be in 'regBest'.
427 regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
432     // Is there already a register with zero that we can use?
433     regCns = regTracker.rsIconIsInReg(val);
435     if (regCns == REG_NA)
438         // If not, grab a register to hold the constant, preferring
439         // any register besides RBM_TMP_0 so it can hopefully be re-used
440         regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
442         // Now set the constant
443         genSetRegToIcon(regCns, val, type);
446     // NOTE: there is guarantee that regCns is in regBest's mask
450 /*****************************************************************************/
451 /*****************************************************************************
453 * Add the given constant to the specified register.
454 * 'tree' is the resulting tree
// Add the constant 'ival' to 'reg'. On xarch, prefer inc/dec for +/-1 when no
// overflow check is needed and inc/dec is not being avoided (P4 heuristic);
// otherwise emit an add. Updates flags tracking, register tracking, and GC info,
// and marks 'tree' (the result) as living in 'reg'.
// NOTE(review): listing is elided — the conditions selecting inc vs dec, and
// some surrounding braces, are not visible here.
457 void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
459     bool setFlags = (tree != NULL) && tree->gtSetFlags();
461 #ifdef _TARGET_XARCH_
462     /* First check to see if we can generate inc or dec instruction(s) */
463     /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
464     if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
466         emitAttr size = emitTypeSize(dstType);
471             inst_RV(INS_inc, reg, dstType, size);
474                 inst_RV(INS_inc, reg, dstType, size);
476             goto UPDATE_LIVENESS;
479             inst_RV(INS_dec, reg, dstType, size);
482                 inst_RV(INS_dec, reg, dstType, size);
484             goto UPDATE_LIVENESS;
// General case: add the immediate, setting flags only if the tree requires it.
489     insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
490     inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
493 #ifdef _TARGET_XARCH_
498         genFlagsEqualToReg(tree, reg);
// The register's previous tracked contents are no longer valid.
500     regTracker.rsTrackRegTrash(reg);
502     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
506     if (!tree->OperIsAssignment())
508         genMarkTreeInReg(tree, reg);
// A GC type +/- constant yields a byref.
509         if (varTypeIsGC(tree->TypeGet()))
510             gcInfo.gcMarkRegSetByref(genRegMask(reg));
515 /*****************************************************************************
517 * Subtract the given constant from the specified register.
518 * Should only be used for unsigned sub with overflow. Else
519 * genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy()
520 * for these cases as the flags are set differently, and the following
521 * check for overflow won't work correctly.
522 * 'tree' is the resulting tree.
// Subtract constant 'ival' from 'reg'. Per the banner above, this is only for
// overflow-checked subtracts where genIncRegBy(-ival) would set flags wrongly.
525 void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
527     noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
528                  ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
529     noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
531     regTracker.rsTrackRegTrash(reg);
// GC types are not allowed here, so the register can be marked non-pointer.
533     noway_assert(!varTypeIsGC(tree->TypeGet()));
534     gcInfo.gcMarkRegSetNpt(genRegMask(reg));
536     insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
537     inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
539     if (tree->gtSetFlags())
540         genFlagsEqualToReg(tree, reg);
544     genMarkTreeInReg(tree, reg);
548 /*****************************************************************************
550 * Multiply the specified register by the given value.
551 * 'tree' is the resulting tree
// Multiply 'reg' by constant 'ival'. Uses a left shift when 'ival' is a positive
// power of two and no overflow check is needed; otherwise emits a multiply
// (3-operand imul on xarch). Marks 'tree' as living in 'reg'.
// NOTE(review): listing is elided — some branches/braces are not visible here.
554 void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
556     noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
558     regTracker.rsTrackRegTrash(reg);
562         genMarkTreeInReg(tree, reg);
565     bool     use_shift = false;
566     unsigned shift_by  = 0;
// (ival & (ival - 1)) == 0 tests for a power of two.
568     if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
571         BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
578         insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
579         inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
580         if (tree->gtSetFlags())
581             genFlagsEqualToReg(tree, reg);
587 #ifdef _TARGET_XARCH_
588         ins = getEmitter()->inst3opImulForReg(reg);
593         inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
597 /*****************************************************************************/
598 /*****************************************************************************/
599 /*****************************************************************************
601 * Compute the value 'tree' into a register that's in 'needReg'
602 * (or any free register if 'needReg' is RBM_NONE).
604 * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
605 * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
607 * If you require that the register returned is trashable, pass true for 'freeOnly'.
// Compute 'tree' into a register constrained by 'needReg' (see banner above):
// needReg is a hint unless mustReg==EXACT_REG; freeOnly demands a trashable
// register; keepReg==KEEP_REG marks the result register as used.
// NOTE(review): listing is elided — several statements/braces are not visible.
610 void CodeGen::genComputeReg(
611     GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
613     noway_assert(tree->gtType != TYP_VOID);
// Per-target set of types this routine is allowed to handle.
618 #if FEATURE_STACK_FP_X87
619     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
620                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
621 #elif defined(_TARGET_ARM_)
622     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
623                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
624                  genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
625                  genActualType(tree->gtType) == TYP_STRUCT);
627     noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
628                  genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
629                  genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
632     /* Generate the value, hopefully into the right register */
634     genCodeForTree(tree, needReg);
635     noway_assert(tree->InReg());
637     // There is a workaround in genCodeForTreeLng() that changes the type of the
638     // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
639     // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
640     // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
641     // running the rest of this code, because anything looking at gtRegNum on ARM or
642     // attempting to move from EAX/EDX will be wrong.
643     if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
646     reg = tree->gtRegNum;
648     /* Did the value end up in an acceptable register? */
650     if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
652         /* Not good enough to satisfy the caller's orders */
654         if (varTypeIsFloating(tree))
656             RegSet::RegisterPreference pref(needReg, RBM_NONE);
657             rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
661             rg2 = regSet.rsGrabReg(needReg);
666         /* Do we have to end up with a free register? */
671         /* Did we luck out and the value got computed into an unused reg? */
673         if (genRegMask(reg) & regSet.rsRegMaskFree())
676         /* Register already in use, so spill previous value */
678         if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
680             rg2 = regSet.rsGrabReg(needReg);
683                 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
684                 tree->gtRegNum = reg;
690             /* OK, let's find a trashable home for the value */
692             regMaskTP rv1RegUsed;
// Lock the current home so rsPickReg can't choose/spill it.
694             regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
695             rg2 = regSet.rsPickReg(needReg);
696             regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
700         noway_assert(reg != rg2);
702         /* Update the value in the target register */
704         regTracker.rsTrackRegCopy(rg2, reg);
706         inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
708         /* The value has been transferred to 'reg' */
// Clear GC-ness of the old home unless the register is still in use elsewhere.
710         if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
711             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
713         gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
715         /* The value is now in an appropriate register */
717         tree->gtRegNum = rg2;
721     /* Does the caller want us to mark the register as used? */
723     if (keepReg == RegSet::KEEP_REG)
725         /* In case we're computing a value into a register variable */
729         /* Mark the register as 'used' */
731         regSet.rsMarkRegUsed(tree);
735 /*****************************************************************************
737 * Same as genComputeReg(), the only difference being that the result is
738 * guaranteed to end up in a trashable register.
// Convenience wrapper over genComputeReg() that forces the result into a
// trashable (free) register (freeOnly == true).
742 void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
744     genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
747 /*****************************************************************************
749 * The value 'tree' was earlier computed into a register; free up that
750 * register (but also make sure the value is presently in a register).
// Free the register previously holding 'tree', reloading it first if the value
// was spilled so that it is once again present in a register.
753 void CodeGen::genReleaseReg(GenTreePtr tree)
755     if (tree->gtFlags & GTF_SPILLED)
757         /* The register has been spilled -- reload it */
759         regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
763     regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
766 /*****************************************************************************
768 * The value 'tree' was earlier computed into a register. Check whether that
769 * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
770 * free the register. The caller shouldn't need to be setting GCness of the register
771 * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
// Ensure the previously-computed value of 'tree' is in a register acceptable to
// 'needReg' (reloading a spill or moving it), then free it if keepReg==FREE_REG.
// FREE_REG is disallowed for GC-typed trees (see banner above).
774 void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
776     if (tree->gtFlags & GTF_SPILLED)
778         /* The register has been spilled -- reload it */
780         regSet.rsUnspillReg(tree, needReg, keepReg);
783     else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
785         /* We need the tree in another register. So move it there */
787         noway_assert(tree->InReg());
788         regNumber oldReg = tree->gtRegNum;
790         /* Pick an acceptable register */
792         regNumber reg = regSet.rsGrabReg(needReg);
796         inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
797         tree->gtRegNum = reg;
// Transfer GC-ness and the used/free bookkeeping from the old register to the new.
799         gcInfo.gcMarkRegPtrVal(tree);
800         regSet.rsMarkRegUsed(tree);
801         regSet.rsMarkRegFree(oldReg, tree);
803         regTracker.rsTrackRegCopy(reg, oldReg);
806     /* Free the register if the caller desired so */
808     if (keepReg == RegSet::FREE_REG)
810         regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
811         // Can't use RegSet::FREE_REG on a GC type
812         noway_assert(!varTypeIsGC(tree->gtType));
816         noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
820 /*****************************************************************************
822 * Move one half of a register pair to its new regPair(half).
// Move one 32-bit half of a long value into 'dst': either load it from the
// stack-resident local (when the source half is REG_STK — presumably the elided
// branch checks src, confirm) at offset 'off', or copy register-to-register.
826 void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
830         // handle long to unsigned long overflow casts
831         while (tree->gtOper == GT_CAST)
833             noway_assert(tree->gtType == TYP_LONG);
834             tree = tree->gtCast.CastOp();
836         noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
837         noway_assert(tree->gtType == TYP_LONG);
// Load one int-sized half of the long local from the stack.
838         inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
839         regTracker.rsTrackRegTrash(dst);
843         regTracker.rsTrackRegCopy(dst, src);
844         inst_RV_RV(INS_mov, dst, src, TYP_INT);
848 /*****************************************************************************
850 * The given long value is in a register pair, but it's not an acceptable
851 * one. We have to move the value into a register pair in 'needReg' (if
852 * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
854 * Important note: if 'needReg' is non-zero, we assume the current pair
855 * has not been marked as free. If, OTOH, 'newPair' is specified, we
856 * assume that the current register pair is marked as used and free it.
// Move a long value from its current register pair into either a pair chosen
// from 'needReg' or the specific pair 'newPair' — exactly one must be given
// (see the 'Important note' in the banner above about used/free assumptions).
// Handles overlapping source/destination pairs, including the full-swap case.
// NOTE(review): listing is elided — some conditions/braces are not visible here.
859 void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
868     /* Either a target set or a specific pair may be requested */
870     noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
872     /* Get hold of the current pair */
874     oldPair = tree->gtRegPair;
875     noway_assert(oldPair != newPair);
877     /* Are we supposed to move to a specific pair? */
879     if (newPair != REG_PAIR_NONE)
881         regMaskTP oldMask = genRegPairMask(oldPair);
882         regMaskTP loMask  = genRegMask(genRegPairLo(newPair));
883         regMaskTP hiMask  = genRegMask(genRegPairHi(newPair));
884         regMaskTP overlap = oldMask & (loMask | hiMask);
886         /* First lock any registers that are in both pairs */
888         noway_assert((regSet.rsMaskUsed & overlap) == overlap);
889         noway_assert((regSet.rsMaskLock & overlap) == 0);
890         regSet.rsMaskLock |= overlap;
892         /* Make sure any additional registers we need are free */
894         if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
896             regSet.rsGrabReg(loMask);
899         if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
901             regSet.rsGrabReg(hiMask);
904         /* Unlock those registers we have temporarily locked */
906         noway_assert((regSet.rsMaskUsed & overlap) == overlap);
907         noway_assert((regSet.rsMaskLock & overlap) == overlap);
908         regSet.rsMaskLock -= overlap;
910         /* We can now free the old pair */
912         regSet.rsMarkRegFree(oldMask);
916         /* Pick the new pair based on the caller's stated preference */
918         newPair = regSet.rsGrabRegPair(needReg);
921     // If grabbed pair is the same as old one we're done
922     if (newPair == oldPair)
// Comma-expression assert: compute the halves, then check neither is REG_STK.
924         noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
925                       newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
929     /* Move the values from the old pair into the new one */
931     oldLo = genRegPairLo(oldPair);
932     oldHi = genRegPairHi(oldPair);
933     newLo = genRegPairLo(newPair);
934     newHi = genRegPairHi(newPair);
936     noway_assert(newLo != REG_STK && newHi != REG_STK);
938     /* Careful - the register pairs might overlap */
942         /* The low registers are identical, just move the upper half */
944         noway_assert(newHi != oldHi);
945         genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
949         /* The low registers are different, are the upper ones the same? */
953             /* Just move the lower half, then */
954             genMoveRegPairHalf(tree, newLo, oldLo, 0);
958             /* Both sets are different - is there an overlap? */
962                 /* Are high and low simply swapped ? */
// ARM has no xchg, so swap via three XORs; x86 uses a single xchg (per the
// #ifdef'd alternatives below — the preprocessor lines are elided here).
967                     /* Let's use XOR swap to reduce register pressure. */
968                     inst_RV_RV(INS_eor, oldLo, oldHi);
969                     inst_RV_RV(INS_eor, oldHi, oldLo);
970                     inst_RV_RV(INS_eor, oldLo, oldHi);
972                     inst_RV_RV(INS_xchg, oldHi, oldLo);
974                     regTracker.rsTrackRegSwap(oldHi, oldLo);
978                     /* New lower == old higher, so move higher half first */
980                     noway_assert(newHi != oldLo);
981                     genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
982                     genMoveRegPairHalf(tree, newLo, oldLo, 0);
987                 /* Move lower half first */
988                 genMoveRegPairHalf(tree, newLo, oldLo, 0);
989                 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
994     /* Record the fact that we're switching to another pair */
996     tree->gtRegPair = newPair;
999 /*****************************************************************************
1001 * Compute the value 'tree' into the register pair specified by 'needRegPair'
1002 * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
1003 * those in avoidReg.
1004 * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1005 * value ends up in as being used.
// Compute the long value 'tree' into the register pair 'needRegPair' (or any
// suitable free pair, avoiding 'avoidReg', when REG_PAIR_NONE). If half of the
// result landed on the stack (REG_STK), load that half into a real register.
// keepReg==KEEP_REG marks the resulting pair as used.
// NOTE(review): listing is elided — some statements/braces are not visible here.
1008 void CodeGen::genComputeRegPair(
1009     GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
1014     regMaskTP tmpUsedMask;
1018     noway_assert(isRegPairType(tree->gtType));
1020     if (needRegPair == REG_PAIR_NONE)
// Build a candidate mask from free registers minus 'avoidReg'; widen it again
// whenever it collapses to at most one register (a pair needs two).
1024             regMask = regSet.rsRegMaskFree() & ~avoidReg;
1025             if (genMaxOneBit(regMask))
1026                 regMask = regSet.rsRegMaskFree();
1030             regMask = RBM_ALLINT & ~avoidReg;
1033         if (genMaxOneBit(regMask))
1034             regMask = regSet.rsRegMaskCanGrab();
1038         regMask = genRegPairMask(needRegPair);
1041     /* Generate the value, hopefully into the right register pair */
1043     genCodeForTreeLng(tree, regMask, avoidReg);
1045     noway_assert(tree->InReg());
1047     regPair = tree->gtRegPair;
1048     tmpMask = genRegPairMask(regPair);
1050     rLo = genRegPairLo(regPair);
1051     rHi = genRegPairHi(regPair);
1053     /* At least one half is in a real register */
1055     noway_assert(rLo != REG_STK || rHi != REG_STK);
1057     /* Did the value end up in an acceptable register pair? */
1059     if (needRegPair != REG_PAIR_NONE)
1061         if (needRegPair != regPair)
1063             /* This is a workaround. If we specify a regPair for genMoveRegPair */
1064             /* it expects the source pair being marked as used */
1065             regSet.rsMarkRegPairUsed(tree);
1066             genMoveRegPair(tree, 0, needRegPair);
1071         /* Do we have to end up with a free register pair?
1072            Something might have gotten freed up above */
1073         bool mustMoveReg = false;
1075         regMask = regSet.rsRegMaskFree() & ~avoidReg;
1077         if (genMaxOneBit(regMask))
1078             regMask = regSet.rsRegMaskFree();
// Need a move if the pair isn't entirely within free registers, or a half is on the stack.
1080         if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
1082             /* Note that we must call genMoveRegPair if one of our registers
1083                comes from the used mask, so that it will be properly spilled. */
1088         if (genMaxOneBit(regMask))
1089             regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
1091         if (genMaxOneBit(regMask))
1092             regMask |= regSet.rsRegMaskCanGrab();
1094         /* Did the value end up in a free register pair? */
1098             /* We'll have to move the value to a free (trashable) pair */
1099             genMoveRegPair(tree, regMask, REG_PAIR_NONE);
1104         noway_assert(needRegPair == REG_PAIR_NONE);
1105         noway_assert(!freeOnly);
1107         /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
1108         tmpUsedMask = tmpMask & regSet.rsMaskUsed;
1109         tmpMask &= ~regSet.rsMaskUsed;
1111         /* Make sure that the value is in "real" registers*/
// Low half is on the stack (presumably this branch tests rLo == REG_STK —
// the condition line is elided; confirm): load it into a picked register.
1114             /* Get one of the desired registers, but exclude rHi */
1116             regSet.rsLockReg(tmpMask);
1117             regSet.rsLockUsedReg(tmpUsedMask);
1119             regNumber reg = regSet.rsPickReg(regMask);
1121             regSet.rsUnlockUsedReg(tmpUsedMask);
1122             regSet.rsUnlockReg(tmpMask);
// Offset 0 = low half of the long.
1124             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
1126             tree->gtRegPair = gen2regs2pair(reg, rHi);
1128             regTracker.rsTrackRegTrash(reg);
1129             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1131         else if (rHi == REG_STK)
1133             /* Get one of the desired registers, but exclude rLo */
1135             regSet.rsLockReg(tmpMask);
1136             regSet.rsLockUsedReg(tmpUsedMask);
1138             regNumber reg = regSet.rsPickReg(regMask);
1140             regSet.rsUnlockUsedReg(tmpUsedMask);
1141             regSet.rsUnlockReg(tmpMask);
// Offset 4 = high half of the long.
1143             inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
1145             tree->gtRegPair = gen2regs2pair(rLo, reg);
1147             regTracker.rsTrackRegTrash(reg);
1148             gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1152     /* Does the caller want us to mark the register as used? */
1154     if (keepReg == RegSet::KEEP_REG)
1156         /* In case we're computing a value into a register variable */
1158         genUpdateLife(tree);
1160         /* Mark the register as 'used' */
1162         regSet.rsMarkRegPairUsed(tree);
1166 /*****************************************************************************
1168 * Same as genComputeRegPair(), the only difference being that the result
1169 * is guaranteed to end up in a trashable register pair.
// Convenience wrapper over genComputeRegPair() that forces the result into a
// trashable (free) register pair (freeOnly == true).
1173 void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
1175     genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
1178 /*****************************************************************************
1180 * The value 'tree' was earlier computed into a register pair; free up that
1181 * register pair (but also make sure the value is presently in a register
// Free the register pair previously holding 'tree', reloading it first if the
// value was spilled (pair analogue of genReleaseReg).
1185 void CodeGen::genReleaseRegPair(GenTreePtr tree)
1187     if (tree->gtFlags & GTF_SPILLED)
1189         /* The register has been spilled -- reload it */
1191         regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
1195     regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1198 /*****************************************************************************
1200 * The value 'tree' was earlier computed into a register pair. Check whether
1201 * either register of that pair has been spilled (and reload it if so), and
1202 * if 'keepReg' is 0, free the register pair.
// Pair analogue of genRecoverReg: reload a spilled pair (into 'regPair' if one
// is specified), move it to 'regPair' if it's elsewhere, then mark the pair
// used (KEEP_REG) or free (FREE_REG) per 'keepReg'.
1205 void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
1207     if (tree->gtFlags & GTF_SPILLED)
// With no specific target pair, unspill into any registers (mask elided here).
1211         if (regPair == REG_PAIR_NONE)
1214             regMask = genRegPairMask(regPair);
1216         /* The register pair has been spilled -- reload it */
1218         regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1221     /* Does the caller insist on the value being in a specific place? */
1223     if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1225         /* No good -- we'll have to move the value to a new place */
1227         genMoveRegPair(tree, 0, regPair);
1229         /* Mark the pair as used if appropriate */
1231         if (keepReg == RegSet::KEEP_REG)
1232             regSet.rsMarkRegPairUsed(tree);
1237     /* Free the register pair if the caller desired so */
1239     if (keepReg == RegSet::FREE_REG)
1240         regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1243 /*****************************************************************************
1245 * Compute the given long value into the specified register pair; don't mark
1246 * the register pair as used.
// Compute the long value into the exact pair 'regPair' without leaving it marked
// as used: compute with KEEP_REG, then recover with FREE_REG to release the pair.
1250 void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
1252     genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
1253     genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
1256 /*****************************************************************************
1257 * This helper makes sure that the regpair target of an assignment is
1258 * available for use. This needs to be called in genCodeForTreeLng just before
1259 * a long assignment, but must not be called until everything has been
1260 * evaluated, or else we might try to spill enregistered variables.
// Make both halves of 'regPair' available as the target of a long store by
// spilling whichever half is currently marked as used. A REG_STK half means
// that half of the long lives on the stack and needs no spilling. Must only
// be called after all operands are evaluated (see header comment above),
// or enregistered variables could be spilled by mistake.
1265 void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1267 /* Make sure the target of the store is available */
1269 regNumber regLo = genRegPairLo(regPair);
1270 regNumber regHi = genRegPairHi(regPair);
// Spill hi first, then lo; each only if it is actually held in rsMaskUsed.
1272 if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1273 regSet.rsSpillReg(regHi);
1275 if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1276 regSet.rsSpillReg(regLo);
1279 /*****************************************************************************/
1280 /*****************************************************************************
1282 * Return true if the given tree 'addr' can be computed via an addressing mode,
1283 * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
1284 * try to make it so (but we don't try 'too hard' to accomplish this).
1286 * If we end up needing a register (or two registers) to hold some part(s) of the
1287 * address, we return the use register mask via '*useMaskPtr'.
1289 * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
1290 * in use. The caller would then be responsible for calling
1291 * regSet.rsMarkRegFree(*useMaskPtr).
1293 * If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by
1294 * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
// Try to make 'addr' usable as an addressing mode (e.g. [rv1 + rv2*mul + cns]).
// On success, *useMaskPtr receives the registers the address depends on; with
// keepReg==KEEP_REG those registers are also marked as used (the caller must
// later free them), with FREE_REG the caller updates GC tracking via
// genDoneAddressable. 'oper' is the indirection (or atomic op) consuming the
// address, or NULL. See the block comment above for the full contract.
1297 bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
1301 RegSet::KeepReg keepReg,
1302 regMaskTP* useMaskPtr,
// GT_ARR_ELEM has its own addressability path (two registers: array object
// and accumulated offset) -- delegate and return.
1305 if (addr->gtOper == GT_ARR_ELEM)
1307 regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1315 bool operIsArrIndex; // is oper an array index
1316 GenTreePtr scaledIndex; // If scaled addressing mode can't be used
1318 regMaskTP anyMask = RBM_ALLINT;
1324 int ixv = INT_MAX; // unset value
1326 GenTreePtr scaledIndexVal;
1328 regMaskTP newLiveMask;
1332 /* Deferred address mode forming NYI for x86 */
1334 noway_assert(deferOK == false);
// 'oper', when present, must be an indir/atomic op whose address operand is
// exactly this 'addr' node.
1336 noway_assert(oper == NULL ||
1337 ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
1338 ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
1339 operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
// A GT_LEA already carries a decomposed address: pull base/offset out of it.
1341 if (addr->gtOper == GT_LEA)
1343 rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1344 GenTreeAddrMode* lea = addr->AsAddrMode();
1348 cns = lea->gtOffset;
1350 if (rv1 != NULL && rv2 == NULL && cns == 0 && rv1->InReg())
1358 // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1361 /* Is the complete address already sitting in a register? */
1363 if ((addr->InReg()) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1365 genUpdateLife(addr);
1368 rv2 = scaledIndex = 0;
1374 /* Is it an absolute address */
1376 if (addr->IsCnsIntOrI())
1378 rv1 = rv2 = scaledIndex = 0;
1379 // along this code path cns is never used, so place a BOGUS value in it as proof
1380 // cns = addr->gtIntCon.gtIconVal;
1386 /* Is there a chance of forming an address mode? */
1388 if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1390 /* This better not be an array index */
1391 noway_assert(!operIsArrIndex);
1395 // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1398 /* For scaled array access, RV2 may not be pointing to the index of the
1399 array if the CPU does not support the needed scaling factor. We will
1400 make it point to the actual index, and scaledIndex will point to
1404 scaledIndexVal = NULL;
1406 if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
1407 rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1410 compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1412 noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1415 /* Has the address already been computed? */
1425 genUpdateLife(addr);
1430 Here we have the following operands:
1432 rv1 ..... base address
1433 rv2 ..... offset value (or NULL)
1434 mul ..... multiplier for rv2 (or 0)
1435 cns ..... additional constant (or 0)
1437 The first operand must be present (and be an address) unless we're
1438 computing an expression via 'LEA'. The scaled operand is optional,
1439 but must not be a pointer if present.
// The scaled/index operand must never be a GC pointer.
1442 noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1444 /*-------------------------------------------------------------------------
1446 * Make sure both rv1 and rv2 (if present) are in registers
1450 // Trivial case : Is either rv1 or rv2 a NULL ?
1454 /* A single operand, make sure it's in a register */
1458 // In the case where "rv1" is already in a register, there's no reason to get into a
1459 // register in "regMask" yet, if there's a non-zero constant that we're going to add;
1460 // if there is, we can do an LEA.
1461 genCodeForTree(rv1, RBM_NONE);
1465 genCodeForTree(rv1, regMask);
1471 /* A single (scaled) operand, make sure it's in a register */
1473 genCodeForTree(rv2, 0);
1477 /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1478 they are in registers */
1480 noway_assert(rv1 && rv2);
1482 /* If we have to check a constant array index, compare it against
1483 the array dimension (see below) but then fold the index with a
1484 scaling factor (if any) and additional offset (if any).
1487 if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1489 if (scaledIndex != NULL)
1491 assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1492 rv2 = scaledIndexVal;
1494 /* We must have a range-checked index operation */
1496 noway_assert(operIsArrIndex);
1498 /* Get hold of the index value and see if it's a constant */
1500 if (rv2->IsIntCnsFitsInI32())
1502 ixv = (int)rv2->gtIntCon.gtIconVal;
1503 // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1504 if (scaledIndex != NULL)
// Reconstruct the scale from the shift/multiply constant; truncation of
// the 64-bit constant to int is acceptable here (see original remark).
1506 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
// The constant index has been folded into 'cns'; no index register needed.
1517 rv2 = scaledIndex = NULL;
1519 /* Add the scaled index into the added value */
1526 /* Make sure 'rv1' is in a register */
1528 genCodeForTree(rv1, regMask);
1536 /* op1 already in register - how about op2? */
1540 /* Great - both operands are in registers already. Just update
1541 the liveness and we are done. */
1557 /* rv1 is in a register, but rv2 isn't */
1561 /* rv1 is already materialized in a register. Just update liveness
1562 to rv1 and generate code for rv2 */
1565 regSet.rsMarkRegUsed(rv1, oper);
1570 else if (rv2->InReg())
1572 /* rv2 is in a register, but rv1 isn't */
1574 noway_assert(rv2->gtOper == GT_REG_VAR);
1578 /* rv2 is already materialized in a register. Update liveness
1579 to after rv2 and then hang on to rv2 */
1582 regSet.rsMarkRegUsed(rv2, oper);
1585 /* Generate the for the first operand */
1587 genCodeForTree(rv1, regMask);
1591 // Free up rv2 in the right fashion (it might be re-marked if keepReg)
// Lock rv1 while rv2 is released so freeing cannot recycle rv1's register.
1592 regSet.rsMarkRegUsed(rv1, oper);
1593 regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1595 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1600 /* We have evaluated rv1, and now we just need to update liveness
1601 to rv2 which was already in a register */
1612 /* Make sure we preserve the correct operand order */
1616 /* Generate the second operand first */
1618 // Determine what registers go live between rv2 and rv1
1619 newLiveMask = genNewLiveRegMask(rv2, rv1);
// Exclude registers that become live before rv1 and rv1's reserved regs
// from the hint mask for rv2.
1621 rv2Mask = regMask & ~newLiveMask;
1622 rv2Mask &= ~rv1->gtRsvdRegs;
1624 if (rv2Mask == RBM_NONE)
1626 // The regMask hint cannot be honored
1627 // We probably have a call that trashes the register(s) in regMask
1628 // so ignore the regMask hint, but try to avoid using
1629 // the registers in newLiveMask and the rv1->gtRsvdRegs
1631 rv2Mask = RBM_ALLINT & ~newLiveMask;
1632 rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1635 genCodeForTree(rv2, rv2Mask);
1636 regMask &= ~genRegMask(rv2->gtRegNum);
1638 regSet.rsMarkRegUsed(rv2, oper);
1640 /* Generate the first operand second */
1642 genCodeForTree(rv1, regMask);
1643 regSet.rsMarkRegUsed(rv1, oper);
1645 /* Free up both operands in the right order (they might be
1646 re-marked as used below)
1648 regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1650 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1655 /* Get the first operand into a register */
1657 // Determine what registers go live between rv1 and rv2
1658 newLiveMask = genNewLiveRegMask(rv1, rv2);
1660 rv1Mask = regMask & ~newLiveMask;
1661 rv1Mask &= ~rv2->gtRsvdRegs;
1663 if (rv1Mask == RBM_NONE)
1665 // The regMask hint cannot be honored
1666 // We probably have a call that trashes the register(s) in regMask
1667 // so ignore the regMask hint, but try to avoid using
1668 // the registers in liveMask and the rv2->gtRsvdRegs
1670 rv1Mask = RBM_ALLINT & ~newLiveMask;
1671 rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1674 genCodeForTree(rv1, rv1Mask);
1675 regSet.rsMarkRegUsed(rv1, oper);
1679 /* Here, we need to get rv2 in a register. We have either already
1680 materialized rv1 into a register, or it was already in a one */
1682 noway_assert(rv1->InReg());
1683 noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1685 /* Generate the second operand as well */
1687 regMask &= ~genRegMask(rv1->gtRegNum);
1688 genCodeForTree(rv2, regMask);
1692 /* rev==true means the evaluation order is rv2,rv1. We just
1693 evaluated rv2, and rv1 was already in a register. Just
1694 update liveness to rv1 and we are done. */
1700 /* We have evaluated rv1 and rv2. Free up both operands in
1701 the right order (they might be re-marked as used below) */
1703 /* Even though we have not explicitly marked rv2 as used,
1704 rv2->gtRegNum may be used if rv2 is a multi-use or
1705 an enregistered variable. */
1707 regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
1709 /* Check for special case both rv1 and rv2 are the same register */
1710 if (rv2Used != genRegMask(rv1->gtRegNum))
1713 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1717 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1723 /*-------------------------------------------------------------------------
1725 * At this point, both rv1 and rv2 (if present) are in registers
1731 /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1733 if (rv1 && !(rv1->InReg()))
1735 if (rv2 && !(rv2->InReg()))
1740 // *(intVar1+intVar1) causes problems as we
1741 // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1742 // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1743 // that currently as we return a single mask in useMaskPtr.
1745 if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && rv1->InReg() && rv2->InReg())
1747 if (rv1->gtRegNum == rv2->gtRegNum)
1749 noway_assert(!operIsArrIndex);
1754 /* Check either register operand to see if it needs to be saved */
1758 noway_assert(rv1->InReg());
1760 if (keepReg == RegSet::KEEP_REG)
1762 regSet.rsMarkRegUsed(rv1, oper);
1766 /* If the register holds an address, mark it */
1768 gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1774 noway_assert(rv2->InReg());
1776 if (keepReg == RegSet::KEEP_REG)
1777 regSet.rsMarkRegUsed(rv2, oper);
1782 noway_assert(!scaledIndex);
1786 /* Compute the set of registers the address depends on */
1788 regMaskTP useMask = RBM_NONE;
// rv1 may have been spilled by the work above; reload before reading gtRegNum.
1792 if (rv1->gtFlags & GTF_SPILLED)
1793 regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1795 noway_assert(rv1->InReg());
1796 useMask |= genRegMask(rv1->gtRegNum);
1801 if (rv2->gtFlags & GTF_SPILLED)
// Lock rv1's register while unspilling rv2 so the unspill cannot steal it.
1805 regMaskTP lregMask = genRegMask(rv1->gtRegNum);
1808 regSet.rsLockReg(lregMask, &used);
1809 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1810 regSet.rsUnlockReg(lregMask, used);
1813 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1815 noway_assert(rv2->InReg());
1816 useMask |= genRegMask(rv2->gtRegNum);
1819 /* Tell the caller which registers we need to hang on to */
1821 *useMaskPtr = useMask;
1826 /*****************************************************************************
1828 * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
// Generate code for a GT_ARR_BOUNDS_CHECK node: compare the index against the
// array length and jump to the range-check-failure throw helper when
// index >= length (unsigned compare, so a negative index also fails).
// Handles the cross product of {constant, non-constant} index with
// {GT_ARR_LENGTH, constant, other} length expressions.
1831 void CodeGen::genRangeCheck(GenTreePtr oper)
1833 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1834 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1836 GenTreePtr arrLen = bndsChk->gtArrLen;
1837 GenTreePtr arrRef = NULL;
1840 /* Is the array index a constant value? */
1841 GenTreePtr index = bndsChk->gtIndex;
1842 if (!index->IsCnsIntOrI())
1844 // No, it's not a constant.
1845 genCodeForTree(index, RBM_ALLINT);
1846 regSet.rsMarkRegUsed(index);
1849 // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
1850 // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
1853 if (arrLen->OperGet() == GT_ARR_LENGTH)
1855 GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
1856 lenOffset = arrLenExact->ArrLenOffset();
1858 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1859 // We always load the length into a register on ARM and x64.
1861 // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
1862 // lengths, but the index expression *can* be native int (64-bits)
// On x86 we can compare against [arrRef + lenOffset] directly, so only the
// array reference itself needs a register.
1863 arrRef = arrLenExact->ArrRef();
1864 genCodeForTree(arrRef, RBM_ALLINT);
1865 noway_assert(arrRef->InReg());
1866 regSet.rsMarkRegUsed(arrRef);
1867 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1870 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1871 // This is another form in which we have an array reference and a constant length. Don't use
1872 // on LOAD_STORE or 64BIT.
1873 else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
1875 genCodeForTree(arrRef, RBM_ALLINT);
1876 noway_assert(arrRef->InReg());
1877 regSet.rsMarkRegUsed(arrRef);
1878 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1882 // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
1885 // (Unless it's a constant.)
1886 if (!arrLen->IsCnsIntOrI())
1888 genCodeForTree(arrLen, RBM_ALLINT);
1889 regSet.rsMarkRegUsed(arrLen);
1891 noway_assert(arrLen->InReg());
1892 noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
1896 if (!index->IsCnsIntOrI())
1898 // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
1899 // from its register, get it back in a register.
1900 regMaskTP indRegMask = RBM_ALLINT;
1901 regMaskTP arrRegMask = RBM_ALLINT;
1902 if (!(index->gtFlags & GTF_SPILLED))
1903 arrRegMask = ~genRegMask(index->gtRegNum);
1906 genRecoverReg(arrRef, arrRegMask, RegSet::KEEP_REG);
1907 indRegMask &= ~genRegMask(arrRef->gtRegNum);
1909 else if (!arrLen->IsCnsIntOrI())
1911 genRecoverReg(arrLen, arrRegMask, RegSet::KEEP_REG);
1912 indRegMask &= ~genRegMask(arrLen->gtRegNum);
1914 if (index->gtFlags & GTF_SPILLED)
1915 regSet.rsUnspillReg(index, indRegMask, RegSet::KEEP_REG);
1917 /* Make sure we have the values we expect */
1918 noway_assert(index->InReg());
1919 noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
1921 noway_assert(index->TypeGet() == TYP_I_IMPL ||
1922 (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
1923 var_types indxType = index->TypeGet();
1924 if (indxType != TYP_I_IMPL)
1928 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1930 /* Generate "cmp index, [arrRef+LenOffs]" */
1931 inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
1933 else if (arrLen->IsCnsIntOrI())
1935 ssize_t len = arrLen->AsIntConCommon()->IconValue();
1936 inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
1940 inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
1943 /* Generate "jae <fail_label>" */
// Unsigned >= covers both index-too-large and negative-index cases.
1945 noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
1946 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1947 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1951 /* Generate "cmp [rv1+LenOffs], cns" */
1953 bool indIsInt = true;
1954 #ifdef _TARGET_64BIT_
// A constant index wider than 32 bits cannot use the 32-bit compare forms.
1956 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1957 if (ixvFull > INT32_MAX)
1966 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1967 int ixv = (int)ixvFull;
1969 if (arrRef != NULL && indIsInt)
1970 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1971 /* Generate "cmp [arrRef+LenOffs], ixv" */
1972 inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
1973 // Generate "jbe <fail_label>"
// Operands are swapped vs. the non-constant case, so the jump condition
// flips from "jae" to "jbe".
1974 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1975 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1977 else if (arrLen->IsCnsIntOrI())
1979 ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
1980 // Both are constants; decide at compile time.
1981 if (!(0 <= ixvFull && ixvFull < lenv))
1983 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1988 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1992 /* Generate "cmp arrLen, ixv" */
1993 inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
1994 // Generate "jbe <fail_label>"
1995 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1996 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2000 // Free the registers that were used.
2001 if (!index->IsCnsIntOrI())
2003 regSet.rsMarkRegFree(index->gtRegNum, index);
2008 regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
2010 else if (!arrLen->IsCnsIntOrI())
2012 regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
2016 /*****************************************************************************
2018 * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2019 * Otherwise, check if rvalue is in register. If so, mark it. Then
2020 * call genMakeAddressable(). Needed because genMakeAddressable is used
2021 * for both lvalue and rvalue, and we only can do this for rvalue.
// Make an rvalue addressable. For a GT_LCL_VAR, first check (via redundant-load
// tracking) whether the stack local's value is already live in an acceptable
// register; if so, just mark the tree as being in that register. Otherwise
// fall back to genMakeAddressable2. Only safe for rvalues -- see the header
// comment above.
2025 regMaskTP CodeGen::genMakeRvalueAddressable(
2026 GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
2032 if (tree->gtOper == GT_LCL_VAR)
2034 reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
// needReg == 0 means "no preference", so any register found is acceptable.
2036 if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2038 noway_assert(!isRegPairType(tree->gtType));
2040 genMarkTreeInReg(tree, reg);
2046 return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2049 /*****************************************************************************/
// Returns true if this GT_LCL_VAR reference is the last use of the (tracked)
// local, as recorded by liveness in the GTF_VAR_DEATH flag.
2051 bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
2053 const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2055 noway_assert(tree->OperGet() == GT_LCL_VAR);
2056 noway_assert(varDsc->lvTracked);
2058 return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2061 /*****************************************************************************
2063 * This is genMakeAddressable(GT_ARR_ELEM).
2064 * Makes the array-element addressable and returns the addressability registers.
2065 * It also marks them as used if keepReg==RegSet::KEEP_REG.
2066 * tree is the dependent tree.
2068 * Note that an array-element needs 2 registers to be addressable, the
2069 * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2070 * with the 2 registers so that other functions (like instGetAddrMode()) know
2071 * where to look for the offset to use.
// Make a GT_ARR_ELEM (multi-dimensional array element) addressable. Evaluates
// the array object and every index, performs per-dimension lower-bound
// subtraction and range checks, and accumulates the element offset into a
// single register. Returns the mask of the two addressability registers
// (array object + accumulated offset); with keepReg==KEEP_REG they are marked
// as used on gtArrObj and gtArrInds[0]. See the header comment above.
2074 regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
2076 noway_assert(arrElem->gtOper == GT_ARR_ELEM);
2077 noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
2079 /* Evaluate all the operands. We don't evaluate them into registers yet
2080 as GT_ARR_ELEM does not reorder the evaluation of the operands, and
2081 hence may use a sub-optimal ordering. We try to improve this
2082 situation somewhat by accessing the operands in stages
2083 (genMakeAddressable2 + genComputeAddressable and
2084 genCompIntoFreeReg + genRecoverReg).
2086 Note: we compute operands into free regs to avoid multiple uses of
2087 the same register. Multi-use would cause problems when we free
2088 registers in FIFO order instead of the assumed LIFO order that
2089 applies to all type of tree nodes except for GT_ARR_ELEM.
2092 GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
2093 unsigned rank = arrElem->gtArrElem.gtArrRank;
2094 var_types elemType = arrElem->gtArrElem.gtArrElemType;
2095 regMaskTP addrReg = RBM_NONE;
2096 regMaskTP regNeed = RBM_ALLINT;
2098 #if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
2099 // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
2100 // since the arrObj participates in the lea/add instruction
2101 // that computes ARG_0 we should avoid putting it in ARG_1
2103 if (varTypeIsGC(elemType))
2105 regNeed &= ~RBM_ARG_1;
2109 // Strip off any comma expression.
2110 arrObj = genCodeForCommaTree(arrObj);
2112 // Having generated the code for the comma, we don't care about it anymore.
2113 arrElem->gtArrElem.gtArrObj = arrObj;
2115 // If the array ref is a stack var that's dying here we have to move it
2116 // into a register (regalloc already counts on this), as if it's a GC pointer
2117 // it can be collected from here on. This is not an issue for locals that are
2118 // in a register, as they get marked as used and will be tracked.
2119 // The bug that caused this is #100776. (untracked vars?)
2120 if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
2121 !genMarkLclVar(arrObj))
2123 genCodeForTree(arrObj, regNeed)
2124 regSet.rsMarkRegUsed(arrObj, 0);
2125 addrReg = genRegMask(arrObj->gtRegNum);
2129 addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
2130 true, // forLoadStore
2133 true); // evalSideEffs
// First stage: compute every index into a free register (kept), so they can
// be recovered in order below.
2137 for (dim = 0; dim < rank; dim++)
2138 genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
2140 /* Ensure that the array-object is in a register */
2142 addrReg = genKeepAddressable(arrObj, addrReg);
2143 genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
2145 regNumber arrReg = arrObj->gtRegNum;
2146 regMaskTP arrRegMask = genRegMask(arrReg);
2147 regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
// Lock the array register so index recovery below cannot displace it.
2148 regSet.rsLockUsedReg(arrRegMask);
2150 /* Now process all the indices, do the range check, and compute
2151 the offset of the element */
2153 regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
2155 for (dim = 0; dim < rank; dim++)
2157 GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
2159 /* Get the index into a free register (other than the register holding the array) */
2161 genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
2163 #if CPU_LOAD_STORE_ARCH
2164 /* Subtract the lower bound, and do the range check */
// Load/store architectures must first load the per-dimension lower bound
// and length from the array's bounds data into a scratch register.
2166 regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
2167 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2168 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2169 regTracker.rsTrackRegTrash(valueReg);
2170 getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
2171 regTracker.rsTrackRegTrash(index->gtRegNum);
2173 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2174 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2175 getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
2177 /* Subtract the lower bound, and do the range check */
2178 getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
2179 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2180 regTracker.rsTrackRegTrash(index->gtRegNum);
2182 getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
2183 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
// Unsigned >= compare: a negative (post-subtraction) index also fails.
2185 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2186 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
2190 /* Hang on to the register of the first index */
2192 noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
2193 accReg = index->gtRegNum;
2194 noway_assert(accReg != arrReg);
2195 regSet.rsLockUsedReg(genRegMask(accReg));
2199 /* Evaluate accReg = accReg*dim_size + index */
2201 noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
2202 #if CPU_LOAD_STORE_ARCH
2203 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2204 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2205 regTracker.rsTrackRegTrash(valueReg);
2206 getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
2208 getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
2209 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2212 inst_RV_RV(INS_add, accReg, index->gtRegNum);
2213 regSet.rsMarkRegFree(index->gtRegNum, index);
2214 regTracker.rsTrackRegTrash(accReg);
// If the element size is not a scale the addressing mode can encode,
// multiply the accumulated index by the element size now.
2218 if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
2220 regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
2222 getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
2223 regTracker.rsTrackRegTrash(accReg);
2226 regSet.rsUnlockUsedReg(genRegMask(arrReg));
2227 regSet.rsUnlockUsedReg(genRegMask(accReg));
2229 regSet.rsMarkRegFree(genRegMask(arrReg));
2230 regSet.rsMarkRegFree(genRegMask(accReg));
2232 if (keepReg == RegSet::KEEP_REG)
2234 /* We mark the addressability registers on arrObj and gtArrInds[0].
2235 instGetAddrMode() knows to work with this. */
2237 regSet.rsMarkRegUsed(arrObj, tree);
2238 regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
2241 return genRegMask(arrReg) | genRegMask(accReg);
2244 /*****************************************************************************
2246 * Make sure the given tree is addressable. 'needReg' is a mask that indicates
2247 * the set of registers we would prefer the destination tree to be computed
2248 * into (RBM_NONE means no preference).
2250 * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
2252 * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2253 * on as used, and return the mask for that register set (if no registers
2254 * are marked as used, RBM_NONE is returned).
2256 * If 'smallOK' is not true and the datatype being address is a byte or short,
2257 * then the tree is forced into a register. This is useful when the machine
2258 * instruction being emitted does not have a byte or short version.
2260 * The "deferOK" parameter indicates the mode of operation - when it's false,
2261 * upon returning an actual address mode must have been formed (i.e. it must
2262 * be possible to immediately call one of the inst_TT methods to operate on
2263 * the value). When "deferOK" is true, we do whatever it takes to be ready
2264 * to form the address mode later - for example, if an index address mode on
2265 * a particular CPU requires the use of a specific register, we usually don't
2266 * want to immediately grab that register for an address mode that will only
2267 * be needed later. The convention is to call genMakeAddressable() with
2268 * "deferOK" equal to true, do whatever work is needed to prepare the other
2269 * operand, call genMakeAddressable() with "deferOK" equal to false, and
2270 * finally call one of the inst_TT methods right after that.
2272 * If we do any other codegen after genMakeAddressable(tree) which can
2273 * potentially spill the addressability registers, genKeepAddressable()
2274 * needs to be called before accessing the tree again.
2276 * genDoneAddressable() needs to be called when we are done with the tree
2277 * to free the addressability registers.
// Make 'tree' addressable (usable with the inst_XX_TT family). Returns the
// mask of registers the addressability depends on; with keepReg==KEEP_REG
// those are also marked as used. 'smallOK' permits byte/short operands to
// stay in memory; 'deferOK' allows deferring actual address-mode formation.
// See the detailed contract in the block comment above.
2280 regMaskTP CodeGen::genMakeAddressable(
2281 GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
2283 GenTreePtr addr = NULL;
2286 /* Is the value simply sitting in a register? */
2290 genUpdateLife(tree);
2295 // TODO: If the value is for example a cast of float -> int, compute
2296 // TODO: the converted value into a stack temp, and leave it there,
2297 // TODO: since stack temps are always addressable. This would require
2298 // TODO: recording the fact that a particular tree is in a stack temp.
2300 /* byte/char/short operand -- is this acceptable to the caller? */
2302 if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
2305 // Evaluate non-last elements of comma expressions, to get to the last.
2306 tree = genCodeForCommaTree(tree);
2308 switch (tree->gtOper)
2312 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
2313 // to worry about it being enregistered.
2314 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
2316 genUpdateLife(tree);
// GT_LCL_VAR: if genMarkLclVar fails the variable lives on the stack and
// is addressable as-is; otherwise fall through to the register-variable case.
2321 if (!genMarkLclVar(tree))
2323 genUpdateLife(tree);
2327 __fallthrough; // it turns out the variable lives in a register
2331 genUpdateLife(tree);
2340 #ifdef _TARGET_64BIT_
2341 // Non-relocs will be sign extended, so we don't have to enregister
2342 // constants that are equivalent to a sign-extended int.
2343 // Relocs can be left alone if they are RIP-relative.
2344 if ((genTypeSize(tree->TypeGet()) > 4) &&
2345 (!tree->IsIntCnsFitsInI32() ||
2346 (tree->IsIconHandle() &&
2347 (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
2351 #endif // _TARGET_64BIT_
2356 // For MinOpts, we don't do constant folding, so we have
2357 // constants showing up in places we don't like.
2358 // force them into a register now to prevent that.
2359 if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
2366 /* Try to make the address directly addressable */
2368 if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2369 needReg, keepReg, &regMask, deferOK))
2371 genUpdateLife(tree);
2375 /* No good, we'll have to load the address into a register */
2378 tree = tree->gtOp.gtOp1;
2387 /* Here we need to compute the value 'tree' into a register */
2389 genCodeForTree(tree, needReg);
2393 noway_assert(tree->InReg());
2395 if (isRegPairType(tree->gtType))
2397 /* Are we supposed to hang on to the register? */
2399 if (keepReg == RegSet::KEEP_REG)
2400 regSet.rsMarkRegPairUsed(tree);
2402 regMask = genRegPairMask(tree->gtRegPair);
2406 /* Are we supposed to hang on to the register? */
2408 if (keepReg == RegSet::KEEP_REG)
2409 regSet.rsMarkRegUsed(tree, addr);
2411 regMask = genRegMask(tree->gtRegNum);
2417 /*****************************************************************************
2418 * Compute a tree (which was previously made addressable using
2419 * genMakeAddressable()) into a register.
2420 * needReg - mask of preferred registers.
2421 * keepReg - should the computed register be marked as used by the tree
2422 * freeOnly - target register needs to be a scratch register
// Compute a tree (previously made addressable via genMakeAddressable) into a
// register. 'addrReg'/'keptReg' describe the addressability registers being
// released; 'needReg' is the preferred target set; 'keepReg' says whether the
// result register is marked as used; 'freeOnly' forces a scratch register.
2425 void CodeGen::genComputeAddressable(GenTreePtr tree,
2427 RegSet::KeepReg keptReg,
2429 RegSet::KeepReg keepReg,
2432 noway_assert(genStillAddressable(tree));
2433 noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
// Release the addressability registers before picking the result register.
2435 genDoneAddressable(tree, addrReg, keptReg);
2441 reg = tree->gtRegNum;
// If the current register isn't free and the caller requires a scratch
// register, fall through to re-materialize the value below.
2443 if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
2448 if (tree->OperIsConst())
2450 /* Need to handle consts separately as we don't want to emit
2451 "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
2452 handles consts better for SMALL_CODE */
2454 noway_assert(tree->IsCnsIntOrI());
2455 reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
2460 reg = regSet.rsPickReg(needReg);
2462 inst_RV_TT(INS_mov, reg, tree);
2463 regTracker.rsTrackRegTrash(reg);
2467 genMarkTreeInReg(tree, reg);
2469 if (keepReg == RegSet::KEEP_REG)
2470 regSet.rsMarkRegUsed(tree);
2472 gcInfo.gcMarkRegPtrVal(tree);
2475 /*****************************************************************************
2476 * Should be similar to genMakeAddressable() but gives more control.
// When 'evalSideEffs' is set and the tree is a GT_IND that can throw
// (GTF_EXCEPT), the indirection is evaluated fully into a register (pair)
// here, so the potential exception is raised at this point; otherwise this
// simply defers to genMakeAddressable(). Returns the mask of registers
// holding/addressing the value.
2479 regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
2481 RegSet::KeepReg keepReg,
2488 bool evalToReg = false;
2490 if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2493 #if CPU_LOAD_STORE_ARCH
// Evaluate the tree into a register (pair) to force the side effect.
2500 genCodeForTree(tree, needReg);
2502 noway_assert(tree->InReg());
2504 if (isRegPairType(tree->gtType))
2506 /* Are we supposed to hang on to the register? */
2508 if (keepReg == RegSet::KEEP_REG)
2509 regSet.rsMarkRegPairUsed(tree);
2511 return genRegPairMask(tree->gtRegPair);
2515 /* Are we supposed to hang on to the register? */
2517 if (keepReg == RegSet::KEEP_REG)
2518 regSet.rsMarkRegUsed(tree);
2520 return genRegMask(tree->gtRegNum);
// Not evaluated to a register: fall back to the standard path.
2525 return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2529 /*****************************************************************************
2531 * The given tree was previously passed to genMakeAddressable(); return
2532 * 'true' if the operand is still addressable.
2536 bool CodeGen::genStillAddressable(GenTreePtr tree)
2538 /* Has the value (or one or more of its sub-operands) been spilled? */
// Addressability is lost only via spilling, tracked by these two flags
// (the node itself spilled, or one of its address sub-operands spilled).
2540 if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
2546 /*****************************************************************************
2548 * Recursive helper to restore complex address modes. The 'lockPhase'
2549 * argument indicates whether we're in the 'lock' or 'reload' phase.
// Returns the mask of registers making up the address mode. Called twice by
// genRestAddressable(): first with lockPhase=true (lock surviving regs),
// then lockPhase=false (reload spilled regs, locking each as it reappears).
2552 regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
2554 regMaskTP regMask = RBM_NONE;
2556 /* Have we found a spilled value? */
2558 if (tree->gtFlags & GTF_SPILLED)
2560 /* Do nothing if we're locking, otherwise reload and lock */
2564 /* Unspill the register */
2566 regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
2568 /* The value should now be sitting in a register */
2570 noway_assert(tree->InReg());
2571 regMask = genRegMask(tree->gtRegNum);
2573 /* Mark the register as used for the address */
2575 regSet.rsMarkRegUsed(tree, addr);
2577 /* Lock the register until we're done with the entire address */
2579 regSet.rsMaskLock |= regMask;
2585 /* Is this sub-tree sitting in a register? */
2589 regMask = genRegMask(tree->gtRegNum);
2591 /* Lock the register if we're in the locking phase */
2594 regSet.rsMaskLock |= regMask;
2598 /* Process any sub-operands of this node */
2600 unsigned kind = tree->OperKind();
2602 if (kind & GTK_SMPOP)
2604 /* Unary/binary operator */
2606 if (tree->gtOp.gtOp1)
2607 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
2608 if (tree->gtGetOp2IfPresent())
2609 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
2611 else if (tree->gtOper == GT_ARR_ELEM)
2613 /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
2614 which holds the offset-calculation */
2616 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
2617 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
2619 else if (tree->gtOper == GT_CMPXCHG)
2621 regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
2625 /* Must be a leaf/constant node */
2627 noway_assert(kind & (GTK_LEAF | GTK_CONST));
2634 /*****************************************************************************
2636 * The given tree was previously passed to genMakeAddressable, but since then
2637 * some of its registers are known to have been spilled; do whatever it takes
2638 * to make the operand addressable again (typically by reloading any spilled
// registers). 'lockMask' is a set of registers the caller locked before the
// call; it is unlocked here before returning. Returns the refreshed set of
// address registers.
2642 regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
2644 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2646 /* Is this a 'simple' register spill? */
2648 if (tree->gtFlags & GTF_SPILLED)
2650 /* The mask must match the original register/regpair */
2652 if (isRegPairType(tree->gtType))
2654 noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
2656 regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
// The unspill may land in different registers; recompute the mask.
2658 addrReg = genRegPairMask(tree->gtRegPair);
2662 noway_assert(addrReg == genRegMask(tree->gtRegNum));
2664 regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2666 addrReg = genRegMask(tree->gtRegNum);
// Release the caller's temporary lock now that the value is restored.
2669 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2670 regSet.rsMaskLock -= lockMask;
2675 /* We have a complex address mode with some of its sub-operands spilled */
2677 noway_assert((tree->InReg()) == 0);
2678 noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
2681 We'll proceed in several phases:
2683 1. Lock any registers that are part of the address mode and
2684 have not been spilled. This prevents these registers from
2685 getting spilled in step 2.
2687 2. Reload any registers that have been spilled; lock each
2688 one right after it is reloaded.
2690 3. Unlock all the registers.
2693 addrReg = genRestoreAddrMode(tree, tree, true);
2694 addrReg |= genRestoreAddrMode(tree, tree, false);
2696 /* Unlock all registers that the address mode uses */
2698 lockMask |= addrReg;
2700 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2701 regSet.rsMaskLock -= lockMask;
2706 /*****************************************************************************
2708 * The given tree was previously passed to genMakeAddressable, but since then
2709 * some of its registers might have been spilled ('addrReg' is the set of
2710 * registers used by the address). This function makes sure the operand is
2711 * still addressable (while avoiding any of the registers in 'avoidMask'),
2712 * and returns the (possibly modified) set of registers that are used by
2713 * the address (these will be marked as used on exit).
2716 regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
2718 /* Is the operand still addressable? */
2720 tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
2722 if (!genStillAddressable(tree))
2726 // Temporarily lock 'avoidMask' while we restore addressability
2727 // genRestAddressable will unlock the 'avoidMask' for us
2728 // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2729 // In regSet.rsRegMaskFree() we require that all locked register be marked as used
2731 regSet.rsLockUsedReg(avoidMask);
2734 addrReg = genRestAddressable(tree, addrReg, avoidMask);
// genRestAddressable must have dropped the temporary lock on avoidMask.
2736 noway_assert((regSet.rsMaskLock & avoidMask) == 0);
2742 /*****************************************************************************
2744 * After we're finished with the given operand (which was previously marked
2745 * by calling genMakeAddressable), this function must be called to free any
2746 * registers that may have been used by the address.
2747 * keptReg indicates if the addressability registers were marked as used
2748 * by genMakeAddressable().
2751 void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
2753 if (keptReg == RegSet::FREE_REG)
2755 // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2756 // ie. There may be a pending use in a higher-up tree.
2758 addrReg &= ~regSet.rsMaskUsed;
2760 /* addrReg was not marked as used. So just reset its GC info */
2763 gcInfo.gcMarkRegSetNpt(addrReg);
2768 /* addrReg was marked as used. So we need to free it up (which
2769 will also reset its GC info) */
2771 regSet.rsMarkRegFree(addrReg);
2775 /*****************************************************************************/
2776 /*****************************************************************************
2778 * Make sure the given floating point value is addressable, and return a tree
2779 * that will yield the value as an addressing mode (this tree may differ from
2780 * the one passed in, BTW). If the only way to make the value addressable is
2781 * to evaluate into the FP stack, we do this and return zero.
// regMaskPtr receives the registers used by the resulting address mode.
2784 GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
2788 switch (tree->gtOper)
// FP constants are materialized as read-only data via genMakeConst;
// TYP_FLOAT is narrowed from the stored double before emission.
2796 if (tree->gtType == TYP_FLOAT)
2798 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
2799 return genMakeConst(&f, TYP_FLOAT, tree, false);
2801 return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
2806 /* Try to make the address directly addressable */
2808 if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2809 0, RegSet::FREE_REG, regMaskPtr, false))
2811 genUpdateLife(tree);
2820 #if FEATURE_STACK_FP_X87
2821 /* We have no choice but to compute the value 'tree' onto the FP stack */
2823 genCodeForTreeFlt(tree);
2828 /*****************************************************************************/
2829 /*****************************************************************************
2831 * Display a string literal value (debug only).
2837 /*****************************************************************************
2839 * Generate code to check that the GS cookie wasn't thrashed by a buffer
2840 * overrun. If pushReg is true, preserve all registers around code sequence.
2841 * Otherwise, ECX maybe modified.
2843 * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
2845 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2847 // Make sure that EAX didn't die in the return expression
// Keep the integer return register reported as a live GC ref across the
// check when we are not push/pop-preserving registers.
2848 if (!pushReg && (compiler->info.compRetType == TYP_REF))
2849 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2851 // Add cookie check code for unsafe buffers
2852 BasicBlock* gsCheckBlk;
2853 regMaskTP byrefPushedRegs = RBM_NONE;
2854 regMaskTP norefPushedRegs = RBM_NONE;
2855 regMaskTP pushedRegs = RBM_NONE;
2857 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
// Case 1: the cookie is a known inline constant (no global address) --
// compare the stack slot directly against the immediate.
2859 if (compiler->gsGlobalSecurityCookieAddr == NULL)
2862 CLANG_FORMAT_COMMENT_ANCHOR;
2864 #if CPU_LOAD_STORE_ARCH
// Load/store architectures: load the slot, then compare against the
// immediate (directly if it is ALU-encodable, else via a second register).
2866 regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
2867 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
2868 regTracker.rsTrackRegTrash(reg);
2870 if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
2871 arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
2873 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
2877 // Load CookieVal into a register
2878 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
2879 instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
2880 getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
// x86: can compare memory against an immediate in one instruction.
2883 getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
2884 (int)compiler->gsGlobalSecurityCookieVal);
// Case 2: the cookie value must be loaded from a global address.
2889 regNumber regGSCheck;
2890 regMaskTP regMaskGSCheck;
2891 #if CPU_LOAD_STORE_ARCH
2892 regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
2893 regMaskGSCheck = genRegMask(regGSCheck);
2895 // Don't pick the 'this' register
2896 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
2897 (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
2899 regGSCheck = REG_EDX;
2900 regMaskGSCheck = RBM_EDX;
2904 regGSCheck = REG_ECX;
2905 regMaskGSCheck = RBM_ECX;
// Preserve the scratch register if the caller asked for it and it is live.
2909 if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
2911 pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
2915 noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
2918 #if defined(_TARGET_ARM_)
2919 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2920 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
2922 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
2923 (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2924 #endif // !_TARGET_ARM_
2925 regTracker.rsTrashRegSet(regMaskGSCheck);
2927 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
2928 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
2929 regTracker.rsTrackRegTrash(regTmp);
2930 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
2932 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
// Cookie matched -> jump over the fail-fast call; mismatch falls through
// into CORINFO_HELP_FAIL_FAST.
2936 gsCheckBlk = genCreateTempLabel();
2937 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2938 inst_JMP(jmpEqual, gsCheckBlk);
2939 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
2940 genDefineTempLabel(gsCheckBlk);
2942 genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
2945 /*****************************************************************************
2947 * Generate any side effects within the given expression tree.
// Evaluates only the side-effecting portions (indirections that may fault,
// volatile loads, sub-expressions), discarding values, while keeping
// liveness and GC info up to date. Iterates down the tree via tail-loop.
2950 void CodeGen::genEvalSideEffects(GenTreePtr tree)
2957 /* Does this sub-tree contain any side-effects? */
2958 if (tree->gtFlags & GTF_SIDE_EFFECT)
2960 #if FEATURE_STACK_FP_X87
2961 /* Remember the current FP stack level */
2962 int iTemps = genNumberTemps();
2964 if (tree->OperIsIndir())
2966 regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
2970 gcInfo.gcMarkRegPtrVal(tree);
2971 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2973 // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
2974 // do not need an additional null-check
2975 /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
// NOTE(review): '((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE)' is
// always non-zero since GTF_IND_VOLATILE is a non-zero constant; the
// comment above suggests '& (GTF_EXCEPT | GTF_IND_VOLATILE)' was intended.
// Confirm against upstream before changing.
2976 else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
2978 /* Compare against any register to do null-check */
2979 CLANG_FORMAT_COMMENT_ANCHOR;
2981 #if defined(_TARGET_XARCH_)
// A 1-byte cmp against the address faults if the pointer is null.
2982 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
2983 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2984 #elif CPU_LOAD_STORE_ARCH
// Load/store architectures must actually perform the load.
2985 if (varTypeIsFloating(tree->TypeGet()))
2987 genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
2992 genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
2995 if (tree->gtFlags & GTF_IND_VOLATILE)
2997 // Emit a memory barrier instruction after the load
2998 instGen_MemoryBarrier();
3007 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3012 /* Generate the expression and throw it away */
3013 genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3016 gcInfo.gcMarkRegPtrVal(tree);
3019 #if FEATURE_STACK_FP_X87
3020 /* If the tree computed a value on the FP stack, pop the stack */
3021 if (genNumberTemps() > iTemps)
3023 noway_assert(genNumberTemps() == iTemps + 1);
3024 genDiscardStackFP(tree);
3030 noway_assert(tree->gtOper != GT_ASG);
3032 /* Walk the tree, just to mark any dead values appropriately */
3034 oper = tree->OperGet();
3035 kind = tree->OperKind();
3037 /* Is this a constant or leaf node? */
3039 if (kind & (GTK_CONST | GTK_LEAF))
3041 #if FEATURE_STACK_FP_X87
3042 if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
3044 genRegVarDeathStackFP(tree);
3045 FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3048 genUpdateLife(tree);
3049 gcInfo.gcMarkRegPtrVal(tree);
3053 /* Must be a 'simple' unary/binary operator */
3055 noway_assert(kind & GTK_SMPOP);
// Recurse into op1, then tail-loop on op2 (or op1 for unary ops).
3057 if (tree->gtGetOp2IfPresent())
3059 genEvalSideEffects(tree->gtOp.gtOp1);
3061 tree = tree->gtOp.gtOp2;
3066 tree = tree->gtOp.gtOp1;
3072 /*****************************************************************************
3074 * A persistent pointer value is being overwritten, record it for the GC.
3076 * tgt : the destination being written to
3077 * assignVal : the value being assigned (the source). It must currently be in a register.
3078 * tgtAddrReg : the set of registers being used by "tgt"
3080 * Returns : the mask of the scratch register that was used.
3081 * RBM_NONE if a write-barrier is not needed.
3084 regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
3086 noway_assert(assignVal->InReg());
3088 GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3089 if (wbf == GCInfo::WBF_NoBarrier)
3092 regMaskTP resultRegMask = RBM_NONE;
3094 #if FEATURE_WRITE_BARRIER
3096 regNumber reg = assignVal->gtRegNum;
3098 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
// x86 fast path: per-source-register assembly helpers. The table is
// indexed [checked?][source register]; -1 marks registers with no helper.
3100 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
3103 const static int regToHelper[2][8] = {
3104 // If the target is known to be in managed memory
3106 CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
3107 CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
3110 // Don't know if the target is in managed memory
3112 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
3113 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
3114 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
// Compile-time sanity checks that the table layout matches regNumber order.
3118 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
3119 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
3120 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
3121 noway_assert(regToHelper[0][REG_ESP] == -1);
3122 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
3123 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
3124 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
3126 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
3127 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
3128 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
3129 noway_assert(regToHelper[1][REG_ESP] == -1);
3130 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
3131 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
3132 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
3134 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
3137 Generate the following code:
3140 call write_barrier_helper_reg
3142 First grab the RBM_WRITE_BARRIER register for the target address.
3148 if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
3150 rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
// Mark used+locked so the helper-address setup below cannot spill it.
3152 regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
3153 regSet.rsMaskLock |= RBM_WRITE_BARRIER;
3159 rg1 = REG_WRITE_BARRIER;
3164 noway_assert(rg1 == REG_WRITE_BARRIER);
3166 /* Generate "lea EDX, [addr-mode]" */
3168 noway_assert(tgt->gtType == TYP_REF);
3169 tgt->gtType = TYP_BYREF;
3170 inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
3172 /* Free up anything that was tied up by the LHS */
3173 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3175 // In case "tgt" was a comma:
3176 tgt = tgt->gtEffectiveVal();
3178 regTracker.rsTrackRegTrash(rg1);
3179 gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
3180 gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
3182 /* Call the proper vm helper */
3184 // enforced by gcIsWriteBarrierCandidate
3185 noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
// Use the 'checked' helper when the destination may not be in the GC heap.
3187 unsigned tgtAnywhere = 0;
3188 if ((tgt->gtOper == GT_IND) &&
3189 ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
3194 int helper = regToHelper[tgtAnywhere][reg];
3195 resultRegMask = genRegMask(reg);
3197 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
3199 genEmitHelperCall(helper,
3201 EA_PTRSIZE); // retSize
3205 regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
3206 regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
3209 return resultRegMask;
3215 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3217 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
// Portable path: marshal the value into ARG_1 and the address into ARG_0,
// then call the generic write-barrier helper.
3220 Generate the following code (or its equivalent on the given target):
3224 call write_barrier_helper
3226 First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
3229 if (reg != REG_ARG_1)
3231 // We may need to spill whatever is in the ARG_1 register
3233 if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
3235 regSet.rsSpillReg(REG_ARG_1);
3238 inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
3240 resultRegMask = RBM_ARG_1;
3242 regTracker.rsTrackRegTrash(REG_ARG_1);
// NOTE(review): gcMarkRegSetNpt appears to take a register MASK elsewhere
// (cf. RBM_ARG_0 | RBM_ARG_1 below), yet REG_ARG_1/REG_ARG_0 (regNumber
// enum values) are passed here and further down -- verify against the
// gcMarkRegSetNpt signature; RBM_ARG_1 may have been intended.
3243 gcInfo.gcMarkRegSetNpt(REG_ARG_1);
3244 gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
3246 bool free_arg1 = false;
3247 if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
3249 regSet.rsMaskUsed |= RBM_ARG_1;
3253 // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
3255 /* Generate "lea R0, [addr-mode]" */
3257 noway_assert(tgt->gtType == TYP_REF);
3258 tgt->gtType = TYP_BYREF;
3260 tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
3262 // We may need to spill whatever is in the ARG_0 register
3264 if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
3265 ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
3266 (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
3268 regSet.rsSpillReg(REG_ARG_0);
3271 inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
3273 /* Free up anything that was tied up by the LHS */
3274 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3276 regTracker.rsTrackRegTrash(REG_ARG_0);
3277 gcInfo.gcMarkRegSetNpt(REG_ARG_0);
3278 gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
3281 #if NOGC_WRITE_BARRIERS
3282 // Finally, we may be required to spill whatever is in the further argument registers
3283 // trashed by the call. The write barrier trashes some further registers --
3284 // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
3286 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
3288 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
3290 // Spill any other registers trashed by the write barrier call and currently in use.
3291 regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
3293 regSet.rsSpillRegs(mustSpill);
3294 #endif // _TARGET_ARM_
3296 bool free_arg0 = false;
3297 if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
3299 regSet.rsMaskUsed |= RBM_ARG_0;
3303 // genEmitHelperCall might need to grab a register
3304 // so don't let it spill one of the arguments
3306 regMaskTP reallyUsedRegs = RBM_NONE;
3307 regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
3309 genGCWriteBarrier(tgt, wbf);
3311 regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
3312 gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
// Undo the temporary 'used' marks added above for ARG_0/ARG_1.
3316 regSet.rsMaskUsed &= ~RBM_ARG_0;
3320 regSet.rsMaskUsed &= ~RBM_ARG_1;
3323 return resultRegMask;
3325 #endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3327 #else // !FEATURE_WRITE_BARRIER
3329 NYI("FEATURE_WRITE_BARRIER unimplemented");
3330 return resultRegMask;
3332 #endif // !FEATURE_WRITE_BARRIER
3336 /*****************************************************************************
3338 * Generate the appropriate conditional jump(s) right after the low 32 bits
3339 * of two long values have been compared.
// x86 flavor. Equality uses jne to the false/true targets; ordered compares
// branch on the high-word result (strict ja/jb or jg/jl depending on
// signedness) and fall through to the low-word compare when the high words
// were equal.
3342 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3346 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3352 inst_JMP(EJ_jne, jumpFalse);
3356 inst_JMP(EJ_jne, jumpTrue);
3363 inst_JMP(EJ_ja, jumpFalse);
3364 inst_JMP(EJ_jb, jumpTrue);
3368 inst_JMP(EJ_jg, jumpFalse);
3369 inst_JMP(EJ_jl, jumpTrue);
3377 inst_JMP(EJ_jb, jumpFalse);
3378 inst_JMP(EJ_ja, jumpTrue);
3382 inst_JMP(EJ_jl, jumpFalse);
3383 inst_JMP(EJ_jg, jumpTrue);
3388 noway_assert(!"expected a comparison operator");
3392 /*****************************************************************************
3394 * Generate the appropriate conditional jump(s) right after the high 32 bits
3395 * of two long values have been compared.
// x86 flavor. The low words are always compared unsigned, hence jb/jbe/
// jae/ja regardless of the overall comparison's signedness.
3398 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3403 inst_JMP(EJ_je, jumpTrue);
3407 inst_JMP(EJ_jne, jumpTrue);
3411 inst_JMP(EJ_jb, jumpTrue);
3415 inst_JMP(EJ_jbe, jumpTrue);
3419 inst_JMP(EJ_jae, jumpTrue);
3423 inst_JMP(EJ_ja, jumpTrue);
3427 noway_assert(!"expected comparison");
3430 #elif defined(_TARGET_ARM_)
3431 /*****************************************************************************
3433 * Generate the appropriate conditional jump(s) right after the low 32 bits
3434 * of two long values have been compared.
// ARM flavor; mirrors the x86 version with ARM condition codes
// (hi/lo for unsigned, gt/lt for signed).
3437 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3441 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3447 inst_JMP(EJ_ne, jumpFalse);
3451 inst_JMP(EJ_ne, jumpTrue);
3458 inst_JMP(EJ_hi, jumpFalse);
3459 inst_JMP(EJ_lo, jumpTrue);
3463 inst_JMP(EJ_gt, jumpFalse);
3464 inst_JMP(EJ_lt, jumpTrue);
3472 inst_JMP(EJ_lo, jumpFalse);
3473 inst_JMP(EJ_hi, jumpTrue);
3477 inst_JMP(EJ_lt, jumpFalse);
3478 inst_JMP(EJ_gt, jumpTrue);
3483 noway_assert(!"expected a comparison operator");
3487 /*****************************************************************************
3489 * Generate the appropriate conditional jump(s) right after the high 32 bits
3490 * of two long values have been compared.
// ARM flavor. Low words always compare unsigned: lo/ls/hs/hi.
3493 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3498 inst_JMP(EJ_eq, jumpTrue);
3502 inst_JMP(EJ_ne, jumpTrue);
3506 inst_JMP(EJ_lo, jumpTrue);
3510 inst_JMP(EJ_ls, jumpTrue);
3514 inst_JMP(EJ_hs, jumpTrue);
3518 inst_JMP(EJ_hi, jumpTrue);
3522 noway_assert(!"expected comparison");
3526 /*****************************************************************************
3528 * Called by genCondJump() for TYP_LONG.
// Emits a 64-bit compare-and-branch as two 32-bit compares (high word,
// then low word), via genJccLongHi/genJccLongLo. Special-cases compares
// against long constants, including EQ/NE against 0 and -1.
3531 void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
3533 noway_assert(jumpTrue && jumpFalse);
// NOTE(review): comparing a flag-mask expression '== false' works only
// because false converts to 0; '== 0' would state the intent directly.
3534 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
3535 noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
3537 GenTreePtr op1 = cond->gtOp.gtOp1;
3538 GenTreePtr op2 = cond->gtOp.gtOp2;
3539 genTreeOps cmp = cond->OperGet();
3543 /* Are we comparing against a constant? */
3545 if (op2->gtOper == GT_CNS_LNG)
3547 __int64 lval = op2->gtLngCon.gtLconVal;
3550 // We're "done" evaluating op2; let's strip any commas off op1 before we
3552 op1 = genCodeForCommaTree(op1);
3554 /* We can generate better code for some special cases */
3555 instruction ins = INS_invalid;
3556 bool useIncToSetFlags = false;
3557 bool specialCaseCmp = false;
// EQ/NE against 0 can be done by OR-ing the halves; against -1 by
// AND-ing the halves and then 'inc' to set the flags.
3565 useIncToSetFlags = false;
3566 specialCaseCmp = true;
3568 else if (lval == -1)
3572 useIncToSetFlags = true;
3573 specialCaseCmp = true;
3576 else if (cmp == GT_NE)
3582 useIncToSetFlags = false;
3583 specialCaseCmp = true;
3585 else if (lval == -1)
3589 useIncToSetFlags = true;
3590 specialCaseCmp = true;
3596 /* Make the comparand addressable */
3598 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
3600 regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
3601 insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
// op1 in a register pair: combine the halves into a temp with AND/OR,
// reusing one of the pair's registers when possible.
3605 regPairNo regPair = op1->gtRegPair;
3606 regNumber rLo = genRegPairLo(regPair);
3607 regNumber rHi = genRegPairHi(regPair);
3608 if (tmpMask & genRegMask(rLo))
3612 else if (tmpMask & genRegMask(rHi))
3619 rTmp = regSet.rsGrabReg(tmpMask);
3620 inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
3623 /* The register is now trashed */
3624 regTracker.rsTrackRegTrash(rTmp);
3628 /* Set the flags using INS_AND | INS_OR */
3629 inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
3633 /* Set the flags using INS_AND | INS_OR */
3634 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3637 else // op1 is not in a register.
3639 rTmp = regSet.rsGrabReg(tmpMask);
3641 /* Load the low 32-bits of op1 */
3642 inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
3644 /* The register is now trashed */
3645 regTracker.rsTrackRegTrash(rTmp);
3647 /* Set the flags using INS_AND | INS_OR */
3648 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3651 /* Free up the addrReg(s) if any */
3652 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
3654 /* compares against -1, also requires an an inc instruction */
3655 if (useIncToSetFlags)
3657 /* Make sure the inc will set the flags */
3658 assert(cond->gtSetFlags());
3659 genIncRegBy(rTmp, 1, cond, TYP_INT);
3662 #if FEATURE_STACK_FP_X87
3663 // We may need a transition block
3666 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3669 emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
3670 inst_JMP(jmpKind, jumpTrue);
3672 else // specialCaseCmp == false
// General constant case: compare high word against (lval >> 32), then
// low word, branching via genJccLongHi/genJccLongLo.
3674 /* Make the comparand addressable */
3675 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
3677 /* Compare the high part first */
3679 int ival = (int)(lval >> 32);
3681 /* Comparing a register against 0 is easier */
3683 if (!ival && (op1->InReg()) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
3685 /* Generate 'test rTmp, rTmp' */
3686 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3690 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3692 /* Special case: comparison of two constants */
3693 // Needed as gtFoldExpr() doesn't fold longs
3695 noway_assert(addrReg == 0);
3696 int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
3698 /* Get the constant operand into a register */
3699 rTmp = genGetRegSetToIcon(op1_hiword);
3701 /* Generate 'cmp rTmp, ival' */
3703 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3707 /* Generate 'cmp op1, ival' */
3709 inst_TT_IV(INS_cmp, op1, ival, 4);
3713 #if FEATURE_STACK_FP_X87
3714 // We may need a transition block
3717 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3720 /* Generate the appropriate jumps */
3722 if (cond->gtFlags & GTF_UNSIGNED)
3723 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3725 genJccLongHi(cmp, jumpTrue, jumpFalse);
3727 /* Compare the low part second */
3731 /* Comparing a register against 0 is easier */
3733 if (!ival && (op1->InReg()) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
3735 /* Generate 'test rTmp, rTmp' */
3736 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3740 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3742 /* Special case: comparison of two constants */
3743 // Needed as gtFoldExpr() doesn't fold longs
3745 noway_assert(addrReg == 0);
3746 int op1_loword = (int)op1->gtLngCon.gtLconVal;
3748 /* get the constant operand into a register */
3749 rTmp = genGetRegSetToIcon(op1_loword);
3751 /* Generate 'cmp rTmp, ival' */
3753 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3757 /* Generate 'cmp op1, ival' */
3759 inst_TT_IV(INS_cmp, op1, ival, 0);
3763 /* Generate the appropriate jumps */
3764 genJccLongLo(cmp, jumpTrue, jumpFalse);
3766 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
3769 else // (op2->gtOper != GT_CNS_LNG)
// Non-constant op2: compute op1 into a register pair, make op2
// addressable (or on load/store targets compute it into a pair too),
// then compare high and low parts.
3772 /* The operands would be reversed by physically swapping them */
3774 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
3776 /* Generate the first operand into a register pair */
3778 genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
3779 noway_assert(op1->InReg());
3781 #if CPU_LOAD_STORE_ARCH
3782 /* Generate the second operand into a register pair */
3783 // Fix 388442 ARM JitStress WP7
3784 genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3785 noway_assert(op2->InReg());
3786 regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
3788 /* Make the second operand addressable */
3790 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3792 /* Make sure the first operand hasn't been spilled */
3794 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
3795 noway_assert(op1->InReg());
3797 regPairNo regPair = op1->gtRegPair;
3799 #if !CPU_LOAD_STORE_ARCH
3800 /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
3802 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
3805 #if FEATURE_STACK_FP_X87
3806 // We may need a transition block
3809 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3813 /* Perform the comparison - high parts */
3815 inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
3817 if (cond->gtFlags & GTF_UNSIGNED)
3818 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3820 genJccLongHi(cmp, jumpTrue, jumpFalse);
3822 /* Compare the low parts */
3824 inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
3825 genJccLongLo(cmp, jumpTrue, jumpFalse);
3827 /* Free up anything that was tied up by either operand */
3828 CLANG_FORMAT_COMMENT_ANCHOR;
3830 #if CPU_LOAD_STORE_ARCH
3832 // Fix 388442 ARM JitStress WP7
3833 regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
3834 genReleaseRegPair(op2);
3836 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
3838 genReleaseRegPair(op1);
3842 /*****************************************************************************
3843 * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
3844 * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
3845 * to the architecture we're running on.
3848 * gen_fcomp_FN: fcomp ST(0), stk
3849 * gen_fcomp_FS_TT: fcomp ST(0), addr
3850 * gen_fcompp_FS: fcompp
3851 * These are followed by fnstsw, sahf to get the flags in EFLAGS.
3854 * gen_fcomp_FN: fcomip ST(0), stk
3855 * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
3856 * (and reverse the branch condition since addr comes first)
3857 * gen_fcompp_FS: fcomip, fstp
3858 * These instructions will correctly set the EFLAGS register.
3860 * Return value: These functions return true if the instruction has
3861 * already placed its result in the EFLAGS register.
// Returns true when the JIT is configured (compiler->opts.compUseFCOMI) to use
// the fcomi/fcomip family of x87 compares, which place the compare result
// directly in EFLAGS (see the gen_fcomp_* header comment above for how the
// two instruction sequences differ).
3864 bool CodeGen::genUse_fcomip()
3866 return compiler->opts.compUseFCOMI;
3869 /*****************************************************************************
3871 * Sets the flag for the TYP_INT/TYP_REF comparison.
3872 * We try to use the flags if they have already been set by a prior
3874 * eg. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
3875 * need to compare again with zero. Just use a "INS_js"
3877 * Returns the flags the following jump/set instruction should use.
// Emits whatever instructions are needed so that the condition flags reflect
// the integer/GC-ref relational node 'cond' (op1 <relop> op2), reusing flags
// already set by earlier code where possible (e.g. a preceding instruction
// that set ZF/SF), and returns the emitJumpKind that the following jump/set
// instruction should use. jumpKind starts as EJ_COUNT (invalid) and is
// asserted valid before returning.
3880 emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
3882 noway_assert(cond->OperIsCompare());
3883 noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
3885 GenTreePtr op1 = cond->gtOp.gtOp1;
3886 GenTreePtr op2 = cond->gtOp.gtOp2;
3887 genTreeOps cmp = cond->OperGet();
// If reversed evaluation order was requested, physically swap the operands,
// invert the relational operator, and clear the flag so the code below can
// assume op1-first order.
3889 if (cond->gtFlags & GTF_REVERSE_OPS)
3891 /* Don't forget to modify the condition as well */
3893 cond->gtOp.gtOp1 = op2;
3894 cond->gtOp.gtOp2 = op1;
3895 cond->SetOper(GenTree::SwapRelop(cmp));
3896 cond->gtFlags &= ~GTF_REVERSE_OPS;
3898 /* Get hold of the new values */
3900 cmp = cond->OperGet();
3901 op1 = cond->gtOp.gtOp1;
3902 op2 = cond->gtOp.gtOp2;
3905 // Note that op1's type may get bashed. So save it early
3907 var_types op1Type = op1->TypeGet();
3908 bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
3909 emitAttr size = EA_UNKNOWN;
// Register masks for whatever op1/op2 end up occupying; released at the end.
3912 regMaskTP addrReg1 = RBM_NONE;
3913 regMaskTP addrReg2 = RBM_NONE;
3914 emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
3919 regMaskTP newLiveMask;
// ---- Case 1: op2 is an integer constant ----
3922 /* Are we comparing against a constant? */
3924 if (op2->IsCnsIntOrI())
3926 ssize_t ival = op2->gtIntConCommon.IconValue();
3928 /* unsigned less than comparisons with 1 ('< 1' )
3929 should be transformed into '== 0' to potentially
3930 suppress a tst instruction.
3932 if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
3934 op2->gtIntCon.gtIconVal = ival = 0;
3935 cond->gtOper = cmp = GT_EQ;
3938 /* Comparisons against 0 can be easier */
3942 // if we can safely change the comparison to unsigned we do so
3943 if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
3948 /* unsigned comparisons with 0 should be transformed into
3949 '==0' or '!= 0' to potentially suppress a tst instruction. */
3954 cond->gtOper = cmp = GT_NE;
3955 else if (cmp == GT_LE)
3956 cond->gtOper = cmp = GT_EQ;
3959 /* Is this a simple zero/non-zero test? */
3961 if (cmp == GT_EQ || cmp == GT_NE)
3963 /* Is the operand an "AND" operation? */
3965 if (op1->gtOper == GT_AND)
3967 GenTreePtr an1 = op1->gtOp.gtOp1;
3968 GenTreePtr an2 = op1->gtOp.gtOp2;
3970 /* Check for the case "expr & icon" */
3972 if (an2->IsIntCnsFitsInI32())
3974 int iVal = (int)an2->gtIntCon.gtIconVal;
3976 /* make sure that constant is not out of an1's range */
3978 switch (an1->gtType)
3982 if (iVal & 0xffffff00)
3983 goto NO_TEST_FOR_AND;
3987 if (iVal & 0xffff0000)
3988 goto NO_TEST_FOR_AND;
3994 if (an1->IsCnsIntOrI())
3996 // Special case - Both operands of AND are consts
3997 genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
3998 addrReg1 = genRegMask(an1->gtRegNum);
4002 addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
4004 #if CPU_LOAD_STORE_ARCH
// Load/store architectures (e.g. ARM) cannot 'test' a memory operand
// directly: force an1 into a register, then either test against the
// immediate (if it encodes as an ALU immediate) or via a temp register.
4005 if ((an1->InReg()) == 0)
4007 genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
4008 if (arm_Valid_Imm_For_Alu(iVal))
4010 inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
4014 regNumber regTmp = regSet.rsPickFreeReg();
4015 instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
4016 inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
4019 addrReg1 = RBM_NONE;
4024 #ifdef _TARGET_XARCH_
4025 // Check to see if we can use a smaller immediate.
4026 if ((an1->InReg()) && ((iVal & 0x0000FFFF) == iVal))
4028 var_types testType =
4029 (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
4030 #if CPU_HAS_BYTE_REGS
4031 // if we don't have byte-able register, switch to the 2-byte form
4032 if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
4034 testType = TYP_USHORT;
4036 #endif // CPU_HAS_BYTE_REGS
4038 inst_TT_IV(INS_TEST, an1, iVal, testType);
4041 #endif // _TARGET_XARCH_
4043 inst_TT_IV(INS_TEST, an1, iVal);
4052 // TODO: Check for other cases that can generate 'test',
4053 // TODO: also check for a 64-bit integer zero test which
4054 // TODO: could generate 'or lo, hi' followed by jz/jnz.
4058 // See what Jcc instruction we would use if we can take advantage of
4059 // the knowledge of EFLAGs.
4064 Unsigned comparison to 0. Using this table:
4066 ----------------------------------------------------
4067 | Comparison | Flags Checked | Instruction Used |
4068 ----------------------------------------------------
4069 | == 0 | ZF = 1 | je |
4070 ----------------------------------------------------
4071 | != 0 | ZF = 0 | jne |
4072 ----------------------------------------------------
4073 | < 0 | always FALSE | N/A |
4074 ----------------------------------------------------
4075 | <= 0 | ZF = 1 | je |
4076 ----------------------------------------------------
4077 | >= 0 | always TRUE | N/A |
4078 ----------------------------------------------------
4079 | > 0 | ZF = 0 | jne |
4080 ----------------------------------------------------
4103 #elif defined(_TARGET_X86_)
4124 noway_assert(!"Unexpected comparison OpCode");
4131 Signed comparison to 0. Using this table:
4133 -----------------------------------------------------
4134 | Comparison | Flags Checked | Instruction Used |
4135 -----------------------------------------------------
4136 | == 0 | ZF = 1 | je |
4137 -----------------------------------------------------
4138 | != 0 | ZF = 0 | jne |
4139 -----------------------------------------------------
4140 | < 0 | SF = 1 | js |
4141 -----------------------------------------------------
4142 | <= 0 | N/A | N/A |
4143 -----------------------------------------------------
4144 | >= 0 | SF = 0 | jns |
4145 -----------------------------------------------------
4147 -----------------------------------------------------
4171 #elif defined(_TARGET_X86_)
4192 noway_assert(!"Unexpected comparison OpCode");
4195 assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
4197 assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
4199 /* Is the value a simple local variable? */
4201 if (op1->gtOper == GT_LCL_VAR)
4203 /* Is the flags register set to the value? */
// If the tracked flags already reflect this local's value, no compare
// instruction is needed at all.
4205 if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
4207 if (jumpKind != EJ_NONE)
4209 addrReg1 = RBM_NONE;
4216 /* Make the comparand addressable */
4217 addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4219 /* Are the condition flags set based on the value? */
4221 unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
4225 if (genFlagsAreReg(op1->gtRegNum))
4227 flags |= GTF_ZSF_SET;
4233 if (jumpKind != EJ_NONE)
4239 /* Is the value in a register? */
4243 regNumber reg = op1->gtRegNum;
4245 /* With a 'test' we can do any signed test or any test for equality */
4247 if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
4249 emitAttr compareSize = emitTypeSize(op1->TypeGet());
4251 // If we have an GT_REG_VAR then the register will be properly sign/zero extended
4252 // But only up to 4 bytes
4253 if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
4255 compareSize = EA_4BYTE;
4258 #if CPU_HAS_BYTE_REGS
4259 // Make sure if we require a byte compare that we have a byte-able register
4260 if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
4261 #endif // CPU_HAS_BYTE_REGS
4263 /* Generate 'test reg, reg' */
4264 instGen_Compare_Reg_To_Zero(compareSize, reg);
// Non-zero constant: decide whether a small (byte/short) immediate
// compare is safe, otherwise fall back to a full int compare.
4271 else // if (ival != 0)
4273 bool smallOk = true;
4275 /* make sure that constant is not out of op1's range
4276 if it is, we need to perform an int with int comparison
4277 and therefore, we set smallOk to false, so op1 gets loaded
4281 /* If op1 is TYP_SHORT, and is followed by an unsigned
4282 * comparison, we can use smallOk. But we don't know which
4283 * flags will be needed. This probably doesn't happen often.
4285 var_types gtType = op1->TypeGet();
4290 if (ival != (signed char)ival)
4295 if (ival != (unsigned char)ival)
4300 if (ival != (signed short)ival)
4304 if (ival != (unsigned short)ival)
4308 #ifdef _TARGET_64BIT_
4310 if (!FitsIn<INT32>(ival))
4314 if (!FitsIn<UINT32>(ival))
4317 #endif // _TARGET_64BIT_
4323 if (smallOk && // constant is in op1's range
4324 !unsignedCmp && // signed comparison
4325 varTypeIsSmall(gtType) && // smalltype var
4326 varTypeIsUnsigned(gtType)) // unsigned type
4331 /* Make the comparand addressable */
4332 addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
4335 /* Special case: comparison of two constants */
4337 // Needed if Importer doesn't call gtFoldExpr()
4339 if (!(op1->InReg()) && (op1->IsCnsIntOrI()))
4341 // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
4343 /* Workaround: get the constant operand into a register */
4344 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4346 noway_assert(addrReg1 == RBM_NONE);
4347 noway_assert(op1->InReg());
4349 addrReg1 = genRegMask(op1->gtRegNum);
4352 /* Compare the operand against the constant */
// Icon handles need a relocation entry applied to the immediate.
4354 if (op2->IsIconHandle())
4356 inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
4360 inst_TT_IV(INS_cmp, op1, ival);
// ---- Case 2: op2 is not a constant ----
4365 //---------------------------------------------------------------------
4367 // We reach here if op2 was not a GT_CNS_INT
4373 if (op1Type == op2->gtType)
4375 shortCmp = varTypeIsShort(op1Type);
4376 byteCmp = varTypeIsByte(op1Type);
4379 noway_assert(op1->gtOper != GT_CNS_INT);
4381 if (op2->gtOper == GT_LCL_VAR)
4384 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4385 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4387 /* Are we comparing against a register? */
4391 /* Make the comparands addressable and mark as used */
4393 assert(addrReg1 == RBM_NONE);
4394 addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4396 /* Is the size of the comparison byte/char/short ? */
4398 if (varTypeIsSmall(op1->TypeGet()))
4400 /* Is op2 sitting in an appropriate register? */
4402 if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
4405 /* Is op2 of the right type for a small comparison */
4407 if (op2->gtOper == GT_REG_VAR)
4409 if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
4414 if (op1->gtType != op2->gtType)
4418 if (varTypeIsUnsigned(op1->TypeGet()))
4422 assert(addrReg2 == RBM_NONE)
4424 genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4425 addrReg2 = genRegMask(op2->gtRegNum);
4426 addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
4427 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4428 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4430 /* Compare against the register */
4432 inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
4438 // op1 has been made addressable and is marked as in use
4439 // op2 is un-generated
4440 assert(addrReg2 == 0);
4442 if ((op1->InReg()) == 0)
4444 regNumber reg1 = regSet.rsPickReg();
4446 noway_assert(varTypeIsSmall(op1->TypeGet()));
4447 instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->InReg()) != 0);
4449 // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
4450 // so we need to make sure it is still valid. Note that at this point, reg1 is
4451 // *not* marked as in use, and it is possible for it to be used in the address
4452 // mode expression, but that is OK, because we are done with expression after
4453 // this. We only need reg1.
4454 addrReg1 = genKeepAddressable(op1, addrReg1);
4455 inst_RV_TT(ins, reg1, op1);
4456 regTracker.rsTrackRegTrash(reg1);
4458 genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4461 genMarkTreeInReg(op1, reg1);
4463 regSet.rsMarkRegUsed(op1);
4464 addrReg1 = genRegMask(op1->gtRegNum);
4467 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4468 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4473 // We come here if op2 is not enregistered or not in a "good" register.
4475 assert(addrReg1 == 0);
4477 // Determine what registers go live between op1 and op2
4478 newLiveMask = genNewLiveRegMask(op1, op2);
4480 // Setup regNeed with the set of register that we suggest for op1 to be in
4482 regNeed = RBM_ALLINT;
4484 // avoid selecting registers that get newly born in op2
4485 regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
4487 // avoid selecting op2 reserved regs
4488 regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
4490 #if CPU_HAS_BYTE_REGS
4491 // if necessary setup regNeed to select just the byte-able registers
4493 regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
4494 #endif // CPU_HAS_BYTE_REGS
4496 // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
4498 genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
4499 noway_assert(op1->InReg());
4501 op1Reg = op1->gtRegNum;
4503 // Setup regNeed with the set of register that we require for op1 to be in
4505 regNeed = RBM_ALLINT;
4507 #if CPU_HAS_BYTE_REGS
4508 // if necessary setup regNeed to select just the byte-able registers
4510 regNeed &= RBM_BYTE_REGS;
4511 #endif // CPU_HAS_BYTE_REGS
4513 // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
4514 regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
4516 // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
4517 regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
4519 // Did we end up in an acceptable register?
4520 // and do we have an acceptable free register available to grab?
4522 if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
4524 // Grab an acceptable register
4525 regNumber newReg = regSet.rsGrabReg(regNeed);
4527 noway_assert(op1Reg != newReg);
4529 /* Update the value in the target register */
4531 regTracker.rsTrackRegCopy(newReg, op1Reg);
4533 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
4535 /* The value has been transferred to 'reg' */
// The old register no longer holds a live GC pointer unless someone
// else still has it marked as used.
4537 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
4538 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
4540 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
4542 /* The value is now in an appropriate register */
4544 op1->gtRegNum = newReg;
4546 noway_assert(op1->InReg());
4547 op1Reg = op1->gtRegNum;
4551 /* Mark the register as 'used' */
4552 regSet.rsMarkRegUsed(op1);
4554 addrReg1 = genRegMask(op1Reg);
4556 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4557 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4561 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4562 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4563 noway_assert(op1->InReg());
4565 // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
4566 // when byteCmp is true we will perform a byte sized cmp instruction
4567 // and that instruction requires that any registers used are byte-able ones.
4569 regNeed = RBM_ALLINT;
4571 #if CPU_HAS_BYTE_REGS
4572 // if necessary setup regNeed to select just the byte-able registers
4574 regNeed &= RBM_BYTE_REGS;
4575 #endif // CPU_HAS_BYTE_REGS
4577 /* Make the comparand addressable */
4578 assert(addrReg2 == 0);
4579 addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
4581 /* Make sure the first operand is still in a register; if
4582 it's been spilled, we have to make sure it's reloaded
4583 into a byte-addressable register if needed.
4584 Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong.
4587 assert(addrReg1 != 0);
4588 genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
4590 noway_assert(op1->InReg());
4591 noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
4593 addrReg1 = genRegMask(op1->gtRegNum);
// Lock op1's register so the recover/keep calls below can't evict it.
4594 regSet.rsLockUsedReg(addrReg1);
4596 /* Make sure that op2 is addressable. If we are going to do a
4597 byte-comparison, we need it to be in a byte register. */
4599 if (byteCmp && (op2->InReg()))
4601 genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
4602 addrReg2 = genRegMask(op2->gtRegNum);
4606 addrReg2 = genKeepAddressable(op2, addrReg2);
4609 regSet.rsUnlockUsedReg(addrReg1);
4611 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4612 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4614 if (byteCmp || shortCmp)
4616 size = emitTypeSize(op2->TypeGet());
4617 if (varTypeIsUnsigned(op1Type))
4622 size = emitActualTypeSize(op2->TypeGet());
4625 /* Perform the comparison */
4626 inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
4630 jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
// Common exit: update liveness and release whatever op1/op2 were holding.
4632 DONE_FLAGS: // We have determined what jumpKind to use
4634 genUpdateLife(cond);
4636 /* The condition value is dead at the jump that follows */
4638 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4639 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4640 genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4641 genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
4643 noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
4648 /*****************************************************************************/
4649 /*****************************************************************************/
4650 /*****************************************************************************
4652 * Generate code to jump to the jump target of the current basic block if
4653 * the given relational operator yields 'true'.
// Generates a conditional jump taken when the relational node 'cond' is true.
// Targets come from destTrue/destFalse when supplied; otherwise they are
// taken from the current BBJ_COND basic block (bbJumpDest / bbNext).
// Dispatches on the actual type of op1: ints go through genCondSetFlags +
// inst_JMP, longs through genCondJumpLng, floats through genCondJumpFloat
// (with FEATURE_STACK_FP_X87 variants of each).
4656 void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
4658 BasicBlock* jumpTrue;
4659 BasicBlock* jumpFalse;
4661 GenTreePtr op1 = cond->gtOp.gtOp1;
4662 GenTreePtr op2 = cond->gtOp.gtOp2;
4663 genTreeOps cmp = cond->OperGet();
4667 jumpTrue = destTrue;
4668 jumpFalse = destFalse;
// No explicit destinations: use the current conditional block's edges.
4672 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
4674 jumpTrue = compiler->compCurBB->bbJumpDest;
4675 jumpFalse = compiler->compCurBB->bbNext;
4678 noway_assert(cond->OperIsCompare());
4680 /* Make sure the more expensive operand is 'op1' */
4681 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
4683 if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
4685 /* Don't forget to modify the condition as well */
4687 cond->gtOp.gtOp1 = op2;
4688 cond->gtOp.gtOp2 = op1;
4689 cond->SetOper(GenTree::SwapRelop(cmp));
4690 cond->gtFlags &= ~GTF_REVERSE_OPS;
4692 /* Get hold of the new values */
4694 cmp = cond->OperGet();
4695 op1 = cond->gtOp.gtOp1;
4696 op2 = cond->gtOp.gtOp2;
4699 /* What is the type of the operand? */
4701 switch (genActualType(op1->gtType))
4706 emitJumpKind jumpKind;
4708 // Check if we can use the currently set flags. Else set them
4710 jumpKind = genCondSetFlags(cond);
4712 #if FEATURE_STACK_FP_X87
4715 genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
4720 /* Generate the conditional jump */
4721 inst_JMP(jumpKind, jumpTrue);
// Long comparison: delegated to the long-compare helpers.
4727 #if FEATURE_STACK_FP_X87
4730 genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
4735 genCondJumpLng(cond, jumpTrue, jumpFalse);
// Floating-point comparison.
4741 #if FEATURE_STACK_FP_X87
4742 genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
4744 genCondJumpFloat(cond, jumpTrue, jumpFalse);
// Any other operand type is a malformed JTRUE.
4750 compiler->gtDispTree(cond);
4752 unreached(); // unexpected/unsupported 'jtrue' operands type
4756 /*****************************************************************************
4757 * Spill registers to check callers can handle it.
// Register-stress helper: when the stress level is high enough
// (regSet.rsStressRegs() >= 2), spill every spillable in-use register and
// then clobber (zero out) the free, already-modified registers, so that any
// caller incorrectly assuming register contents survive across
// genCodeForTree() is flushed out. Registers that must keep their value
// (tree's result, its operands' regs, the exception object reg in a handler
// entry, the enregistered pinvoke frame-list root) are excluded from
// trashing.
4762 void CodeGen::genStressRegs(GenTreePtr tree)
4764 if (regSet.rsStressRegs() < 2)
4767 /* Spill as many registers as possible. Callers should be prepared
4768 to handle this case.
4769 But don't spill trees with no size (TYP_STRUCT comes to mind) */
4772 regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
4776 for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
// Only spill registers that hold a tree with a nonzero-size type.
4778 if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
4779 (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
4781 regSet.rsSpillReg(regNum);
4783 spillRegs &= regSet.rsMaskUsed;
// Now compute the set of free registers we are allowed to trash.
4791 regMaskTP trashRegs = regSet.rsRegMaskFree();
4793 if (trashRegs == RBM_NONE)
4796 /* It is sometimes reasonable to expect that calling genCodeForTree()
4797 on certain trees won't spill anything */
// At the first statement of a catch/filter handler the exception object
// register must be preserved.
4799 if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
4800 handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
4802 trashRegs &= ~(RBM_EXCEPTION_OBJECT);
4805 // If genCodeForTree() effectively gets called a second time on the same tree
4809 noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
4810 trashRegs &= ~genRegMask(tree->gtRegNum);
// Keep the registers of tree's already-enregistered operands intact.
4813 if (tree->gtType == TYP_INT && tree->OperIsSimple())
4815 GenTreePtr op1 = tree->gtOp.gtOp1;
4816 GenTreePtr op2 = tree->gtOp.gtOp2;
4817 if (op1 && (op1->InReg()))
4818 trashRegs &= ~genRegMask(op1->gtRegNum);
4819 if (op2 && (op2->InReg()))
4820 trashRegs &= ~genRegMask(op2->gtRegNum);
4823 if (compiler->compCurBB == compiler->genReturnBB)
4825 if (compiler->info.compCallUnmanaged)
// The pinvoke frame-list root local may live in a register; don't trash it.
4827 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
4828 if (varDsc->lvRegister)
4829 trashRegs &= ~genRegMask(varDsc->lvRegNum);
4833 /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
4834 to save/restore the register. We try to be as unintrusive
4837 noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
4838 // This is obviously false for ARM, but this function is never called.
4839 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
4841 regMaskTP regMask = genRegMask(reg);
// Only zero registers already marked as modified, to avoid forcing a
// save/restore in the prolog/epilog.
4843 if (regSet.rsRegsModified(regMask & trashRegs))
4844 genSetRegToIcon(reg, 0);
4850 /*****************************************************************************
4852 * Generate code for a GTK_CONST tree
// Generates code for an integer-constant (GTK_CONST) node: picks a register
// (preferring destReg/bestReg), materializes the immediate into it — reusing
// a register already known to hold the same constant where profitable — and
// finishes via genCodeForTree_DONE. Icon handles get a relocation applied
// and are marked as possible GC pointers.
4855 void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
4857 noway_assert(tree->IsCnsIntOrI());
4859 ssize_t ival = tree->gtIntConCommon.IconValue();
4860 regMaskTP needReg = destReg;
// A handle constant under compReloc needs a relocated immediate.
4862 bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
4866 /* If we are targeting destReg and ival is zero */
4867 /* we would rather xor needReg than copy another register */
4871 bool reuseConstantInReg = false;
4873 if (destReg == RBM_NONE)
4874 reuseConstantInReg = true;
4877 // If we can set a register to a constant with a small encoding, then do that.
4878 // Assume we'll get a low register if needReg has low registers as options.
4879 if (!reuseConstantInReg &&
4880 !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
4882 reuseConstantInReg = true;
4885 if (!reuseConstantInReg && ival != 0)
4886 reuseConstantInReg = true;
4889 if (reuseConstantInReg)
4891 /* Is the constant already in register? If so, use this register */
4893 reg = regTracker.rsIconIsInReg(ival);
4899 #endif // REDUNDANT_LOAD
4901 reg = regSet.rsPickReg(needReg, bestReg);
4903 /* If the constant is a handle, we need a reloc to be applied to it */
4907 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
4908 regTracker.rsTrackRegTrash(reg);
// Plain (non-reloc) constant: let the helper pick the cheapest encoding.
4912 genSetRegToIcon(reg, ival, tree->TypeGet());
4918 /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
4920 if (!genTempLiveChg)
4922 VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
4923 genTempLiveChg = true;
4927 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
4928 genCodeForTree_DONE(tree, reg);
4931 /*****************************************************************************
4933 * Generate code for a GTK_LEAF tree
// Generates code for a GTK_LEAF node, dispatching on the oper: local vars
// (enregistered or loaded from the stack), local fields, class vars, the
// END_LFIN shadow-SP bookkeeping, catch-argument, GT_JMP, and memory
// barriers. Produces the result in 'reg' and finishes via
// genCodeForTree_DONE.
4936 void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
4938 genTreeOps oper = tree->OperGet();
// reg starts poisoned so the exit assert catches any case that forgot to set it.
4939 regNumber reg = DUMMY_INIT(REG_CORRUPT);
4940 regMaskTP regs = regSet.rsMaskUsed;
4941 regMaskTP needReg = destReg;
4944 noway_assert(tree->OperKind() & GTK_LEAF);
4949 NO_WAY("GT_REG_VAR should have been caught above");
// GT_LCL_VAR:
4954 /* Does the variable live in a register? */
4956 if (genMarkLclVar(tree))
4958 genCodeForTree_REG_VAR1(tree);
4964 /* Is the local variable already in register? */
// Redundant-load elimination: reuse a register already holding this stack local.
4966 reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
4970 /* Use the register the variable happens to be in */
4971 regMaskTP regMask = genRegMask(reg);
4973 // If the register that it was in isn't one of the needRegs
4974 // then try to move it into a needReg register
4976 if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
4978 regNumber rg2 = reg;
4979 reg = regSet.rsPickReg(needReg, bestReg);
4982 regMask = genRegMask(reg);
4983 inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
4987 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
4988 regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
// GT_LCL_FLD:
4997 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
4998 // to worry about it being enregistered.
4999 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
5006 /* Pick a register for the value */
5008 reg = regSet.rsPickReg(needReg, bestReg);
5010 /* Load the variable into the register */
5012 size = genTypeSize(tree->gtType);
// Small types are loaded with a widening move and the node is retyped to int.
5014 if (size < EA_4BYTE)
5016 instruction ins = ins_Move_Extend(tree->TypeGet(), tree->InReg());
5017 inst_RV_TT(ins, reg, tree, 0);
5019 /* We've now "promoted" the tree-node to TYP_INT */
5021 tree->gtType = TYP_INT;
5025 inst_RV_TT(INS_mov, reg, tree, 0);
5028 regTracker.rsTrackRegTrash(reg);
5030 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
5035 regTracker.rsTrackRegClsVar(reg, tree);
5038 regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
5043 noway_assert(!"Unexpected oper");
// Volatile loads are followed by a memory barrier.
5047 if (tree->gtFlags & GTF_IND_VOLATILE)
5049 // Emit a memory barrier instruction after the load
5050 instGen_MemoryBarrier();
// END_LFIN (non-funclet EH only): clear the shadow-SP slot of the nesting
// level that encloses the finally being exited.
5061 #if !FEATURE_EH_FUNCLETS
5064 /* Have to clear the shadowSP of the nesting level which
5065 encloses the finally */
5067 unsigned finallyNesting;
5068 finallyNesting = (unsigned)tree->gtVal.gtVal1;
5069 noway_assert(tree->gtVal.gtVal1 <
5070 compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
5071 noway_assert(finallyNesting < compiler->compHndBBtabCount);
5073 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
5074 unsigned filterEndOffsetSlotOffs;
5075 PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
5076 sizeof(void*)); // below doesn't underflow.
5077 filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
5079 unsigned curNestingSlotOffs;
5080 curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
5081 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
5084 #endif // !FEATURE_EH_FUNCLETS
// Catch argument: the incoming exception object is in a fixed register.
5088 noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
5090 /* Catch arguments get passed in a register. genCodeForBBlist()
5091 would have marked it as holding a GC object, but not used. */
5093 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
5094 reg = REG_EXCEPTION_OBJECT;
5098 genCodeForTreeLeaf_GT_JMP(tree);
5101 case GT_MEMORYBARRIER:
5102 // Emit the memory barrier instruction
5103 instGen_MemoryBarrier();
5109 compiler->gtDispTree(tree);
5111 noway_assert(!"unexpected leaf");
5114 noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
5115 genCodeForTree_DONE(tree, reg);
// Walks down a chain of GT_COMMA nodes, generating code for each left-hand
// (side-effect) operand with no register preference, and leaves 'tree'
// pointing at the first non-comma node for the caller to evaluate.
5118 GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
5120 while (tree->OperGet() == GT_COMMA)
5122 GenTreePtr op1 = tree->gtOp.gtOp1;
// op1 is evaluated only for its side effects (RBM_NONE: no target register).
5123 genCodeForTree(op1, RBM_NONE);
5124 gcInfo.gcMarkRegPtrVal(op1);
5126 tree = tree->gtOp.gtOp2;
5131 /*****************************************************************************
5133 * Generate code for the a leaf node of type GT_JMP
// Generate code for a GT_JMP leaf (a tail "jmp" to another method that reuses
// this method's incoming arguments). Fires the profiler tailcall callback if
// needed, runs the PInvoke method epilog, then restores every argument to the
// location (register or stack slot) it occupied on entry to this method.
5136 void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
// The flowgraph must have flagged this block as containing a GT_JMP.
5138 noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
5140 #ifdef PROFILING_SUPPORTED
5141 if (compiler->compIsProfilerHookNeeded())
5143 /* fire the event at the call site */
// Remember the stack level so it can be restored after the callout below.
5144 unsigned saveStackLvl2 = genStackLevel;
5146 compiler->info.compProfilerCallback = true;
5150 // Push the profilerHandle
// Save any argument registers that are currently in use before the helper
// call clobbers them; genPopRegs below restores them.
5152 regMaskTP byrefPushedRegs;
5153 regMaskTP norefPushedRegs;
5154 regMaskTP pushedArgRegs =
5155 genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
// Indirected handle: push through a PC-relative data slot; otherwise push
// the handle as an immediate.
5158 if (compiler->compProfilerMethHndIndirected)
5160 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
5161 (ssize_t)compiler->compProfilerMethHnd);
5165 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
5169 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5170 sizeof(int) * 1, // argSize
5171 EA_UNKNOWN); // retSize
5174 // Adjust the number of stack slots used by this managed method if necessary.
// The one-slot handle push above may exceed what the method otherwise uses.
5176 if (compiler->fgPtrArgCntMax < 1)
5178 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
5179 compiler->fgPtrArgCntMax = 1;
5182 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
5184 // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node.
5185 // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target.
5186 regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
5187 noway_assert(argReg == REG_PROFILER_JMP_ARG);
// Lock the register so the helper-call emission cannot reallocate it.
5188 regSet.rsLockReg(RBM_PROFILER_JMP_USED);
5190 if (compiler->compProfilerMethHndIndirected)
5192 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
5193 regTracker.rsTrackRegTrash(argReg);
5197 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
5200 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5202 EA_UNKNOWN); // retSize
5204 regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
// Unimplemented target: no profiler tailcall sequence emitted here.
5206 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
5207 #endif //_TARGET_X86_
5209 /* Restore the stack level */
5210 SetStackLevel(saveStackLvl2);
5212 #endif // PROFILING_SUPPORTED
5214 /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
5215 * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
5216 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
5219 if (compiler->info.compCallUnmanaged)
5221 genPInvokeMethodEpilog();
5224 // Make sure register arguments are in their initial registers
5225 // and stack arguments are put back as well.
5227 // This does not deal with circular dependencies of register
5228 // arguments, which is safe because RegAlloc prevents that by
5229 // not enregistering any RegArgs when a JMP opcode is used.
// No arguments: nothing to restore.
5231 if (compiler->info.compArgsCount == 0)
5239 // First move any enregistered stack arguments back to the stack
5240 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5242 noway_assert(varDsc->lvIsParam);
// Skip register args and args that were never enregistered.
5243 if (varDsc->lvIsRegArg || !varDsc->lvRegister)
5246 /* Argument was passed on the stack, but ended up in a register
5247 * Store it back to the stack */
5248 CLANG_FORMAT_COMMENT_ANCHOR;
5250 #ifndef _TARGET_64BIT_
5251 if (varDsc->TypeGet() == TYP_LONG)
5253 /* long - at least the low half must be enregistered */
5255 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
5257 /* Is the upper half also enregistered? */
5259 if (varDsc->lvOtherReg != REG_STK)
// Store the high 32 bits at offset sizeof(int) within the slot.
5261 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
5265 #endif // _TARGET_64BIT_
5267 getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
// Tracks which incoming argument registers have been repopulated, so the
// varargs fix-up below can reload only the remaining ones.
5273 regMaskTP fixedArgsMask = RBM_NONE;
5276 // Next move any un-enregistered register arguments back to their register
5277 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5279 /* Is this variable a register arg? */
5281 if (!varDsc->lvIsRegArg)
5284 /* Register argument */
5286 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5287 noway_assert(!varDsc->lvRegister);
5289 /* Reload it from the stack */
5290 CLANG_FORMAT_COMMENT_ANCHOR;
5292 #ifndef _TARGET_64BIT_
5293 if (varDsc->TypeGet() == TYP_LONG)
5295 /* long - at least the low half must be enregistered */
5297 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
5298 regTracker.rsTrackRegTrash(varDsc->lvArgReg);
5300 /* Also assume the upper half also enregistered */
5302 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
5304 regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
5307 fixedArgsMask |= genRegMask(varDsc->lvArgReg);
5308 fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
5312 #endif // _TARGET_64BIT_
// HFA (homogeneous float aggregate) passed in consecutive FP registers:
// reload each element register from successive stack offsets.
5314 if (varDsc->lvIsHfaRegArg())
5316 const var_types elemType = varDsc->GetHfaType();
5317 const instruction loadOp = ins_Load(elemType);
5318 const emitAttr size = emitTypeSize(elemType);
5319 regNumber argReg = varDsc->lvArgReg;
// Cap at the number of FP argument registers actually available.
5320 const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
5322 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5324 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5325 assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
5326 argReg = regNextOfType(argReg, elemType);
// Struct split across integer argument registers: reload 4 bytes at a time.
5329 else if (varDsc->TypeGet() == TYP_STRUCT)
5331 const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
5332 const instruction loadOp = ins_Load(elemType);
5333 const emitAttr size = emitTypeSize(elemType);
5334 regNumber argReg = varDsc->lvArgReg;
5335 const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
5337 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5339 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5340 regTracker.rsTrackRegTrash(argReg);
5342 fixedArgsMask |= genRegMask(argReg);
5344 argReg = genRegArgNext(argReg);
5348 #endif //_TARGET_ARM_
5350 var_types loadType = varDsc->TypeGet();
5351 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5352 bool twoParts = false;
// Varargs pass floating-point values in integer registers.
5354 if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
5356 #ifndef _TARGET_64BIT_
// A double needs two 32-bit integer registers on 32-bit targets.
5357 if (loadType == TYP_DOUBLE)
5360 loadType = TYP_I_IMPL;
5361 assert(isValidIntArgReg(argReg));
5364 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
5365 regTracker.rsTrackRegTrash(argReg);
5368 fixedArgsMask |= genRegMask(argReg);
// Second half of a two-register value.
5372 argReg = genRegArgNext(argReg);
5373 assert(isValidIntArgReg(argReg));
5375 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
5376 regTracker.rsTrackRegTrash(argReg);
5379 fixedArgsMask |= genRegMask(argReg);
5386 // Check if we have any non-fixed args possibly in the arg registers.
5387 if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
// The varargs cookie must live on the frame so we can reload from it.
5389 noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
5391 regNumber regDeclArgs = REG_ARG_FIRST;
5393 // Skip the 'this' pointer.
5394 if (!compiler->info.compIsStatic)
5396 regDeclArgs = REG_NEXT(regDeclArgs);
5399 // Skip the 'generic context.'
5400 if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5402 regDeclArgs = REG_NEXT(regDeclArgs);
5405 // Skip any 'return buffer arg.'
5406 if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
5408 regDeclArgs = REG_NEXT(regDeclArgs);
5411 // Skip the 'vararg cookie.'
5412 regDeclArgs = REG_NEXT(regDeclArgs);
5414 // Also add offset for the vararg cookie.
5415 int offset = REGSIZE_BYTES;
5417 // Load all the variable arguments in registers back to their registers.
5418 for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
// Skip registers already reloaded by the fixed-argument loop above.
5420 if (!(fixedArgsMask & genRegMask(reg)))
5422 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
5423 regTracker.rsTrackRegTrash(reg);
5425 offset += REGSIZE_BYTES;
5428 #endif // _TARGET_ARM_
5431 /*****************************************************************************
5433 * Check if a variable is assigned to in a tree. The variable number is
5434 * passed in pCallBackData. If the variable is assigned to, return
5435 * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
// Tree-walk callback: aborts the walk (WALK_ABORT) as soon as it sees an
// assignment whose destination is the GT_LCL_VAR whose number was passed
// through pCallbackData; otherwise continues (WALK_CONTINUE).
5437 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
5439 GenTreePtr tree = *pTree;
// The target variable number travels through the callback data pointer;
// the double cast (size_t then unsigned) reverses how the caller packed it.
5440 if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
5441 (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
5443 return Compiler::WALK_ABORT;
5446 return Compiler::WALK_CONTINUE;
// If 'tree' is a GT_LCL_VAR that currently lives in a (non-float) register,
// return that register; otherwise the fall-through path (not visible here)
// reports it is not an enregistered integer variable.
5449 regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
5454 if (tree->gtOper == GT_LCL_VAR)
5456 /* Does the variable live in a register? */
5458 varNum = tree->gtLclVarCommon.gtLclNum;
5459 noway_assert(varNum < compiler->lvaCount);
5460 varDsc = compiler->lvaTable + varNum;
// Only integer-register variables qualify; float reg vars are excluded.
5462 if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5464 return varDsc->lvRegNum;
// Re-load (unspill) any register that the saved liveness snapshot 'ls' says
// must hold a live variable or GC/byref pointer, but which is currently
// sitting in the spilled state. Used to make both arms of a qmark converge
// on the same register state.
5472 void CodeGen::unspillLiveness(genLivenessSet* ls)
5474 // Only try to unspill the registers that are missing from the currentLiveRegs
5476 regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5477 regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
// Keep only registers that were live in the snapshot but are not live now.
5478 cannotSpillMask &= ~currentLiveRegs;
5480 // Typically this will always be true and we will return
5482 if (cannotSpillMask == 0)
// Walk every integer register and unspill the ones flagged above.
5485 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5487 // Is this a register that we cannot leave in the spilled state?
5489 if ((cannotSpillMask & genRegMask(reg)) == 0)
5492 RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
5494 // Was it spilled, if not then skip it.
5499 noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
// KEEP_REG: the reloaded value stays marked as in use afterwards.
5501 regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5505 /*****************************************************************************
5507 * Generate code for a qmark colon
// Generate code for a GT_QMARK (?:) using branches. First tries CMOV via
// genCodeForQmarkWithCMOV; otherwise emits a conditional jump with separate
// 'else' and 'then' arms, taking great care that register/liveness/GC state
// is identical at the join point regardless of which arm executed.
5510 void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
5512 GenTreePtr op1 = tree->gtOp.gtOp1;
5513 GenTreePtr op2 = tree->gtOp.gtOp2;
5515 regMaskTP regs = regSet.rsMaskUsed;
5516 regMaskTP needReg = destReg;
5518 noway_assert(compiler->compQmarkUsed);
5519 noway_assert(tree->gtOper == GT_QMARK);
5520 noway_assert(op1->OperIsCompare());
5521 noway_assert(op2->gtOper == GT_COLON);
5523 GenTreePtr thenNode = op2->AsColon()->ThenNode();
5524 GenTreePtr elseNode = op2->AsColon()->ElseNode();
5526 /* If elseNode is a Nop node you must reverse the
5527 thenNode and elseNode prior to reaching here!
5528 (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */
5530 noway_assert(!elseNode->IsNothingNode());
5532 /* Try to implement the qmark colon using a CMOV. If we can't for
5533 whatever reason, this will return false and we will implement
5534 it using regular branching constructs. */
5536 if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
5540 This is a ?: operator; generate code like this:
5543 jmp_if_true lab_true
5546 op1 (false = 'else' part)
5550 op2 (true = 'then' part)
5555 NOTE: If no 'then' part we do not generate the 'jmp lab_done'
5556 or the 'lab_done' label
5559 BasicBlock* lab_true;
5560 BasicBlock* lab_false;
5561 BasicBlock* lab_done;
// Snapshots of liveness/register/GC state used to force both arms to agree.
5563 genLivenessSet entryLiveness;
5564 genLivenessSet exitLiveness;
5566 lab_true = genCreateTempLabel();
5567 lab_false = genCreateTempLabel();
5569 #if FEATURE_STACK_FP_X87
5570 /* Spill any register that hold partial values so that the exit liveness
5571 from sides is the same */
5572 CLANG_FORMAT_COMMENT_ANCHOR;
5575 regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
5577 // spillMask should be the whole FP stack
5578 noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
5581 SpillTempsStackFP(regSet.rsMaskUsedFloat);
5582 noway_assert(regSet.rsMaskUsedFloat == 0);
5585 /* Before we generate code for qmark, we spill all the currently used registers
5586 that conflict with the registers used in the qmark tree. This is to avoid
5587 introducing spills that only occur on either the 'then' or 'else' side of
5588 the tree, but not both identically. We need to be careful with enregistered
5589 variables that are used; see below.
5592 if (regSet.rsMaskUsed)
5594 /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
5595 variable), then it may not get spilled. However, the variable may
5596 then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
5597 may get spilled from one side and not the other. So unmark regSet.rsMaskVars
5598 before spilling regSet.rsMaskUsed */
5600 regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
5601 regMaskTP rsAdditional = RBM_NONE;
5603 // For each multi-use of an enregistered variable, we need to determine if
5604 // it can get spilled inside the qmark colon. This can only happen if
5605 // its life ends somewhere in the qmark colon. We have the following
5607 // 1) Variable is dead at the end of the colon -- needs to be spilled
5608 // 2) Variable is alive at the end of the colon -- needs to be spilled
5609 // iff it is assigned to in the colon. In order to determine that, we
5610 // examine the GTF_ASG flag to see if any assignments were made in the
5611 // colon. If there are any, we need to do a tree walk to see if this
5612 // variable is the target of an assignment. This treewalk should not
5613 // happen frequently.
5614 if (rsAdditionalCandidates)
5617 if (compiler->verbose)
5619 Compiler::printTreeID(tree);
5620 printf(": Qmark-Colon additional spilling candidates are ");
5621 dspRegMask(rsAdditionalCandidates);
5626 // If any candidates are not alive at the GT_QMARK node, then they
5627 // need to be spilled
5629 const VARSET_TP& rsLiveNow(compiler->compCurLife);
// Liveness immediately after the whole qmark tree.
5630 VARSET_TP rsLiveAfter(compiler->fgUpdateLiveSet(compiler->compCurLife, compiler->compCurLifeTree, tree));
// Restrict the scan to enregistered variables live right now.
5632 VARSET_TP regVarLiveNow(VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
5634 VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
5635 while (iter.NextElem(&varIndex))
5637 // Find the variable in compiler->lvaTable
5638 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
5639 LclVarDsc* varDsc = compiler->lvaTable + varNum;
5641 #if !FEATURE_FP_REGALLOC
5642 if (varDsc->IsFloatRegType())
5646 noway_assert(varDsc->lvRegister);
5650 if (varTypeIsFloating(varDsc->TypeGet()))
5652 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
5656 regBit = genRegMask(varDsc->lvRegNum);
5658 // For longs we may need to spill both regs
5659 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
5660 regBit |= genRegMask(varDsc->lvOtherReg);
5663 // Is it one of our reg-use vars? If not, we don't need to spill it.
5664 regBit &= rsAdditionalCandidates;
5668 // Is the variable live at the end of the colon?
5669 if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
5671 // Variable is alive at the end of the colon. Was it assigned
5672 // to inside the colon?
// GTF_ASG clear means no assignment anywhere under op2 — skip the walk.
5674 if (!(op2->gtFlags & GTF_ASG))
5677 if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
5678 Compiler::WALK_ABORT)
5680 // Variable was assigned to, so we need to spill it.
5682 rsAdditional |= regBit;
5684 if (compiler->verbose)
5686 Compiler::printTreeID(tree);
5687 printf(": Qmark-Colon candidate ");
5690 printf(" is assigned to inside colon and will be spilled\n");
5697 // Variable is not alive at the end of the colon. We need to spill it.
5699 rsAdditional |= regBit;
5701 if (compiler->verbose)
5703 Compiler::printTreeID(tree);
5704 printf(": Qmark-Colon candidate ");
5707 printf(" is alive at end of colon and will be spilled\n");
5714 if (compiler->verbose)
5716 Compiler::printTreeID(tree);
5717 printf(": Qmark-Colon approved additional spilling candidates are ");
5718 dspRegMask(rsAdditional);
// The approved set must be a subset of the candidate set.
5724 noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
5726 // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
5727 // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
5728 // we will have unbalanced spills and generate bad code.
5730 ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
5733 // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
5734 // 'bad' registers, causing spills. So, just force all used registers to get spilled
5735 // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
5736 // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
5737 // unspilled while generating that same tree.
5739 if (regSet.rsStressRegs() >= 1)
5741 rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
5747 // Remember which registers hold pointers. We will spill
5748 // them, but the code that follows will fetch reg vars from
5749 // the registers, so we need that gc compiler->info.
5750 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
5751 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
5753 // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
5754 // So, pretend there aren't any, and spill them anyway. This will only occur
5755 // if rsAdditional is non-empty.
5756 regMaskTP rsTemp = regSet.rsMaskVars;
5757 regSet.ClearMaskVars();
5759 regSet.rsSpillRegs(rsSpill);
5761 // Restore gc tracking masks.
5762 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
5763 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
5765 // Set regSet.rsMaskVars back to normal
5766 regSet.rsMaskVars = rsTemp;
5770 // Generate the conditional jump but without doing any StackFP fixups.
5771 genCondJump(op1, lab_true, lab_false, false);
5773 /* Save the current liveness, register status, and GC pointers */
5774 /* This is the liveness information upon entry */
5775 /* to both the then and else parts of the qmark */
5777 saveLiveness(&entryLiveness);
5779 /* Clear the liveness of any local variables that are dead upon */
5780 /* entry to the else part. */
5782 /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
5783 /* from the "colon or op2" liveSet */
5784 genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
5786 /* genCondJump() closes the current emitter block */
5788 genDefineTempLabel(lab_false);
5790 #if FEATURE_STACK_FP_X87
5793 QmarkStateStackFP tempFPState;
5794 bool bHasFPUState = !compCurFPState.IsEmpty();
5795 genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
5798 /* Does the operator yield a value? */
// Void qmark: arms are evaluated for side effects only, no result register.
5800 if (tree->gtType == TYP_VOID)
5802 /* Generate the code for the else part of the qmark */
5804 genCodeForTree(elseNode, needReg, bestReg);
5806 /* The type is VOID, so we shouldn't have computed a value */
5808 noway_assert(!(elseNode->InReg()));
5810 /* Save the current liveness, register status, and GC pointers */
5811 /* This is the liveness information upon exit of the then part of the qmark */
5813 saveLiveness(&exitLiveness);
5815 /* Is there a 'then' part? */
5817 if (thenNode->IsNothingNode())
5819 #if FEATURE_STACK_FP_X87
5822 // We had FP state on entry just after the condition, so potentially, the else
5823 // node may have to do transition work.
5824 lab_done = genCreateTempLabel();
5826 /* Generate jmp lab_done */
5828 inst_JMP(EJ_jmp, lab_done);
5830 /* No 'then' - just generate the 'lab_true' label */
5832 genDefineTempLabel(lab_true);
5834 // We need to do this after defining the lab_false label
5835 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5836 genQMarkAfterThenBlockStackFP(&tempFPState);
5837 genDefineTempLabel(lab_done);
5840 #endif // FEATURE_STACK_FP_X87
5842 /* No 'then' - just generate the 'lab_true' label */
5843 genDefineTempLabel(lab_true);
5848 lab_done = genCreateTempLabel();
5850 /* Generate jmp lab_done */
5852 inst_JMP(EJ_jmp, lab_done);
5854 /* Restore the liveness that we had upon entry of the then part of the qmark */
5856 restoreLiveness(&entryLiveness);
5858 /* Clear the liveness of any local variables that are dead upon */
5859 /* entry to the then part. */
5860 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5862 /* Generate lab_true: */
5864 genDefineTempLabel(lab_true);
5865 #if FEATURE_STACK_FP_X87
5866 // We need to do this after defining the lab_false label
5867 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5869 /* Enter the then part - trash all registers */
5871 regTracker.rsTrackRegClr();
5873 /* Generate the code for the then part of the qmark */
5875 genCodeForTree(thenNode, needReg, bestReg);
5877 /* The type is VOID, so we shouldn't have computed a value */
5879 noway_assert(!(thenNode->InReg()));
// Bring both arms to an identical register/liveness state at the join.
5881 unspillLiveness(&exitLiveness);
5883 /* Verify that the exit liveness information is the same for the two parts of the qmark */
5885 checkLiveness(&exitLiveness);
5886 #if FEATURE_STACK_FP_X87
5887 genQMarkAfterThenBlockStackFP(&tempFPState);
5889 /* Define the "result" label */
5891 genDefineTempLabel(lab_done);
5894 /* Join of the two branches - trash all registers */
5896 regTracker.rsTrackRegClr();
5898 /* We're just about done */
5900 genUpdateLife(tree);
5904 /* Generate code for a qmark that generates a value */
5906 /* Generate the code for the else part of the qmark */
5908 noway_assert(elseNode->IsNothingNode() == false);
5910 /* Compute the elseNode into any free register */
5911 genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
5912 noway_assert(elseNode->InReg());
5913 noway_assert(elseNode->gtRegNum != REG_NA);
5915 /* Record the chosen register */
// The 'then' arm will be forced into this same register below.
5916 reg = elseNode->gtRegNum;
5917 regs = genRegMask(reg);
5919 /* Save the current liveness, register status, and GC pointers */
5920 /* This is the liveness information upon exit of the else part of the qmark */
5922 saveLiveness(&exitLiveness);
5924 /* Generate jmp lab_done */
5925 lab_done = genCreateTempLabel();
5928 // We will use this to assert we don't emit instructions if we decide not to
5930 unsigned emittedInstructions = getEmitter()->emitInsCount;
5931 bool bSkippedJump = false;
5933 // We would like to know here if the else node is really going to generate
5934 // code, as if it isn't, we're generating here a jump to the next instruction.
5935 // What you would really like is to be able to go back and remove the jump, but
5936 // we have no way of doing that right now.
5939 #if FEATURE_STACK_FP_X87
5940 !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
// If the 'then' node is just the same enregistered variable, the jump over
// it would be a jump to the next instruction — skip emitting it.
5942 genIsEnregisteredIntVariable(thenNode) == reg)
5945 // For the moment, fix this easy case (enregistered else node), which
5946 // is the one that happens all the time.
5948 bSkippedJump = true;
5953 inst_JMP(EJ_jmp, lab_done);
5956 /* Restore the liveness that we had upon entry of the else part of the qmark */
5958 restoreLiveness(&entryLiveness);
5960 /* Clear the liveness of any local variables that are dead upon */
5961 /* entry to the then part. */
5962 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5964 /* Generate lab_true: */
5965 genDefineTempLabel(lab_true);
5966 #if FEATURE_STACK_FP_X87
5969 // We need to do this after defining the lab_true label
5970 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5972 /* Enter the then part - trash all registers */
5974 regTracker.rsTrackRegClr();
5976 /* Generate the code for the then part of the qmark */
5978 noway_assert(thenNode->IsNothingNode() == false);
5980 /* This must place a value into the chosen register */
// EXACT_REG: both arms must deliver the result in the same register.
5981 genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
5983 noway_assert(thenNode->InReg());
5984 noway_assert(thenNode->gtRegNum == reg);
5986 unspillLiveness(&exitLiveness);
5988 /* Verify that the exit liveness information is the same for the two parts of the qmark */
5989 checkLiveness(&exitLiveness);
5990 #if FEATURE_STACK_FP_X87
5991 genQMarkAfterThenBlockStackFP(&tempFPState);
// If we skipped the jump, the 'then' arm must not have emitted anything.
5995 noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
5998 /* Define the "result" label */
5999 genDefineTempLabel(lab_done);
6001 /* Join of the two branches - trash all registers */
6003 regTracker.rsTrackRegClr();
6005 /* Check whether this subtree has freed up any variables */
6007 genUpdateLife(tree);
6009 genMarkTreeInReg(tree, reg);
6013 /*****************************************************************************
6015 * Generate code for a qmark colon using the CMOV instruction. It's OK
6016 * to return false when we can't easily implement it using a cmov (leading
6017 * genCodeForQmark to implement it using branches).
// Attempt to implement a GT_QMARK with a single CMOVcc instruction (x86/x64
// only). Returns false whenever the shape does not fit (non-CMOV CPU, operands
// not local/constant, two constants, variable dying in the predicate, etc.),
// in which case genCodeForQmark falls back to branches.
6020 bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
6022 #ifdef _TARGET_XARCH_
6023 GenTreePtr cond = tree->gtOp.gtOp1;
6024 GenTreePtr colon = tree->gtOp.gtOp2;
6025 // Warning: this naming of the local vars is backwards!
6026 GenTreePtr thenNode = colon->gtOp.gtOp1;
6027 GenTreePtr elseNode = colon->gtOp.gtOp2;
// alwaysNode is computed unconditionally into the result register;
// predicateNode is conditionally moved over it by the CMOV.
6028 GenTreePtr alwaysNode, predicateNode;
6030 regMaskTP needReg = destReg;
6032 noway_assert(tree->gtOper == GT_QMARK);
6033 noway_assert(cond->OperIsCompare());
6034 noway_assert(colon->gtOper == GT_COLON);
// JIT config knob to disable CMOV generation (for testing/debugging).
6037 if (JitConfig.JitNoCMOV())
6043 /* Can only implement CMOV on processors that support it */
6045 if (!compiler->opts.compUseCMOV)
6050 /* thenNode better be a local or a constant */
6052 if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
6057 /* elseNode better be a local or a constant or nothing */
6059 if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
6064 /* can't handle two constants here */
// CMOV's source must be a register or memory operand, not an immediate,
// so at most one side may be a constant.
6066 if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
6071 /* let's not handle comparisons of non-integer types */
6073 if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
6078 /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
6079 The biggest constraint is that cmov doesn't take an integer argument.
6082 bool reverseCond = false;
6083 if (elseNode->OperGet() == GT_CNS_INT)
6085 // else node is a constant
6087 alwaysNode = elseNode;
6088 predicateNode = thenNode;
6093 alwaysNode = thenNode;
6094 predicateNode = elseNode;
6097 // If the live set in alwaysNode is not the same as in tree, then
6098 // the variable in predicate node dies here. This is a dangerous
6099 // case that we don't handle (genComputeReg could overwrite
6100 // the value of the variable in the predicate node).
6102 // This assert is just paranoid (we've already asserted it above)
6103 assert(predicateNode->OperGet() == GT_LCL_VAR);
6104 if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
6109 // Pass this point we are comitting to use CMOV.
6113 compiler->gtReverseCond(cond);
// Emit the compare; jumpKind encodes which condition the CMOV must test.
6116 emitJumpKind jumpKind = genCondSetFlags(cond);
6118 // Compute the always node into any free register. If it's a constant,
6119 // we need to generate the mov instruction here (otherwise genComputeReg might
6120 // modify the flags, as in xor reg,reg).
6122 if (alwaysNode->OperGet() == GT_CNS_INT)
6124 reg = regSet.rsPickReg(needReg, bestReg);
6125 inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
6126 gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
6127 regTracker.rsTrackRegTrash(reg);
6131 genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6132 noway_assert(alwaysNode->InReg());
6133 noway_assert(alwaysNode->gtRegNum != REG_NA);
6135 // Record the chosen register
6137 reg = alwaysNode->gtRegNum;
6140 regNumber regPredicate = REG_NA;
6142 // Is predicateNode an enregistered variable?
6144 if (genMarkLclVar(predicateNode))
6146 // Variable lives in a register
6148 regPredicate = predicateNode->gtRegNum;
6153 // Checks if the variable happens to be in any of the registers
6155 regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
// Map emitter jump kinds to the corresponding CMOVcc instructions.
6159 const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
6160 INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
6161 INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
6163 noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
6164 instruction cmov_ins = EJtoCMOV[jumpKind];
6166 noway_assert(insIsCMOV(cmov_ins));
6168 if (regPredicate != REG_NA)
6170 // regPredicate is in a register
6172 inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
6176 // regPredicate is in memory
6178 inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
6180 gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
6181 regTracker.rsTrackRegTrash(reg);
6183 genUpdateLife(alwaysNode);
6184 genUpdateLife(predicateNode);
6185 genCodeForTree_DONE_LIFE(tree, reg);
6192 #ifdef _TARGET_XARCH_
// Generate code for a GT_MUL that must use the x86 one-operand MUL/IMUL form,
// which fixes the multiplicand in EAX and clobbers EDX with the high half of
// the product. Carefully routes op1 into EAX, keeps EDX free, and leaves the
// result in EAX.
6193 void CodeGen::genCodeForMultEAX(GenTreePtr tree)
6195 GenTreePtr op1 = tree->gtOp.gtOp1;
6196 GenTreePtr op2 = tree->gtGetOp2();
6197 bool ovfl = tree->gtOverflow();
6198 regNumber reg = DUMMY_INIT(REG_CORRUPT);
6201 noway_assert(tree->OperGet() == GT_MUL);
6203 /* We'll evaluate 'op1' first */
// Prefer EAX for op1 unless op2 has reserved it.
6205 regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
6207 /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
6209 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
6210 noway_assert(op1->InReg());
6212 // If op2 is a constant we need to load the constant into a register
// The one-operand MUL/IMUL has no immediate form, so materialize it.
6213 if (op2->OperKind() & GTK_CONST)
6215 genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
6216 noway_assert(op2->InReg());
6217 regSet.rsMarkRegUsed(op2);
6218 addrReg = genRegMask(op2->gtRegNum);
6222 /* Make the second operand addressable */
6223 // Try to avoid EAX.
6224 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
6227 /* Make sure the first operand is still in a register */
6228 // op1 *must* go into EAX.
6229 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
6230 noway_assert(op1->InReg());
6232 reg = op1->gtRegNum;
6234 // For 8 bit operations, we need to pick byte addressable registers
// Only AL/BL/CL/DL are byte-addressable on x86.
6236 if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
6238 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6240 inst_RV_RV(INS_mov, byteReg, reg);
6242 regTracker.rsTrackRegTrash(byteReg);
6243 regSet.rsMarkRegFree(genRegMask(reg));
6246 op1->gtRegNum = reg;
6247 regSet.rsMarkRegUsed(op1);
6250 /* Make sure the operand is still addressable */
6251 addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
6253 /* Free up the operand, if it's a regvar */
6257 /* The register is about to be trashed */
6259 regTracker.rsTrackRegTrash(reg);
6261 // For overflow instructions, tree->TypeGet() is the accurate type,
6262 // and gives us the size for the operands.
6264 emitAttr opSize = emitTypeSize(tree->TypeGet());
6266 /* Compute the new value */
6268 noway_assert(op1->gtRegNum == REG_EAX);
6270 // Make sure Edx is free (unless used by op2 itself)
// MUL/IMUL writes the high result half into EDX, so it must not hold a
// live value at this point.
6271 bool op2Released = false;
6273 if ((addrReg & RBM_EDX) == 0)
6275 // op2 does not use Edx, so make sure noone else does either
6276 regSet.rsGrabReg(RBM_EDX);
6278 else if (regSet.rsMaskMult & RBM_EDX)
6280 /* Edx is used by op2 and some other trees.
6281 Spill the other trees besides op2. */
6283 regSet.rsGrabReg(RBM_EDX);
6286 /* keepReg==RegSet::FREE_REG so that the other multi-used trees
6287 don't get marked as unspilled as well. */
6288 regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
// Unsigned multiply uses MUL, signed uses IMUL (selection code not shown).
6293 if (tree->gtFlags & GTF_UNSIGNED)
6298 inst_TT(ins, op2, 0, 0, opSize);
6300 /* Both EAX and EDX are now trashed */
6302 regTracker.rsTrackRegTrash(REG_EAX);
6303 regTracker.rsTrackRegTrash(REG_EDX);
6305 /* Free up anything that was tied up by the operand */
6308 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
6310 /* The result will be where the first operand is sitting */
6312 /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
6313 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6315 reg = op1->gtRegNum;
6316 noway_assert(reg == REG_EAX);
6320 /* Do we need an overflow check */
6323 genCheckOverflow(tree);
6325 genCodeForTree_DONE(tree, reg);
6327 #endif // _TARGET_XARCH_
// Generate code for a GT_MUL producing a 64-bit result from two 32-bit
// operands (ARM long multiply: both halves of the product land in a register
// pair regLo/regHi). Also emits the overflow check when the node requests it.
6330 void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
6332 GenTreePtr op1 = tree->gtOp.gtOp1;
6333 GenTreePtr op2 = tree->gtGetOp2();
6335 noway_assert(tree->OperGet() == GT_MUL);
6337 /* Generate the first operand into some register */
6339 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6340 noway_assert(op1->InReg());
6342 /* Generate the second operand into some register */
6344 genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6345 noway_assert(op2->InReg());
6347 /* Make sure the first operand is still in a register */
6348 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6349 noway_assert(op1->InReg());
6351 /* Free up the operands */
6352 genUpdateLife(tree);
// Pick two distinct registers for the low/high halves of the product;
// regLo is locked while regHi is chosen so they cannot collide.
6357 regNumber regLo = regSet.rsPickReg(destReg, bestReg);
6360 regSet.rsLockReg(genRegMask(regLo));
6361 regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
6362 regSet.rsUnlockReg(genRegMask(regLo));
// Unsigned vs. signed long multiply (instruction selection not shown here).
6365 if (tree->gtFlags & GTF_UNSIGNED)
6370 getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
6371 regTracker.rsTrackRegTrash(regHi);
6372 regTracker.rsTrackRegTrash(regLo);
6374 /* Do we need an overflow check */
6376 if (tree->gtOverflow())
6378 // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
6380 regSet.rsLockReg(genRegMask(regLo));
6382 if (tree->gtFlags & GTF_MUL_64RSLT)
6383 regSet.rsLockReg(genRegMask(regHi));
6385 regNumber regTmpHi = regHi;
// Signed case: the multiply overflows unless the high half is the sign
// extension of the low half; cmp/adc folds that test into regTmpHi.
6386 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
6388 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
6389 regTmpHi = regSet.rsPickReg(RBM_ALLINT);
6390 getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
6391 regTracker.rsTrackRegTrash(regTmpHi);
6393 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
6395 // Jump to the block which will throw the expection
6396 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
6397 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
6399 // Unlock regLo [and regHi] after generating code for the gtOverflow() case
6401 regSet.rsUnlockReg(genRegMask(regLo));
6403 if (tree->gtFlags & GTF_MUL_64RSLT)
6404 regSet.rsUnlockReg(genRegMask(regHi));
6407 genUpdateLife(tree);
// Full 64-bit result requested: mark the tree as living in the reg pair;
// otherwise only the low 32 bits are the result.
6409 if (tree->gtFlags & GTF_MUL_64RSLT)
6410 genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
6412 genMarkTreeInReg(tree, regLo);
6414 #endif // _TARGET_ARM_
6416 /*****************************************************************************
6418 * Generate code for a simple binary arithmetic or logical operator.
6419 * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
//------------------------------------------------------------------------
// genCodeForTreeSmpBinArithLogOp: Generate code for a simple binary
// arithmetic or logical operator.  Handles GT_AND, GT_OR, GT_XOR, GT_ADD,
// GT_SUB, GT_MUL.
//
// Arguments:
//    tree    - the operator node
//    destReg - mask of registers preferred for the result
//    bestReg - mask of "best" registers to try first
//
// NOTE(review): this excerpt is elided -- braces, 'else' arms, and the
// declarations of 'ins', 'isArith', 'ovfl', 'size', 'regSrc', 'newReg'
// and 'andMask' are on lines not shown here, so nesting below is
// approximate.
void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
    genTreeOps oper = tree->OperGet();
    const var_types treeType = tree->TypeGet();
    GenTreePtr op1 = tree->gtOp.gtOp1;
    GenTreePtr op2 = tree->gtGetOp2();
    insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regNumber reg = DUMMY_INIT(REG_CORRUPT);
    regMaskTP needReg = destReg;

    /* Figure out what instruction to generate */

#ifdef _TARGET_XARCH_
    /* Special case: try to use the 3 operand form "imul reg, op1, icon" */

    if ((oper == GT_MUL) &&
        op2->IsIntCnsFitsInI32() &&              // op2 is a constant that fits in a sign-extended 32-bit immediate
        !op1->IsCnsIntOrI() &&                   // op1 is not a constant
        (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
        !varTypeIsByte(treeType) &&              // No encoding for say "imul al,al,imm"
        !tree->gtOverflow())                     // 3 operand imul doesn't set flags
        /* Make the first operand addressable */

        regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);

        /* Grab a register for the target */

        reg = regSet.rsPickReg(needReg, bestReg);

        /* Compute the value into the target: reg=op1*op2_icon */
        // Multiplies by 3, 5 and 9 can be done with a single scaled-index
        // 'lea' (base + scale*base) instead of an 'imul'.
        if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
            regSrc = op1->gtRegNum;
            inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
            // (icon & -2) turns 3/5/9 into the 2/4/8 scale for the lea.
            getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
                                        (op2->gtIntCon.gtIconVal & -2), 0);
#endif // LEA_AVAILABLE

        /* Compute the value into the target: reg=op1*op2_icon */
        inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);

        /* The register has been trashed now */

        regTracker.rsTrackRegTrash(reg);

        /* The address is no longer live */

        genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

        genCodeForTree_DONE(tree, reg);
#endif // _TARGET_XARCH_

    // We only reach here for GT_ADD, GT_SUB and GT_MUL.
    assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));

    ovfl = tree->gtOverflow();

    /* We record the accurate (small) types in trees only we need to
     * check for overflow. Otherwise we record genActualType() */

    noway_assert(ovfl || (treeType == genActualType(treeType)));

    /* Can we use an 'lea' to compute the result?
       Can't use 'lea' for overflow as it doesn't set flags
       Can't use 'lea' unless we have at least two free registers */

    bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
                           genCountBits(regSet.rsMaskLock) +  // Locked registers
                           2                                  // We will need two registers
                       <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));

    regMaskTP regs = RBM_NONE; // OUT argument
    if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
        /* Is the value now computed in some register? */

        genCodeForTree_REG_VAR1(tree);

        /* If we can reuse op1/2's register directly, and 'tree' is
           a simple expression (ie. not in scaled index form),
           might as well just use "add" instead of "lea" */

        // However, if we're in a context where we want to evaluate "tree" into a specific
        // register different from the reg we'd use in this optimization, then it doesn't
        // make sense to do the "add", since we'd also have to do a "mov."

        reg = op1->gtRegNum;

        if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
            /* Simply add op2 to the register */

            inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);

            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);

        else if (op2->OperGet() == GT_CNS_INT)
            /* Simply add op2 to the register */

            genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);

        reg = op2->gtRegNum;

        if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
            /* Simply add op1 to the register */

            inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);

            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);

        // The expression either requires a scaled-index form, or the
        // op1 or op2's register can't be targeted, this can be
        // caused when op1 or op2 are enregistered variables.

        reg  = regSet.rsPickReg(needReg, bestReg);
        size = emitActualTypeSize(treeType);

        /* Generate "lea reg, [addr-mode]" */

        inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);

#ifndef _TARGET_XARCH_
        // Don't call genFlagsEqualToReg on x86/x64
        // as it does not set the flags
        if (tree->gtSetFlags())
            genFlagsEqualToReg(tree, reg);

        /* The register has been trashed now */
        regTracker.rsTrackRegTrash(reg);

        genDoneAddressable(tree, regs, RegSet::FREE_REG);

        /* The following could be an 'inner' pointer!!! */

        noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));

        if (treeType == TYP_BYREF)
            genUpdateLife(tree);

            gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
            gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);

        genCodeForTree_DONE(tree, reg);
#endif // LEA_AVAILABLE

    noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));

    /* The following makes an assumption about gtSetEvalOrder(this) */

    noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);

    /* Compute a useful register mask */
    needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
    needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());

    // Determine what registers go live between op1 and op2
    // Don't bother checking if op1 is already in a register.
    // This is not just for efficiency; if it's already in a
    // register then it may already be considered "evaluated"
    // for the purposes of liveness, in which genNewLiveRegMask

    regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);

    needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);

#if CPU_HAS_BYTE_REGS
    /* 8-bit operations can only be done in the byte-regs */
    if (varTypeIsByte(treeType))
        needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
#endif // CPU_HAS_BYTE_REGS

    // Try selecting one of the 'bestRegs'
    needReg = regSet.rsNarrowHint(needReg, bestReg);

    /* Special case: small_val & small_mask */

    if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
        size_t and_val = op2->gtIntCon.gtIconVal;

        var_types typ = op1->TypeGet();

        // 'andMask' (declared on an elided line) is the full mask of the
        // small type: 0xFF for byte-sized, 0xFFFF for short-sized types;
        // the switch dispatching on 'typ' is elided here.
        andMask = 0x000000FF;
        andMask = 0x0000FFFF;
        noway_assert(!"unexpected type");

        // Is the 'and_val' completely contained within the bits found in 'andMask'
        if ((and_val & ~andMask) == 0)
            // We must use unsigned instructions when loading op1
            if (varTypeIsByte(typ))
                op1->gtType = TYP_UBYTE;
            else // varTypeIsShort(typ)
                assert(varTypeIsShort(typ));
                op1->gtType = TYP_CHAR;

        /* Generate the first operand into a scratch register */

        op1 = genCodeForCommaTree(op1);
        genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);

        noway_assert(op1->InReg());

        regNumber op1Reg = op1->gtRegNum;

        // Did we end up in an acceptable register?
        // and do we have an acceptable free register available to grab?

        if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
            // See if we can pick a register from bestReg

            // Grab an acceptable register

            if ((bestReg & regSet.rsRegMaskFree()) != 0)
                newReg = regSet.rsGrabReg(bestReg);
                newReg = regSet.rsGrabReg(needReg);

            noway_assert(op1Reg != newReg);

            /* Update the value in the target register */

            regTracker.rsTrackRegCopy(newReg, op1Reg);

            inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());

            /* The value has been transferred to 'reg' */

            if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
                gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));

            gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());

            /* The value is now in an appropriate register */

            op1->gtRegNum = newReg;

            noway_assert(op1->InReg());

        /* Mark the register as 'used' */
        regSet.rsMarkRegUsed(op1);
        reg = op1->gtRegNum;

        if (and_val != andMask) // Does the "and" mask only cover some of the bits?
            /* "and" the value */

            inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);

        /* Update the live set of register variables */
        if (compiler->opts.varNames)
            genUpdateLife(tree);

        /* Now we can update the register pointer information */

        gcInfo.gcMarkRegPtrVal(reg, treeType);

        genCodeForTree_DONE_LIFE(tree, reg);

#ifdef _TARGET_XARCH_

    // Do we have to use the special "imul" instruction
    // which has eax as the implicit operand ?

    bool multEAX = false;

    if (tree->gtFlags & GTF_MUL_64RSLT)
        /* Only multiplying with EAX will leave the 64-bit
         * result in EDX:EAX */

    if (tree->gtFlags & GTF_UNSIGNED)
        /* "mul reg/mem" always has EAX as default operand */

    else if (varTypeIsSmall(treeType))
        /* Only the "imul with EAX" encoding has the 'w' bit
         * to specify the size of the operands */

    noway_assert(oper == GT_MUL);

    return genCodeForMultEAX(tree);
#endif // _TARGET_XARCH_

    // Do we have to use the special 32x32 => 64 bit multiply

    bool mult64 = false;

    if (tree->gtFlags & GTF_MUL_64RSLT)

    // We always must use the 32x32 => 64 bit multiply
    // to detect overflow

    noway_assert(oper == GT_MUL);

    return genCodeForMult64(tree, destReg, bestReg);
#endif // _TARGET_ARM_

    /* Generate the first operand into a scratch register */

    op1 = genCodeForCommaTree(op1);
    genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);

    noway_assert(op1->InReg());

    regNumber op1Reg = op1->gtRegNum;

    // Setup needReg with the set of register that we require for op1 to be in

    needReg = RBM_ALLINT;

    /* Compute a useful register mask */
    needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
    needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());

#if CPU_HAS_BYTE_REGS
    /* 8-bit operations can only be done in the byte-regs */
    if (varTypeIsByte(treeType))
        needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
#endif // CPU_HAS_BYTE_REGS

    // Did we end up in an acceptable register?
    // and do we have an acceptable free register available to grab?

    if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
        // See if we can pick a register from bestReg

        // Grab an acceptable register

        if ((bestReg & regSet.rsRegMaskFree()) != 0)
            newReg = regSet.rsGrabReg(bestReg);
            newReg = regSet.rsGrabReg(needReg);

        noway_assert(op1Reg != newReg);

        /* Update the value in the target register */

        regTracker.rsTrackRegCopy(newReg, op1Reg);

        inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());

        /* The value has been transferred to 'reg' */

        if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
            gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));

        gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());

        /* The value is now in an appropriate register */

        op1->gtRegNum = newReg;

        noway_assert(op1->InReg());
        op1Reg = op1->gtRegNum;

    /* Mark the register as 'used' */
    regSet.rsMarkRegUsed(op1);

    bool isSmallConst = false;

    if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
        isSmallConst = true;

    /* Make the second operand addressable */

    regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);

#if CPU_LOAD_STORE_ARCH
    genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
#else  // !CPU_LOAD_STORE_ARCH
    /* Is op1 spilled and op2 in a register? */

    if ((op1->gtFlags & GTF_SPILLED) && (op2->InReg()) && (ins != INS_sub))
        noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);

        // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
        noway_assert(op2->gtOper != GT_LCL_VAR ||
                     varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));

        reg                = op2->gtRegNum;
        regMaskTP regMask  = genRegMask(reg);

        /* Is the register holding op2 available? */

        if (regMask & regSet.rsMaskVars)

        /* Get the temp we spilled into. */

        TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);

        /* For 8bit operations, we need to make sure that op2 is
           in a byte-addressable registers */

        if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
            regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);

            inst_RV_RV(INS_mov, byteReg, reg);
            regTracker.rsTrackRegTrash(byteReg);

            /* op2 couldn't have spilled as it was not sitting in
               RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
            noway_assert(op2->InReg());

            regSet.rsUnlockReg(regMask);
            regSet.rsMarkRegFree(regMask);

            regMask       = genRegMask(reg);
            op2->gtRegNum = reg;
            regSet.rsMarkRegUsed(op2);

        // Combine the unspilled op1 temp directly into op2's register.
        inst_RV_ST(ins, reg, temp, 0, treeType);

        regTracker.rsTrackRegTrash(reg);

        compiler->tmpRlsTemp(temp);

        /* 'add'/'sub' set all CC flags, others only ZF */

        /* If we need to check overflow, for small types, the
         * flags can't be used as we perform the arithmetic
         * operation (on small registers) and then sign extend it
         *
         * NOTE : If we ever don't need to sign-extend the result,
         * we can use the flags */

        if (tree->gtSetFlags())
            genFlagsEqualToReg(tree, reg);

        /* The result is where the second operand is sitting. Mark result reg as free */
        regSet.rsMarkRegFree(genRegMask(reg));

        gcInfo.gcMarkRegPtrVal(reg, treeType);
#endif // !CPU_LOAD_STORE_ARCH

    /* Make sure the first operand is still in a register */
    regSet.rsLockUsedReg(addrReg);
    genRecoverReg(op1, 0, RegSet::KEEP_REG);
    noway_assert(op1->InReg());
    regSet.rsUnlockUsedReg(addrReg);

    reg = op1->gtRegNum;

    // For 8 bit operations, we need to pick byte addressable registers

    if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
        regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);

        inst_RV_RV(INS_mov, byteReg, reg);

        regTracker.rsTrackRegTrash(byteReg);
        regSet.rsMarkRegFree(genRegMask(reg));

        // NOTE(review): no visible line updates 'reg' to 'byteReg' before
        // this store -- the assignment is presumably on an elided line.
        op1->gtRegNum = reg;
        regSet.rsMarkRegUsed(op1);

    /* Make sure the operand is still addressable */
    addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));

    /* Free up the operand, if it's a regvar */

    /* The register is about to be trashed */

    regTracker.rsTrackRegTrash(reg);

    bool op2Released = false;

    // For overflow instructions, tree->gtType is the accurate type,
    // and gives us the size for the operands.

    emitAttr opSize = emitTypeSize(treeType);

    /* Compute the new value */

    if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
#if !CPU_HAS_FP_SUPPORT
        && (treeType == TYP_INT || treeType == TYP_I_IMPL)

        ssize_t ival = op2->gtIntCon.gtIconVal;

        genIncRegBy(reg, ival, tree, treeType, ovfl);

    else if (oper == GT_SUB)
        if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
                     (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
                                                                                  // Therefore we can't use -ival.
            /* For unsigned overflow, we have to use INS_sub to set
               the flags correctly */

            genDecRegBy(reg, ival, tree);

        /* Else, we simply add the negative of the value */

        genIncRegBy(reg, -ival, tree, treeType, ovfl);

    else if (oper == GT_MUL)
        genMulRegBy(reg, ival, tree, treeType, ovfl);

    // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
    op2 = op2->gtEffectiveVal();
    if (varTypeIsByte(treeType) && op2->InReg())
        noway_assert(genRegMask(reg) & RBM_BYTE_REGS);

        regNumber op2reg     = op2->gtRegNum;
        regMaskTP op2regMask = genRegMask(op2reg);

        if (!(op2regMask & RBM_BYTE_REGS))
            regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);

            inst_RV_RV(INS_mov, byteReg, op2reg);
            regTracker.rsTrackRegTrash(byteReg);

            genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

            op2->gtRegNum = byteReg;

    inst_RV_TT(ins, reg, op2, 0, opSize, flags);

    /* Free up anything that was tied up by the operand */

    genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

    /* The result will be where the first operand is sitting */

    /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
    genRecoverReg(op1, 0, RegSet::KEEP_REG);

    reg = op1->gtRegNum;

    /* 'add'/'sub' set all CC flags, others only ZF+SF */

    if (tree->gtSetFlags())
        genFlagsEqualToReg(tree, reg);

#if !CPU_LOAD_STORE_ARCH
#endif // !CPU_LOAD_STORE_ARCH

    /* Do we need an overflow check */

    genCheckOverflow(tree);

    genCodeForTree_DONE(tree, reg);
7176 /*****************************************************************************
7178 * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7179 * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
//------------------------------------------------------------------------
// genCodeForTreeSmpBinArithLogAsgOp: Generate code for a simple binary
// arithmetic or logical assignment operator: x <op>= y.  Handles
// GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
//
// Arguments:
//    tree    - the assignment-operator node
//    destReg - mask of registers preferred for the result
//    bestReg - mask of "best" registers to try first
//
// NOTE(review): this excerpt is elided -- braces, case labels, 'else'
// arms, and the declarations of 'ins', 'isArith', 'ovfl', 'addrReg',
// 'reg2' and 'needMask' are on lines not shown here, so nesting below is
// approximate.
void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
    const genTreeOps oper     = tree->OperGet();
    const var_types  treeType = tree->TypeGet();
    GenTreePtr       op1      = tree->gtOp.gtOp1;
    GenTreePtr       op2      = tree->gtGetOp2();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regNumber        reg      = DUMMY_INIT(REG_CORRUPT);
    regMaskTP        needReg  = destReg;

    /* Figure out what instruction to generate */

    // We only reach here for GT_ASG_SUB, GT_ASG_ADD.

    ovfl = tree->gtOverflow();

    // We can't use += with overflow if the value cannot be changed
    // in case of an overflow-exception which the "+" might cause
    noway_assert(!ovfl ||
                 ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));

    /* Do not allow overflow instructions with refs/byrefs */

    noway_assert(!ovfl || !varTypeIsGC(treeType));

    // We disallow overflow and byte-ops here as it is too much trouble
    noway_assert(!ovfl || !varTypeIsByte(treeType));

    /* Is the second operand a constant? */

    if (op2->IsIntCnsFitsInI32())
        int ival = (int)op2->gtIntCon.gtIconVal;

        /* What is the target of the assignment? */

        switch (op1->gtOper)
            reg = op1->gtRegVar.gtRegNum;

            /* No registers are needed for addressing */

#if !CPU_LOAD_STORE_ARCH
            /* We're adding a constant to a register */

            if (oper == GT_ASG_ADD)
                genIncRegBy(reg, ival, tree, treeType, ovfl);
            else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
                              ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
                                                                                         // Therefore we can't
                /* For unsigned overflow, we have to use INS_sub to set
                   the flags correctly */
                genDecRegBy(reg, ival, tree);
                genIncRegBy(reg, -ival, tree, treeType, ovfl);

            /* Does the variable live in a register? */

            if (genMarkLclVar(op1))

            /* Make the target addressable for load/store */
            addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);

#if !CPU_LOAD_STORE_ARCH
            // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory

            /* For small types with overflow check, we need to
               sign/zero extend the result, so we need it in a reg */

            if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH

                // Load op1 into a reg

                reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);

                inst_RV_TT(INS_mov, reg, op1);

                // Issue the add/sub and the overflow check

                inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
                regTracker.rsTrackRegTrash(reg);

                genCheckOverflow(tree);

                /* Store the (sign/zero extended) result back to
                   the stack location of the variable */

                inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

#if !CPU_LOAD_STORE_ARCH

            /* Add/subtract the new value into/from the target */

            reg = op1->gtRegNum;

            /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
            if (!ovfl && (ival == 1 || ival == -1) &&
                !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
                noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
                if (oper == GT_ASG_SUB)

                ins = (ival > 0) ? INS_inc : INS_dec;

            inst_TT_IV(ins, op1, ival);

            if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
                if (tree->gtSetFlags())
                    genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);

#endif // !CPU_LOAD_STORE_ARCH
        } // end switch (op1->gtOper)

        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);

    } // end if (op2->IsIntCnsFitsInI32())
    } // end if (isArith)

    noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);

    /* Is the target a register or local variable? */

    switch (op1->gtOper)
        /* Does the target variable live in a register? */

        if (!genMarkLclVar(op1))

        /* Get hold of the target register */

        reg = op1->gtRegVar.gtRegNum;

        /* Make sure the target of the store is available */

        if (regSet.rsMaskUsed & genRegMask(reg))
            regSet.rsSpillReg(reg);

        /* Make the RHS addressable */

        addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);

        /* Compute the new value into the target register */
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_HAS_BYTE_REGS

        // Fix 383833 X86 ILGEN
        reg2 = op2->gtRegNum;

        // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers
        // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register

        if (varTypeIsByte(treeType) &&
            (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
            // We will force op2 into a register (via sign/zero extending load)
            // for the cases where op2 is in memory and thus could have
            // an unmapped page just beyond its location

            if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
                genCodeForTree(op2, 0);
                assert(op2->InReg());

            inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);

            bool canOmit = false;

            if (varTypeIsUnsigned(treeType))
                // When op2 is a byte sized constant we can omit the zero extend instruction
                if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
            else // treeType is signed
                // When op2 is a positive 7-bit or smaller constant
                // we can omit the sign extension sequence.
                if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))

            // If reg is a byte reg then we can use a movzx/movsx instruction

            if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
                instruction extendIns = ins_Move_Extend(treeType, true);
                inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
            else // we can't encode a movzx/movsx instruction
                if (varTypeIsUnsigned(treeType))
                    // otherwise, we must zero the upper 24 bits of 'reg'
                    inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
                else // treeType is signed
                    // otherwise, we must sign extend the result in the non-byteable register 'reg'
                    // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
                    // then we do an arithmetic shift back 24 bits which propagates the sign bit correctly.

                    inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
                    inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);

#endif // CPU_HAS_BYTE_REGS

        inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);

        /* The zero flag is now equal to the register value */

        if (tree->gtSetFlags())
            genFlagsEqualToReg(tree, reg);

        /* Remember that we trashed the target */

        regTracker.rsTrackRegTrash(reg);

        /* Free up anything that was tied up by the RHS */

        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

        genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);

    } // end switch (op1->gtOper)

#if !CPU_LOAD_STORE_ARCH
    /* Special case: "x ^= -1" is actually "not(x)" */

    if (oper == GT_ASG_XOR)
        if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
            addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
            inst_TT(INS_NOT, op1);
            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

            genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
#endif // !CPU_LOAD_STORE_ARCH

    /* Setup target mask for op2 (byte-regs for small operands) */

    needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;

    /* Is the second operand a constant? */

    if (op2->IsIntCnsFitsInI32())
        int ival = (int)op2->gtIntCon.gtIconVal;

        /* Make the target addressable */
        addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);

        // Read-modify-write directly on the addressable target.
        inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);

        genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

        genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);

    /* Is the value or the address to be computed first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
        /* Compute the new value into a register */

        genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);

        /* Make the target addressable for load/store */
        addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
        regSet.rsLockUsedReg(addrReg);

#if !CPU_LOAD_STORE_ARCH
        // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory

        /* For small types with overflow check, we need to
           sign/zero extend the result, so we need it in a reg */

        if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH

            reg = regSet.rsPickReg();
            regSet.rsLockReg(genRegMask(reg));

            noway_assert(genIsValidReg(reg));

            /* Generate "ldr reg, [var]" */

            inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
                regTracker.rsTrackRegTrash(reg);

            /* Make sure the new value is in a register */

            genRecoverReg(op2, 0, RegSet::KEEP_REG);

            /* Compute the new value */

            inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);

            genCheckOverflow(tree);

            /* Move the new value back to the variable */
            /* Generate "str reg, [var]" */

            inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
            regSet.rsUnlockReg(genRegMask(reg));

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);

#if !CPU_LOAD_STORE_ARCH

            /* Make sure the new value is in a register */

            genRecoverReg(op2, 0, RegSet::KEEP_REG);

            /* Add the new value into the target */

            inst_TT_RV(ins, op1, op2->gtRegNum);
#endif // !CPU_LOAD_STORE_ARCH

        /* Free up anything that was tied up either side */
        regSet.rsUnlockUsedReg(addrReg);
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        /* Make the target addressable */

        addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);

        /* Compute the new value into a register */

        genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
        regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));

        /* Make sure the target is still addressable */

        addrReg = genKeepAddressable(op1, addrReg);
        regSet.rsLockUsedReg(addrReg);

#if !CPU_LOAD_STORE_ARCH
        // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory

        /* For small types with overflow check, we need to
           sign/zero extend the result, so we need it in a reg */

        if (ovfl && genTypeSize(treeType) < sizeof(int))
#endif // !CPU_LOAD_STORE_ARCH

            reg = regSet.rsPickReg();

            inst_RV_TT(INS_mov, reg, op1);

            inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
            regTracker.rsTrackRegTrash(reg);

            genCheckOverflow(tree);

            inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

            if (op1->gtOper == GT_LCL_VAR)
                regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);

#if !CPU_LOAD_STORE_ARCH

            /* Add the new value into the target */

            inst_TT_RV(ins, op1, op2->gtRegNum);

        /* Free up anything that was tied up either side */
        regSet.rsUnlockUsedReg(addrReg);
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));

    genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7684 /*****************************************************************************
7686 * Generate code for GT_UMOD.
//------------------------------------------------------------------------
// genCodeForUnsignedMod: Generate code for GT_UMOD.
//
// A modulo by a suitable integer constant (as decided by
// fgIsUnsignedModOptimizable) is strength-reduced to a bitwise AND with
// (divisor - 1); all other cases fall through to the general divide path.
//
// Arguments:
//    tree    - the GT_UMOD node
//    destReg - mask of registers preferred for the result
//    bestReg - mask of "best" registers to try first
//
// NOTE(review): this excerpt is elided; the declaration of 'reg' and the
// surrounding braces are on lines not shown here.
void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
    assert(tree->OperGet() == GT_UMOD);

    GenTreePtr      op1      = tree->gtOp.gtOp1;
    GenTreePtr      op2      = tree->gtOp.gtOp2;
    const var_types treeType = tree->TypeGet();
    regMaskTP       needReg  = destReg;

    /* Is this a division by an integer constant? */

    if (compiler->fgIsUnsignedModOptimizable(op2))
        /* Generate the operand into some register */

        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
        noway_assert(op1->InReg());

        reg = op1->gtRegNum;

        /* Generate the appropriate sequence */
        // reg &= (divisor - 1): equivalent to the unsigned modulo.
        size_t ival = op2->gtIntCon.gtIconVal - 1;
        inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));

        /* The register is now trashed */

        regTracker.rsTrackRegTrash(reg);

        genCodeForTree_DONE(tree, reg);

    // Not optimizable: emit the generic divide/modulo sequence.
    genCodeForGeneralDivide(tree, destReg, bestReg);
7726 /*****************************************************************************
7728 * Generate code for GT_MOD.
// tree    - the GT_MOD node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
//
// When the divisor is an optimizable constant (power of 2 — see
// genCodeForGeneralDivide's header), emits an AND/OR-based sequence that
// preserves the sign of the dividend instead of a real divide.
7731 void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7733 assert(tree->OperGet() == GT_MOD);
7735 GenTreePtr op1 = tree->gtOp.gtOp1;
7736 GenTreePtr op2 = tree->gtOp.gtOp2;
7737 const var_types treeType = tree->TypeGet();
7738 regMaskTP needReg = destReg;
7741 /* Is this a division by an integer constant? */
7744 if (compiler->fgIsSignedModOptimizable(op2))
7746 ssize_t ival = op2->gtIntCon.gtIconVal;
7747 BasicBlock* skip = genCreateTempLabel();
7749 /* Generate the operand into some register */
7751 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7752 noway_assert(op1->InReg());
7754 reg = op1->gtRegNum;
7756 /* Generate the appropriate sequence */
// AND with (divisor-1) keeps the low bits; OR-ing in the top bit keeps the
// sign information so the negative case can be fixed up below.
7758 inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
7760 /* The register is now trashed */
7762 regTracker.rsTrackRegTrash(reg);
7764 /* Check and branch for a positive value */
7765 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7766 inst_JMP(jmpGEL, skip);
7768 /* Generate the rest of the sequence and we're done */
// Negative dividend fix-up: dec, OR in the sign-extension bits, inc.
7770 genIncRegBy(reg, -1, NULL, treeType);
// For TYP_LONG, if ival does not fit in 32 bits we must materialize the
// full 64-bit immediate in a scratch register before OR-ing it in.
7772 if ((treeType == TYP_LONG) && ((int)ival != ival))
7774 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
7775 instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
7776 inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
7780 inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
7782 genIncRegBy(reg, 1, NULL, treeType);
7784 /* Define the 'skip' label and we're done */
7786 genDefineTempLabel(skip);
7788 genCodeForTree_DONE(tree, reg);
// Not optimizable: fall back to the general divide/modulo sequence.
7792 genCodeForGeneralDivide(tree, destReg, bestReg);
7795 /*****************************************************************************
7797 * Generate code for GT_UDIV.
// tree    - the GT_UDIV node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
7800 void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7802 assert(tree->OperGet() == GT_UDIV);
7804 GenTreePtr op1 = tree->gtOp.gtOp1;
7805 GenTreePtr op2 = tree->gtOp.gtOp2;
7806 const var_types treeType = tree->TypeGet();
7807 regMaskTP needReg = destReg;
7810 /* Is this a division by an integer constant? */
// Optimizable divisors are power-of-2 constants (see genCodeForGeneralDivide's
// header), so the division reduces to a logical right shift.
7813 if (compiler->fgIsUnsignedDivOptimizable(op2))
7815 size_t ival = op2->gtIntCon.gtIconVal;
7817 /* Division by 1 must be handled elsewhere */
// Under MinOpts the "divide by 1" tree may survive to this point, hence
// the opts.MinOpts() escape hatch in the assert.
7819 noway_assert(ival != 1 || compiler->opts.MinOpts());
7821 /* Generate the operand into some register */
7823 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7824 noway_assert(op1->InReg());
7826 reg = op1->gtRegNum;
7828 /* Generate "shr reg, log2(value)" */
7830 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
7832 /* The register is now trashed */
7834 regTracker.rsTrackRegTrash(reg);
7836 genCodeForTree_DONE(tree, reg);
// Not optimizable: fall back to the general divide sequence.
7840 genCodeForGeneralDivide(tree, destReg, bestReg);
7843 /*****************************************************************************
7845 * Generate code for GT_DIV.
// tree    - the GT_DIV node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
//
// When the divisor is a positive power-of-2 constant, signed division is
// performed as an arithmetic right shift with a fix-up for negative
// dividends (so the result rounds toward zero, as required for signed
// integer division, rather than toward negative infinity as a plain shift
// would).
7848 void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7850 assert(tree->OperGet() == GT_DIV);
7852 GenTreePtr op1 = tree->gtOp.gtOp1;
7853 GenTreePtr op2 = tree->gtOp.gtOp2;
7854 const var_types treeType = tree->TypeGet();
7855 regMaskTP needReg = destReg;
7858 /* Is this a division by an integer constant? */
7861 if (compiler->fgIsSignedDivOptimizable(op2))
7863 ssize_t ival_s = op2->gtIntConCommon.IconValue();
7864 assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
7865 size_t ival = static_cast<size_t>(ival_s);
7867 /* Division by 1 must be handled elsewhere */
7869 noway_assert(ival != 1);
7871 BasicBlock* onNegDivisee = genCreateTempLabel();
7873 /* Generate the operand into some register */
7875 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7876 noway_assert(op1->InReg());
7878 reg = op1->gtRegNum;
7882 /* Generate "sar reg, log2(value)" */
// INS_FLAGS_SET: the shifted-out/carry state feeds the ADDC fix-up below.
7884 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
7886 // Check and branch for a positive value, skipping the INS_ADDC instruction
7887 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7888 inst_JMP(jmpGEL, onNegDivisee);
7890 // Add the carry flag to 'reg'
7891 inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
7893 /* Define the 'onNegDivisee' label and we're done */
7895 genDefineTempLabel(onNegDivisee);
7897 /* The register is now trashed */
7899 regTracker.rsTrackRegTrash(reg);
7901 /* The result is the same as the operand */
7903 reg = op1->gtRegNum;
7907 /* Generate the following sequence */
// Alternative (non-carry-based) sequence: test the dividend's sign first;
// if negative, add (divisor - 1) before shifting so truncation rounds
// toward zero.
7916 instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
7918 // Check and branch for a positive value, skipping the INS_add instruction
7919 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7920 inst_JMP(jmpGEL, onNegDivisee);
7922 inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
7924 /* Define the 'onNegDivisee' label and we're done */
7926 genDefineTempLabel(onNegDivisee);
7928 /* Generate "sar reg, log2(value)" */
7930 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
7932 /* The register is now trashed */
7934 regTracker.rsTrackRegTrash(reg);
7936 /* The result is the same as the operand */
7938 reg = op1->gtRegNum;
7941 genCodeForTree_DONE(tree, reg);
// Not optimizable: fall back to the general divide sequence.
7945 genCodeForGeneralDivide(tree, destReg, bestReg);
7948 /*****************************************************************************
7950 * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
7951 * (if op2 is not a power of 2 constant).
// On XARCH this emits the idiv/div idiom: dividend forced into EAX,
// EDX grabbed/locked for the high half, result in EAX (div) or EDX (mod).
// On ARM it uses sdiv/udiv, with mul+sub to recover the remainder for
// the modulo operators.
7954 void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
7956 assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
7957 tree->OperGet() == GT_DIV);
7959 GenTreePtr op1 = tree->gtOp.gtOp1;
7960 GenTreePtr op2 = tree->gtOp.gtOp2;
7961 const var_types treeType = tree->TypeGet();
7962 regMaskTP needReg = destReg;
7968 #if USE_HELPERS_FOR_INT_DIV
7969 noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
7972 #if defined(_TARGET_XARCH_)
7974 /* Which operand are we supposed to evaluate first? */
7976 if (tree->gtFlags & GTF_REVERSE_OPS)
7978 /* We'll evaluate 'op2' first */
// Keep op2 away from the registers op1 will need later.
7981 destReg &= ~op1->gtRsvdRegs;
7983 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
7984 if (op1->gtOper == GT_LCL_VAR)
7986 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
7987 noway_assert(varNum < compiler->lvaCount);
7988 LclVarDsc* varDsc = compiler->lvaTable + varNum;
7989 if (varDsc->lvRegister)
7991 destReg &= ~genRegMask(varDsc->lvRegNum);
7997 /* We'll evaluate 'op1' first */
// Prefer EAX for the dividend unless op2's evaluation will need it.
8002 if (RBM_EAX & op2->gtRsvdRegs)
8003 op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8005 op1Mask = RBM_EAX; // EAX would be ideal
8007 /* Generate the dividend into EAX and hold on to it. freeOnly=true */
8009 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8012 /* We want to avoid using EAX or EDX for the second operand */
// div/idiv implicitly read and write EDX:EAX, so the divisor must live
// elsewhere.
8014 destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
8016 /* Make the second operand addressable */
8017 op2 = genCodeForCommaTree(op2);
8019 /* Special case: if op2 is a local var we are done */
// A local can be used directly as the div/idiv memory operand.
8021 if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
8024 addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
8030 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8032 noway_assert(op2->InReg());
8033 addrReg = genRegMask(op2->gtRegNum);
8036 /* Make sure we have the dividend in EAX */
8040 /* We've previously computed op1 into EAX */
8042 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
8046 /* Compute op1 into EAX and hold on to it */
8048 genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8051 noway_assert(op1->InReg());
8052 noway_assert(op1->gtRegNum == REG_EAX);
8054 /* We can now safely (we think) grab EDX */
8056 regSet.rsGrabReg(RBM_EDX);
8057 regSet.rsLockReg(RBM_EDX);
8059 /* Convert the integer in EAX into a un/signed long in EDX:EAX */
// Unsigned: zero EDX. (The signed case — sign-extending EAX into EDX —
// is in the elided else branch.)
8061 const genTreeOps oper = tree->OperGet();
8063 if (oper == GT_UMOD || oper == GT_UDIV)
8064 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
8068 /* Make sure the divisor is still addressable */
8070 addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
8072 /* Perform the division */
8074 if (oper == GT_UMOD || oper == GT_UDIV)
8075 inst_TT(INS_UNSIGNED_DIVIDE, op2);
8077 inst_TT(INS_SIGNED_DIVIDE, op2);
8079 /* Free up anything tied up by the divisor's address */
8081 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
8083 /* Unlock and free EDX */
8085 regSet.rsUnlockReg(RBM_EDX);
8087 /* Free up op1 (which is in EAX) as well */
8091 /* Both EAX and EDX are now trashed */
8093 regTracker.rsTrackRegTrash(REG_EAX);
8094 regTracker.rsTrackRegTrash(REG_EDX);
8096 /* Figure out which register the result is in */
// div/idiv leave the quotient in EAX and the remainder in EDX.
8098 reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
8100 /* Don't forget to mark the first operand as using EAX and EDX */
8102 op1->gtRegNum = reg;
8104 genCodeForTree_DONE(tree, reg);
8106 #elif defined(_TARGET_ARM_)
8108 /* Which operand are we supposed to evaluate first? */
8110 if (tree->gtFlags & GTF_REVERSE_OPS)
8112 /* We'll evaluate 'op2' first */
8115 destReg &= ~op1->gtRsvdRegs;
8117 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8118 if (op1->gtOper == GT_LCL_VAR)
8120 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8121 noway_assert(varNum < compiler->lvaCount);
8122 LclVarDsc* varDsc = compiler->lvaTable + varNum;
8123 if (varDsc->lvRegister)
8125 destReg &= ~genRegMask(varDsc->lvRegNum);
8131 /* We'll evaluate 'op1' first */
8134 regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8136 /* Generate the dividend into a register and hold on to it. */
8138 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8141 /* Evaluate the second operand into a register and hold onto it. */
8143 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8145 noway_assert(op2->InReg());
8146 addrReg = genRegMask(op2->gtRegNum);
8150 // Recover op1 if spilled
8151 genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
8155 /* Compute op1 into any register and hold on to it */
8156 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8158 noway_assert(op1->InReg());
8160 reg = regSet.rsPickReg(needReg, bestReg);
8162 // Perform the division
8164 const genTreeOps oper = tree->OperGet();
8166 if (oper == GT_UMOD || oper == GT_UDIV)
8171 getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
// For modulo, compute the remainder from the quotient:
//   reg = op1 - (op2 * quotient)
8173 if (oper == GT_UMOD || oper == GT_MOD)
8175 getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
8176 getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
8178 /* Free up op1 and op2 */
8182 genCodeForTree_DONE(tree, reg);
8185 #error "Unknown _TARGET_"
8189 /*****************************************************************************
8191 * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
// tree    - the assignment-shift node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
//
// Two major paths: a constant shift count (shift the target in place, or
// load/shift/store on load-store architectures), and a variable count
// (loaded into RBM_SHIFT — e.g. CL on x86 — before shifting).
8194 void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8196 assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
8198 const genTreeOps oper = tree->OperGet();
8199 GenTreePtr op1 = tree->gtOp.gtOp1;
8200 GenTreePtr op2 = tree->gtOp.gtOp2;
8201 const var_types treeType = tree->TypeGet();
8202 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8203 regMaskTP needReg = destReg;
// Select the shift instruction for the operator (elided switch on 'oper').
8211 ins = INS_SHIFT_LEFT_LOGICAL;
8214 ins = INS_SHIFT_RIGHT_ARITHM;
8217 ins = INS_SHIFT_RIGHT_LOGICAL;
// Shifting a GC pointer would corrupt it; the IR must never ask for that.
8223 noway_assert(!varTypeIsGC(treeType));
8226 /* Shifts by a constant amount are easier */
8228 if (op2->IsCnsIntOrI())
8230 /* Make the target addressable */
8232 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
8234 /* Are we shifting a register left by 1 bit? */
8236 if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && op1->InReg())
8238 /* The target lives in a register */
8240 reg = op1->gtRegNum;
8242 /* "add reg, reg" is cheaper than "shl reg, 1" */
8244 inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
8248 #if CPU_LOAD_STORE_ARCH
// Load-store architectures cannot shift memory in place: lock the address
// registers, load into a scratch register, shift, then store back.
8251 regSet.rsLockUsedReg(addrReg);
8253 // Load op1 into a reg
8255 reg = regSet.rsPickReg(RBM_ALLINT);
8257 inst_RV_TT(INS_mov, reg, op1);
8261 inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
8262 regTracker.rsTrackRegTrash(reg);
8264 /* Store the (sign/zero extended) result back to the stack location of the variable */
8266 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
8268 regSet.rsUnlockUsedReg(addrReg);
8271 #endif // CPU_LOAD_STORE_ARCH
8273 /* Shift by the constant value */
8275 inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
8279 /* If the target is a register, it has a new value */
8282 regTracker.rsTrackRegTrash(op1->gtRegNum);
8284 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8286 /* The zero flag is now equal to the target value */
8287 /* X86: But only if the shift count is != 0 */
8289 if (op2->gtIntCon.gtIconVal != 0)
8291 if (tree->gtSetFlags())
8293 if (op1->gtOper == GT_LCL_VAR)
8295 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
8297 else if (op1->gtOper == GT_REG_VAR)
8299 genFlagsEqualToReg(tree, op1->gtRegNum);
8305 // It is possible for the shift count to equal 0 with valid
8306 // IL, and not be optimized away, in the case where the node
8307 // is of a small type. The sequence of instructions looks like
8308 // ldsfld, shr, stsfld and executed on a char field. This will
8309 // never happen with code produced by our compilers, because the
8310 // compilers will insert a conv.u2 before the stsfld (which will
8311 // lead us down a different codepath in the JIT and optimize away
8312 // the shift by zero). This case is not worth optimizing and we
8313 // will just make sure to generate correct code for it.
8315 genFlagsEqualToNone();
// Variable shift count: on targets with a dedicated shift-count register
// (RBM_SHIFT, e.g. ECX on x86) the count must land there.
8320 regMaskTP op2Regs = RBM_NONE;
8321 if (REG_SHIFT != REG_NA)
8322 op2Regs = RBM_SHIFT;
8326 if (tree->gtFlags & GTF_REVERSE_OPS)
8328 tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
8329 genCodeForTree(op2, tempRegs);
8330 regSet.rsMarkRegUsed(op2);
8332 tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
8333 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8335 genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
8339 /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
8340 regMaskTP excludeMask = op2->gtRsvdRegs;
8341 if (REG_SHIFT != REG_NA)
8342 excludeMask |= RBM_SHIFT;
8344 tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
8345 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8347 /* Load the shift count into the necessary register */
8348 genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
8351 /* Make sure the address registers are still here */
8352 addrReg = genKeepAddressable(op1, addrReg, op2Regs);
8354 #ifdef _TARGET_XARCH_
8355 /* Perform the shift */
8356 inst_TT_CL(ins, op1);
8358 /* Perform the shift */
8359 noway_assert(op2->InReg());
8360 op2Regs = genRegMask(op2->gtRegNum);
8362 regSet.rsLockUsedReg(addrReg | op2Regs);
8363 inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
8364 regSet.rsUnlockUsedReg(addrReg | op2Regs);
8366 /* Free the address registers */
8367 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8369 /* If the value is in a register, it's now trash */
8372 regTracker.rsTrackRegTrash(op1->gtRegNum);
8374 /* Release the op2 [RBM_SHIFT] operand */
8379 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
8382 /*****************************************************************************
8384 * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
// tree    - the shift node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
//
// Constant counts use inst_RV_SH (with an add-based strength reduction for
// shl-by-1/2 on non-ARM); variable counts are computed into the dedicated
// shift register (ECX on XARCH) before the shift.
8387 void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8389 assert(tree->OperIsShift());
8391 const genTreeOps oper = tree->OperGet();
8392 GenTreePtr op1 = tree->gtOp.gtOp1;
8393 GenTreePtr op2 = tree->gtOp.gtOp2;
8394 const var_types treeType = tree->TypeGet();
8395 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8396 regMaskTP needReg = destReg;
// Select the shift instruction for the operator (elided switch on 'oper').
8403 ins = INS_SHIFT_LEFT_LOGICAL;
8406 ins = INS_SHIFT_RIGHT_ARITHM;
8409 ins = INS_SHIFT_RIGHT_LOGICAL;
8415 /* Is the shift count constant? */
8417 if (op2->IsIntCnsFitsInI32())
8419 // TODO: Check to see if we could generate a LEA instead!
8421 /* Compute the left operand into any free register */
8423 genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
8425 noway_assert(op1->InReg());
8426 reg = op1->gtRegNum;
8428 /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
8430 // On ARM, until proven otherwise by performance numbers, just do the shift.
8431 // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
8432 // It's smaller than two "add reg, reg".
8434 CLANG_FORMAT_COMMENT_ANCHOR;
8436 #ifndef _TARGET_ARM_
8439 emitAttr size = emitActualTypeSize(treeType);
8440 if (op2->gtIntConCommon.IconValue() == 1)
8442 /* "add reg, reg" is smaller and faster than "shl reg, 1" */
8443 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8445 else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
8447 /* two "add reg, reg" instructions are faster than "shl reg, 2" */
8448 inst_RV_RV(INS_add, reg, reg, treeType);
8449 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8452 goto DO_SHIFT_BY_CNS;
8455 #endif // _TARGET_ARM_
8457 #ifndef _TARGET_ARM_
8459 #endif // _TARGET_ARM_
8460 // If we are shifting 'reg' by zero bits and do not need the flags to be set
8461 // then we can just skip emitting the instruction as 'reg' is already correct.
8463 if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
8465 /* Generate the appropriate shift instruction */
8466 inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
// Variable shift count path.
8472 /* Calculate a useful register mask for computing op1 */
8473 needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
8474 regMaskTP op2RegMask;
8475 #ifdef _TARGET_XARCH_
// x86/x64 variable shifts take the count in CL, so op2 must end up in ECX.
8476 op2RegMask = RBM_ECX;
8478 op2RegMask = RBM_NONE;
8480 needReg = regSet.rsMustExclude(needReg, op2RegMask);
8484 /* Which operand are we supposed to evaluate first? */
8485 if (tree->gtFlags & GTF_REVERSE_OPS)
8487 /* Load the shift count [into ECX on XARCH] */
8488 tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
8489 genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8491 /* We must not target the register that is holding op2 */
8492 needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
8494 /* Now evaluate 'op1' into a free register */
8495 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8497 /* Recover op2 into ECX */
8498 genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
8502 /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
8503 tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
8504 genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8506 /* Load the shift count [into ECX on XARCH] */
8507 genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8510 noway_assert(op2->InReg());
8511 #ifdef _TARGET_XARCH_
8512 noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
8514 // Check for the case of op1 being spilled during the evaluation of op2
8515 if (op1->gtFlags & GTF_SPILLED)
8517 // The register has been spilled -- reload it to any register except ECX
8518 regSet.rsLockUsedReg(op2RegMask);
8519 regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
8520 regSet.rsUnlockUsedReg(op2RegMask);
8523 noway_assert(op1->InReg());
8524 reg = op1->gtRegNum;
8527 /* Perform the shift */
8528 getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
8530 /* Perform the shift */
8531 inst_RV_CL(ins, reg);
8536 noway_assert(op1->InReg());
8537 noway_assert(reg == op1->gtRegNum);
8539 /* The register is now trashed */
8541 regTracker.rsTrackRegTrash(reg);
8543 genCodeForTree_DONE(tree, reg);
8546 /*****************************************************************************
8548 * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
8549 * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
// tree    - the relational-operator node
// destReg - mask of preferred registers for the result
// bestReg - narrower preference mask within destReg
//
// Materializes the comparison result as 0/1 in a register: on ARM via a
// branch-over mov 0/1 sequence; on XARCH via setcc into a byte register
// (with sbb-based shortcuts for the pure carry-flag cases).
8552 void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
8554 assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
8555 tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
8557 const genTreeOps oper = tree->OperGet();
8558 GenTreePtr op1 = tree->gtOp.gtOp1;
8559 const var_types treeType = tree->TypeGet();
8560 regMaskTP needReg = destReg;
8563 // Longs and float comparisons are converted to "?:"
8564 noway_assert(!compiler->fgMorphRelopToQmark(op1));
8566 // Check if we can use the currently set flags. Else set them
8568 emitJumpKind jumpKind = genCondSetFlags(tree);
8570 // Grab a register to materialize the bool value into
// Must be a byte-addressable register on x86 (setcc writes a byte).
8572 bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
8574 // Check that the predictor did the right job
8575 noway_assert(bestReg);
8577 // If needReg is in bestReg then use it
8578 if (needReg & bestReg)
8579 reg = regSet.rsGrabReg(needReg & bestReg);
8581 reg = regSet.rsGrabReg(bestReg);
8583 #if defined(_TARGET_ARM_)
// ARM has no setcc: branch to L_true on the condition, else fall through
// to "mov reg, 0" and jump past the "mov reg, 1" at L_true.
8586 // jump-if-true L_true
8596 L_true = genCreateTempLabel();
8597 L_end = genCreateTempLabel();
8599 inst_JMP(jumpKind, L_true);
8600 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
8601 inst_JMP(EJ_jmp, L_end);
8602 genDefineTempLabel(L_true);
8603 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
8604 genDefineTempLabel(L_end);
8606 regTracker.rsTrackRegTrash(reg);
8608 #elif defined(_TARGET_XARCH_)
8609 regMaskTP regs = genRegMask(reg);
8610 noway_assert(regs & RBM_BYTE_REGS);
8612 // Set (lower byte of) reg according to the flags
8614 /* Look for the special case where just want to transfer the carry bit */
8616 if (jumpKind == EJ_jb)
// sbb reg,reg yields 0 or -1 from the carry flag; neg turns -1 into 1.
8618 inst_RV_RV(INS_SUBC, reg, reg);
8619 inst_RV(INS_NEG, reg, TYP_INT);
8620 regTracker.rsTrackRegTrash(reg);
8622 else if (jumpKind == EJ_jae)
// sbb reg,reg yields 0 or -1; adding 1 gives 1 or 0 (the inverted carry).
8624 inst_RV_RV(INS_SUBC, reg, reg);
8625 genIncRegBy(reg, 1, tree, TYP_INT);
8626 regTracker.rsTrackRegTrash(reg);
8630 inst_SET(jumpKind, reg);
8632 regTracker.rsTrackRegTrash(reg);
8634 if (treeType == TYP_INT)
8636 // Set the higher bytes to 0
// setcc only wrote the low byte; zero-extend it to a full int.
8637 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
8641 noway_assert(treeType == TYP_BYTE);
8646 #endif // _TARGET_XXX
8648 genCodeForTree_DONE(tree, reg);
8651 //------------------------------------------------------------------------
8652 // genCodeForCopyObj: Generate code for a CopyObj node
// Copies a struct containing GC pointers. Non-GC slots are moved directly;
// GC-pointer slots go through the CORINFO_HELP_ASSIGN_BYREF helper so the
// write barrier / GC bookkeeping is maintained.
8655 // tree - The CopyObj node we are going to generate code for.
8656 // destReg - The register mask for register(s), if any, that will be defined.
8661 void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
8663 // If the value class doesn't have any fields that are GC refs or
8664 // the target isn't on the GC-heap, we can merge it with CPBLK.
8665 // GC fields cannot be copied directly, instead we will
8666 // need to use a jit-helper for that.
8667 assert(tree->gtOper == GT_ASG);
8668 assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
8670 GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
8671 assert(cpObjOp->HasGCPtr());
8674 if (cpObjOp->IsVolatile())
8676 // Emit a memory barrier instruction before the CopyBlk
8677 instGen_MemoryBarrier();
8680 assert(tree->gtOp.gtOp2->OperIsIndir());
8681 GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
8682 GenTreePtr dstObj = cpObjOp->Addr();
8684 noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
8687 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
8688 size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
8690 // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
8691 // The EE currently does not allow this. Let's assert it just to be safe.
8692 noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
8695 size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
8696 unsigned slots = cpObjOp->gtSlots;
8697 BYTE* gcPtrs = cpObjOp->gtGcPtrs;
8698 unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
8699 assert(blkSize == cpObjOp->gtBlkSize);
8701 GenTreePtr treeFirst, treeSecond;
8702 regNumber regFirst, regSecond;
8704 // Check what order the object-ptrs have to be evaluated in ?
// The address operands must land in the registers the movs/helper sequence
// expects (EDI/ESI on x86, R0/R1 on ARM), in GTF_REVERSE_OPS order.
8706 if (tree->gtFlags & GTF_REVERSE_OPS)
8709 treeSecond = dstObj;
8710 #if CPU_USES_BLOCK_MOVE
8712 regSecond = REG_EDI;
8714 regFirst = REG_ARG_1;
8715 regSecond = REG_ARG_0;
8721 treeSecond = srcObj;
8722 #if CPU_USES_BLOCK_MOVE
8724 regSecond = REG_ESI;
8726 regFirst = REG_ARG_0;
8727 regSecond = REG_ARG_1;
8731 bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
8732 bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
// Stack locations are not on the GC heap, so plain TYP_I_IMPL addressing
// suffices; heap addresses must be tracked as byrefs.
8733 emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8734 emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8736 #if CPU_USES_BLOCK_MOVE
8737 // Materialize the trees in the order desired
8739 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8740 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8741 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8743 // Grab ECX because it will be trashed by the helper
8745 regSet.rsGrabReg(RBM_ECX);
// One pointer-sized slot per iteration: movsd/movsq for non-GC slots (or a
// stack destination), the byref-assign helper for GC slots on the heap.
8747 while (blkSize >= TARGET_POINTER_SIZE)
8749 if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
8751 // Note that we can use movsd even if it is a GC pointer being transferred
8752 // because the value is not cached anywhere. If we did this in two moves,
8753 // we would have to make certain we passed the appropriate GC info on to
8759 // This helper will act like a MOVSD
8760 // -- inputs EDI and ESI are byrefs
8761 // -- including incrementing of ESI and EDI by 4
8762 // -- helper will trash ECX
8764 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8765 regSet.rsLockUsedReg(argRegs);
8766 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8768 EA_PTRSIZE); // retSize
8769 regSet.rsUnlockUsedReg(argRegs);
8772 blkSize -= TARGET_POINTER_SIZE;
8775 // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
8777 regTracker.rsTrackRegTrash(REG_EDI);
8778 regTracker.rsTrackRegTrash(REG_ESI);
8779 regTracker.rsTrackRegTrash(REG_ECX);
8781 gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
8783 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8784 it is a emitNoGChelper. However, we have to let the emitter know that
8785 the GC liveness has changed. We do this by creating a new label.
8788 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8790 genDefineTempLabel(&dummyBB);
8792 #else // !CPU_USES_BLOCK_MOVE
8794 #ifndef _TARGET_ARM_
8795 // Currently only the ARM implementation is provided
8796 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
8799 // Materialize the trees in the order desired
// The helper is only needed when there are GC slots and the destination is
// on the heap; otherwise plain ldr/str pairs through a temp register do.
8805 if ((gcPtrCount > 0) && !dstIsOnStack)
8807 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8808 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8809 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8811 /* The helper is a Asm-routine that will trash R2,R3 and LR */
8813 /* Spill any callee-saved registers which are being used */
8814 regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
8818 regSet.rsSpillRegs(spillRegs);
8822 // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
8823 // We will also use it as the temp register for our load/store sequences
8825 assert(REG_R2 == REG_TMP_1);
8826 regTemp = regSet.rsGrabReg(RBM_R2);
8831 genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
8832 genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
8833 genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
8835 // Grab any temp register to use for our load/store sequences
8837 regTemp = regSet.rsGrabReg(RBM_ALLINT);
8840 assert(dstObj->InReg());
8841 assert(srcObj->InReg());
8843 regDst = dstObj->gtRegNum;
8844 regSrc = srcObj->gtRegNum;
8846 assert(regDst != regTemp);
8847 assert(regSrc != regTemp);
8849 instruction loadIns = ins_Load(TYP_I_IMPL);   // INS_ldr
8850 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
// Walk the struct one pointer-sized slot at a time, consulting the GC
// layout (gcPtrs) for each slot.
8853 while (blkSize >= TARGET_POINTER_SIZE)
8855 CorInfoGCType gcType;
8856 CorInfoGCType gcTypeNext = TYPE_GC_NONE;
8857 var_types type = TYP_I_IMPL;
8859 #if FEATURE_WRITE_BARRIER
8860 gcType = (CorInfoGCType)(*gcPtrs++);
8861 if (blkSize > TARGET_POINTER_SIZE)
8862 gcTypeNext = (CorInfoGCType)(*gcPtrs);
8864 if (gcType == TYPE_GC_REF)
8866 else if (gcType == TYPE_GC_BYREF)
8871 assert(regDst == REG_ARG_0);
8872 assert(regSrc == REG_ARG_1);
8873 assert(regTemp == REG_R2);
8876 gcType = TYPE_GC_NONE;
8877 #endif // FEATURE_WRITE_BARRIER
8879 blkSize -= TARGET_POINTER_SIZE;
8881 emitAttr opSize = emitTypeSize(type);
8883 if (!helperUsed || (gcType == TYPE_GC_NONE))
8885 getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
8886 getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
8887 offset += TARGET_POINTER_SIZE;
// Bump the base registers when the next slot needs the helper (which
// expects R0/R1 to point at the slot) or the offset nears the ldr/str
// immediate limit.
8889 if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
8891 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
8892 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
8898 assert(offset == 0);
8900 // The helper will act like this:
8901 // -- inputs R0 and R1 are byrefs
8902 // -- helper will perform copy from *R1 into *R0
8903 // -- helper will perform post increment of R0 and R1 by 4
8904 // -- helper will trash R2
8905 // -- helper will trash R3
8906 // -- calling the helper implicitly trashes LR
8909 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8910 regSet.rsLockUsedReg(argRegs);
8911 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8913 EA_PTRSIZE); // retSize
8915 regSet.rsUnlockUsedReg(argRegs);
8916 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
8920 regTracker.rsTrackRegTrash(regDst);
8921 regTracker.rsTrackRegTrash(regSrc);
8922 regTracker.rsTrackRegTrash(regTemp);
8924 gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
8926 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8927 it is a emitNoGChelper. However, we have to let the emitter know that
8928 the GC liveness has changed. We do this by creating a new label.
8931 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8933 genDefineTempLabel(&dummyBB);
8935 #endif // !CPU_USES_BLOCK_MOVE
8937 assert(blkSize == 0);
8939 genReleaseReg(dstObj);
8940 genReleaseReg(srcObj);
8942 genCodeForTree_DONE(tree, REG_NA);
8945 if (cpObjOp->IsVolatile())
8947 // Emit a memory barrier instruction after the CopyBlk
8948 instGen_MemoryBarrier();
8953 //------------------------------------------------------------------------
8954 // genCodeForBlkOp: Generate code for a block copy or init operation
8957 //    tree - The block assignment
8958 //    destReg - The expected destination register
//
// Notes:
//    Copies whose destination is a GT_OBJ containing GC pointers are delegated
//    to genCodeForCopyObj. Otherwise, depending on the target and on whether
//    the block size is a compile-time constant, this emits either an x86
//    movs/stos style sequence (CPU_USES_BLOCK_MOVE), an unrolled ARM ldr/str
//    sequence or ldr/str loop, or a call to the CORINFO_HELP_MEMCPY /
//    CORINFO_HELP_MEMSET helper.
8960 void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
8962 genTreeOps oper = tree->OperGet();
8963 GenTreePtr dest = tree->gtOp.gtOp1;
8964 GenTreePtr src = tree->gtGetOp2();
8965 regMaskTP needReg = destReg;
8966 regMaskTP regs = regSet.rsMaskUsed;
8967 GenTreePtr opsPtr[3];
8968 regMaskTP regsPtr[3];
8970 GenTreePtr srcPtrOrVal;
8972 noway_assert(tree->OperIsBlkOp());
8974 bool isCopyBlk = false;
8975 bool isInitBlk = false;
8976 bool hasGCpointer = false;
8977 unsigned blockSize = dest->AsBlk()->gtBlkSize;
8978 GenTreePtr sizeNode = nullptr;
8979 bool sizeIsConst = true;
// A GT_DYN_BLK carries its size as a separate tree node, so in that case the
// size is not a compile-time constant.
8980 if (dest->gtOper == GT_DYN_BLK)
8982 sizeNode = dest->AsDynBlk()->gtDynamicSize;
8983 sizeIsConst = false;
8986 if (tree->OperIsCopyBlkOp())
8989 if (dest->gtOper == GT_OBJ)
// A struct copy containing GC pointers must keep per-slot GC info precise;
// that case is handled entirely by genCodeForCopyObj.
8991 if (dest->AsObj()->gtGcPtrCount != 0)
8993 genCodeForCopyObj(tree, destReg);
9003 // Ensure that we have an address in the CopyBlk case.
9006 // TODO-1stClassStructs: Allow a lclVar here.
9007 assert(src->OperIsIndir());
9008 srcPtrOrVal = src->AsIndir()->Addr();
// A volatile block operation gets a memory barrier before the stores (and,
// for a copy, another barrier after -- emitted at the end of this function).
9016 if (dest->AsBlk()->IsVolatile())
9018 // Emit a memory barrier instruction before the InitBlk/CopyBlk
9019 instGen_MemoryBarrier();
9023 destPtr = dest->AsBlk()->Addr();
9024 noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
9026 (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
9027 (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
9029 noway_assert(destPtr && srcPtrOrVal);
9031 #if CPU_USES_BLOCK_MOVE
// x86/x64 path: "rep stos" takes the init value in EAX; "rep movs" takes the
// source pointer in ESI.
9032 regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
9034 /* Some special code for block moves/inits for constant sizes */
9037 // Is this a fixed size COPYBLK?
9038 // or a fixed size INITBLK with a constant init value?
9040 if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
9042 size_t length = blockSize;
9044 instruction ins_P, ins_PR, ins_B;
9049 ins_PR = INS_r_stosp;
9052 /* Properly extend the init constant from a U1 to a U4 */
9053 initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
9055 /* If it is a non-zero value we have to replicate */
9056 /* the byte value four times to form the DWORD */
9057 /* Then we change this new value into the tree-node */
9061 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9062 #ifdef _TARGET_64BIT_
// On 64-bit targets, replicate the DWORD once more so pointer-sized stos
// stores a full 8-byte pattern, and retype the constant node to match.
9065 initVal = initVal | (initVal << 32);
9066 srcPtrOrVal->gtType = TYP_LONG;
9070 srcPtrOrVal->gtType = TYP_INT;
9072 #endif // _TARGET_64BIT_
9074 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9079 ins_PR = INS_r_movsp;
9083 // Determine if we will be using SSE2
9084 unsigned movqLenMin = 8;
9085 unsigned movqLenMax = 24;
9087 bool bWillUseSSE2 = false;
9088 bool bWillUseOnlySSE2 = false;
9089 bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
9091 #ifdef _TARGET_64BIT_
9093 // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
9094 // there is no point in wasting space on the bigger instructions
9096 #else // !_TARGET_64BIT_
9098 if (compiler->opts.compCanUseSSE2)
// Scale the movq length window by block weight: skip the SSE2 unrolling in
// rarely-run blocks, widen it in hot loops or when optimizing for speed.
9100 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
9102 /* Adjust for BB weight */
9103 if (curBBweight == BB_ZERO_WEIGHT)
9105 // Don't bother with this optimization in
9106 // rarely run blocks
9107 movqLenMax = movqLenMin = 0;
9109 else if (curBBweight < BB_UNITY_WEIGHT)
9111 // Be less aggressive when we are inside a conditional
9114 else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
9116 // Be more aggressive when we are inside a loop
9120 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
9122 // Be more aggressive when optimizing for speed
9123 // InitBlk uses fewer instructions
9127 if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
9129 bWillUseSSE2 = true;
9131 if ((length % 8) == 0)
9133 bWillUseOnlySSE2 = true;
// A zero-init done purely with movq never needs the constant in an integer
// register -- xorps produces the zero pattern directly in the XMM register.
9134 if (isInitBlk && (initVal == 0))
9136 bNeedEvaluateCnst = false;
9137 noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
9143 #endif // !_TARGET_64BIT_
// NOTE(review): when the entire copy is done with movq, ESI/EDI appear to be
// read only via displacements and are not advanced -- presumably why they are
// not treated as trashed in that case; confirm against the asserts below.
9145 const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
9146 /* Evaluate dest and src/val */
9148 if (tree->gtFlags & GTF_REVERSE_OPS)
9150 if (bNeedEvaluateCnst)
9152 genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9154 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9155 if (bNeedEvaluateCnst)
9157 genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
9162 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9163 if (bNeedEvaluateCnst)
9165 genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9167 genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
9170 bool bTrashedESI = false;
9171 bool bTrashedEDI = false;
// SSE2 prolog: materialize the replicated init value (or zero) in XMM0.
9176 regNumber xmmReg = REG_XMM0;
9182 getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9183 getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9187 getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9191 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9192 length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
// 8 bytes per step: a movq store for initblk, a movq load+store for copyblk.
9198 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9202 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9203 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
// If a non-SSE2 tail follows, advance ESI/EDI past the bytes already handled.
9211 noway_assert(bNeedEvaluateCnst);
9212 noway_assert(!bWillUseOnlySSE2);
9216 inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9220 inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9223 if (length >= REGSIZE_BYTES)
9226 length -= REGSIZE_BYTES;
9230 else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9232 /* For small code, we can only use ins_DR to generate fast
9233 and small code. We also can't use "rep movsb" because
9234 we may not atomically reading and writing the DWORD */
9236 noway_assert(bNeedEvaluateCnst);
// Short blocks: unroll as individual pointer-sized stos/movs instructions.
9240 else if (length <= 4 * REGSIZE_BYTES)
9242 noway_assert(bNeedEvaluateCnst);
9244 while (length >= REGSIZE_BYTES)
9247 length -= REGSIZE_BYTES;
// Longer blocks: rep-prefixed pointer-sized stos/movs with the count in ECX.
9257 noway_assert(bNeedEvaluateCnst);
9259 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9260 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9262 length &= (REGSIZE_BYTES - 1);
9266 regTracker.rsTrackRegTrash(REG_ECX);
9273 /* Now take care of the remainder */
9274 CLANG_FORMAT_COMMENT_ANCHOR;
9276 #ifdef _TARGET_64BIT_
// 64-bit: a sub-pointer-size remainder of 4..7 bytes needs one dword-sized
// stos/movs before the final byte-sized tail.
9279 noway_assert(bNeedEvaluateCnst);
9280 noway_assert(length < 8);
9282 instGen((isInitBlk) ? INS_stosd : INS_movsd);
9290 #endif // _TARGET_64BIT_
9294 noway_assert(bNeedEvaluateCnst);
9306 noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9308 regTracker.rsTrackRegTrash(REG_EDI);
9310 regTracker.rsTrackRegTrash(REG_ESI);
9311 // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9313 genReleaseReg(destPtr);
9314 if (bNeedEvaluateCnst)
9315 genReleaseReg(srcPtrOrVal);
9320 // This a variable-sized COPYBLK/INITBLK,
9321 // or a fixed size INITBLK with a variable init value,
9324 // What order should the Dest, Val/Src, and Size be calculated
9326 compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
9328 noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
9329 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9330 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9331 if (opsPtr[2] != nullptr)
9333 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9335 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9336 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9338 noway_assert((destPtr->InReg()) && // Dest
9339 (destPtr->gtRegNum == REG_EDI));
9341 noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9342 (genRegMask(srcPtrOrVal->gtRegNum) == regs));
// For a constant size the byte count is loaded into ECX directly; otherwise
// the size tree must already have been evaluated into ECX (asserted below).
9346 inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
9350 noway_assert((sizeNode->InReg()) && // Size
9351 (sizeNode->gtRegNum == REG_ECX));
// rep stosb / rep movsb: ECX = byte count, EDI = dest, EAX/ESI = val/src.
9355 instGen(INS_r_stosb);
9357 instGen(INS_r_movsb);
9359 regTracker.rsTrackRegTrash(REG_EDI);
9360 regTracker.rsTrackRegTrash(REG_ECX);
9363 regTracker.rsTrackRegTrash(REG_ESI);
9364 // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9366 genReleaseReg(opsPtr[0]);
9367 genReleaseReg(opsPtr[1]);
9368 if (opsPtr[2] != nullptr)
9370 genReleaseReg(opsPtr[2]);
9374 #else // !CPU_USES_BLOCK_MOVE
9376 #ifndef _TARGET_ARM_
9377 // Currently only the ARM implementation is provided
9378 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9381 // Is this a fixed size COPYBLK?
9382 // or a fixed size INITBLK with a constant init value?
9384 if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9386 GenTreePtr dstOp = destPtr;
9387 GenTreePtr srcOp = srcPtrOrVal;
9388 unsigned length = blockSize;
9389 unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
9390 unsigned initVal = 0;
9391 bool useLoop = false;
9395 /* Properly extend the init constant from a U1 to a U4 */
9396 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9398 /* If it is a non-zero value we have to replicate */
9399 /* the byte value four times to form the DWORD */
9400 /* Then we store this new value into the tree-node */
9404 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9405 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9409 // Will we be using a loop to implement this INITBLK/COPYBLK?
9410 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
9420 /* Evaluate dest and src/val */
9422 if (tree->gtFlags & GTF_REVERSE_OPS)
9424 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9425 assert(srcOp->InReg());
9427 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9428 assert(dstOp->InReg());
9429 regDst = dstOp->gtRegNum;
9431 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9432 regSrc = srcOp->gtRegNum;
9436 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9437 assert(dstOp->InReg());
9439 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9440 assert(srcOp->InReg());
9441 regSrc = srcOp->gtRegNum;
9443 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9444 regDst = dstOp->gtRegNum;
9446 assert(dstOp->InReg());
9447 assert(srcOp->InReg());
9449 regDst = dstOp->gtRegNum;
9450 regSrc = srcOp->gtRegNum;
9451 usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
// An address that points into the current stack frame (GT_ADDR with
// GTF_ADDR_ONSTACK) can be reported as EA_PTRSIZE rather than EA_BYREF.
9452 bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9453 emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9458 // Prefer a low register,but avoid one of the ones we've already grabbed
9459 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9460 usedRegs |= genRegMask(regTemp);
9461 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9462 srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9467 srcType = EA_PTRSIZE;
9470 instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
9471 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
9475 // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
// Fully unrolled path: one ldr/str (copy) or str (init) per pointer-sized
// word, using immediate offsets off the unmodified base registers.
9478 for (unsigned i = 0; i < fullStoreCount; i++)
9482 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9483 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9484 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9485 regTracker.rsTrackRegTrash(regTemp);
9489 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9493 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9494 length -= finalOffset;
9496 else // We will use a loop to implement this INITBLK/COPYBLK
// Loop path: each iteration moves/stores two pointer-sized words, with
// regLoopIndex counting pair-stores down to zero.
9498 unsigned pairStoreLoopCount = fullStoreCount / 2;
9500 // We need a second temp register for CopyBlk
9501 regNumber regTemp2 = REG_STK;
9504 // Prefer a low register, but avoid one of the ones we've already grabbed
9506 regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9507 usedRegs |= genRegMask(regTemp2);
9510 // Pick and initialize the loop counter register
9511 regNumber regLoopIndex;
9513 regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9514 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9516 // Create and define the Basic Block for the loop top
9517 BasicBlock* loopTopBlock = genCreateTempLabel();
9518 genDefineTempLabel(loopTopBlock);
9523 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9524 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9525 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9526 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9527 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9528 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9529 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9530 regTracker.rsTrackRegTrash(regSrc);
9531 regTracker.rsTrackRegTrash(regTemp);
9532 regTracker.rsTrackRegTrash(regTemp2);
9536 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9537 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
// Advance the destination, decrement the pair counter (setting flags), and
// branch back to the loop top while the count is still greater than zero.
9540 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9541 regTracker.rsTrackRegTrash(regDst);
9542 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9543 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9544 inst_JMP(jmpGTS, loopTopBlock);
9546 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9548 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
// An odd pointer-sized word may remain after the pair-store loop.
9550 if (length & TARGET_POINTER_SIZE)
9554 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9555 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9559 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9561 finalOffset = TARGET_POINTER_SIZE;
9562 length -= TARGET_POINTER_SIZE;
// Tail: at most one halfword and then one byte can remain.
9570 if (length & sizeof(short))
9572 loadIns = ins_Load(TYP_USHORT); // INS_ldrh
9573 storeIns = ins_Store(TYP_USHORT); // INS_strh
9577 getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9578 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9579 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9580 regTracker.rsTrackRegTrash(regTemp);
9584 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9586 length -= sizeof(short);
9587 finalOffset += sizeof(short);
9590 if (length & sizeof(char))
9592 loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
9593 storeIns = ins_Store(TYP_UBYTE); // INS_strb
9597 getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9598 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9599 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9600 regTracker.rsTrackRegTrash(regTemp);
9604 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9606 length -= sizeof(char);
9608 assert(length == 0);
9610 genReleaseReg(dstOp);
9611 genReleaseReg(srcOp);
9616 // This a variable-sized COPYBLK/INITBLK,
9617 // or a fixed size INITBLK with a variable init value,
9620 // What order should the Dest, Val/Src, and Size be calculated
9622 compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
9624 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9625 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9626 if (opsPtr[2] != nullptr)
9628 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
9630 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9631 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
// The memcpy/memset helpers take (dest, src-or-val, size) in the first three
// outgoing argument registers, as asserted here.
9633 noway_assert((destPtr->InReg()) && // Dest
9634 (destPtr->gtRegNum == REG_ARG_0));
9636 noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9637 (srcPtrOrVal->gtRegNum == REG_ARG_1));
9641 inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
9645 noway_assert((sizeNode->InReg()) && // Size
9646 (sizeNode->gtRegNum == REG_ARG_2));
// Lock the argument registers so the helper-call setup cannot spill them.
9649 regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9651 genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
9653 : CORINFO_HELP_MEMSET,
9656 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9658 regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
9659 genReleaseReg(opsPtr[0]);
9660 genReleaseReg(opsPtr[1]);
9661 if (opsPtr[2] != nullptr)
9663 genReleaseReg(opsPtr[2]);
// Closing barrier for a volatile copy (the opening barrier was emitted
// before the stores, near the top of this function).
9667 if (isCopyBlk && dest->AsBlk()->IsVolatile())
9669 // Emit a memory barrier instruction after the CopyBlk
9670 instGen_MemoryBarrier();
9672 #endif // !CPU_USES_BLOCK_MOVE
9678 #pragma warning(push)
9679 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
9681 void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
9683 const genTreeOps oper = tree->OperGet();
9684 const var_types treeType = tree->TypeGet();
9685 GenTreePtr op1 = tree->gtOp.gtOp1;
9686 GenTreePtr op2 = tree->gtGetOp2IfPresent();
9687 regNumber reg = DUMMY_INIT(REG_CORRUPT);
9688 regMaskTP regs = regSet.rsMaskUsed;
9689 regMaskTP needReg = destReg;
9690 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9694 GenTreePtr opsPtr[3];
9695 regMaskTP regsPtr[3];
9698 addrReg = 0xDEADCAFE;
9701 noway_assert(tree->OperKind() & GTK_SMPOP);
9706 if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR)
9708 genCodeForBlkOp(tree, destReg);
9712 genCodeForTreeSmpOpAsg(tree);
9719 genCodeForAsgShift(tree, destReg, bestReg);
9727 genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9731 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9732 #ifdef _TARGET_XARCH_
9733 // Note that the specialCase here occurs when the treeType specifies a byte sized operation
9734 // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9737 specialCase = false;
9738 if (op1->gtOper == GT_REG_VAR)
9740 /* Get hold of the target register */
9742 reg = op1->gtRegVar.gtRegNum;
9743 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9745 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9747 inst_RV_RV(INS_mov, byteReg, reg);
9748 regTracker.rsTrackRegTrash(byteReg);
9750 inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9751 var_types op1Type = op1->TypeGet();
9752 instruction wideningIns = ins_Move_Extend(op1Type, true);
9753 inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
9754 regTracker.rsTrackRegTrash(reg);
9761 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9763 #else // not _TARGET_XARCH_
9766 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9770 // Fix 388382 ARM JitStress WP7
9771 var_types op1Type = op1->TypeGet();
9772 regNumber reg = regSet.rsPickFreeReg();
9773 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9774 regTracker.rsTrackRegTrash(reg);
9775 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9776 inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
9780 regTracker.rsTrackRegTrash(op1->gtRegNum);
9781 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9783 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9792 genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9796 genCodeForUnsignedMod(tree, destReg, bestReg);
9800 genCodeForSignedMod(tree, destReg, bestReg);
9804 genCodeForUnsignedDiv(tree, destReg, bestReg);
9808 genCodeForSignedDiv(tree, destReg, bestReg);
9814 genCodeForShift(tree, destReg, bestReg);
9820 /* Generate the operand into some register */
9822 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9823 noway_assert(op1->InReg());
9825 reg = op1->gtRegNum;
9827 /* Negate/reverse the value in the register */
9829 inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
9831 /* The register is now trashed */
9833 regTracker.rsTrackRegTrash(reg);
9835 genCodeForTree_DONE(tree, reg);
9839 case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
9841 /* Make sure the operand is addressable */
9843 addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9845 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9847 /* Figure out the size of the value being loaded */
9849 size = EA_ATTR(genTypeSize(tree->gtType));
9851 /* Pick a register for the value */
9853 if (needReg == RBM_ALLINT && bestReg == 0)
9855 /* Absent a better suggestion, pick a useless register */
9857 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9860 reg = regSet.rsPickReg(needReg, bestReg);
9862 if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9864 noway_assert(size == EA_PTRSIZE);
9865 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
9866 (int)op1->gtIntCon.gtIconVal);
9870 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9872 inst_mov_RV_ST(reg, tree);
9876 if (tree->gtFlags & GTF_IND_VOLATILE)
9878 // Emit a memory barrier instruction after the load
9879 instGen_MemoryBarrier();
9883 /* Note the new contents of the register we used */
9885 regTracker.rsTrackRegTrash(reg);
9888 /* Update the live set of register variables */
9889 if (compiler->opts.varNames)
9890 genUpdateLife(tree);
9893 /* Now we can update the register pointer information */
9895 // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9896 gcInfo.gcMarkRegPtrVal(reg, treeType);
9898 genCodeForTree_DONE_LIFE(tree, reg);
9903 genCodeForNumericCast(tree, destReg, bestReg);
9908 /* Is this a test of a relational operator? */
9910 if (op1->OperIsCompare())
9912 /* Generate the conditional jump */
9916 genUpdateLife(tree);
9921 compiler->gtDispTree(tree);
9923 NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9927 genCodeForSwitch(tree);
9931 noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9932 if (op1 == 0) // endfinally
9936 #ifdef _TARGET_XARCH_
9937 /* Return using a pop-jmp sequence. As the "try" block calls
9938 the finally with a jmp, this leaves the x86 call-ret stack
9939 balanced in the normal flow of path. */
9941 noway_assert(isFramePointerRequired());
9942 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9943 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
9944 #elif defined(_TARGET_ARM_)
9945 // Nothing needed for ARM
9952 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9953 noway_assert(op1->InReg());
9954 noway_assert(op1->gtRegNum == REG_INTRET);
9955 /* The return value has now been computed */
9956 reg = op1->gtRegNum;
9962 genCodeForTree_DONE(tree, reg);
9967 // TODO: this should be done AFTER we called exit mon so that
9968 // we are sure that we don't have to keep 'this' alive
9970 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9972 /* either it's an "empty" statement or the return statement
9973 of a synchronized method
9976 genPInvokeMethodEpilog();
9979 /* Is there a return value and/or an exit statement? */
9983 if (op1->gtType == TYP_VOID)
9985 // We're returning nothing, just generate the block (shared epilog calls).
9986 genCodeForTree(op1, 0);
9989 else if (op1->gtType == TYP_STRUCT)
9991 if (op1->gtOper == GT_CALL)
9993 // We have a return call() because we failed to tail call.
9994 // In any case, just generate the call and be done.
9995 assert(compiler->IsHfa(op1));
9996 genCodeForCall(op1->AsCall(), true);
9997 genMarkTreeInReg(op1, REG_FLOATRET);
10001 assert(op1->gtOper == GT_LCL_VAR);
10002 assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
10003 genLoadIntoFltRetRegs(op1);
10006 else if (op1->TypeGet() == TYP_FLOAT)
10008 // This can only occur when we are returning a non-HFA struct
10009 // that is composed of a single float field and we performed
10010 // struct promotion and enregistered the float field.
10012 genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
10013 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
10015 #endif // _TARGET_ARM_
10018 // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
10020 // noway_assert(compiler->compCurBB != compiler->genReturnBB);
10022 noway_assert(op1->gtType != TYP_VOID);
10024 /* Generate the return value into the return register */
10026 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
10028 /* The result must now be in the return register */
10030 noway_assert(op1->InReg());
10031 noway_assert(op1->gtRegNum == REG_INTRET);
10034 /* The return value has now been computed */
10036 reg = op1->gtRegNum;
10038 genCodeForTree_DONE(tree, reg);
10041 #ifdef PROFILING_SUPPORTED
10042 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
10043 // the GT_RETURN tree.
10045 if (compiler->compCurBB == compiler->genReturnBB)
10047 genProfilingLeaveCallback();
10051 if (compiler->opts.compStackCheckOnRet)
10053 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
10054 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
10055 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
10056 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
10058 BasicBlock* esp_check = genCreateTempLabel();
10059 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
10060 inst_JMP(jmpEqual, esp_check);
10061 getEmitter()->emitIns(INS_BREAKPOINT);
10062 genDefineTempLabel(esp_check);
10069 if (tree->gtFlags & GTF_REVERSE_OPS)
10071 if (tree->gtType == TYP_VOID)
10073 genEvalSideEffects(op2);
10074 genUpdateLife(op2);
10075 genEvalSideEffects(op1);
10076 genUpdateLife(tree);
10081 genCodeForTree(op2, needReg);
10082 genUpdateLife(op2);
10084 noway_assert(op2->InReg());
10086 regSet.rsMarkRegUsed(op2);
10088 // Do side effects of op1
10089 genEvalSideEffects(op1);
10091 // Recover op2 if spilled
10092 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
10094 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
10096 // set gc info if we need so
10097 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
10099 genUpdateLife(tree);
10100 genCodeForTree_DONE(tree, op2->gtRegNum);
10106 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
10108 /* Generate side effects of the first operand */
10110 genEvalSideEffects(op1);
10111 genUpdateLife(op1);
10113 /* Is the value of the second operand used? */
10115 if (tree->gtType == TYP_VOID)
10117 /* The right operand produces no result. The morpher is
10118 responsible for resetting the type of GT_COMMA nodes
10119 to TYP_VOID if op2 isn't meant to yield a result. */
10121 genEvalSideEffects(op2);
10122 genUpdateLife(tree);
10126 /* Generate the second operand, i.e. the 'real' value */
10128 genCodeForTree(op2, needReg);
10129 noway_assert(op2->InReg());
10131 /* The result of 'op2' is also the final result */
10133 reg = op2->gtRegNum;
10135 /* Remember whether we set the flags */
10137 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
10139 genCodeForTree_DONE(tree, reg);
10144 genCodeForTree(op1, needReg);
10145 noway_assert(op1->InReg());
10147 /* The result of 'op1' is also the final result */
10149 reg = op1->gtRegNum;
10151 /* Remember whether we set the flags */
10153 tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
10155 genCodeForTree_DONE(tree, reg);
10160 genCodeForQmark(tree, destReg, bestReg);
10173 /* Generate the operand into some register */
10175 genCodeForTree(op1, needReg);
10177 /* The result is the same as the operand */
10179 reg = op1->gtRegNum;
10181 genCodeForTree_DONE(tree, reg);
10186 switch (tree->gtIntrinsic.gtIntrinsicId)
10188 case CORINFO_INTRINSIC_Round:
10190 noway_assert(tree->gtType == TYP_INT);
10192 #if FEATURE_STACK_FP_X87
10193 genCodeForTreeFlt(op1);
10195 /* Store the FP value into the temp */
10196 TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10198 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10199 FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10200 inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10202 reg = regSet.rsPickReg(needReg, bestReg);
10203 regTracker.rsTrackRegTrash(reg);
10205 inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10207 compiler->tmpRlsTemp(temp);
10209 genCodeForTreeFloat(tree, needReg, bestReg);
10216 noway_assert(!"unexpected math intrinsic");
10219 genCodeForTree_DONE(tree, reg);
10224 reg = genLclHeap(op1);
10225 genCodeForTree_DONE(tree, reg);
10234 genCodeForRelop(tree, destReg, bestReg);
10239 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10242 #ifdef _TARGET_XARCH_
10245 // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
10246 // For reference, op1 is the location. op2 is the addend or the value.
10247 if (op2->OperIsConst())
10249 noway_assert(op2->TypeGet() == TYP_INT);
10250 ssize_t cns = op2->gtIntCon.gtIconVal;
10252 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10257 instEmit_RM(INS_inc, op1, op1, 0);
10261 instEmit_RM(INS_dec, op1, op1, 0);
10264 assert((int)cns == cns); // By test above for AMD64.
10266 inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
10269 genReleaseReg(op1);
10273 // non constant addend means it needs to go into a register.
10275 goto LockBinOpCommon;
10278 genFlagsEqualToNone(); // We didn't compute a result into a register.
10279 genUpdateLife(tree); // We didn't compute an operand into anything.
10284 goto LockBinOpCommon;
10287 goto LockBinOpCommon;
10290 // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
10291 // is a constant, it needs to be in a register. This should be the output register if
10294 // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
10296 GenTreePtr location = op1;
10297 GenTreePtr value = op2;
10299 // Again, a friendly reminder. IL calling convention is left to right.
10300 if (tree->gtFlags & GTF_REVERSE_OPS)
10302 // The atomic operations destroy this argument, so force it into a scratch register
10303 reg = regSet.rsPickFreeReg();
10304 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10306 // Must evaluate location into a register
10307 genCodeForTree(location, needReg, RBM_NONE);
10308 assert(location->InReg());
10309 regSet.rsMarkRegUsed(location);
10310 regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10311 genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10312 regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10314 if (ins != INS_xchg)
10316 // xchg implies the lock prefix, but xadd and add require it.
10319 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10320 genReleaseReg(value);
10321 regTracker.rsTrackRegTrash(reg);
10322 genReleaseReg(location);
10327 if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
10328 needReg, RegSet::KEEP_REG, &addrReg))
10330 genUpdateLife(location);
10332 reg = regSet.rsPickFreeReg();
10333 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10334 addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10336 if (ins != INS_xchg)
10338 // xchg implies the lock prefix, but xadd and add require it.
10342 // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10343 // inst_TT_RV(ins, location, reg);
10344 sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10346 genReleaseReg(value);
10347 regTracker.rsTrackRegTrash(reg);
10348 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10352                 // Must evaluate location into a register.
10353 genCodeForTree(location, needReg, RBM_NONE);
10354 assert(location->InReg());
10355 regSet.rsMarkRegUsed(location);
10357 // xadd destroys this argument, so force it into a scratch register
10358 reg = regSet.rsPickFreeReg();
10359 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10360 regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10361 genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10362 regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10364 if (ins != INS_xchg)
10366 // xchg implies the lock prefix, but xadd and add require it.
10370 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10372 genReleaseReg(value);
10373 regTracker.rsTrackRegTrash(reg);
10374 genReleaseReg(location);
10378 // The flags are equal to the target of the tree (i.e. the result of the add), not to the
10379 // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10380 // that information to set the flags. Doesn't seem like there is a good reason for that.
10381 // Therefore, trash the flags.
10382 genFlagsEqualToNone();
10384 if (ins == INS_add)
10386 // If the operator was add, then we were called from the GT_LOCKADD
10387 // case. In that case we don't use the result, so we don't need to
10388 // update anything.
10389 genUpdateLife(tree);
10393 genCodeForTree_DONE(tree, reg);
10398 #else // !_TARGET_XARCH_
10404 NYI_ARM("LOCK instructions");
10407 case GT_ARR_LENGTH:
10409 // Make the corresponding ind(a + c) node, and do codegen for that.
10410 GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
10411 compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10412 tree->SetOper(GT_IND);
10413 tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
10414 assert(tree->TypeGet() == TYP_INT);
10415 tree->gtOp.gtOp1 = addr;
10416 genCodeForTree(tree, destReg, bestReg);
10421 // All GT_OBJ nodes must have been morphed prior to this.
10422 noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10426 compiler->gtDispTree(tree);
10428 noway_assert(!"unexpected unary/binary operator");
10429 } // end switch (oper)
10434 #pragma warning(pop) // End suppress PREFast warning about overly large function
10437 regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
// Generate code for an integer<->integer cast and return the register holding the result.
// Widening casts use movsx/movzx from the source size; narrowing/sign-changing casts load
// at the destination size. On x86, a signed-byte -> unsigned-small-type cast is done as
// movsx followed by an AND mask. 'needReg'/'bestReg' are register-preference masks.
// NOTE(review): this extract elides brace/blank lines; visible code lines are verbatim.
10444     GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
10445     var_types dstType = tree->CastToType();
10446     var_types srcType = op1->TypeGet();
10448     if (genTypeSize(srcType) < genTypeSize(dstType))
// Widening cast: extend from the (smaller) source size.
10452         /* we need the source size */
10454         size = EA_ATTR(genTypeSize(srcType));
10456         noway_assert(size < EA_PTRSIZE);
10458         unsv = varTypeIsUnsigned(srcType);
10459         ins = ins_Move_Extend(srcType, op1->InReg());
10462             Special case: for a cast of byte to char we first
10463             have to expand the byte (w/ sign extension), then
10464             mask off the high bits.
10465             Use 'movsx' followed by 'and'
10467         if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
10469             noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
10475         // Narrowing cast, or sign-changing cast
10477         noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
10479         size = EA_ATTR(genTypeSize(dstType));
10481         unsv = varTypeIsUnsigned(dstType);
10482         ins = ins_Move_Extend(dstType, op1->InReg());
10485     noway_assert(size < EA_PTRSIZE);
10487     // Set bestReg to the same register as op1 if op1 is a regVar and is available
10490         regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
10491         if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
10493             bestReg = op1RegMask;
10497     /* Is the value sitting in a non-byte-addressable register? */
10499     if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
10503         // for unsigned values we can AND, so it needs not be a byte register
10505             reg = regSet.rsPickReg(needReg, bestReg);
10511             /* Move the value into a byte register */
10513             reg = regSet.rsGrabReg(RBM_BYTE_REGS);
10516         if (reg != op1->gtRegNum)
10518             /* Move the value into that register */
10520             regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
10521             inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
10523             /* The value has a new home now */
10525             op1->gtRegNum = reg;
10530         /* Pick a register for the value (general case) */
10532         reg = regSet.rsPickReg(needReg, bestReg);
10534         // if we (might) need to set the flags and the value is in the same register
10535         // and we have an unsigned value then use AND instead of MOVZX
10536         if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
10538 #ifdef _TARGET_X86_
10539             noway_assert(ins == INS_movzx);
// AND sets the CPU flags while movzx does not, so AND is preferred when the
// caller wants flags reflecting the result.
10545     if (ins == INS_AND)
10547         noway_assert(andv == false && unsv);
10549         /* Generate "and reg, MASK" */
10551         insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10552         inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
10554         if (tree->gtSetFlags())
10555             genFlagsEqualToReg(tree, reg);
10559 #ifdef _TARGET_XARCH_
10560         noway_assert(ins == INS_movsx || ins == INS_movzx);
10563         /* Generate "movsx/movzx reg, [addr]" */
10565         inst_RV_ST(ins, size, reg, op1);
10567         /* Mask off high bits for cast from byte to char */
10571 #ifdef _TARGET_XARCH_
10572             noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
10574             insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10575             inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
10577             if (tree->gtSetFlags())
10578                 genFlagsEqualToReg(tree, reg);
// The destination register no longer holds a tracked constant/copy.
10582     regTracker.rsTrackRegTrash(reg);
10586 void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
// Generate code for a GT_CAST node. Dispatches on the *source* type:
//   - TYP_LONG sources: take the low dword (with overflow checks on the high dword
//     when the cast is checked, e.g. conv.ovf.i8i4 / conv.ovf.u8u4);
//   - floating-point sources (x87/SSE2 path): convert via cvttsd2si;
//   - integer sources: range/mask overflow checks, then genIntegerCast.
// 'destReg'/'bestReg' are register-preference masks for the result.
// NOTE(review): brace/blank lines are elided in this extract; code lines are verbatim.
10588     GenTreePtr op1 = tree->gtOp.gtOp1;
10589     var_types dstType = tree->CastToType();
10590     var_types baseType = TYP_INT;
10591     regNumber reg = DUMMY_INIT(REG_CORRUPT);
10592     regMaskTP needReg = destReg;
10598      * Constant casts should have been folded earlier
10599      * If not finite don't bother
10600      * We don't do this optimization for debug code/no optimization
10603     noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
10604                  tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
10605                  !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
10607     noway_assert(dstType != TYP_VOID);
10609     /* What type are we casting from? */
10611     switch (op1->TypeGet())
10615             /* Special case: the long is generated via the mod of long
10616                with an int. This is really an int and need not be
10617                converted to a reg pair. NOTE: the flag only indicates
10618                that this is a case to TYP_INT, it hasn't actually
10619                verified the second operand of the MOD! */
10621             if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
10624                 /* Verify that the op2 of the mod node is
10625                    1) An integer tree, or
10626                    2) A long constant that is small enough to fit in an integer
10629                 GenTreePtr modop2 = op1->gtOp.gtOp2;
10630                 if ((genActualType(modop2->gtType) == TYP_INT) ||
10631                     ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
10633                     genCodeForTree(op1, destReg, bestReg);
10635 #ifdef _TARGET_64BIT_
10636                     reg = op1->gtRegNum;
10637 #else  // _TARGET_64BIT_
10638                     reg = genRegPairLo(op1->gtRegPair);
10639 #endif //_TARGET_64BIT_
10641                     genCodeForTree_DONE(tree, reg);
10646             /* Make the operand addressable. When gtOverflow() is true,
10647                hold on to the addrReg as we will need it to access the higher dword */
10649             op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
10651                                             // See, e.g., the TYP_INT case below...
10653             addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
10655             /* Load the lower half of the value into some register */
10659                 /* Can we simply use the low part of the value? */
10660                 reg = genRegPairLo(op1->gtRegPair);
10662                 if (tree->gtOverflow())
10666                 loMask = genRegMask(reg);
10667                 if (loMask & regSet.rsRegMaskFree())
10671             // for cast overflow we need to preserve addrReg for testing the hiDword
10672             // so we lock it to prevent regSet.rsPickReg from picking it.
10673             if (tree->gtOverflow())
10674                 regSet.rsLockUsedReg(addrReg);
10676             reg = regSet.rsPickReg(needReg, bestReg);
10678             if (tree->gtOverflow())
10679                 regSet.rsUnlockUsedReg(addrReg);
10681             noway_assert(genStillAddressable(op1));
10684             if (!op1->InReg() || (reg != genRegPairLo(op1->gtRegPair)))
10686                 /* Generate "mov reg, [addr-mode]" */
10687                 inst_RV_TT(ins_Load(TYP_INT), reg, op1);
10690             /* conv.ovf.i8i4, or conv.ovf.u8u4 */
10692             if (tree->gtOverflow())
// Checked long->int cast: verify the high dword is a sign-extension (signed)
// or zero (unsigned) of the low dword, else throw an overflow exception.
10694                 regNumber hiReg = (op1->InReg()) ? genRegPairHi(op1->gtRegPair) : REG_NA;
10696                 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10697                 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
10703                         /* Generate the following sequence
10705                                 test loDWord, loDWord   // set flags
10707                         pos: test hiDWord, hiDWord   // set flags
10710                         neg: cmp hiDWord, 0xFFFFFFFF
10716                         instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
10717                         if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
10719                             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
10720                             goto UPPER_BITS_ZERO;
10723 #if CPU_LOAD_STORE_ARCH
10725                         // We will generate code like
10734                         // We load the tree op1 into regs when we generate code for if clause.
10735                         // When we generate else clause, we see the tree is already loaded into reg, and start use it
10737                         // Well, when the code is run, we may execute else clause without going through if clause.
10739                         genCodeForTree(op1, 0);
10745                         neg = genCreateTempLabel();
10746                         done = genCreateTempLabel();
10748                         // Is the loDWord positive or negative
10749                         inst_JMP(jmpLTS, neg);
10751                         // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
10753                         if (hiReg < REG_STK)
10755                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
10759                             inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
10762                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10763                         inst_JMP(EJ_jmp, done);
10765                         // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
10767                         genDefineTempLabel(neg);
10769                         if (hiReg < REG_STK)
10771                             inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
10775                             inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
10777                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10781                         genDefineTempLabel(done);
10785                     case TYP_UINT: // conv.ovf.u8u4
10787                         // Just check that the upper DWord is 0
10789                         if (hiReg < REG_STK)
10791                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
10795                             inst_TT_IV(INS_cmp, op1, 0, 4);
10798                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10802                         noway_assert(!"Unexpected dstType");
10806                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
10809             regTracker.rsTrackRegTrash(reg);
10810             genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
10812             genCodeForTree_DONE(tree, reg);
10826 #if FEATURE_STACK_FP_X87
10828             NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
10832             if (compiler->opts.compCanUseSSE2)
10834                 // do the SSE2 based cast inline
10835                 // getting the fp operand
10837                 regMaskTP addrRegInt = 0;
10838                 regMaskTP addrRegFlt = 0;
10840                 // make the operand addressable
10841                 // We don't want to collapse constant doubles into floats, as the SSE2 instruction
10842                 // operates on doubles. Note that these (casts from constant doubles) usually get
10843                 // folded, but we don't do it for some cases (infinitys, etc). So essentially this
10844                 // shouldn't affect performance or size at all. We're fixing this for #336067
10845                 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
10846                 if (!addrRegFlt && !op1->IsRegVar())
10848                     // we have the address
10850                     inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
10851                     genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10852                     genUpdateLife(op1);
10854                     reg = regSet.rsPickReg(needReg);
10855                     getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10857                     regTracker.rsTrackRegTrash(reg);
10858                     genCodeForTree_DONE(tree, reg);
10862                     // we will need to use a temp to get it into the xmm reg
10863                     var_types typeTemp = op1->TypeGet();
10864                     TempDsc* temp = compiler->tmpGetTemp(typeTemp);
10866                     size = EA_ATTR(genTypeSize(typeTemp));
10870                         // On the fp stack; Take reg to top of stack
10872                         FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10878                         reg = regSet.PickRegFloat();
10879                         if (!op1->IsRegVarDeath())
10881                             // Load it on the fp stack
10882                             genLoadStackFP(op1, reg);
10886                             // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
10887                             genLoadStackFP(op1, reg);
10888                             FlatFPX87_MoveToTOS(&compCurFPState, reg);
10892                     // pop it off the fp stack
10893                     compCurFPState.Pop();
10895                     getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
10897                     reg = regSet.rsPickReg(needReg);
10899                     inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
10900                     getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10902                     // done..release the temp
10903                     compiler->tmpRlsTemp(temp);
10905                     // the reg is now trashed
10906                     regTracker.rsTrackRegTrash(reg);
10907                     genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10908                     genUpdateLife(op1);
10909                     genCodeForTree_DONE(tree, reg);
10915             genCodeForTreeFloat(tree, needReg, bestReg);
10916 #endif // FEATURE_STACK_FP_X87
10920             noway_assert(!"unexpected cast type");
10923     if (tree->gtOverflow())
// Checked int-source cast: compute op1 into a register, then either test a
// mask (when only upper bits must be zero) or compare against the destination
// type's min/max, throwing SCK_OVERFLOW on failure.
10925         /* Compute op1 into a register, and free the register */
10927         genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
10928         reg = op1->gtRegNum;
10930         /* Do we need to compare the value, or just check masks */
10932         ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
10938                 typeMask = ssize_t((int)0xFFFFFF80);
10939                 typeMin = SCHAR_MIN;
10940                 typeMax = SCHAR_MAX;
10941                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10944                 typeMask = ssize_t((int)0xFFFF8000);
10945                 typeMin = SHRT_MIN;
10946                 typeMax = SHRT_MAX;
10947                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10950                 typeMask = ssize_t((int)0x80000000L);
10951 #ifdef _TARGET_64BIT_
10952                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10955 #else  // _TARGET_64BIT_
10956                 noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
10958 #endif // _TARGET_64BIT_
10962                 typeMask = ssize_t((int)0xFFFFFF00L);
10966                 typeMask = ssize_t((int)0xFFFF0000L);
10970 #ifdef _TARGET_64BIT_
10971                 typeMask = 0xFFFFFFFF00000000LL;
10972 #else  // _TARGET_64BIT_
10973                 typeMask = 0x80000000L;
10974                 noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
10975 #endif // _TARGET_64BIT_
10978                 NO_WAY("Unknown type");
10982             // If we just have to check a mask.
10983             // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
10988             inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
10989             emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10990             genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10994             // Check the value is in range.
10995             // This must be conv.ovf.i4i1, etc.
10997             // Compare with the MAX
10999             noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
11001             inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
11002             emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
11003             genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
11005             // Compare with the MIN
11007             inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
11008             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
11009             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
11012         genCodeForTree_DONE(tree, reg);
// Unchecked int-source cast: delegate to genIntegerCast.
11016     /* Make the operand addressable */
11018     addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
11020     reg = genIntegerCast(tree, needReg, bestReg);
11022     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11024     genCodeForTree_DONE(tree, reg);
11027 /*****************************************************************************
11029 * Generate code for a leaf node of type GT_ADDR
11032 void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
// Generate code for a GT_ADDR node (ldloca/ldarga/ldsflda/ldelema-style address
// computation): make the operand addressable, then LEA the address into a register.
// 'destReg'/'bestReg' are register-preference masks for the result.
// NOTE(review): brace/blank lines are elided in this extract; code lines are verbatim.
11034     genTreeOps oper = tree->OperGet();
11035     const var_types treeType = tree->TypeGet();
11038     regMaskTP needReg = destReg;
11042     reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
11043     addrReg = 0xDEADCAFE;
11046     // We should get here for ldloca, ldarga, ldslfda, ldelema,
11048     if (oper == GT_ARR_ELEM)
11054         op1 = tree->gtOp.gtOp1;
11057     // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11058     if (oper == GT_ARR_ELEM)
11060         // To get the address of the array element,
11061         // we first call genMakeAddrArrElem to make the element addressable.
11062         // (That is, for example, we first emit code to calculate EBX, and EAX.)
11063         // And then use lea to obtain the address.
11064         // (That is, for example, we then emit
11065         //     lea EBX, bword ptr [EBX+4*EAX+36]
11066         // to obtain the address of the array element.)
11067         addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11071         addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11074     noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
11076     // We want to reuse one of the scratch registers that were used
11077     // in forming the address mode as the target register for the lea.
11078     // If bestReg is unset or if it is set to one of the registers used to
11079     // form the address (i.e. addrReg), we calculate the scratch register
11080     // to use as the target register for the LEA
11082     bestReg = regSet.rsUseIfZero(bestReg, addrReg);
11083     bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11085     /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11086        it since keepReg==false.
11087        If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11088        So this is guaranteed not to spill addrReg */
11090     reg = regSet.rsPickReg(needReg, bestReg);
11092     // Slight workaround, force the inst routine to think that
11093     // value being loaded is an int (since that is what
11094     // LEA will return) otherwise it would try to allocate
11095     // two registers for a long etc.
11096     noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
11097     op1->gtType = treeType;
11099     inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11101     // The Lea instruction above better not have tried to put the
11102     // 'value' pointed to by 'op1' in a register, LEA will not work.
11103     noway_assert(!(op1->InReg()));
11105     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11106     // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
11107     noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11109     regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
11110     gcInfo.gcMarkRegPtrVal(reg, treeType);
11112     genCodeForTree_DONE(tree, reg);
11115 #ifdef _TARGET_ARM_
11117 /*****************************************************************************
11119 * Move (load/store) between float ret regs and struct promoted variable.
11121 * varDsc - The struct variable to be loaded from or stored into.
11122 * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
11125 void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
// Move (load/store) between the float return registers (s0..s3 / d0..d3) and the
// fields of a promoted HFA struct variable, field by field.
//   varDsc        - the promoted struct variable.
//   isLoadIntoFlt - true: copy fields INTO the float return regs; false: copy OUT of them.
// Each field either lives in a register (reg-to-reg move) or in memory (ld/st).
// NOTE(review): brace/blank lines are elided in this extract; code lines are verbatim.
11127     regNumber curReg = REG_FLOATRET;
11129     unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11130     for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11132         LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11134         // Is the struct field promoted and sitting in a register?
11135         if (varDscFld->lvRegister)
11137             // Move from the struct field into curReg if load
11138             // else move into struct field from curReg if store
11139             regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11140             regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
11141             if (srcReg != dstReg)
11143                 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11144                 regTracker.rsTrackRegCopy(dstReg, srcReg);
11149             // This field is in memory, do a move between the field and float registers.
11150             emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11153                 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11154                 regTracker.rsTrackRegTrash(curReg);
11158                 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11162         // Advance the current reg.
// A TYP_DOUBLE field consumes two consecutive single-precision registers.
11163         curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
11167 void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
// Load an HFA struct local variable into the float return registers, for returning
// it from the current method (ARM HFA return convention).
// Promoted structs are copied field-by-field; otherwise a single vldr or a vldm
// multiple-load is used depending on the struct size in REGSIZE_BYTES slots.
// NOTE(review): brace/blank lines are elided in this extract; code lines are verbatim.
11169     assert(tree->TypeGet() == TYP_STRUCT);
11170     assert(tree->gtOper == GT_LCL_VAR);
11171     LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11172     int slots = varDsc->lvSize() / REGSIZE_BYTES;
11173     if (varDsc->lvPromoted)
11175         genLdStFltRetRegsPromotedVar(varDsc, true);
11181             // Use the load float/double instruction.
11182             inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
11183                        (slots == 1) ? EA_4BYTE : EA_8BYTE);
11187             // Use the load store multiple instruction.
11188             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11189             inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11190             regTracker.rsTrackRegTrash(reg);
11191             getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
// Record that the tree's value now lives in the (first) float return register.
11194     genMarkTreeInReg(tree, REG_FLOATRET);
11197 void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
// Generate code for "lclVar = (HFA) call()": emit the call, then store the HFA
// returned in the float return registers into the local on the left-hand side.
// Promoted struct locals are stored field-by-field; otherwise a single vstr or a
// vstm multiple-store is used depending on the number of returned slots.
// NOTE(review): brace/blank lines are elided in this extract; code lines are verbatim.
11199     assert(tree->TypeGet() == TYP_STRUCT);
11200     assert(tree->OperGet() == GT_ASG);
11202     // LHS should be lcl var or fld.
11203     GenTreePtr op1 = tree->gtOp.gtOp1;
11205     // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
11206     // handling multiple levels of inlined functions that return HFA on the right-hand-side.
11207     // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
11208     // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
11209     // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
11210     // as a regular assert().
11211     noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
11212     unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11213     assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
11215     // The RHS should be a call.
11216     GenTreePtr op2 = tree->gtOp.gtOp2;
11217     assert(op2->gtOper == GT_CALL);
11219     // Generate code for call and copy the return registers into the local.
11220     regMaskTP retMask = genCodeForCall(op2->AsCall(), true);
11222     // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
11223     CLANG_FORMAT_COMMENT_ANCHOR;
11226     regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
11227     assert((mask & (mask - 1)) == 0);
11228     assert(mask <= (1 << MAX_HFA_RET_SLOTS));
11229     assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
11232     int slots = genCountBits(retMask & RBM_ALLFLOAT);
11234     LclVarDsc* varDsc = &compiler->lvaTable[varNum];
11236     if (varDsc->lvPromoted)
11238         genLdStFltRetRegsPromotedVar(varDsc, false);
11244             inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
11245                        (slots == 1) ? EA_4BYTE : EA_8BYTE);
11249             regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11250             inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
11251             regTracker.rsTrackRegTrash(reg);
11252             getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11257 #endif // _TARGET_ARM_
11259 /*****************************************************************************
11261 * Generate code for a GT_ASG tree
11265 #pragma warning(push)
11266 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11268 void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
11270 noway_assert(tree->gtOper == GT_ASG);
11272 GenTreePtr op1 = tree->gtOp.gtOp1;
11273 GenTreePtr op2 = tree->gtOp.gtOp2;
11274 regMaskTP needReg = RBM_ALLINT;
11275 regMaskTP bestReg = RBM_CORRUPT;
11276 regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
11277 bool ovfl = false; // Do we need an overflow check
11278 bool volat = false; // Is this a volatile store
11281 unsigned lclVarNum = compiler->lvaCount;
11282 unsigned lclILoffs = DUMMY_INIT(0);
11284 #ifdef _TARGET_ARM_
11285 if (tree->gtType == TYP_STRUCT)
11287 // We use copy block to assign structs, however to receive HFAs in registers
11288 // from a CALL, we use assignment, var = (hfa) call();
11289 assert(compiler->IsHfa(tree));
11290 genStoreFromFltRetRegs(tree);
11296 if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11298 if (varTypeIsFloating(op1))
11299 assert(!"Bad IL: Illegal assignment of integer into float!");
11301 assert(!"Bad IL: Illegal assignment of float into integer!");
11305 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11307 op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
11310 /* Is the target a register or local variable? */
11311 switch (op1->gtOper)
11317 varNum = op1->gtLclVarCommon.gtLclNum;
11318 noway_assert(varNum < compiler->lvaCount);
11319 varDsc = compiler->lvaTable + varNum;
11321 /* For non-debuggable code, every definition of a lcl-var has
11322 * to be checked to see if we need to open a new scope for it.
11323 * Remember the local var info to call siCheckVarScope
11324 * AFTER code generation of the assignment.
11326 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11328 lclVarNum = varNum;
11329 lclILoffs = op1->gtLclVar.gtLclILoffs;
11332 /* Check against dead store ? (with min opts we may have dead stores) */
11334 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11336 /* Does this variable live in a register? */
11338 if (genMarkLclVar(op1))
11345 /* Get hold of the target register */
11349 op1Reg = op1->gtRegVar.gtRegNum;
11352 /* Compute the RHS (hopefully) into the variable's register.
11353 For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11354 as variables are kept alive everywhere. So we have to be
11355 careful if we want to compute the value directly into
11356 the variable's register. */
11358 bool needToUpdateRegSetCheckLevel;
11359 needToUpdateRegSetCheckLevel = false;
11362 // We should only be accessing lvVarIndex if varDsc is tracked.
11363 assert(varDsc->lvTracked);
11365 if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11367 noway_assert(compiler->opts.compDbgCode);
11369 /* The predictor might expect us to generate op2 directly
11370 into the var's register. However, since the variable is
11371 already alive, first kill it and its register. */
11373 if (rpCanAsgOperWithoutReg(op2, true))
11375 genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11376 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11378 needToUpdateRegSetCheckLevel = true;
11384 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11389 /* Special cases: op2 is a GT_CNS_INT */
11391 if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11393 /* Save the old life status */
11395 VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11396 VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11398 /* Set a flag to avoid printing the message
11399 and remember that life was changed. */
11401 genTempLiveChg = false;
11406 if (needToUpdateRegSetCheckLevel)
11407 compiler->compRegSetCheckLevel++;
11409 genCodeForTree(op2, needReg, genRegMask(op1Reg));
11411 if (needToUpdateRegSetCheckLevel)
11412 compiler->compRegSetCheckLevel--;
11413 noway_assert(compiler->compRegSetCheckLevel >= 0);
11415 noway_assert(op2->InReg());
11417 /* Make sure the value ends up in the right place ... */
11419 if (op2->gtRegNum != op1Reg)
11421 /* Make sure the target of the store is available */
11423 if (regSet.rsMaskUsed & genRegMask(op1Reg))
11424 regSet.rsSpillReg(op1Reg);
11426 #ifdef _TARGET_ARM_
11427 if (op1->TypeGet() == TYP_FLOAT)
11429 // This can only occur when we are returning a non-HFA struct
11430 // that is composed of a single float field.
11432 inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11435 #endif // _TARGET_ARM_
11437 inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11440 /* The value has been transferred to 'op1Reg' */
11442 regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
11444 if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11445 gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11447 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11451 // First we need to remove it from the original reg set mask (or else trigger an
11452 // assert when we add it to the other reg set mask).
11453 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11454 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11456 // The emitter has logic that tracks the GCness of registers and asserts if you
11457 // try to do bad things to a GC pointer (like lose its GCness).
11459 // An explict cast of a GC pointer to an int (which is legal if the
11460 // pointer is pinned) is encoded as an assignment of a GC source
11461 // to a integer variable. Unfortunately if the source was the last
11462 // use, and the source register gets reused by the destination, no
11463 // code gets emitted (That is where we are at right now). The emitter
11464 // thinks the register is a GC pointer (it did not see the cast).
11465 // This causes asserts, as well as bad GC info since we will continue
11466 // to report the register as a GC pointer even if we do arithmetic
11467 // with it. So force the emitter to see the change in the type
11468 // of variable by placing a label.
11469 // We only have to do this check at this point because in the
11470 // CAST morphing, we create a temp and assignment whenever we
11471 // have a cast that loses its GCness.
11473 if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11475 void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
11476 gcInfo.gcRegByrefSetCur);
11482 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11487 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11488 // to worry about it being enregistered.
11489 noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11499 assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11501 if (op1->gtFlags & GTF_IND_VOLATILE)
11512 /* Is the value being assigned a simple one? */
11515 switch (op2->gtOper)
11519 if (!genMarkLclVar(op2))
11526 /* Is the target a byte/short/char value? */
11528 if (varTypeIsSmall(op1->TypeGet()))
11531 if (tree->gtFlags & GTF_REVERSE_OPS)
11534 /* Make the target addressable */
11536 op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
11538 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11540 /* Does the write barrier helper do the assignment? */
11542 regGC = WriteBarrier(op1, op2, addrReg);
11544 // Was assignment done by the WriteBarrier
11545 if (regGC == RBM_NONE)
11547 #ifdef _TARGET_ARM_
11550 // Emit a memory barrier instruction before the store
11551 instGen_MemoryBarrier();
11555 /* Move the value into the target */
11557 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11559 // This is done in WriteBarrier when (regGC != RBM_NONE)
11561 /* Free up anything that was tied up by the LHS */
11562 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11565 /* Free up the RHS */
11566 genUpdateLife(op2);
11568 /* Remember that we've also touched the op2 register */
11570 addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11576 ival = op2->gtIntCon.gtIconVal;
11578 size = emitTypeSize(tree->TypeGet());
11580 ins = ins_Store(op1->TypeGet());
11582 // If we are storing a constant into a local variable
11583 // we extend the size of the store here
11584 // this normally takes place in CodeGen::inst_TT_IV on x86.
11586 if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11588 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11589 LclVarDsc* varDsc = compiler->lvaTable + varNum;
11591 // Fix the immediate by sign extending if needed
11592 if (!varTypeIsUnsigned(varDsc->TypeGet()))
11594 if (size == EA_1BYTE)
11596 if ((ival & 0x7f) != ival)
11597 ival = ival | 0xffffff00;
11601 assert(size == EA_2BYTE);
11602 if ((ival & 0x7fff) != ival)
11603 ival = ival | 0xffff0000;
11607 // A local stack slot is at least 4 bytes in size, regardless of
11608 // what the local var is typed as, so auto-promote it here
11609 // unless it is a field of a promoted struct
11610 if (!varDsc->lvIsStructField)
11612 size = EA_SET_SIZE(size, EA_4BYTE);
11613 ins = ins_Store(TYP_INT);
11617 /* Make the target addressable */
11619 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11621 #ifdef _TARGET_ARM_
11624 // Emit a memory barrier instruction before the store
11625 instGen_MemoryBarrier();
11629 /* Move the value into the target */
11631 noway_assert(op1->gtOper != GT_REG_VAR);
11632 if (compiler->opts.compReloc && op2->IsIconHandle())
11634 /* The constant is actually a handle that may need relocation
11635 applied to it. genComputeReg will do the right thing (see
11636 code in genCodeForTreeConst), so we'll just call it to load
11637 the constant into a register. */
11639 genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11640 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11641 noway_assert(op2->InReg());
11642 inst_TT_RV(ins, op1, op2->gtRegNum);
11643 genReleaseReg(op2);
11647 regSet.rsLockUsedReg(addrReg);
11650 bool copyIconFromReg = true;
11651 regNumber iconReg = REG_NA;
11653 #ifdef _TARGET_ARM_
11654 // Only if the constant can't be encoded in a small instruction,
11655 // look for another register to copy the value from. (Assumes
11656 // target is a small register.)
11657 if ((op1->InReg()) && !isRegPairType(tree->gtType) &&
11658 arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11660 copyIconFromReg = false;
11662 #endif // _TARGET_ARM_
11664 if (copyIconFromReg)
11666 iconReg = regTracker.rsIconIsInReg(ival);
11667 if (iconReg == REG_NA)
11668 copyIconFromReg = false;
11671 if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
11672 (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11674 /* Move the value into the target */
11676 inst_TT_RV(ins, op1, iconReg, 0, size);
11679 #endif // REDUNDANT_LOAD
11681 inst_TT_IV(ins, op1, ival, 0, size);
11684 regSet.rsUnlockUsedReg(addrReg);
11687 /* Free up anything that was tied up by the LHS */
11689 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11696 bool isWriteBarrier = false;
11697 regMaskTP needRegOp1 = RBM_ALLINT;
11698 RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11700 /* Is the LHS more complex than the RHS? */
11702 if (tree->gtFlags & GTF_REVERSE_OPS)
11704 /* Is the target a byte/short/char value? */
11706 if (varTypeIsSmall(op1->TypeGet()))
11708 noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
11709 // TODO: Why does this have to be true?
11710 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11711 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11713 if (op2->gtOper == GT_CAST && !op2->gtOverflow())
11715 /* Special case: cast to small type */
11717 if (op2->CastToType() >= op1->gtType)
11719 /* Make sure the cast operand is not > int */
11721 if (op2->CastFromType() <= TYP_INT)
11723 /* Cast via a non-smaller type */
11725 op2 = op2->gtCast.CastOp();
11730 if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11733 switch (op1->gtType)
11748 if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11750 /* Redundant AND */
11752 op2 = op2->gtOp.gtOp1;
11756 /* Must get the new value into a byte register */
11759 if (varTypeIsByte(op1->TypeGet()))
11760 genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11767 /* Generate the RHS into a register */
11769 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11770 if (isWriteBarrier)
11772 #if NOGC_WRITE_BARRIERS
11773 // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11774 needReg = Target::exclude_WriteBarrierReg(needReg);
11775 mustReg = RegSet::EXACT_REG;
11776 #else // !NOGC_WRITE_BARRIERS
11777 // This code should be generic across architectures.
11779 // For the standard JIT Helper calls
11780 // op1 goes into REG_ARG_0 and
11781 // op2 goes into REG_ARG_1
11783 needRegOp1 = RBM_ARG_0;
11784 needReg = RBM_ARG_1;
11785 #endif // !NOGC_WRITE_BARRIERS
11787 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11790 noway_assert(op2->InReg());
11792 /* Make the target addressable */
11794 op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
11795 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11797 /* Make sure the RHS register hasn't been spilled;
11798 keep the register marked as "used", otherwise
11799 we might get the pointer lifetimes wrong.
11802 if (varTypeIsByte(op1->TypeGet()))
11803 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11805 genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11806 noway_assert(op2->InReg());
11808 /* Lock the RHS temporarily (lock only already used) */
11810 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11812 /* Make sure the LHS is still addressable */
11814 addrReg = genKeepAddressable(op1, addrReg);
11816 /* We can unlock (only already used ) the RHS register */
11818 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11820 /* Does the write barrier helper do the assignment? */
11822 regGC = WriteBarrier(op1, op2, addrReg);
11826 // Yes, assignment done by the WriteBarrier
11827 noway_assert(isWriteBarrier);
11831 #ifdef _TARGET_ARM_
11834 // Emit a memory barrier instruction before the store
11835 instGen_MemoryBarrier();
11839 /* Move the value into the target */
11841 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11845 /* Update the current liveness info */
11846 if (compiler->opts.varNames)
11847 genUpdateLife(tree);
11850 // If op2 register is still in use, free it. (Might not be in use, if
11851 // a full-call write barrier was done, and the register was a caller-saved
11853 regMaskTP op2RM = genRegMask(op2->gtRegNum);
11854 if (op2RM & regSet.rsMaskUsed)
11855 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
11857 // This is done in WriteBarrier when (regGC != 0)
11860 /* Free up anything that was tied up by the LHS */
11861 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11866 /* Make the target addressable */
11868 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11870 if (isWriteBarrier)
11872 #if NOGC_WRITE_BARRIERS
11873 /* Try to avoid RBM_TMP_0 */
11874 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
11875 mustReg = RegSet::EXACT_REG; // For op2
11876 #else // !NOGC_WRITE_BARRIERS
11877 // This code should be generic across architectures.
11879 // For the standard JIT Helper calls
11880 // op1 goes into REG_ARG_0 and
11881 // op2 goes into REG_ARG_1
11883 needRegOp1 = RBM_ARG_0;
11884 needReg = RBM_ARG_1;
11885 mustReg = RegSet::EXACT_REG; // For op2
11886 #endif // !NOGC_WRITE_BARRIERS
11889 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
11891 op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
11893 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11895 #if CPU_HAS_BYTE_REGS
11896 /* Is the target a byte value? */
11897 if (varTypeIsByte(op1->TypeGet()))
11899 /* Must get the new value into a byte register */
11900 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11901 mustReg = RegSet::EXACT_REG;
11903 if (op2->gtType >= op1->gtType)
11904 op2->gtFlags |= GTF_SMALL_OK;
11908 #if NOGC_WRITE_BARRIERS
11909 /* For WriteBarrier we can't use REG_WRITE_BARRIER */
11910 if (isWriteBarrier)
11911 needReg = Target::exclude_WriteBarrierReg(needReg);
11913 /* Also avoid using the previously computed addrReg(s) */
11914 bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
11916 /* If we have a reg available to grab then use bestReg */
11917 if (bestReg & regSet.rsRegMaskCanGrab())
11920 mustReg = RegSet::EXACT_REG;
11923 /* Generate the RHS into a register */
11924 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11925 noway_assert(op2->InReg());
11927 /* Make sure the target is still addressable */
11928 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11929 noway_assert(op2->InReg());
11931 /* Does the write barrier helper do the assignment? */
11933 regGC = WriteBarrier(op1, op2, addrReg);
11937 // Yes, assignment done by the WriteBarrier
11938 noway_assert(isWriteBarrier);
11942 assert(!isWriteBarrier);
11944 #ifdef _TARGET_ARM_
11947 // Emit a memory barrier instruction before the store
11948 instGen_MemoryBarrier();
11952 /* Move the value into the target */
11954 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11957 /* The new value is no longer needed */
11959 genReleaseReg(op2);
11962 /* Update the current liveness info */
11963 if (compiler->opts.varNames)
11964 genUpdateLife(tree);
11967 // This is done in WriteBarrier when (regGC != 0)
11970 /* Free up anything that was tied up by the LHS */
11971 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11975 addrReg = RBM_NONE;
11979 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
11980 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
11983 /* For non-debuggable code, every definition of a lcl-var has
11984 * to be checked to see if we need to open a new scope for it.
11986 if (lclVarNum < compiler->lvaCount)
11987 siCheckVarScope(lclVarNum, lclILoffs);
11990 #pragma warning(pop)
11993 /*****************************************************************************
11995 * Generate code to complete the assignment operation
// Completes code generation for an assignment node. Called after the store itself
// has been emitted, for both plain GT_ASG and compound GT_ASG_ADD / GT_ASG_SUB.
// It updates liveness for the target, refreshes/invalidates register tracking for
// the assigned local, removes the addressing registers from the GC pointer sets,
// and (per the 'ovfl' flag) emits the overflow check for compound assignments.
//
//   tree    - the assignment node whose store was just generated
//   addrReg - mask of registers that were used to address the target
//             (must have been initialized by the caller)
//   reg     - register holding the result, or REG_NA when the result is not in a register
//   ovfl    - true when an overflow check is required (compound assignments only)
11998 void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
12000 const var_types treeType = tree->TypeGet();
12001 GenTreePtr op1 = tree->gtOp.gtOp1;
12002 GenTreePtr op2 = tree->gtOp.gtOp2;
// Update liveness: a direct definition of a local (or register variable) updates
// the variable's life first, then the assignment tree as a whole.
12005 if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
12006 genUpdateLife(op1);
12007 genUpdateLife(tree);
// Any register copies of the local's old value are now stale.
12011 if (op1->gtOper == GT_LCL_VAR)
12012 regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12014 /* Have we just assigned a value that is in a register? */
12016 if (op2->InReg() && tree->gtOper == GT_ASG)
12018 regTracker.rsTrackRegAssign(op1, op2);
// 0xDEADCAFE is the DUMMY_INIT marker: the caller must have set addrReg for real.
12023 noway_assert(addrReg != 0xDEADCAFE);
// The addressing registers no longer hold live GC pointers.
12025 gcInfo.gcMarkRegSetNpt(addrReg);
// Only the compound assignment operators can reach the overflow-check path.
12029 noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12031 /* If it is not in a register and it is a small type, then
12032 we must have loaded it up from memory, done the increment,
12033 checked for overflow, and then stored it back to memory */
12035 bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->InReg());
12039 // For small sizes, reg should be set as we sign/zero extend it.
12041 noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
12043 /* Currently we don't morph x=x+y into x+=y in try blocks
12044 * if we need overflow check, as x+y may throw an exception.
12045 * We can do it if x is not live on entry to the catch block.
12047 noway_assert(!compiler->compCurBB->hasTryIndex());
12049 genCheckOverflow(tree);
12054 /*****************************************************************************
12056 * Generate code for a special op tree
// Generates code for the "special" operators — nodes that are neither constants,
// leaves, nor simple unary/binary ops (e.g. GT_CALL, GT_ARR_BOUNDS_CHECK,
// GT_CMPXCHG). Dispatches on the operator and leaves the result (if any) recorded
// via genCodeForTree_DONE.
//
//   tree    - the special-operator node to generate
//   destReg - preferred destination register mask (0 means "any")
//   bestReg - narrower hint within destReg
12059 void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
12061 genTreeOps oper = tree->OperGet();
12062 regNumber reg = DUMMY_INIT(REG_CORRUPT);
12063 regMaskTP regs = regSet.rsMaskUsed;
// This routine only handles operators outside the const/leaf/simple-op kinds.
12065 noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
12070 regs = genCodeForCall(tree->AsCall(), true);
12072 /* If the result is in a register, make sure it ends up in the right place */
12074 if (regs != RBM_NONE)
12076 genMarkTreeInReg(tree, genRegNumFromMask(regs));
12079 genUpdateLife(tree);
12083 NO_WAY("should not see this operator in this phase");
12086 case GT_ARR_BOUNDS_CHECK:
12088 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
12089 // MUST NEVER CHECK-IN WITH THIS ENABLED.
12090 // This is just for convenience in doing performance investigations and requires x86ret builds
12091 if (!JitConfig.JitNoRngChk())
12093 genRangeCheck(tree);
12098 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
12103 #if defined(_TARGET_XARCH_)
12104 // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
12106 // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
12107 // right now, just get the trees evaluated.
12109 // As a friendly reminder. IL args are evaluated left to right.
12111 GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
12112 GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
12113 GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
// Try to fold the location into an indirect addressing mode; if that fails,
// compute it into a register and mark that register used.
12116 bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
12117 RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
12121 genCodeForTree(location, RBM_NONE, RBM_NONE);
12122 assert(location->InReg());
12123 addrReg = genRegMask(location->gtRegNum);
12124 regSet.rsMarkRegUsed(location);
12127 // We must have a reg for the Value, but it doesn't really matter which register.
12129 // Try to avoid EAX and the address register if possible.
12130 genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
12133 // cmpxchg uses EAX as an implicit operand to hold the comparand
12134 // We're going to destroy EAX in this operation, so we better not be keeping
12135 // anything important in it.
12136 if (RBM_EAX & regSet.rsMaskVars)
12138 // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
12139 for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
12141 const LclVarDsc& varDesc = compiler->lvaTable[varNum];
12142 if (!varDesc.lvIsRegCandidate())
12144 if (!varDesc.lvRegister)
12146 if (isFloatRegType(varDesc.lvType))
12148 if (varDesc.lvRegNum != REG_EAX)
12150 // We may need to check lvOtherReg.
12152 // If the variable isn't going dead during this tree, we've just trashed a local with
12154 noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
// The comparand must land in EAX — cmpxchg's implicit operand.
12160 genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
12162 // By this point we've evaluated everything. However the odds are that we've spilled something by
12163 // now. Let's recover all the registers and force them to stay.
12165 // Well, we just computed comparand, so it's still in EAX.
12166 noway_assert(comparand->gtRegNum == REG_EAX);
12167 regSet.rsLockUsedReg(RBM_EAX);
12169 // Stick it anywhere other than EAX.
12170 genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
12171 reg = value->gtRegNum;
12172 noway_assert(reg != REG_EAX);
12173 regSet.rsLockUsedReg(genRegMask(reg));
12177 addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
// Recover the location last, avoiding the registers already holding operands.
12181 genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
// Operands are pinned; the temporary locks can come off before emitting.
12184 regSet.rsUnlockUsedReg(genRegMask(reg));
12185 regSet.rsUnlockUsedReg(RBM_EAX);
12190 sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
12191 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
12195 instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
12196 genReleaseReg(location);
12199 genReleaseReg(value);
12200 genReleaseReg(comparand);
12202 // EAX and the value register are both trashed at this point.
12203 regTracker.rsTrackRegTrash(REG_EAX);
12204 regTracker.rsTrackRegTrash(reg);
// cmpxchg sets the flags, so any cached flags knowledge is invalid.
12208 genFlagsEqualToNone();
12210 #else // not defined(_TARGET_XARCH_)
12211 NYI("GT_CMPXCHG codegen");
12218 compiler->gtDispTree(tree);
12220 noway_assert(!"unexpected operator");
12221 NO_WAY("unexpected operator");
// Every reachable path above must have produced a real result register.
12224 noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
12225 genCodeForTree_DONE(tree, reg);
12228 /*****************************************************************************
12230 * Generate code for the given tree. tree->gtRegNum will be set to the
12231 * register where the tree lives.
12233 * If 'destReg' is non-zero, we'll do our best to compute the value into a
12234 * register that is in that register set.
12235 * Use genComputeReg() if you need the tree in a specific register.
12236 * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12237 * the register can only be used for read, but not for write.
12238 * Use genMakeAddressable() if you only need the tree to be accessible
12239 * using a complex addressing mode, and do not necessarily need the tree
12240 * materialized in a register.
12242 * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12244 * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
12245 * register will not be consumed right away and could possibly be spilled.
// Top-level per-tree code generator: validates the node, adjusts the register
// hints against currently-live enregistered locals, routes long/float/double
// trees to their dedicated generators, then dispatches the remaining int-sized
// trees by operator kind (const / leaf / simple op / special op).
12248 void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
12251 if (compiler->verbose)
12253 printf("Generating code for tree ");
12254 Compiler::printTreeID(tree);
12255 printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12257 genStressRegs(tree);
12260 noway_assert(tree);
12261 noway_assert(tree->gtOper != GT_STMT);
12262 assert(tree->IsNodeProperlySized());
12264 // When assigning to an enregistered local variable we receive
12265 // a hint that we should target the register that is used to
12266 // hold the enregistered local variable.
12267 // When receiving this hint both destReg and bestReg masks are set
12268 // to the register that is used by the enregistered local variable.
12270 // However it is possible for us to have a different local variable
12271 // targeting the same register to become alive (and later die)
12272 // as we descend the expression tree.
12274 // To handle such cases we will remove any registers that are alive from
12275 // both the destReg and bestReg masks.
12277 regMaskTP liveMask = genLiveMask(tree);
12279 // This removes any registers used to hold enregistered locals
12280 // from the destReg and bestReg masks.
12281 // After this either mask could become 0
12283 destReg &= ~liveMask;
12284 bestReg &= ~liveMask;
12286 /* 'destReg' of 0 really means 'any' */
12288 destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
// Only narrow bestReg down to destReg when destReg actually constrains the choice.
12290 if (destReg != RBM_ALL(tree->TypeGet()))
12291 bestReg = regSet.rsUseIfZero(bestReg, destReg);
12293 // Long, float, and double have their own codegen functions
12294 switch (tree->TypeGet())
12298 #if !CPU_HAS_FP_SUPPORT
12301 genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
12304 #if CPU_HAS_FP_SUPPORT
12308 // For comma nodes, we'll get back here for the last node in the comma list.
12309 if (tree->gtOper != GT_COMMA)
12311 genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12320 noway_assert(!"These types are only used as markers in GT_CAST nodes");
12328 /* Is the value already in a register? */
12332 genCodeForTree_REG_VAR1(tree);
12336 /* We better not have a spilled value here */
12338 noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12340 /* Figure out what kind of a node we have */
12342 unsigned kind = tree->OperKind();
// Dispatch by operator kind; each helper records the result via the tracking code.
12344 if (kind & GTK_CONST)
12346 /* Handle constant nodes */
12348 genCodeForTreeConst(tree, destReg, bestReg);
12350 else if (kind & GTK_LEAF)
12352 /* Handle leaf nodes */
12354 genCodeForTreeLeaf(tree, destReg, bestReg);
12356 else if (kind & GTK_SMPOP)
12358 /* Handle 'simple' unary/binary operators */
12360 genCodeForTreeSmpOp(tree, destReg, bestReg);
12364 /* Handle special operators */
12366 genCodeForTreeSpecialOp(tree, destReg, bestReg);
12370 /*****************************************************************************
12372 * Generate code for all the basic blocks in the function.
12376 #pragma warning(push)
12377 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12379 void CodeGen::genCodeForBBlist()
12384 unsigned savedStkLvl;
12387 genInterruptibleUsed = true;
12388 unsigned stmtNum = 0;
12389 unsigned totalCostEx = 0;
12390 unsigned totalCostSz = 0;
12392 // You have to be careful if you create basic blocks from now on
12393 compiler->fgSafeBasicBlockCreation = false;
12395 // This stress mode is not compatible with fully interruptible GC
12396 if (genInterruptible && compiler->opts.compStackCheckOnCall)
12398 compiler->opts.compStackCheckOnCall = false;
12401 // This stress mode is not compatible with fully interruptible GC
12402 if (genInterruptible && compiler->opts.compStackCheckOnRet)
12404 compiler->opts.compStackCheckOnRet = false;
12408 // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
12409 genPrepForEHCodegen();
12411 assert(!compiler->fgFirstBBScratch ||
12412 compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12414 /* Initialize the spill tracking logic */
12416 regSet.rsSpillBeg();
12418 /* Initialize the line# tracking logic */
12420 if (compiler->opts.compScopeInfo)
12425 #ifdef _TARGET_X86_
12426 if (compiler->compTailCallUsed)
12428 noway_assert(isFramePointerUsed());
12429 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12433 if (compiler->opts.compDbgEnC)
12435 noway_assert(isFramePointerUsed());
12436 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12439 /* If we have any pinvoke calls, we might potentially trash everything */
12441 if (compiler->info.compCallUnmanaged)
12443 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
12444 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12447 /* Initialize the pointer tracking code */
12449 gcInfo.gcRegPtrSetInit();
12450 gcInfo.gcVarPtrSetInit();
12452 /* If any arguments live in registers, mark those regs as such */
12454 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
12456 /* Is this variable a parameter assigned to a register? */
12458 if (!varDsc->lvIsParam || !varDsc->lvRegister)
12461 /* Is the argument live on entry to the method? */
12463 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12466 #if CPU_HAS_FP_SUPPORT
12467 /* Is this a floating-point argument? */
12469 if (varDsc->IsFloatRegType())
12472 noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12475 /* Mark the register as holding the variable */
12477 if (isRegPairType(varDsc->lvType))
12479 regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12481 if (varDsc->lvOtherReg != REG_STK)
12482 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12486 regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12490 unsigned finallyNesting = 0;
12492 // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12493 // allocation at the start of each basic block.
12494 VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12496 /*-------------------------------------------------------------------------
12498 * Walk the basic blocks and generate code for each one
12503 BasicBlock* lblk; /* previous block */
12505 for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
12508 if (compiler->verbose)
12510 printf("\n=============== Generating ");
12511 block->dspBlockHeader(compiler, true, true);
12512 compiler->fgDispBBLiveness(block);
12516 VARSET_TP liveSet(VarSetOps::UninitVal());
12518 regMaskTP gcrefRegs = 0;
12519 regMaskTP byrefRegs = 0;
12521 /* Does any other block jump to this point ? */
12523 if (block->bbFlags & BBF_JMP_TARGET)
12525 /* Someone may jump here, so trash all regs */
12527 regTracker.rsTrackRegClr();
12529 genFlagsEqualToNone();
12533 /* No jump, but pointers always need to get trashed for proper GC tracking */
12535 regTracker.rsTrackRegClrPtr();
12538 /* No registers are used or locked on entry to a basic block */
12540 regSet.rsMaskUsed = RBM_NONE;
12541 regSet.rsMaskMult = RBM_NONE;
12542 regSet.rsMaskLock = RBM_NONE;
12544 // If we need to reserve registers such that they are not used
12545 // by CodeGen in this BasicBlock we do so here.
12546 // On the ARM when we have large frame offsets for locals we
12547 // will have RBM_R10 in the regSet.rsMaskResvd set,
12548 // additionally if a LocAlloc or alloca is used RBM_R9 is in
12549 // the regSet.rsMaskResvd set and we lock these registers here.
12551 if (regSet.rsMaskResvd != RBM_NONE)
12553 regSet.rsLockReg(regSet.rsMaskResvd);
12554 regSet.rsSetRegsModified(regSet.rsMaskResvd);
12557 /* Figure out which registers hold variables on entry to this block */
12559 regMaskTP specialUseMask = regSet.rsMaskResvd;
12561 specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
12562 regSet.ClearMaskVars();
12563 VarSetOps::ClearD(compiler, compiler->compCurLife);
12564 VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12566 #if FEATURE_STACK_FP_X87
12567 VarSetOps::AssignNoCopy(compiler, genFPregVars,
12568 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12569 genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
12570 genFPdeadRegCnt = 0;
12572 gcInfo.gcResetForBB();
12574 genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12575 #if FEATURE_STACK_FP_X87
12576 VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12579 // We should never enregister variables in any of the specialUseMask registers
12580 noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12582 VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
12583 while (iter.NextElem(&varIndex))
12585 varNum = compiler->lvaTrackedToVarNum[varIndex];
12586 varDsc = compiler->lvaTable + varNum;
12587 assert(varDsc->lvTracked);
12588 /* Ignore the variable if it's not not in a reg */
12590 if (!varDsc->lvRegister)
12592 if (isFloatRegType(varDsc->lvType))
12595 /* Get hold of the index and the bitmask for the variable */
12596 regNumber regNum = varDsc->lvRegNum;
12597 regMaskTP regMask = genRegMask(regNum);
12599 regSet.AddMaskVars(regMask);
12601 if (varDsc->lvType == TYP_REF)
12602 gcrefRegs |= regMask;
12603 else if (varDsc->lvType == TYP_BYREF)
12604 byrefRegs |= regMask;
12606 /* Mark the register holding the variable as such */
12608 if (varTypeIsMultiReg(varDsc))
12610 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12611 if (varDsc->lvOtherReg != REG_STK)
12613 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12614 regMask |= genRegMask(varDsc->lvOtherReg);
12619 regTracker.rsTrackRegLclVar(regNum, varNum);
12623 gcInfo.gcPtrArgCnt = 0;
12625 #if FEATURE_STACK_FP_X87
12627 regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
12629 memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12630 memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12632 // Setup fp state on block entry
12633 genSetupStateStackFP(block);
12636 if (compiler->verbose)
12641 #endif // FEATURE_STACK_FP_X87
12643 /* Make sure we keep track of what pointers are live */
12645 noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12646 gcInfo.gcRegGCrefSetCur = gcrefRegs;
12647 gcInfo.gcRegByrefSetCur = byrefRegs;
12649 /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12650 represent the exception object (TYP_REF).
12651 We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12652 to the block, it will be the first thing evaluated
12653 (thanks to GTF_ORDER_SIDEEFF).
12656 if (handlerGetsXcptnObj(block->bbCatchTyp))
12658 GenTreePtr firstStmt = block->FirstNonPhiDef();
12659 if (firstStmt != NULL)
12661 GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
12662 if (compiler->gtHasCatchArg(firstTree))
12664 gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12669 /* Start a new code output block */
12670 CLANG_FORMAT_COMMENT_ANCHOR;
12672 #if FEATURE_EH_FUNCLETS
12673 #if defined(_TARGET_ARM_)
12674 genInsertNopForUnwinder(block);
12675 #endif // defined(_TARGET_ARM_)
12677 genUpdateCurrentFunclet(block);
12678 #endif // FEATURE_EH_FUNCLETS
12680 #ifdef _TARGET_XARCH_
12681 if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12683 getEmitter()->emitLoopAlign();
12688 if (compiler->opts.dspCode)
12689 printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12692 block->bbEmitCookie = NULL;
12694 if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
12696 /* Mark a label and update the current set of live GC refs */
12698 block->bbEmitCookie =
12699 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
12700 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12701 /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
12708 if (block == compiler->fgFirstColdBlock)
12711 if (compiler->verbose)
12713 printf("\nThis is the start of the cold region of the method\n");
12716 // We should never have a block that falls through into the Cold section
12717 noway_assert(!lblk->bbFallsThrough());
12719 // We require the block that starts the Cold section to have a label
12720 noway_assert(block->bbEmitCookie);
12721 getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12724 /* Both stacks are always empty on entry to a basic block */
12727 #if FEATURE_STACK_FP_X87
12728 genResetFPstkLevel();
12729 #endif // FEATURE_STACK_FP_X87
12731 genAdjustStackLevel(block);
12733 savedStkLvl = genStackLevel;
12735 /* Tell everyone which basic block we're working on */
12737 compiler->compCurBB = block;
12739 siBeginBlock(block);
12741 // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12742 if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12743 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
12745 bool firstMapping = true;
12747 /*---------------------------------------------------------------------
12749 * Generate code for each statement-tree in the block
12752 CLANG_FORMAT_COMMENT_ANCHOR;
12754 #if FEATURE_EH_FUNCLETS
12755 if (block->bbFlags & BBF_FUNCLET_BEG)
12757 genReserveFuncletProlog(block);
12759 #endif // FEATURE_EH_FUNCLETS
12761 for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
12763 noway_assert(stmt->gtOper == GT_STMT);
12765 /* Do we have a new IL-offset ? */
12767 if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12769 /* Create and append a new IP-mapping entry */
12770 genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping);
12771 firstMapping = false;
12775 if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
12777 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
12778 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
12780 while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
12782 genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
12788 /* Get hold of the statement tree */
12789 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
12793 if (compiler->verbose)
12795 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
12796 printf("Holding variables: ");
12797 dspRegMask(regSet.rsMaskVars);
12799 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
12801 #if FEATURE_STACK_FP_X87
12805 printf("Execution Order:\n");
12806 for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
12808 compiler->gtDispTree(treeNode, 0, NULL, true);
12812 totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
12813 totalCostSz += stmt->gtCostSz;
12816 compiler->compCurStmt = stmt;
12818 compiler->compCurLifeTree = NULL;
12819 switch (tree->gtOper)
12822 // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
12823 // reported as alive even though not used within the caller for managed debugger sake. So
12824 // consider the return value of the method as used if generating debuggable code.
12825 genCodeForCall(tree->AsCall(), compiler->opts.MinOpts() || compiler->opts.compDbgCode);
12826 genUpdateLife(tree);
12827 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
12833 // Just do the side effects
12834 genEvalSideEffects(tree);
12838 /* Generate code for the tree */
12840 genCodeForTree(tree, 0);
12844 regSet.rsSpillChk();
12846 /* The value of the tree isn't used, unless it's a return stmt */
12848 if (tree->gtOper != GT_RETURN)
12849 gcInfo.gcMarkRegPtrVal(tree);
12851 #if FEATURE_STACK_FP_X87
12852 genEndOfStatement();
12856 /* Make sure we didn't bungle pointer register tracking */
12858 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
12859 regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
12861 // If return is a GC-type, clear it. Note that if a common
12862 // epilog is generated (compiler->genReturnBB) it has a void return
12863 // even though we might return a ref. We can't use the compRetType
12864 // as the determiner because something we are tracking as a byref
12865 // might be used as a return value of a int function (which is legal)
12866 if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
12867 (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
12869 nonVarPtrRegs &= ~RBM_INTRET;
12872 // When profiling, the first statement in a catch block will be the
12873 // harmless "inc" instruction (does not interfere with the exception
12876 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) &&
12877 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
12879 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
12884 printf("Regset after tree=");
12885 Compiler::printTreeID(tree);
12886 printf(" BB%02u gcr=", block->bbNum);
12887 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12888 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12890 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12891 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12892 printf(", regVars=");
12893 printRegMaskInt(regSet.rsMaskVars);
12894 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
12898 noway_assert(nonVarPtrRegs == 0);
12901 noway_assert(stmt->gtOper == GT_STMT);
12903 genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
12905 } //-------- END-FOR each statement-tree of the current block ---------
12907 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
12911 /* Is this the last block, and are there any open scopes left ? */
12913 bool isLastBlockProcessed = (block->bbNext == NULL);
12914 if (block->isBBCallAlwaysPair())
12916 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
12919 if (isLastBlockProcessed && siOpenScopeList.scNext)
12921 /* This assert no longer holds, because we may insert a throw
12922 block to demarcate the end of a try or finally region when they
12923 are at the end of the method. It would be nice if we could fix
12924 our code so that this throw block will no longer be necessary. */
12926 // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
12928 siCloseAllOpenScopes();
12932 SubtractStackLevel(savedStkLvl);
12934 gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
12936 if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
12937 compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
12939 /* Both stacks should always be empty on exit from a basic block */
12941 noway_assert(genStackLevel == 0);
12942 #if FEATURE_STACK_FP_X87
12943 noway_assert(genGetFPstkLevel() == 0);
12945 // Do the FPState matching that may have to be done
12946 genCodeForEndBlockTransitionStackFP(block);
12949 noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
12951 /* Do we need to generate a jump or return? */
12953 switch (block->bbJumpKind)
12956 inst_JMP(EJ_jmp, block->bbJumpDest);
12960 genExitCode(block);
12964 // If we have a throw at the end of a function or funclet, we need to emit another instruction
12965 // afterwards to help the OS unwinder determine the correct context during unwind.
12966 // We insert an unexecuted breakpoint instruction in several situations
12967 // following a throw instruction:
12968 // 1. If the throw is the last instruction of the function or funclet. This helps
12969 // the OS unwinder determine the correct context during an unwind from the
12970 // thrown exception.
12971 // 2. If this is this is the last block of the hot section.
12972 // 3. If the subsequent block is a special throw block.
12973 if ((block->bbNext == NULL)
12974 #if FEATURE_EH_FUNCLETS
12975 || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
12976 #endif // FEATURE_EH_FUNCLETS
12977 || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
12978 block->bbNext == compiler->fgFirstColdBlock)
12980 instGen(INS_BREAKPOINT); // This should never get executed
12985 case BBJ_CALLFINALLY:
12987 #if defined(_TARGET_X86_)
12989 /* If we are about to invoke a finally locally from a try block,
12990 we have to set the hidden slot corresponding to the finally's
12991 nesting level. When invoked in response to an exception, the
12992 EE usually does it.
12994 We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
12996 This code depends on this order not being messed up.
13003 step: mov [ebp- n ],0
13008 noway_assert(isFramePointerUsed());
13010 // Get the nesting level which contains the finally
13011 compiler->fgGetNestingLevel(block, &finallyNesting);
13013 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13014 unsigned filterEndOffsetSlotOffs;
13015 filterEndOffsetSlotOffs =
13016 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
13018 unsigned curNestingSlotOffs;
13019 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
13021 // Zero out the slot for the next nesting level
13022 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
13023 curNestingSlotOffs - sizeof(void*));
13025 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
13026 curNestingSlotOffs);
13028 // Now push the address of where the finally funclet should
13029 // return to directly.
13030 if (!(block->bbFlags & BBF_RETLESS_CALL))
13032 assert(block->isBBCallAlwaysPair());
13033 getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13037 // EE expects a DWORD, so we give him 0
13038 inst_IV(INS_push_hide, 0);
13041 // Jump to the finally BB
13042 inst_JMP(EJ_jmp, block->bbJumpDest);
13044 #elif defined(_TARGET_ARM_)
13046 // Now set REG_LR to the address of where the finally funclet should
13047 // return to directly.
13049 BasicBlock* bbFinallyRet;
13050 bbFinallyRet = NULL;
13052 // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13053 // we would have otherwise created retless calls.
13054 assert(block->isBBCallAlwaysPair());
13056 assert(block->bbNext != NULL);
13057 assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
13058 assert(block->bbNext->bbJumpDest != NULL);
13059 assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13061 bbFinallyRet = block->bbNext->bbJumpDest;
13062 bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13064 // Load the address where the finally funclet should return into LR.
13065 // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13067 genMov32RelocatableDisplacement(bbFinallyRet, REG_LR);
13068 regTracker.rsTrackRegTrash(REG_LR);
13070 // Jump to the finally BB
13071 inst_JMP(EJ_jmp, block->bbJumpDest);
13076 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13077 // jump target using bbJumpDest - that is already used to point
13078 // to the finally block. So just skip past the BBJ_ALWAYS unless the
13079 // block is RETLESS.
13080 if (!(block->bbFlags & BBF_RETLESS_CALL))
13082 assert(block->isBBCallAlwaysPair());
13085 block = block->bbNext;
13089 #ifdef _TARGET_ARM_
13091 case BBJ_EHCATCHRET:
13092 // set r0 to the address the VM should return to after the catch
13093 genMov32RelocatableDisplacement(block->bbJumpDest, REG_R0);
13094 regTracker.rsTrackRegTrash(REG_R0);
13098 case BBJ_EHFINALLYRET:
13099 case BBJ_EHFILTERRET:
13100 genReserveFuncletEpilog(block);
13103 #else // _TARGET_ARM_
13105 case BBJ_EHFINALLYRET:
13106 case BBJ_EHFILTERRET:
13107 case BBJ_EHCATCHRET:
13110 #endif // _TARGET_ARM_
13118 noway_assert(!"Unexpected bbJumpKind");
13123 compiler->compCurBB = 0;
13126 } //------------------ END-FOR each block of the method -------------------
13128 /* Nothing is live at this point */
13129 genUpdateLife(VarSetOps::MakeEmpty(compiler));
13131 /* Finalize the spill tracking logic */
13133 regSet.rsSpillEnd();
13135 /* Finalize the temp tracking logic */
13137 compiler->tmpEnd();
13140 if (compiler->verbose)
13143 printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
13144 printf("%s\n", compiler->info.compFullName);
13149 #pragma warning(pop)
13152 /*****************************************************************************
13154 * Generate code for a long operation.
13155 * needReg is a recommendation of which registers to use for the tree.
13156 * For partially enregistered longs, the tree will be marked as in a register
13157 * without loading the stack part into a register. Note that only leaf
13158 * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13159 * enregistered so that we can know the memory location of the other half.
13163 #pragma warning(push)
13164 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
13166 void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
13171 regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13176 noway_assert(tree);
13177 noway_assert(tree->gtOper != GT_STMT);
13178 noway_assert(genActualType(tree->gtType) == TYP_LONG);
13180 /* Figure out what kind of a node we have */
13182 oper = tree->OperGet();
13183 kind = tree->OperKind();
13188 regPair = tree->gtRegPair;
13190 gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13195 /* Is this a constant node? */
13197 if (kind & GTK_CONST)
13201 /* Pick a register pair for the value */
13203 regPair = regSet.rsPickRegPair(needReg);
13205 /* Load the value into the registers */
13206 CLANG_FORMAT_COMMENT_ANCHOR;
13208 #if !CPU_HAS_FP_SUPPORT
13209 if (oper == GT_CNS_DBL)
13211 noway_assert(sizeof(__int64) == sizeof(double));
13213 noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
13215 lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13220 noway_assert(oper == GT_CNS_LNG);
13222 lval = tree->gtLngCon.gtLconVal;
13225 genSetRegToIcon(genRegPairLo(regPair), int(lval));
13226 genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13230 /* Is this a leaf node? */
13232 if (kind & GTK_LEAF)
13240 /* This case has to consider the case in which an int64 LCL_VAR
13241 * may both be enregistered and also have a cached copy of itself
13242 * in a different set of registers.
13243 * We want to return the registers that have the most in common
13244 * with the needReg mask
13247 /* Does the var have a copy of itself in the cached registers?
13248 * And are these cached registers both free?
13249 * If so use these registers if they match any needReg.
13252 regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13254 if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13255 ((genRegPairMask(regPair) & needReg) != RBM_NONE))
13260 /* Does the variable live in a register?
13261 * If so use these registers.
13263 if (genMarkLclVar(tree))
13266 /* If tree is not an enregistered variable then
13267 * be sure to use any cached register that contain
13268 * a copy of this local variable
13270 if (regPair != REG_PAIR_NONE)
13279 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13280 // to worry about it being enregistered.
13281 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13287 /* Pick a register pair for the value */
13289 regPair = regSet.rsPickRegPair(needReg);
13291 /* Load the value into the registers */
13293 instruction loadIns;
13295 loadIns = ins_Load(TYP_INT); // INS_ldr
13296 regLo = genRegPairLo(regPair);
13297 regHi = genRegPairHi(regPair);
13299 #if CPU_LOAD_STORE_ARCH
13301 regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13302 inst_RV_TT(INS_lea, regAddr, tree, 0);
13303 regTracker.rsTrackRegTrash(regAddr);
13305 if (regLo != regAddr)
13307 // assert(regLo != regAddr); // forced by if statement
13308 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13309 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13313 // assert(regHi != regAddr); // implied by regpair property and the if statement
13314 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13315 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13319 inst_RV_TT(loadIns, regLo, tree, 0);
13320 inst_RV_TT(loadIns, regHi, tree, 4);
13323 #ifdef _TARGET_ARM_
13324 if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13326 // Emit a memory barrier instruction after the load
13327 instGen_MemoryBarrier();
13331 regTracker.rsTrackRegTrash(regLo);
13332 regTracker.rsTrackRegTrash(regHi);
13338 compiler->gtDispTree(tree);
13340 noway_assert(!"unexpected leaf");
13344 /* Is it a 'simple' unary/binary operator? */
13346 if (kind & GTK_SMPOP)
13352 bool setCarry = false;
13355 GenTreePtr op1 = tree->gtOp.gtOp1;
13356 GenTreePtr op2 = tree->gtGetOp2IfPresent();
13362 unsigned lclVarNum = compiler->lvaCount;
13363 unsigned lclVarILoffs = DUMMY_INIT(0);
13365 /* Is the target a local ? */
13367 if (op1->gtOper == GT_LCL_VAR)
13369 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
13372 noway_assert(varNum < compiler->lvaCount);
13373 varDsc = compiler->lvaTable + varNum;
13375 // No dead stores, (with min opts we may have dead stores)
13376 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13378 /* For non-debuggable code, every definition of a lcl-var has
13379 * to be checked to see if we need to open a new scope for it.
13380 * Remember the local var info to call siCheckVarScope
13381 * AFTER codegen of the assignment.
13383 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
13384 (compiler->info.compVarScopesCount > 0))
13386 lclVarNum = varNum;
13387 lclVarILoffs = op1->gtLclVar.gtLclILoffs;
13390 /* Has the variable been assigned to a register (pair) ? */
13392 if (genMarkLclVar(op1))
13394 noway_assert(op1->InReg());
13395 regPair = op1->gtRegPair;
13396 regLo = genRegPairLo(regPair);
13397 regHi = genRegPairHi(regPair);
13398 noway_assert(regLo != regHi);
13400 /* Is the value being assigned a constant? */
13402 if (op2->gtOper == GT_CNS_LNG)
13404 /* Move the value into the target */
13406 genMakeRegPairAvailable(regPair);
13409 if (regLo == REG_STK)
13411 ins = ins_Store(TYP_INT);
13415 // Always do the stack first (in case it grabs a register it can't
13416 // clobber regLo this way)
13417 if (regHi == REG_STK)
13419 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13423 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
13425 // The REG_STK case has already been handled
13426 if (regHi != REG_STK)
13429 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13432 goto DONE_ASSG_REGS;
13435 /* Compute the RHS into desired register pair */
13437 if (regHi != REG_STK)
13439 genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13440 noway_assert(op2->InReg());
13441 noway_assert(op2->gtRegPair == regPair);
13449 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13451 noway_assert(op2->InReg());
13453 curPair = op2->gtRegPair;
13454 curLo = genRegPairLo(curPair);
13455 curHi = genRegPairHi(curPair);
13457 /* move high first, target is on stack */
13458 inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13460 if (regLo != curLo)
13462 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13463 regSet.rsSpillReg(regLo);
13464 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13465 regTracker.rsTrackRegCopy(regLo, curLo);
13469 genReleaseRegPair(op2);
13470 goto DONE_ASSG_REGS;
13474 /* Is the value being assigned a constant? */
13476 if (op2->gtOper == GT_CNS_LNG)
13478 /* Make the target addressable */
13480 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13482 /* Move the value into the target */
13484 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
13485 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13487 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13493 /* Catch a case where can avoid generating op reg, mem. Better pairing
13498 * To avoid problems with order of evaluation, only do this if op2 is
13499 * a non-enregistered local variable
13502 if (GenTree::OperIsCommutative(oper) &&
13503 op1->gtOper == GT_LCL_VAR &&
13504 op2->gtOper == GT_LCL_VAR)
13506 regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13508 /* Is op2 a non-enregistered local variable? */
13509 if (regPair == REG_PAIR_NONE)
13511 regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13513 /* Is op1 an enregistered local variable? */
13514 if (regPair != REG_PAIR_NONE)
13516 /* Swap the operands */
13517 GenTreePtr op = op1;
13525 /* Eliminate worthless assignment "lcl = lcl" */
13527 if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
13528 op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
13530 genUpdateLife(op2);
13534 if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
13535 // op1,op2 need to be materialized in the correct order.
13536 (tree->gtFlags & GTF_REVERSE_OPS))
13538 /* Generate the small RHS into a register pair */
13540 GenTreePtr smallOpr = op2->gtOp.gtOp1;
13542 genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13544 /* Make the target addressable */
13546 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13548 /* Make sure everything is still addressable */
13550 genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13551 noway_assert(smallOpr->InReg());
13552 regHi = smallOpr->gtRegNum;
13553 addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13555 // conv.ovf.u8 could overflow if the original number was negative
13556 if (op2->gtOverflow())
13558 noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
13559 0); // conv.ovf.u8.un should be bashed to conv.u8.un
13560 instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
13561 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13562 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13565 /* Move the value into the target */
13567 inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13568 inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
13570 /* Free up anything that was tied up by either side */
13572 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13573 genReleaseReg(smallOpr);
13576 if (op1->gtOper == GT_LCL_VAR)
13578 /* clear this local from reg table */
13579 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13581 /* mark RHS registers as containing the local var */
13582 regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13588 /* Is the LHS more complex than the RHS? */
13590 if (tree->gtFlags & GTF_REVERSE_OPS)
13592 /* Generate the RHS into a register pair */
13594 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13595 noway_assert(op2->InReg());
13597 /* Make the target addressable */
13598 op1 = genCodeForCommaTree(op1);
13599 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13601 /* Make sure the RHS register hasn't been spilled */
13603 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13607 /* Make the target addressable */
13609 op1 = genCodeForCommaTree(op1);
13610 addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13612 /* Generate the RHS into a register pair */
13614 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13617 /* Lock 'op2' and make sure 'op1' is still addressable */
13619 noway_assert(op2->InReg());
13620 regPair = op2->gtRegPair;
13622 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13624 /* Move the value into the target */
13626 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13627 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13629 /* Free up anything that was tied up by either side */
13631 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13632 genReleaseRegPair(op2);
13638 if (op1->gtOper == GT_LCL_VAR)
13640 /* Clear this local from reg table */
13642 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13644 if ((op2->InReg()) &&
13645 /* constant has precedence over local */
13646 // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13647 tree->gtOper == GT_ASG)
13651 /* mark RHS registers as containing the local var */
13653 regNo = genRegPairLo(op2->gtRegPair);
13654 if (regNo != REG_STK)
13655 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13657 regNo = genRegPairHi(op2->gtRegPair);
13658 if (regNo != REG_STK)
13660 /* For partially enregistered longs, we might have
13661 stomped on op2's hiReg */
13662 if (!(op1->InReg()) || regNo != genRegPairLo(op1->gtRegPair))
13664 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13673 genUpdateLife(op1);
13674 genUpdateLife(tree);
13676 /* For non-debuggable code, every definition of a lcl-var has
13677 * to be checked to see if we need to open a new scope for it.
13679 if (lclVarNum < compiler->lvaCount)
13680 siCheckVarScope(lclVarNum, lclVarILoffs);
13698 ovfl = tree->gtOverflow();
13702 insLo = insHi = INS_AND;
13705 insLo = insHi = INS_OR;
13708 insLo = insHi = INS_XOR;
13717 /* The following makes an assumption about gtSetEvalOrder(this) */
13719 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13721 /* Special case: check for "(long(intval) << 32) | longval" */
13723 if (oper == GT_OR && op1->gtOper == GT_LSH)
13725 GenTreePtr lshLHS = op1->gtOp.gtOp1;
13726 GenTreePtr lshRHS = op1->gtOp.gtOp2;
13728 if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13729 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13732 /* Throw away the cast of the shift operand. */
13734 op1 = lshLHS->gtCast.CastOp();
13736 /* Special case: check op2 for "ulong(intval)" */
13737 if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
13738 genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
13740 /* Throw away the cast of the second operand. */
13742 op2 = op2->gtCast.CastOp();
13743 goto SIMPLE_OR_LONG;
13745 /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
13746 else if (op2->gtOper == GT_AND)
13749 andLHS = op2->gtOp.gtOp1;
13751 andRHS = op2->gtOp.gtOp2;
13753 if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13754 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13755 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13757 /* Throw away the cast of the second operand. */
13759 op2 = andLHS->gtCast.CastOp();
13762 // Load the high DWORD, ie. op1
13764 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13766 noway_assert(op1->InReg());
13767 regHi = op1->gtRegNum;
13768 regSet.rsMarkRegUsed(op1);
13770 // Load the low DWORD, ie. op2
13772 genCodeForTree(op2, needReg & ~genRegMask(regHi));
13774 noway_assert(op2->InReg());
13775 regLo = op2->gtRegNum;
13777 /* Make sure regHi is still around. Also, force
13778 regLo to be excluded in case regLo==regHi */
13780 genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
13781 regHi = op1->gtRegNum;
13783 regPair = gen2regs2pair(regLo, regHi);
13788 /* Generate the following sequence:
13789 Prepare op1 (discarding shift)
13790 Compute op2 into some regpair
13794 /* First, make op1 addressable */
13796 /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
13798 It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
13799 which the long value is computed is from needReg. But at this point the safest fix is
13800 to exclude regSet.rsMaskResvd.
13802 Note that needReg could be the set of free registers (excluding reserved ones). If we don't
13803 exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
13805 reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
13807 regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
13809 addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
13811 genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
13813 noway_assert(op2->InReg());
13814 regPair = op2->gtRegPair;
13815 regHi = genRegPairHi(regPair);
13817 /* The operand might have interfered with the address */
13819 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13821 /* Now compute the result */
13823 inst_RV_TT(insHi, regHi, op1, 0);
13825 regTracker.rsTrackRegTrash(regHi);
13827 /* Free up anything that was tied up by the LHS */
13829 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13831 /* The result is where the second operand is sitting */
13833 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
13835 regPair = op2->gtRegPair;
13840 /* Special case: check for "longval | (long(intval) << 32)" */
13842 if (oper == GT_OR && op2->gtOper == GT_LSH)
13844 GenTreePtr lshLHS = op2->gtOp.gtOp1;
13845 GenTreePtr lshRHS = op2->gtOp.gtOp2;
13847 if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13848 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13851 /* We throw away the cast of the shift operand. */
13853 op2 = lshLHS->gtCast.CastOp();
13855 /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
13857 if (op1->gtOper == GT_AND)
13859 GenTreePtr andLHS = op1->gtOp.gtOp1;
13860 GenTreePtr andRHS = op1->gtOp.gtOp2;
13862 if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13863 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13864 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13866 /* Throw away the cast of the first operand. */
13868 op1 = andLHS->gtCast.CastOp();
13870 // Load the low DWORD, ie. op1
13872 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13874 noway_assert(op1->InReg());
13875 regLo = op1->gtRegNum;
13876 regSet.rsMarkRegUsed(op1);
13878 // Load the high DWORD, ie. op2
13880 genCodeForTree(op2, needReg & ~genRegMask(regLo));
13882 noway_assert(op2->InReg());
13883 regHi = op2->gtRegNum;
13885 /* Make sure regLo is still around. Also, force
13886 regHi to be excluded in case regLo==regHi */
13888 genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
13889 regLo = op1->gtRegNum;
13891 regPair = gen2regs2pair(regLo, regHi);
13896 /* Generate the following sequence:
13897 Compute op1 into some regpair
13898 Make op2 (ignoring shift) addressable
13902 // First, generate the first operand into some register
13904 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13905 noway_assert(op1->InReg());
13907 /* Make the second operand addressable */
13909 addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
13911 /* Make sure the result is in a free register pair */
13913 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13914 regPair = op1->gtRegPair;
13915 regHi = genRegPairHi(regPair);
13917 /* The operand might have interfered with the address */
13919 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
13921 /* Compute the new value */
13923 inst_RV_TT(insHi, regHi, op2, 0);
13925 /* The value in the high register has been trashed */
13927 regTracker.rsTrackRegTrash(regHi);
13933 /* Generate the first operand into registers */
13935 if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13936 ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
13938 regPair = regSet.rsPickRegPair(needReg);
13939 genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13943 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13945 noway_assert(op1->InReg());
13947 regPair = op1->gtRegPair;
13948 op1Mask = genRegPairMask(regPair);
13950 /* Make the second operand addressable */
13951 regMaskTP needReg2;
13952 needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
13953 addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
13955 // TODO: If 'op1' got spilled and 'op2' happens to be
13956 // TODO: in a register, and we have add/mul/and/or/xor,
13957 // TODO: reverse the operands since we can perform the
13958 // TODO: operation directly with the spill temp, e.g.
13959 // TODO: 'add regHi, [temp]'.
13961 /* Make sure the result is in a free register pair */
13963 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13964 regPair = op1->gtRegPair;
13965 op1Mask = genRegPairMask(regPair);
13967 regLo = genRegPairLo(regPair);
13968 regHi = genRegPairHi(regPair);
13970 /* Make sure that we don't spill regLo/regHi below */
13971 regSet.rsLockUsedReg(op1Mask);
13973 /* The operand might have interfered with the address */
13975 addrReg = genKeepAddressable(op2, addrReg);
13977 /* The value in the register pair is about to be trashed */
13979 regTracker.rsTrackRegTrash(regLo);
13980 regTracker.rsTrackRegTrash(regHi);
13982 /* Compute the new value */
13987 if (op2->gtOper == GT_CNS_LNG)
13989 __int64 icon = op2->gtLngCon.gtLconVal;
13991 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
13996 if ((int)(icon) == -1)
13998 if ((int)(icon >> 32) == -1)
14001 if (!(icon & I64(0x00000000FFFFFFFF)))
14003 genSetRegToIcon(regLo, 0);
14007 if (!(icon & I64(0xFFFFFFFF00000000)))
14009 /* Just to always set low first*/
14013 inst_RV_TT(insLo, regLo, op2, 0);
14016 genSetRegToIcon(regHi, 0);
14024 if (!(icon & I64(0x00000000FFFFFFFF)))
14026 if (!(icon & I64(0xFFFFFFFF00000000)))
14034 // Fix 383813 X86/ARM ILGEN
14035 // Fix 383793 ARM ILGEN
14036 // Fix 383911 ARM ILGEN
14038 newMask = addrReg & ~op1Mask;
14039 regSet.rsLockUsedReg(newMask);
14043 insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14044 inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14048 insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14049 inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14052 regSet.rsUnlockUsedReg(newMask);
14053 regSet.rsUnlockUsedReg(op1Mask);
14057 /* Free up anything that was tied up by the LHS */
14059 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14061 /* The result is where the first operand is sitting */
14063 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14065 regPair = op1->gtRegPair;
14068 genCheckOverflow(tree);
14074 regPair = genCodeForLongModInt(tree, needReg);
14079 /* Special case: both operands promoted from int */
14081 assert(tree->gtIsValid64RsltMul());
14083 /* Change to an integer multiply temporarily */
14085 tree->gtType = TYP_INT;
14087 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14088 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14089 tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14091 assert(tree->gtFlags & GTF_MUL_64RSLT);
14093 #if defined(_TARGET_X86_)
14094 // imul on x86 requires EDX:EAX
14095 genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14096 noway_assert(tree->InReg());
14097 noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
14098 #elif defined(_TARGET_ARM_)
14099 genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14100 noway_assert(tree->InReg());
14102 assert(!"Unsupported target for 64-bit multiply codegen");
14105 /* Restore gtType, op1 and op2 from the change above */
14107 tree->gtType = TYP_LONG;
14108 tree->gtOp.gtOp1 = op1;
14109 tree->gtOp.gtOp2 = op2;
14111 #if defined(_TARGET_X86_)
14112 /* The result is now in EDX:EAX */
14113 regPair = REG_PAIR_EAXEDX;
14114 #elif defined(_TARGET_ARM_)
14115 regPair = tree->gtRegPair;
14120 helper = CORINFO_HELP_LLSH;
14123 helper = CORINFO_HELP_LRSH;
14126 helper = CORINFO_HELP_LRSZ;
14131 noway_assert(op1->gtType == TYP_LONG);
14132 noway_assert(genActualType(op2->gtType) == TYP_INT);
14134 /* Is the second operand a constant? */
14136 if (op2->gtOper == GT_CNS_INT)
14138 unsigned int count = op2->gtIntCon.gtIconVal;
14140 /* Compute the left operand into a free register pair */
14142 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14143 noway_assert(op1->InReg());
14145 regPair = op1->gtRegPair;
14146 regLo = genRegPairLo(regPair);
14147 regHi = genRegPairHi(regPair);
14149 /* Assume the value in the register pair is trashed. In some cases, though,
14150 a register might be set to zero, and we can use that information to improve
14151 some code generation.
14154 regTracker.rsTrackRegTrash(regLo);
14155 regTracker.rsTrackRegTrash(regHi);
14157 /* Generate the appropriate shift instructions */
14164 // regHi, regLo are correct
14166 else if (count < 32)
14168 #if defined(_TARGET_XARCH_)
14169 inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14170 #elif defined(_TARGET_ARM_)
14171 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14172 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
14173 INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14176 #endif // _TARGET_*
14177 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14179 else // count >= 32
14181 assert(count >= 32);
14184 #if defined(_TARGET_ARM_)
14187 // mov low dword into high dword (i.e. shift left by 32-bits)
14188 inst_RV_RV(INS_mov, regHi, regLo);
14192 assert(count > 32 && count < 64);
14193 getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
14197 // mov low dword into high dword (i.e. shift left by 32-bits)
14198 inst_RV_RV(INS_mov, regHi, regLo);
14201 // Shift high dword left by count - 32
14202 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14204 #endif // _TARGET_*
14206 else // count >= 64
14208 assert(count >= 64);
14209 genSetRegToIcon(regHi, 0);
14211 genSetRegToIcon(regLo, 0);
14218 // regHi, regLo are correct
14220 else if (count < 32)
14222 #if defined(_TARGET_XARCH_)
14223 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14224 #elif defined(_TARGET_ARM_)
14225 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14226 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14227 INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14230 #endif // _TARGET_*
14231 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14233 else // count >= 32
14235 assert(count >= 32);
14238 #if defined(_TARGET_ARM_)
14241 // mov high dword into low dword (i.e. shift right by 32-bits)
14242 inst_RV_RV(INS_mov, regLo, regHi);
14246 assert(count > 32 && count < 64);
14247 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
14251 // mov high dword into low dword (i.e. shift right by 32-bits)
14252 inst_RV_RV(INS_mov, regLo, regHi);
14255 // Shift low dword right by count - 32
14256 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14258 #endif // _TARGET_*
14261 // Propagate sign bit in high dword
14262 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14266 // Propagate the sign from the high dword
14267 inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14275 // regHi, regLo are correct
14277 else if (count < 32)
14279 #if defined(_TARGET_XARCH_)
14280 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14281 #elif defined(_TARGET_ARM_)
14282 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14283 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14284 INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14287 #endif // _TARGET_*
14288 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14290 else // count >= 32
14292 assert(count >= 32);
14295 #if defined(_TARGET_ARM_)
14298 // mov high dword into low dword (i.e. shift right by 32-bits)
14299 inst_RV_RV(INS_mov, regLo, regHi);
14303 assert(count > 32 && count < 64);
14304 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
14308 // mov high dword into low dword (i.e. shift right by 32-bits)
14309 inst_RV_RV(INS_mov, regLo, regHi);
14312 // Shift low dword right by count - 32
14313 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14315 #endif // _TARGET_*
14317 else // count >= 64
14319 assert(count >= 64);
14320 genSetRegToIcon(regLo, 0);
14322 genSetRegToIcon(regHi, 0);
14327 noway_assert(!"Illegal oper for long shift");
14334 /* Which operand are we supposed to compute first? */
14336 assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
14338 if (tree->gtFlags & GTF_REVERSE_OPS)
14340 /* The second operand can't be a constant */
14342 noway_assert(op2->gtOper != GT_CNS_INT);
14344 /* Load the shift count, hopefully into RBM_SHIFT */
14345 RegSet::ExactReg exactReg;
14346 if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14347 exactReg = RegSet::EXACT_REG;
14349 exactReg = RegSet::ANY_REG;
14350 genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14352 /* Compute the left operand into REG_LNGARG_0 */
14354 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14355 noway_assert(op1->InReg());
14357 /* Lock op1 so that it doesn't get trashed */
14359 regSet.rsLockUsedReg(RBM_LNGARG_0);
14361 /* Make sure the shift count wasn't displaced */
14363 genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14367 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14371 /* Compute the left operand into REG_LNGARG_0 */
14373 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14374 noway_assert(op1->InReg());
14376 /* Compute the shift count into RBM_SHIFT */
14378 genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14382 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14384 /* Make sure the value hasn't been displaced */
14386 genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14388 /* Lock op1 so that it doesn't get trashed */
14390 regSet.rsLockUsedReg(RBM_LNGARG_0);
14393 #ifndef _TARGET_X86_
14394 /* The generic helper is a C-routine and so it follows the full ABI */
14396 /* Spill any callee-saved registers which are being used */
14397 regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14399 /* But do not spill our argument registers. */
14400 spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14404 regSet.rsSpillRegs(spillRegs);
14407 #endif // !_TARGET_X86_
14409 /* Perform the shift by calling a helper function */
14411 noway_assert(op1->gtRegPair == REG_LNGARG_0);
14412 noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14413 noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14415 genEmitHelperCall(helper,
14417 EA_8BYTE); // retSize
14419 #ifdef _TARGET_X86_
14420 /* The value in the register pair is trashed */
14422 regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14423 regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14424 #else // _TARGET_X86_
14425 /* The generic helper is a C-routine and so it follows the full ABI */
14426 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14427 #endif // _TARGET_X86_
14429 /* Release both operands */
14431 regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14432 genReleaseRegPair(op1);
14433 genReleaseReg(op2);
14437 noway_assert(op1->InReg());
14438 regPair = op1->gtRegPair;
14444 /* Generate the operand into some register pair */
14446 genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14447 noway_assert(op1->InReg());
14449 regPair = op1->gtRegPair;
14451 /* Figure out which registers the value is in */
14453 regLo = genRegPairLo(regPair);
14454 regHi = genRegPairHi(regPair);
14456 /* The value in the register pair is about to be trashed */
14458 regTracker.rsTrackRegTrash(regLo);
14459 regTracker.rsTrackRegTrash(regHi);
14461 /* Unary "neg": negate the value in the register pair */
14462 if (oper == GT_NEG)
14464 #ifdef _TARGET_ARM_
14466 // ARM doesn't have an opcode that sets the carry bit like
14467 // x86, so we can't use neg/addc/neg. Instead we use subtract
14468 // with carry. Too bad this uses an extra register.
14470 // Lock regLo and regHi so we don't pick them, and then pick
14471 // a third register to be our 0.
14472 regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14473 regSet.rsLockReg(regPairMask);
14474 regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14475 regNumber regZero = genGetRegSetToIcon(0, regBest);
14476 regSet.rsUnlockReg(regPairMask);
14478 inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14479 getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
14481 #elif defined(_TARGET_XARCH_)
14483 inst_RV(INS_NEG, regLo, TYP_LONG);
14484 inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14485 inst_RV(INS_NEG, regHi, TYP_LONG);
14487 NYI("GT_NEG on TYP_LONG");
14492 /* Unary "not": flip all the bits in the register pair */
14494 inst_RV(INS_NOT, regLo, TYP_LONG);
14495 inst_RV(INS_NOT, regHi, TYP_LONG);
14506 regMaskTP availMask = RBM_ALLINT & ~needReg;
14508 /* Make sure the operand is addressable */
14510 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14512 GenTreePtr addr = oper == GT_IND ? op1 : tree;
14514 /* Pick a register for the value */
14516 regPair = regSet.rsPickRegPair(needReg);
14517 tmpMask = genRegPairMask(regPair);
14519 /* Is there any overlap between the register pair and the address? */
14523 if (tmpMask & addrReg)
14525 /* Does one or both of the target registers overlap? */
14527 if ((tmpMask & addrReg) != tmpMask)
14529 /* Only one register overlaps */
14531 noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14533 /* If the low register overlaps, load the upper half first */
14535 if (addrReg & genRegMask(genRegPairLo(regPair)))
14542 /* The register completely overlaps with the address */
14544 noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14546 /* Can we pick another pair easily? */
14548 regFree = regSet.rsRegMaskFree() & ~addrReg;
14550 regFree &= needReg;
14552 /* More than one free register available? */
14554 if (regFree && !genMaxOneBit(regFree))
14556 regPair = regSet.rsPickRegPair(regFree);
14557 tmpMask = genRegPairMask(regPair);
14561 // printf("Overlap: needReg = %08X\n", needReg);
14563 // Reg-prediction won't allow this
14564 noway_assert((regSet.rsMaskVars & addrReg) == 0);
14566 // Grab one fresh reg, and use any one of addrReg
14568 if (regFree) // Try to follow 'needReg'
14569 regLo = regSet.rsGrabReg(regFree);
14570 else // Pick any reg besides addrReg
14571 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14573 unsigned regBit = 0x1;
14576 for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14578 // Found one of addrReg. Use it.
14579 if (regBit & addrReg)
14582 noway_assert(genIsValidReg(regNo)); // Should have found regNo
14584 regPair = gen2regs2pair(regLo, regNo);
14585 tmpMask = genRegPairMask(regPair);
14590 /* Make sure the value is still addressable */
14592 noway_assert(genStillAddressable(tree));
14594 /* Figure out which registers the value is in */
14596 regLo = genRegPairLo(regPair);
14597 regHi = genRegPairHi(regPair);
14599 /* The value in the register pair is about to be trashed */
14601 regTracker.rsTrackRegTrash(regLo);
14602 regTracker.rsTrackRegTrash(regHi);
14604 /* Load the target registers from where the value is */
14608 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14609 regSet.rsLockReg(genRegMask(regHi));
14610 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14611 regSet.rsUnlockReg(genRegMask(regHi));
14615 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14616 regSet.rsLockReg(genRegMask(regLo));
14617 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14618 regSet.rsUnlockReg(genRegMask(regLo));
14621 #ifdef _TARGET_ARM_
14622 if (tree->gtFlags & GTF_IND_VOLATILE)
14624 // Emit a memory barrier instruction after the load
14625 instGen_MemoryBarrier();
14629 genUpdateLife(tree);
14630 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14636 /* What are we casting from? */
14638 switch (op1->gtType)
14648 regMaskTP hiRegMask;
14649 regMaskTP loRegMask;
14651 // For an unsigned cast we don't need to sign-extend the 32 bit value
14652 if (tree->gtFlags & GTF_UNSIGNED)
14654 // Does needReg have exactly two bits on and thus
14655 // specifies the exact register pair that we want to use
14656 if (!genMaxOneBit(needReg))
14658 regPair = regSet.rsFindRegPairNo(needReg);
14659 if (needReg != genRegPairMask(regPair))
14660 goto ANY_FREE_REG_UNSIGNED;
14661 loRegMask = genRegMask(genRegPairLo(regPair));
14662 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14663 goto ANY_FREE_REG_UNSIGNED;
14664 hiRegMask = genRegMask(genRegPairHi(regPair));
14668 ANY_FREE_REG_UNSIGNED:
14669 loRegMask = needReg;
14670 hiRegMask = needReg;
14673 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14674 noway_assert(op1->InReg());
14676 regLo = op1->gtRegNum;
14677 loRegMask = genRegMask(regLo);
14678 regSet.rsLockUsedReg(loRegMask);
14679 regHi = regSet.rsPickReg(hiRegMask);
14680 regSet.rsUnlockUsedReg(loRegMask);
14682 regPair = gen2regs2pair(regLo, regHi);
14684 // Move 0 to the higher word of the ULong
14685 genSetRegToIcon(regHi, 0, TYP_INT);
14687 /* We can now free up the operand */
14688 genReleaseReg(op1);
14692 #ifdef _TARGET_XARCH_
14693 /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
14694 and we need the result to be in those registers.
14695 cdq is smaller so we use it for SMALL_CODE
14698 if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
14699 (regSet.rsRegMaskFree() & RBM_EDX))
14701 genCodeForTree(op1, RBM_EAX);
14702 regSet.rsMarkRegUsed(op1);
14704 /* If we have to spill EDX, might as well use the faster
14705 sar as the spill will increase code size anyway */
14707 if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
14709 hiRegMask = regSet.rsRegMaskFree();
14710 goto USE_SAR_FOR_CAST;
14713 regSet.rsGrabReg(RBM_EDX);
14714 regTracker.rsTrackRegTrash(REG_EDX);
14716 /* Convert the int in EAX into a long in EDX:EAX */
14720 /* The result is in EDX:EAX */
14722 regPair = REG_PAIR_EAXEDX;
14727 /* use the sar instruction to sign-extend a 32-bit integer */
14729 // Does needReg have exactly two bits on and thus
14730 // specifies the exact register pair that we want to use
14731 if (!genMaxOneBit(needReg))
14733 regPair = regSet.rsFindRegPairNo(needReg);
14734 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14735 goto ANY_FREE_REG_SIGNED;
14736 loRegMask = genRegMask(genRegPairLo(regPair));
14737 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14738 goto ANY_FREE_REG_SIGNED;
14739 hiRegMask = genRegMask(genRegPairHi(regPair));
14743 ANY_FREE_REG_SIGNED:
14744 loRegMask = needReg;
14745 hiRegMask = RBM_NONE;
14748 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14749 #ifdef _TARGET_XARCH_
14752 noway_assert(op1->InReg());
14754 regLo = op1->gtRegNum;
14755 loRegMask = genRegMask(regLo);
14756 regSet.rsLockUsedReg(loRegMask);
14757 regHi = regSet.rsPickReg(hiRegMask);
14758 regSet.rsUnlockUsedReg(loRegMask);
14760 regPair = gen2regs2pair(regLo, regHi);
14762 #ifdef _TARGET_ARM_
14763 /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14764 // Use one instruction instead of two
14765 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
14767 /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14768 inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
14769 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14772 /* The value in the upper register is trashed */
14774 regTracker.rsTrackRegTrash(regHi);
14777 /* We can now free up the operand */
14778 genReleaseReg(op1);
14780 // conv.ovf.u8 could overflow if the original number was negative
14781 if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
14783 regNumber hiReg = genRegPairHi(regPair);
14784 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14785 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14786 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14795 /* Load the FP value onto the coprocessor stack */
14797 genCodeForTreeFlt(op1);
14799 /* Allocate a temp for the long value */
14801 temp = compiler->tmpGetTemp(TYP_LONG);
14803 /* Store the FP value into the temp */
14805 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
14808 /* Pick a register pair for the value */
14810 regPair = regSet.rsPickRegPair(needReg);
14812 /* Figure out which registers the value is in */
14814 regLo = genRegPairLo(regPair);
14815 regHi = genRegPairHi(regPair);
14817 /* The value in the register pair is about to be trashed */
14819 regTracker.rsTrackRegTrash(regLo);
14820 regTracker.rsTrackRegTrash(regHi);
14822 /* Load the converted value into the registers */
14824 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
14825 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
14827 /* We no longer need the temp */
14829 compiler->tmpRlsTemp(temp);
14832 NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
14838 noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
14840 genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
14841 regPair = op1->gtRegPair;
14843 // Do we need to set the sign-flag, or can we checked if it is set?
14844 // and not do this "test" if so.
14848 regNumber hiReg = genRegPairHi(op1->gtRegPair);
14849 noway_assert(hiReg != REG_STK);
14850 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14854 inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
14857 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14858 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14864 compiler->gtDispTree(tree);
14866 NO_WAY("unexpected cast to long");
14873 * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
14874 * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
14875 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
14878 // TODO: this should be done AFTER we called exit mon so that
14879 // we are sure that we don't have to keep 'this' alive
14881 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
14883 /* either it's an "empty" statement or the return statement
14884 of a synchronized method
14887 genPInvokeMethodEpilog();
14890 #if CPU_LONG_USES_REGPAIR
14891 /* There must be a long return value */
14895 /* Evaluate the return value into EDX:EAX */
14897 genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
14899 noway_assert(op1->InReg());
14900 noway_assert(op1->gtRegPair == REG_LNGRET);
14903 NYI("64-bit return");
14906 #ifdef PROFILING_SUPPORTED
14907 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
14908 // the GT_RETURN tree.
14910 if (compiler->compCurBB == compiler->genReturnBB)
14912 genProfilingLeaveCallback();
14918 noway_assert(!"inliner-generated ?: for longs NYI");
14919 NO_WAY("inliner-generated ?: for longs NYI");
14924 if (tree->gtFlags & GTF_REVERSE_OPS)
14927 genCodeForTreeLng(op2, needReg, avoidReg);
14928 genUpdateLife(op2);
14930 noway_assert(op2->InReg());
14932 regSet.rsMarkRegPairUsed(op2);
14934 // Do side effects of op1
14935 genEvalSideEffects(op1);
14937 // Recover op2 if spilled
14938 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
14940 genReleaseRegPair(op2);
14942 genUpdateLife(tree);
14944 regPair = op2->gtRegPair;
14948 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
14950 /* Generate side effects of the first operand */
14952 genEvalSideEffects(op1);
14953 genUpdateLife(op1);
14955 /* Is the value of the second operand used? */
14957 if (tree->gtType == TYP_VOID)
14959 /* The right operand produces no result */
14961 genEvalSideEffects(op2);
14962 genUpdateLife(tree);
14966 /* Generate the second operand, i.e. the 'real' value */
14968 genCodeForTreeLng(op2, needReg, avoidReg);
14970 /* The result of 'op2' is also the final result */
14972 regPair = op2->gtRegPair;
14979 /* Generate the operand, i.e. the 'real' value */
14981 genCodeForTreeLng(op1, needReg, avoidReg);
14983 /* The result of 'op1' is also the final result */
14985 regPair = op1->gtRegPair;
14994 genCodeForTreeLng(op1, needReg, avoidReg);
14995 regPair = op1->gtRegPair;
15003 compiler->gtDispTree(tree);
15005 noway_assert(!"unexpected 64-bit operator");
15008 /* See what kind of a special operator we have here */
15014 retMask = genCodeForCall(tree->AsCall(), true);
15015 if (retMask == RBM_NONE)
15016 regPair = REG_PAIR_NONE;
15018 regPair = regSet.rsFindRegPairNo(retMask);
15023 compiler->gtDispTree(tree);
15025 NO_WAY("unexpected long operator");
15030 genUpdateLife(tree);
15032 /* Here we've computed the value of 'tree' into 'regPair' */
15034 noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15036 genMarkTreeInRegPair(tree, regPair);
15039 #pragma warning(pop)
15042 /*****************************************************************************
15044 * Generate code for a mod of a long by an int.
// Generate x86 code for an unsigned modulo (GT_UMOD) of a 64-bit dividend by a
// small positive integer constant (2..0x3fffffff), returning the register pair
// that holds the 64-bit result. Non-x86 targets fall to the NYI path below.
// NOTE(review): several structural lines (braces/else) are elided in this view;
// comments below annotate the visible statements only.
15047 regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
15049 #ifdef _TARGET_X86_
15054     genTreeOps oper = tree->OperGet();
15055     GenTreePtr op1 = tree->gtOp.gtOp1;
15056     GenTreePtr op2 = tree->gtOp.gtOp2;
15058     /* Codegen only for Unsigned MOD */
15059     noway_assert(oper == GT_UMOD);
15061     /* op2 must be a long constant in the range 2 to 0x3fffffff */
15063     noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
15064 (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15065     int val = (int)op2->gtLngCon.gtLconVal;
// The divisor fits in 32 bits, so retype the constant node as an INT constant
// in place; subsequent addressing/compute helpers then treat it as a 32-bit value.
15067     op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15069     op2->gtType = TYP_INT;
15070     op2->gtIntCon.gtIconVal = val;
15072     /* Which operand are we supposed to compute first? */
15074     if (tree->gtFlags & GTF_REVERSE_OPS)
15076     /* Compute the second operand into a scratch register, other
// Keep EAX:EDX (REG_PAIR_TMP) out of the divisor's register choices, since the
// dividend must end up there for the divide instruction.
15079     needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15081     /* Special case: if op2 is a local var we are done */
15083     if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15085     addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15089     genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15091     noway_assert(op2->InReg());
15092     addrReg = genRegMask(op2->gtRegNum);
15095     /* Compute the first operand into EAX:EDX */
15097     genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15098     noway_assert(op1->InReg());
15099     noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15101     /* And recover the second argument while locking the first one */
15103     addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
// Non-reversed evaluation order: dividend first, then divisor.
15107     /* Compute the first operand into EAX:EDX */
15109     genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15110     noway_assert(op1->InReg());
15111     noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15113     /* Compute the second operand into a scratch register, other
15116     needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15118     /* Special case: if op2 is a local var we are done */
15120     if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15122     addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15126     genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15128     noway_assert(op2->InReg());
15129     addrReg = genRegMask(op2->gtRegNum);
15132     /* Recover the first argument */
15134     genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15136     /* And recover the second argument while locking the first one */
15138     addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15141     /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
15142 contains the 32bit divisor. We want to generate the following code:
15144 ==========================
15147 cmp edx, op2->gtRegNum
15158 ==========================
15159 This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
// Overflow guard: x86 DIV faults if the quotient doesn't fit in 32 bits
// (i.e. when the high dword EDX >= divisor). In that case we first reduce
// the high dword modulo the divisor, then do the real divide.
15162     BasicBlock* lab_no_overflow = genCreateTempLabel();
15164     // grab a temporary register other than eax, edx, and op2->gtRegNum
15166     regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15168     // EAX and tempReg will be trashed by the mov instructions. Doing
15169     // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15171     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15172     regTracker.rsTrackRegTrash(tempReg);
// If EDX (high dword) < divisor, the single divide below cannot overflow.
15174     inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15175     inst_JMP(EJ_jb, lab_no_overflow);
// Overflow path: save low dword, compute (hi % divisor) with a first divide,
// then restore the low dword so EDX:EAX = (hi % c) * 2^32 + lo.
15177     inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15178     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15179     genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15180     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15181     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15183     // Jump point for no overflow divide
15185     genDefineTempLabel(lab_no_overflow);
15187     // Issue the divide instruction
15189     inst_TT(INS_UNSIGNED_DIVIDE, op2);
15191     /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15193     regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15194     regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
15195     regTracker.rsTrackRegTrash(tempReg);
15196     regTracker.rsTrackRegTrash(op2->gtRegNum);
// Caller only wants the 32-bit remainder (which DIV leaves in EDX), so the
// reversed pair can be reported without normalizing it into EDX:EAX form.
15198     if (tree->gtFlags & GTF_MOD_INT_RESULT)
15200     /* We don't need to normalize the result, because the caller wants
15203     regPair = REG_PAIR_TMP_REVERSE;
15207     /* The result is now in EDX, we now have to normalize it, i.e. we have
15209 mov eax, edx; xor edx, edx (for UMOD)
15212     inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15214     genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15216     regPair = REG_PAIR_TMP;
// Release both operands' registers/addressing resources before returning.
15219     genReleaseRegPair(op1);
15220     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15224 #else // !_TARGET_X86_
15226     NYI("codegen for LongModInt");
15228     return REG_PAIR_NONE;
15230 #endif // !_TARGET_X86_
15233 // Given a tree, return the number of registers that are currently
15234 // used to hold integer enregistered local variables.
15235 // Note that, an enregistered TYP_LONG can take 1 or 2 registers.
// Count the integer registers currently occupied by live, enregistered,
// non-floating-point local variables (walking compiler->compCurLife).
// An enregistered TYP_LONG/ULONG contributes 1 register if only its lower
// half is in a register, or 2 if both halves are enregistered.
// NOTE(review): the increment/return lines are elided in this view.
15236 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
15238     unsigned regCount = 0;
// Iterate over all tracked variables that are live at the current point.
15240     VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
15241     while (iter.NextElem(&varNum))
15243     unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
15244     LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
// Only count integer (non-floating) locals that live in registers.
15246     if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15250     if (varTypeIsLong(varDsc->TypeGet()))
15252     // For enregistered LONG/ULONG, the lower half should always be in a register.
15253     noway_assert(varDsc->lvRegNum != REG_STK);
15255     // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
15257     if (varDsc->lvOtherReg != REG_STK)
15268 /*****************************************************************************/
15269 /*****************************************************************************/
15270 #if CPU_HAS_FP_SUPPORT
15271 /*****************************************************************************
15273 * Generate code for a floating-point operation.
// Generate code for a floating-point tree by delegating to genCodeForTreeFloat,
// then handle the extra bookkeeping a GT_RETURN node needs: emit the PInvoke
// method epilog when returning from a method with unmanaged calls, and invoke
// the profiler-leave callback when profiling is enabled.
// needReg defaults to RBM_ALLFLOAT and bestReg to RBM_NONE (see param comments).
15276 void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
15277 regMaskTP needReg, /* = RBM_ALLFLOAT */
15278 regMaskTP bestReg) /* = RBM_NONE */
15280     genCodeForTreeFloat(tree, needReg, bestReg);
15282     if (tree->OperGet() == GT_RETURN)
15284     // Make sure to get ALL THE EPILOG CODE
15286     // TODO: this should be done AFTER we called exit mon so that
15287     // we are sure that we don't have to keep 'this' alive
// Only the block that actually returns (genReturnBB) of a method containing
// unmanaged calls needs the PInvoke epilog emitted here.
15289     if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15291     /* either it's an "empty" statement or the return statement
15292 of a synchronized method
15295     genPInvokeMethodEpilog();
15298 #ifdef PROFILING_SUPPORTED
15299     // The profiling hook does not trash registers, so it's safe to call after we emit the code for
15300     // the GT_RETURN tree.
15302     if (compiler->compCurBB == compiler->genReturnBB)
15304     genProfilingLeaveCallback();
15310 /*****************************************************************************/
15311 #endif // CPU_HAS_FP_SUPPORT
15313 /*****************************************************************************
15315 * Generate a table switch - the switch value (0-based) is in register 'reg'.
15318 void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
15320 unsigned jmpTabBase;
15324 // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
15325 // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
15326 noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15327 inst_JMP(EJ_jmp, jumpTab[0]);
15331 noway_assert(jumpCnt >= 2);
15333 /* Is the number of cases right for a test and jump switch? */
15335 const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15336 const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15337 const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
15339 unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15341 // This means really just a single cmp/jcc (aka a simple if/else)
15342 if (fFirstCaseFollows || fDefaultFollows)
15343 minSwitchTabJumpCnt++;
15345 #ifdef _TARGET_ARM_
15346 // On the ARM for small switch tables we will
15347 // generate a sequence of compare and branch instructions
15348 // because the code to load the base of the switch
15349 // table is huge and hideous due to the relocation... :(
15351 minSwitchTabJumpCnt++;
15352 if (fHaveScratchReg)
15353 minSwitchTabJumpCnt++;
15355 #endif // _TARGET_ARM_
15357 if (jumpCnt < minSwitchTabJumpCnt)
15359 /* Does the first case label follow? */
15360 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15362 if (fFirstCaseFollows)
15364 /* Check for the default case */
15365 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15366 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15367 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15369 /* No need to jump to the first case */
15374 /* Generate a series of "dec reg; jmp label" */
15376 // Make sure that we can trash the register so
15377 // that we can generate a series of compares and jumps
15379 if ((jumpCnt > 0) && !fHaveScratchReg)
15381 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15382 inst_RV_RV(INS_mov, tmpReg, reg);
15383 regTracker.rsTrackRegTrash(tmpReg);
15387 while (jumpCnt > 0)
15389 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15390 inst_JMP(jmpEqual, *jumpTab++);
15396 /* Check for case0 first */
15397 instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15398 inst_JMP(jmpEqual, *jumpTab);
15400 /* No need to jump to the first case or the default */
15405 /* Generate a series of "dec reg; jmp label" */
15407 // Make sure that we can trash the register so
15408 // that we can generate a series of compares and jumps
15410 if ((jumpCnt > 0) && !fHaveScratchReg)
15412 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15413 inst_RV_RV(INS_mov, tmpReg, reg);
15414 regTracker.rsTrackRegTrash(tmpReg);
15418 while (jumpCnt > 0)
15420 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15421 inst_JMP(jmpEqual, *jumpTab++);
15425 if (!fDefaultFollows)
15427 inst_JMP(EJ_jmp, *jumpTab);
15431 if ((fFirstCaseFollows || fDefaultFollows) &&
15432 compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15434 inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
15440 /* First take care of the default case */
15442 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15443 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15444 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15446 /* Generate the jump table contents */
15448 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15451 if (compiler->opts.dspCode)
15452 printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15455 for (unsigned index = 0; index < jumpCnt - 1; index++)
15457 BasicBlock* target = jumpTab[index];
15459 noway_assert(target->bbFlags & BBF_JMP_TARGET);
15462 if (compiler->opts.dspCode)
15463 printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15466 getEmitter()->emitDataGenData(index, target);
15469 getEmitter()->emitDataGenEnd();
15471 #ifdef _TARGET_ARM_
15472 // We need to load the address of the table into a register.
15473 // The data section might get placed a long distance away, so we
15474 // can't safely do a PC-relative ADR. :(
15475 // Pick any register except the index register.
15477 regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
15478 genMov32RelocatableDataLabel(jmpTabBase, regTabBase);
15479 regTracker.rsTrackRegTrash(regTabBase);
15481 // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2]
15482 getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15484 #else // !_TARGET_ARM_
15486 getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15491 /*****************************************************************************
15493 * Generate code for a switch statement.
15496 void CodeGen::genCodeForSwitch(GenTreePtr tree)
// Generates code for a GT_SWITCH node: evaluates the switch value into a
// register, then hands off to the jump-table emitter for the current
// BBJ_SWITCH block.
15499 BasicBlock** jumpTab;
15504 noway_assert(tree->gtOper == GT_SWITCH);
// The switch value is the single operand of the GT_SWITCH node and must be
// of int (or native-int) actual type.
15505 oper = tree->gtOp.gtOp1;
15506 noway_assert(genActualTypeIsIntOrI(oper->gtType));
15508 /* Get hold of the jump table */
// The current basic block's switch descriptor holds the case count and the
// table of destination blocks (by convention the default case is last).
15510 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15512 jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15513 jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15515 /* Compute the switch value into some register */
15517 genCodeForTree(oper, 0);
15519 /* Get hold of the register the value is in */
15521 noway_assert(oper->InReg());
15522 reg = oper->gtRegNum;
15524 #if FEATURE_STACK_FP_X87
// If values are live on the x87 FP stack, use the FP-aware table-switch
// path so the FP stack state is kept consistent across all branch targets.
15525 if (!compCurFPState.IsEmpty())
15527 return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15530 #endif // FEATURE_STACK_FP_X87
15532 return genTableSwitch(reg, jumpCnt, jumpTab);
15536 /*****************************************************************************/
15537 /*****************************************************************************
15538 * Emit a call to a helper function.
15542 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
// Emits a call to the given JIT helper function.
//   helper  - a CorInfoHelpFunc id (passed as unsigned)
//   argSize - bytes of outgoing arguments consumed by the helper
//   retSize - emitAttr describing the helper's return-value size
// After the call, all callee-trashed registers are marked trashed for
// register tracking and GC-interruptibility purposes.
15544 // Can we call the helper function directly
15546 void *addr = NULL, **pAddr = NULL;
15548 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
15549 // Don't ask VM if it hasn't requested ELT hooks
// In this DEBUG/profiling configuration the JIT-inserted ELT hook address
// comes straight from compProfilerMethHnd rather than from the VM.
15550 if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
15551 (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15552 helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15554 addr = compiler->compProfilerMethHnd;
// Otherwise ask the VM: 'addr' is the direct entry point if available,
// else 'pAddr' is an indirection cell holding the entry point.
15559 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15562 #ifdef _TARGET_ARM_
// On ARM a direct BL has a limited immediate range; if the target address
// is unknown or out of range, call indirectly through a register.
15563 if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15565 // Load the address into a register and call through a register
15566 regNumber indCallReg =
15567 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
15570 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
// No direct address: load the entry point from the indirection cell.
15574 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15575 regTracker.rsTrackRegTrash(indCallReg);
// Indirect call through indCallReg; current GC var/reg sets are reported
// so the emitter can record correct GC info at the call site.
15578 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
15579 INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
15580 argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15581 gcInfo.gcRegByrefSetCur,
15582 BAD_IL_OFFSET, // ilOffset
15583 indCallReg, // ireg
15584 REG_NA, 0, 0, // xreg, xmul, disp
15586 emitter::emitNoGChelper(helper),
15587 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Direct BL to the helper's entry point.
15591 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
15592 INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
15593 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15594 0, /* ilOffset, ireg, xreg, xmul, disp */
15595 false, /* isJump */
15596 emitter::emitNoGChelper(helper),
15597 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Non-ARM path: call directly when 'addr' is known, else call through the
// VM-provided indirection cell (EC_FUNC_TOKEN_INDIR).
15602 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
15606 callType = emitter::EC_FUNC_TOKEN_INDIR;
15610 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
15611 argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15612 gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15613 0, /* ilOffset, ireg, xreg, xmul, disp */
15614 false, /* isJump */
15615 emitter::emitNoGChelper(helper));
// The helper clobbers all callee-trashed registers; record that, and also
// invalidate registers whose contents cannot survive a GC-interruptible call.
15619 regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15620 regTracker.rsTrashRegsForGCInterruptability();
15623 /*****************************************************************************
15625 * Push the given argument list, right to left; returns the total amount of
15629 #if !FEATURE_FIXED_OUT_ARGS
15631 #pragma warning(push)
15632 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
15634 size_t CodeGen::genPushArgList(GenTreeCall* call)
// Pushes the stack-based arguments of 'call' right-to-left (x86
// !FEATURE_FIXED_OUT_ARGS convention), evaluates late (register) args into
// their temps, then moves the deferred register arguments into their target
// registers and reloads any that were spilled. Returns the total number of
// bytes pushed on the stack.
15636 GenTreeArgList* regArgs = call->gtCallLateArgs;
15640 GenTreeArgList* args;
15641 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
15642 // so we can iterate over this argument list more uniformly.
15643 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
15644 GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCallArgs);
15645 if (call->gtCallObjp == NULL)
15647 args = call->gtCallArgs;
15651 firstForObjp.Current() = call->gtCallObjp;
15652 args = &firstForObjp;
// Main loop: process each argument (the 'this' pointer first, if present).
15659 for (; args; args = args->Rest())
15661 addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
15663 /* Get hold of the next argument value */
15664 curr = args->Current();
15666 if (curr->IsArgPlaceHolderNode())
15668 assert(curr->gtFlags & GTF_LATE_ARG);
15674 // If we have a comma expression, eval the non-last, then deal with the last.
15675 if (!(curr->gtFlags & GTF_LATE_ARG))
15676 curr = genCodeForCommaTree(curr);
15678 /* See what type of a value we're passing */
15679 type = curr->TypeGet();
15681 opsz = genTypeSize(genActualType(type));
15691 /* Don't want to push a small value, make it a full word */
15693 genCodeForTree(curr, 0);
15695 __fallthrough; // now the value should be in a register ...
15700 #if !CPU_HAS_FP_SUPPORT
// --- Pointer-sized integer argument ---
15704 if (curr->gtFlags & GTF_LATE_ARG)
15706 assert(curr->gtOper == GT_ASG);
15707 /* one more argument will be passed in a register */
15708 noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
15710 /* arg is passed in the register, nothing on the stack */
15715 /* Is this value a handle? */
15717 if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
15719 /* Emit a fixup for the push instruction */
15721 inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
15728 /* Is the value a constant? */
15730 if (curr->gtOper == GT_CNS_INT)
// If the constant already lives in a register, pushing the register
// is smaller than pushing an immediate.
15734 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
15738 inst_RV(INS_push, reg, TYP_INT);
15743 inst_IV(INS_push, curr->gtIntCon.gtIconVal);
15746 /* If the type is TYP_REF, then this must be a "null". So we can
15747 treat it as a TYP_INT as we don't need to report it as a GC ptr */
15749 noway_assert(curr->TypeGet() == TYP_INT ||
15750 (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
15758 if (curr->gtFlags & GTF_LATE_ARG)
15760 /* This must be a register arg temp assignment */
15762 noway_assert(curr->gtOper == GT_ASG);
15764 /* Evaluate it to the temp */
15766 genCodeForTree(curr, 0);
15768 /* Increment the current argument register counter */
15770 intRegState.rsCurRegArgNum++;
15776 /* This is a 32-bit integer non-register argument */
15778 addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
15779 inst_TT(INS_push, curr);
15781 genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
15786 #if !CPU_HAS_FP_SUPPORT
// --- 64-bit (long/double) argument: pushed as two 32-bit halves,
// high half first so the value ends up little-endian on the stack ---
15790 /* Is the value a constant? */
15792 if (curr->gtOper == GT_CNS_LNG)
15794 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
15796 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
15803 addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
15805 inst_TT(INS_push, curr, sizeof(int));
15807 inst_TT(INS_push, curr);
15812 #if CPU_HAS_FP_SUPPORT
15816 #if FEATURE_STACK_FP_X87
// Floating-point argument: handled by the x87-stack-aware pusher.
15817 addrReg = genPushArgumentStackFP(curr);
15826 /* Is this a nothing node, deferred register argument? */
15828 if (curr->gtFlags & GTF_LATE_ARG)
// Evaluate any side effects hidden under COMMA wrappers; the real
// value was already moved to a temp by the late-arg mechanism.
15830 GenTree* arg = curr;
15831 if (arg->gtOper == GT_COMMA)
15833 while (arg->gtOper == GT_COMMA)
15835 GenTreePtr op1 = arg->gtOp.gtOp1;
15836 genEvalSideEffects(op1);
15837 genUpdateLife(op1);
15838 arg = arg->gtOp.gtOp2;
15840 if (!arg->IsNothingNode())
15842 genEvalSideEffects(arg);
15843 genUpdateLife(arg);
15847 /* increment the register count and continue with the next argument */
15849 intRegState.rsCurRegArgNum++;
15851 noway_assert(opsz == 0);
// --- Struct (TYP_STRUCT) argument: strip COMMAs, then handle
// GT_MKREFANY / GT_OBJ (GT_IND) ---
15861 GenTree* arg = curr;
15862 while (arg->gtOper == GT_COMMA)
15864 GenTreePtr op1 = arg->gtOp.gtOp1;
15865 genEvalSideEffects(op1);
15866 genUpdateLife(op1);
15867 arg = arg->gtOp.gtOp2;
15870 noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
15871 noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
15872 noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
15874 if (arg->gtOper == GT_MKREFANY)
// A TypedReference is two pointer-sized slots: push the type handle
// (op2) first, then the data pointer (op1).
15876 GenTreePtr op1 = arg->gtOp.gtOp1;
15877 GenTreePtr op2 = arg->gtOp.gtOp2;
15879 addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
15881 /* Is this value a handle? */
15882 if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
15884 /* Emit a fixup for the push instruction */
15886 inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
15891 regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
15892 inst_TT(INS_push, op2);
15894 genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
15896 addrReg = genKeepAddressable(op1, addrReg);
15897 inst_TT(INS_push, op1);
15899 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
15901 opsz = 2 * TARGET_POINTER_SIZE;
15905 noway_assert(arg->gtOper == GT_OBJ);
// Special case: OBJ(ADDR(LCL_VAR)) of an independently-promoted struct
// local — push the promoted field locals directly instead of copying
// from the (possibly stale) stack home.
15907 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
15909 GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
15910 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
15911 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
15913 // As much as we would like this to be a noway_assert, we can't because
15914 // there are some weird casts out there, and backwards compatibility
15915 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
15916 // lvPromoted in general currently do not require the local to be
15917 // TYP_STRUCT, so this assert is really more about how we wish the world
15918 // was than some JIT invariant.
15919 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
15921 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
15923 if (varDsc->lvPromoted &&
15925 Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
15927 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
15931 // Get the number of BYTES to copy to the stack
15932 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
15933 size_t bytesToBeCopied = opsz;
15935 // postponedFields is true if we have any postponed fields
15936 // Any field that does not start on a 4-byte boundary is a postponed field
15937 // Such a field is required to be a short or a byte
15939 // postponedRegKind records the kind of scratch register we will
15940 // need to process the postponed fields
15941 // RBM_NONE means that we don't need a register
15943 // expectedAlignedOffset records the aligned offset that
15944 // has to exist for a push to cover the postponed fields.
15945 // Since all promoted structs have the tightly packed property
15946 // we are guaranteed that we will have such a push
15948 bool postponedFields = false;
15949 regMaskTP postponedRegKind = RBM_NONE;
15950 size_t expectedAlignedOffset = UINT_MAX;
// deadVarBits (if found) records which promoted field vars die at
// this use of the struct local; used for last-use bookkeeping below.
15952 VARSET_TP* deadVarBits = NULL;
15953 compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
15955 // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
15957 for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
15958 varNum >= (int)varDsc->lvFieldLclStart; varNum--)
15960 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
15962 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
15964 noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
15965 noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
15968 // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
15969 // For packed structs we will go back and store the unaligned bytes and shorts
15970 // in the next loop
15972 if (fieldVarDsc->lvStackAligned())
15974 if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
15975 fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
15977 // Might need 4-bytes paddings for fields other than LONG and DOUBLE.
15978 // Just push some junk (i.e EAX) on the stack.
15979 inst_RV(INS_push, REG_EAX, TYP_INT);
15982 bytesToBeCopied -= sizeof(void*);
15985 // If we have an expectedAlignedOffset make sure that this push instruction
15986 // is what we expect to cover the postponedFields
15988 if (expectedAlignedOffset != UINT_MAX)
15990 // This push must be for a small field
15991 noway_assert(fieldVarDsc->lvExactSize < 4);
15992 // The fldOffset for this push should be equal to the expectedAlignedOffset
15993 noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
15994 expectedAlignedOffset = UINT_MAX;
15997 // Push the "upper half" of LONG var first
15999 if (isRegPairType(fieldVarDsc->lvType))
16001 if (fieldVarDsc->lvOtherReg != REG_STK)
16003 inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
16006 // Prepare the set of vars to be cleared from gcref/gcbyref set
16007 // in case they become dead after genUpdateLife.
16008 // genDoneAddressable() will remove dead gc vars by calling
16009 // gcInfo.gcMarkRegSetNpt.
16010 // Although it is not addrReg, we just borrow the name here.
16011 addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16015 getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
16019 bytesToBeCopied -= sizeof(void*);
16022 // Push the "upper half" of DOUBLE var if it is not enregistered.
16024 if (fieldVarDsc->lvType == TYP_DOUBLE)
16026 if (!fieldVarDsc->lvRegister)
16028 getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
16032 bytesToBeCopied -= sizeof(void*);
16036 // Push the field local.
16039 if (fieldVarDsc->lvRegister)
16041 if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16043 inst_RV(INS_push, fieldVarDsc->lvRegNum,
16044 genActualType(fieldVarDsc->TypeGet()));
16047 // Prepare the set of vars to be cleared from gcref/gcbyref set
16048 // in case they become dead after genUpdateLife.
16049 // genDoneAddressable() will remove dead gc vars by calling
16050 // gcInfo.gcMarkRegSetNpt.
16051 // Although it is not addrReg, we just borrow the name here.
16052 addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16056 // Must be TYP_FLOAT or TYP_DOUBLE
16057 noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16059 noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
16060 fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
// Make room on the stack, then fstp the FP register into it.
16062 inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16065 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
16070 #if FEATURE_STACK_FP_X87
// Build a synthetic GT_REG_VAR leaf so the x87 stack machinery
// can bring the field's FP register to top-of-stack.
16071 GenTree* fieldTree = new (compiler, GT_REG_VAR)
16072 GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16073 fieldTree->gtOper = GT_REG_VAR;
16074 fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
16075 fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16076 if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
16078 if (fieldVarDsc->lvTracked &&
16079 (deadVarBits == NULL ||
16080 VarSetOps::IsMember(compiler, *deadVarBits,
16081 fieldVarDsc->lvVarIndex)))
16083 fieldTree->gtFlags |= GTF_VAR_DEATH;
16086 genCodeForTreeStackFP_Leaf(fieldTree);
16088 // Take reg to top of stack
16090 FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16092 // Pop it off to stack
16093 compCurFPState.Pop();
16095 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
16096 REG_NA, REG_SPBASE, 0);
16098 NYI_FLAT_FP_X87("FP codegen");
// Non-enregistered field: push straight from its stack home,
// reporting GC-ness so the emitter records correct GC info.
16104 getEmitter()->emitIns_S(INS_push,
16105 (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
16111 bytesToBeCopied -= sizeof(void*);
16113 else // not stack aligned
16115 noway_assert(fieldVarDsc->lvExactSize < 4);
16117 // We will need to use a store byte or store word
16118 // to set this unaligned location
16119 postponedFields = true;
16121 if (expectedAlignedOffset != UINT_MAX)
16123 // This should never change until it is set back to UINT_MAX by an aligned
16125 noway_assert(expectedAlignedOffset ==
16126 roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
16129 expectedAlignedOffset =
16130 roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
16132 noway_assert(expectedAlignedOffset < bytesToBeCopied);
16134 if (fieldVarDsc->lvRegister)
16136 // Do we need to use a byte-able register?
16137 if (fieldVarDsc->lvExactSize == 1)
16139 // Did we enregister fieldVarDsc2 in a non byte-able register?
16140 if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16142 // then we will need to grab a byte-able register
16143 postponedRegKind = RBM_BYTE_REGS;
16147 else // not enregistered
16149 if (fieldVarDsc->lvExactSize == 1)
16151 // We will need to grab a byte-able register
16152 postponedRegKind = RBM_BYTE_REGS;
16156 // We will need to grab any scratch register
16157 if (postponedRegKind != RBM_BYTE_REGS)
16158 postponedRegKind = RBM_ALLINT;
16164 // Now we've pushed all of the aligned fields.
16166 // We should have pushed bytes equal to the entire struct
16167 noway_assert(bytesToBeCopied == 0);
16169 // We should have seen a push that covers every postponed field
16170 noway_assert(expectedAlignedOffset == UINT_MAX);
16172 // Did we have any postponed fields?
16173 if (postponedFields)
16175 regNumber regNum = REG_STK; // means no register
16177 // If we needed a scratch register then grab it here
16179 if (postponedRegKind != RBM_NONE)
16180 regNum = regSet.rsGrabReg(postponedRegKind);
16182 // Forward loop, starts from the lowest field offset
16184 for (unsigned varNum = varDsc->lvFieldLclStart;
16185 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
16187 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
16189 // All stack aligned fields have already been pushed
16190 if (fieldVarDsc->lvStackAligned())
16193 // We have a postponed field
16195 // It must be a byte or a short
16196 noway_assert(fieldVarDsc->lvExactSize < 4);
16198 // Is the field enregistered?
16199 if (fieldVarDsc->lvRegister)
16201 // Frequently we can just use that register
16202 regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16204 // Do we need to use a byte-able register?
16205 if (fieldVarDsc->lvExactSize == 1)
16207 // Did we enregister the field in a non byte-able register?
16208 if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16210 // then we will need to use the byte-able register 'regNum'
16211 noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16213 // Copy the register that contains fieldVarDsc into 'regNum'
16214 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
16215 fieldVarDsc->lvRegNum);
16216 regTracker.rsTrackRegLclVar(regNum, varNum);
16218 // tmpRegNum is the register that we will extract the byte value from
16219 tmpRegNum = regNum;
16221 noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
16224 getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16225 (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
16226 REG_SPBASE, fieldVarDsc->lvFldOffset);
16228 else // not enregistered
16230 // We will copy the non-enregister fieldVar into our scratch register 'regNum'
16232 noway_assert(regNum != REG_STK);
16233 getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
16234 (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
16237 regTracker.rsTrackRegLclVar(regNum, varNum);
16239 // Store the value (byte or short) into the stack
16241 getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16242 (emitAttr)fieldVarDsc->lvExactSize, regNum,
16243 REG_SPBASE, fieldVarDsc->lvFldOffset);
16247 genUpdateLife(structLocalTree);
// General GT_OBJ case: materialize the struct's address in a register
// and copy the struct to the stack slot-by-slot.
16253 genCodeForTree(arg->gtObj.gtOp1, 0);
16254 noway_assert(arg->gtObj.gtOp1->InReg());
16255 regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16256 // Get the number of DWORDS to copy to the stack
16257 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
16258 unsigned slots = (unsigned)(opsz / sizeof(void*));
16260 BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16262 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16264 BOOL bNoneGC = TRUE;
16265 for (int i = slots - 1; i >= 0; --i)
16267 if (gcLayout[i] != TYPE_GC_NONE)
16274 /* passing large structures using movq instead of pushes does not increase codesize very much */
16275 unsigned movqLenMin = 8;
16276 unsigned movqLenMax = 64;
16277 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16279 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16281 // Don't bother with this optimization in
16282 // rarely run blocks or when optimizing for size
16283 movqLenMax = movqLenMin = 0;
16285 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16287 // Be more aggressive when optimizing for speed
16291 /* Adjust for BB weight */
16292 if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
16294 // Be more aggressive when we are inside a loop
16298 if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
// SSE2 path: no GC pointers in the struct, so copy 8 bytes at a
// time via XMM instead of a sequence of 4-byte pushes.
16300 JITLOG_THIS(compiler, (LL_INFO10000,
16301 "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16302 opsz, compiler->info.compFullName));
16304 int stkDisp = (int)(unsigned)opsz;
16306 regNumber xmmReg = REG_XMM0;
// Odd trailing 4-byte slot (if opsz is not a multiple of 8) is
// pushed first; the remaining 8-byte chunks go via movq below.
16310 stkDisp -= sizeof(void*);
16311 getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
16315 inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16316 AddStackLevel(stkDisp);
16318 while (curDisp < stkDisp)
16320 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16321 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16322 curDisp += 2 * sizeof(void*);
16324 noway_assert(curDisp == stkDisp);
// Push path: push each pointer-sized slot, highest offset first,
// reporting each slot's GC-ness from the class GC layout.
16328 for (int i = slots - 1; i >= 0; --i)
16330 emitAttr fieldSize;
16331 if (gcLayout[i] == TYPE_GC_NONE)
16332 fieldSize = EA_4BYTE;
16333 else if (gcLayout[i] == TYPE_GC_REF)
16334 fieldSize = EA_GCREF;
16337 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16338 fieldSize = EA_BYREF;
16340 getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
16344 gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
16352 noway_assert(!"unhandled/unexpected arg type");
16353 NO_WAY("unhandled/unexpected arg type");
16356 /* Update the current set of live variables */
16358 genUpdateLife(curr);
16360 /* Update the current set of register pointers */
16362 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
16363 genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16365 /* Remember how much stuff we've pushed on the stack */
16369 /* Update the current argument stack offset */
16371 /* Continue with the next argument, if any more are present */
16375 /* Move the deferred arguments to registers */
// Second pass: load each late (register) argument into its assigned
// argument register.
16377 for (args = regArgs; args; args = args->Rest())
16379 curr = args->Current();
16381 assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
16383 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16384 assert(curArgTabEntry);
16385 regNumber regNum = curArgTabEntry->regNum;
16387 noway_assert(isRegParamType(curr->TypeGet()));
16388 noway_assert(curr->gtType != TYP_VOID);
16390 /* Evaluate the argument to a register [pair] */
16392 if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16394 /* Check if this is the guess area for the resolve interface call
16395 * Pass a size of EA_OFFSET*/
16396 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16398 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
16399 regTracker.rsTrackRegTrash(regNum);
16401 /* The value is now in the appropriate register */
16403 genMarkTreeInReg(curr, regNum);
16407 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16410 noway_assert(curr->gtRegNum == regNum);
16412 /* If the register is already marked as used, it will become
16413 multi-used. However, since it is a callee-trashed register,
16414 we will have to spill it before the call anyway. So do it now */
16416 if (regSet.rsMaskUsed & genRegMask(regNum))
16418 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16419 regSet.rsSpillReg(regNum);
16422 /* Mark the register as 'used' */
16424 regSet.rsMarkRegUsed(curr);
16428 noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16432 /* If any of the previously loaded arguments were spilled - reload them */
// Third pass: a later argument's evaluation may have spilled an earlier
// register argument; unspill them back into their assigned registers.
16434 for (args = regArgs; args; args = args->Rest())
16436 curr = args->Current();
16439 if (curr->gtFlags & GTF_SPILLED)
16441 if (isRegPairType(curr->gtType))
16443 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16447 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16452 /* Return the total size pushed */
16457 #pragma warning(pop)
16460 #else // FEATURE_FIXED_OUT_ARGS
16463 // ARM and AMD64 uses this method to pass the stack based args
16465 // returns size pushed (always zero)
16466 size_t CodeGen::genPushArgList(GenTreeCall* call)
16468 GenTreeArgList* lateArgs = call->gtCallLateArgs;
16473 GenTreeArgList* args;
16474 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16475 // so we can iterate over this argument list more uniformly.
16476 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16477 GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCallArgs);
16478 if (call->gtCallObjp == NULL)
16480 args = call->gtCallArgs;
16484 objpArgList.Current() = call->gtCallObjp;
16485 args = &objpArgList;
16488 for (; args; args = args->Rest())
16490 /* Get hold of the next argument value */
16491 curr = args->Current();
16493 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16494 assert(curArgTabEntry);
16495 regNumber regNum = curArgTabEntry->regNum;
16496 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
16498 /* See what type of a value we're passing */
16499 type = curr->TypeGet();
16501 if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
16506 // This holds the set of registers corresponding to enregistered promoted struct field variables
16507 // that go dead after this use of the variable in the argument list.
16508 regMaskTP deadFieldVarRegs = RBM_NONE;
16510 argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
16512 if (curr->IsArgPlaceHolderNode())
16514 assert(curr->gtFlags & GTF_LATE_ARG);
16518 if (varTypeIsSmall(type))
16520 // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
16530 #if defined(_TARGET_ARM_)
16532 argSize = (TARGET_POINTER_SIZE * 2);
16534 /* Is the value a constant? */
16536 if (curr->gtOper == GT_CNS_LNG)
16538 assert((curr->gtFlags & GTF_LATE_ARG) == 0);
16540 int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
16541 int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
16543 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
16545 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
16552 genCodeForTree(curr, 0);
16554 if (curr->gtFlags & GTF_LATE_ARG)
16556 // The arg was assigned into a temp and
16557 // will be moved to the correct register or slot later
16559 argSize = 0; // nothing is passed on the stack
16563 // The arg is passed in the outgoing argument area of the stack frame
16565 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16566 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
16568 if (type == TYP_LONG)
16570 regNumber regLo = genRegPairLo(curr->gtRegPair);
16571 regNumber regHi = genRegPairHi(curr->gtRegPair);
16573 assert(regLo != REG_STK);
16574 inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
16575 if (regHi == REG_STK)
16577 regHi = regSet.rsPickFreeReg();
16578 inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
16579 regTracker.rsTrackRegTrash(regHi);
16581 inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
16583 else // (type == TYP_DOUBLE)
16585 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16591 #elif defined(_TARGET_64BIT_)
16594 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
16602 /* Is the value a constant? */
16604 if (curr->gtOper == GT_CNS_INT)
16606 assert(!(curr->gtFlags & GTF_LATE_ARG));
16609 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16613 inst_SA_RV(ins_Store(type), argOffset, reg, type);
16618 bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
16619 emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
16620 instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
16621 compiler->lvaOutgoingArgSpaceVar, argOffset);
16626 /* This is passed as a pointer-sized integer argument */
16628 genCodeForTree(curr, 0);
16630 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
16631 if (curr->gtFlags & GTF_LATE_ARG)
16633 #ifdef _TARGET_ARM_
16634 argSize = 0; // nothing is passed on the stack
16639 // The arg is passed in the outgoing argument area of the stack frame
16641 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16642 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
16643 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16645 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
16646 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
16651 /* Is this a nothing node, deferred register argument? */
16653 if (curr->gtFlags & GTF_LATE_ARG)
16655 /* Handle side-effects */
16657 if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
16659 #ifdef _TARGET_ARM_
16661 GenTreePtr curArgNode = curArgTabEntry->node;
16662 var_types curRegArgType = curArgNode->gtType;
16663 assert(curRegArgType != TYP_UNDEF);
16665 if (curRegArgType == TYP_STRUCT)
16667 // If the RHS of the COPYBLK is a promoted struct local, then the use of that
16668 // is an implicit use of all its field vars. If these are last uses, remember that,
16669 // so we can later update the GC compiler->info.
16670 if (curr->OperIsCopyBlkOp())
16671 deadFieldVarRegs |= genFindDeadFieldRegs(curr);
16674 #endif // _TARGET_ARM_
16676 genCodeForTree(curr, 0);
16680 assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
16683 #if defined(_TARGET_ARM_)
16684 argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
16689 for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
16691 GenTreePtr op1 = arg->gtOp.gtOp1;
16693 genEvalSideEffects(op1);
16694 genUpdateLife(op1);
16699 #ifdef _TARGET_ARM_
16703 GenTree* arg = curr;
16704 while (arg->gtOper == GT_COMMA)
16706 GenTreePtr op1 = arg->gtOp.gtOp1;
16707 genEvalSideEffects(op1);
16708 genUpdateLife(op1);
16709 arg = arg->gtOp.gtOp2;
16711 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
16713 CORINFO_CLASS_HANDLE clsHnd;
16716 BYTE* gcLayout = NULL;
16718 // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
16719 // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
16720 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
16721 // table entry for the promoted struct local. As we fill slots with the contents of a
16722 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
16723 // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
16724 // variable number of the next field variable to be copied.
16725 LclVarDsc* promotedStructLocalVarDesc = NULL;
16726 GenTreePtr structLocalTree = NULL;
16727 unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
16728 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
16729 unsigned promotedStructOffsetOfFirstStackSlot = 0;
16730 unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
16732 if (arg->OperGet() == GT_OBJ)
16734 clsHnd = arg->gtObj.gtClass;
16735 unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
16737 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
16738 argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
16740 slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
16742 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16744 compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
16746 // Are we loading a promoted struct local var?
16747 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16749 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
16750 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
16751 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
16753 // As much as we would like this to be a noway_assert, we can't because
16754 // there are some weird casts out there, and backwards compatiblity
16755 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16756 // lvPromoted in general currently do not require the local to be
16757 // TYP_STRUCT, so this assert is really more about how we wish the world
16758 // was then some JIT invariant.
16759 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16761 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
16763 if (varDsc->lvPromoted &&
16764 promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
16767 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16768 promotedStructLocalVarDesc = varDsc;
16769 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
16775 noway_assert(arg->OperGet() == GT_MKREFANY);
16778 argAlign = TARGET_POINTER_SIZE;
16779 argSize = 2 * TARGET_POINTER_SIZE;
16783 // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
16784 noway_assert(regNum == REG_STK);
16786 // This code passes a TYP_STRUCT by value using the outgoing arg space var
16788 if (arg->OperGet() == GT_OBJ)
16790 regNumber regSrc = REG_STK;
16791 regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
16792 int cStackSlots = 0;
16794 if (promotedStructLocalVarDesc == NULL)
16796 genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
16797 noway_assert(arg->gtObj.gtOp1->InReg());
16798 regSrc = arg->gtObj.gtOp1->gtRegNum;
16801 // The number of bytes to add "argOffset" to get the arg offset of the current slot.
16802 int extraArgOffset = 0;
16804 for (unsigned i = 0; i < slots; i++)
16806 emitAttr fieldSize;
16807 if (gcLayout[i] == TYPE_GC_NONE)
16808 fieldSize = EA_PTRSIZE;
16809 else if (gcLayout[i] == TYPE_GC_REF)
16810 fieldSize = EA_GCREF;
16813 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16814 fieldSize = EA_BYREF;
16817 // Pass the argument using the lvaOutgoingArgSpaceVar
16819 if (promotedStructLocalVarDesc != NULL)
16821 if (argOffsetOfFirstStackSlot == UINT32_MAX)
16822 argOffsetOfFirstStackSlot = argOffset;
16824 regNumber maxRegArg = regNumber(MAX_REG_ARG);
16825 bool filledExtraSlot = genFillSlotFromPromotedStruct(
16826 arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
16827 &bytesOfNextSlotOfCurPromotedStruct,
16828 /*pCurRegNum*/ &maxRegArg,
16829 /*argOffset*/ argOffset + extraArgOffset,
16830 /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
16831 argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
16832 extraArgOffset += TARGET_POINTER_SIZE;
16833 // If we filled an extra slot with an 8-byte value, skip a slot.
16834 if (filledExtraSlot)
16838 extraArgOffset += TARGET_POINTER_SIZE;
16843 if (regTmp == REG_STK)
16845 regTmp = regSet.rsPickFreeReg();
16848 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
16849 i * TARGET_POINTER_SIZE);
16851 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
16852 compiler->lvaOutgoingArgSpaceVar,
16853 argOffset + cStackSlots * TARGET_POINTER_SIZE);
16854 regTracker.rsTrackRegTrash(regTmp);
16859 if (promotedStructLocalVarDesc == NULL)
16861 regSet.rsMarkRegFree(genRegMask(regSrc));
16863 if (structLocalTree != NULL)
16864 genUpdateLife(structLocalTree);
16868 assert(arg->OperGet() == GT_MKREFANY);
16869 PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
16870 argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
16874 #endif // _TARGET_ARM_
16877 assert(!"unhandled/unexpected arg type");
16878 NO_WAY("unhandled/unexpected arg type");
16881 /* Update the current set of live variables */
16883 genUpdateLife(curr);
16885 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
16886 // register holding gc pointers.
16887 if (deadFieldVarRegs != 0)
16888 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
16890 /* Update the current argument stack offset */
16892 argOffset += argSize;
16894 /* Continue with the next argument, if any more are present */
16899 SetupLateArgs(call);
16902 /* Return the total size pushed */
#ifdef _TARGET_ARM_

//------------------------------------------------------------------------
// genFillSlotFromPromotedStruct: Fill one pointer-sized slot of an outgoing
// struct argument from the field variables of an independently-promoted
// struct local, writing either into the next integer/FP argument register
// or into the outgoing argument area of the stack frame.
//
// Arguments:
//    arg                        - the struct argument tree being passed
//    curArgTabEntry             - the arg table entry for 'arg'
//    promotedStructLocalVarDesc - LclVarDsc for the promoted struct local
//    fieldSize                  - slot size/GC-ness from the GC layout
//                                 (EA_4BYTE / EA_GCREF / EA_BYREF); narrowed
//                                 below for 1- and 2-byte fields
//    pNextPromotedStructFieldVar           - in/out: lcl num of the next field
//                                 variable to copy
//    pBytesOfNextSlotOfCurPromotedStruct   - in/out: byte offset marking the
//                                 end of the slot currently being filled
//    pCurRegNum                 - in/out: next argument register to fill, or
//                                 MAX_REG_ARG when the slot goes to the stack
//    fieldOffsetOfFirstStackSlot - struct offset of the first stack-passed slot
//    argOffsetOfFirstStackSlot  - outgoing-arg-area offset of that first slot
//    deadFieldVarRegs           - out: accumulates registers of enregistered
//                                 field vars that die at this use
//    pRegTmp                    - in/out: scratch register, picked lazily and
//                                 remembered across calls
//
// Return Value:
//    true if an extra slot (beyond the one requested) was also filled, which
//    happens when an 8-byte field (TYP_LONG / TYP_DOUBLE) is copied.
//
// NOTE(review): 'argOffset' is referenced in an assert below but is not
// visible among the parameters here — confirm it is a parameter of this
// function in the full source.
//
bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
                                            fgArgTabEntryPtr curArgTabEntry,
                                            LclVarDsc* promotedStructLocalVarDesc,
                                            emitAttr fieldSize,
                                            unsigned* pNextPromotedStructFieldVar,
                                            unsigned* pBytesOfNextSlotOfCurPromotedStruct,
                                            regNumber* pCurRegNum,
                                            int fieldOffsetOfFirstStackSlot,
                                            int argOffsetOfFirstStackSlot,
                                            regMaskTP* deadFieldVarRegs,
                                            regNumber* pRegTmp)
    // Take local copies of the in/out parameters; they are written back at the end.
    unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
    unsigned limitPromotedStructFieldVar =
        promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
    unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;

    regNumber curRegNum = *pCurRegNum;
    regNumber regTmp    = *pRegTmp;
    bool      filledExtraSlot = false;

    if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
        // We've already finished; just return.
        // We can reach this because the calling loop computes a # of slots based on the size of the struct.
        // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
        // the fourth slot, even though we've copied all the fields.

    LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];

    // Does this field fill an entire slot, and does it go at the start of the slot?
    // If so, things are easier...

    bool oneFieldFillsSlotFromStart =
        (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
        && ((fieldVarDsc->lvFldOffset % 4) == 0)                        // at the start of the slot, and...
        && (nextPromotedStructFieldVar + 1 ==
                limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
            || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);

    // Compute the proper size.
    if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
        // Narrow the emit attribute to the field's actual size for small fields.
        switch (fieldVarDsc->lvExactSize)
                fieldSize = EA_1BYTE;
                fieldSize = EA_2BYTE;
                // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
                // in which case we should not have promoted the struct variable.
                noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);

                // If the current reg number is not aligned, align it, and return to the calling loop, which will
                // consider that a filled slot and move on to the next argument register.
                if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
                    // We must update the slot target, however!
                    bytesOfNextSlotOfCurPromotedStruct += 4;
                    *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
                // Dest is an aligned pair of arg regs, if the struct type demands it.
                noway_assert((curRegNum % 2) == 0);
                // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
                assert(fieldVarDsc->lvExactSize == 4);
        // If the gc layout said it's a GC ref or byref, then the field size must be 4.
        noway_assert(fieldVarDsc->lvExactSize == 4);

    // We may need the type of the field to influence instruction selection.
    // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
    // If the fieldVarDsc is enregistered float we must use the field's exact type
    // however if it is in memory we can use an integer type TYP_I_IMPL

    var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
    if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
        fieldTypeForInstr = TYP_I_IMPL;

    // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
    if (curArgTabEntry->isHfaRegArg)
        assert(oneFieldFillsSlotFromStart);

        // Is the field variable promoted?
        if (fieldVarDsc->lvRegister)
            // Move the field var living in register to dst, if they are different registers.
            regNumber srcReg = fieldVarDsc->lvRegNum;
            regNumber dstReg = curRegNum;
            if (srcReg != dstReg)
                inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
                assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
            // Move the field var living in stack to dst.
            getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
                                      fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
                                      nextPromotedStructFieldVar, 0);
            assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP

        // Mark the arg as used and using reg val.
        genMarkTreeInReg(arg, curRegNum);
        regSet.SetUsedRegFloat(arg, true);

        // Advance for double: a TYP_DOUBLE consumes two consecutive FP registers.
        if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
            bytesOfNextSlotOfCurPromotedStruct += 4;
            curRegNum = REG_NEXT(curRegNum);
            arg->gtRegNum = curRegNum;
            regSet.SetUsedRegFloat(arg, true);
            filledExtraSlot = true;
        // Restore the arg node's register to the canonical one from the arg table.
        arg->gtRegNum = curArgTabEntry->regNum;

        bytesOfNextSlotOfCurPromotedStruct += 4;
        nextPromotedStructFieldVar++;
    if (oneFieldFillsSlotFromStart)
        // If we write to the stack, offset in outgoing args at which we'll write.
        int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
        assert(fieldArgOffset >= 0);

        // Is the source a register or memory?
        if (fieldVarDsc->lvRegister)
            if (fieldTypeForInstr == TYP_DOUBLE)
                fieldSize = EA_8BYTE;

            // Are we writing to a register or to the stack?
            if (curRegNum != MAX_REG_ARG)
                // Source is register and Dest is register.

                instruction insCopy = INS_mov;

                // Enregistered floats must be moved to integer arg registers via vmov.
                if (varTypeIsFloating(fieldTypeForInstr))
                    if (fieldTypeForInstr == TYP_FLOAT)
                        insCopy = INS_vmov_f2i;
                        assert(fieldTypeForInstr == TYP_DOUBLE);
                        insCopy = INS_vmov_d2i;

                // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
                // register (which may become a tmp register, if its held in the argument register that the first
                // register to be copied will overwrite).
                regNumber otherRegNum = REG_STK;
                if (fieldVarDsc->lvType == TYP_LONG)
                    otherRegNum = fieldVarDsc->lvOtherReg;
                    // Are we about to overwrite?
                    if (otherRegNum == curRegNum)
                        if (regTmp == REG_STK)
                            regTmp = regSet.rsPickFreeReg();
                        // Copy the second register to the temp reg.
                        getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
                        regTracker.rsTrackRegCopy(regTmp, otherRegNum);
                        otherRegNum = regTmp;

                if (fieldVarDsc->lvType == TYP_DOUBLE)
                    // A double needs an even/odd pair starting no later than R2.
                    assert(curRegNum <= REG_R2);
                    getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
                                                fieldVarDsc->lvRegNum);
                    regTracker.rsTrackRegTrash(curRegNum);
                    regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
                    // Now do the first register.
                    // It might be the case that it's already in the desired register; if so do nothing.
                    if (curRegNum != fieldVarDsc->lvRegNum)
                        getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
                        regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);

                // In either case, mark the arg register as used.
                regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));

                // Is there a second half of the value?
                if (fieldVarDsc->lvExactSize == 8)
                    curRegNum = genRegArgNext(curRegNum);
                    // The second dest reg must also be an argument register.
                    noway_assert(curRegNum < MAX_REG_ARG);

                    // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
                    if (fieldVarDsc->lvType == TYP_LONG)
                        // Copy the second register into the next argument register

                        // If it's a register variable for a TYP_LONG value, then otherReg now should
                        // hold the second register or it might say that it's in the stack.
                        if (otherRegNum == REG_STK)
                            // Apparently when we partially enregister, we allocate stack space for the full
                            // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
                            // parameter, to get the high half.
                            getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
                                                      nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                            regTracker.rsTrackRegTrash(curRegNum);
                            // The other half is in a register.
                            // Again, it might be the case that it's already in the desired register; if so do
                            if (curRegNum != otherRegNum)
                                getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
                                regTracker.rsTrackRegCopy(curRegNum, otherRegNum);

                    // Also mark the 2nd arg register as used.
                    regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
                    // Record the fact that we filled in an extra register slot
                    filledExtraSlot = true;
                // Source is register and Dest is memory (OutgoingArgSpace).

                // Now write the srcReg into the right location in the outgoing argument list.
                getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                          compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);

                if (fieldVarDsc->lvExactSize == 8)
                    // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
                    if (fieldVarDsc->lvType == TYP_LONG)
                        if (fieldVarDsc->lvOtherReg == REG_STK)
                            // Source is stack.
                            if (regTmp == REG_STK)
                                regTmp = regSet.rsPickFreeReg();
                            // Apparently if we partially enregister, we allocate stack space for the full
                            // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
                            // parameter, to get the high half.
                            getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                                      nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                            regTracker.rsTrackRegTrash(regTmp);
                            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
                                                      compiler->lvaOutgoingArgSpaceVar,
                                                      fieldArgOffset + TARGET_POINTER_SIZE);
                            // High half is already in a register; store it directly.
                            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
                                                      compiler->lvaOutgoingArgSpaceVar,
                                                      fieldArgOffset + TARGET_POINTER_SIZE);
                    // Record the fact that we filled in an extra register slot
                    filledExtraSlot = true;
            assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
            // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
            if (arg->gtFlags & GTF_VAR_DEATH)
                *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
                // We don't bother with the second reg of a register pair, since if it has one,
                // it obviously doesn't hold a pointer.
            // Source is in memory.

            if (curRegNum != MAX_REG_ARG)
                // Dest is a register: load the field directly from the local's stack home.
                getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
                                          nextPromotedStructFieldVar, 0);
                regTracker.rsTrackRegTrash(curRegNum);
                regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));

                if (fieldVarDsc->lvExactSize == 8)
                    noway_assert(fieldSize == EA_4BYTE);
                    curRegNum = genRegArgNext(curRegNum);
                    noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
                    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
                                              nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
                    regTracker.rsTrackRegTrash(curRegNum);
                    regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
                    // Record the fact that we filled in an extra stack slot
                    filledExtraSlot = true;
                // Dest is the outgoing arg area: copy through a scratch register.
                if (regTmp == REG_STK)
                    regTmp = regSet.rsPickFreeReg();

                getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                          nextPromotedStructFieldVar, 0);

                // Now write regTmp into the right location in the outgoing argument list.
                getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                          compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
                // We overwrote "regTmp", so erase any previous value we recorded that it contained.
                regTracker.rsTrackRegTrash(regTmp);

                if (fieldVarDsc->lvExactSize == 8)
                    getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                              nextPromotedStructFieldVar, TARGET_POINTER_SIZE);

                    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
                                              compiler->lvaOutgoingArgSpaceVar,
                                              fieldArgOffset + TARGET_POINTER_SIZE);
                    // Record the fact that we filled in an extra stack slot
                    filledExtraSlot = true;

        // Bump up the following if we filled in an extra slot
        if (filledExtraSlot)
            bytesOfNextSlotOfCurPromotedStruct += 4;

        // Go to the next field.
        nextPromotedStructFieldVar++;
        if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
            fieldVarDsc = NULL;
            // The next field should have the same parent variable, and we should have put the field vars in order
            // sorted by offset.
            assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
                   fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
                   fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
            fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];

        bytesOfNextSlotOfCurPromotedStruct += 4;
    else // oneFieldFillsSlotFromStart == false
        // The current slot should contain more than one field.
        // We'll construct a word in memory for the slot, then load it into a register.
        // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
        // slot, in which case we'll just skip this loop altogether.)
        while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
            // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
            // whose fields have their natural alignment, and alignment == size on ARM).
            noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);

            // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
            int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
            noway_assert(argOffset == INT32_MAX ||
                         (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));

            if (fieldVarDsc->lvRegister)
                if (curRegNum != MAX_REG_ARG)
                    // Accumulate the field into the per-slot scratch word, to be loaded
                    // into the argument register once the slot is complete.
                    noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                              compiler->lvaPromotedStructAssemblyScratchVar,
                                              fieldVarDsc->lvFldOffset % 4);
                    // Dest is stack; write directly.
                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
                                              compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
                // Source is in memory.

                // Make sure we have a temporary register to use...
                if (regTmp == REG_STK)
                    regTmp = regSet.rsPickFreeReg();
                getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
                                          nextPromotedStructFieldVar, 0);
                regTracker.rsTrackRegTrash(regTmp);

                if (curRegNum != MAX_REG_ARG)
                    noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                              compiler->lvaPromotedStructAssemblyScratchVar,
                                              fieldVarDsc->lvFldOffset % 4);
                    // Dest is stack; write the loaded field directly.
                    getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
                                              compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);

            // Go to the next field.
            nextPromotedStructFieldVar++;
            if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
                fieldVarDsc = NULL;
                // The next field should have the same parent variable, and we should have put the field vars in
                // order sorted by offset.
                noway_assert(fieldVarDsc->lvIsStructField &&
                             compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
                             fieldVarDsc->lvParentLcl ==
                                 compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
                             fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
                fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];

        // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
        // write to an argument register, do so.
        if (curRegNum != MAX_REG_ARG)
            noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);

            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
                                      compiler->lvaPromotedStructAssemblyScratchVar, 0);
            regTracker.rsTrackRegTrash(curRegNum);
            regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
        // We've finished a slot; set the goal of the next slot.
        bytesOfNextSlotOfCurPromotedStruct += 4;

    // Write back the updates.
    *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
    *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
    *pCurRegNum = curRegNum;

    return filledExtraSlot;
#endif // _TARGET_ARM_
//------------------------------------------------------------------------
// genFindDeadFieldRegs: For a copy-block node whose source is a promoted
// struct local, compute the set of registers holding enregistered field
// variables of that local that go dead at this use.
//
// Arguments:
//    cpBlk - a copy-block node; OperIsCopyBlkOp() must hold (asserted).
//
// Return Value:
//    Mask of registers whose enregistered field vars die here (the source
//    local carries GTF_VAR_DEATH); callers use it to update the GC register
//    sets after the copy.
//
regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
    noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
    GenTreePtr rhs = cpBlk->gtOp.gtOp1;
    // If the source is an indirection of an address-of, drill down to the
    // underlying tree (typically a local variable node).
    if (rhs->OperIsIndir())
        GenTree* addr = rhs->AsIndir()->Addr();
        if (addr->gtOper == GT_ADDR)
            rhs = addr->gtOp.gtOp1;
    if (rhs->OperGet() == GT_LCL_VAR)
        LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
        if (rhsDsc->lvPromoted)
            // It is promoted; iterate over its field vars.
            unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
            for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
                LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
                // Did the variable go dead, and is it enregistered?
                if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
                    // Add the register number to the set of registers holding field vars that are going dead.
                    // 'res' accumulates the mask that this function returns.
                    res |= genRegMask(fieldVarDsc->lvRegNum);
17435 void CodeGen::SetupLateArgs(GenTreeCall* call)
17437 GenTreeArgList* lateArgs;
17440 /* Generate the code to move the late arguments into registers */
17442 for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
17444 curr = lateArgs->Current();
17447 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
17448 assert(curArgTabEntry);
17449 regNumber regNum = curArgTabEntry->regNum;
17450 unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
17452 assert(isRegParamType(curr->TypeGet()));
17453 assert(curr->gtType != TYP_VOID);
17455 /* If the register is already marked as used, it will become
17456 multi-used. However, since it is a callee-trashed register,
17457 we will have to spill it before the call anyway. So do it now */
17460 // Remember which registers hold pointers. We will spill
17461 // them, but the code that follows will fetch reg vars from
17462 // the registers, so we need that gc compiler->info.
17463 // Also regSet.rsSpillReg doesn't like to spill enregistered
17464 // variables, but if this is their last use that is *exactly*
17465 // what we need to do, so we have to temporarily pretend
17466 // they are no longer live.
17467 // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
17468 // when their last use is about to occur?
17469 // It is because this is the second operand to be evaluated
17470 // of some parent binary op, and the first operand is
17471 // live across this tree, and thought it could re-use the
17472 // variables register (like a GT_REG_VAR). This probably
17473 // is caused by RegAlloc assuming the first operand would
17474 // evaluate into another register.
17475 regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
17476 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
17477 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
17478 regSet.RemoveMaskVars(rsTemp);
17480 regNumber regNum2 = regNum;
17481 for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
17483 if (regSet.rsMaskUsed & genRegMask(regNum2))
17485 assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
17486 regSet.rsSpillReg(regNum2);
17488 regNum2 = genRegArgNext(regNum2);
17489 assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
17492 // Restore gc tracking masks.
17493 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
17494 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
17496 // Set maskvars back to normal
17497 regSet.AddMaskVars(rsTemp);
17500 /* Evaluate the argument to a register */
17502 /* Check if this is the guess area for the resolve interface call
17503 * Pass a size of EA_OFFSET*/
17504 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
17506 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
17507 regTracker.rsTrackRegTrash(regNum);
17509 /* The value is now in the appropriate register */
17511 genMarkTreeInReg(curr, regNum);
17513 regSet.rsMarkRegUsed(curr);
17515 #ifdef _TARGET_ARM_
17516 else if (curr->gtType == TYP_STRUCT)
17518 GenTree* arg = curr;
17519 while (arg->gtOper == GT_COMMA)
17521 GenTreePtr op1 = arg->gtOp.gtOp1;
17522 genEvalSideEffects(op1);
17523 genUpdateLife(op1);
17524 arg = arg->gtOp.gtOp2;
17526 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
17527 (arg->OperGet() == GT_MKREFANY));
17529 // This code passes a TYP_STRUCT by value using
17530 // the argument registers first and
17531 // then the lvaOutgoingArgSpaceVar area.
17534 // We prefer to choose low registers here to reduce code bloat
17535 regMaskTP regNeedMask = RBM_LOW_REGS;
17536 unsigned firstStackSlot = 0;
17537 unsigned argAlign = TARGET_POINTER_SIZE;
17538 size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
17540 unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
17543 if (regNum == REG_STK)
17545 firstStackSlot = 0;
17549 if (argAlign == (TARGET_POINTER_SIZE * 2))
17551 assert((regNum & 1) == 0);
17554 // firstStackSlot is an index of the first slot of the struct
17555 // that is on the stack, in the range [0,slots]. If it is 'slots',
17556 // then the entire struct is in registers. It is also equal to
17557 // the number of slots of the struct that are passed in registers.
17559 if (curArgTabEntry->isHfaRegArg)
17561 // HFA arguments that have been decided to go into registers fit the reg space.
17562 assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
17563 assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
17564 "HFA argument doesn't fit entirely in FP argument registers");
17565 firstStackSlot = slots;
17567 else if (regNum + slots > MAX_REG_ARG)
17569 firstStackSlot = MAX_REG_ARG - regNum;
17570 assert(firstStackSlot > 0);
17574 firstStackSlot = slots;
17577 if (curArgTabEntry->isHfaRegArg)
17579 // Mask out the registers used by an HFA arg from the ones used to compute tree into.
17580 for (unsigned i = regNum; i < regNum + slots; i++)
17582 regNeedMask &= ~genRegMask(regNumber(i));
17587 // This holds the set of registers corresponding to enregistered promoted struct field variables
17588 // that go dead after this use of the variable in the argument list.
17589 regMaskTP deadFieldVarRegs = RBM_NONE;
17591 // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
17592 // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
17593 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
17594 // table entry for the promoted struct local. As we fill slots with the contents of a
17595 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
17596 // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
17597 // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
17598 // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
17600 LclVarDsc* promotedStructLocalVarDesc = NULL;
17601 unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
17602 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
17603 GenTreePtr structLocalTree = NULL;
17605 BYTE* gcLayout = NULL;
17606 regNumber regSrc = REG_NA;
17607 if (arg->gtOper == GT_OBJ)
17609 // Are we loading a promoted struct local var?
17610 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17612 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
17613 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17614 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
17616 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17618 if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17622 // Fix 388395 ARM JitStress WP7
17623 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17625 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17626 promotedStructLocalVarDesc = varDsc;
17627 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17631 if (promotedStructLocalVarDesc == NULL)
17633 // If it's not a promoted struct variable, set "regSrc" to the address
17634 // of the struct local.
17635 genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
17636 noway_assert(arg->gtObj.gtOp1->InReg());
17637 regSrc = arg->gtObj.gtOp1->gtRegNum;
17638 // Remove this register from the set of registers that we pick from, unless slots equals 1
17640 regNeedMask &= ~genRegMask(regSrc);
17643 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17644 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
17646 else if (arg->gtOper == GT_LCL_VAR)
17648 // Move the address of the LCL_VAR in arg into reg
17650 unsigned varNum = arg->gtLclVarCommon.gtLclNum;
17652 // Are we loading a promoted struct local var?
17653 structLocalTree = arg;
17654 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17655 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
17657 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17659 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17661 if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17662 // guaranteed to live
17665 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17666 promotedStructLocalVarDesc = varDsc;
17667 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17670 if (promotedStructLocalVarDesc == NULL)
17672 regSrc = regSet.rsPickFreeReg(regNeedMask);
17673 // Remove this register from the set of registers that we pick from, unless slots equals 1
17675 regNeedMask &= ~genRegMask(regSrc);
17677 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
17678 regTracker.rsTrackRegTrash(regSrc);
17679 gcLayout = compiler->lvaGetGcLayout(varNum);
17682 else if (arg->gtOper == GT_MKREFANY)
17684 assert(slots == 2);
17685 assert((firstStackSlot == 1) || (firstStackSlot == 2));
17686 assert(argOffset == 0); // ???
17687 PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
17689 // Adjust argOffset if part of this guy was pushed onto the stack
17690 if (firstStackSlot < slots)
17692 argOffset += TARGET_POINTER_SIZE;
17695 // Skip the copy loop below because we have already placed the argument in the right place
17701 assert(!"Unsupported TYP_STRUCT arg kind");
17702 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17705 if (promotedStructLocalVarDesc != NULL)
17707 // We must do the stack parts first, since those might need values
17708 // from argument registers that will be overwritten in the portion of the
17709 // loop that writes into the argument registers.
17710 bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
17711 // Now find the var number of the first that starts in the first stack slot.
17712 unsigned fieldVarLim =
17713 promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
17714 while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
17715 (firstStackSlot * TARGET_POINTER_SIZE) &&
17716 nextPromotedStructFieldVar < fieldVarLim)
17718 nextPromotedStructFieldVar++;
17720 // We should only reach the limit -- meaning no field goes even partly on the stack --
17721 // if the first stack slot is at or after the last slot.
17722 assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
17725 if (slots > 0) // the mkref case may have set "slots" to zero.
17727 // First pass the stack portion of the struct (if any)
17729 int argOffsetOfFirstStackSlot = argOffset;
17730 for (unsigned i = firstStackSlot; i < slots; i++)
17732 emitAttr fieldSize;
17733 if (gcLayout[i] == TYPE_GC_NONE)
17734 fieldSize = EA_PTRSIZE;
17735 else if (gcLayout[i] == TYPE_GC_REF)
17736 fieldSize = EA_GCREF;
17739 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17740 fieldSize = EA_BYREF;
17743 regNumber maxRegArg = regNumber(MAX_REG_ARG);
17744 if (promotedStructLocalVarDesc != NULL)
17746 regNumber regTmp = REG_STK;
17748 bool filledExtraSlot =
17749 genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
17750 &nextPromotedStructFieldVar,
17751 &bytesOfNextSlotOfCurPromotedStruct,
17752 /*pCurRegNum*/ &maxRegArg, argOffset,
17753 /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
17754 TARGET_POINTER_SIZE,
17755 argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
17756 if (filledExtraSlot)
17759 argOffset += TARGET_POINTER_SIZE;
17762 else // (promotedStructLocalVarDesc == NULL)
17764 // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc
17765 // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
17766 // to regSet.rsPickFreeReg, so we need to be a little more forceful.
17767 // Otherwise, just re-use the same register.
17769 regNumber regTmp = regSrc;
17772 regMaskTP regSrcUsed;
17773 regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed);
17775 regTmp = regSet.rsPickFreeReg(regNeedMask);
17777 noway_assert(regTmp != regSrc);
17779 regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
17782 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
17783 i * TARGET_POINTER_SIZE);
17785 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17786 compiler->lvaOutgoingArgSpaceVar, argOffset);
17787 regTracker.rsTrackRegTrash(regTmp);
17789 argOffset += TARGET_POINTER_SIZE;
17792 // Now pass the register portion of the struct
17795 bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
17796 if (promotedStructLocalVarDesc != NULL)
17797 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17799 // Create a nested loop here so that the first time thru the loop
17800 // we setup all of the regArg registers except for possibly
17801 // the one that would overwrite regSrc. Then in the final loop
17802 // (if necessary) we just setup regArg/regSrc with the overwrite
17804 bool overwriteRegSrc = false;
17805 bool needOverwriteRegSrc = false;
17808 if (needOverwriteRegSrc)
17809 overwriteRegSrc = true;
17811 for (unsigned i = 0; i < firstStackSlot; i++)
17813 regNumber regArg = (regNumber)(regNum + i);
17815 if (overwriteRegSrc == false)
17817 if (regArg == regSrc)
17819 needOverwriteRegSrc = true;
17825 if (regArg != regSrc)
17829 emitAttr fieldSize;
17830 if (gcLayout[i] == TYPE_GC_NONE)
17831 fieldSize = EA_PTRSIZE;
17832 else if (gcLayout[i] == TYPE_GC_REF)
17833 fieldSize = EA_GCREF;
17836 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17837 fieldSize = EA_BYREF;
17840 regNumber regTmp = REG_STK;
17841 if (promotedStructLocalVarDesc != NULL)
17843 bool filledExtraSlot =
17844 genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
17845 fieldSize, &nextPromotedStructFieldVar,
17846 &bytesOfNextSlotOfCurPromotedStruct,
17847 /*pCurRegNum*/ ®Arg,
17848 /*argOffset*/ INT32_MAX,
17849 /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
17850 /*argOffsetOfFirstStackSlot*/ INT32_MAX,
17851 &deadFieldVarRegs, ®Tmp);
17852 if (filledExtraSlot)
17857 getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
17858 fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
17860 regTracker.rsTrackRegTrash(regArg);
17862 } while (needOverwriteRegSrc != overwriteRegSrc);
17865 if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
17867 regSet.rsMarkRegFree(genRegMask(regSrc));
17870 if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
17874 for (unsigned i = 1; i < firstStackSlot; i++)
17876 arg->gtRegNum = (regNumber)(regNum + i);
17877 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17879 arg->gtRegNum = regNum;
17880 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17883 // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
17884 // so update liveness.
17885 genUpdateLife(arg);
17887 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
17888 // register holding gc pointers.
17889 if (deadFieldVarRegs != RBM_NONE)
17890 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
17892 else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
17894 if (curArgTabEntry->regNum == REG_STK)
17896 // The arg is passed in the outgoing argument area of the stack frame
17897 genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
17898 assert(curr->InReg()); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
17900 inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
17901 inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
17905 assert(regNum < REG_ARG_LAST);
17906 regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
17907 genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
17908 assert(curr->gtRegPair == regPair);
17909 regSet.rsMarkRegPairUsed(curr);
17912 #endif // _TARGET_ARM_
17913 else if (curArgTabEntry->regNum == REG_STK)
17915 // The arg is passed in the outgoing argument area of the stack frame
17917 genCodeForTree(curr, 0);
17918 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
17920 inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
17922 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
17923 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
17927 if (!varTypeIsFloating(curr->gtType))
17929 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
17930 assert(curr->gtRegNum == regNum);
17931 regSet.rsMarkRegUsed(curr);
17933 else // varTypeIsFloating(curr->gtType)
17935 if (genIsValidFloatReg(regNum))
17937 genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
17939 assert(curr->gtRegNum == regNum);
17940 regSet.rsMarkRegUsed(curr);
17944 genCodeForTree(curr, 0);
17945 // If we are loading a floating point type into integer registers
17946 // then it must be for varargs.
17947 // genCodeForTree will load it into a floating point register,
17948 // now copy it into the correct integer register(s)
17949 if (curr->TypeGet() == TYP_FLOAT)
17951 assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
17952 regSet.rsSpillRegIfUsed(regNum);
17953 #ifdef _TARGET_ARM_
17954 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
17956 #error "Unsupported target"
17958 regTracker.rsTrackRegTrash(regNum);
17960 curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
17961 curr->gtRegNum = regNum;
17962 regSet.rsMarkRegUsed(curr);
17966 assert(curr->TypeGet() == TYP_DOUBLE);
17967 regNumber intRegNumLo = regNum;
17968 curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
17969 #ifdef _TARGET_ARM_
17970 regNumber intRegNumHi = regNumber(intRegNumLo + 1);
17971 assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
17972 assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
17973 regSet.rsSpillRegIfUsed(intRegNumHi);
17974 regSet.rsSpillRegIfUsed(intRegNumLo);
17976 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
17977 regTracker.rsTrackRegTrash(intRegNumLo);
17978 regTracker.rsTrackRegTrash(intRegNumHi);
17979 curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
17980 regSet.rsMarkRegPairUsed(curr);
17982 #error "Unsupported target"
17990 /* If any of the previously loaded arguments were spilled - reload them */
17992 for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
17994 curr = lateArgs->Current();
17997 if (curr->gtFlags & GTF_SPILLED)
17999 if (isRegPairType(curr->gtType))
18001 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
18005 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
18011 #ifdef _TARGET_ARM_
18013 // 'Push' a single GT_MKREFANY argument onto a call's argument list
18014 // The argument is passed as described by the fgArgTabEntry
18015 // If any part of the struct is to be passed in a register the
18016 // regNum value will be equal to the registers used to pass the
18017 // first part of the struct.
18018 // If any part is to go onto the stack, we first generate the
18019 // value into a register specified by 'regNeedMask' and
18020 // then store it to the outgoing argument area.
18021 // When this method returns, both parts of the TypeReference have
18022 // been pushed onto the stack, but *no* registers have been marked
18023 // as 'in-use', that is the responsibility of the caller.
18025 void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
// Register assigned to this argument's first slot (REG_STK if it goes on the stack).
18027 regNumber regNum = curArgTabEntry->regNum;
18029 assert(mkRefAnyTree->gtOper == GT_MKREFANY);
// Mask of the register kept live holding the first field while the second field
// is generated; stays 0 when the first field was stored to the stack.
18030 regMaskTP arg1RegMask = 0;
// Byte offset of this argument's first slot within the outgoing argument area.
18031 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18033 // Construct the TypedReference directly into the argument list of the call by
18034 // 'pushing' the first field of the typed reference: the pointer.
18035 // Do this by directly generating it into the argument register or outgoing arg area of the stack.
18036 // Mark it as used so we don't trash it while generating the second field.
18038 if (regNum == REG_STK)
// Evaluate the first field (gtOp1) into some register, store it into the outgoing
// arg area, then release the register immediately (FREE_REG).
18040 genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18041 noway_assert(mkRefAnyTree->gtOp.gtOp1->InReg());
18042 regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
18043 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
18044 regTracker.rsTrackRegTrash(tmpReg1);
// The second field goes into the next outgoing stack slot.
18045 argOffset += TARGET_POINTER_SIZE;
// Otherwise: generate the first field directly into its argument register and keep
// it marked in-use (KEEP_REG) so computing the second field cannot trash it.
18050 assert(regNum <= REG_ARG_LAST);
18051 arg1RegMask = genRegMask(regNum);
18052 genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
// Location for the second field: the next argument register, or the stack when the
// first field occupied the last argument register.
18053 regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
18056 // Now 'push' the second field of the typed reference: the method table.
18057 if (regNum2 == REG_STK)
18059 genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18060 noway_assert(mkRefAnyTree->gtOp.gtOp2->InReg());
18061 regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
18062 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
18063 regTracker.rsTrackRegTrash(tmpReg2);
18067 assert(regNum2 <= REG_ARG_LAST);
18068 // We don't have to mark this register as being in use here because it will
18069 // be done by the caller, and we don't want to double-count it.
18070 genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
18073 // Now that we are done generating the second part of the TypedReference, we can mark
18074 // the first register as free.
18075 // The caller (in the shared path) will re-mark all registers used by this argument
18076 // as being used, so we don't want to double-count this one.
18077 if (arg1RegMask != 0)
18079 GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
18080 if (op1->gtFlags & GTF_SPILLED)
18082 /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
18084 regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
// Not spilled: simply release the first field's register.
18088 regSet.rsMarkRegFree(arg1RegMask);
18092 #endif // _TARGET_ARM_
18094 #endif // FEATURE_FIXED_OUT_ARGS
// Make the target address of an indirect call (call->gtCallAddr) addressable.
// Computing the target may spill argument registers that were already loaded, so
// this records each arg register's occupant first and unspills any that got
// spilled afterwards.  The resulting mask (fptrRegs) describes the register(s)
// that address the call target.
18096 regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreeCall* call)
18098 assert((gtCallTypes)call->gtCallType == CT_INDIRECT);
18100 regMaskTP fptrRegs;
18102 /* Loading the indirect call target might cause one or more of the previously
18103 loaded argument registers to be spilled. So, we save information about all
18104 the argument registers, and unspill any of them that get spilled, after
18105 the call target is loaded.
// Per-argument-register record: the tree occupying the register and its
// reg / reg-pair assignment.
18114 } regArgTab[MAX_REG_ARG];
18116 /* Record the previously loaded arguments, if any */
// Prefer currently-free registers for the target, to avoid spilling args.
18119 regMaskTP prefRegs = regSet.rsRegMaskFree();
// Registers occupied by already-loaded arguments; used at the end to keep the
// target addressable without touching them.
18120 regMaskTP argRegs = RBM_NONE;
18121 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18124 regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18125 GenTreePtr argTree = regSet.rsUsedTree[regNum];
18126 regArgTab[regIndex].node = argTree;
// Struct args in registers are never spilled, so only non-structs are recorded.
18127 if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
18129 assert(argTree->InReg());
18130 if (isRegPairType(argTree->gtType))
// Pair-typed (e.g. long) argument: remember the whole register pair.
18132 regPairNo regPair = argTree->gtRegPair;
18133 assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
18134 regArgTab[regIndex].regPair = regPair;
18135 mask = genRegPairMask(regPair);
// Single-register argument.
18139 assert(regNum == argTree->gtRegNum);
18140 regArgTab[regIndex].regNum = regNum;
18141 mask = genRegMask(regNum);
// An occupied argument register must not be in the preferred-free set.
18143 assert(!(prefRegs & mask));
18148 /* Record the register(s) used for the indirect call func ptr */
18149 fptrRegs = genMakeRvalueAddressable(call->gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18151 /* If any of the previously loaded arguments were spilled, reload them */
18153 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18155 GenTreePtr argTree = regArgTab[regIndex].node;
18156 if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18158 assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18159 if (isRegPairType(argTree->gtType))
18161 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18165 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18170 /* Make sure the target is still addressable while avoiding the argument registers */
18172 fptrRegs = genKeepAddressable(call->gtCallAddr, fptrRegs, argRegs);
18177 /*****************************************************************************
18179 * Generate code for a call. If the call returns a value in register(s), the
18180 * register mask that describes where the result will be found is returned;
18181 * otherwise, RBM_NONE is returned.
18185 #pragma warning(push)
18186 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
18188 regMaskTP CodeGen::genCodeForCall(GenTreeCall* call, bool valUsed)
18194 emitter::EmitCallType emitCallType;
18196 unsigned saveStackLvl;
18198 BasicBlock* returnLabel = DUMMY_INIT(NULL);
18199 LclVarDsc* frameListRoot = NULL;
18201 unsigned savCurIntArgReg;
18202 unsigned savCurFloatArgReg;
18206 regMaskTP fptrRegs = RBM_NONE;
18207 regMaskTP vptrMask = RBM_NONE;
18210 unsigned stackLvl = getEmitter()->emitCurStackLvl;
18212 if (compiler->verbose)
18214 printf("\t\t\t\t\t\t\tBeg call ");
18215 Compiler::printTreeID(call);
18216 printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18220 #ifdef _TARGET_ARM_
18221 if (compiler->opts.ShouldUsePInvokeHelpers() && (call->gtFlags & GTF_CALL_UNMANAGED) &&
18222 ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_NONVIRT))
18224 (void)genPInvokeCallProlog(nullptr, 0, (CORINFO_METHOD_HANDLE) nullptr, nullptr);
18228 gtCallTypes callType = (gtCallTypes)call->gtCallType;
18229 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
18231 CORINFO_SIG_INFO* sigInfo = nullptr;
18233 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18235 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18238 /* Make some sanity checks on the call node */
18240 // "this" only makes sense for user functions
18241 noway_assert(call->gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18242 // tailcalls won't be done for helpers, caller-pop args, and check that
18243 // the global flag is set
18244 noway_assert(!call->IsTailCall() ||
18245 (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18248 // Pass the call signature information down into the emitter so the emitter can associate
18249 // native call sites with the signatures they were generated from.
18250 if (callType != CT_HELPER)
18252 sigInfo = call->callSig;
18256 unsigned pseudoStackLvl = 0;
18258 if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18260 noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18262 pseudoStackLvl = genStackLevel;
18264 noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18265 "so fgAddCodeRef() should have set isFramePointerRequired()");
18268 /* Mark the current stack level and list of pointer arguments */
18270 saveStackLvl = genStackLevel;
18272 /*-------------------------------------------------------------------------
18273 * Set up the registers and arguments
18276 /* We'll keep track of how much we've pushed on the stack */
18280 /* We need to get a label for the return address with the proper stack depth. */
18281 /* For the callee pops case (the default) that is before the args are pushed. */
18283 if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
18285 returnLabel = genCreateTempLabel();
18289 Make sure to save the current argument register status
18290 in case we have nested calls.
18293 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18294 savCurIntArgReg = intRegState.rsCurRegArgNum;
18295 savCurFloatArgReg = floatRegState.rsCurRegArgNum;
18296 intRegState.rsCurRegArgNum = 0;
18297 floatRegState.rsCurRegArgNum = 0;
18299 /* Pass the arguments */
18301 if ((call->gtCallObjp != NULL) || (call->gtCallArgs != NULL))
18303 argSize += genPushArgList(call);
18306 /* We need to get a label for the return address with the proper stack depth. */
18307 /* For the caller pops case (cdecl) that is after the args are pushed. */
18309 if (call->gtFlags & GTF_CALL_UNMANAGED)
18311 if (call->gtFlags & GTF_CALL_POP_ARGS)
18312 returnLabel = genCreateTempLabel();
18314 /* Make sure that we now have a label */
18315 noway_assert(returnLabel != DUMMY_INIT(NULL));
18318 if (callType == CT_INDIRECT)
18320 fptrRegs = genLoadIndirectCallTarget(call);
18323 /* Make sure any callee-trashed registers are saved */
18325 regMaskTP calleeTrashedRegs = RBM_NONE;
18327 #if GTF_CALL_REG_SAVE
18328 if (call->gtFlags & GTF_CALL_REG_SAVE)
18330 /* The return value reg(s) will definitely be trashed */
18332 switch (call->gtType)
18337 #if !CPU_HAS_FP_SUPPORT
18340 calleeTrashedRegs = RBM_INTRET;
18344 #if !CPU_HAS_FP_SUPPORT
18347 calleeTrashedRegs = RBM_LNGRET;
18351 #if CPU_HAS_FP_SUPPORT
18355 calleeTrashedRegs = 0;
18359 noway_assert(!"unhandled/unexpected type");
18365 calleeTrashedRegs = RBM_CALLEE_TRASH;
18368 /* Spill any callee-saved registers which are being used */
18370 regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18372 /* We need to save all GC registers to the InlinedCallFrame.
18373 Instead, just spill them to temps. */
18375 if (call->gtFlags & GTF_CALL_UNMANAGED)
18376 spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18378 // Ignore fptrRegs as it is needed only to perform the indirect call
18380 spillRegs &= ~fptrRegs;
18382 /* Do not spill the argument registers.
18383 Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18385 noway_assert((regSet.rsMaskMult & call->gtCallRegUsedMask) == 0);
18386 spillRegs &= ~call->gtCallRegUsedMask;
18390 regSet.rsSpillRegs(spillRegs);
18393 #if FEATURE_STACK_FP_X87
18395 SpillForCallStackFP();
18397 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18400 regNumber regReturn = regSet.PickRegFloat();
18402 // Assign reg to tree
18403 genMarkTreeInReg(call, regReturn);
18406 regSet.SetUsedRegFloat(call, true);
18409 compCurFPState.Push(regReturn);
18412 SpillForCallRegisterFP(call->gtCallRegUsedMask);
18415 /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
18417 retSize = EA_PTRSIZE;
18421 if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
18423 retSize = EA_GCREF;
18425 else if (call->gtType == TYP_BYREF)
18427 retSize = EA_BYREF;
18431 /*-------------------------------------------------------------------------
18432 * For caller-pop calls, the GC info will report the arguments as pending
18433 arguments as the caller explicitly pops them. Also should be
18434 reported as non-GC arguments as they effectively go dead at the
18435 call site (callee owns them)
18438 args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
18440 #ifdef PROFILING_SUPPORTED
18442 /*-------------------------------------------------------------------------
18443 * Generate the profiling hooks for the call
18446 /* Treat special cases first */
18448 /* fire the event at the call site */
18449 /* alas, right now I can only handle calls via a method handle */
18450 if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->IsTailCall())
18452 unsigned saveStackLvl2 = genStackLevel;
18455 // Push the profilerHandle
18457 CLANG_FORMAT_COMMENT_ANCHOR;
18459 #ifdef _TARGET_X86_
18460 regMaskTP byrefPushedRegs;
18461 regMaskTP norefPushedRegs;
18462 regMaskTP pushedArgRegs = genPushRegs(call->gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
18464 if (compiler->compProfilerMethHndIndirected)
18466 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
18467 (ssize_t)compiler->compProfilerMethHnd);
18471 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
18475 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18476 sizeof(int) * 1, // argSize
18477 EA_UNKNOWN); // retSize
18480 // Adjust the number of stack slots used by this managed method if necessary.
18482 if (compiler->fgPtrArgCntMax < 1)
18484 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
18485 compiler->fgPtrArgCntMax = 1;
18488 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
18490 // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
18491 // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
18492 // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
18494 // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
18495 // to disturb them and hence argument registers are locked here.
18496 regMaskTP usedMask = RBM_NONE;
18497 regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
18499 regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
18500 regSet.rsLockReg(genRegMask(scratchReg));
18502 emitAttr attr = EA_UNKNOWN;
18503 if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
18506 gcInfo.gcMarkRegSetGCref(scratchReg);
18508 else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
18511 gcInfo.gcMarkRegSetByref(scratchReg);
18518 getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
18519 regTracker.rsTrackRegTrash(scratchReg);
18521 if (compiler->compProfilerMethHndIndirected)
18523 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18524 regTracker.rsTrackRegTrash(REG_R0);
18528 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18531 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18533 EA_UNKNOWN); // retSize
18535 // Restore back to the state that existed before profiler callback
18536 gcInfo.gcMarkRegSetNpt(scratchReg);
18537 getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
18538 regTracker.rsTrackRegTrash(REG_R0);
18539 regSet.rsUnlockReg(genRegMask(scratchReg));
18540 regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
18542 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
18543 #endif //_TARGET_X86_
18545 /* Restore the stack level */
18546 SetStackLevel(saveStackLvl2);
18549 #endif // PROFILING_SUPPORTED
18552 /*-------------------------------------------------------------------------
18553 * Generate an ESP check for the call
18556 if (compiler->opts.compStackCheckOnCall
18557 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
18558 // check the stacks as frequently as possible
18559 && !call->IsHelperCall()
18561 && call->gtCallType == CT_USER_FUNC
18565 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
18566 compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
18567 compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
18568 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
18572 /*-------------------------------------------------------------------------
18573 * Generate the call
18576 bool fPossibleSyncHelperCall = false;
18577 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
18579 bool fTailCallTargetIsVSD = false;
18581 bool fTailCall = (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
18583 /* Check for Delegate.Invoke. If so, we inline it. We get the
18584 target-object and target-function from the delegate-object, and do
18588 if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
18590 noway_assert(call->gtCallType == CT_USER_FUNC);
18592 assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
18593 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
18594 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
18596 /* Find the offsets of the 'this' pointer and new target */
18598 CORINFO_EE_INFO* pInfo;
18599 unsigned instOffs; // offset of new 'this' pointer
18600 unsigned firstTgtOffs; // offset of first target to invoke
18601 const regNumber regThis = genGetThisArgReg(call);
18603 pInfo = compiler->eeGetEEInfo();
18604 instOffs = pInfo->offsetOfDelegateInstance;
18605 firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
18607 #ifdef _TARGET_ARM_
18608 if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18610 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, compiler->virtualStubParamInfo->GetReg(), regThis,
18611 pInfo->offsetOfSecureDelegateIndirectCell);
18612 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18614 #endif // _TARGET_ARM_
18616 // Grab an available register to use for the CALL indirection
18617 regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
18619 // Save the invoke-target-function in indCallReg
18620 // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
18621 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
18622 regTracker.rsTrackRegTrash(indCallReg);
18624 /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
18626 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
18627 regTracker.rsTrackRegTrash(regThis);
18628 noway_assert(instOffs < 127);
18630 /* Call through indCallReg */
18632 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18634 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18635 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18636 gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
18640 /*-------------------------------------------------------------------------
18641 * Virtual and interface calls
18644 switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
18646 case GTF_CALL_VIRT_STUB:
18648 regSet.rsSetRegsModified(compiler->virtualStubParamInfo->GetRegMask());
18650 // An x86 JIT which uses full stub dispatch must generate only
18651 // the following stub dispatch calls:
18653 // (1) isCallRelativeIndirect:
18654 // call dword ptr [rel32] ; FF 15 ---rel32----
18655 // (2) isCallRelative:
18656 // call abc ; E8 ---rel32----
18657 // (3) isCallRegisterIndirect:
18659 // call dword ptr [eax] ; FF 10
18661 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18662 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18665 // Please do not insert any Random NOPs while constructing this VSD call
18667 getEmitter()->emitDisableRandomNops();
18671 // This is code to set up an indirect call to a stub address computed
18672 // via dictionary lookup. However the dispatch stub receivers aren't set up
18673 // to accept such calls at the moment.
18674 if (callType == CT_INDIRECT)
18678 // -------------------------------------------------------------------------
18679 // The importer decided we needed a stub call via a computed
18680 // stub dispatch address, i.e. an address which came from a dictionary lookup.
18681 // - The dictionary lookup produces an indirected address, suitable for call
18682 // via "call [virtualStubParamInfo.reg]"
18684 // This combination will only be generated for shared generic code and when
18685 // stub dispatch is active.
18687 // No need to null check the this pointer - the dispatch code will deal with this.
18689 noway_assert(genStillAddressable(call->gtCallAddr));
18691 // Now put the address in virtualStubParamInfo.reg.
18692 // This is typically a nop when the register used for
18693 // the gtCallAddr is virtualStubParamInfo.reg
18695 inst_RV_TT(INS_mov, compiler->virtualStubParamInfo->GetReg(), call->gtCallAddr);
18696 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18698 #if defined(_TARGET_X86_)
18699 // Emit enough bytes of nops so that this sequence can be distinguished
18700 // from other virtual stub dispatch calls.
18702 // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18703 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18705 getEmitter()->emitIns_Nop(3);
18707 // Make the virtual stub call:
18708 // call [virtualStubParamInfo.reg]
18710 emitCallType = emitter::EC_INDIR_ARD;
18712 indReg = compiler->virtualStubParamInfo->GetReg();
18713 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18715 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
18717 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18719 // Make the virtual stub call:
18720 // ldr indReg, [virtualStubParamInfo.reg]
18723 emitCallType = emitter::EC_INDIR_R;
18725 // Now dereference [virtualStubParamInfo.reg] and put it in a new temp register 'indReg'
18727 indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18728 assert(call->gtCallAddr->InReg());
18729 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18730 compiler->virtualStubParamInfo->GetReg(), 0);
18731 regTracker.rsTrackRegTrash(indReg);
18734 #error "Unknown target for VSD call"
18737 getEmitter()->emitIns_Call(emitCallType,
18739 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18740 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18741 gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18745 // -------------------------------------------------------------------------
18746 // Check for a direct stub call.
18749 // Get stub addr. This will return NULL if virtual call stubs are not active
18750 void* stubAddr = NULL;
18752 stubAddr = (void*)call->gtStubCallStubAddr;
18754 noway_assert(stubAddr != NULL);
18756 // -------------------------------------------------------------------------
18757 // Direct stub calls, though the stubAddr itself may still need to be
18758 // accessed via an indirection.
18761 // No need to null check - the dispatch code will deal with null this.
18763 emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
18764 void* addr = stubAddr;
18766 regNumber callReg = REG_NA;
18768 if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
18770 #if CPU_LOAD_STORE_ARCH
18771 callReg = regSet.rsGrabReg(compiler->virtualStubParamInfo->GetRegMask());
18772 noway_assert(callReg == compiler->virtualStubParamInfo->GetReg());
18774 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, compiler->virtualStubParamInfo->GetReg(),
18775 (ssize_t)stubAddr);
18776 // The stub will write-back to this register, so don't track it
18777 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18778 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
18779 compiler->virtualStubParamInfo->GetReg(), 0);
18780 regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
18781 callTypeStubAddr = emitter::EC_INDIR_R;
18782 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18784 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18785 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18786 gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
18789 // emit an indirect call
18790 callTypeStubAddr = emitter::EC_INDIR_C;
18792 disp = (ssize_t)stubAddr;
18795 #if CPU_LOAD_STORE_ARCH
18796 if (callTypeStubAddr != emitter::EC_INDIR_R)
18799 getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCallMethHnd,
18800 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
18801 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18802 gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
18806 else // tailCall is true
18809 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
18810 // moves the this pointer out of its usual place and into the argument list.
18811 #ifdef _TARGET_X86_
18813 // Generate "cmp ECX, [ECX]" to trap null pointers
18814 const regNumber regThis = genGetThisArgReg(call);
18815 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18817 #endif // _TARGET_X86_
18819 if (callType == CT_INDIRECT)
18821 noway_assert(genStillAddressable(call->gtCallAddr));
18823 // Now put the address in EAX.
18824 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
18825 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
18827 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18831 // importer/EE should guarantee the indirection
18832 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
18834 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
18835 ssize_t(call->gtStubCallStubAddr));
18838 fTailCallTargetIsVSD = true;
18842 // OK to start inserting random NOPs again
18844 getEmitter()->emitEnableRandomNops();
18848 case GTF_CALL_VIRT_VTABLE:
18849 // stub dispatching is off or this is not a virtual call (could be a tailcall)
18852 unsigned vtabOffsOfIndirection;
18853 unsigned vtabOffsAfterIndirection;
18855 noway_assert(callType == CT_USER_FUNC);
18858 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
18859 vptrMask = genRegMask(vptrReg);
18861 /* The register no longer holds a live pointer value */
18862 gcInfo.gcMarkRegSetNpt(vptrMask);
18864 // MOV vptrReg, [REG_CALL_THIS + offs]
18865 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
18867 regTracker.rsTrackRegTrash(vptrReg);
18869 noway_assert(vptrMask & ~call->gtCallRegUsedMask);
18871 /* Get hold of the vtable offset (note: this might be expensive) */
18873 compiler->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
18874 &vtabOffsAfterIndirection);
18876 /* The register no longer holds a live pointer value */
18877 gcInfo.gcMarkRegSetNpt(vptrMask);
18879 /* Get the appropriate vtable chunk */
18881 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
18883 // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
18884 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18885 vtabOffsOfIndirection);
18888 /* Call through the appropriate vtable slot */
18892 /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
18894 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
18895 vtabOffsAfterIndirection);
18899 #if CPU_LOAD_STORE_ARCH
18900 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18901 vtabOffsAfterIndirection);
18903 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCallMethHnd,
18904 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18905 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18906 gcInfo.gcRegByrefSetCur, ilOffset,
18909 getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCallMethHnd,
18910 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18911 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18912 gcInfo.gcRegByrefSetCur, ilOffset,
18916 vtabOffsAfterIndirection); // disp
18917 #endif // CPU_LOAD_STORE_ARCH
18922 case GTF_CALL_NONVIRT:
18924 //------------------------ Non-virtual/Indirect calls -------------------------
18925 // Lots of cases follow
18926 // - Direct P/Invoke calls
18927 // - Indirect calls to P/Invoke functions via the P/Invoke stub
18928 // - Direct Helper calls
18929 // - Indirect Helper calls
18930 // - Direct calls to known addresses
18931 // - Direct calls where address is accessed by one or two indirections
18932 // - Indirect calls to computed addresses
18933 // - Tailcall versions of all of the above
18935 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
18937 //------------------------------------------------------
18938 // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
18940 // For (final and private) functions which were called with
18941 // invokevirtual, but which we call directly, we need to
18942 // dereference the object pointer to make sure it's not NULL.
18945 if (call->gtFlags & GTF_CALL_NULLCHECK)
18947 /* Generate "cmp ECX, [ECX]" to trap null pointers */
18948 const regNumber regThis = genGetThisArgReg(call);
18949 #if CPU_LOAD_STORE_ARCH
18951 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
18952 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
18953 regTracker.rsTrackRegTrash(indReg);
18955 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18959 if (call->gtFlags & GTF_CALL_UNMANAGED)
18961 //------------------------------------------------------
18962 // Non-virtual/Indirect calls: PInvoke calls.
18964 noway_assert(compiler->info.compCallUnmanaged != 0);
18966 /* args shouldn't be greater than 64K */
18968 noway_assert((argSize & 0xffff0000) == 0);
18970 /* Remember the varDsc for the callsite-epilog */
18972 frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
18974 // exact codegen is required
18975 getEmitter()->emitDisableRandomNops();
18979 regNumber indCallReg = REG_NA;
18981 if (callType == CT_INDIRECT)
18983 noway_assert(genStillAddressable(call->gtCallAddr));
18985 if (call->gtCallAddr->InReg())
18986 indCallReg = call->gtCallAddr->gtRegNum;
18988 nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
18993 noway_assert(callType == CT_USER_FUNC);
18996 regNumber tcbReg = REG_NA;
18998 if (!compiler->opts.ShouldUsePInvokeHelpers())
19000 tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19005 if (callType == CT_INDIRECT)
19007 /* Double check that the callee didn't use/trash the
19008 registers holding the call target.
19010 noway_assert(tcbReg != indCallReg);
19012 if (indCallReg == REG_NA)
19014 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19017 /* Please note that this even works with tcbReg == REG_EAX.
19018 tcbReg contains an interesting value only if frameListRoot is
19019 an enregistered local that stays alive across the call
19020 (certainly not EAX). If frameListRoot has been moved into
19021 EAX, we can trash it since it won't survive across the call
19025 inst_RV_TT(INS_mov, indCallReg, call->gtCallAddr);
19026 regTracker.rsTrackRegTrash(indCallReg);
19029 emitCallType = emitter::EC_INDIR_R;
19033 noway_assert(callType == CT_USER_FUNC);
19036 addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
19039 #if CPU_LOAD_STORE_ARCH
19040 // Load the address into a register, indirect it and call through a register
19041 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19043 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19044 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19045 regTracker.rsTrackRegTrash(indCallReg);
19046 // Now make the call "call indCallReg"
19048 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19049 methHnd, // methHnd
19050 INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19053 retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19054 gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
19056 emitCallType = emitter::EC_INDIR_R;
19059 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19060 indCallReg = REG_NA;
19065 // Double-indirection. Load the address into a register
19066 // and call indirectly through a register
19067 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19070 #if CPU_LOAD_STORE_ARCH
19071 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
19072 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19073 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19074 regTracker.rsTrackRegTrash(indCallReg);
19076 emitCallType = emitter::EC_INDIR_R;
19079 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
19080 regTracker.rsTrackRegTrash(indCallReg);
19081 emitCallType = emitter::EC_INDIR_ARD;
19083 #endif // CPU_LOAD_STORE_ARCH
19087 getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
19088 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19089 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
19090 ilOffset, indCallReg);
19092 if (callType == CT_INDIRECT)
19093 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19095 getEmitter()->emitEnableRandomNops();
19097 // Done with PInvoke calls
19101 if (callType == CT_INDIRECT)
19103 noway_assert(genStillAddressable(call->gtCallAddr));
19105 if (call->gtCallCookie)
19107 //------------------------------------------------------
19108 // Non-virtual indirect calls via the P/Invoke stub
19110 GenTreePtr cookie = call->gtCallCookie;
19111 GenTreePtr target = call->gtCallAddr;
19113 noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19115 noway_assert(cookie->gtOper == GT_CNS_INT ||
19116 cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
19118 noway_assert(args == argSize);
19120 #if defined(_TARGET_X86_)
19121 /* load eax with the real target */
19123 inst_RV_TT(INS_mov, REG_EAX, target);
19124 regTracker.rsTrackRegTrash(REG_EAX);
19126 if (cookie->gtOper == GT_CNS_INT)
19127 inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19129 inst_TT(INS_push, cookie);
19131 /* Keep track of ESP for EBP-less frames */
19134 argSize += sizeof(void*);
19136 #elif defined(_TARGET_ARM_)
19138 // Ensure that we spill these registers (if caller saved) in the prolog
19139 regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19141 // ARM: load r12 with the real target
19142 // X64: load r10 with the real target
19143 inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19144 regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19146 // ARM: load r4 with the pinvoke VASigCookie
19147 // X64: load r11 with the pinvoke VASigCookie
19148 if (cookie->gtOper == GT_CNS_INT)
19149 inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
19150 EA_HANDLE_CNS_RELOC);
19152 inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19153 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19155 noway_assert(args == argSize);
19157 // Ensure that we don't trash any of these registers if we have to load
19158 // the helper call target into a register to invoke it.
19159 regMaskTP regsUsed;
19160 regSet.rsLockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | RBM_PINVOKE_COOKIE_PARAM,
19163 NYI("Non-virtual indirect calls via the P/Invoke stub");
19167 noway_assert((size_t)(int)args == args);
19169 genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19171 #if defined(_TARGET_ARM_)
19172 regSet.rsUnlockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
19173 RBM_PINVOKE_COOKIE_PARAM,
19177 #ifdef _TARGET_ARM_
19178 // genEmitHelperCall doesn't record all registers a helper call would trash.
19179 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19184 //------------------------------------------------------
19185 // Non-virtual indirect calls
19189 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
19190 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19193 instEmit_indCall(call, args, retSize);
19196 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19198 // Done with indirect calls
19202 //------------------------------------------------------
19203 // Non-virtual direct/indirect calls: Work out if the address of the
19204 // call is known at JIT time (if not it is either an indirect call
19205 // or the address must be accessed via an single/double indirection)
19207 noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19210 InfoAccessType accessType;
19212 helperNum = compiler->eeGetHelperNum(methHnd);
19214 if (callType == CT_HELPER)
19216 noway_assert(helperNum != CORINFO_HELP_UNDEF);
19218 #ifdef FEATURE_READYTORUN_COMPILER
19219 if (call->gtEntryPoint.addr != NULL)
19221 accessType = call->gtEntryPoint.accessType;
19222 addr = call->gtEntryPoint.addr;
19225 #endif // FEATURE_READYTORUN_COMPILER
19229 accessType = IAT_VALUE;
19230 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19234 accessType = IAT_PVALUE;
19241 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19243 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
19245 if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19246 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19248 if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19249 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19251 #ifdef FEATURE_READYTORUN_COMPILER
19252 if (call->gtEntryPoint.addr != NULL)
19254 accessType = call->gtEntryPoint.accessType;
19255 addr = call->gtEntryPoint.addr;
19258 #endif // FEATURE_READYTORUN_COMPILER
19260 CORINFO_CONST_LOOKUP addrInfo;
19261 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19263 accessType = addrInfo.accessType;
19264 addr = addrInfo.addr;
19270 noway_assert(callType == CT_USER_FUNC);
19272 switch (accessType)
19275 //------------------------------------------------------
19276 // Non-virtual direct calls to known addresses
19278 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19282 //------------------------------------------------------
19283 // Non-virtual direct calls to addresses accessed by
19284 // a single indirection.
19286 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19287 CLANG_FORMAT_COMMENT_ANCHOR;
19289 #if CPU_LOAD_STORE_ARCH
19291 regNumber indReg = REG_TAILCALL_ADDR;
19292 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19293 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19294 regTracker.rsTrackRegTrash(indReg);
19297 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19298 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19303 //------------------------------------------------------
19304 // Non-virtual direct calls to addresses accessed by
19305 // a double indirection.
19307 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19308 CLANG_FORMAT_COMMENT_ANCHOR;
19310 #if CPU_LOAD_STORE_ARCH
19312 regNumber indReg = REG_TAILCALL_ADDR;
19313 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19314 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19315 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19316 regTracker.rsTrackRegTrash(indReg);
19319 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19320 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19321 REG_TAILCALL_ADDR, 0);
19322 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19327 noway_assert(!"Bad accessType");
19333 switch (accessType)
19335 regNumber indCallReg;
19339 //------------------------------------------------------
19340 // Non-virtual direct calls to known addresses
19342 // The vast majority of calls end up here.... Wouldn't
19343 // it be nice if they all did!
19344 CLANG_FORMAT_COMMENT_ANCHOR;
19345 #ifdef _TARGET_ARM_
19346 // We may use direct call for some of recursive calls
19347 // as we can safely estimate the distance from the call site to the top of the method
19348 const int codeOffset = MAX_PROLOG_SIZE_BYTES + // prolog size
19349 getEmitter()->emitCurCodeOffset + // offset of the current IG
19350 getEmitter()->emitCurIGsize + // size of the current IG
19351 4; // size of the jump instruction
19352 // that we are now emitting
19353 if (compiler->gtIsRecursiveCall(call) && codeOffset <= -CALL_DIST_MAX_NEG)
19355 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19356 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19357 args, retSize, gcInfo.gcVarPtrSetCur,
19358 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19359 REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp
19361 emitter::emitNoGChelper(helperNum));
19363 else if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19365 // Load the address into a register and call through a register
19366 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
19367 // CALL indirection
19368 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19370 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
19371 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19372 args, retSize, gcInfo.gcVarPtrSetCur,
19373 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19374 indCallReg, // ireg
19375 REG_NA, 0, 0, // xreg, xmul, disp
19377 emitter::emitNoGChelper(helperNum));
19382 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19383 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19384 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19385 gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
19386 0, /* ireg, xreg, xmul, disp */
19387 false, /* isJump */
19388 emitter::emitNoGChelper(helperNum));
19394 //------------------------------------------------------
19395 // Non-virtual direct calls to addresses accessed by
19396 // a single indirection.
19399 // Load the address into a register, load indirect and call through a register
19400 CLANG_FORMAT_COMMENT_ANCHOR;
19401 #if CPU_LOAD_STORE_ARCH
19402 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19405 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19406 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19407 regTracker.rsTrackRegTrash(indCallReg);
19409 emitCallType = emitter::EC_INDIR_R;
19413 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19414 indCallReg = REG_NA;
19416 #endif // CPU_LOAD_STORE_ARCH
19418 getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
19419 retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19420 gcInfo.gcRegByrefSetCur, ilOffset,
19421 indCallReg, // ireg
19422 REG_NA, 0, 0, // xreg, xmul, disp
19423 false, /* isJump */
19424 emitter::emitNoGChelper(helperNum));
19429 //------------------------------------------------------
19430 // Non-virtual direct calls to addresses accessed by
19431 // a double indirection.
19433 // Double-indirection. Load the address into a register
19434 // and call indirectly through the register
19436 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19438 // Grab an available register to use for the CALL indirection
19439 indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19441 #if CPU_LOAD_STORE_ARCH
19442 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19443 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19444 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19445 regTracker.rsTrackRegTrash(indCallReg);
19447 emitCallType = emitter::EC_INDIR_R;
19451 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19452 regTracker.rsTrackRegTrash(indCallReg);
19454 emitCallType = emitter::EC_INDIR_ARD;
19456 #endif // CPU_LOAD_STORE_ARCH
19458 getEmitter()->emitIns_Call(emitCallType, methHnd,
19459 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19460 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19461 gcInfo.gcRegByrefSetCur, ilOffset,
19462 indCallReg, // ireg
19463 REG_NA, 0, 0, // xreg, xmul, disp
19465 emitter::emitNoGChelper(helperNum));
19470 noway_assert(!"Bad accessType");
19474 // tracking of region protected by the monitor in synchronized methods
19475 if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
19477 fPossibleSyncHelperCall = true;
19484 noway_assert(!"strange call type");
19488 /*-------------------------------------------------------------------------
19489 * For tailcalls, REG_INTRET contains the address of the target function,
19490 * enregistered args are in the correct registers, and the stack arguments
19491 * have been pushed on the stack. Now call the stub-sliding helper
19497 if (compiler->info.compCallUnmanaged)
19498 genPInvokeMethodEpilog();
19500 #ifdef _TARGET_X86_
19501 noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
19503 // Push the count of the incoming stack arguments
19505 unsigned nOldStkArgs =
19506 (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
19507 getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
19508 genSinglePush(); // Keep track of ESP for EBP-less frames
19509 args += sizeof(void*);
19511 // Push the count of the outgoing stack arguments
19513 getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
19514 genSinglePush(); // Keep track of ESP for EBP-less frames
19515 args += sizeof(void*);
19517 // Push info about the callee-saved registers to be restored
19518 // For now, we always spill all registers if compiler->compTailCallUsed
19520 DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
19521 (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
19522 getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
19523 genSinglePush(); // Keep track of ESP for EBP-less frames
19524 args += sizeof(void*);
19526 // Push the address of the target function
19528 getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
19529 genSinglePush(); // Keep track of ESP for EBP-less frames
19530 args += sizeof(void*);
19532 #else // _TARGET_X86_
19535 retSize = EA_UNKNOWN;
19537 #endif // _TARGET_X86_
19539 if (compiler->getNeedsGSSecurityCookie())
19541 genEmitGSCookieCheck(true);
19544 // TailCall helper does not poll for GC. An explicit GC poll
19545 // Should have been placed in when we morphed this into a tail call.
19546 noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
19548 // Now call the helper
19550 genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
19553 /*-------------------------------------------------------------------------
19555 * Trash registers, pop arguments if needed, etc
19558 /* Mark the argument registers as free */
19560 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19562 for (areg = 0; areg < MAX_REG_ARG; areg++)
19564 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
19566 // Is this one of the used argument registers?
19567 if ((curArgMask & call->gtCallRegUsedMask) == 0)
19570 #ifdef _TARGET_ARM_
19571 if (regSet.rsUsedTree[areg] == NULL)
19573 noway_assert(areg % 2 == 1 &&
19574 (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
19575 (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
19580 regSet.rsMarkRegFree(curArgMask);
19582 // We keep regSet.rsMaskVars current during codegen, so we have to remove any
19583 // that have been copied into arg regs.
19585 regSet.RemoveMaskVars(curArgMask);
19586 gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
19587 gcInfo.gcRegByrefSetCur &= ~(curArgMask);
19590 #if !FEATURE_STACK_FP_X87
19591 //-------------------------------------------------------------------------
19592 // free up the FP args
19594 for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
19596 regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
19597 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
19599 // Is this one of the used argument registers?
19600 if ((curArgMask & call->gtCallRegUsedMask) == 0)
19603 regSet.rsMaskUsed &= ~curArgMask;
19604 regSet.rsUsedTree[argRegNum] = NULL;
19606 #endif // !FEATURE_STACK_FP_X87
19608 /* restore the old argument register status */
19610 intRegState.rsCurRegArgNum = savCurIntArgReg;
19611 floatRegState.rsCurRegArgNum = savCurFloatArgReg;
19613 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19615 /* Mark all trashed registers as such */
19617 if (calleeTrashedRegs)
19618 regTracker.rsTrashRegSet(calleeTrashedRegs);
19620 regTracker.rsTrashRegsForGCInterruptability();
19624 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19626 if (compiler->verbose)
19628 printf("\t\t\t\t\t\t\tEnd call ");
19629 Compiler::printTreeID(call);
19630 printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
19632 noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
19637 #if FEATURE_STACK_FP_X87
19638 /* All float temps must be spilled around function calls */
19639 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19641 noway_assert(compCurFPState.m_uStackSize == 1);
19645 noway_assert(compCurFPState.m_uStackSize == 0);
19648 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19650 #ifdef _TARGET_ARM_
19651 if (call->IsVarargs() || compiler->opts.compUseSoftFP)
19653 // Result return for vararg methods is in r0, r1, but our callers would
19654 // expect the return in s0, s1 because of floating type. Do the move now.
19655 if (call->gtType == TYP_FLOAT)
19657 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
19661 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
19665 genMarkTreeInReg(call, REG_FLOATRET);
19669 /* The function will pop all arguments before returning */
19671 SetStackLevel(saveStackLvl);
19673 /* No trashed registers may possibly hold a pointer at this point */
19674 CLANG_FORMAT_COMMENT_ANCHOR;
19678 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
19679 ~regSet.rsMaskVars & ~vptrMask;
19682 // A reg may be dead already. The assertion is too strong.
19686 // use compiler->compCurLife
19687 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
19689 /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
19691 if (!varDsc->lvTracked)
19693 if (!varDsc->lvRegister)
19695 if (varDsc->IsFloatRegType())
19698 /* Get hold of the index and the bitmask for the variable */
19700 unsigned varIndex = varDsc->lvVarIndex;
19702 /* Is this variable live currently? */
19704 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
19706 regNumber regNum = varDsc->lvRegNum;
19707 regMaskTP regMask = genRegMask(regNum);
19709 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
19710 ptrRegs &= ~regMask;
19715 printf("Bad call handling for ");
19716 Compiler::printTreeID(call);
19718 noway_assert(!"A callee trashed reg is holding a GC pointer");
19723 #if defined(_TARGET_X86_)
19724 //-------------------------------------------------------------------------
19725 // Create a label for tracking of region protected by the monitor in synchronized methods.
19726 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
19727 // so the GC state vars have been updated before creating the label.
19729 if (fPossibleSyncHelperCall)
19733 case CORINFO_HELP_MON_ENTER:
19734 case CORINFO_HELP_MON_ENTER_STATIC:
19735 noway_assert(compiler->syncStartEmitCookie == NULL);
19736 compiler->syncStartEmitCookie =
19737 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19738 noway_assert(compiler->syncStartEmitCookie != NULL);
19740 case CORINFO_HELP_MON_EXIT:
19741 case CORINFO_HELP_MON_EXIT_STATIC:
19742 noway_assert(compiler->syncEndEmitCookie == NULL);
19743 compiler->syncEndEmitCookie =
19744 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19745 noway_assert(compiler->syncEndEmitCookie != NULL);
19751 #endif // _TARGET_X86_
19753 if (call->gtFlags & GTF_CALL_UNMANAGED)
19755 genDefineTempLabel(returnLabel);
19757 #ifdef _TARGET_X86_
19758 if (getInlinePInvokeCheckEnabled())
19760 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
19761 BasicBlock* esp_check;
19763 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19764 /* mov ecx, dword ptr [frame.callSiteTracker] */
19766 getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
19767 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19768 regTracker.rsTrackRegTrash(REG_ARG_0);
19770 /* Generate the conditional jump */
19772 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19776 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
19781 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
19783 esp_check = genCreateTempLabel();
19785 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
19786 inst_JMP(jmpEqual, esp_check);
19788 getEmitter()->emitIns(INS_BREAKPOINT);
19790 /* genCondJump() closes the current emitter block */
19792 genDefineTempLabel(esp_check);
19797 /* Are we supposed to pop the arguments? */
19798 CLANG_FORMAT_COMMENT_ANCHOR;
19800 #if defined(_TARGET_X86_)
19801 if (call->gtFlags & GTF_CALL_UNMANAGED)
19803 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) ||
19804 compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
19806 // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
19807 // take care of the cdecl argument popping here as well but the stack depth tracking logic
19808 // makes this very hard, i.e. it needs to "see" the actual pop.
19810 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19812 if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
19814 /* mov esp, dword ptr [frame.callSiteTracker] */
19815 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
19816 compiler->lvaInlinedPInvokeFrameVar,
19817 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19821 /* mov ecx, dword ptr [frame.callSiteTracker] */
19822 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
19823 compiler->lvaInlinedPInvokeFrameVar,
19824 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19825 regTracker.rsTrackRegTrash(REG_ARG_0);
19827 /* lea esp, [ecx + argSize] */
19828 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
19832 #endif // _TARGET_X86_
19834 if (call->gtFlags & GTF_CALL_POP_ARGS)
19836 noway_assert(args == (size_t) - (int)argSize);
19840 genAdjustSP(argSize);
19844 if (pseudoStackLvl)
19846 noway_assert(call->gtType == TYP_VOID);
19853 /* What does the function return? */
19857 switch (call->gtType)
19862 gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
19867 #if !CPU_HAS_FP_SUPPORT
19870 retVal = RBM_INTRET;
19873 #ifdef _TARGET_ARM_
19876 assert(call->gtRetClsHnd != NULL);
19877 assert(compiler->IsHfa(call->gtRetClsHnd));
19878 int retSlots = compiler->GetHfaCount(call->gtRetClsHnd);
19879 assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
19880 assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
19881 retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
19887 #if !CPU_HAS_FP_SUPPORT
19890 retVal = RBM_LNGRET;
19893 #if CPU_HAS_FP_SUPPORT
19904 noway_assert(!"unexpected/unhandled fn return type");
19907 // We now have to generate the "call epilog" (if it was a call to unmanaged code).
19908 /* if it is a call to unmanaged code, frameListRoot must be set */
19910 noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
19913 genPInvokeCallEpilog(frameListRoot, retVal);
19915 if (frameListRoot && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
19917 if (frameListRoot->lvRegister)
19919 bool isBorn = false;
19920 bool isDying = true;
19921 genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
19926 if (compiler->opts.compStackCheckOnCall
19927 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
19928 // check the stack as frequently as possible
19929 && !call->IsHelperCall()
19931 && call->gtCallType == CT_USER_FUNC
19935 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
19936 compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
19937 compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
19940 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
19941 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
19942 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
19943 regTracker.rsTrackRegTrash(REG_ARG_0);
19946 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
19948 BasicBlock* esp_check = genCreateTempLabel();
19949 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
19950 inst_JMP(jmpEqual, esp_check);
19951 getEmitter()->emitIns(INS_BREAKPOINT);
19952 genDefineTempLabel(esp_check);
19956 #if FEATURE_STACK_FP_X87
19957 UnspillRegVarsStackFp();
19958 #endif // FEATURE_STACK_FP_X87
19960 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19962 // Restore return node if necessary
19963 if (call->gtFlags & GTF_SPILLED)
19965 UnspillFloat(call);
19968 #if FEATURE_STACK_FP_X87
19970 regSet.SetUsedRegFloat(call, false);
19974 #if FEATURE_STACK_FP_X87
19976 if (compiler->verbose)
19986 #pragma warning(pop)
19989 /*****************************************************************************
19991 * Create and record GC Info for the function.
19993 #ifdef JIT32_GCENCODER
// Create and record the GC info for the method, dispatching to the
// platform-appropriate encoder:
//  - JIT32_GCENCODER: the x86 table-based encoder, which returns a pointer
//    to the emitted info block.
//  - otherwise: the GcInfoEncoder-based path (see genCreateAndStoreGCInfoX64),
//    which emits the info itself and returns nothing.
// NOTE(review): epilogSize is only consumed by the JIT32 path; the encoder
// path derives epilog information elsewhere.
19998 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
20000 #ifdef JIT32_GCENCODER
20001 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
20003 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
20007 #ifdef JIT32_GCENCODER
// Build the JIT32 (x86) GC info block for the method:
//   1) size the header and pointer table,
//   2) allocate the block via the EE (ICorJitInfo::allocGCInfo),
//   3) serialize header + pointer table into it,
// and return a pointer to the completed block.
// The asserts cross-check that the "sizing" pass and the "save" pass agree
// byte-for-byte on the encoding length.
20008 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
20009 unsigned prologSize,
20010 unsigned epilogSize DEBUGARG(void* codePtr))
20012 BYTE headerBuf[64];
// First pass: encode the header into a scratch buffer purely to learn its size.
20017 size_t headerSize =
20019 compiler->compInfoBlkSize =
20020 gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
20022 size_t argTabOffset = 0;
20023 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
// Size-statistics accounting, split by whether the method is fully interruptible.
20027 if (genInterruptible)
20029 gcHeaderISize += compiler->compInfoBlkSize;
20030 gcPtrMapISize += ptrMapSize;
20034 gcHeaderNSize += compiler->compInfoBlkSize;
20035 gcPtrMapNSize += ptrMapSize;
20038 #endif // DISPLAY_SIZES
20040 compiler->compInfoBlkSize += ptrMapSize;
20042 /* Allocate the info block for the method */
20044 compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
20046 #if 0 // VERBOSE_SIZES
20047 // TODO-Review: 'dataSize', below, is not defined
20049 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
20051 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
20052 compiler->info.compILCodeSize,
20053 compiler->compInfoBlkSize,
20054 codeSize + dataSize,
20055 codeSize + dataSize - prologSize - epilogSize,
20056 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
20057 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
20058 compiler->info.compClassName,
20059 compiler->info.compMethodName);
20064 /* Fill in the info block and return it to the caller */
20066 void* infoPtr = compiler->compInfoBlkAddr;
20068 /* Create the method info block: header followed by GC tracking tables */
// Second pass: 'mask' argument of -1 means really write the header this time.
20070 compiler->compInfoBlkAddr +=
20071 gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
20073 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
20074 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
20075 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
// Debug dump of the raw encoded bytes (header then pointer table).
20081 BYTE* temp = (BYTE*)infoPtr;
20082 unsigned size = compiler->compInfoBlkAddr - temp;
20083 BYTE* ptab = temp + headerSize;
20085 noway_assert(size == headerSize + ptrMapSize);
20087 printf("Method info block - header [%u bytes]:", headerSize);
20089 for (unsigned i = 0; i < size; i++)
20093 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
20094 printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
20099 printf("\n %04X: ", i);
20102 printf("%02X ", *temp++);
// Decoded (human-readable) dump of the tables when -JitGCDump is on.
20112 if (compiler->opts.dspGCtbls)
20114 const BYTE* base = (BYTE*)infoPtr;
20116 unsigned methodSize;
20117 InfoHdr dumpHeader;
20119 printf("GC Info for method %s\n", compiler->info.compFullName);
20120 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
20122 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
20123 // printf("size of header encoding is %3u\n", size);
20126 if (compiler->opts.dspGCtbls)
20129 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
20130 // printf("size of pointer table is %3u\n", size);
20132 noway_assert(compiler->compInfoBlkAddr == (base + size));
// Stress mode: decode the frame's pointers at every code offset to exercise
// the decoder against the freshly-encoded tables.
20137 if (jitOpts.testMask & 128)
20139 for (unsigned offs = 0; offs < codeSize; offs++)
20141 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
20145 #endif // DUMP_GC_TABLES
20147 /* Make sure we ended up generating the expected number of bytes */
20149 noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
20154 #else // JIT32_GCENCODER
// Create and record GC info via the GcInfoEncoder (non-JIT32 targets).
// The encoder is driven in two passes over the same data: first to assign
// slot IDs for every interesting stack slot / register, then (after
// FinalizeSlotIds compacts them) to report the actual live ranges.
// Emit() allocates the block through ICorJitInfo::allocGCInfo itself.
20156 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
// AllowZeroAllocator tolerates zero-byte requests coming from the encoder.
20158 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
20159 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
20160 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20161 assert(gcInfoEncoder);
20163 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20164 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20166 // We keep the call count for the second call to gcMakeRegPtrTable() below.
20167 unsigned callCnt = 0;
20168 // First we figure out the encoder ID's for the stack slots and registers.
20169 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
20170 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20171 gcInfoEncoder->FinalizeSlotIds();
20172 // Now we can actually use those slot ID's to declare live ranges.
20173 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
20175 gcInfoEncoder->Build();
20177 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
20178 // let's save the values anyway for debugging purposes
20179 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20180 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
20184 /*****************************************************************************
// Generate code for a localloc (GT_LCLHEAP): allocate 'size' bytes on the
// stack and return the register holding the address of the allocated block
// (or null/zero when the requested size is zero).
// Strategy depends on the operand:
//  - constant size: either inline "push 0" sequences (small, init-mem),
//    a single SP adjustment (< one page, no init), or the general loop;
//  - variable size: align the count at runtime, then loop, either pushing
//    zeros (compInitMem) or probing the guard page one page at a time.
// The SP manipulation order is delicate: guard pages must be touched BEFORE
// each SP decrement. Do not reorder.
20188 regNumber CodeGen::genLclHeap(GenTreePtr size)
20190 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
20192 // regCnt is a register used to hold both
20193 // the amount to stack alloc (either in bytes or pointer sized words)
20194 // and the final stack alloc address to return as the result
20196 regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
20197 var_types type = genActualType(size->gtType);
20198 emitAttr easz = emitTypeSize(type);
// Stack-check stress: verify SP still matches the value saved in the
// lvaReturnEspCheck frame slot; break into the debugger if it doesn't.
20202 if (compiler->opts.compStackCheckOnRet)
20204 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20205 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20206 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20207 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20209 BasicBlock* esp_check = genCreateTempLabel();
20210 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20211 inst_JMP(jmpEqual, esp_check);
20212 getEmitter()->emitIns(INS_BREAKPOINT);
20213 genDefineTempLabel(esp_check);
// localloc requires a frame pointer (SP is about to move arbitrarily).
20217 noway_assert(isFramePointerUsed());
20218 noway_assert(genStackLevel == 0); // Can't have anything on the stack
20220 BasicBlock* endLabel = NULL;
20221 #if FEATURE_FIXED_OUT_ARGS
20222 bool stackAdjusted = false;
20225 if (size->IsCnsIntOrI())
20227 #if FEATURE_FIXED_OUT_ARGS
20228 // If we have an outgoing arg area then we must adjust the SP
20229 // essentially popping off the outgoing arg area,
20230 // We will restore it right before we return from this method
20232 if (compiler->lvaOutgoingArgSpaceSize > 0)
20234 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20235 0); // This must be true for the stack to remain aligned
20236 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20237 stackAdjusted = true;
20240 size_t amount = size->gtIntCon.gtIconVal;
20242 // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
20243 amount += (STACK_ALIGN - 1);
20244 amount &= ~(STACK_ALIGN - 1);
20245 amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
20246 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20248 /* If amount is zero then return null in RegCnt */
20251 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20252 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20256 /* For small allocations we will generate up to six push 0 inline */
20259 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20260 #if CPU_LOAD_STORE_ARCH
// Load/store architectures (ARM) can't push an immediate; push a zeroed register instead.
20261 regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20262 // Set 'regZero' to zero
20263 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
20266 while (amount != 0)
20268 #if CPU_LOAD_STORE_ARCH
20269 inst_IV(INS_push, (unsigned)genRegMask(regZero));
20271 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
20276 regTracker.rsTrackRegTrash(regCnt);
20277 // --- move regCnt, ESP
20278 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20283 if (!compiler->info.compInitMem)
20285 // Re-bias amount to be number of bytes to adjust the SP
20286 amount <<= STACK_ALIGN_SHIFT;
20287 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20288 if (amount < compiler->eeGetPageSize()) // must be < not <=
20290 // Since the size is a page or less, simply adjust ESP
20292 // ESP might already be in the guard page, must touch it BEFORE
20293 // the alloc, not after.
20294 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20295 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20296 #if CPU_LOAD_STORE_ARCH
20297 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20298 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0)
20299 regTracker.rsTrackRegTrash(regTmp);
20301 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20303 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
20304 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
20305 regTracker.rsTrackRegTrash(regCnt);
// Variable-size case: evaluate the size operand into a register first.
20312 // Compute the size of the block to allocate
20313 genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
20314 noway_assert(size->InReg());
20315 regCnt = size->gtRegNum;
20317 #if FEATURE_FIXED_OUT_ARGS
20318 // If we have an outgoing arg area then we must adjust the SP
20319 // essentially popping off the outgoing arg area,
20320 // We will restore it right before we return from this method
20322 if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
20324 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20325 0); // This must be true for the stack to remain aligned
20326 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20327 stackAdjusted = true;
20331 // Perform alignment if we don't have a GT_CNS size
20333 if (!size->IsCnsIntOrI())
20335 endLabel = genCreateTempLabel();
20337 // If 0 we bail out
20338 instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
20339 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20340 inst_JMP(jmpEqual, endLabel);
20342 // Align to STACK_ALIGN
20343 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
20345 if (compiler->info.compInitMem)
20347 #if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
20348 // regCnt will be the number of pointer-sized words to locAlloc
20349 // If the shift right won't do the 'and' do it here
20350 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20352 // --- shr regCnt, 2 ---
20353 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
20357 // regCnt will be the total number of bytes to locAlloc
20359 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20364 loop = genCreateTempLabel();
20366 if (compiler->info.compInitMem)
20368 // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
20370 /* Since we have to zero out the allocated memory AND ensure that
20371 ESP is always valid by tickling the pages, we will just push 0's
20373 CLANG_FORMAT_COMMENT_ANCHOR;
20375 #if defined(_TARGET_ARM_)
20376 regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20377 regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
20378 // Set 'regZero1' and 'regZero2' to zero
20379 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
20380 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
20384 genDefineTempLabel(loop);
20386 #if defined(_TARGET_X86_)
20388 inst_IV(INS_push_hide, 0); // --- push 0
20390 inst_RV(INS_dec, regCnt, type);
20392 #elif defined(_TARGET_ARM_)
// ARM pushes two zero registers per iteration, so the count drops by 2.
20394 inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
20396 inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
20399 assert(!"Codegen missing");
20402 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
20403 inst_JMP(jmpNotEqual, loop);
20405 // Move the final value of ESP into regCnt
20406 inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
20407 regTracker.rsTrackRegTrash(regCnt);
20411 // At this point 'regCnt' is set to the total number of bytes to locAlloc
20413 /* We don't need to zero out the allocated memory. However, we do have
20414 to tickle the pages to ensure that ESP is always valid and is
20415 in sync with the "stack guard page". Note that in the worst
20416 case ESP is on the last byte of the guard page. Thus you must
20417 touch ESP+0 first not ESP+0x1000.
20419 Another subtlety is that you don't want ESP to be exactly on the
20420 boundary of the guard page because PUSH is predecrement, thus
20421 call setup would not touch the guard page but just beyond it */
20423 /* Note that we go through a few hoops so that ESP never points to
20424 illegal pages at any time during the ticking process
20427 add REG, ESP // reg now holds ultimate ESP
20428 jb loop // result is smaller than original ESP (no wrap around)
20429 xor REG, REG, // Overflow, pick lowest possible number
20431 test ESP, [ESP+0] // X86 - tickle the page
20432 ldr REGH,[ESP+0] // ARM - tickle the page
20434 sub REGH, GetOsPageSize()
20442 CLANG_FORMAT_COMMENT_ANCHOR;
20444 #ifdef _TARGET_ARM_
// Compute the target SP (SP - regCnt); on underflow fall through to the
// "lowest possible" path so we never form an address below 0.
20446 inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
20447 inst_JMP(EJ_hs, loop);
20449 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
20450 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
20451 inst_JMP(EJ_jb, loop);
20453 regTracker.rsTrashRegSet != 0 ? 0 : 0;
20455 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20457 genDefineTempLabel(loop);
20459 // This is a workaround to avoid the emitter trying to track the
20460 // decrement of the ESP - we do the subtraction in another reg
20461 // instead of adjusting ESP directly.
20463 regNumber regTemp = regSet.rsPickReg();
20465 // Tickle the decremented value, and move back to ESP,
20466 // note that it has to be done BEFORE the update of ESP since
20467 // ESP might already be on the guard page. It is OK to leave
20468 // the final value of ESP on the guard page
20469 CLANG_FORMAT_COMMENT_ANCHOR;
20471 #if CPU_LOAD_STORE_ARCH
20472 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
20474 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20477 inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
20478 regTracker.rsTrackRegTrash(regTemp);
20480 inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
20481 inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
20483 genRecoverReg(size, RBM_ALLINT,
20484 RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
20485 noway_assert(size->InReg());
20486 regCnt = size->gtRegNum;
20487 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
20488 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
20489 inst_JMP(jmpGEU, loop);
20491 // Move the final value to ESP
20492 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
20494 regSet.rsMarkRegFree(genRegMask(regCnt));
// regCnt must have been assigned on every path by now.
20498 noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
20500 if (endLabel != NULL)
20501 genDefineTempLabel(endLabel);
20503 #if FEATURE_FIXED_OUT_ARGS
20504 // If we have an outgoing arg area then we must readjust the SP
20508 assert(compiler->lvaOutgoingArgSpaceSize > 0);
20509 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20510 0); // This must be true for the stack to remain aligned
20511 inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20515 /* Write the lvaLocAllocSPvar stack frame slot */
20516 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
20518 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
20522 // Don't think it is worth it the codegen complexity to embed this
20523 // when it's possible in each of the customized allocas.
20524 if (compiler->opts.compNeedStackProbes)
20526 genGenerateStackProbe();
// Record the post-localloc SP for the return-time stack check.
20532 if (compiler->opts.compStackCheckOnRet)
20534 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20535 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20536 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20537 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20544 /*****************************************************************************
20546 * Return non-zero if the given register is free after the given tree is
20547 * evaluated (i.e. the register is either not used at all, or it holds a
20548 * register variable which is not live after the given node).
20549 * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
20550 * constant operand, and one that's in a register. Thus, the only thing we
20551 * need to determine is whether the register holding op1 is dead.
// Return whether 'reg' is free (trashable) once 'tree' has been evaluated.
// Only called by genCreateAddrMode for a GT_ADD with one constant operand and
// one register operand, so the question reduces to: is the register holding
// the non-constant operand dead after this node?
20553 bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
20556 regMaskTP mask = genRegMask(reg);
// If the register is currently marked as in use, it is not trashable.
20558 if (regSet.rsMaskUsed & mask)
20561 assert(tree->gtOper == GT_ADD);
// Pick out the operand that lives in a register (the other is the constant).
20562 GenTreePtr regValTree = tree->gtOp.gtOp1;
20563 if (!tree->gtOp.gtOp2->IsCnsIntOrI())
20565 regValTree = tree->gtOp.gtOp2;
20566 assert(tree->gtOp.gtOp1->IsCnsIntOrI());
20568 assert(regValTree->InReg());
20570 /* At this point, the only way that the register will remain live
20571 * is if it is itself a register variable that isn't dying.
20573 assert(regValTree->gtRegNum == reg);
20574 if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
20580 /*****************************************************************************/
20582 // This method calculates the USE and DEF values for a statement.
20583 // It also calls fgSetRngChkTarget for the statement.
20585 // We refactor out this code from fgPerBlockLocalVarLiveness
20586 // and add QMARK logics to it.
20588 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20590 // The usage of this method is very limited.
20591 // We should only call it for the first node in the statement or
20592 // for the node after the GTF_RELOP_QMARK node.
20594 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20597 Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
20598 when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
20601 Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
20604 The algorithm we use is:
20605 (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
20606 (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
20607 We cache copies of current fgCurDefSet and fgCurUseSet.
20608 (The fact that it is recursively calling itself is for nested QMARK case,
20609 where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
20610 (3) We walk the thenTree.
20611 (4) When we see GT_COLON node, we know that we just finished the thenTree.
20612 We then make a copy of the current fgCurDefSet and fgCurUseSet,
20613 restore them to the ones before the thenTree, and then continue walking
20615 (5) When we see the GT_QMARK node, we know we just finished the elseTree.
20616 So we combine the results from the thenTree and elseTree and then return.
20619 +--------------------+
20621 +----------+---------+
20627 +---------------------+ +--------------------+
20628 | GT_<cond> 3 | | GT_COLON 7 |
20629 | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
20630 +----------+----------+ +---------+----------+
20636 2 1 thenTree 6 elseTree 10
20639 +----------------+ / / \ / \
20640 |prevExpr->gtNext+------/ / \ / \
20641 +----------------+ / \ / \
20647 GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
20648 GenTreePtr relopNode) // The node before the startNode.
20649 // (It should either be NULL or
20650 // a GTF_RELOP_QMARK node.)
20654 VARSET_TP defSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurDefSet)); // Store the current fgCurDefSet and
20656 VARSET_TP useSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurUseSet)); // we can restore then before entering the
20659 MemoryKindSet memoryUse_BeforeSplit = fgCurMemoryUse;
20660 MemoryKindSet memoryDef_BeforeSplit = fgCurMemoryDef;
20661 MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
20663 VARSET_TP defSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // These two variables will store
20664 // the USE and DEF sets after
20665 VARSET_TP useSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
20667 MemoryKindSet memoryUse_AfterThenTree = fgCurMemoryUse;
20668 MemoryKindSet memoryDef_AfterThenTree = fgCurMemoryDef;
20669 MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20671 // relopNode is either NULL or a GTF_RELOP_QMARK node.
20672 assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
20674 // If relopNode is NULL, then the startNode must be the 1st node of the statement.
20675 // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
20676 assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
20677 (relopNode && startNode == relopNode->gtNext));
20679 for (tree = startNode; tree; tree = tree->gtNext)
20681 switch (tree->gtOper)
20686 // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
20687 noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
20689 // By the time we see a GT_QMARK, we must have finished processing the elseTree.
20690 // So it's the time to combine the results
20691 // from the thenTree and the elseTree, and then return.
20693 VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
20694 VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
20696 fgCurMemoryDef = fgCurMemoryDef & memoryDef_AfterThenTree;
20697 fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
20698 fgCurMemoryUse = fgCurMemoryUse | memoryUse_AfterThenTree;
20700 // Return the GT_QMARK node itself so the caller can continue from there.
20701 // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
20702 // in the "for" statement.
20706 // By the time we see GT_COLON, we must have just walked the thenTree.
20707 // So we need to do two things here.
20708 // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
20709 // with the result from the elseTree.
20710 // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
20711 // and then continue walking the elseTree.
20712 VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
20713 VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
20715 memoryDef_AfterThenTree = fgCurMemoryDef;
20716 memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20717 memoryUse_AfterThenTree = fgCurMemoryUse;
20719 VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
20720 VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
20722 fgCurMemoryDef = memoryDef_BeforeSplit;
20723 fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
20724 fgCurMemoryUse = memoryUse_BeforeSplit;
20730 case GT_LCL_VAR_ADDR:
20731 case GT_LCL_FLD_ADDR:
20732 case GT_STORE_LCL_VAR:
20733 case GT_STORE_LCL_FLD:
20734 fgMarkUseDef(tree->AsLclVarCommon());
20738 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20739 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20740 // This models Volatile reads as def-then-use of the heap.
20741 // and allows for a CSE of a subsequent non-volatile read
20742 if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
20744 // For any Volatile indirection, we must handle it as a
20745 // definition of GcHeap/ByrefExposed
20746 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20748 // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
20750 // Otherwise, we treat it as a use here.
20751 if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
20753 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20758 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20759 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20760 // This models Volatile reads as def-then-use of the heap.
20761 // and allows for a CSE of a subsequent non-volatile read
20762 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
20764 // For any Volatile indirection, we must handle it as a
20765 // definition of GcHeap/ByrefExposed
20766 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20769 // If the GT_IND is the lhs of an assignment, we'll handle it
20770 // as a heap/byref def, when we get to assignment.
20771 // Otherwise, we treat it as a use here.
20772 if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
20774 GenTreeLclVarCommon* dummyLclVarTree = NULL;
20775 bool dummyIsEntire = false;
20776 GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
20777 if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
20779 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20783 // Defines a local addr
20784 assert(dummyLclVarTree != nullptr);
20785 fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
20790 // These should have been morphed away to become GT_INDs:
20796 // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
20801 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20802 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20803 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20806 case GT_MEMORYBARRIER:
// Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
20808 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20811 // For now, all calls read/write GcHeap/ByrefExposed, writes in their entirety. Might tighten this case
20815 GenTreeCall* call = tree->AsCall();
20816 bool modHeap = true;
20817 if (call->gtCallType == CT_HELPER)
20819 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
20821 if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
20828 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20829 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20830 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20834 // If this is a p/invoke unmanaged call or if this is a tail-call
20835 // and we have an unmanaged p/invoke call in the method,
20836 // then we're going to run the p/invoke epilog.
20837 // So we mark the FrameRoot as used by this instruction.
20838 // This ensures that the block->bbVarUse will contain
20839 // the FrameRoot local var if is it a tracked variable.
20841 if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
20843 /* Get the TCB local and mark it as used */
20845 noway_assert(info.compLvFrameListRoot < lvaCount);
20847 LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
20849 if (varDsc->lvTracked)
20851 if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
20853 VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
20862 // Determine what memory kinds it defines.
20863 if (tree->OperIsAssignment() || tree->OperIsBlkOp())
20865 GenTreeLclVarCommon* dummyLclVarTree = NULL;
20866 if (tree->DefinesLocal(this, &dummyLclVarTree))
20868 if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
20870 fgCurMemoryDef |= memoryKindSet(ByrefExposed);
20872 // We've found a store that modifies ByrefExposed
20873 // memory but not GcHeap memory, so track their
20874 // states separately.
20875 byrefStatesMatchGcHeapStates = false;
20880 // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
20881 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20885 // Are we seeing a GT_<cond> for a GT_QMARK node?
20886 if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
20888 // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
20889 // Recursively call fgLegacyPerStatementLocalVarLiveness.
20890 // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
20892 // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
20893 // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
20895 tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
20897 // We must have been returned here after seeing a GT_QMARK node.
20898 noway_assert(tree->gtOper == GT_QMARK);
20909 /*****************************************************************************/
20911 /*****************************************************************************
20912 * Initialize the TCB local and the NDirect stub, afterwards "push"
20913 * the hoisted NDirect stub.
20915 * 'initRegs' is the set of registers which will be zeroed out by the prolog
20916 * typically initRegs is zero
20918 * The layout of the NDirect Inlined Call Frame is as follows:
20919 * (see VM/frames.h and VM/JITInterface.cpp for more information)
20921 * offset field name when set
20922 * --------------------------------------------------------------
20923 * +00h vptr for class InlinedCallFrame method prolog
20924 * +04h m_Next method prolog
20925 * +08h m_Datum call site
20926 * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
20927 * +10h m_pCallerReturnAddress call site
20928 * +14h m_pCalleeSavedRegisters not set by JIT
20929 * +18h JIT retval spill area (int) before call_gc
20930 * +1ch JIT retval spill area (long) before call_gc
20931 * +20h Saved value of EBP method prolog
regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
    // Only valid while the method prolog is being generated, for methods that
    // contain unmanaged calls and do not use the PInvoke helper calls.
    assert(compiler->compGeneratingProlog);
    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
    noway_assert(compiler->info.compCallUnmanaged);

    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

    /* let's find out if compLvFrameListRoot is enregistered */

    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];

    // The TCB root local is a JIT-introduced, pointer-sized temp (never a parameter).
    noway_assert(!varDsc->lvIsParam);
    noway_assert(varDsc->lvType == TYP_I_IMPL);

    // Ask the EE for the thread TLS index; the EE returns either the index
    // itself or (via pThreadTlsIndex) an address to load it from.
    DWORD threadTlsIndex, *pThreadTlsIndex;

    threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
#if defined(_TARGET_X86_)
    // If there is no usable TLS index, or the OS is not Windows NT, we cannot
    // inline the TCB lookup; fall back to the INIT_PINVOKE_FRAME helper.
    if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
        // Instead of calling GetThread(), and getting GS cookie and
        // InlinedCallFrame vptr through indirections, we'll call only one helper.
        // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
        // and uses REG_PINVOKE_SCRATCH as scratch register.
        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
                                  pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
        regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);

        // We're about to trash REG_PINVOKE_TCB, it better not be in use!
        assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);

        // Don't use the argument registers (including the special argument in
        // REG_PINVOKE_FRAME) for computing the target address.
        regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);

        genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);

        regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);

        if (varDsc->lvRegister)
            regNumber regTgt = varDsc->lvRegNum;

            // we are about to initialize it. So turn the bit off in initRegs to prevent
            // the prolog reinitializing it.
            initRegs &= ~genRegMask(regTgt);

            if (regTgt != REG_PINVOKE_TCB)
                // move TCB to its register if necessary
                getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
                regTracker.rsTrackRegTrash(regTgt);
            // move TCB to its stack location
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->info.compLvFrameListRoot, 0);

        // We are done, the rest of this function deals with the inlined case.

    // Inlined case: choose the register that will hold the TCB -- the
    // enregistered root local if it has a register, else REG_PINVOKE_TCB.
    if (varDsc->lvRegister)
        regTCB = varDsc->lvRegNum;

        // we are about to initialize it. So turn the bit off in initRegs to prevent
        // the prolog reinitializing it.
        initRegs &= ~genRegMask(regTCB);
    else // varDsc is allocated on the Stack
        regTCB = REG_PINVOKE_TCB;

#if !defined(_TARGET_ARM_)
// Windows NT TLS layout: slots 0-63 live in the TEB at FS:[0xE10]; slots >= 64
// are reached through the expansion-slots pointer at FS:[0xf94].
#define WIN_NT_TLS_OFFSET (0xE10)
#define WIN_NT5_TLS_HIGHOFFSET (0xf94)

    /* get TCB,  mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */

    // TODO-ARM-CQ: should we inline TlsGetValue here?

    if (threadTlsIndex < 64)
        // mov  reg, FS:[0xE10+threadTlsIndex*4]
        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
                                  WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
        regTracker.rsTrackRegTrash(regTCB);

        // High TLS slot: double indirection through the expansion-slots pointer.
        DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
        threadTlsIndex -= 64;

        // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
        // mov reg, [reg+threadTlsIndex*4]

        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
        regTracker.rsTrackRegTrash(regTCB);

    /* save TCB in local var if not enregistered */

    if (!varDsc->lvRegister)
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);

    /* set frame's vptr */

    const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
    inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
    noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
                               compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
                               REG_PINVOKE_SCRATCH);

    // Set the GSCookie
    GSCookie gsCookie, *pGSCookie;
    compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
    noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);

    /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
       set next field in frame */

    getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
                               pInfo->offsetOfThreadFrame);
    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);

    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);

    noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame

    /* set EBP value in frame */
    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);

    /* reset track field in frame */
    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);

    /* get address of our frame */

    getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);

    /* now "push" our N/direct frame: the thread's frame-list head becomes our frame */

    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
                               pInfo->offsetOfThreadFrame);
21107 /*****************************************************************************
21108 * Unchain the InlinedCallFrame.
21109 * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
void CodeGen::genPInvokeMethodEpilog()
    // When the PInvoke helper calls are used there is no inlined frame to
    // unchain; nothing to do.
    if (compiler->opts.ShouldUsePInvokeHelpers())

    noway_assert(compiler->info.compCallUnmanaged);
    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
    // This is only emitted at the return block, at a tail-call-induced throw
    // block, or at a GT_JMP block.
    noway_assert(compiler->compCurBB == compiler->genReturnBB ||
                 (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
                 (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));

    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

    getEmitter()->emitDisableRandomNops();
    // debug check to make sure that we're not using ESI and/or EDI across this call, except for
    // compLvFrameListRoot.
    unsigned regTrashCheck = 0;

    /* XXX Tue 5/29/2007
     * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
     * sequence or registers used, make sure to update the interference for compiler->genReturnLocal. */
    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];

    regNumber reg2 = REG_PINVOKE_FRAME;

    // Two cases for epilog invocation:
    //
    // 1. Return: we can trash the ESI/EDI registers.
    //
    // 2. Tail call (jmp): when tail called, we'd like to preserve enregistered args,
    //    in ESI/EDI so we can pass it to the callee.
    //
    // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
    // Instead use the reserved local variable slot.

    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
        // Tail-call case: preserve the TCB/frame registers that interfere with
        // outgoing arguments; they are restored below after the frame is popped.
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
#if FEATURE_FIXED_OUT_ARGS
            // Save the register in the reserved local var slot.
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
            inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
#if FEATURE_FIXED_OUT_ARGS
            // Save the register in the reserved local var slot.
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
            inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);

    // Load the TCB into 'reg': use the enregistered root local directly, or
    // reload it from the stack into REG_PINVOKE_TCB.
    if (varDsc->lvRegister)
        reg = varDsc->lvRegNum;
        // NOTE(review): upstream also swaps reg2 to REG_PINVOKE_TCB only when
        // 'reg' is REG_PINVOKE_FRAME; the guarding condition is not visible here.
        reg2 = REG_PINVOKE_TCB;

        regTrashCheck |= genRegMask(reg2);

        /* mov esi, [tcb address] */

        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
        regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
        reg = REG_PINVOKE_TCB;

        regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;

    /* mov edi, [ebp-frame.next] */

    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
    regTracker.rsTrackRegTrash(reg2);

    /* mov [esi+offsetOfThreadFrame], edi */
    // "Pop" our InlinedCallFrame: the thread's frame-list head becomes our frame's link.
    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);

    noway_assert(!(regSet.rsMaskUsed & regTrashCheck));

    if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
        compiler->lvaTable[compiler->genReturnLocal].lvRegister)
        // really make sure we're not clobbering compiler->genReturnLocal.
        !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
          ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));

    (void)regTrashCheck;

    // Restore the registers ESI and EDI.
    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
#if FEATURE_FIXED_OUT_ARGS
            // Restore the register from the reserved local var slot.
            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
            inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
            regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
#if FEATURE_FIXED_OUT_ARGS
            // Restore the register from the reserved local var slot.
            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
            inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
            regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);

    getEmitter()->emitEnableRandomNops();
21248 /*****************************************************************************
21249 This function emits the call-site prolog for direct calls to unmanaged code.
21250 It does all the necessary setup of the InlinedCallFrame.
21251 frameListRoot specifies the local containing the thread control block.
21252 argSize or methodToken is the value to be copied into the m_datum
21253 field of the frame (methodToken may be indirected & have a reloc)
21254 The function returns the register now containing the thread control block,
21255 (it could be either enregistered or loaded into one of the scratch registers)
regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
                                        CORINFO_METHOD_HANDLE methodToken,
                                        BasicBlock* returnLabel)
    // Some stack locals might be 'cached' in registers, we need to trash them
    // from the regTracker *and* also ensure the gc tracker does not consider
    // them live (see the next assert). However, they might be live reg vars
    // that are non-pointers CSE'd from pointers.
    // That means the register will be live in rsMaskVars, so we can't just
    // call gcMarkSetNpt().
    regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
    gcInfo.gcRegGCrefSetCur &= ~deadRegs;
    gcInfo.gcRegByrefSetCur &= ~deadRegs;

    // Verify that any live enregistered var occupying one of the trashed
    // registers holds a non-GC value (otherwise trashing it would be a bug).
    deadRegs &= regSet.rsMaskVars;

    for (LclVarDsc* varDsc = compiler->lvaTable;
         ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
        // Skip untracked or stack-allocated vars...
        if (!varDsc->lvTracked || !varDsc->lvRegister)

        // ...and vars that are not currently live.
        if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))

        regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
        if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
            varRegMask |= genRegMask(varDsc->lvOtherReg);

        if (varRegMask & deadRegs)
            // We found the enregistered var that should not be live if it
            // was a GC pointer.
            noway_assert(!varTypeIsGC(varDsc));
            deadRegs &= ~varRegMask;

    /* Since we are using the InlinedCallFrame, we should have spilled all
       GC pointers to it - even from callee-saved registers */

    noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);

    /* must specify only one of these parameters */
    noway_assert((argSize == 0) || (methodToken == NULL));

    /* We are about to call unmanaged code directly.
       Before we can do that we have to emit the following sequence:

       mov  dword ptr [frame.callTarget], MethodToken
       mov  dword ptr [frame.callSiteTracker], esp
       mov  reg, dword ptr [tcb_address]
       mov  byte ptr [tcb+offsetOfGcState], 0
     */

    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();

    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

#ifdef _TARGET_ARM_
    if (compiler->opts.ShouldUsePInvokeHelpers())
        // Helper-based path: pass the address of the inlined frame to the
        // PINVOKE_BEGIN helper instead of emitting the transition inline.
        int adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, true, &baseReg, 0);

        getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
        genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_BEGIN,
                          EA_UNKNOWN); // retSize
        regTracker.rsTrackRegTrash(REG_ARG_0);

    /* mov   dword ptr [frame.callSiteTarget], value */

    if (methodToken == NULL)
        /* mov   dword ptr [frame.callSiteTarget], argSize */
        instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
                                   pInfo->inlinedCallFrameInfo.offsetOfCallTarget);

        void *embedMethHnd, *pEmbedMethHnd;

        embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);

        // The EE returns exactly one of: the handle itself, or an indirection cell.
        noway_assert((!embedMethHnd) != (!pEmbedMethHnd));

        if (embedMethHnd != NULL)
            /* mov   dword ptr [frame.callSiteTarget], "MethodDesc" */

            instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
                                       compiler->lvaInlinedPInvokeFrameVar,
                                       pInfo->inlinedCallFrameInfo.offsetOfCallTarget);

            /* mov   reg, dword ptr [MethodDescIndir]
               mov   dword ptr [frame.callSiteTarget], reg */

            regNumber reg = regSet.rsPickFreeReg();

#if CPU_LOAD_STORE_ARCH
            instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
            getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
#else // !CPU_LOAD_STORE_ARCH
            getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
#endif // !CPU_LOAD_STORE_ARCH
            regTracker.rsTrackRegTrash(reg);
            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
                                      pInfo->inlinedCallFrameInfo.offsetOfCallTarget);

    // Get the TCB into a register: either the enregistered root local, or
    // grab a register and load the TCB from its stack home.
    regNumber tcbReg = REG_NA;

    if (frameListRoot->lvRegister)
        tcbReg = frameListRoot->lvRegNum;

        tcbReg = regSet.rsGrabReg(RBM_ALLINT);

        /* mov reg, dword ptr [tcb address]    */

        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
                                  (unsigned)(frameListRoot - compiler->lvaTable), 0);
        regTracker.rsTrackRegTrash(tcbReg);

#ifdef _TARGET_X86_
    /* mov   dword ptr [frame.callSiteTracker], esp */

    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
#endif // _TARGET_X86_

#if CPU_LOAD_STORE_ARCH
    // Materialize the return label's address and store it as the call-site
    // return address in the inlined frame.
    regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
    getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
    regTracker.rsTrackRegTrash(tmpReg);
    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
#else // !CPU_LOAD_STORE_ARCH
    /* mov   dword ptr [frame.callSiteReturnAddress], label */

    getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
                              pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
#endif // !CPU_LOAD_STORE_ARCH

#if CPU_LOAD_STORE_ARCH
    // Flip the thread's GC mode to preemptive: [tcb+offsetOfGcState] = 0.
    instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);

    noway_assert(tmpReg != tcbReg);

    getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
#else // !CPU_LOAD_STORE_ARCH
    /* mov   byte  ptr [tcbReg+offsetOfGcState], 0 */

    getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
#endif // !CPU_LOAD_STORE_ARCH
21435 /*****************************************************************************
21437 First we have to mark in the hoisted NDirect stub that we are back
21438 in managed code. Then we have to check (a global flag) whether GC is
21439 pending or not. If so, we just call into a jit-helper.
21440 Right now we have this call always inlined, i.e. we always skip around
21441 the jit-helper call.
21443 The tcb address is a regular local (initialized in the prolog), so it is either
21444 enregistered or in the frame:
21446 tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
21447 mov byte ptr[tcb_reg+offsetOfGcState], 1
21448 cmp 'global GC pending flag', 0
21450 [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
21451 [mov [EBP+spill_area+0], eax] ; spill the int return value if any
21452 [mov [EBP+spill_area+4], edx] ; spill the long return value if any
21454 [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
21455 [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
#ifdef _TARGET_ARM_
    if (compiler->opts.ShouldUsePInvokeHelpers())
        // Helper-based path: call the PINVOKE_END helper with the address of
        // the inlined frame; no inline GC-state/flag sequence is emitted.
        noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);

        int adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, true, &baseReg, 0);

        getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
        genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_END,
                          EA_UNKNOWN); // retSize
        regTracker.rsTrackRegTrash(REG_ARG_0);

    BasicBlock*      clab_nostop;
    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();

#ifdef _TARGET_ARM_

    getEmitter()->emitDisableRandomNops();

    // Get the TCB into reg2: either the enregistered root local (which must
    // be callee-saved so it survives the unmanaged call), or reload it.
    if (frameListRoot->lvRegister)
        /* make sure that register is live across the call */

        reg2 = frameListRoot->lvRegNum;
        noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);

        /* mov   reg2, dword ptr [tcb address]    */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_

        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
                                  (unsigned)(frameListRoot - compiler->lvaTable), 0);
        regTracker.rsTrackRegTrash(reg2);

    // Mark the thread as back in cooperative (managed) mode: GC state byte = 1.
#ifdef _TARGET_ARM_
    /* strb [r2+offsetOfGcState], r3 */
    instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
    getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
    /* mov   byte ptr [tcb+offsetOfGcState], 1 */
    getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);

    /* test global flag (we return to managed code) */

    LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;

    addrOfCaptureThreadGlobal =
        compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
    // The EE returns exactly one of: the address itself, or an indirection to it.
    noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));

    // Can we directly use addrOfCaptureThreadGlobal?

    if (addrOfCaptureThreadGlobal)
#ifdef _TARGET_ARM_
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        regTracker.rsTrackRegTrash(reg3);
        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
        getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);

        // Indirect case: one extra load through the indirection cell.
#ifdef _TARGET_ARM_
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        regTracker.rsTrackRegTrash(reg3);
        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
#else // !_TARGET_ARM_

        getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
                                   (ssize_t)pAddrOfCaptureThreadGlobal);
        regTracker.rsTrackRegTrash(REG_ECX);

        getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);

#endif // !_TARGET_ARM_

    // If the capture flag is clear, skip around the stop-for-GC helper call.
    clab_nostop = genCreateTempLabel();

    /* Generate the conditional jump */
    emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, clab_nostop);

#ifdef _TARGET_ARM_
    // The helper preserves the return value on ARM
    /* save return value (if necessary) */
    if (retVal != RBM_NONE)
        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
            // push eax (low 32 bits of the return value)
            inst_RV(INS_push, REG_INTRET, TYP_INT);

            if (retVal == RBM_LNGRET)
                // push edx (high 32 bits of a long return value)
                inst_RV(INS_push, REG_EDX, TYP_INT);

    /* emit the call to the EE-helper that stops for GC (or other reasons) */

    genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
                      EA_UNKNOWN);                 /* retSize */

#ifdef _TARGET_ARM_
    // The helper preserves the return value on ARM
    /* restore return value (if necessary) */

    if (retVal != RBM_NONE)
        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
            if (retVal == RBM_LNGRET)
                // pop edx first (it was pushed last)
                inst_RV(INS_pop, REG_EDX, TYP_INT);
                regTracker.rsTrackRegTrash(REG_EDX);

            inst_RV(INS_pop, REG_INTRET, TYP_INT);
            regTracker.rsTrackRegTrash(REG_INTRET);

    /* genCondJump() closes the current emitter block */

    genDefineTempLabel(clab_nostop);

    // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
    // the above code. So the process is:
    // 1) Return to cooperative mode
    // 2) Check to see if we need to stop for GC
    // 3) Return from the p/invoke (as far as the stack walker is concerned).

    /* mov  dword ptr [frame.callSiteTracker], 0 */
    // NOTE(review): the comment above says callSiteTracker, but the code zeroes
    // the offsetOfReturnAddress field -- confirm which field is intended.

    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);

    getEmitter()->emitEnableRandomNops();
21641 /*****************************************************************************/
21643 /*****************************************************************************
21644 * TRACKING OF FLAGS
21645 *****************************************************************************/
21647 void CodeGen::genFlagsEqualToNone()
21649 genFlagsEqReg = REG_NA;
21650 genFlagsEqVar = (unsigned)-1;
21651 genFlagsEqLoc.Init();
21654 /*****************************************************************************
21656 * Record the fact that the flags register has a value that reflects the
21657 * contents of the given register.
21660 void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
21662 genFlagsEqLoc.CaptureLocation(getEmitter());
21663 genFlagsEqReg = reg;
21665 /* previous setting of flags by a var becomes invalid */
21667 genFlagsEqVar = 0xFFFFFFFF;
21669 /* Set appropriate flags on the tree */
21673 tree->gtFlags |= GTF_ZSF_SET;
21674 assert(tree->gtSetFlags());
21678 /*****************************************************************************
21680 * Record the fact that the flags register has a value that reflects the
21681 * contents of the given local variable.
21684 void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
21686 genFlagsEqLoc.CaptureLocation(getEmitter());
21687 genFlagsEqVar = var;
21689 /* previous setting of flags by a register becomes invalid */
21691 genFlagsEqReg = REG_NA;
21693 /* Set appropriate flags on the tree */
21697 tree->gtFlags |= GTF_ZSF_SET;
21698 assert(tree->gtSetFlags());
21702 /*****************************************************************************
21704 * Return an indication of whether the flags register is set to the current
21705 * value of the given register/variable. The return value is as follows:
21708 * true .. the zero flag (ZF) and sign flag (SF) is set
21711 bool CodeGen::genFlagsAreReg(regNumber reg)
21713 if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21721 bool CodeGen::genFlagsAreVar(unsigned var)
21723 if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21731 /*****************************************************************************
21732 * This utility function returns true iff the execution path from "from"
21733 * (inclusive) to "to" (exclusive) contains a death of the given var
21735 bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
21738 for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
21740 if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
21742 unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
21743 if (dyingVarNum == varNum)
21745 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
21746 if (varDsc->lvPromoted)
21748 assert(varDsc->lvType == TYP_STRUCT);
21749 unsigned firstFieldNum = varDsc->lvFieldLclStart;
21750 if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
21757 assert(tree != NULL);
21761 #endif // LEGACY_BACKEND